Merge lp:~jimpop/mailman/contrib-sitemapgen into lp:mailman/2.1

Proposed by Jim Popovitch
Status: Merged
Merged at revision: 1771
Proposed branch: lp:~jimpop/mailman/contrib-sitemapgen
Merge into: lp:mailman/2.1
Diff against target: 184 lines (+175/-0)
2 files modified
contrib/README.sitemapgen (+11/-0)
contrib/sitemapgen (+164/-0)
To merge this branch: bzr merge lp:~jimpop/mailman/contrib-sitemapgen
Reviewer Review Type Date Requested Status
Mark Sapiro Approve
Review via email: mp+347941@code.launchpad.net

Commit message

Aforementioned sitemapgen file which was hacked up from bin/sync_members

Description of the change

Generates sitemap.xml.gz files for a given listname.

To post a comment you must log in.
Revision history for this message
Mark Sapiro (msapiro) wrote :

Thank you for the contribution.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'contrib/README.sitemapgen'
2--- contrib/README.sitemapgen 1970-01-01 00:00:00 +0000
3+++ contrib/README.sitemapgen 2018-06-14 02:29:25 +0000
4@@ -0,0 +1,11 @@
5+Hacked from bin/sync_members
6+
7+Copy mailman/contrib/sitemapgen to mailman/bin/ and execute it like so:
8+
9+ /path/to/mailman/bin/sitemapgen <listname>
10+
11+Alternatively add it to cron like so:
12+
13+5 0 * * * for l in `/path/to/mailman/bin/list_lists -apb`; do
14+ /path/to/mailman/bin/sitemapgen $l; done
15+
16
17=== added file 'contrib/sitemapgen'
18--- contrib/sitemapgen 1970-01-01 00:00:00 +0000
19+++ contrib/sitemapgen 2018-06-14 02:29:25 +0000
20@@ -0,0 +1,164 @@
21+#! @PYTHON@
22+
23+# For a given listname, this script generates sitemap.xml.gz files
24+# under archives/private/<listname>/
25+#
26+# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
27+#
28+# graciously hacked from bin/sync_members
29+#
30+
31+"""Build Sitemap files for an archive
32+
33+Usage: %(program)s [options] listname
34+
35+Where `options' are:
36+
37+ --help
38+ -h
39+ Print this message.
40+
41+ listname
42+ Required. This specifies the list to generate sitemaps for.
43+"""
44+
45+import os
46+import sys
47+import paths
48+# Import this /after/ paths so that the sys.path is properly hacked
49+import email.Utils
50+from Mailman import MailList
51+from Mailman import Errors
52+from Mailman import Utils
53+from Mailman.UserDesc import UserDesc
54+from Mailman import mm_cfg
55+from Mailman.i18n import _
56+import getopt
57+import re
58+import time
59+from stat import *
60+from datetime import datetime, timedelta
61+import gzip
62+
63+
64
# Sitemap tuning table.  Each row is [age-in-weeks, priority, changefreq].
# Rows are ordered from youngest to oldest age limit; main() scans them in
# order and stops at the first row whose age limit the archived page is still
# younger than.  Pages older than every limit keep the values of the last row.
priorities = (
    [1, 1.0, "daily"],
    [4, 1.0, "weekly"],
    [30, 1.0, "monthly"],
    [52, 0.9, "never"],
    [100, 0.8, "never"],
    [200, 0.7, "never"],
    [300, 0.6, "never"],
    [400, 0.5, "never"],
)
74+
75+
76
# Name the script was invoked as; the module docstring refers to it through
# its %(program)s placeholder.
program = sys.argv[0]


def usage(code, msg=''):
    """Print the help text, then terminate the process.

    code -- exit status handed to sys.exit().  A non-zero value sends the
            output to stderr, zero sends it to stdout.
    msg  -- optional extra line printed after the help text.

    Never returns: sys.exit(code) is always called.
    """
    stream = sys.stdout
    if code:
        stream = sys.stderr
    print >> stream, _(__doc__)
    if msg:
        print >> stream, msg
    sys.exit(code)
88+
89+
90+
91
def _pick_priority(ts, now):
    """Return the (priority, changefreq) pair for a page last modified at ts.

    The module-level `priorities` table is scanned in order and the first row
    whose age limit (in weeks) the page is still younger than wins.  Pages
    older than every limit get the values of the last row.

    ts, now -- datetime objects (modification time and current time).
    """
    for weeks, priority, changefreq in priorities:
        if ts > now - timedelta(weeks=weeks):
            return priority, changefreq
    oldest = priorities[-1]
    return oldest[1], oldest[2]


def _write_sitemap(sitemap, path, baseurl, now):
    """Write the gzipped <urlset> file for one archive folder.

    sitemap -- filesystem path of the sitemap.xml.gz file to create.
    path    -- filesystem path of the archive folder to scan.
    baseurl -- URL under which the files of that folder are served.
    now     -- datetime used as the reference point for page ages.

    Every directory entry whose name matches \\d+\\.html gets one <url>
    element.  The gzip handle is closed even if a write or stat() fails.
    """
    # URLs always use '/', whatever os.sep is, hence posixpath.
    from posixpath import join as urljoin
    from xml.sax.saxutils import escape

    message_page = re.compile(r'\d+\.html')
    f = gzip.open(sitemap, 'wb')
    try:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        f.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')

        # Sorted so that repeated runs over an unchanged archive produce
        # identical output (os.listdir() order is arbitrary).
        for name in sorted(os.listdir(path)):
            if not message_page.search(name):
                continue

            mtime = os.stat(os.path.join(path, name))[ST_MTIME]
            priority, changefreq = _pick_priority(
                datetime.fromtimestamp(mtime), now)

            # The sitemap protocol requires entity-escaped URLs.
            f.write(' <url>\n <loc>' + escape(urljoin(baseurl, name)) + '</loc>\n')
            f.write(' <lastmod>' + time.strftime("%Y-%m-%d", time.gmtime(mtime)) + '</lastmod>\n')
            f.write(' <changefreq>' + changefreq + '</changefreq>\n')
            f.write(' <priority>' + str(priority) + '</priority>\n')
            f.write(' </url>\n')

        f.write('</urlset>\n')
    finally:
        f.close()


def _write_sitemap_index(sitemapindex, sitemaps, now):
    """Write the gzipped <sitemapindex> file listing every per-folder sitemap.

    sitemapindex -- filesystem path of the index file to create.
    sitemaps     -- sequence of sitemap URLs to list.
    now          -- datetime stamped as <lastmod> on every entry.
    """
    from xml.sax.saxutils import escape

    # Identical for every entry, so format it once.
    lastmod = time.strftime("%Y-%m-%d", now.timetuple())
    f = gzip.open(sitemapindex, 'wb')
    try:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        f.write('<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')

        for sitemap in sitemaps:
            f.write(' <sitemap>\n <loc>' + escape(sitemap) + '</loc>\n')
            f.write(' <lastmod>' + lastmod + '</lastmod>\n')
            f.write(' </sitemap>\n')

        f.write('</sitemapindex>\n')
    finally:
        f.close()


def main():
    """Generate sitemap files for the list named on the command line.

    Only the first command line argument is examined: -h/--help prints the
    usage text and exits with status 0, anything else is taken (lower-cased)
    as the list name.  With no argument at all, usage is printed and the
    exit status is 1.  An unknown list name prints an error to stderr and
    exits with status 1.

    For every sub-directory of the list's archive directory whose name starts
    with a digit, a sitemap.xml.gz is written into that sub-directory.  A
    sitemap.xml.gz index referencing all of them is then written into the
    archive directory itself.
    """
    from posixpath import join as urljoin

    args = sys.argv[1:]
    if not args:
        usage(1, _('Must have a listname'))
    if args[0] in ('-h', '--help'):
        usage(0)

    # NOTE(review): keep this local named `listname` -- the message below
    # carries a %(listname)s placeholder and no explicit substitution, so
    # _() presumably fills it in from this function's variables.
    listname = args[0].lower()

    # The list is opened without taking its lock (lock=0).
    try:
        mlist = MailList.MailList(listname, lock=0)
    except Errors.MMListError:
        print >> sys.stderr, _('No such list: %(listname)s')
        sys.exit(1)

    rootdir = mlist.archive_dir()
    rooturl = mlist.GetBaseArchiveURL()

    archive_folder = re.compile(r'^\d+')
    now = datetime.now()

    # Mask 022 (no group/other write bits), set once so that it covers the
    # per-folder sitemaps and the index file alike.
    os.umask(S_IWGRP | S_IWOTH)

    sitemaps = []
    for folder in sorted(os.listdir(rootdir)):
        path = os.path.join(rootdir, folder)
        if not os.path.isdir(path) or not archive_folder.search(folder):
            continue

        _write_sitemap(os.path.join(path, "sitemap.xml.gz"), path,
                       urljoin(rooturl, folder), now)
        sitemaps.append(urljoin(rooturl, folder, "sitemap.xml.gz"))

    # write out the sitemapindex file
    _write_sitemap_index(os.path.join(rootdir, "sitemap.xml.gz"), sitemaps, now)


if __name__ == '__main__':
    main()
187+