Merge lp:~vmiklos/bzr-fastimport/darcs into lp:~bzr/bzr-fastimport/fastimport.dev
- darcs
- Merge into fastimport.dev
Proposed by
Miklos Vajna
Status: Merged
Merged at revision: not available
Proposed branch: lp:~vmiklos/bzr-fastimport/darcs
Merge into: lp:~bzr/bzr-fastimport/fastimport.dev
Diff against target: 797 lines, 4 files modified
  exporters/darcs/darcs-fast-export (+359/-305)
  exporters/darcs/darcs-fast-export.txt (+4/-0)
  exporters/darcs/t/lib-httpd.sh (+67/-0)
  exporters/darcs/t/test2-git-http.sh (+22/-0)
To merge this branch: bzr merge lp:~vmiklos/bzr-fastimport/darcs
Related bugs: (none)
Reviewer: Ian Clatworthy — Approve
Review via email: mp+13776@code.launchpad.net
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Miklos Vajna (vmiklos) wrote:
lp:~vmiklos/bzr-fastimport/darcs
updated
- 257. By Ian Clatworthy
-
Get fastimport working on non-chk repositories again for bzr versions after 2.0.0
- 258. By Ian Clatworthy
-
Handle multi-level branches
Revision history for this message
Ian Clatworthy (ian-clatworthy):
review:
Approve
lp:~vmiklos/bzr-fastimport/darcs
updated
- 259. By Ian Clatworthy
-
http read support for darcs-fast-export
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'exporters/darcs/darcs-fast-export' |
2 | --- exporters/darcs/darcs-fast-export 2009-09-23 22:09:43 +0000 |
3 | +++ exporters/darcs/darcs-fast-export 2009-10-22 10:35:17 +0000 |
4 | @@ -4,7 +4,7 @@ |
5 | |
6 | darcs-fast-export - darcs backend for fast data importers |
7 | |
8 | - Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org> |
9 | + Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org> |
10 | Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de> |
11 | |
12 | This program is free software; you can redistribute it and/or modify |
13 | @@ -33,312 +33,366 @@ |
14 | import subprocess |
15 | import optparse |
16 | import re |
17 | +import urllib |
18 | +import StringIO |
19 | |
20 | sys = reload(sys) |
21 | sys.setdefaultencoding("utf-8") |
22 | |
23 | -def __get_zone(): |
24 | - now = time.localtime() |
25 | - if time.daylight and now[-1]: |
26 | - offset = time.altzone |
27 | - else: |
28 | - offset = time.timezone |
29 | - hours, minutes = divmod(abs(offset), 3600) |
30 | - if offset > 0: |
31 | - sign = "-" |
32 | - else: |
33 | - sign = "+" |
34 | - return sign, hours, minutes |
35 | - |
36 | -def get_zone_str(): |
37 | - sign, hours, minutes = __get_zone() |
38 | - return "%s%02d%02d" % (sign, hours, minutes // 60) |
39 | - |
40 | -def get_zone_int(): |
41 | - sign, hours, minutes = __get_zone() |
42 | - ret = hours*3600+minutes*60 |
43 | - if sign == "-": |
44 | - ret *= -1 |
45 | - return ret |
46 | - |
47 | -def get_patchname(patch): |
48 | - ret = [] |
49 | - s = "" |
50 | - if patch.attributes['inverted'].value == 'True': |
51 | - s = "UNDO: " |
52 | - cs = patch.getElementsByTagName("name")[0].childNodes |
53 | - if cs.length > 0: |
54 | - ret.append(s + cs[0].data) |
55 | - lines = patch.getElementsByTagName("comment") |
56 | - if lines: |
57 | - for i in lines[0].childNodes[0].data.split('\n'): |
58 | - if not i.startswith("Ignore-this: "): |
59 | - ret.append(i) |
60 | - return "\n".join(ret).encode('utf-8') |
61 | - |
62 | -def get_author(patch): |
63 | - """darcs allows any freeform string, but fast-import has a more |
64 | - strict format, so fix up broken author names here.""" |
65 | - |
66 | - author = patch.attributes['author'].value |
67 | - if author in authormap: |
68 | - author = authormap[author] |
69 | - if not len(author): |
70 | - author = "darcs-fast-export <darcs-fast-export>" |
71 | - # add missing name |
72 | - elif not ">" in author: |
73 | - author = "%s <%s>" % (author.split('@')[0], author) |
74 | - # avoid double quoting |
75 | - elif author[0] == '"' and author[-1] == '"': |
76 | - author = author[1:-1] |
77 | - # name after email |
78 | - elif author[-1] != '>': |
79 | - author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] |
80 | - return author.encode('utf-8') |
81 | - |
82 | -def get_date(patch): |
83 | - try: |
84 | - date = time.strptime(patch, "%Y%m%d%H%M%S") |
85 | - except ValueError: |
86 | - date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') |
87 | - return int(time.mktime(date)) + get_zone_int() |
88 | - |
89 | -def progress(s): |
90 | - print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) |
91 | - sys.stdout.flush() |
92 | - |
93 | -def log(s): |
94 | - logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) |
95 | - logsock.flush() |
96 | - |
97 | -hashes = [] |
98 | -def parse_inventory(sock=None): |
99 | - prev = None |
100 | - nextprev = False |
101 | - buf = [] |
102 | - if not sock: |
103 | - sock = open(os.path.join("_darcs", "hashed_inventory")) |
104 | - for i in sock.readlines(): |
105 | - if i.startswith("hash"): |
106 | - buf.insert(0, i[6:-1]) |
107 | - if i.startswith("Starting with inventory:"): |
108 | - nextprev = True |
109 | - elif nextprev: |
110 | - prev = i[:-1] |
111 | - nextprev = False |
112 | - sock.close() |
113 | - for i in buf: |
114 | - hashes.insert(0, i) |
115 | - if prev: |
116 | - sock = gzip.open(os.path.join("_darcs", "inventories", prev)) |
117 | - parse_inventory(sock) |
118 | - |
119 | -# Option Parser |
120 | -usage="%prog [options] darcsrepo" |
121 | -opp = optparse.OptionParser(usage=usage) |
122 | -opp.add_option("--import-marks", metavar="IFILE", |
123 | - help="read state for incremental imports from IFILE") |
124 | -opp.add_option("--export-marks", metavar="OFILE", |
125 | - help="write state for incremental imports from OFILE") |
126 | -opp.add_option("--encoding", |
127 | - help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") |
128 | -opp.add_option("--authors-file", metavar="F", |
129 | - help="read author transformations in old=new format from F") |
130 | -opp.add_option("--working", metavar="W", |
131 | - help="working directory which is removed at the end of non-incremental conversions") |
132 | -opp.add_option("--logfile", metavar="L", |
133 | - help="log file which contains the output of external programs invoked during the conversion") |
134 | -opp.add_option("--git-branch", metavar="B", |
135 | - help="git branch [default: refs/heads/master]") |
136 | -opp.add_option("--progress", metavar="P", |
137 | - help="insert progress statements after every n commit [default: 100]") |
138 | -(options, args) = opp.parse_args() |
139 | -if len(args) < 1: |
140 | - opp.error("darcsrepo required") |
141 | - |
142 | -export_marks = [] |
143 | -import_marks = [] |
144 | -if options.import_marks: |
145 | - sock = open(options.import_marks) |
146 | - for i in sock.readlines(): |
147 | - line = i.strip() |
148 | - if not len(line): |
149 | - continue |
150 | - import_marks.append(line.split(' ')[1]) |
151 | - export_marks.append(line) |
152 | - sock.close() |
153 | - |
154 | -# read author mapping file in gitauthors format, |
155 | -# i. e. in=out (one per # line) |
156 | -authormap = {} |
157 | -if options.authors_file: |
158 | - sock = open(options.authors_file) |
159 | - authormap = dict([i.strip().split('=',1) for i in sock]) |
160 | - sock.close() |
161 | - |
162 | -origin = os.path.abspath(args[0]) |
163 | -if options.working: |
164 | - working = os.path.abspath(options.working) |
165 | -else: |
166 | - working = "%s.darcs" % origin |
167 | -patchfile = "%s.patch" % origin |
168 | -if options.logfile: |
169 | - logfile = os.path.abspath(options.logfile) |
170 | -else: |
171 | - logfile = "%s.log" % origin |
172 | -logsock = open(logfile, "a") |
173 | -if options.git_branch: |
174 | - git_branch = options.git_branch |
175 | -else: |
176 | - git_branch = "refs/heads/master" |
177 | - |
178 | -if options.progress: |
179 | - prognum = int(options.progress) |
180 | -else: |
181 | - prognum = 100 |
182 | - |
183 | -progress("getting list of patches") |
184 | -if not len(import_marks): |
185 | - sock = os.popen("darcs changes --xml --reverse --repo %s" % origin) |
186 | -else: |
187 | - sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (origin, import_marks[-1])) |
188 | -buf = sock.read() |
189 | -sock.close() |
190 | -# this is hackish. we need to escape some bad chars, otherwise the xml |
191 | -# will not be valid |
192 | -buf = buf.replace('\x1b', '^[') |
193 | -if options.encoding: |
194 | - xmldoc = xml.dom.minidom.parseString(unicode(buf, options.encoding).encode('utf-8')) |
195 | -else: |
196 | - try: |
197 | - xmldoc = xml.dom.minidom.parseString(buf) |
198 | - except xml.parsers.expat.ExpatError: |
199 | +class Handler: |
200 | + def __init__(self): |
201 | + self.hashes = [] |
202 | + self.authormap = {} |
203 | + self.export_marks = [] |
204 | + self.import_marks = [] |
205 | + |
206 | + def __get_zone(self): |
207 | + now = time.localtime() |
208 | + if time.daylight and now[-1]: |
209 | + offset = time.altzone |
210 | + else: |
211 | + offset = time.timezone |
212 | + hours, minutes = divmod(abs(offset), 3600) |
213 | + if offset > 0: |
214 | + sign = "-" |
215 | + else: |
216 | + sign = "+" |
217 | + return sign, hours, minutes |
218 | + |
219 | + def get_zone_str(self): |
220 | + sign, hours, minutes = self.__get_zone() |
221 | + return "%s%02d%02d" % (sign, hours, minutes // 60) |
222 | + |
223 | + def get_zone_int(self): |
224 | + sign, hours, minutes = self.__get_zone() |
225 | + ret = hours*3600+minutes*60 |
226 | + if sign == "-": |
227 | + ret *= -1 |
228 | + return ret |
229 | + |
230 | + def get_patchname(self, patch): |
231 | + ret = [] |
232 | + s = "" |
233 | + if patch.attributes['inverted'].value == 'True': |
234 | + s = "UNDO: " |
235 | + cs = patch.getElementsByTagName("name")[0].childNodes |
236 | + if cs.length > 0: |
237 | + ret.append(s + cs[0].data) |
238 | + lines = patch.getElementsByTagName("comment") |
239 | + if lines: |
240 | + for i in lines[0].childNodes[0].data.split('\n'): |
241 | + if not i.startswith("Ignore-this: "): |
242 | + ret.append(i) |
243 | + return "\n".join(ret).encode('utf-8') |
244 | + |
245 | + def get_author(self, patch): |
246 | + """darcs allows any freeform string, but fast-import has a more |
247 | + strict format, so fix up broken author names here.""" |
248 | + |
249 | + author = patch.attributes['author'].value |
250 | + if author in self.authormap: |
251 | + author = self.authormap[author] |
252 | + if not len(author): |
253 | + author = "darcs-fast-export <darcs-fast-export>" |
254 | + # add missing name |
255 | + elif not ">" in author: |
256 | + author = "%s <%s>" % (author.split('@')[0], author) |
257 | + # avoid double quoting |
258 | + elif author[0] == '"' and author[-1] == '"': |
259 | + author = author[1:-1] |
260 | + # name after email |
261 | + elif author[-1] != '>': |
262 | + author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] |
263 | + return author.encode('utf-8') |
264 | + |
265 | + def get_date(self, patch): |
266 | try: |
267 | - import chardet |
268 | - except ImportError: |
269 | - sys.exit("Error, encoding is not utf-8. Please " + |
270 | - "either specify it with the --encoding " + |
271 | - "option or install chardet.") |
272 | - progress("encoding is not utf8, guessing charset") |
273 | - encoding = chardet.detect(buf)['encoding'] |
274 | - progress("detected encoding is %s" % encoding) |
275 | - xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) |
276 | -sys.stdout.flush() |
277 | - |
278 | -darcs2 = False |
279 | -oldfashionedpatch = True |
280 | -cwd = os.getcwd() |
281 | -if os.path.exists(os.path.join(origin, "_darcs", "format")): |
282 | - sock = open(os.path.join(origin, "_darcs", "format")) |
283 | - format = [x.strip() for x in sock] |
284 | - sock.close() |
285 | - darcs2 = 'darcs-2' in format |
286 | - oldfashionedpatch = not 'hashed' in format |
287 | -if not oldfashionedpatch: |
288 | - progress("parsing the inventory") |
289 | - os.chdir(origin) |
290 | - parse_inventory() |
291 | -if not options.import_marks or not os.path.exists(working): |
292 | - # init the tmp darcs repo |
293 | - os.mkdir(working) |
294 | - os.chdir(working) |
295 | - if darcs2: |
296 | - os.system("darcs init --darcs-2") |
297 | - else: |
298 | - os.system("darcs init --old-fashioned-inventory") |
299 | -else: |
300 | - os.chdir(working) |
301 | -if options.import_marks: |
302 | - sock = os.popen("darcs pull -a --match 'hash %s' %s" % (import_marks[-1], origin)) |
303 | - log("Building/updating working directory:\n%s" % sock.read()) |
304 | - sock.close() |
305 | - |
306 | -# this is the number of the NEXT patch |
307 | -count = 1 |
308 | -patches = xmldoc.getElementsByTagName('patch') |
309 | -if len(import_marks): |
310 | - patches = patches[1:] |
311 | - count = len(import_marks) + 1 |
312 | -if len(export_marks): |
313 | - # this is the mark number of the NEXT patch |
314 | - markcount = int(export_marks[-1].split(' ')[0][1:]) + 1 |
315 | -else: |
316 | - markcount = count |
317 | -# this may be huge and we need it many times |
318 | -patchnum = len(patches) |
319 | - |
320 | -if not len(import_marks): |
321 | - progress("starting export, repo has %d patches" % patchnum) |
322 | -else: |
323 | - progress("continuing export, %d patches to convert" % patchnum) |
324 | -paths = [] |
325 | -for i in patches: |
326 | - # apply the patch |
327 | - hash = i.attributes['hash'].value |
328 | - buf = ["\nNew patches:\n"] |
329 | - if oldfashionedpatch: |
330 | - sock = gzip.open(os.path.join(origin, "_darcs", "patches", hash)) |
331 | - else: |
332 | - sock = gzip.open(os.path.join(origin, "_darcs", "patches", hashes[count-1])) |
333 | - buf.append(sock.read()) |
334 | - sock.close() |
335 | - sock = os.popen("darcs changes --context") |
336 | - buf.append(sock.read()) |
337 | - sock.close() |
338 | - sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
339 | - sock.stdin.write("".join(buf)) |
340 | - sock.stdin.close() |
341 | - log("Applying %s:\n%s" % (hash, sock.stdout.read())) |
342 | - sock.stdout.close() |
343 | - message = get_patchname(i) |
344 | - # export the commit |
345 | - print "commit %s" % git_branch |
346 | - print "mark :%s" % markcount |
347 | - if options.export_marks: |
348 | - export_marks.append(":%s %s" % (markcount, hash)) |
349 | - date = get_date(i.attributes['date'].value) |
350 | - print "committer %s %s %s" % (get_author(i), date, get_zone_str()) |
351 | - print "data %d\n%s" % (len(message), message) |
352 | - if markcount > 1: |
353 | - print "from :%s" % (markcount-1) |
354 | - # export the files |
355 | - for j in paths: |
356 | - print "D %s" % j |
357 | - paths = [] |
358 | - for (root, dirs, files) in os.walk ("."): |
359 | - for f in files: |
360 | - j = os.path.normpath(os.path.join(root, f)) |
361 | - if j.startswith("_darcs") or "-darcs-backup" in j: |
362 | - continue |
363 | - paths.append(j) |
364 | - sock = open(j) |
365 | - buf = sock.read() |
366 | - sock.close() |
367 | - # darcs does not track the executable bit :/ |
368 | - print "M 644 inline %s" % j |
369 | - print "data %s\n%s" % (len(buf), buf) |
370 | - if message[:4] == "TAG ": |
371 | - tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') |
372 | - print "tag %s" % tag |
373 | - print "from :%s" % markcount |
374 | - print "tagger %s %s %s" % (get_author(i), date, get_zone_str()) |
375 | - print "data %d\n%s" % (len(message), message) |
376 | - if count % prognum == 0: |
377 | - progress("%d/%d patches" % (count, patchnum)) |
378 | - count += 1 |
379 | - markcount += 1 |
380 | - |
381 | -os.chdir(cwd) |
382 | - |
383 | -if not options.export_marks: |
384 | - shutil.rmtree(working) |
385 | -logsock.close() |
386 | - |
387 | -if options.export_marks: |
388 | - progress("writing export marks") |
389 | - sock = open(options.export_marks, 'w') |
390 | - sock.write("\n".join(export_marks)) |
391 | - sock.write("\n") |
392 | - sock.close() |
393 | - |
394 | -progress("finished") |
395 | + date = time.strptime(patch, "%Y%m%d%H%M%S") |
396 | + except ValueError: |
397 | + date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') |
398 | + return int(time.mktime(date)) + self.get_zone_int() |
399 | + |
400 | + def progress(self, s): |
401 | + print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) |
402 | + sys.stdout.flush() |
403 | + |
404 | + def log(self, s): |
405 | + self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) |
406 | + self.logsock.flush() |
407 | + |
408 | + def parse_inventory(self, sock=None): |
409 | + prev = None |
410 | + nextprev = False |
411 | + buf = [] |
412 | + if not sock: |
413 | + sock = self.open(os.path.join(self.origin, "_darcs", "hashed_inventory")) |
414 | + for i in sock.readlines(): |
415 | + if i.startswith("hash"): |
416 | + buf.insert(0, i[6:-1]) |
417 | + if i.startswith("Starting with inventory:"): |
418 | + nextprev = True |
419 | + elif nextprev: |
420 | + prev = i[:-1] |
421 | + nextprev = False |
422 | + sock.close() |
423 | + for i in buf: |
424 | + self.hashes.insert(0, i) |
425 | + if prev: |
426 | + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev)) |
427 | + self.parse_inventory(sock) |
428 | + |
429 | + # this is like gzip.open but supports urls as well |
430 | + def gzip_open(self, path): |
431 | + if os.path.exists(path): |
432 | + return gzip.open(path) |
433 | + buf = urllib.urlopen(path).read() |
434 | + sock = StringIO.StringIO(buf) |
435 | + return gzip.GzipFile(fileobj=sock) |
436 | + |
437 | + # this is like os.path.exists but supports urls as well |
438 | + def path_exists(self, path): |
439 | + if os.path.exists(path): |
440 | + return True |
441 | + else: |
442 | + return urllib.urlopen(path).getcode() == 200 |
443 | + |
444 | + # this is like open, but supports urls as well |
445 | + def open(self, path): |
446 | + if os.path.exists(path): |
447 | + return open(path) |
448 | + else: |
449 | + return urllib.urlopen(path) |
450 | + |
451 | + def handle_opts(self): |
452 | + # Option Parser |
453 | + usage="%prog [options] darcsrepo" |
454 | + opp = optparse.OptionParser(usage=usage) |
455 | + opp.add_option("--import-marks", metavar="IFILE", |
456 | + help="read state for incremental imports from IFILE") |
457 | + opp.add_option("--export-marks", metavar="OFILE", |
458 | + help="write state for incremental imports from OFILE") |
459 | + opp.add_option("--encoding", |
460 | + help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") |
461 | + opp.add_option("--authors-file", metavar="F", |
462 | + help="read author transformations in old=new format from F") |
463 | + opp.add_option("--working", metavar="W", |
464 | + help="working directory which is removed at the end of non-incremental conversions") |
465 | + opp.add_option("--logfile", metavar="L", |
466 | + help="log file which contains the output of external programs invoked during the conversion") |
467 | + opp.add_option("--git-branch", metavar="B", |
468 | + help="git branch [default: refs/heads/master]") |
469 | + opp.add_option("--progress", metavar="P", |
470 | + help="insert progress statements after every n commit [default: 100]") |
471 | + (self.options, self.args) = opp.parse_args() |
472 | + if len(self.args) < 1: |
473 | + opp.error("darcsrepo required") |
474 | + |
475 | + # read author mapping file in gitauthors format, |
476 | + # i. e. in=out (one per # line) |
477 | + if self.options.authors_file: |
478 | + sock = open(self.options.authors_file) |
479 | + self.authormap = dict([i.strip().split('=',1) for i in sock]) |
480 | + sock.close() |
481 | + |
482 | + if "://" not in self.args[0]: |
483 | + self.origin = os.path.abspath(self.args[0]) |
484 | + else: |
485 | + self.origin = self.args[0].strip('/') |
486 | + if self.options.working: |
487 | + self.working = os.path.abspath(self.options.working) |
488 | + else: |
489 | + if "://" not in self.origin: |
490 | + self.working = "%s.darcs" % self.origin |
491 | + else: |
492 | + self.working = "%s.darcs" % os.path.split(self.origin)[-1] |
493 | + if self.options.logfile: |
494 | + logfile = os.path.abspath(self.options.logfile) |
495 | + else: |
496 | + if "://" not in self.origin: |
497 | + logfile = "%s.log" % self.origin |
498 | + else: |
499 | + logfile = "%s.log" % os.path.split(self.origin)[-1] |
500 | + self.logsock = open(logfile, "a") |
501 | + if self.options.git_branch: |
502 | + self.git_branch = self.options.git_branch |
503 | + else: |
504 | + self.git_branch = "refs/heads/master" |
505 | + |
506 | + if self.options.progress: |
507 | + self.prognum = int(self.options.progress) |
508 | + else: |
509 | + self.prognum = 100 |
510 | + |
511 | + def handle_import_marks(self): |
512 | + if self.options.import_marks: |
513 | + sock = open(self.options.import_marks) |
514 | + for i in sock.readlines(): |
515 | + line = i.strip() |
516 | + if not len(line): |
517 | + continue |
518 | + self.import_marks.append(line.split(' ')[1]) |
519 | + self.export_marks.append(line) |
520 | + sock.close() |
521 | + |
522 | + def get_patches(self): |
523 | + self.progress("getting list of patches") |
524 | + if not len(self.import_marks): |
525 | + sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin) |
526 | + else: |
527 | + sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1])) |
528 | + buf = sock.read() |
529 | + sock.close() |
530 | + # this is hackish. we need to escape some bad chars, otherwise the xml |
531 | + # will not be valid |
532 | + buf = buf.replace('\x1b', '^[') |
533 | + if self.options.encoding: |
534 | + xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8')) |
535 | + else: |
536 | + try: |
537 | + xmldoc = xml.dom.minidom.parseString(buf) |
538 | + except xml.parsers.expat.ExpatError: |
539 | + try: |
540 | + import chardet |
541 | + except ImportError: |
542 | + sys.exit("Error, encoding is not utf-8. Please " + |
543 | + "either specify it with the --encoding " + |
544 | + "option or install chardet.") |
545 | + self.progress("encoding is not utf8, guessing charset") |
546 | + encoding = chardet.detect(buf)['encoding'] |
547 | + self.progress("detected encoding is %s" % encoding) |
548 | + xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) |
549 | + sys.stdout.flush() |
550 | + return xmldoc.getElementsByTagName('patch') |
551 | + |
552 | + def setup_workdir(self): |
553 | + darcs2 = False |
554 | + self.oldfashionedpatch = True |
555 | + self.cwd = os.getcwd() |
556 | + if self.path_exists(os.path.join(self.origin, "_darcs", "format")): |
557 | + sock = self.open(os.path.join(self.origin, "_darcs", "format")) |
558 | + format = [x.strip() for x in sock] |
559 | + sock.close() |
560 | + darcs2 = 'darcs-2' in format |
561 | + self.oldfashionedpatch = not 'hashed' in format |
562 | + if not self.oldfashionedpatch: |
563 | + self.progress("parsing the inventory") |
564 | + if "://" not in self.origin: |
565 | + os.chdir(self.origin) |
566 | + self.parse_inventory() |
567 | + if not self.options.import_marks or not os.path.exists(self.working): |
568 | + # init the tmp darcs repo |
569 | + os.mkdir(self.working) |
570 | + os.chdir(self.working) |
571 | + if darcs2: |
572 | + os.system("darcs init --darcs-2") |
573 | + else: |
574 | + os.system("darcs init --old-fashioned-inventory") |
575 | + else: |
576 | + os.chdir(self.working) |
577 | + if self.options.import_marks: |
578 | + sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin)) |
579 | + self.log("Building/updating working directory:\n%s" % sock.read()) |
580 | + sock.close() |
581 | + |
582 | + def export_patches(self): |
583 | + patches = self.get_patches() |
584 | + # this is the number of the NEXT patch |
585 | + count = 1 |
586 | + if len(self.import_marks): |
587 | + patches = patches[1:] |
588 | + count = len(self.import_marks) + 1 |
589 | + if len(self.export_marks): |
590 | + # this is the mark number of the NEXT patch |
591 | + markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1 |
592 | + else: |
593 | + markcount = count |
594 | + # this may be huge and we need it many times |
595 | + patchnum = len(patches) |
596 | + |
597 | + if not len(self.import_marks): |
598 | + self.progress("starting export, repo has %d patches" % patchnum) |
599 | + else: |
600 | + self.progress("continuing export, %d patches to convert" % patchnum) |
601 | + paths = [] |
602 | + for i in patches: |
603 | + # apply the patch |
604 | + hash = i.attributes['hash'].value |
605 | + buf = ["\nNew patches:\n"] |
606 | + if self.oldfashionedpatch: |
607 | + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash)) |
608 | + else: |
609 | + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1])) |
610 | + buf.append(sock.read()) |
611 | + sock.close() |
612 | + sock = os.popen("darcs changes --context") |
613 | + buf.append(sock.read()) |
614 | + sock.close() |
615 | + sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
616 | + sock.stdin.write("".join(buf)) |
617 | + sock.stdin.close() |
618 | + self.log("Applying %s:\n%s" % (hash, sock.stdout.read())) |
619 | + sock.stdout.close() |
620 | + message = self.get_patchname(i) |
621 | + # export the commit |
622 | + print "commit %s" % self.git_branch |
623 | + print "mark :%s" % markcount |
624 | + if self.options.export_marks: |
625 | + self.export_marks.append(":%s %s" % (markcount, hash)) |
626 | + date = self.get_date(i.attributes['date'].value) |
627 | + print "committer %s %s %s" % (self.get_author(i), date, self.get_zone_str()) |
628 | + print "data %d\n%s" % (len(message), message) |
629 | + if markcount > 1: |
630 | + print "from :%s" % (markcount-1) |
631 | + # export the files |
632 | + for j in paths: |
633 | + print "D %s" % j |
634 | + paths = [] |
635 | + for (root, dirs, files) in os.walk ("."): |
636 | + for f in files: |
637 | + j = os.path.normpath(os.path.join(root, f)) |
638 | + if j.startswith("_darcs") or "-darcs-backup" in j: |
639 | + continue |
640 | + paths.append(j) |
641 | + sock = open(j) |
642 | + buf = sock.read() |
643 | + sock.close() |
644 | + # darcs does not track the executable bit :/ |
645 | + print "M 644 inline %s" % j |
646 | + print "data %s\n%s" % (len(buf), buf) |
647 | + if message[:4] == "TAG ": |
648 | + tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') |
649 | + print "tag %s" % tag |
650 | + print "from :%s" % markcount |
651 | + print "tagger %s %s %s" % (self.get_author(i), date, self.get_zone_str()) |
652 | + print "data %d\n%s" % (len(message), message) |
653 | + if count % self.prognum == 0: |
654 | + self.progress("%d/%d patches" % (count, patchnum)) |
655 | + count += 1 |
656 | + markcount += 1 |
657 | + |
658 | + os.chdir(self.cwd) |
659 | + |
660 | + if not self.options.export_marks: |
661 | + shutil.rmtree(self.working) |
662 | + self.logsock.close() |
663 | + |
664 | + def handle_export_marks(self): |
665 | + if self.options.export_marks: |
666 | + self.progress("writing export marks") |
667 | + sock = open(self.options.export_marks, 'w') |
668 | + sock.write("\n".join(self.export_marks)) |
669 | + sock.write("\n") |
670 | + sock.close() |
671 | + |
672 | + self.progress("finished") |
673 | + |
674 | + def handle(self): |
675 | + self.handle_opts() |
676 | + self.handle_import_marks() |
677 | + self.setup_workdir() |
678 | + self.export_patches() |
679 | + self.handle_export_marks() |
680 | + |
681 | +if __name__ == "__main__": |
682 | + h = Handler() |
683 | + h.handle() |
684 | |
685 | === modified file 'exporters/darcs/darcs-fast-export.txt' |
686 | --- exporters/darcs/darcs-fast-export.txt 2009-08-10 22:20:43 +0000 |
687 | +++ exporters/darcs/darcs-fast-export.txt 2009-10-22 10:35:17 +0000 |
688 | @@ -18,6 +18,10 @@ |
689 | repository. It supports incremental conversion as well, via the |
690 | --import-marks / --export-marks switches. |
691 | |
692 | +Optionally the darcsrepo string may be a HTTP repository, in that case |
693 | +only the patches are downloaded, not the pristine, speeding up a |
694 | +one-time import. |
695 | + |
696 | == OPTIONS |
697 | |
698 | -h, --help:: |
699 | |
700 | === added file 'exporters/darcs/t/lib-httpd.sh' |
701 | --- exporters/darcs/t/lib-httpd.sh 1970-01-01 00:00:00 +0000 |
702 | +++ exporters/darcs/t/lib-httpd.sh 2009-10-22 10:35:18 +0000 |
703 | @@ -0,0 +1,67 @@ |
704 | +#!/bin/sh |
705 | +# |
706 | +# This is based on git's t/lib-httpd.sh, which is |
707 | +# Copyright (c) 2008 Clemens Buchacher <drizzd@aon.at> |
708 | +# |
709 | + |
710 | +if test -n "$DFE_TEST_SKIP_HTTPD" |
711 | +then |
712 | + echo "skipping test (undef DFE_TEST_SKIP_HTTPD to enable)" |
713 | + exit |
714 | +fi |
715 | + |
716 | +LIB_HTTPD_PATH=${LIB_HTTPD_PATH-'/usr/sbin/httpd'} |
717 | +LIB_HTTPD_PORT=${LIB_HTTPD_PORT-'8111'} |
718 | + |
719 | +HTTPD_ROOT_PATH="$PWD"/httpd |
720 | +HTTPD_DOCUMENT_ROOT_PATH=$HTTPD_ROOT_PATH/www |
721 | + |
722 | +if ! test -x "$LIB_HTTPD_PATH" |
723 | +then |
724 | + echo "skipping test, no web server found at '$LIB_HTTPD_PATH'" |
725 | + exit |
726 | +fi |
727 | + |
728 | +HTTPD_VERSION=`$LIB_HTTPD_PATH -v | \ |
729 | + sed -n 's/^Server version: Apache\/\([0-9]*\)\..*$/\1/p; q'` |
730 | + |
731 | +if test -n "$HTTPD_VERSION" |
732 | +then |
733 | + if test -z "$LIB_HTTPD_MODULE_PATH" |
734 | + then |
735 | + if ! test $HTTPD_VERSION -ge 2 |
736 | + then |
737 | + echo "skipping test, at least Apache version 2 is required" |
738 | + exit |
739 | + fi |
740 | + |
741 | + LIB_HTTPD_MODULE_PATH='/usr/lib/apache' |
742 | + fi |
743 | +else |
744 | + error "Could not identify web server at '$LIB_HTTPD_PATH'" |
745 | +fi |
746 | + |
747 | +HTTPD_PARA="-d $HTTPD_ROOT_PATH -f $HTTPD_ROOT_PATH/apache.conf" |
748 | + |
749 | +prepare_httpd() { |
750 | + mkdir -p $HTTPD_DOCUMENT_ROOT_PATH |
751 | + |
752 | + ln -s $LIB_HTTPD_MODULE_PATH $HTTPD_ROOT_PATH/modules |
753 | + |
754 | + echo "PidFile httpd.pid" > $HTTPD_ROOT_PATH/apache.conf |
755 | + echo "DocumentRoot www" >> $HTTPD_ROOT_PATH/apache.conf |
756 | + echo "ErrorLog error.log" >> $HTTPD_ROOT_PATH/apache.conf |
757 | + |
758 | + HTTPD_URL=http://127.0.0.1:$LIB_HTTPD_PORT |
759 | +} |
760 | + |
761 | +start_httpd() { |
762 | + prepare_httpd |
763 | + |
764 | + "$LIB_HTTPD_PATH" $HTTPD_PARA \ |
765 | + -c "Listen 127.0.0.1:$LIB_HTTPD_PORT" -k start |
766 | +} |
767 | + |
768 | +stop_httpd() { |
769 | + "$LIB_HTTPD_PATH" $HTTPD_PARA -k stop |
770 | +} |
771 | |
772 | === added file 'exporters/darcs/t/test2-git-http.sh' |
773 | --- exporters/darcs/t/test2-git-http.sh 1970-01-01 00:00:00 +0000 |
774 | +++ exporters/darcs/t/test2-git-http.sh 2009-10-22 10:35:18 +0000 |
775 | @@ -0,0 +1,22 @@ |
776 | +. ./lib.sh |
777 | +. ./lib-httpd.sh |
778 | + |
779 | +rm -rf test2.darcs test2.git httpd |
780 | +create_darcs test2 --darcs-2 |
781 | +mkdir -p $HTTPD_DOCUMENT_ROOT_PATH |
782 | +mv -v test2 $HTTPD_DOCUMENT_ROOT_PATH |
783 | +ln -s $HTTPD_DOCUMENT_ROOT_PATH/test2 . |
784 | + |
785 | +mkdir test2.git |
786 | +cd test2.git |
787 | +git --bare init |
788 | +cd .. |
789 | +start_httpd |
790 | +darcs-fast-export $HTTPD_URL/test2 |(cd test2.git; git fast-import) |
791 | +ret=$? |
792 | +stop_httpd |
793 | +if [ $ret != 0 ]; then |
794 | + exit $ret |
795 | +fi |
796 | +diff_git test2 |
797 | +exit $? |
The externally visible change is the HTTP read support (the relevant testcase was a real challenge ;) ), but the internal one is a massive refactoring into a Python class.