Merge lp:~blr/launchpad/bug-1472045-demangle-inlinecomment-mail into lp:launchpad
- bug-1472045-demangle-inlinecomment-mail
- Merge into devel
Proposed by
Kit Randel
Status: | Merged | ||||
---|---|---|---|---|---|
Approved by: | Kit Randel | ||||
Approved revision: | no longer in the source branch. | ||||
Merged at revision: | 17614 | ||||
Proposed branch: | lp:~blr/launchpad/bug-1472045-demangle-inlinecomment-mail | ||||
Merge into: | lp:launchpad | ||||
Diff against target: |
735 lines (+611/-18) 3 files modified
lib/lp/code/mail/codereviewcomment.py (+1/-1) lib/lp/code/mail/patches.py (+513/-0) lib/lp/code/mail/tests/test_codereviewcomment.py (+97/-17) |
||||
To merge this branch: | bzr merge lp:~blr/launchpad/bug-1472045-demangle-inlinecomment-mail | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
William Grant | code | Approve | |
Review via email: mp+264105@code.launchpad.net |
Commit message
Ensure blank lines and git dirty headers (diff, index) are added to dirty_head.
Description of the change
To post a comment you must log in.
Revision history for this message
Kit Randel (blr) wrote : | # |
Revision history for this message
Kit Randel (blr) wrote : | # |
Reverted the blank line handling in codereviewcomment, will attempt to handle in patches.
Revision history for this message
Kit Randel (blr) wrote : | # |
Have cloned a local copy of bzrlib.patches with support for parsing dirty headers from git, and respecting blank lines preceding dirty headers.
Have added an additional test which ensures blank lines within hunks are still handled appropriately.
While this code could (and perhaps should) be refactored to handle all general cases of dirty headers, this does appear to work and will fix the broken behaviour in production.
Revision history for this message
William Grant (wgrant) : | # |
review:
Approve
(code)
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/code/mail/codereviewcomment.py' | |||
2 | --- lib/lp/code/mail/codereviewcomment.py 2015-07-07 05:32:11 +0000 | |||
3 | +++ lib/lp/code/mail/codereviewcomment.py 2015-07-09 05:40:58 +0000 | |||
4 | @@ -10,7 +10,6 @@ | |||
5 | 10 | 'CodeReviewCommentMailer', | 10 | 'CodeReviewCommentMailer', |
6 | 11 | ] | 11 | ] |
7 | 12 | 12 | ||
8 | 13 | from bzrlib import patches | ||
9 | 14 | from zope.component import getUtility | 13 | from zope.component import getUtility |
10 | 15 | from zope.security.proxy import removeSecurityProxy | 14 | from zope.security.proxy import removeSecurityProxy |
11 | 16 | 15 | ||
12 | @@ -21,6 +20,7 @@ | |||
13 | 21 | from lp.code.interfaces.codereviewinlinecomment import ( | 20 | from lp.code.interfaces.codereviewinlinecomment import ( |
14 | 22 | ICodeReviewInlineCommentSet, | 21 | ICodeReviewInlineCommentSet, |
15 | 23 | ) | 22 | ) |
16 | 23 | from lp.code.mail import patches | ||
17 | 24 | from lp.code.mail.branchmergeproposal import BMPMailer | 24 | from lp.code.mail.branchmergeproposal import BMPMailer |
18 | 25 | from lp.services.mail.sendmail import ( | 25 | from lp.services.mail.sendmail import ( |
19 | 26 | append_footer, | 26 | append_footer, |
20 | 27 | 27 | ||
21 | === added file 'lib/lp/code/mail/patches.py' | |||
22 | --- lib/lp/code/mail/patches.py 1970-01-01 00:00:00 +0000 | |||
23 | +++ lib/lp/code/mail/patches.py 2015-07-09 05:40:58 +0000 | |||
24 | @@ -0,0 +1,513 @@ | |||
25 | 1 | # This file was cloned from bzr-2.6.0-lp-3 (bzrlib.patches) and | ||
26 | 2 | # customised for LP. | ||
27 | 3 | # | ||
28 | 4 | # Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd | ||
29 | 5 | # <aaron.bentley@utoronto.ca> | ||
30 | 6 | # | ||
31 | 7 | # This program is free software; you can redistribute it and/or modify | ||
32 | 8 | # it under the terms of the GNU General Public License as published by | ||
33 | 9 | # the Free Software Foundation; either version 2 of the License, or | ||
34 | 10 | # (at your option) any later version. | ||
35 | 11 | # | ||
36 | 12 | # This program is distributed in the hope that it will be useful, | ||
37 | 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
38 | 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
39 | 15 | # GNU General Public License for more details. | ||
40 | 16 | # | ||
41 | 17 | # You should have received a copy of the GNU General Public License | ||
42 | 18 | # along with this program; if not, write to the Free Software | ||
43 | 19 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
44 | 20 | |||
45 | 21 | from __future__ import absolute_import | ||
46 | 22 | |||
47 | 23 | from bzrlib.errors import ( | ||
48 | 24 | BinaryFiles, | ||
49 | 25 | MalformedHunkHeader, | ||
50 | 26 | MalformedLine, | ||
51 | 27 | MalformedPatchHeader, | ||
52 | 28 | PatchConflict, | ||
53 | 29 | PatchSyntax, | ||
54 | 30 | ) | ||
55 | 31 | |||
56 | 32 | import re | ||
57 | 33 | |||
58 | 34 | |||
59 | 35 | binary_files_re = 'Binary files (.*) and (.*) differ\n' | ||
60 | 36 | |||
61 | 37 | |||
62 | 38 | def get_patch_names(iter_lines): | ||
63 | 39 | line = iter_lines.next() | ||
64 | 40 | try: | ||
65 | 41 | match = re.match(binary_files_re, line) | ||
66 | 42 | if match is not None: | ||
67 | 43 | raise BinaryFiles(match.group(1), match.group(2)) | ||
68 | 44 | if not line.startswith("--- "): | ||
69 | 45 | raise MalformedPatchHeader("No orig name", line) | ||
70 | 46 | else: | ||
71 | 47 | orig_name = line[4:].rstrip("\n") | ||
72 | 48 | except StopIteration: | ||
73 | 49 | raise MalformedPatchHeader("No orig line", "") | ||
74 | 50 | try: | ||
75 | 51 | line = iter_lines.next() | ||
76 | 52 | if not line.startswith("+++ "): | ||
77 | 53 | raise PatchSyntax("No mod name") | ||
78 | 54 | else: | ||
79 | 55 | mod_name = line[4:].rstrip("\n") | ||
80 | 56 | except StopIteration: | ||
81 | 57 | raise MalformedPatchHeader("No mod line", "") | ||
82 | 58 | return (orig_name, mod_name) | ||
83 | 59 | |||
84 | 60 | |||
85 | 61 | def parse_range(textrange): | ||
86 | 62 | """Parse a patch range, handling the "1" special-case | ||
87 | 63 | |||
88 | 64 | :param textrange: The text to parse | ||
89 | 65 | :type textrange: str | ||
90 | 66 | :return: the position and range, as a tuple | ||
91 | 67 | :rtype: (int, int) | ||
92 | 68 | """ | ||
93 | 69 | tmp = textrange.split(',') | ||
94 | 70 | if len(tmp) == 1: | ||
95 | 71 | pos = tmp[0] | ||
96 | 72 | range = "1" | ||
97 | 73 | else: | ||
98 | 74 | (pos, range) = tmp | ||
99 | 75 | pos = int(pos) | ||
100 | 76 | range = int(range) | ||
101 | 77 | return (pos, range) | ||
102 | 78 | |||
103 | 79 | |||
104 | 80 | def hunk_from_header(line): | ||
105 | 81 | import re | ||
106 | 82 | matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line) | ||
107 | 83 | if matches is None: | ||
108 | 84 | raise MalformedHunkHeader("Does not match format.", line) | ||
109 | 85 | try: | ||
110 | 86 | (orig, mod) = matches.group(1).split(" ") | ||
111 | 87 | except (ValueError, IndexError), e: | ||
112 | 88 | raise MalformedHunkHeader(str(e), line) | ||
113 | 89 | if not orig.startswith('-') or not mod.startswith('+'): | ||
114 | 90 | raise MalformedHunkHeader("Positions don't start with + or -.", line) | ||
115 | 91 | try: | ||
116 | 92 | (orig_pos, orig_range) = parse_range(orig[1:]) | ||
117 | 93 | (mod_pos, mod_range) = parse_range(mod[1:]) | ||
118 | 94 | except (ValueError, IndexError), e: | ||
119 | 95 | raise MalformedHunkHeader(str(e), line) | ||
120 | 96 | if mod_range < 0 or orig_range < 0: | ||
121 | 97 | raise MalformedHunkHeader("Hunk range is negative", line) | ||
122 | 98 | tail = matches.group(3) | ||
123 | 99 | return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail) | ||
124 | 100 | |||
125 | 101 | |||
126 | 102 | class HunkLine: | ||
127 | 103 | def __init__(self, contents): | ||
128 | 104 | self.contents = contents | ||
129 | 105 | |||
130 | 106 | def get_str(self, leadchar): | ||
131 | 107 | if self.contents == "\n" and leadchar == " " and False: | ||
132 | 108 | return "\n" | ||
133 | 109 | if not self.contents.endswith('\n'): | ||
134 | 110 | terminator = '\n' + NO_NL | ||
135 | 111 | else: | ||
136 | 112 | terminator = '' | ||
137 | 113 | return leadchar + self.contents + terminator | ||
138 | 114 | |||
139 | 115 | |||
140 | 116 | class ContextLine(HunkLine): | ||
141 | 117 | def __init__(self, contents): | ||
142 | 118 | HunkLine.__init__(self, contents) | ||
143 | 119 | |||
144 | 120 | def __str__(self): | ||
145 | 121 | return self.get_str(" ") | ||
146 | 122 | |||
147 | 123 | |||
148 | 124 | class InsertLine(HunkLine): | ||
149 | 125 | def __init__(self, contents): | ||
150 | 126 | HunkLine.__init__(self, contents) | ||
151 | 127 | |||
152 | 128 | def __str__(self): | ||
153 | 129 | return self.get_str("+") | ||
154 | 130 | |||
155 | 131 | |||
156 | 132 | class RemoveLine(HunkLine): | ||
157 | 133 | def __init__(self, contents): | ||
158 | 134 | HunkLine.__init__(self, contents) | ||
159 | 135 | |||
160 | 136 | def __str__(self): | ||
161 | 137 | return self.get_str("-") | ||
162 | 138 | |||
163 | 139 | NO_NL = '\\ No newline at end of file\n' | ||
164 | 140 | __pychecker__ = "no-returnvalues" | ||
165 | 141 | |||
166 | 142 | |||
167 | 143 | def parse_line(line): | ||
168 | 144 | if line.startswith("\n"): | ||
169 | 145 | return ContextLine(line) | ||
170 | 146 | elif line.startswith(" "): | ||
171 | 147 | return ContextLine(line[1:]) | ||
172 | 148 | elif line.startswith("+"): | ||
173 | 149 | return InsertLine(line[1:]) | ||
174 | 150 | elif line.startswith("-"): | ||
175 | 151 | return RemoveLine(line[1:]) | ||
176 | 152 | else: | ||
177 | 153 | raise MalformedLine("Unknown line type", line) | ||
178 | 154 | __pychecker__ = "" | ||
179 | 155 | |||
180 | 156 | |||
181 | 157 | class Hunk: | ||
182 | 158 | def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None): | ||
183 | 159 | self.orig_pos = orig_pos | ||
184 | 160 | self.orig_range = orig_range | ||
185 | 161 | self.mod_pos = mod_pos | ||
186 | 162 | self.mod_range = mod_range | ||
187 | 163 | self.tail = tail | ||
188 | 164 | self.lines = [] | ||
189 | 165 | |||
190 | 166 | def get_header(self): | ||
191 | 167 | if self.tail is None: | ||
192 | 168 | tail_str = '' | ||
193 | 169 | else: | ||
194 | 170 | tail_str = ' ' + self.tail | ||
195 | 171 | return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos, | ||
196 | 172 | self.orig_range), | ||
197 | 173 | self.range_str(self.mod_pos, | ||
198 | 174 | self.mod_range), | ||
199 | 175 | tail_str) | ||
200 | 176 | |||
201 | 177 | def range_str(self, pos, range): | ||
202 | 178 | """Return a file range, special-casing for 1-line files. | ||
203 | 179 | |||
204 | 180 | :param pos: The position in the file | ||
205 | 181 | :type pos: int | ||
206 | 182 | :range: The range in the file | ||
207 | 183 | :type range: int | ||
208 | 184 | :return: a string in the format 1,4 except when range == pos == 1 | ||
209 | 185 | """ | ||
210 | 186 | if range == 1: | ||
211 | 187 | return "%i" % pos | ||
212 | 188 | else: | ||
213 | 189 | return "%i,%i" % (pos, range) | ||
214 | 190 | |||
215 | 191 | def __str__(self): | ||
216 | 192 | lines = [self.get_header()] | ||
217 | 193 | for line in self.lines: | ||
218 | 194 | lines.append(str(line)) | ||
219 | 195 | return "".join(lines) | ||
220 | 196 | |||
221 | 197 | def shift_to_mod(self, pos): | ||
222 | 198 | if pos < self.orig_pos - 1: | ||
223 | 199 | return 0 | ||
224 | 200 | elif pos > self.orig_pos + self.orig_range: | ||
225 | 201 | return self.mod_range - self.orig_range | ||
226 | 202 | else: | ||
227 | 203 | return self.shift_to_mod_lines(pos) | ||
228 | 204 | |||
229 | 205 | def shift_to_mod_lines(self, pos): | ||
230 | 206 | position = self.orig_pos - 1 | ||
231 | 207 | shift = 0 | ||
232 | 208 | for line in self.lines: | ||
233 | 209 | if isinstance(line, InsertLine): | ||
234 | 210 | shift += 1 | ||
235 | 211 | elif isinstance(line, RemoveLine): | ||
236 | 212 | if position == pos: | ||
237 | 213 | return None | ||
238 | 214 | shift -= 1 | ||
239 | 215 | position += 1 | ||
240 | 216 | elif isinstance(line, ContextLine): | ||
241 | 217 | position += 1 | ||
242 | 218 | if position > pos: | ||
243 | 219 | break | ||
244 | 220 | return shift | ||
245 | 221 | |||
246 | 222 | |||
247 | 223 | def iter_hunks(iter_lines, allow_dirty=False): | ||
248 | 224 | ''' | ||
249 | 225 | :arg iter_lines: iterable of lines to parse for hunks | ||
250 | 226 | :kwarg allow_dirty: If True, when we encounter something that is not | ||
251 | 227 | a hunk header when we're looking for one, assume the rest of the lines | ||
252 | 228 | are not part of the patch (comments or other junk). Default False | ||
253 | 229 | ''' | ||
254 | 230 | hunk = None | ||
255 | 231 | for line in iter_lines: | ||
256 | 232 | if line == "\n": | ||
257 | 233 | if hunk is not None: | ||
258 | 234 | yield hunk | ||
259 | 235 | hunk = None | ||
260 | 236 | continue | ||
261 | 237 | if hunk is not None: | ||
262 | 238 | yield hunk | ||
263 | 239 | try: | ||
264 | 240 | hunk = hunk_from_header(line) | ||
265 | 241 | except MalformedHunkHeader: | ||
266 | 242 | if allow_dirty: | ||
267 | 243 | # If the line isn't a hunk header, then we've reached the end | ||
268 | 244 | # of this patch and there's "junk" at the end. Ignore the | ||
269 | 245 | # rest of this patch. | ||
270 | 246 | return | ||
271 | 247 | raise | ||
272 | 248 | orig_size = 0 | ||
273 | 249 | mod_size = 0 | ||
274 | 250 | while orig_size < hunk.orig_range or mod_size < hunk.mod_range: | ||
275 | 251 | hunk_line = parse_line(iter_lines.next()) | ||
276 | 252 | hunk.lines.append(hunk_line) | ||
277 | 253 | if isinstance(hunk_line, (RemoveLine, ContextLine)): | ||
278 | 254 | orig_size += 1 | ||
279 | 255 | if isinstance(hunk_line, (InsertLine, ContextLine)): | ||
280 | 256 | mod_size += 1 | ||
281 | 257 | if hunk is not None: | ||
282 | 258 | yield hunk | ||
283 | 259 | |||
284 | 260 | |||
285 | 261 | class BinaryPatch(object): | ||
286 | 262 | def __init__(self, oldname, newname): | ||
287 | 263 | self.oldname = oldname | ||
288 | 264 | self.newname = newname | ||
289 | 265 | |||
290 | 266 | def __str__(self): | ||
291 | 267 | return 'Binary files %s and %s differ\n' % (self.oldname, self.newname) | ||
292 | 268 | |||
293 | 269 | |||
294 | 270 | class Patch(BinaryPatch): | ||
295 | 271 | |||
296 | 272 | def __init__(self, oldname, newname): | ||
297 | 273 | BinaryPatch.__init__(self, oldname, newname) | ||
298 | 274 | self.hunks = [] | ||
299 | 275 | |||
300 | 276 | def __str__(self): | ||
301 | 277 | ret = self.get_header() | ||
302 | 278 | ret += "".join([str(h) for h in self.hunks]) | ||
303 | 279 | return ret | ||
304 | 280 | |||
305 | 281 | def get_header(self): | ||
306 | 282 | return "--- %s\n+++ %s\n" % (self.oldname, self.newname) | ||
307 | 283 | |||
308 | 284 | def stats_values(self): | ||
309 | 285 | """Calculate the number of inserts and removes.""" | ||
310 | 286 | removes = 0 | ||
311 | 287 | inserts = 0 | ||
312 | 288 | for hunk in self.hunks: | ||
313 | 289 | for line in hunk.lines: | ||
314 | 290 | if isinstance(line, InsertLine): | ||
315 | 291 | inserts += 1 | ||
316 | 292 | elif isinstance(line, RemoveLine): | ||
317 | 293 | removes += 1 | ||
318 | 294 | return (inserts, removes, len(self.hunks)) | ||
319 | 295 | |||
320 | 296 | def stats_str(self): | ||
321 | 297 | """Return a string of patch statistics""" | ||
322 | 298 | return "%i inserts, %i removes in %i hunks" % \ | ||
323 | 299 | self.stats_values() | ||
324 | 300 | |||
325 | 301 | def pos_in_mod(self, position): | ||
326 | 302 | newpos = position | ||
327 | 303 | for hunk in self.hunks: | ||
328 | 304 | shift = hunk.shift_to_mod(position) | ||
329 | 305 | if shift is None: | ||
330 | 306 | return None | ||
331 | 307 | newpos += shift | ||
332 | 308 | return newpos | ||
333 | 309 | |||
334 | 310 | def iter_inserted(self): | ||
335 | 311 | """Iteraties through inserted lines | ||
336 | 312 | |||
337 | 313 | :return: Pair of line number, line | ||
338 | 314 | :rtype: iterator of (int, InsertLine) | ||
339 | 315 | """ | ||
340 | 316 | for hunk in self.hunks: | ||
341 | 317 | pos = hunk.mod_pos - 1 | ||
342 | 318 | for line in hunk.lines: | ||
343 | 319 | if isinstance(line, InsertLine): | ||
344 | 320 | yield (pos, line) | ||
345 | 321 | pos += 1 | ||
346 | 322 | if isinstance(line, ContextLine): | ||
347 | 323 | pos += 1 | ||
348 | 324 | |||
349 | 325 | |||
350 | 326 | def parse_patch(iter_lines, allow_dirty=False): | ||
351 | 327 | ''' | ||
352 | 328 | :arg iter_lines: iterable of lines to parse | ||
353 | 329 | :kwarg allow_dirty: If True, allow the patch to have trailing junk. | ||
354 | 330 | Default False | ||
355 | 331 | ''' | ||
356 | 332 | iter_lines = iter_lines_handle_nl(iter_lines) | ||
357 | 333 | try: | ||
358 | 334 | (orig_name, mod_name) = get_patch_names(iter_lines) | ||
359 | 335 | except BinaryFiles, e: | ||
360 | 336 | return BinaryPatch(e.orig_name, e.mod_name) | ||
361 | 337 | else: | ||
362 | 338 | patch = Patch(orig_name, mod_name) | ||
363 | 339 | for hunk in iter_hunks(iter_lines, allow_dirty): | ||
364 | 340 | patch.hunks.append(hunk) | ||
365 | 341 | return patch | ||
366 | 342 | |||
367 | 343 | |||
368 | 344 | def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False): | ||
369 | 345 | ''' | ||
370 | 346 | :arg iter_lines: iterable of lines to parse for patches | ||
371 | 347 | :kwarg allow_dirty: If True, allow comments and other non-patch text | ||
372 | 348 | before the first patch. Note that the algorithm here can only find | ||
373 | 349 | such text before any patches have been found. Comments after the | ||
374 | 350 | first patch are stripped away in iter_hunks() if it is also passed | ||
375 | 351 | allow_dirty=True. Default False. | ||
376 | 352 | ''' | ||
377 | 353 | # FIXME: Docstring is not quite true. We allow certain comments no | ||
378 | 354 | # matter what, If they startwith '===', '***', or '#' Someone should | ||
379 | 355 | # reexamine this logic and decide if we should include those in | ||
380 | 356 | # allow_dirty or restrict those to only being before the patch is found | ||
381 | 357 | # (as allow_dirty does). | ||
382 | 358 | regex = re.compile(binary_files_re) | ||
383 | 359 | saved_lines = [] | ||
384 | 360 | dirty_head = [] | ||
385 | 361 | orig_range = 0 | ||
386 | 362 | beginning = True | ||
387 | 363 | |||
388 | 364 | dirty_headers = ('=== ', 'diff ', 'index ') | ||
389 | 365 | for line in iter_lines: | ||
390 | 366 | # preserve bzr modified/added headers and blank lines | ||
391 | 367 | if line.startswith(dirty_headers) or not line.strip('\n'): | ||
392 | 368 | if len(saved_lines) > 0: | ||
393 | 369 | if keep_dirty and len(dirty_head) > 0: | ||
394 | 370 | yield {'saved_lines': saved_lines, | ||
395 | 371 | 'dirty_head': dirty_head} | ||
396 | 372 | dirty_head = [] | ||
397 | 373 | else: | ||
398 | 374 | yield saved_lines | ||
399 | 375 | saved_lines = [] | ||
400 | 376 | dirty_head.append(line) | ||
401 | 377 | continue | ||
402 | 378 | if line.startswith('*** '): | ||
403 | 379 | continue | ||
404 | 380 | if line.startswith('#'): | ||
405 | 381 | continue | ||
406 | 382 | elif orig_range > 0: | ||
407 | 383 | if line.startswith('-') or line.startswith(' '): | ||
408 | 384 | orig_range -= 1 | ||
409 | 385 | elif line.startswith('--- ') or regex.match(line): | ||
410 | 386 | if allow_dirty and beginning: | ||
411 | 387 | # Patches can have "junk" at the beginning | ||
412 | 388 | # Stripping junk from the end of patches is handled when we | ||
413 | 389 | # parse the patch | ||
414 | 390 | beginning = False | ||
415 | 391 | elif len(saved_lines) > 0: | ||
416 | 392 | if keep_dirty and len(dirty_head) > 0: | ||
417 | 393 | yield {'saved_lines': saved_lines, | ||
418 | 394 | 'dirty_head': dirty_head} | ||
419 | 395 | dirty_head = [] | ||
420 | 396 | else: | ||
421 | 397 | yield saved_lines | ||
422 | 398 | saved_lines = [] | ||
423 | 399 | elif line.startswith('@@'): | ||
424 | 400 | hunk = hunk_from_header(line) | ||
425 | 401 | orig_range = hunk.orig_range | ||
426 | 402 | saved_lines.append(line) | ||
427 | 403 | if len(saved_lines) > 0: | ||
428 | 404 | if keep_dirty and len(dirty_head) > 0: | ||
429 | 405 | yield {'saved_lines': saved_lines, | ||
430 | 406 | 'dirty_head': dirty_head} | ||
431 | 407 | else: | ||
432 | 408 | yield saved_lines | ||
433 | 409 | |||
434 | 410 | |||
435 | 411 | def iter_lines_handle_nl(iter_lines): | ||
436 | 412 | """ | ||
437 | 413 | Iterates through lines, ensuring that lines that originally had no | ||
438 | 414 | terminating \n are produced without one. This transformation may be | ||
439 | 415 | applied at any point up until hunk line parsing, and is safe to apply | ||
440 | 416 | repeatedly. | ||
441 | 417 | """ | ||
442 | 418 | last_line = None | ||
443 | 419 | for line in iter_lines: | ||
444 | 420 | if line == NO_NL: | ||
445 | 421 | if not last_line.endswith('\n'): | ||
446 | 422 | raise AssertionError() | ||
447 | 423 | last_line = last_line[:-1] | ||
448 | 424 | line = None | ||
449 | 425 | if last_line is not None: | ||
450 | 426 | yield last_line | ||
451 | 427 | last_line = line | ||
452 | 428 | if last_line is not None: | ||
453 | 429 | yield last_line | ||
454 | 430 | |||
455 | 431 | |||
456 | 432 | def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False): | ||
457 | 433 | ''' | ||
458 | 434 | :arg iter_lines: iterable of lines to parse for patches | ||
459 | 435 | :kwarg allow_dirty: If True, allow text that's not part of the patch at | ||
460 | 436 | selected places. This includes comments before and after a patch | ||
461 | 437 | for instance. Default False. | ||
462 | 438 | :kwarg keep_dirty: If True, returns a dict of patches with dirty headers. | ||
463 | 439 | Default False. | ||
464 | 440 | ''' | ||
465 | 441 | patches = [] | ||
466 | 442 | for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty): | ||
467 | 443 | if 'dirty_head' in patch_lines: | ||
468 | 444 | patches.append({'patch': parse_patch( | ||
469 | 445 | patch_lines['saved_lines'], allow_dirty), | ||
470 | 446 | 'dirty_head': patch_lines['dirty_head']}) | ||
471 | 447 | else: | ||
472 | 448 | patches.append(parse_patch(patch_lines, allow_dirty)) | ||
473 | 449 | return patches | ||
474 | 450 | |||
475 | 451 | |||
476 | 452 | def difference_index(atext, btext): | ||
477 | 453 | """Find the indext of the first character that differs between two texts | ||
478 | 454 | |||
479 | 455 | :param atext: The first text | ||
480 | 456 | :type atext: str | ||
481 | 457 | :param btext: The second text | ||
482 | 458 | :type str: str | ||
483 | 459 | :return: The index, or None if there are no differences within the range | ||
484 | 460 | :rtype: int or NoneType | ||
485 | 461 | """ | ||
486 | 462 | length = len(atext) | ||
487 | 463 | if len(btext) < length: | ||
488 | 464 | length = len(btext) | ||
489 | 465 | for i in range(length): | ||
490 | 466 | if atext[i] != btext[i]: | ||
491 | 467 | return i | ||
492 | 468 | return None | ||
493 | 469 | |||
494 | 470 | |||
495 | 471 | def iter_patched(orig_lines, patch_lines): | ||
496 | 472 | """Iterate through a series of lines with a patch applied. | ||
497 | 473 | This handles a single file, and does exact, not fuzzy patching. | ||
498 | 474 | """ | ||
499 | 475 | patch_lines = iter_lines_handle_nl(iter(patch_lines)) | ||
500 | 476 | get_patch_names(patch_lines) | ||
501 | 477 | return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines)) | ||
502 | 478 | |||
503 | 479 | |||
504 | 480 | def iter_patched_from_hunks(orig_lines, hunks): | ||
505 | 481 | """Iterate through a series of lines with a patch applied. | ||
506 | 482 | This handles a single file, and does exact, not fuzzy patching. | ||
507 | 483 | |||
508 | 484 | :param orig_lines: The unpatched lines. | ||
509 | 485 | :param hunks: An iterable of Hunk instances. | ||
510 | 486 | """ | ||
511 | 487 | seen_patch = [] | ||
512 | 488 | line_no = 1 | ||
513 | 489 | if orig_lines is not None: | ||
514 | 490 | orig_lines = iter(orig_lines) | ||
515 | 491 | for hunk in hunks: | ||
516 | 492 | while line_no < hunk.orig_pos: | ||
517 | 493 | orig_line = orig_lines.next() | ||
518 | 494 | yield orig_line | ||
519 | 495 | line_no += 1 | ||
520 | 496 | for hunk_line in hunk.lines: | ||
521 | 497 | seen_patch.append(str(hunk_line)) | ||
522 | 498 | if isinstance(hunk_line, InsertLine): | ||
523 | 499 | yield hunk_line.contents | ||
524 | 500 | elif isinstance(hunk_line, (ContextLine, RemoveLine)): | ||
525 | 501 | orig_line = orig_lines.next() | ||
526 | 502 | if orig_line != hunk_line.contents: | ||
527 | 503 | raise PatchConflict(line_no, orig_line, | ||
528 | 504 | "".join(seen_patch)) | ||
529 | 505 | if isinstance(hunk_line, ContextLine): | ||
530 | 506 | yield orig_line | ||
531 | 507 | else: | ||
532 | 508 | if not isinstance(hunk_line, RemoveLine): | ||
533 | 509 | raise AssertionError(hunk_line) | ||
534 | 510 | line_no += 1 | ||
535 | 511 | if orig_lines is not None: | ||
536 | 512 | for line in orig_lines: | ||
537 | 513 | yield line | ||
538 | 0 | 514 | ||
539 | === modified file 'lib/lp/code/mail/tests/test_codereviewcomment.py' | |||
540 | --- lib/lp/code/mail/tests/test_codereviewcomment.py 2015-07-07 05:32:11 +0000 | |||
541 | +++ lib/lp/code/mail/tests/test_codereviewcomment.py 2015-07-09 05:40:58 +0000 | |||
542 | @@ -3,7 +3,6 @@ | |||
543 | 3 | 3 | ||
544 | 4 | """Test CodeReviewComment emailing functionality.""" | 4 | """Test CodeReviewComment emailing functionality.""" |
545 | 5 | 5 | ||
546 | 6 | |||
547 | 7 | import testtools | 6 | import testtools |
548 | 8 | import transaction | 7 | import transaction |
549 | 9 | from zope.component import getUtility | 8 | from zope.component import getUtility |
550 | @@ -382,9 +381,9 @@ | |||
551 | 382 | 381 | ||
552 | 383 | diff_text = ( | 382 | diff_text = ( |
553 | 384 | "=== added directory 'foo/bar'\n" | 383 | "=== added directory 'foo/bar'\n" |
557 | 385 | "=== modified file 'foo/bar/baz.py'\n" | 384 | "=== modified file 'foo/bar/bar.py'\n" |
558 | 386 | "--- bar\t2009-08-26 15:53:34.000000000 -0400\n" | 385 | "--- bar.py\t2009-08-26 15:53:34.000000000 -0400\n" |
559 | 387 | "+++ bar\t1969-12-31 19:00:00.000000000 -0500\n" | 386 | "+++ bar.py\t1969-12-31 19:00:00.000000000 -0500\n" |
560 | 388 | "@@ -1,3 +0,0 @@\n" | 387 | "@@ -1,3 +0,0 @@\n" |
561 | 389 | "-\xc3\xa5\n" | 388 | "-\xc3\xa5\n" |
562 | 390 | "-b\n" | 389 | "-b\n" |
563 | @@ -404,7 +403,35 @@ | |||
564 | 404 | "-b\n" | 403 | "-b\n" |
565 | 405 | " c\n" | 404 | " c\n" |
566 | 406 | "+d\n" | 405 | "+d\n" |
568 | 407 | "+e\n") | 406 | "+e\n" |
569 | 407 | "\n" | ||
570 | 408 | "=== modified file 'fulango.py'\n" | ||
571 | 409 | "--- fulano.py\t2014-08-26 15:53:34.000000000 -0400\n" | ||
572 | 410 | "+++ fulano.py\t2015-12-31 19:00:00.000000000 -0500\n" | ||
573 | 411 | "@@ -1,3 +1,4 @@\n" | ||
574 | 412 | " a\n" | ||
575 | 413 | "-fulano\n" | ||
576 | 414 | " c\n" | ||
577 | 415 | "+mengano\n" | ||
578 | 416 | "+zutano\n") | ||
579 | 417 | |||
580 | 418 | git_diff_text = ( | ||
581 | 419 | "diff --git a/foo b/foo\n" | ||
582 | 420 | "index 5716ca5..7601807 100644\n" | ||
583 | 421 | "--- a/foo\n" | ||
584 | 422 | "+++ b/foo\n" | ||
585 | 423 | "@@ -1 +1 @@\n" | ||
586 | 424 | "-bar\n" | ||
587 | 425 | "+baz\n" | ||
588 | 426 | "diff --git a/fulano b/fulano\n" | ||
589 | 427 | "index 5716ca5..7601807 100644\n" | ||
590 | 428 | "--- a/fulano\n" | ||
591 | 429 | "+++ b/fulano\n" | ||
592 | 430 | "@@ -1,3 +1,3 @@\n" | ||
593 | 431 | " fulano\n" | ||
594 | 432 | " \n" | ||
595 | 433 | "-mengano\n" | ||
596 | 434 | "+zutano\n") | ||
597 | 408 | 435 | ||
598 | 409 | binary_diff_text = ( | 436 | binary_diff_text = ( |
599 | 410 | "=== added file 'lib/canonical/launchpad/images/foo.png'\n" | 437 | "=== added file 'lib/canonical/launchpad/images/foo.png'\n" |
600 | @@ -412,9 +439,10 @@ | |||
601 | 412 | "1970-01-01 00:00:00 +0000 and " | 439 | "1970-01-01 00:00:00 +0000 and " |
602 | 413 | "lib/canonical/launchpad/images/foo.png\t" | 440 | "lib/canonical/launchpad/images/foo.png\t" |
603 | 414 | "2015-06-21 22:07:50 +0000 differ\n" | 441 | "2015-06-21 22:07:50 +0000 differ\n" |
607 | 415 | "=== modified file 'foo/bar/baz.py'\n" | 442 | "\n" |
608 | 416 | "--- bar\t2009-08-26 15:53:34.000000000 -0400\n" | 443 | "=== modified file 'foo/bar/bar.py'\n" |
609 | 417 | "+++ bar\t1969-12-31 19:00:00.000000000 -0500\n" | 444 | "--- bar.py\t2009-08-26 15:53:34.000000000 -0400\n" |
610 | 445 | "+++ bar.py\t1969-12-31 19:00:00.000000000 -0500\n" | ||
611 | 418 | "@@ -1,3 +0,0 @@\n" | 446 | "@@ -1,3 +0,0 @@\n" |
612 | 419 | "-a\n" | 447 | "-a\n" |
613 | 420 | "-b\n" | 448 | "-b\n" |
614 | @@ -443,7 +471,7 @@ | |||
615 | 443 | 471 | ||
616 | 444 | def test_binary_patch_in_diff(self): | 472 | def test_binary_patch_in_diff(self): |
617 | 445 | # Binary patches with comments are handled appropriately. | 473 | # Binary patches with comments are handled appropriately. |
619 | 446 | comments = {'1': 'Updated the png', '2': 'foo', '8': 'bar'} | 474 | comments = {'1': 'Updated the png', '2': 'foo', '9': 'bar'} |
620 | 447 | section = self.getSection(comments, diff_text=self.binary_diff_text) | 475 | section = self.getSection(comments, diff_text=self.binary_diff_text) |
621 | 448 | self.assertEqual( | 476 | self.assertEqual( |
622 | 449 | map(unicode, [ | 477 | map(unicode, [ |
623 | @@ -458,9 +486,10 @@ | |||
624 | 458 | "", | 486 | "", |
625 | 459 | "foo", | 487 | "foo", |
626 | 460 | "", | 488 | "", |
630 | 461 | "> === modified file 'foo/bar/baz.py'", | 489 | "> ", |
631 | 462 | "> --- bar\t2009-08-26 15:53:34.000000000 -0400", | 490 | "> === modified file 'foo/bar/bar.py'", |
632 | 463 | "> +++ bar\t1969-12-31 19:00:00.000000000 -0500", | 491 | "> --- bar.py\t2009-08-26 15:53:34.000000000 -0400", |
633 | 492 | "> +++ bar.py\t1969-12-31 19:00:00.000000000 -0500", | ||
634 | 464 | "> @@ -1,3 +0,0 @@", | 493 | "> @@ -1,3 +0,0 @@", |
635 | 465 | "> -a", | 494 | "> -a", |
636 | 466 | "> -b", | 495 | "> -b", |
637 | @@ -468,7 +497,7 @@ | |||
638 | 468 | "bar", | 497 | "bar", |
639 | 469 | "", | 498 | "", |
640 | 470 | "> -c"]), | 499 | "> -c"]), |
642 | 471 | section.splitlines()[4:22]) | 500 | section.splitlines()[4:23]) |
643 | 472 | 501 | ||
644 | 473 | def test_single_line_comment(self): | 502 | def test_single_line_comment(self): |
645 | 474 | # The inline comments are correctly contextualized in the diff. | 503 | # The inline comments are correctly contextualized in the diff. |
646 | @@ -476,12 +505,44 @@ | |||
647 | 476 | comments = {'4': '\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae'} | 505 | comments = {'4': '\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae'} |
648 | 477 | self.assertEqual( | 506 | self.assertEqual( |
649 | 478 | map(unicode, [ | 507 | map(unicode, [ |
651 | 479 | '> +++ bar\t1969-12-31 19:00:00.000000000 -0500', | 508 | '> +++ bar.py\t1969-12-31 19:00:00.000000000 -0500', |
652 | 480 | '', | 509 | '', |
653 | 481 | '\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae', | 510 | '\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae', |
654 | 482 | '']), | 511 | '']), |
655 | 483 | self.getSection(comments).splitlines()[7:11]) | 512 | self.getSection(comments).splitlines()[7:11]) |
656 | 484 | 513 | ||
657 | 514 | def test_comments_in_git_diff(self): | ||
658 | 515 | comments = {'1': 'foo', '5': 'bar', '15': 'baz'} | ||
659 | 516 | section = self.getSection(comments, diff_text=self.git_diff_text) | ||
660 | 517 | self.assertEqual( | ||
661 | 518 | map(unicode, [ | ||
662 | 519 | "> diff --git a/foo b/foo", | ||
663 | 520 | "", | ||
664 | 521 | "foo", | ||
665 | 522 | "", | ||
666 | 523 | "> index 5716ca5..7601807 100644", | ||
667 | 524 | "> --- a/foo", | ||
668 | 525 | "> +++ b/foo", | ||
669 | 526 | "> @@ -1 +1 @@", | ||
670 | 527 | "", | ||
671 | 528 | "bar", | ||
672 | 529 | "", | ||
673 | 530 | "> -bar", | ||
674 | 531 | "> +baz", | ||
675 | 532 | "> diff --git a/fulano b/fulano", | ||
676 | 533 | "> index 5716ca5..7601807 100644", | ||
677 | 534 | "> --- a/fulano", | ||
678 | 535 | "> +++ b/fulano", | ||
679 | 536 | "> @@ -1,3 +1,3 @@", | ||
680 | 537 | "> fulano", | ||
681 | 538 | "> ", | ||
682 | 539 | "> -mengano", | ||
683 | 540 | "", | ||
684 | 541 | "baz", | ||
685 | 542 | "", | ||
686 | 543 | "> +zutano"]), | ||
687 | 544 | section.splitlines()[4:29]) | ||
688 | 545 | |||
689 | 485 | def test_commentless_hunks_ignored(self): | 546 | def test_commentless_hunks_ignored(self): |
690 | 486 | # Hunks without inline comments are not returned in the diff text. | 547 | # Hunks without inline comments are not returned in the diff text. |
691 | 487 | comments = {'16': 'A comment', '21': 'Another comment'} | 548 | comments = {'16': 'A comment', '21': 'Another comment'} |
692 | @@ -556,13 +617,32 @@ | |||
693 | 556 | '> +b']), | 617 | '> +b']), |
694 | 557 | self.getSection(comments).splitlines()[4:12]) | 618 | self.getSection(comments).splitlines()[4:12]) |
695 | 558 | 619 | ||
696 | 620 | def test_comment_in_patch_after_linebreak(self): | ||
697 | 621 | comments = {'31': 'que?'} | ||
698 | 622 | self.assertEqual( | ||
699 | 623 | map(unicode, [ | ||
700 | 624 | "> ", | ||
701 | 625 | "> === modified file 'fulango.py'", | ||
702 | 626 | "> --- fulano.py\t2014-08-26 15:53:34.000000000 -0400", | ||
703 | 627 | "> +++ fulano.py\t2015-12-31 19:00:00.000000000 -0500", | ||
704 | 628 | "> @@ -1,3 +1,4 @@", | ||
705 | 629 | "> a", | ||
706 | 630 | "> -fulano", | ||
707 | 631 | "", | ||
708 | 632 | "que?", | ||
709 | 633 | "", | ||
710 | 634 | "> c", | ||
711 | 635 | "> +mengano", | ||
712 | 636 | "> +zutano"]), | ||
713 | 637 | self.getSection(comments).splitlines()[4:17]) | ||
714 | 638 | |||
715 | 559 | def test_multi_line_comment(self): | 639 | def test_multi_line_comment(self): |
716 | 560 | # Inline comments with multiple lines are rendered appropriately. | 640 | # Inline comments with multiple lines are rendered appropriately. |
717 | 561 | comments = {'4': 'Foo\nBar'} | 641 | comments = {'4': 'Foo\nBar'} |
718 | 562 | self.assertEqual( | 642 | self.assertEqual( |
719 | 563 | map(unicode, [ | 643 | map(unicode, [ |
722 | 564 | '> --- bar\t2009-08-26 15:53:34.000000000 -0400', | 644 | '> --- bar.py\t2009-08-26 15:53:34.000000000 -0400', |
723 | 565 | '> +++ bar\t1969-12-31 19:00:00.000000000 -0500', | 645 | '> +++ bar.py\t1969-12-31 19:00:00.000000000 -0500', |
724 | 566 | '', | 646 | '', |
725 | 567 | 'Foo', | 647 | 'Foo', |
726 | 568 | 'Bar', | 648 | 'Bar', |
727 | @@ -573,7 +653,7 @@ | |||
728 | 573 | # Multiple inline comments are redered appropriately. | 653 | # Multiple inline comments are redered appropriately. |
729 | 574 | comments = {'4': 'Foo', '5': 'Bar'} | 654 | comments = {'4': 'Foo', '5': 'Bar'} |
730 | 575 | self.assertEqual( | 655 | self.assertEqual( |
732 | 576 | ['> +++ bar\t1969-12-31 19:00:00.000000000 -0500', | 656 | ['> +++ bar.py\t1969-12-31 19:00:00.000000000 -0500', |
733 | 577 | '', | 657 | '', |
734 | 578 | 'Foo', | 658 | 'Foo', |
735 | 579 | '', | 659 | '', |
While I believe this does resolve the issue (the blank line before the dirty header is discarded by bzrlib.patches.parse), it does make the assumption that our diffs will continue to be rendered consistently in the form:
=== dirty header
--- patch header
+++ patch header
@@ hunk header @@
text
text
text
=== dirty header
etc.
I tested this branch against a very large real-world diff (elmo's) and it appeared to be behaving correctly.
Colin, you suggested an approach which may work better for differing cases (git?), however I didn't entirely understand and would need to discuss it with you further. Shall we land this in the meantime to get things functional and look at refactoring? I can't say I love this code.