Merge lp:~doxxx/bzr/cmdline-splitter into lp:bzr
- cmdline-splitter
- Merge into bzr.dev
Proposed by
Gordon Tyler
Status: | Merged | ||||
---|---|---|---|---|---|
Approved by: | Martin Pool | ||||
Approved revision: | not available | ||||
Merged at revision: | not available | ||||
Proposed branch: | lp:~doxxx/bzr/cmdline-splitter | ||||
Merge into: | lp:bzr | ||||
Diff against target: |
638 lines (+287/-187) 9 files modified
bzrlib/cmdline.py (+160/-0) bzrlib/commands.py (+2/-6) bzrlib/diff.py (+2/-2) bzrlib/rules.py (+2/-3) bzrlib/tests/test_cmdline.py (+74/-0) bzrlib/tests/test_commands.py (+1/-1) bzrlib/tests/test_diff.py (+13/-1) bzrlib/tests/test_win32utils.py (+11/-60) bzrlib/win32utils.py (+22/-114) |
||||
To merge this branch: | bzr merge lp:~doxxx/bzr/cmdline-splitter | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Martin Pool | Approve | ||
Review via email: mp+19160@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Gordon Tyler (doxxx) wrote : | # |
Revision history for this message
Martin Pool (mbp) wrote : | # |
This failed pqm because test_cmdline doesn't have a copyright header or all the necessary imports.
Revision history for this message
Martin Pool (mbp) wrote : | # |
... also new test modules must be explicitly listed in tests/__init__.py. It's a bit questionable but there it is.
I'll resubmit.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'bzrlib/cmdline.py' |
2 | --- bzrlib/cmdline.py 1970-01-01 00:00:00 +0000 |
3 | +++ bzrlib/cmdline.py 2010-02-12 05:42:16 +0000 |
4 | @@ -0,0 +1,160 @@ |
5 | +# Copyright (C) 2010 Canonical Ltd |
6 | +# |
7 | +# This program is free software; you can redistribute it and/or modify |
8 | +# it under the terms of the GNU General Public License as published by |
9 | +# the Free Software Foundation; either version 2 of the License, or |
10 | +# (at your option) any later version. |
11 | +# |
12 | +# This program is distributed in the hope that it will be useful, |
13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | +# GNU General Public License for more details. |
16 | +# |
17 | +# You should have received a copy of the GNU General Public License |
18 | +# along with this program; if not, write to the Free Software |
19 | +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | + |
21 | +"""Unicode-compatible command-line splitter for all platforms.""" |
22 | + |
23 | +import re |
24 | + |
25 | + |
26 | +_whitespace_match = re.compile(u'\s', re.UNICODE).match |
27 | + |
28 | + |
29 | +class _PushbackSequence(object): |
30 | + def __init__(self, orig): |
31 | + self._iter = iter(orig) |
32 | + self._pushback_buffer = [] |
33 | + |
34 | + def next(self): |
35 | + if len(self._pushback_buffer) > 0: |
36 | + return self._pushback_buffer.pop() |
37 | + else: |
38 | + return self._iter.next() |
39 | + |
40 | + def pushback(self, char): |
41 | + self._pushback_buffer.append(char) |
42 | + |
43 | + def __iter__(self): |
44 | + return self |
45 | + |
46 | + |
47 | +class _Whitespace(object): |
48 | + def process(self, next_char, context): |
49 | + if _whitespace_match(next_char): |
50 | + if len(context.token) > 0: |
51 | + return None |
52 | + else: |
53 | + return self |
54 | + elif next_char in context.allowed_quote_chars: |
55 | + context.quoted = True |
56 | + return _Quotes(next_char, self) |
57 | + elif next_char == u'\\': |
58 | + return _Backslash(self) |
59 | + else: |
60 | + context.token.append(next_char) |
61 | + return _Word() |
62 | + |
63 | + |
64 | +class _Quotes(object): |
65 | + def __init__(self, quote_char, exit_state): |
66 | + self.quote_char = quote_char |
67 | + self.exit_state = exit_state |
68 | + |
69 | + def process(self, next_char, context): |
70 | + if next_char == u'\\': |
71 | + return _Backslash(self) |
72 | + elif next_char == self.quote_char: |
73 | + return self.exit_state |
74 | + else: |
75 | + context.token.append(next_char) |
76 | + return self |
77 | + |
78 | + |
79 | +class _Backslash(object): |
80 | + # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx |
81 | + def __init__(self, exit_state): |
82 | + self.exit_state = exit_state |
83 | + self.count = 1 |
84 | + |
85 | + def process(self, next_char, context): |
86 | + if next_char == u'\\': |
87 | + self.count += 1 |
88 | + return self |
89 | + elif next_char in context.allowed_quote_chars: |
90 | + # 2N backslashes followed by a quote are N backslashes |
91 | + context.token.append(u'\\' * (self.count/2)) |
92 | + # 2N+1 backslashes follwed by a quote are N backslashes followed by |
93 | + # the quote which should not be processed as the start or end of |
94 | + # the quoted arg |
95 | + if self.count % 2 == 1: |
96 | + # odd number of \ escapes the quote |
97 | + context.token.append(next_char) |
98 | + else: |
99 | + # let exit_state handle next_char |
100 | + context.seq.pushback(next_char) |
101 | + self.count = 0 |
102 | + return self.exit_state |
103 | + else: |
104 | + # N backslashes not followed by a quote are just N backslashes |
105 | + if self.count > 0: |
106 | + context.token.append(u'\\' * self.count) |
107 | + self.count = 0 |
108 | + # let exit_state handle next_char |
109 | + context.seq.pushback(next_char) |
110 | + return self.exit_state |
111 | + |
112 | + def finish(self, context): |
113 | + if self.count > 0: |
114 | + context.token.append(u'\\' * self.count) |
115 | + |
116 | + |
117 | +class _Word(object): |
118 | + def process(self, next_char, context): |
119 | + if _whitespace_match(next_char): |
120 | + return None |
121 | + elif next_char in context.allowed_quote_chars: |
122 | + return _Quotes(next_char, self) |
123 | + elif next_char == u'\\': |
124 | + return _Backslash(self) |
125 | + else: |
126 | + context.token.append(next_char) |
127 | + return self |
128 | + |
129 | + |
130 | +class Splitter(object): |
131 | + def __init__(self, command_line, single_quotes_allowed): |
132 | + self.seq = _PushbackSequence(command_line) |
133 | + self.allowed_quote_chars = u'"' |
134 | + if single_quotes_allowed: |
135 | + self.allowed_quote_chars += u"'" |
136 | + |
137 | + def __iter__(self): |
138 | + return self |
139 | + |
140 | + def next(self): |
141 | + quoted, token = self._get_token() |
142 | + if token is None: |
143 | + raise StopIteration |
144 | + return quoted, token |
145 | + |
146 | + def _get_token(self): |
147 | + self.quoted = False |
148 | + self.token = [] |
149 | + state = _Whitespace() |
150 | + for next_char in self.seq: |
151 | + state = state.process(next_char, self) |
152 | + if state is None: |
153 | + break |
154 | + if not state is None and not getattr(state, 'finish', None) is None: |
155 | + state.finish(self) |
156 | + result = u''.join(self.token) |
157 | + if not self.quoted and result == '': |
158 | + result = None |
159 | + return self.quoted, result |
160 | + |
161 | + |
162 | +def split(unsplit, single_quotes_allowed=True): |
163 | + splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed) |
164 | + return [arg for quoted, arg in splitter] |
165 | |
166 | === modified file 'bzrlib/commands.py' |
167 | --- bzrlib/commands.py 2010-02-11 01:13:46 +0000 |
168 | +++ bzrlib/commands.py 2010-02-12 05:42:16 +0000 |
169 | @@ -41,6 +41,7 @@ |
170 | import bzrlib |
171 | from bzrlib import ( |
172 | cleanup, |
173 | + cmdline, |
174 | debug, |
175 | errors, |
176 | option, |
177 | @@ -873,11 +874,6 @@ |
178 | return ret |
179 | |
180 | |
181 | -def shlex_split_unicode(unsplit): |
182 | - import shlex |
183 | - return [u.decode('utf-8') for u in shlex.split(unsplit.encode('utf-8'))] |
184 | - |
185 | - |
186 | def get_alias(cmd, config=None): |
187 | """Return an expanded alias, or None if no alias exists. |
188 | |
189 | @@ -893,7 +889,7 @@ |
190 | config = bzrlib.config.GlobalConfig() |
191 | alias = config.get_alias(cmd) |
192 | if (alias): |
193 | - return shlex_split_unicode(alias) |
194 | + return cmdline.split(alias) |
195 | return None |
196 | |
197 | |
198 | |
199 | === modified file 'bzrlib/diff.py' |
200 | --- bzrlib/diff.py 2010-01-20 23:26:31 +0000 |
201 | +++ bzrlib/diff.py 2010-02-12 05:42:16 +0000 |
202 | @@ -31,7 +31,7 @@ |
203 | from bzrlib import ( |
204 | branch as _mod_branch, |
205 | bzrdir, |
206 | - commands, |
207 | + cmdline, |
208 | errors, |
209 | osutils, |
210 | patiencediff, |
211 | @@ -683,7 +683,7 @@ |
212 | @classmethod |
213 | def from_string(klass, command_string, old_tree, new_tree, to_file, |
214 | path_encoding='utf-8'): |
215 | - command_template = commands.shlex_split_unicode(command_string) |
216 | + command_template = cmdline.split(command_string) |
217 | if '@' not in command_string: |
218 | command_template.extend(['@old_path', '@new_path']) |
219 | return klass(command_template, old_tree, new_tree, to_file, |
220 | |
221 | === modified file 'bzrlib/rules.py' |
222 | --- bzrlib/rules.py 2009-05-07 05:08:46 +0000 |
223 | +++ bzrlib/rules.py 2010-02-12 05:42:16 +0000 |
224 | @@ -21,7 +21,7 @@ |
225 | |
226 | from bzrlib import ( |
227 | config, |
228 | - commands, |
229 | + cmdline, |
230 | errors, |
231 | globbing, |
232 | osutils, |
233 | @@ -81,8 +81,7 @@ |
234 | self.pattern_to_section = {} |
235 | for s in sections: |
236 | if s.startswith(FILE_PREFS_PREFIX): |
237 | - file_patterns = commands.shlex_split_unicode( |
238 | - s[FILE_PREFS_PREFIX_LEN:]) |
239 | + file_patterns = cmdline.split(s[FILE_PREFS_PREFIX_LEN:]) |
240 | patterns.extend(file_patterns) |
241 | for fp in file_patterns: |
242 | self.pattern_to_section[fp] = s |
243 | |
244 | === added file 'bzrlib/tests/test_cmdline.py' |
245 | --- bzrlib/tests/test_cmdline.py 1970-01-01 00:00:00 +0000 |
246 | +++ bzrlib/tests/test_cmdline.py 2010-02-12 05:42:16 +0000 |
247 | @@ -0,0 +1,74 @@ |
248 | +from bzrlib import cmdline |
249 | + |
250 | +class TestSplitter(tests.TestCase): |
251 | + |
252 | + def assertAsTokens(self, expected, line, single_quotes_allowed=False): |
253 | + s = cmdline.Splitter(line, single_quotes_allowed=single_quotes_allowed) |
254 | + self.assertEqual(expected, list(s)) |
255 | + |
256 | + def test_simple(self): |
257 | + self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')], |
258 | + u'foo bar baz') |
259 | + |
260 | + def test_ignore_multiple_spaces(self): |
261 | + self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar') |
262 | + |
263 | + def test_ignore_leading_space(self): |
264 | + self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar') |
265 | + |
266 | + def test_ignore_trailing_space(self): |
267 | + self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ') |
268 | + |
269 | + def test_posix_quotations(self): |
270 | + self.assertAsTokens([(True, u'foo bar')], u"'foo bar'", |
271 | + single_quotes_allowed=True) |
272 | + self.assertAsTokens([(True, u'foo bar')], u"'fo''o b''ar'", |
273 | + single_quotes_allowed=True) |
274 | + self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"', |
275 | + single_quotes_allowed=True) |
276 | + self.assertAsTokens([(True, u'foo bar')], u'"fo"\'o b\'"ar"', |
277 | + single_quotes_allowed=True) |
278 | + |
279 | + def test_nested_quotations(self): |
280 | + self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"") |
281 | + self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"") |
282 | + self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"", |
283 | + single_quotes_allowed=True) |
284 | + self.assertAsTokens([(True, u'foo"" bar')], u"'foo\"\" bar'", |
285 | + single_quotes_allowed=True) |
286 | + |
287 | + def test_empty_result(self): |
288 | + self.assertAsTokens([], u'') |
289 | + self.assertAsTokens([], u' ') |
290 | + |
291 | + def test_quoted_empty(self): |
292 | + self.assertAsTokens([(True, '')], u'""') |
293 | + self.assertAsTokens([(False, u"''")], u"''") |
294 | + self.assertAsTokens([(True, '')], u"''", single_quotes_allowed=True) |
295 | + |
296 | + def test_unicode_chars(self): |
297 | + self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')], |
298 | + u'f\xb5\xee \u1234\u3456') |
299 | + |
300 | + def test_newline_in_quoted_section(self): |
301 | + self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"') |
302 | + self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u"'foo\nbar\nbaz\n'", |
303 | + single_quotes_allowed=True) |
304 | + |
305 | + def test_escape_chars(self): |
306 | + self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar') |
307 | + |
308 | + def test_escape_quote(self): |
309 | + self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"') |
310 | + self.assertAsTokens([(True, u'foo\\"bar')], u'"foo\\\\\\"bar"') |
311 | + self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\"bar"') |
312 | + |
313 | + def test_double_escape(self): |
314 | + self.assertAsTokens([(True, u'foo\\\\bar')], u'"foo\\\\bar"') |
315 | + self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar") |
316 | + |
317 | + def test_multiple_quoted_args(self): |
318 | + self.assertAsTokens([(True, u'x x'), (True, u'y y')], |
319 | + u'"x x" "y y"') |
320 | + self.assertAsTokens([(True, u'x x'), (True, u'y y')], |
321 | + u'"x x" \'y y\'', single_quotes_allowed=True) |
322 | |
323 | === modified file 'bzrlib/tests/test_commands.py' |
324 | --- bzrlib/tests/test_commands.py 2009-05-23 21:01:51 +0000 |
325 | +++ bzrlib/tests/test_commands.py 2010-02-12 05:42:16 +0000 |
326 | @@ -111,7 +111,7 @@ |
327 | |
328 | def test_unicode(self): |
329 | my_config = self._get_config("[ALIASES]\n" |
330 | - u"iam=whoami 'Erik B\u00e5gfors <erik@bagfors.nu>'\n") |
331 | + u'iam=whoami "Erik B\u00e5gfors <erik@bagfors.nu>"\n') |
332 | self.assertEqual([u'whoami', u'Erik B\u00e5gfors <erik@bagfors.nu>'], |
333 | commands.get_alias("iam", config=my_config)) |
334 | |
335 | |
336 | === modified file 'bzrlib/tests/test_diff.py' |
337 | --- bzrlib/tests/test_diff.py 2009-12-22 15:50:40 +0000 |
338 | +++ bzrlib/tests/test_diff.py 2010-02-12 05:42:16 +0000 |
339 | @@ -45,6 +45,8 @@ |
340 | from bzrlib.revisiontree import RevisionTree |
341 | from bzrlib.revisionspec import RevisionSpec |
342 | |
343 | +from bzrlib.tests.test_win32utils import BackslashDirSeparatorFeature |
344 | + |
345 | |
346 | class _AttribFeature(Feature): |
347 | |
348 | @@ -1292,12 +1294,22 @@ |
349 | diff_obj.command_template) |
350 | |
351 | def test_from_string_u5(self): |
352 | - diff_obj = DiffFromTool.from_string('diff -u\\ 5', None, None, None) |
353 | + diff_obj = DiffFromTool.from_string('diff "-u 5"', None, None, None) |
354 | self.addCleanup(diff_obj.finish) |
355 | self.assertEqual(['diff', '-u 5', '@old_path', '@new_path'], |
356 | diff_obj.command_template) |
357 | self.assertEqual(['diff', '-u 5', 'old-path', 'new-path'], |
358 | diff_obj._get_command('old-path', 'new-path')) |
359 | + |
360 | + def test_from_string_path_with_backslashes(self): |
361 | + self.requireFeature(BackslashDirSeparatorFeature) |
362 | + tool = 'C:\\Tools\\Diff.exe' |
363 | + diff_obj = DiffFromTool.from_string(tool, None, None, None) |
364 | + self.addCleanup(diff_obj.finish) |
365 | + self.assertEqual(['C:\\Tools\\Diff.exe', '@old_path', '@new_path'], |
366 | + diff_obj.command_template) |
367 | + self.assertEqual(['C:\\Tools\\Diff.exe', 'old-path', 'new-path'], |
368 | + diff_obj._get_command('old-path', 'new-path')) |
369 | |
370 | def test_execute(self): |
371 | output = StringIO() |
372 | |
373 | === modified file 'bzrlib/tests/test_win32utils.py' |
374 | --- bzrlib/tests/test_win32utils.py 2010-01-25 17:48:22 +0000 |
375 | +++ bzrlib/tests/test_win32utils.py 2010-02-12 05:42:16 +0000 |
376 | @@ -288,70 +288,15 @@ |
377 | |
378 | |
379 | |
380 | -class TestUnicodeShlex(tests.TestCase): |
381 | - |
382 | - def assertAsTokens(self, expected, line): |
383 | - s = win32utils.UnicodeShlex(line) |
384 | - self.assertEqual(expected, list(s)) |
385 | - |
386 | - def test_simple(self): |
387 | - self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')], |
388 | - u'foo bar baz') |
389 | - |
390 | - def test_ignore_multiple_spaces(self): |
391 | - self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar') |
392 | - |
393 | - def test_ignore_leading_space(self): |
394 | - self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar') |
395 | - |
396 | - def test_ignore_trailing_space(self): |
397 | - self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ') |
398 | - |
399 | - def test_posix_quotations(self): |
400 | - self.assertAsTokens([(True, u'foo bar')], u'"foo bar"') |
401 | - self.assertAsTokens([(False, u"'fo''o"), (False, u"b''ar'")], |
402 | - u"'fo''o b''ar'") |
403 | - self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"') |
404 | - self.assertAsTokens([(True, u"fo'o"), (True, u"b'ar")], |
405 | - u'"fo"\'o b\'"ar"') |
406 | - |
407 | - def test_nested_quotations(self): |
408 | - self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"") |
409 | - self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"") |
410 | - |
411 | - def test_empty_result(self): |
412 | - self.assertAsTokens([], u'') |
413 | - self.assertAsTokens([], u' ') |
414 | - |
415 | - def test_quoted_empty(self): |
416 | - self.assertAsTokens([(True, '')], u'""') |
417 | - self.assertAsTokens([(False, u"''")], u"''") |
418 | - |
419 | - def test_unicode_chars(self): |
420 | - self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')], |
421 | - u'f\xb5\xee \u1234\u3456') |
422 | - |
423 | - def test_newline_in_quoted_section(self): |
424 | - self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"') |
425 | - |
426 | - def test_escape_chars(self): |
427 | - self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar') |
428 | - |
429 | - def test_escape_quote(self): |
430 | - self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"') |
431 | - |
432 | - def test_double_escape(self): |
433 | - self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"') |
434 | - self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar") |
435 | - |
436 | |
437 | class Test_CommandLineToArgv(tests.TestCaseInTempDir): |
438 | |
439 | - def assertCommandLine(self, expected, line): |
440 | + def assertCommandLine(self, expected, line, single_quotes_allowed=False): |
441 | # Strictly speaking we should respect parameter order versus glob |
442 | # expansions, but it's not really worth the effort here |
443 | - self.assertEqual(expected, |
444 | - sorted(win32utils._command_line_to_argv(line))) |
445 | + argv = win32utils._command_line_to_argv(line, |
446 | + single_quotes_allowed=single_quotes_allowed) |
447 | + self.assertEqual(expected, sorted(argv)) |
448 | |
449 | def test_glob_paths(self): |
450 | self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h']) |
451 | @@ -367,19 +312,25 @@ |
452 | self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h']) |
453 | self.assertCommandLine([u'a/*.c'], '"a/*.c"') |
454 | self.assertCommandLine([u"'a/*.c'"], "'a/*.c'") |
455 | + self.assertCommandLine([u'a/*.c'], "'a/*.c'", |
456 | + single_quotes_allowed=True) |
457 | |
458 | def test_slashes_changed(self): |
459 | # Quoting doesn't change the supplied args |
460 | self.assertCommandLine([u'a\\*.c'], '"a\\*.c"') |
461 | + self.assertCommandLine([u'a\\*.c'], "'a\\*.c'", |
462 | + single_quotes_allowed=True) |
463 | # Expands the glob, but nothing matches, swaps slashes |
464 | self.assertCommandLine([u'a/*.c'], 'a\\*.c') |
465 | self.assertCommandLine([u'a/?.c'], 'a\\?.c') |
466 | # No glob, doesn't touch slashes |
467 | self.assertCommandLine([u'a\\foo.c'], 'a\\foo.c') |
468 | |
469 | - def test_no_single_quote_supported(self): |
470 | + def test_single_quote_support(self): |
471 | self.assertCommandLine(["add", "let's-do-it.txt"], |
472 | "add let's-do-it.txt") |
473 | + self.assertCommandLine(["add", "lets do it.txt"], |
474 | + "add 'lets do it.txt'", single_quotes_allowed=True) |
475 | |
476 | def test_case_insensitive_globs(self): |
477 | self.requireFeature(tests.CaseInsCasePresFilenameFeature) |
478 | |
479 | === modified file 'bzrlib/win32utils.py' |
480 | --- bzrlib/win32utils.py 2010-02-04 16:06:36 +0000 |
481 | +++ bzrlib/win32utils.py 2010-02-12 05:42:16 +0000 |
482 | @@ -25,6 +25,7 @@ |
483 | import struct |
484 | import sys |
485 | |
486 | +from bzrlib import cmdline |
487 | |
488 | # Windows version |
489 | if sys.platform == 'win32': |
490 | @@ -522,112 +523,21 @@ |
491 | trace.mutter('Unable to set hidden attribute on %r: %s', path, e) |
492 | |
493 | |
494 | - |
495 | -class UnicodeShlex(object): |
496 | - """This is a very simplified version of shlex.shlex. |
497 | - |
498 | - The main change is that it supports non-ascii input streams. The internal |
499 | - structure is quite simplified relative to shlex.shlex, since we aren't |
500 | - trying to handle multiple input streams, etc. In fact, we don't use a |
501 | - file-like api either. |
502 | - """ |
503 | - |
504 | - def __init__(self, uni_string): |
505 | - self._input = uni_string |
506 | - self._input_iter = iter(self._input) |
507 | - self._whitespace_match = re.compile(u'\s').match |
508 | - self._word_match = re.compile(u'\S').match |
509 | - self._quote_chars = u'"' |
510 | - # self._quote_match = re.compile(u'[\'"]').match |
511 | - self._escape_match = lambda x: None # Never matches |
512 | - self._escape = '\\' |
513 | - # State can be |
514 | - # ' ' - after whitespace, starting a new token |
515 | - # 'a' - after text, currently working on a token |
516 | - # '"' - after ", currently in a "-delimited quoted section |
517 | - # "\" - after '\', checking the next char |
518 | - self._state = ' ' |
519 | - self._token = [] # Current token being parsed |
520 | - |
521 | - def _get_token(self): |
522 | - # Were there quote chars as part of this token? |
523 | - quoted = False |
524 | - quoted_state = None |
525 | - for nextchar in self._input_iter: |
526 | - if self._state == ' ': |
527 | - if self._whitespace_match(nextchar): |
528 | - # if self._token: return token |
529 | - continue |
530 | - elif nextchar in self._quote_chars: |
531 | - self._state = nextchar # quoted state |
532 | - elif self._word_match(nextchar): |
533 | - self._token.append(nextchar) |
534 | - self._state = 'a' |
535 | - else: |
536 | - raise AssertionError('wtttf?') |
537 | - elif self._state in self._quote_chars: |
538 | - quoted = True |
539 | - if nextchar == self._state: # End of quote |
540 | - self._state = 'a' # posix allows 'foo'bar to translate to |
541 | - # foobar |
542 | - elif self._state == '"' and nextchar == self._escape: |
543 | - quoted_state = self._state |
544 | - self._state = nextchar |
545 | - else: |
546 | - self._token.append(nextchar) |
547 | - elif self._state == self._escape: |
548 | - if nextchar == '\\': |
549 | - self._token.append('\\') |
550 | - elif nextchar == '"': |
551 | - self._token.append(nextchar) |
552 | - else: |
553 | - self._token.append('\\' + nextchar) |
554 | - self._state = quoted_state |
555 | - elif self._state == 'a': |
556 | - if self._whitespace_match(nextchar): |
557 | - if self._token: |
558 | - break # emit this token |
559 | - else: |
560 | - continue # no token to emit |
561 | - elif nextchar in self._quote_chars: |
562 | - # Start a new quoted section |
563 | - self._state = nextchar |
564 | - # escape? |
565 | - elif (self._word_match(nextchar) |
566 | - or nextchar in self._quote_chars |
567 | - # or whitespace_split? |
568 | - ): |
569 | - self._token.append(nextchar) |
570 | - else: |
571 | - raise AssertionError('state == "a", char: %r' |
572 | - % (nextchar,)) |
573 | - else: |
574 | - raise AssertionError('unknown state: %r' % (self._state,)) |
575 | - result = ''.join(self._token) |
576 | - self._token = [] |
577 | - if not quoted and result == '': |
578 | - result = None |
579 | - return quoted, result |
580 | - |
581 | - def __iter__(self): |
582 | - return self |
583 | - |
584 | - def next(self): |
585 | - quoted, token = self._get_token() |
586 | - if token is None: |
587 | - raise StopIteration |
588 | - return quoted, token |
589 | - |
590 | - |
591 | -def _command_line_to_argv(command_line): |
592 | - """Convert a Unicode command line into a set of argv arguments. |
593 | - |
594 | - This does wildcard expansion, etc. It is intended to make wildcards act |
595 | - closer to how they work in posix shells, versus how they work by default on |
596 | - Windows. |
597 | - """ |
598 | - s = UnicodeShlex(command_line) |
599 | - # Now that we've split the content, expand globs |
600 | +def _command_line_to_argv(command_line, single_quotes_allowed=False): |
601 | + """Convert a Unicode command line into a list of argv arguments. |
602 | + |
603 | + It performs wildcard expansion to make wildcards act closer to how they |
604 | + work in posix shells, versus how they work by default on Windows. Quoted |
605 | + arguments are left untouched. |
606 | + |
607 | + :param command_line: The unicode string to split into an arg list. |
608 | + :param single_quotes_allowed: Whether single quotes are accepted as quoting |
609 | + characters like double quotes. False by |
610 | + default. |
611 | + :return: A list of unicode strings. |
612 | + """ |
613 | + s = cmdline.Splitter(command_line, single_quotes_allowed=single_quotes_allowed) |
614 | + # Now that we've split the content, expand globs if necessary |
615 | # TODO: Use 'globbing' instead of 'glob.glob', this gives us stuff like |
616 | # '**/' style globs |
617 | args = [] |
618 | @@ -641,14 +551,12 @@ |
619 | |
620 | if has_ctypes and winver != 'Windows 98': |
621 | def get_unicode_argv(): |
622 | - LPCWSTR = ctypes.c_wchar_p |
623 | - INT = ctypes.c_int |
624 | - POINTER = ctypes.POINTER |
625 | - prototype = ctypes.WINFUNCTYPE(LPCWSTR) |
626 | - GetCommandLine = prototype(("GetCommandLineW", |
627 | - ctypes.windll.kernel32)) |
628 | - prototype = ctypes.WINFUNCTYPE(POINTER(LPCWSTR), LPCWSTR, POINTER(INT)) |
629 | - command_line = GetCommandLine() |
630 | + prototype = ctypes.WINFUNCTYPE(ctypes.c_wchar_p) |
631 | + GetCommandLineW = prototype(("GetCommandLineW", |
632 | + ctypes.windll.kernel32)) |
633 | + command_line = GetCommandLineW() |
634 | + if command_line is None: |
635 | + raise ctypes.WinError() |
636 | # Skip the first argument, since we only care about parameters |
637 | argv = _command_line_to_argv(command_line)[1:] |
638 | if getattr(sys, 'frozen', None) is None: |
At John's suggestion, I've reworked my fix for bug 392428 to entirely replace bzr's use of the shlex module with a commandline splitter that works for all platforms. This resides in a new module called bzrlib.cmdline which provides a Splitter class for complete control over how splitting is done and a split() helper function, which defaults to supporting single quotes.
bzrlib.commands.shlex_split_unicode() has been removed in favour of bzrlib.cmdline.split().
Processing of the win32 bzr commandline allows single quotes for quoting arguments. I can revert that, if desired, without affecting single quote support in aliases, etc.
I've tested this with selftest on Windows 7 and Debian.