Merge lp:~doxxx/bzr/cmdline-splitter into lp:bzr

Proposed by Gordon Tyler
Status: Merged
Approved by: Martin Pool
Approved revision: not available
Merged at revision: not available
Proposed branch: lp:~doxxx/bzr/cmdline-splitter
Merge into: lp:bzr
Diff against target: 638 lines (+287/-187)
9 files modified
bzrlib/cmdline.py (+160/-0)
bzrlib/commands.py (+2/-6)
bzrlib/diff.py (+2/-2)
bzrlib/rules.py (+2/-3)
bzrlib/tests/test_cmdline.py (+74/-0)
bzrlib/tests/test_commands.py (+1/-1)
bzrlib/tests/test_diff.py (+13/-1)
bzrlib/tests/test_win32utils.py (+11/-60)
bzrlib/win32utils.py (+22/-114)
To merge this branch: bzr merge lp:~doxxx/bzr/cmdline-splitter
Reviewer Review Type Date Requested Status
Martin Pool Approve
Review via email: mp+19160@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Gordon Tyler (doxxx) wrote :

At John's suggestion, I've reworked my fix for bug 392428 to entirely replace bzr's use of the shlex module with a commandline splitter that works for all platforms. This resides in a new module called bzrlib.cmdline which provides a Splitter class for complete control over how splitting is done and a split() helper function, which defaults to supporting single quotes.

bzrlib.commands.shlex_split_unicode() has been removed in favour of bzrlib.cmdline.split().

Processing of the win32 bzr commandline allows single quotes for quoting arguments. I can revert that, if desired, without affecting single quote support in aliases, etc.

I've tested this with selftest on Windows 7 and Debian.

Revision history for this message
Martin Pool (mbp) wrote :

Looks good, thanks.

review: Approve
Revision history for this message
Martin Pool (mbp) wrote :

This failed pqm because test_cmdline doesn't have a copyright header or all the necessary imports.

Revision history for this message
Martin Pool (mbp) wrote :

... also new test modules must be explicitly listed in tests/__init__.py. It's a bit questionable but there it is.

I'll resubmit.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'bzrlib/cmdline.py'
2--- bzrlib/cmdline.py 1970-01-01 00:00:00 +0000
3+++ bzrlib/cmdline.py 2010-02-12 05:42:16 +0000
4@@ -0,0 +1,160 @@
5+# Copyright (C) 2010 Canonical Ltd
6+#
7+# This program is free software; you can redistribute it and/or modify
8+# it under the terms of the GNU General Public License as published by
9+# the Free Software Foundation; either version 2 of the License, or
10+# (at your option) any later version.
11+#
12+# This program is distributed in the hope that it will be useful,
13+# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+# GNU General Public License for more details.
16+#
17+# You should have received a copy of the GNU General Public License
18+# along with this program; if not, write to the Free Software
19+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20+
21+"""Unicode-compatible command-line splitter for all platforms."""
22+
23+import re
24+
25+
26+_whitespace_match = re.compile(u'\s', re.UNICODE).match
27+
28+
29+class _PushbackSequence(object):
30+ def __init__(self, orig):
31+ self._iter = iter(orig)
32+ self._pushback_buffer = []
33+
34+ def next(self):
35+ if len(self._pushback_buffer) > 0:
36+ return self._pushback_buffer.pop()
37+ else:
38+ return self._iter.next()
39+
40+ def pushback(self, char):
41+ self._pushback_buffer.append(char)
42+
43+ def __iter__(self):
44+ return self
45+
46+
47+class _Whitespace(object):
48+ def process(self, next_char, context):
49+ if _whitespace_match(next_char):
50+ if len(context.token) > 0:
51+ return None
52+ else:
53+ return self
54+ elif next_char in context.allowed_quote_chars:
55+ context.quoted = True
56+ return _Quotes(next_char, self)
57+ elif next_char == u'\\':
58+ return _Backslash(self)
59+ else:
60+ context.token.append(next_char)
61+ return _Word()
62+
63+
64+class _Quotes(object):
65+ def __init__(self, quote_char, exit_state):
66+ self.quote_char = quote_char
67+ self.exit_state = exit_state
68+
69+ def process(self, next_char, context):
70+ if next_char == u'\\':
71+ return _Backslash(self)
72+ elif next_char == self.quote_char:
73+ return self.exit_state
74+ else:
75+ context.token.append(next_char)
76+ return self
77+
78+
79+class _Backslash(object):
80+ # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx
81+ def __init__(self, exit_state):
82+ self.exit_state = exit_state
83+ self.count = 1
84+
85+ def process(self, next_char, context):
86+ if next_char == u'\\':
87+ self.count += 1
88+ return self
89+ elif next_char in context.allowed_quote_chars:
90+ # 2N backslashes followed by a quote are N backslashes
91+ context.token.append(u'\\' * (self.count/2))
92+# 2N+1 backslashes followed by a quote are N backslashes followed by
93+ # the quote which should not be processed as the start or end of
94+ # the quoted arg
95+ if self.count % 2 == 1:
96+ # odd number of \ escapes the quote
97+ context.token.append(next_char)
98+ else:
99+ # let exit_state handle next_char
100+ context.seq.pushback(next_char)
101+ self.count = 0
102+ return self.exit_state
103+ else:
104+ # N backslashes not followed by a quote are just N backslashes
105+ if self.count > 0:
106+ context.token.append(u'\\' * self.count)
107+ self.count = 0
108+ # let exit_state handle next_char
109+ context.seq.pushback(next_char)
110+ return self.exit_state
111+
112+ def finish(self, context):
113+ if self.count > 0:
114+ context.token.append(u'\\' * self.count)
115+
116+
117+class _Word(object):
118+ def process(self, next_char, context):
119+ if _whitespace_match(next_char):
120+ return None
121+ elif next_char in context.allowed_quote_chars:
122+ return _Quotes(next_char, self)
123+ elif next_char == u'\\':
124+ return _Backslash(self)
125+ else:
126+ context.token.append(next_char)
127+ return self
128+
129+
130+class Splitter(object):
131+ def __init__(self, command_line, single_quotes_allowed):
132+ self.seq = _PushbackSequence(command_line)
133+ self.allowed_quote_chars = u'"'
134+ if single_quotes_allowed:
135+ self.allowed_quote_chars += u"'"
136+
137+ def __iter__(self):
138+ return self
139+
140+ def next(self):
141+ quoted, token = self._get_token()
142+ if token is None:
143+ raise StopIteration
144+ return quoted, token
145+
146+ def _get_token(self):
147+ self.quoted = False
148+ self.token = []
149+ state = _Whitespace()
150+ for next_char in self.seq:
151+ state = state.process(next_char, self)
152+ if state is None:
153+ break
154+ if not state is None and not getattr(state, 'finish', None) is None:
155+ state.finish(self)
156+ result = u''.join(self.token)
157+ if not self.quoted and result == '':
158+ result = None
159+ return self.quoted, result
160+
161+
162+def split(unsplit, single_quotes_allowed=True):
163+ splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed)
164+ return [arg for quoted, arg in splitter]
165
166=== modified file 'bzrlib/commands.py'
167--- bzrlib/commands.py 2010-02-11 01:13:46 +0000
168+++ bzrlib/commands.py 2010-02-12 05:42:16 +0000
169@@ -41,6 +41,7 @@
170 import bzrlib
171 from bzrlib import (
172 cleanup,
173+ cmdline,
174 debug,
175 errors,
176 option,
177@@ -873,11 +874,6 @@
178 return ret
179
180
181-def shlex_split_unicode(unsplit):
182- import shlex
183- return [u.decode('utf-8') for u in shlex.split(unsplit.encode('utf-8'))]
184-
185-
186 def get_alias(cmd, config=None):
187 """Return an expanded alias, or None if no alias exists.
188
189@@ -893,7 +889,7 @@
190 config = bzrlib.config.GlobalConfig()
191 alias = config.get_alias(cmd)
192 if (alias):
193- return shlex_split_unicode(alias)
194+ return cmdline.split(alias)
195 return None
196
197
198
199=== modified file 'bzrlib/diff.py'
200--- bzrlib/diff.py 2010-01-20 23:26:31 +0000
201+++ bzrlib/diff.py 2010-02-12 05:42:16 +0000
202@@ -31,7 +31,7 @@
203 from bzrlib import (
204 branch as _mod_branch,
205 bzrdir,
206- commands,
207+ cmdline,
208 errors,
209 osutils,
210 patiencediff,
211@@ -683,7 +683,7 @@
212 @classmethod
213 def from_string(klass, command_string, old_tree, new_tree, to_file,
214 path_encoding='utf-8'):
215- command_template = commands.shlex_split_unicode(command_string)
216+ command_template = cmdline.split(command_string)
217 if '@' not in command_string:
218 command_template.extend(['@old_path', '@new_path'])
219 return klass(command_template, old_tree, new_tree, to_file,
220
221=== modified file 'bzrlib/rules.py'
222--- bzrlib/rules.py 2009-05-07 05:08:46 +0000
223+++ bzrlib/rules.py 2010-02-12 05:42:16 +0000
224@@ -21,7 +21,7 @@
225
226 from bzrlib import (
227 config,
228- commands,
229+ cmdline,
230 errors,
231 globbing,
232 osutils,
233@@ -81,8 +81,7 @@
234 self.pattern_to_section = {}
235 for s in sections:
236 if s.startswith(FILE_PREFS_PREFIX):
237- file_patterns = commands.shlex_split_unicode(
238- s[FILE_PREFS_PREFIX_LEN:])
239+ file_patterns = cmdline.split(s[FILE_PREFS_PREFIX_LEN:])
240 patterns.extend(file_patterns)
241 for fp in file_patterns:
242 self.pattern_to_section[fp] = s
243
244=== added file 'bzrlib/tests/test_cmdline.py'
245--- bzrlib/tests/test_cmdline.py 1970-01-01 00:00:00 +0000
246+++ bzrlib/tests/test_cmdline.py 2010-02-12 05:42:16 +0000
247@@ -0,0 +1,74 @@
248+from bzrlib import cmdline
249+
250+class TestSplitter(tests.TestCase):
251+
252+ def assertAsTokens(self, expected, line, single_quotes_allowed=False):
253+ s = cmdline.Splitter(line, single_quotes_allowed=single_quotes_allowed)
254+ self.assertEqual(expected, list(s))
255+
256+ def test_simple(self):
257+ self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
258+ u'foo bar baz')
259+
260+ def test_ignore_multiple_spaces(self):
261+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar')
262+
263+ def test_ignore_leading_space(self):
264+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar')
265+
266+ def test_ignore_trailing_space(self):
267+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ')
268+
269+ def test_posix_quotations(self):
270+ self.assertAsTokens([(True, u'foo bar')], u"'foo bar'",
271+ single_quotes_allowed=True)
272+ self.assertAsTokens([(True, u'foo bar')], u"'fo''o b''ar'",
273+ single_quotes_allowed=True)
274+ self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"',
275+ single_quotes_allowed=True)
276+ self.assertAsTokens([(True, u'foo bar')], u'"fo"\'o b\'"ar"',
277+ single_quotes_allowed=True)
278+
279+ def test_nested_quotations(self):
280+ self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"")
281+ self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
282+ self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"",
283+ single_quotes_allowed=True)
284+ self.assertAsTokens([(True, u'foo"" bar')], u"'foo\"\" bar'",
285+ single_quotes_allowed=True)
286+
287+ def test_empty_result(self):
288+ self.assertAsTokens([], u'')
289+ self.assertAsTokens([], u' ')
290+
291+ def test_quoted_empty(self):
292+ self.assertAsTokens([(True, '')], u'""')
293+ self.assertAsTokens([(False, u"''")], u"''")
294+ self.assertAsTokens([(True, '')], u"''", single_quotes_allowed=True)
295+
296+ def test_unicode_chars(self):
297+ self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
298+ u'f\xb5\xee \u1234\u3456')
299+
300+ def test_newline_in_quoted_section(self):
301+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
302+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u"'foo\nbar\nbaz\n'",
303+ single_quotes_allowed=True)
304+
305+ def test_escape_chars(self):
306+ self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
307+
308+ def test_escape_quote(self):
309+ self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
310+ self.assertAsTokens([(True, u'foo\\"bar')], u'"foo\\\\\\"bar"')
311+ self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\"bar"')
312+
313+ def test_double_escape(self):
314+ self.assertAsTokens([(True, u'foo\\\\bar')], u'"foo\\\\bar"')
315+ self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
316+
317+ def test_multiple_quoted_args(self):
318+ self.assertAsTokens([(True, u'x x'), (True, u'y y')],
319+ u'"x x" "y y"')
320+ self.assertAsTokens([(True, u'x x'), (True, u'y y')],
321+ u'"x x" \'y y\'', single_quotes_allowed=True)
322
323=== modified file 'bzrlib/tests/test_commands.py'
324--- bzrlib/tests/test_commands.py 2009-05-23 21:01:51 +0000
325+++ bzrlib/tests/test_commands.py 2010-02-12 05:42:16 +0000
326@@ -111,7 +111,7 @@
327
328 def test_unicode(self):
329 my_config = self._get_config("[ALIASES]\n"
330- u"iam=whoami 'Erik B\u00e5gfors <erik@bagfors.nu>'\n")
331+ u'iam=whoami "Erik B\u00e5gfors <erik@bagfors.nu>"\n')
332 self.assertEqual([u'whoami', u'Erik B\u00e5gfors <erik@bagfors.nu>'],
333 commands.get_alias("iam", config=my_config))
334
335
336=== modified file 'bzrlib/tests/test_diff.py'
337--- bzrlib/tests/test_diff.py 2009-12-22 15:50:40 +0000
338+++ bzrlib/tests/test_diff.py 2010-02-12 05:42:16 +0000
339@@ -45,6 +45,8 @@
340 from bzrlib.revisiontree import RevisionTree
341 from bzrlib.revisionspec import RevisionSpec
342
343+from bzrlib.tests.test_win32utils import BackslashDirSeparatorFeature
344+
345
346 class _AttribFeature(Feature):
347
348@@ -1292,12 +1294,22 @@
349 diff_obj.command_template)
350
351 def test_from_string_u5(self):
352- diff_obj = DiffFromTool.from_string('diff -u\\ 5', None, None, None)
353+ diff_obj = DiffFromTool.from_string('diff "-u 5"', None, None, None)
354 self.addCleanup(diff_obj.finish)
355 self.assertEqual(['diff', '-u 5', '@old_path', '@new_path'],
356 diff_obj.command_template)
357 self.assertEqual(['diff', '-u 5', 'old-path', 'new-path'],
358 diff_obj._get_command('old-path', 'new-path'))
359+
360+ def test_from_string_path_with_backslashes(self):
361+ self.requireFeature(BackslashDirSeparatorFeature)
362+ tool = 'C:\\Tools\\Diff.exe'
363+ diff_obj = DiffFromTool.from_string(tool, None, None, None)
364+ self.addCleanup(diff_obj.finish)
365+ self.assertEqual(['C:\\Tools\\Diff.exe', '@old_path', '@new_path'],
366+ diff_obj.command_template)
367+ self.assertEqual(['C:\\Tools\\Diff.exe', 'old-path', 'new-path'],
368+ diff_obj._get_command('old-path', 'new-path'))
369
370 def test_execute(self):
371 output = StringIO()
372
373=== modified file 'bzrlib/tests/test_win32utils.py'
374--- bzrlib/tests/test_win32utils.py 2010-01-25 17:48:22 +0000
375+++ bzrlib/tests/test_win32utils.py 2010-02-12 05:42:16 +0000
376@@ -288,70 +288,15 @@
377
378
379
380-class TestUnicodeShlex(tests.TestCase):
381-
382- def assertAsTokens(self, expected, line):
383- s = win32utils.UnicodeShlex(line)
384- self.assertEqual(expected, list(s))
385-
386- def test_simple(self):
387- self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
388- u'foo bar baz')
389-
390- def test_ignore_multiple_spaces(self):
391- self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar')
392-
393- def test_ignore_leading_space(self):
394- self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar')
395-
396- def test_ignore_trailing_space(self):
397- self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ')
398-
399- def test_posix_quotations(self):
400- self.assertAsTokens([(True, u'foo bar')], u'"foo bar"')
401- self.assertAsTokens([(False, u"'fo''o"), (False, u"b''ar'")],
402- u"'fo''o b''ar'")
403- self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"')
404- self.assertAsTokens([(True, u"fo'o"), (True, u"b'ar")],
405- u'"fo"\'o b\'"ar"')
406-
407- def test_nested_quotations(self):
408- self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"")
409- self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
410-
411- def test_empty_result(self):
412- self.assertAsTokens([], u'')
413- self.assertAsTokens([], u' ')
414-
415- def test_quoted_empty(self):
416- self.assertAsTokens([(True, '')], u'""')
417- self.assertAsTokens([(False, u"''")], u"''")
418-
419- def test_unicode_chars(self):
420- self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
421- u'f\xb5\xee \u1234\u3456')
422-
423- def test_newline_in_quoted_section(self):
424- self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
425-
426- def test_escape_chars(self):
427- self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
428-
429- def test_escape_quote(self):
430- self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
431-
432- def test_double_escape(self):
433- self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"')
434- self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
435-
436
437 class Test_CommandLineToArgv(tests.TestCaseInTempDir):
438
439- def assertCommandLine(self, expected, line):
440+ def assertCommandLine(self, expected, line, single_quotes_allowed=False):
441 # Strictly speaking we should respect parameter order versus glob
442 # expansions, but it's not really worth the effort here
443- self.assertEqual(expected,
444- sorted(win32utils._command_line_to_argv(line)))
445+ argv = win32utils._command_line_to_argv(line,
446+ single_quotes_allowed=single_quotes_allowed)
447+ self.assertEqual(expected, sorted(argv))
448
449 def test_glob_paths(self):
450 self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
451@@ -367,19 +312,25 @@
452 self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
453 self.assertCommandLine([u'a/*.c'], '"a/*.c"')
454 self.assertCommandLine([u"'a/*.c'"], "'a/*.c'")
455+ self.assertCommandLine([u'a/*.c'], "'a/*.c'",
456+ single_quotes_allowed=True)
457
458 def test_slashes_changed(self):
459 # Quoting doesn't change the supplied args
460 self.assertCommandLine([u'a\\*.c'], '"a\\*.c"')
461+ self.assertCommandLine([u'a\\*.c'], "'a\\*.c'",
462+ single_quotes_allowed=True)
463 # Expands the glob, but nothing matches, swaps slashes
464 self.assertCommandLine([u'a/*.c'], 'a\\*.c')
465 self.assertCommandLine([u'a/?.c'], 'a\\?.c')
466 # No glob, doesn't touch slashes
467 self.assertCommandLine([u'a\\foo.c'], 'a\\foo.c')
468
469- def test_no_single_quote_supported(self):
470+ def test_single_quote_support(self):
471 self.assertCommandLine(["add", "let's-do-it.txt"],
472 "add let's-do-it.txt")
473+ self.assertCommandLine(["add", "lets do it.txt"],
474+ "add 'lets do it.txt'", single_quotes_allowed=True)
475
476 def test_case_insensitive_globs(self):
477 self.requireFeature(tests.CaseInsCasePresFilenameFeature)
478
479=== modified file 'bzrlib/win32utils.py'
480--- bzrlib/win32utils.py 2010-02-04 16:06:36 +0000
481+++ bzrlib/win32utils.py 2010-02-12 05:42:16 +0000
482@@ -25,6 +25,7 @@
483 import struct
484 import sys
485
486+from bzrlib import cmdline
487
488 # Windows version
489 if sys.platform == 'win32':
490@@ -522,112 +523,21 @@
491 trace.mutter('Unable to set hidden attribute on %r: %s', path, e)
492
493
494-
495-class UnicodeShlex(object):
496- """This is a very simplified version of shlex.shlex.
497-
498- The main change is that it supports non-ascii input streams. The internal
499- structure is quite simplified relative to shlex.shlex, since we aren't
500- trying to handle multiple input streams, etc. In fact, we don't use a
501- file-like api either.
502- """
503-
504- def __init__(self, uni_string):
505- self._input = uni_string
506- self._input_iter = iter(self._input)
507- self._whitespace_match = re.compile(u'\s').match
508- self._word_match = re.compile(u'\S').match
509- self._quote_chars = u'"'
510- # self._quote_match = re.compile(u'[\'"]').match
511- self._escape_match = lambda x: None # Never matches
512- self._escape = '\\'
513- # State can be
514- # ' ' - after whitespace, starting a new token
515- # 'a' - after text, currently working on a token
516- # '"' - after ", currently in a "-delimited quoted section
517- # "\" - after '\', checking the next char
518- self._state = ' '
519- self._token = [] # Current token being parsed
520-
521- def _get_token(self):
522- # Were there quote chars as part of this token?
523- quoted = False
524- quoted_state = None
525- for nextchar in self._input_iter:
526- if self._state == ' ':
527- if self._whitespace_match(nextchar):
528- # if self._token: return token
529- continue
530- elif nextchar in self._quote_chars:
531- self._state = nextchar # quoted state
532- elif self._word_match(nextchar):
533- self._token.append(nextchar)
534- self._state = 'a'
535- else:
536- raise AssertionError('wtttf?')
537- elif self._state in self._quote_chars:
538- quoted = True
539- if nextchar == self._state: # End of quote
540- self._state = 'a' # posix allows 'foo'bar to translate to
541- # foobar
542- elif self._state == '"' and nextchar == self._escape:
543- quoted_state = self._state
544- self._state = nextchar
545- else:
546- self._token.append(nextchar)
547- elif self._state == self._escape:
548- if nextchar == '\\':
549- self._token.append('\\')
550- elif nextchar == '"':
551- self._token.append(nextchar)
552- else:
553- self._token.append('\\' + nextchar)
554- self._state = quoted_state
555- elif self._state == 'a':
556- if self._whitespace_match(nextchar):
557- if self._token:
558- break # emit this token
559- else:
560- continue # no token to emit
561- elif nextchar in self._quote_chars:
562- # Start a new quoted section
563- self._state = nextchar
564- # escape?
565- elif (self._word_match(nextchar)
566- or nextchar in self._quote_chars
567- # or whitespace_split?
568- ):
569- self._token.append(nextchar)
570- else:
571- raise AssertionError('state == "a", char: %r'
572- % (nextchar,))
573- else:
574- raise AssertionError('unknown state: %r' % (self._state,))
575- result = ''.join(self._token)
576- self._token = []
577- if not quoted and result == '':
578- result = None
579- return quoted, result
580-
581- def __iter__(self):
582- return self
583-
584- def next(self):
585- quoted, token = self._get_token()
586- if token is None:
587- raise StopIteration
588- return quoted, token
589-
590-
591-def _command_line_to_argv(command_line):
592- """Convert a Unicode command line into a set of argv arguments.
593-
594- This does wildcard expansion, etc. It is intended to make wildcards act
595- closer to how they work in posix shells, versus how they work by default on
596- Windows.
597- """
598- s = UnicodeShlex(command_line)
599- # Now that we've split the content, expand globs
600+def _command_line_to_argv(command_line, single_quotes_allowed=False):
601+ """Convert a Unicode command line into a list of argv arguments.
602+
603+ It performs wildcard expansion to make wildcards act closer to how they
604+ work in posix shells, versus how they work by default on Windows. Quoted
605+ arguments are left untouched.
606+
607+ :param command_line: The unicode string to split into an arg list.
608+ :param single_quotes_allowed: Whether single quotes are accepted as quoting
609+ characters like double quotes. False by
610+ default.
611+ :return: A list of unicode strings.
612+ """
613+ s = cmdline.Splitter(command_line, single_quotes_allowed=single_quotes_allowed)
614+ # Now that we've split the content, expand globs if necessary
615 # TODO: Use 'globbing' instead of 'glob.glob', this gives us stuff like
616 # '**/' style globs
617 args = []
618@@ -641,14 +551,12 @@
619
620 if has_ctypes and winver != 'Windows 98':
621 def get_unicode_argv():
622- LPCWSTR = ctypes.c_wchar_p
623- INT = ctypes.c_int
624- POINTER = ctypes.POINTER
625- prototype = ctypes.WINFUNCTYPE(LPCWSTR)
626- GetCommandLine = prototype(("GetCommandLineW",
627- ctypes.windll.kernel32))
628- prototype = ctypes.WINFUNCTYPE(POINTER(LPCWSTR), LPCWSTR, POINTER(INT))
629- command_line = GetCommandLine()
630+ prototype = ctypes.WINFUNCTYPE(ctypes.c_wchar_p)
631+ GetCommandLineW = prototype(("GetCommandLineW",
632+ ctypes.windll.kernel32))
633+ command_line = GetCommandLineW()
634+ if command_line is None:
635+ raise ctypes.WinError()
636 # Skip the first argument, since we only care about parameters
637 argv = _command_line_to_argv(command_line)[1:]
638 if getattr(sys, 'frozen', None) is None: