Merge lp:~linaro-graphics-wg/libmatrix/split-refactor into lp:~jesse-barker/libmatrix/trunk

Proposed by Alexandros Frantzis
Status: Merged
Merged at revision: 39
Proposed branch: lp:~linaro-graphics-wg/libmatrix/split-refactor
Merge into: lp:~jesse-barker/libmatrix/trunk
Diff against target: 523 lines (+396/-26)
7 files modified
Makefile (+2/-0)
shader-source.cc (+1/-1)
test/libmatrix_test.cc (+4/-0)
test/util_split_test.cc (+180/-0)
test/util_split_test.h (+31/-0)
util.cc (+160/-19)
util.h (+18/-6)
To merge this branch: bzr merge lp:~linaro-graphics-wg/libmatrix/split-refactor
Reviewer Review Type Date Requested Status
Jesse Barker Approve
Review via email: mp+118059@code.launchpad.net

Description of the change

Refactor the Util::split() interface and add support for splitting quoted strings using bash-like rules.

Note that I didn't add tests for SplitModeFuzzy as I haven't understood its purpose and mechanism completely.

To post a comment you must log in.
Revision history for this message
Jesse Barker (jesse-barker) wrote :

With tests and everything, fantastic! You left your name off the author credits on the new stuff. Was that intentional (placing the blame on me ;-)?

review: Approve
42. By Alexandros Frantzis

Correct contributors section for util split test files.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'Makefile'
2--- Makefile 2012-05-02 21:36:04 +0000
3+++ Makefile 2012-08-06 15:06:24 +0000
4@@ -9,6 +9,7 @@
5 $(TESTDIR)/inverse_test.cc \
6 $(TESTDIR)/transpose_test.cc \
7 $(TESTDIR)/shader_source_test.cc \
8+ $(TESTDIR)/util_split_test.cc \
9 $(TESTDIR)/libmatrix_test.cc
10 TESTOBJS = $(TESTSRCS:.cc=.o)
11
12@@ -32,6 +33,7 @@
13 $(TESTDIR)/inverse_test.o: $(TESTDIR)/inverse_test.cc $(TESTDIR)/inverse_test.h $(TESTDIR)/libmatrix_test.h mat.h
14 $(TESTDIR)/transpose_test.o: $(TESTDIR)/transpose_test.cc $(TESTDIR)/transpose_test.h $(TESTDIR)/libmatrix_test.h mat.h
15 $(TESTDIR)/shader_source_test.o: $(TESTDIR)/shader_source_test.cc $(TESTDIR)/shader_source_test.h $(TESTDIR)/libmatrix_test.h shader-source.h
16+$(TESTDIR)/util_split_test.o: $(TESTDIR)/util_split_test.cc $(TESTDIR)/util_split_test.h $(TESTDIR)/libmatrix_test.h util.h
17 $(TESTDIR)/libmatrix_test: $(TESTOBJS) libmatrix.a
18 $(CXX) -o $@ $^
19 run_tests: $(LIBMATRIX_TESTS)
20
21=== modified file 'shader-source.cc'
22--- shader-source.cc 2012-01-26 16:12:35 +0000
23+++ shader-source.cc 2012-08-06 15:06:24 +0000
24@@ -589,7 +589,7 @@
25 {
26 std::vector<std::string> elems;
27
28- Util::split(precision_values, ',', elems);
29+ Util::split(precision_values, ',', elems, Util::SplitModeNormal);
30
31 for (size_t i = 0; i < elems.size() && i < 4; i++) {
32 const std::string& pstr(elems[i]);
33
34=== modified file 'test/libmatrix_test.cc'
35--- test/libmatrix_test.cc 2012-01-23 19:18:34 +0000
36+++ test/libmatrix_test.cc 2012-08-06 15:06:24 +0000
37@@ -8,6 +8,7 @@
38 //
39 // Contributors:
40 // Jesse Barker - original implementation.
41+// Alexandros Frantzis - Util::split tests
42 //
43 #include <iostream>
44 #include <string>
45@@ -17,6 +18,7 @@
46 #include "transpose_test.h"
47 #include "const_vec_test.h"
48 #include "shader_source_test.h"
49+#include "util_split_test.h"
50
51 using std::cerr;
52 using std::cout;
53@@ -42,6 +44,8 @@
54 testVec.push_back(new MatrixTest3x3Transpose());
55 testVec.push_back(new MatrixTest4x4Transpose());
56 testVec.push_back(new ShaderSourceBasic());
57+ testVec.push_back(new UtilSplitTestNormal());
58+ testVec.push_back(new UtilSplitTestQuoted());
59
60 for (vector<MatrixTest*>::iterator testIt = testVec.begin();
61 testIt != testVec.end();
62
63=== added file 'test/util_split_test.cc'
64--- test/util_split_test.cc 1970-01-01 00:00:00 +0000
65+++ test/util_split_test.cc 2012-08-06 15:06:24 +0000
66@@ -0,0 +1,180 @@
67+//
68+// Copyright (c) 2012 Linaro Limited
69+//
70+// All rights reserved. This program and the accompanying materials
71+// are made available under the terms of the MIT License which accompanies
72+// this distribution, and is available at
73+// http://www.opensource.org/licenses/mit-license.php
74+//
75+// Contributors:
76+// Alexandros Frantzis - original implementation.
77+//
78+#include <iostream>
79+#include <string>
80+#include <vector>
81+#include "libmatrix_test.h"
82+#include "util_split_test.h"
83+#include "../util.h"
84+
85+using std::cout;
86+using std::endl;
87+using std::string;
88+using std::vector;
89+
90+template <typename T> static bool
91+areVectorsEqual(vector<T>& vec1, vector<T>& vec2)
92+{
93+ if (vec1.size() != vec2.size())
94+ return false;
95+
96+ for (unsigned int i = 0; i < vec1.size(); i++)
97+ {
98+ if (vec1[i] != vec2[i])
99+ return false;
100+ }
101+
102+ return true;
103+}
104+
105+template <typename T> static void
106+printVector(vector<T>& vec)
107+{
108+ cout << "[";
109+ for (unsigned int i = 0; i < vec.size(); i++)
110+ {
111+ cout << '"' << vec[i] << '"';
112+ if (i < vec.size() - 1)
113+ cout << ", ";
114+ }
115+ cout << "]";
116+}
117+
118+void
119+UtilSplitTestNormal::run(const Options& options)
120+{
121+ const string test1("abc def ghi");
122+ const string test2(" abc: def :ghi ");
123+ vector<string> expected1;
124+ vector<string> expected2;
125+ vector<string> results;
126+
127+ expected1.push_back("abc");
128+ expected1.push_back("def");
129+ expected1.push_back("ghi");
130+
131+ expected2.push_back(" abc");
132+ expected2.push_back(" def ");
133+ expected2.push_back("ghi ");
134+
135+ if (options.beVerbose())
136+ {
137+ cout << "Testing string \"" << test1 << "\"" << endl;
138+ }
139+
140+ Util::split(test1, ' ', results, Util::SplitModeNormal);
141+
142+ if (options.beVerbose())
143+ {
144+ cout << "Split result: ";
145+ printVector(results);
146+ cout << endl << "Expected: ";
147+ printVector(expected1);
148+ cout << endl;
149+ }
150+
151+ if (!areVectorsEqual(results, expected1))
152+ {
153+ return;
154+ }
155+
156+ results.clear();
157+
158+ if (options.beVerbose())
159+ {
160+ cout << "Testing string \"" << test2 << "\"" << endl;
161+ }
162+
163+ Util::split(test2, ':', results, Util::SplitModeNormal);
164+
165+ if (options.beVerbose())
166+ {
167+ cout << "Split result: ";
168+ printVector(results);
169+ cout << endl << "Expected: ";
170+ printVector(expected2);
171+ cout << endl;
172+ }
173+
174+ if (!areVectorsEqual(results, expected2))
175+ {
176+ return;
177+ }
178+
179+ pass_ = true;
180+}
181+
182+void
183+UtilSplitTestQuoted::run(const Options& options)
184+{
185+ const string test1("abc \"def' ghi\" klm\\ nop -b qr:title='123 \"456'");
186+ const string test2("abc: def='1:2:3:'ghi : \":jk\"");
187+ vector<string> expected1;
188+ vector<string> expected2;
189+ vector<string> results;
190+
191+ expected1.push_back("abc");
192+ expected1.push_back("def' ghi");
193+ expected1.push_back("klm nop");
194+ expected1.push_back("-b");
195+ expected1.push_back("qr:title=123 \"456");
196+
197+ expected2.push_back("abc");
198+ expected2.push_back(" def=1:2:3:ghi ");
199+ expected2.push_back(" :jk");
200+
201+ if (options.beVerbose())
202+ {
203+ cout << "Testing string \"" << test1 << "\"" << endl;
204+ }
205+
206+ Util::split(test1, ' ', results, Util::SplitModeQuoted);
207+
208+ if (options.beVerbose())
209+ {
210+ cout << "Split result: ";
211+ printVector(results);
212+ cout << endl << "Expected: ";
213+ printVector(expected1);
214+ cout << endl;
215+ }
216+
217+ if (!areVectorsEqual(results, expected1))
218+ {
219+ return;
220+ }
221+
222+ results.clear();
223+
224+ if (options.beVerbose())
225+ {
226+ cout << "Testing string \"" << test2 << "\"" << endl;
227+ }
228+
229+ Util::split(test2, ':', results, Util::SplitModeQuoted);
230+
231+ if (options.beVerbose())
232+ {
233+ cout << "Split result: ";
234+ printVector(results);
235+ cout << endl << "Expected: ";
236+ printVector(expected2);
237+ cout << endl;
238+ }
239+
240+ if (!areVectorsEqual(results, expected2))
241+ {
242+ return;
243+ }
244+
245+ pass_ = true;
246+}
247
248=== added file 'test/util_split_test.h'
249--- test/util_split_test.h 1970-01-01 00:00:00 +0000
250+++ test/util_split_test.h 2012-08-06 15:06:24 +0000
251@@ -0,0 +1,31 @@
252+//
253+// Copyright (c) 2012 Linaro Limited
254+//
255+// All rights reserved. This program and the accompanying materials
256+// are made available under the terms of the MIT License which accompanies
257+// this distribution, and is available at
258+// http://www.opensource.org/licenses/mit-license.php
259+//
260+// Contributors:
261+// Alexandros Frantzis - original implementation.
262+//
263+#ifndef UTIL_SPLIT_TEST_H_
264+#define UTIL_SPLIT_TEST_H_
265+
266+class MatrixTest;
267+class Options;
268+
269+class UtilSplitTestNormal : public MatrixTest
270+{
271+public:
272+ UtilSplitTestNormal() : MatrixTest("Util::split::normal") {}
273+ virtual void run(const Options& options);
274+};
275+
276+class UtilSplitTestQuoted : public MatrixTest
277+{
278+public:
279+ UtilSplitTestQuoted() : MatrixTest("Util::split::quoted") {}
280+ virtual void run(const Options& options);
281+};
282+#endif // UTIL_SPLIT_TEST_H_
283
284=== modified file 'util.cc'
285--- util.cc 2012-05-02 21:36:04 +0000
286+++ util.cc 2012-08-06 15:06:24 +0000
287@@ -25,25 +25,102 @@
288 using std::string;
289 using std::vector;
290
291-void
292-Util::split(const string& src, char delim, vector<string>& elementVec, bool fuzzy)
293-{
294- // Trivial rejection
295- if (src.empty())
296- {
297- return;
298- }
299-
300- // Simple case: we want to enforce the value of 'delim' strictly
301- if (!fuzzy)
302- {
303- std::stringstream ss(src);
304- string item;
305- while(std::getline(ss, item, delim))
306- elementVec.push_back(item);
307- return;
308- }
309-
310+/*
311+ * State machine for bash-like quoted string escaping:
312+ *
313+ * \
314+ * -----------> +---------+
315+ * | ---------- | Escaped |
316+ * | | *,ESC +---------+
317+ * | |
318+ * | v '
319+ * +--------+ ---> +--------------+ -----
320+ * | Normal | <--- | SingleQuoted | | *, ESC
321+ * +--------+ ' +--------------+ <----
322+ * | ^
323+ * | |
324+ * | | " +--------------+ ----
325+ * | ---------- | DoubleQuoted | | *, ESC
326+ * -----------> +--------------+ <---
327+ * " | ^
328+ * \ | | *, ESC
329+ * v |
330+ * +---------------------+
331+ * | DoubleQuotedEscaped |
332+ * +---------------------+
333+ *
334+ * ESC: Mark character as Escaped
335+ */
336+static void
337+fill_escape_vector(const string &str, vector<bool> &esc_vec)
338+{
339+ enum State {
340+ StateNormal,
341+ StateEscaped,
342+ StateDoubleQuoted,
343+ StateDoubleQuotedEscaped,
344+ StateSingleQuoted
345+ };
346+
347+ State state = StateNormal;
348+
349+ for (string::const_iterator iter = str.begin();
350+ iter != str.end();
351+ iter++)
352+ {
353+ const char c(*iter);
354+ bool esc = false;
355+
356+ switch (state) {
357+ case StateNormal:
358+ if (c == '"')
359+ state = StateDoubleQuoted;
360+ else if (c == '\\')
361+ state = StateEscaped;
362+ else if (c == '\'')
363+ state = StateSingleQuoted;
364+ break;
365+ case StateEscaped:
366+ esc = true;
367+ state = StateNormal;
368+ break;
369+ case StateDoubleQuoted:
370+ if (c == '"')
371+ state = StateNormal;
372+ else if (c == '\\')
373+ state = StateDoubleQuotedEscaped;
374+ else
375+ esc = true;
376+ break;
377+ case StateDoubleQuotedEscaped:
378+ esc = true;
379+ state = StateDoubleQuoted;
380+ break;
381+ case StateSingleQuoted:
382+ if (c == '\'')
383+ state = StateNormal;
384+ else
385+ esc = true;
386+ default:
387+ break;
388+ }
389+
390+ esc_vec.push_back(esc);
391+ }
392+}
393+
394+static void
395+split_normal(const string& src, char delim, vector<string>& elementVec)
396+{
397+ std::stringstream ss(src);
398+ string item;
399+ while(std::getline(ss, item, delim))
400+ elementVec.push_back(item);
401+}
402+
403+static void
404+split_fuzzy(const string& src, char delim, vector<string>& elementVec)
405+{
406 // Fuzzy case: Initialize our delimiter string based upon the caller's plus
407 // a space to allow for more flexibility.
408 string delimiter(" ");
409@@ -76,6 +153,70 @@
410 elementVec.push_back(str);
411 }
412
413+static void
414+split_quoted(const string& src, char delim, vector<string>& elementVec)
415+{
416+ std::stringstream ss;
417+ vector<bool> escVec;
418+
419+ /* Mark characters in the string as escaped or not */
420+ fill_escape_vector(src, escVec);
421+
422+ /* Sanity check... */
423+ if (src.length() != escVec.size())
424+ return;
425+
426+ for (vector<bool>::const_iterator iter = escVec.begin();
427+ iter != escVec.end();
428+ iter++)
429+ {
430+ bool escaped = static_cast<bool>(*iter);
431+ char c = src[iter - escVec.begin()];
432+
433+ /* Output all characters, except unescaped ",\,' */
434+ if ((c != '"' && c != '\\' && c != '\'') || escaped) {
435+ /* If we reach an unescaped delimiter character, do a split */
436+ if (c == delim && !escaped) {
437+ elementVec.push_back(ss.str());
438+ ss.str("");
439+ ss.clear();
440+ }
441+ else {
442+ ss << c;
443+ }
444+ }
445+
446+ }
447+
448+ /* Handle final element, delimited by end of string */
449+ const string &finalElement(ss.str());
450+ if (!finalElement.empty())
451+ elementVec.push_back(finalElement);
452+}
453+
454+void
455+Util::split(const string& src, char delim, vector<string>& elementVec,
456+ Util::SplitMode mode)
457+{
458+ // Trivial rejection
459+ if (src.empty())
460+ {
461+ return;
462+ }
463+
464+ switch (mode)
465+ {
466+ case Util::SplitModeNormal:
467+ return split_normal(src, delim, elementVec);
468+ case Util::SplitModeFuzzy:
469+ return split_fuzzy(src, delim, elementVec);
470+ case Util::SplitModeQuoted:
471+ return split_quoted(src, delim, elementVec);
472+ default:
473+ break;
474+ }
475+}
476+
477 uint64_t
478 Util::get_timestamp_us()
479 {
480
481=== modified file 'util.h'
482--- util.h 2012-05-02 21:36:04 +0000
483+++ util.h 2012-08-06 15:06:24 +0000
484@@ -25,21 +25,33 @@
485
486 struct Util {
487 /**
488+ * How to perform the split() operation
489+ */
490+ enum SplitMode {
491+ /** Normal split operation */
492+ SplitModeNormal,
493+ /** Allow for spaces and multiple consecutive occurences of the delimiter */
494+ SplitModeFuzzy,
495+ /** Take into account bash-like quoting and escaping rules */
496+ SplitModeQuoted
497+ };
498+
499+ /**
500 * split() - Splits a string into elements using a provided delimiter
501 *
502 * @s: the string to split
503 * @delim: the delimiter to use
504 * @elems: the string vector to populate
505- * @fuzzy: (optional) enable/disable strict handling of @delim
506+ * @mode: the SplitMode to use
507 *
508 * Using @delim to determine field boundaries, splits @s into separate
509 * string elements. These elements are returned in the string vector
510- * @elems. If @fuzzy is true, then the handling of @delim allows for
511- * spaces and multiple consecutive occurences of @delim in determining
512- * field boundaries. As long as @s is non-empty, there will be at least
513- * one element in @elems.
514+ * @elems. As long as @s is non-empty, there will be at least one
515+ * element in @elems.
516 */
517- static void split(const std::string &s, char delim, std::vector<std::string> &elems, bool fuzzy = false);
518+ static void split(const std::string& src, char delim,
519+ std::vector<std::string>& elems,
520+ Util::SplitMode mode);
521 /**
522 * get_timestamp_us() - Returns the current time in microseconds
523 */

Subscribers

People subscribed via source and target branches