Merge lp:~paul-lucas/zorba/pjl-misc into lp:zorba
- pjl-misc
- Merge into trunk
Status: | Merged |
---|---|
Approved by: | Paul J. Lucas |
Approved revision: | 11470 |
Merged at revision: | 11705 |
Proposed branch: | lp:~paul-lucas/zorba/pjl-misc |
Merge into: | lp:zorba |
Diff against target: |
532 lines (+155/-66) 8 files modified
src/runtime/csv/csv_impl.cpp (+15/-14) src/runtime/csv/pregenerated/csv.h (+1/-0) src/runtime/spec/csv/csv.xml (+1/-0) src/store/naive/simple_item_factory.cpp (+13/-12) src/util/csv_parser.cpp (+9/-2) src/util/json_parser.cpp (+37/-33) src/util/json_parser.h (+4/-4) src/util/string_util.h (+75/-1) |
To merge this branch: | bzr merge lp:~paul-lucas/zorba/pjl-misc |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Matthias Brantner | Approve | ||
Paul J. Lucas | Approve | ||
Review via email: mp+209760@code.launchpad.net |
Commit message
1. In a couple of cases, now reusing the same zstring so the cost of growing the string is not paid per value.
2. Added a string_appender class that "chunks" the appending of single characters onto the end of strings to reduce the number of times zstring::append() is called.
3. Improved creating of JSON objects by using iterators rather than operator[].
Description of the change
1. In a couple of cases, now reusing the same zstring so the cost of growing the string is not paid per value.
2. Added a string_appender class that "chunks" the appending of single characters onto the end of strings to reduce the number of times zstring::append() is called.
3. Improved creating of JSON objects by using iterators rather than operator[].
Paul J. Lucas (paul-lucas) wrote : | # |
Paul J. Lucas (paul-lucas) : | # |
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for the following merge proposals:
https:/
Progress dashboard at http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
Voting criteria failed for the following merge proposals:
https:/
Votes: {'Approve': 1}
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue result for https:/
Stage "CommitZorba" failed.
Check console output at http://
Matthias Brantner (matthias-brantner) : | # |
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for the following merge proposals:
https:/
Progress dashboard at http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue succeeded - proposal merged!
Preview Diff
1 | === modified file 'src/runtime/csv/csv_impl.cpp' |
2 | --- src/runtime/csv/csv_impl.cpp 2014-03-01 01:57:00 +0000 |
3 | +++ src/runtime/csv/csv_impl.cpp 2014-03-06 19:08:12 +0000 |
4 | @@ -365,7 +365,7 @@ |
5 | store::Item_t item; |
6 | vector<store::Item_t> keys_copy, values; |
7 | set<unsigned> keys_omit; |
8 | - zstring value; |
9 | + zstring *value; |
10 | bool eol, quoted, swap_keys = false; |
11 | |
12 | CsvParseIteratorState *state; |
13 | @@ -381,7 +381,8 @@ |
14 | set_options( item, state ); |
15 | } |
16 | |
17 | - while ( state->csv_.next_value( &value, &eol, "ed ) ) { |
18 | + while ( state->csv_.next_value( &state->value_, &eol, "ed ) ) { |
19 | + value = &state->value_; |
20 | if ( state->keys_.size() && values.size() == state->keys_.size() && |
21 | state->extra_name_.empty() ) { |
22 | // |
23 | @@ -390,13 +391,13 @@ |
24 | // |
25 | throw XQUERY_EXCEPTION( |
26 | csv::EXTRA_VALUE, |
27 | - ERROR_PARAMS( value, state->line_no_ ), |
28 | + ERROR_PARAMS( *value, state->line_no_ ), |
29 | ERROR_LOC( loc ) |
30 | ); |
31 | } |
32 | |
33 | item = nullptr; |
34 | - if ( value.empty() ) { |
35 | + if ( value->empty() ) { |
36 | if ( state->keys_.empty() ) { |
37 | // |
38 | // Header field names can never be empty. |
39 | @@ -408,7 +409,7 @@ |
40 | ); |
41 | } |
42 | if ( quoted ) |
43 | - GENV_ITEMFACTORY->createString( item, value ); |
44 | + GENV_ITEMFACTORY->createString( item, *value ); |
45 | else |
46 | switch ( state->missing_ ) { |
47 | case missing::error: |
48 | @@ -421,15 +422,15 @@ |
49 | break; |
50 | } |
51 | } else if ( state->cast_unquoted_ && !quoted && !state->keys_.empty() ) { |
52 | - if ( value == "T" || value == "Y" ) |
53 | + if ( *value == "T" || *value == "Y" ) |
54 | GENV_ITEMFACTORY->createBoolean( item, true ); |
55 | - else if ( value == "F" || value == "N" ) |
56 | + else if ( *value == "F" || *value == "N" ) |
57 | GENV_ITEMFACTORY->createBoolean( item, false ); |
58 | else { |
59 | json::token t; |
60 | - switch ( parse_json( value, &t ) ) { |
61 | + switch ( parse_json( *value, &t ) ) { |
62 | case json::boolean: |
63 | - GENV_ITEMFACTORY->createBoolean( item, value[0] == 't' ); |
64 | + GENV_ITEMFACTORY->createBoolean( item, (*value)[0] == 't' ); |
65 | break; |
66 | case json::null: |
67 | GENV_ITEMFACTORY->createJSONNull( item ); |
68 | @@ -437,24 +438,24 @@ |
69 | case json::number: |
70 | switch ( t.get_numeric_type() ) { |
71 | case json::token::integer: |
72 | - GENV_ITEMFACTORY->createInteger( item, xs_integer( value ) ); |
73 | + GENV_ITEMFACTORY->createInteger( item, xs_integer( *value ) ); |
74 | break; |
75 | case json::token::decimal: |
76 | - GENV_ITEMFACTORY->createDecimal( item, xs_decimal( value ) ); |
77 | + GENV_ITEMFACTORY->createDecimal( item, xs_decimal( *value ) ); |
78 | break; |
79 | case json::token::floating_point: |
80 | - GENV_ITEMFACTORY->createDouble( item, xs_double( value ) ); |
81 | + GENV_ITEMFACTORY->createDouble( item, xs_double( *value ) ); |
82 | break; |
83 | default: |
84 | ZORBA_ASSERT( false ); |
85 | } |
86 | break; |
87 | default: |
88 | - GENV_ITEMFACTORY->createString( item, value ); |
89 | + GENV_ITEMFACTORY->createString( item, *value ); |
90 | } // switch |
91 | } // else |
92 | } else { |
93 | - GENV_ITEMFACTORY->createString( item, value ); |
94 | + GENV_ITEMFACTORY->createString( item, *value ); |
95 | } |
96 | |
97 | if ( !item.isNull() ) |
98 | |
99 | === modified file 'src/runtime/csv/pregenerated/csv.h' |
100 | --- src/runtime/csv/pregenerated/csv.h 2014-01-31 21:47:54 +0000 |
101 | +++ src/runtime/csv/pregenerated/csv.h 2014-03-06 19:08:12 +0000 |
102 | @@ -57,6 +57,7 @@ |
103 | missing::type missing_; // |
104 | bool skip_called_; // |
105 | zstring string_; // |
106 | + zstring value_; // |
107 | |
108 | CsvParseIteratorState(); |
109 | |
110 | |
111 | === modified file 'src/runtime/spec/csv/csv.xml' |
112 | --- src/runtime/spec/csv/csv.xml 2014-02-28 01:46:12 +0000 |
113 | +++ src/runtime/spec/csv/csv.xml 2014-03-06 19:08:12 +0000 |
114 | @@ -39,6 +39,7 @@ |
115 | <zorba:member type="missing::type" name="missing_" defaultValue="missing::null"/> |
116 | <zorba:member type="bool" name="skip_called_" defaultValue="false"/> |
117 | <zorba:member type="zstring" name="string_"/> |
118 | + <zorba:member type="zstring" name="value_"/> |
119 | </zorba:state> |
120 | <zorba:method name="count" const="true" return="bool"> |
121 | <zorba:param name="result" type="store::Item_t&"/> |
122 | |
123 | === modified file 'src/store/naive/simple_item_factory.cpp' |
124 | --- src/store/naive/simple_item_factory.cpp 2013-09-17 21:12:49 +0000 |
125 | +++ src/store/naive/simple_item_factory.cpp 2014-03-06 19:08:12 +0000 |
126 | @@ -2366,19 +2366,20 @@ |
127 | const std::vector<store::Item_t>& names, |
128 | const std::vector<store::Item_t>& values) |
129 | { |
130 | + assert( names.size() == values.size() ); |
131 | + |
132 | result = new json::SimpleJSONObject(); |
133 | - |
134 | - json::JSONObject* obj = static_cast<json::JSONObject*>(result.getp()); |
135 | - |
136 | - assert(names.size() == values.size()); |
137 | - |
138 | - csize numPairs = names.size(); |
139 | - for (csize i = 0; i < numPairs; ++i) |
140 | - { |
141 | - if (!obj->add(names[i], values[i], false)) |
142 | - { |
143 | - RAISE_ERROR_NO_LOC(jerr::JNDY0003, ERROR_PARAMS(names[i]->getStringValue())); |
144 | - } |
145 | + json::JSONObject *const obj = static_cast<json::JSONObject*>( result.getp() ); |
146 | + |
147 | + std::vector<store::Item_t>::const_iterator n_i( names.begin() ); |
148 | + std::vector<store::Item_t>::const_iterator v_i( values.begin() ); |
149 | + std::vector<store::Item_t>::const_iterator const n_end( names.end() ); |
150 | + |
151 | + for ( ; n_i != n_end; ++n_i, ++v_i ) { |
152 | + if ( !obj->add( *n_i, *v_i, false ) ) |
153 | + throw XQUERY_EXCEPTION( |
154 | + jerr::JNDY0003, ERROR_PARAMS( (*n_i)->getStringValue() ) |
155 | + ); |
156 | } |
157 | |
158 | return true; |
159 | |
160 | === modified file 'src/util/csv_parser.cpp' |
161 | --- src/util/csv_parser.cpp 2013-09-23 15:02:13 +0000 |
162 | +++ src/util/csv_parser.cpp 2014-03-06 19:08:12 +0000 |
163 | @@ -14,6 +14,10 @@ |
164 | * limitations under the License. |
165 | */ |
166 | |
167 | +// Zorba |
168 | +#include "util/string_util.h" |
169 | + |
170 | +// local |
171 | #include "csv_parser.h" |
172 | |
173 | namespace zorba { |
174 | @@ -21,11 +25,13 @@ |
175 | /////////////////////////////////////////////////////////////////////////////// |
176 | |
177 | bool csv_parser::next_value( zstring *value, bool *eol, bool *quoted ) const { |
178 | - value->clear(); |
179 | + ztd::string_appender<zstring,128> appender( value ); |
180 | char c; |
181 | bool in_quote = false; |
182 | bool is_quoted = false; |
183 | |
184 | + value->clear(); |
185 | + |
186 | while ( is_->get( c ) ) { |
187 | if ( in_quote ) { |
188 | if ( quote_esc_ == quote_ ) { // "" |
189 | @@ -63,9 +69,10 @@ |
190 | goto return_true; |
191 | } // switch |
192 | } // else |
193 | - *value += c; |
194 | + appender += c; |
195 | } // while |
196 | |
197 | + appender.flush(); |
198 | if ( value->empty() ) |
199 | return false; |
200 | |
201 | |
202 | === modified file 'src/util/json_parser.cpp' |
203 | --- src/util/json_parser.cpp 2013-08-29 01:20:43 +0000 |
204 | +++ src/util/json_parser.cpp 2014-03-06 19:08:12 +0000 |
205 | @@ -253,8 +253,9 @@ |
206 | // |
207 | location::line_type const quote_line = cur_loc_.line(); |
208 | location::column_type const quote_col = cur_loc_.column(); |
209 | - if ( !parse_string( &t->value_, throw_exceptions ) ) |
210 | + if ( !parse_string( throw_exceptions ) ) |
211 | return false; |
212 | + t->value_ = value_; |
213 | t->type_ = token::string; |
214 | t->loc_.set( |
215 | cur_loc_.file(), quote_line, quote_col, prev_line_, prev_col_ |
216 | @@ -273,8 +274,9 @@ |
217 | case '8': |
218 | case '9': { |
219 | token::numeric_type nt; |
220 | - if ( !(nt = parse_number( c, &t->value_, throw_exceptions )) ) |
221 | + if ( !(nt = parse_number( c, throw_exceptions )) ) |
222 | return false; |
223 | + t->value_ = value_; |
224 | t->numeric_type_ = nt; |
225 | t->type_ = token::number; |
226 | set_loc_range( &t->loc_ ); |
227 | @@ -284,8 +286,9 @@ |
228 | case 'n': // null |
229 | case 't': { // true |
230 | token::type tt; |
231 | - if ( !(tt = parse_literal( c, &t->value_, throw_exceptions )) ) |
232 | + if ( !(tt = parse_literal( c, throw_exceptions )) ) |
233 | return false; |
234 | + t->value_ = value_; |
235 | t->type_ = tt; |
236 | set_loc_range( &t->loc_ ); |
237 | return true; |
238 | @@ -372,22 +375,21 @@ |
239 | return false; |
240 | } |
241 | |
242 | -token::type lexer::parse_literal( char first_c, token::value_type *value, |
243 | - bool throw_exceptions ) { |
244 | +token::type lexer::parse_literal( char first_c, bool throw_exceptions ) { |
245 | static token::value_type const false_value( "false" ); |
246 | static token::value_type const null_value ( "null" ); |
247 | static token::value_type const true_value ( "true" ); |
248 | |
249 | token::type tt = token::none; |
250 | switch ( first_c ) { |
251 | - case 'f': *value = false_value; tt = token::json_false; break; |
252 | - case 'n': *value = null_value ; tt = token::json_null ; break; |
253 | - case 't': *value = true_value ; tt = token::json_true ; break; |
254 | + case 'f': value_ = false_value; tt = token::json_false; break; |
255 | + case 'n': value_ = null_value ; tt = token::json_null ; break; |
256 | + case 't': value_ = true_value ; tt = token::json_true ; break; |
257 | default : assert( false ); |
258 | } |
259 | |
260 | char c; |
261 | - for ( char const *s = value->c_str(); *++s; ) { |
262 | + for ( char const *s = value_.c_str(); *++s; ) { |
263 | if ( !get_char( &c ) ) |
264 | goto error_set_cur_loc_end_false; |
265 | if ( c != *s ) |
266 | @@ -409,17 +411,16 @@ |
267 | return token::none; |
268 | } |
269 | |
270 | -token::numeric_type lexer::parse_number( char first_c, |
271 | - token::value_type *value, |
272 | - bool throw_exceptions ) { |
273 | +token::numeric_type lexer::parse_number( char first_c, bool throw_exceptions ) { |
274 | token::numeric_type numeric_type; |
275 | + ztd::string_appender<token::value_type,64> value( &value_ ); |
276 | |
277 | - value->clear(); |
278 | + value_.clear(); |
279 | |
280 | // <number> ::= [-] <int> [<frac>] [<exp>] |
281 | char c = first_c; |
282 | if ( c == '-' ) { |
283 | - *value += c; |
284 | + value += c; |
285 | if ( !get_char( &c ) ) |
286 | goto error_set_cur_loc_end_false; |
287 | } |
288 | @@ -427,7 +428,7 @@ |
289 | // <int> := '0' | <1-9> <digit>* |
290 | if ( !ascii::is_digit( c ) ) |
291 | goto error_set_cur_loc_end; |
292 | - *value += c; |
293 | + value += c; |
294 | numeric_type = token::integer; |
295 | if ( c == '0' ) { |
296 | if ( !peek_char( &c ) ) |
297 | @@ -443,19 +444,19 @@ |
298 | if ( !ascii::is_digit( c ) ) |
299 | break; |
300 | get_char( &c ); |
301 | - *value += c; |
302 | + value += c; |
303 | } |
304 | } |
305 | |
306 | // <frac> ::= '.' <digit>+ |
307 | if ( c == '.' ) { |
308 | get_char( &c ); |
309 | - *value += c; |
310 | + value += c; |
311 | if ( !get_char( &c ) ) |
312 | goto error_set_cur_loc_end_false; |
313 | if ( !ascii::is_digit( c ) ) |
314 | goto error_set_cur_loc_end; |
315 | - *value += c; |
316 | + value += c; |
317 | numeric_type = token::decimal; |
318 | while ( true ) { |
319 | if ( !peek_char( &c ) ) |
320 | @@ -465,7 +466,7 @@ |
321 | if ( !ascii::is_digit( c ) ) |
322 | break; |
323 | get_char( &c ); |
324 | - *value += c; |
325 | + value += c; |
326 | } |
327 | } |
328 | |
329 | @@ -474,17 +475,17 @@ |
330 | // <sign> ::= '-' | '+' |
331 | if ( c == 'e' || c == 'E' ) { |
332 | get_char( &c ); |
333 | - *value += c; |
334 | + value += c; |
335 | if ( !get_char( &c ) ) |
336 | goto error_set_cur_loc_end_false; |
337 | if ( c == '+' || c == '-' ) { |
338 | - *value += c; |
339 | + value += c; |
340 | if ( !get_char( &c ) ) |
341 | goto error_set_cur_loc_end_false; |
342 | } |
343 | if ( !ascii::is_digit( c ) ) |
344 | goto error_set_cur_loc_end; |
345 | - *value += c; |
346 | + value += c; |
347 | numeric_type = token::floating_point; |
348 | while ( true ) { |
349 | if ( !peek_char( &c ) ) |
350 | @@ -494,7 +495,7 @@ |
351 | if ( !ascii::is_digit( c ) ) |
352 | break; |
353 | get_char( &c ); |
354 | - *value += c; |
355 | + value += c; |
356 | } |
357 | } |
358 | |
359 | @@ -512,10 +513,12 @@ |
360 | return token::non_numeric; |
361 | } |
362 | |
363 | -bool lexer::parse_string( token::value_type *value, bool throw_exceptions ) { |
364 | - value->clear(); |
365 | +bool lexer::parse_string( bool throw_exceptions ) { |
366 | bool got_backslash = false; |
367 | location start_loc( cur_loc_ ); |
368 | + ztd::string_appender<token::value_type,1024> value( &value_ ); |
369 | + |
370 | + value_.clear(); |
371 | |
372 | while ( true ) { |
373 | // |
374 | @@ -537,28 +540,29 @@ |
375 | case '"': |
376 | case '/': |
377 | case '\\': |
378 | - *value += c; |
379 | + value += c; |
380 | break; |
381 | case 'b': |
382 | - *value += '\b'; |
383 | + value += '\b'; |
384 | break; |
385 | case 'f': |
386 | - *value += '\f'; |
387 | + value += '\f'; |
388 | break; |
389 | case 'n': |
390 | - *value += '\n'; |
391 | + value += '\n'; |
392 | break; |
393 | case 'r': |
394 | - *value += '\r'; |
395 | + value += '\r'; |
396 | break; |
397 | case 't': |
398 | - *value += '\t'; |
399 | + value += '\t'; |
400 | break; |
401 | case 'u': { |
402 | unicode::code_point cp; |
403 | if ( !parse_codepoint( &cp, throw_exceptions ) ) |
404 | return false; |
405 | - utf8::encode( cp, value ); |
406 | + value.flush(); |
407 | + utf8::encode( cp, &value_ ); |
408 | break; |
409 | } |
410 | default: |
411 | @@ -576,7 +580,7 @@ |
412 | case '"': |
413 | return true; |
414 | default: |
415 | - *value += c; |
416 | + value += c; |
417 | } |
418 | } // while |
419 | } |
420 | |
421 | === modified file 'src/util/json_parser.h' |
422 | --- src/util/json_parser.h 2013-08-29 01:20:43 +0000 |
423 | +++ src/util/json_parser.h 2014-03-06 19:08:12 +0000 |
424 | @@ -504,10 +504,9 @@ |
425 | bool get_char( char* ); |
426 | bool peek_char( char* ); |
427 | bool parse_codepoint( unicode::code_point *cp, bool throw_exceptions ); |
428 | - token::type parse_literal( char, token::value_type*, bool throw_exceptions ); |
429 | - token::numeric_type parse_number( char, token::value_type*, |
430 | - bool throw_exceptions ); |
431 | - bool parse_string( token::value_type*, bool throw_exceptions ); |
432 | + token::type parse_literal( char, bool throw_exceptions ); |
433 | + token::numeric_type parse_number( char, bool throw_exceptions ); |
434 | + bool parse_string( bool throw_exceptions ); |
435 | void set_cur_loc(); |
436 | location& set_cur_loc_end( bool prev = true ); |
437 | void set_loc_range( location* ); |
438 | @@ -518,6 +517,7 @@ |
439 | column_type col_, prev_col_; |
440 | location cur_loc_; |
441 | bool throw_exceptions_; |
442 | + token::value_type value_; |
443 | }; |
444 | |
445 | /////////////////////////////////////////////////////////////////////////////// |
446 | |
447 | === modified file 'src/util/string_util.h' |
448 | --- src/util/string_util.h 2013-12-04 14:28:07 +0000 |
449 | +++ src/util/string_util.h 2014-03-06 19:08:12 +0000 |
450 | @@ -50,7 +50,81 @@ |
451 | |
452 | using internal::ztd::c_str; |
453 | |
454 | -////////// String building ///////////////////////////////////////////////////// |
455 | +////////// String appending /////////////////////////////////////////////////// |
456 | + |
457 | +/** |
458 | + * A %string_appender is used to optimize repeatedly appending characters to a |
459 | + * string in a loop by gathering \em N characters and appending them in chunks |
460 | + * so as to call the string's \c append() function less. |
461 | + */ |
462 | +template<class StringType,int BufCapacity> |
463 | +class string_appender { |
464 | +public: |
465 | + typedef StringType value_type; |
466 | + typedef typename value_type::value_type char_type; |
467 | + typedef typename value_type::size_type size_type; |
468 | + |
469 | + /** |
470 | + * Constructs an appender. |
471 | + * |
472 | + * @param s A pointer to the string to append to. |
473 | + */ |
474 | + string_appender( value_type *s ) : s_( s ), buf_size_( 0 ) { } |
475 | + |
476 | + /** |
477 | + * Destroys the appender and appends any unappended characters to the string. |
478 | + */ |
479 | + ~string_appender() { |
480 | + flush(); |
481 | + } |
482 | + |
483 | + /** |
484 | + * Appends a character. |
485 | + * |
486 | + * @param c The character to append. |
487 | + * @return Returns \c *this. |
488 | + */ |
489 | + string_appender& append( char_type c ) { |
490 | + buf_[ buf_size_++ ] = c; |
491 | + if ( buf_size_ == BufCapacity ) |
492 | + flush(); |
493 | + return *this; |
494 | + } |
495 | + |
496 | + /** |
497 | + * Appends any unappended characters to the string. |
498 | + */ |
499 | + void flush() { |
500 | + s_->append( buf_, buf_size_ ); |
501 | + buf_size_ = 0; |
502 | + } |
503 | + |
504 | + /** |
505 | + * Gets the string that is being appended to. |
506 | + * |
507 | + * @return Returns said string. |
508 | + */ |
509 | + value_type& str() const { |
510 | + return *s_; |
511 | + } |
512 | + |
513 | + /** |
514 | + * Appends a character. |
515 | + * |
516 | + * @param c The character to append. |
517 | + * @return Returns \c *this. |
518 | + */ |
519 | + string_appender& operator+=( char_type c ) { |
520 | + return append( c ); |
521 | + } |
522 | + |
523 | +private: |
524 | + char_type buf_[ BufCapacity ]; |
525 | + size_type buf_size_; |
526 | + value_type *s_; |
527 | +}; |
528 | + |
529 | +////////// String building //////////////////////////////////////////////////// |
530 | |
531 | /** |
532 | * A %string_builder is used to build (concatenate) strings on-the-fly and pass |
1. In a couple of cases, now reusing the same zstring so the cost of growing the string is not paid per value.
2. Added a string_appender class that "chunks" the appending of single characters onto the end of strings to reduce the number of times zstring::append() is called.
3. Improved creating of JSON objects by using iterators rather than operator[].