Merge lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba

Proposed by Paul J. Lucas on 2013-01-09
Status: Merged
Approved by: Paul J. Lucas on 2013-01-26
Approved revision: 11177
Merged at revision: 11201
Proposed branch: lp:~paul-lucas/zorba/feature-utf8_streambuf
Merge into: lp:zorba
Diff against target: 820 lines (+752/-0)
8 files modified
src/unit_tests/CMakeLists.txt (+1/-0)
src/unit_tests/test_utf8_streambuf.cpp (+166/-0)
src/unit_tests/unit_test_list.h (+1/-0)
src/unit_tests/unit_tests.cpp (+1/-0)
src/util/CMakeLists.txt (+1/-0)
src/util/utf8_streambuf.cpp (+259/-0)
src/util/utf8_streambuf.h (+322/-0)
test/unit/CMakeLists.txt (+1/-0)
To merge this branch: bzr merge lp:~paul-lucas/zorba/feature-utf8_streambuf
Reviewer Review Type Date Requested Status
Matthias Brantner 2013-01-09 Approve on 2013-01-25
Sorin Marian Nasoi 2013-01-09 Approve on 2013-01-24
Juan Zacarias Approve on 2013-01-24
Paul J. Lucas Approve on 2013-01-09
Review via email: mp+142440@code.launchpad.net

Commit message

Streambuf for validating UTF-8 on-the-fly.

Description of the change

Streambuf for validating UTF-8 on-the-fly.

To post a comment you must log in.
Paul J. Lucas (paul-lucas) :
review: Approve
Paul J. Lucas (paul-lucas) wrote :

Do you guys want this??

Please note that it does require some discipline to use due to the way exceptions are handled in streams.

Juan Zacarias (juan457) :
review: Approve
review: Approve
review: Approve
Zorba Build Bot (zorba-buildbot) wrote :

There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.

Zorba Build Bot (zorba-buildbot) wrote :

Validation queue job feature-utf8_streambuf-2013-01-26T01-04-43.101Z is finished. The final status was:

All tests succeeded!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/unit_tests/CMakeLists.txt'
2--- src/unit_tests/CMakeLists.txt 2013-01-24 02:27:45 +0000
3+++ src/unit_tests/CMakeLists.txt 2013-01-25 22:32:27 +0000
4@@ -23,6 +23,7 @@
5 test_uri.cpp
6 test_uuid.cpp
7 unit_tests.cpp
8+ test_utf8_streambuf.cpp
9 )
10
11 IF (ZORBA_WITH_FILE_ACCESS)
12
13=== added file 'src/unit_tests/test_utf8_streambuf.cpp'
14--- src/unit_tests/test_utf8_streambuf.cpp 1970-01-01 00:00:00 +0000
15+++ src/unit_tests/test_utf8_streambuf.cpp 2013-01-25 22:32:27 +0000
16@@ -0,0 +1,166 @@
17+/*
18+ * Copyright 2006-2008 The FLWOR Foundation.
19+ *
20+ * Licensed under the Apache License, Version 2.0 (the "License");
21+ * you may not use this file except in compliance with the License.
22+ * You may obtain a copy of the License at
23+ *
24+ * http://www.apache.org/licenses/LICENSE-2.0
25+ *
26+ * Unless required by applicable law or agreed to in writing, software
27+ * distributed under the License is distributed on an "AS IS" BASIS,
28+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29+ * See the License for the specific language governing permissions and
30+ * limitations under the License.
31+ */
32+
33+#include "stdafx.h"
34+
35+#include <fstream>
36+#include <iostream>
37+#include <sstream>
38+#include <string>
39+
40+#include <zorba/zorba_exception.h>
41+
42+#include "util/utf8_streambuf.h"
43+
44+using namespace std;
45+using namespace zorba;
46+
47+#define SMILEY_FACE "\xF0\x9F\x98\x8A"
48+#define COPYRIGHT_UTF8 "\xC2\xA9"
49+#define ONE_THIRD_UTF8 "\xE2\x85\x93"
50+
51+#define BAD_COPYRIGHT_1_UTF8 "\x42\xA9"
52+#define BAD_COPYRIGHT_2_UTF8 "\xC2\x79"
53+
54+static char const *const tests_good[] = {
55+ "Hello, world!",
56+ "Copyright " COPYRIGHT_UTF8 " 2012",
57+ ONE_THIRD_UTF8 " cup sugar",
58+ "Smiley " SMILEY_FACE,
59+ "Smiley 2 " SMILEY_FACE SMILEY_FACE,
60+ SMILEY_FACE " Smiley",
61+ SMILEY_FACE SMILEY_FACE " 2 Smiley",
62+ 0
63+};
64+
65+static char const *const tests_bad[] = {
66+ "Copyright " BAD_COPYRIGHT_1_UTF8 " 2012",
67+ "Copyright " BAD_COPYRIGHT_2_UTF8 " 2012",
68+ 0
69+};
70+
71+///////////////////////////////////////////////////////////////////////////////
72+
73+static int failures;
74+
75+static bool assert_true( int no, char const *expr, int line, bool result ) {
76+ if ( !result ) {
77+ cout << '#' << no << " FAILED, line " << line << ": " << expr << endl;
78+ ++failures;
79+ }
80+ return result;
81+}
82+
83+static void print_exception( int no, char const *expr, int line,
84+ std::exception const &e ) {
85+ assert_true( no, expr, line, false );
86+ cout << "+ exception: " << e.what() << endl;
87+}
88+
89+#define ASSERT_TRUE( NO, EXPR ) assert_true( NO, #EXPR, __LINE__, !!(EXPR) )
90+
91+#define ASSERT_TRUE_AND_NO_EXCEPTION( NO, EXPR ) \
92+ try { ASSERT_TRUE( NO, EXPR ); } \
93+ catch ( exception const &e ) { print_exception( NO, #EXPR, __LINE__, e ); } \
94+ catch ( ... ) { assert_true( NO, #EXPR, __LINE__, false ); }
95+
96+#define ASSERT_EXCEPTION( NO, EXPR ) \
97+ try { EXPR; assert_true( NO, #EXPR, __LINE__, false ); } \
98+ catch ( ZorbaException const &e ) { } \
99+ catch ( ... ) { assert_true( NO, #EXPR, __LINE__, false ); }
100+
101+///////////////////////////////////////////////////////////////////////////////
102+
103+static bool test_getline( char const *test ) {
104+ istringstream iss( test );
105+ utf8::streambuf utf_buf( iss.rdbuf() );
106+ iss.ios::rdbuf( &utf_buf );
107+ iss.exceptions( ios::badbit );
108+
109+ char buf[ 1024 ];
110+ iss.getline( buf, sizeof buf );
111+ if ( iss.gcount() ) {
112+ string const s( buf, iss.gcount() );
113+ return s == test;
114+ }
115+ return false;
116+}
117+
118+static bool test_read( char const *test ) {
119+ istringstream iss( test );
120+ utf8::streambuf utf_buf( iss.rdbuf() );
121+ iss.ios::rdbuf( &utf_buf );
122+ iss.exceptions( ios::badbit );
123+
124+ char buf[ 1024 ];
125+ iss.read( buf, sizeof buf );
126+ if ( iss.gcount() ) {
127+ string const s( buf, iss.gcount() );
128+ return s == test;
129+ }
130+ return false;
131+}
132+
133+static bool test_insertion( char const *test ) {
134+ ostringstream oss;
135+ utf8::streambuf utf_buf( oss.rdbuf(), true );
136+ oss.ios::rdbuf( &utf_buf );
137+ oss.exceptions( ios::badbit );
138+
139+ oss << test << flush;
140+ string const s( oss.str() );
141+ return s == test;
142+}
143+
144+static bool test_put( char const *test ) {
145+ ostringstream oss;
146+ utf8::streambuf utf_buf( oss.rdbuf(), true );
147+ oss.ios::rdbuf( &utf_buf );
148+ oss.exceptions( ios::badbit );
149+
150+ for ( char const *c = test; *c; ++c )
151+ oss.put( *c );
152+
153+ string const s( oss.str() );
154+ return s == test;
155+}
156+
157+///////////////////////////////////////////////////////////////////////////////
158+
159+namespace zorba {
160+namespace UnitTests {
161+
162+int test_utf8_streambuf( int, char*[] ) {
163+ int test_no = 0;
164+ for ( char const *const *s = tests_good; *s; ++s, ++test_no ) {
165+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_getline( *s ) );
166+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_read( *s ) );
167+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_insertion( *s ) );
168+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_put( *s ) );
169+ }
170+ for ( char const *const *s = tests_bad; *s; ++s, ++test_no ) {
171+ ASSERT_EXCEPTION( test_no, test_getline( *s ) );
172+ ASSERT_EXCEPTION( test_no, test_read( *s ) );
173+ ASSERT_EXCEPTION( test_no, test_insertion( *s ) );
174+ ASSERT_EXCEPTION( test_no, test_put( *s ) );
175+ }
176+ cout << failures << " test(s) failed\n";
177+ return failures ? 1 : 0;
178+}
179+
180+} // namespace UnitTests
181+} // namespace zorba
182+/* vim:set et sw=2 ts=2: */
183
184=== modified file 'src/unit_tests/unit_test_list.h'
185--- src/unit_tests/unit_test_list.h 2013-01-25 21:39:08 +0000
186+++ src/unit_tests/unit_test_list.h 2013-01-25 22:32:27 +0000
187@@ -63,6 +63,7 @@
188 int test_unordered_set( int, char*[] );
189 #endif /* ZORBA_HAVE_UNORDERED_SET */
190
191+ int test_utf8_streambuf( int, char*[] );
192 int test_uuid( int, char*[] );
193
194 void initializeTestList();
195
196=== modified file 'src/unit_tests/unit_tests.cpp'
197--- src/unit_tests/unit_tests.cpp 2013-01-25 21:39:08 +0000
198+++ src/unit_tests/unit_tests.cpp 2013-01-25 22:32:27 +0000
199@@ -64,6 +64,7 @@
200 libunittests["unique_ptr"] = test_unique_ptr;
201 #endif /* ZORBA_HAVE_UNIQUE_PTR */
202
203+ libunittests["utf8_streambuf"] = test_utf8_streambuf;
204 libunittests["uuid"] = test_uuid;
205
206 #ifndef ZORBA_HAVE_UNORDERED_MAP
207
208=== modified file 'src/util/CMakeLists.txt'
209--- src/util/CMakeLists.txt 2013-01-15 19:16:16 +0000
210+++ src/util/CMakeLists.txt 2013-01-25 22:32:27 +0000
211@@ -31,6 +31,7 @@
212 unicode_categories.cpp
213 uri_util.cpp
214 utf8_util.cpp
215+ utf8_streambuf.cpp
216 xml_util.cpp
217 fx/fxcharheap.cpp
218 string/empty_rep_base.cpp
219
220=== added file 'src/util/utf8_streambuf.cpp'
221--- src/util/utf8_streambuf.cpp 1970-01-01 00:00:00 +0000
222+++ src/util/utf8_streambuf.cpp 2013-01-25 22:32:27 +0000
223@@ -0,0 +1,259 @@
224+/*
225+ * Copyright 2006-2008 The FLWOR Foundation.
226+ *
227+ * Licensed under the Apache License, Version 2.0 (the "License");
228+ * you may not use this file except in compliance with the License.
229+ * You may obtain a copy of the License at
230+ *
231+ * http://www.apache.org/licenses/LICENSE-2.0
232+ *
233+ * Unless required by applicable law or agreed to in writing, software
234+ * distributed under the License is distributed on an "AS IS" BASIS,
235+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
236+ * See the License for the specific language governing permissions and
237+ * limitations under the License.
238+ */
239+
240+#include "stdafx.h"
241+
242+//#define ZORBA_DEBUG_UTF8_STREAMBUF
243+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
244+# include <stdio.h>
245+#endif
246+
247+#include <iomanip>
248+#include <stdexcept>
249+
250+#include <zorba/config.h>
251+#include <zorba/diagnostic_list.h>
252+
253+#include "diagnostics/diagnostic.h"
254+#include "diagnostics/zorba_exception.h"
255+#include "util/cxx_util.h"
256+#include "util/oseparator.h"
257+#include "util/string_util.h"
258+#include "util/utf8_util.h"
259+
260+#include "utf8_streambuf.h"
261+
262+using namespace std;
263+
264+namespace zorba {
265+namespace utf8 {
266+
267+///////////////////////////////////////////////////////////////////////////////
268+
269+inline void streambuf::buf_type::clear() {
270+ char_len_ = 0;
271+}
272+
273+void streambuf::buf_type::throw_invalid_utf8( storage_type *buf,
274+ size_type len ) {
275+ ostringstream oss;
276+ oss << hex << setfill('0') << setw(2) << uppercase;
277+ oseparator comma( ',' );
278+
279+ for ( size_type i = 0; i < len; ++i )
280+ oss << comma << "0x" << (static_cast<unsigned>( buf[i] ) & 0xFF);
281+
282+ clear();
283+ throw ZORBA_EXCEPTION(
284+ zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE,
285+ ERROR_PARAMS( oss.str() )
286+ );
287+}
288+
289+void streambuf::buf_type::validate( storage_type c, bool bump ) {
290+ size_type char_len_copy = char_len_, cur_len_copy = cur_len_;
291+
292+ if ( !char_len_copy ) {
293+ //
294+ // This means we're (hopefully) at the first byte of a UTF-8 byte sequence
295+ // comprising a character.
296+ //
297+ if ( !(char_len_copy = char_length( c )) )
298+ throw_invalid_utf8( &c, 1 );
299+ cur_len_copy = 0;
300+ }
301+
302+ storage_type *const cur_byte_ptr = utf8_char_ + cur_len_copy;
303+ storage_type const old_byte = *cur_byte_ptr;
304+ *cur_byte_ptr = c;
305+
306+ if ( cur_len_copy++ && !is_continuation_byte( c ) )
307+ throw_invalid_utf8( utf8_char_, cur_len_copy );
308+
309+ if ( bump ) {
310+ char_len_ = (cur_len_copy == char_len_copy ? 0 : char_len_copy);
311+ cur_len_ = cur_len_copy;
312+ } else {
313+ *cur_byte_ptr = old_byte;
314+ }
315+}
316+
317+///////////////////////////////////////////////////////////////////////////////
318+
319+inline void streambuf::clear() {
320+ gbuf_.clear();
321+ pbuf_.clear();
322+}
323+
324+streambuf::streambuf( std::streambuf *orig, bool validate_put ) :
325+ internal::proxy_streambuf( orig ),
326+ validate_put_( validate_put )
327+{
328+ if ( !orig )
329+ throw invalid_argument( "null streambuf" );
330+ clear();
331+}
332+
333+void streambuf::imbue( std::locale const &loc ) {
334+ original()->pubimbue( loc );
335+}
336+
337+void streambuf::resync() {
338+ int_type c = original()->sgetc();
339+ while ( !traits_type::eq_int_type( c, traits_type::eof() ) ) {
340+ if ( is_start_byte( traits_type::to_char_type( c ) ) )
341+ break;
342+ c = original()->sbumpc();
343+ }
344+}
345+
346+streambuf::pos_type streambuf::seekoff( off_type o, ios_base::seekdir d,
347+ ios_base::openmode m ) {
348+ clear();
349+ return original()->pubseekoff( o, d, m );
350+}
351+
352+streambuf::pos_type streambuf::seekpos( pos_type p, ios_base::openmode m ) {
353+ clear();
354+ return original()->pubseekpos( p, m );
355+}
356+
357+std::streambuf* streambuf::setbuf( char_type *p, streamsize s ) {
358+ original()->pubsetbuf( p, s );
359+ return this;
360+}
361+
362+streamsize streambuf::showmanyc() {
363+ return original()->in_avail();
364+}
365+
366+int streambuf::sync() {
367+ return original()->pubsync();
368+}
369+
370+streambuf::int_type streambuf::overflow( int_type c ) {
371+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
372+ printf( "overflow()\n" );
373+#endif
374+ if ( traits_type::eq_int_type( c, traits_type::eof() ) )
375+ return traits_type::eof();
376+ if ( validate_put_ )
377+ pbuf_.validate( traits_type::to_char_type( c ), true );
378+ original()->sputc( c );
379+ return c;
380+}
381+
382+streambuf::int_type streambuf::pbackfail( int_type c ) {
383+ if ( !traits_type::eq_int_type( c, traits_type::eof() ) &&
384+ gbuf_.cur_len_ &&
385+ original()->sputbackc( traits_type::to_char_type( c ) ) ) {
386+ --gbuf_.cur_len_;
387+ return c;
388+ }
389+ return traits_type::eof();
390+}
391+
392+streambuf::int_type streambuf::uflow() {
393+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
394+ printf( "uflow()\n" );
395+#endif
396+ int_type const c = original()->sbumpc();
397+ if ( traits_type::eq_int_type( c, traits_type::eof() ) )
398+ return traits_type::eof();
399+ gbuf_.validate( traits_type::to_char_type( c ) );
400+ return c;
401+}
402+
403+streambuf::int_type streambuf::underflow() {
404+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
405+ printf( "underflow()\n" );
406+#endif
407+ int_type const c = original()->sgetc();
408+ if ( traits_type::eq_int_type( c, traits_type::eof() ) )
409+ return traits_type::eof();
410+ gbuf_.validate( traits_type::to_char_type( c ), false );
411+ return c;
412+}
413+
414+streamsize streambuf::xsgetn( char_type *to, streamsize size ) {
415+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
416+ printf( "xsgetn()\n" );
417+#endif
418+ streamsize return_size = 0;
419+
420+ if ( gbuf_.char_len_ ) {
421+ streamsize const want = gbuf_.char_len_ - gbuf_.cur_len_;
422+ streamsize const get = min( want, size );
423+ streamsize const got = original()->sgetn( to, get );
424+ for ( streamsize i = 0; i < got; ++i )
425+ gbuf_.validate( to[i] );
426+ to += got;
427+ size -= got, return_size += got;
428+ }
429+
430+ while ( size > 0 ) {
431+ if ( streamsize const got = original()->sgetn( to, size ) ) {
432+ for ( streamsize i = 0; i < got; ++i )
433+ gbuf_.validate( to[i] );
434+ to += got;
435+ size -= got, return_size += got;
436+ } else
437+ break;
438+ }
439+ return return_size;
440+}
441+
442+streamsize streambuf::xsputn( char_type const *from, streamsize size ) {
443+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
444+ printf( "xsputn()\n" );
445+#endif
446+ if ( validate_put_ )
447+ for ( streamsize i = 0; i < size; ++i )
448+ pbuf_.validate( from[i] );
449+ return original()->sputn( from, size );
450+}
451+
452+///////////////////////////////////////////////////////////////////////////////
453+
454+// Both new & delete are done inside Zorba rather than in the header to
455+// guarantee that they're cross-DLL-boundary safe on Windows.
456+
457+std::streambuf* alloc_streambuf( std::streambuf *orig ) {
458+ return new utf8::streambuf( orig );
459+}
460+
461+int get_streambuf_index() {
462+ //
463+ // This function is out-of-line because it has a static constant within it.
464+ // It has a static constant within it to guarantee (1) initialization before
465+ // use and (2) initialization happens exactly once.
466+ //
467+ // See: "Standard C++ IOStreams and Locales: Advanced Programmer's Guide and
468+ // Reference," Angelika Langer and Klaus Kreft, Addison-Wesley, 2000, section
469+ // 3.3.1.1: "Initializing and Maintaining the iword/pword Index."
470+ //
471+ // See: "The C++ Programming Language," Bjarne Stroustrup, Addison-Wesley,
472+ // 2000, section 10.4.8: "Local Static Store."
473+ //
474+ static int const index = ios_base::xalloc();
475+ return index;
476+}
477+
478+///////////////////////////////////////////////////////////////////////////////
479+
480+} // namespace utf8
481+} // namespace zorba
482+/* vim:set et sw=2 ts=2: */
483
484=== added file 'src/util/utf8_streambuf.h'
485--- src/util/utf8_streambuf.h 1970-01-01 00:00:00 +0000
486+++ src/util/utf8_streambuf.h 2013-01-25 22:32:27 +0000
487@@ -0,0 +1,322 @@
488+/*
489+ * Copyright 2006-2008 The FLWOR Foundation.
490+ *
491+ * Licensed under the Apache License, Version 2.0 (the "License");
492+ * you may not use this file except in compliance with the License.
493+ * You may obtain a copy of the License at
494+ *
495+ * http://www.apache.org/licenses/LICENSE-2.0
496+ *
497+ * Unless required by applicable law or agreed to in writing, software
498+ * distributed under the License is distributed on an "AS IS" BASIS,
499+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
500+ * See the License for the specific language governing permissions and
501+ * limitations under the License.
502+ */
503+
504+#ifndef ZORBA_UTF8_STREAMBUF_H
505+#define ZORBA_UTF8_STREAMBUF_H
506+
507+#include <zorba/internal/streambuf.h>
508+
509+#include "util/utf8_util.h"
510+
511+namespace zorba {
512+namespace utf8 {
513+
514+///////////////////////////////////////////////////////////////////////////////
515+
516+/**
517+ * A %utf8::streambuf is-a std::streambuf for validating UTF-8 on-the-fly.
518+ * To use it, replace a stream's streambuf:
519+ * \code
520+ * istream is;
521+ * // ...
522+ * utf8::streambuf xbuf( is.rdbuf() );
523+ * is.ios::rdbuf( &xbuf );
524+ * \endcode
525+ * Note that the %utf8::streambuf must exist for as long as it's being used by
526+ * the stream. If you are replacing the streambuf for a stream you did not
527+ * create, you should set it back to the original streambuf:
528+ * \code
529+ * void f( ostream &os ) {
530+ * utf8::streambuf xbuf( os.rdbuf() );
531+ * try {
532+ * os.ios::rdbuf( &xbuf );
533+ * // ...
534+ * os.ios::rdbuf( xbuf.original() );
535+ * }
536+ * catch ( ... ) {
537+ * os.ios::rdbuf( xbuf.original() );
538+ * throw;
539+ * }
540+ * }
541+ * \endcode
542+ *
543+ * If an invalid UTF-8 byte sequence is read, then the stream's \c badbit is
544+ * set. Hence using a %utf8::streambuf requires rigorous error-checking.
545+ *
546+ * However, if exceptions are enabled for the stream, then
547+ * \c ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE is thrown. (When enabling exceptions
548+ * for a stream you didn't create, you should set the exception mask back to
549+ * the original mask.)
550+ * \code
551+ * istream is;
552+ * std::ios::iostate const orig_exceptions = is.exceptions();
553+ * try {
554+ * is.exceptions( orig_exceptions | ios::badbit );
555+ * // ...
556+ * is.exceptions( orig_exceptions );
557+ * }
558+ * catch ( ... ) {
559+ * is.exceptions( orig_exceptions );
560+ * throw;
561+ * }
562+ * \endcode
563+ *
564+ * While %utf8::streambuf does support seeking, the positions must always be on
565+ * the first byte of a UTF-8 character.
566+ */
567+class streambuf : public internal::proxy_streambuf {
568+public:
569+ /**
570+ * Constructs a %streambuf.
571+ *
572+ * @param orig The original streambuf to read/write from/to.
573+ * @param validate_put If \c true, characters written are validated;
574+ * if \c false, characters are written without validation, i.e., it's assumed
575+ * that you're writing valid UTF-8.
576+ * @throws std::invalid_argument if \a orig is \c null.
577+ */
578+ streambuf( std::streambuf *orig, bool validate_put = false );
579+
580+ /**
581+ * If an invalid UTF-8 byte sequence was read, resynchronizes by skipping
582+ * bytes until a new UTF-8 start byte is encountered.
583+ */
584+ void resync();
585+
586+protected:
587+ void imbue( std::locale const& );
588+ pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode );
589+ pos_type seekpos( pos_type, std::ios_base::openmode );
590+ std::streambuf* setbuf( char_type*, std::streamsize );
591+ std::streamsize showmanyc();
592+ int sync();
593+ int_type overflow( int_type );
594+ int_type pbackfail( int_type );
595+ int_type uflow();
596+ int_type underflow();
597+ std::streamsize xsgetn( char_type*, std::streamsize );
598+ std::streamsize xsputn( char_type const*, std::streamsize );
599+
600+private:
601+ struct buf_type {
602+ encoded_char_type utf8_char_;
603+ size_type char_len_;
604+ size_type cur_len_;
605+
606+ void clear();
607+ void throw_invalid_utf8( storage_type *buf, size_type len );
608+ void validate( storage_type, bool bump = true );
609+ };
610+
611+ buf_type gbuf_, pbuf_;
612+ bool const validate_put_;
613+
614+ void clear();
615+
616+ // forbid
617+ streambuf( streambuf const& );
618+ streambuf& operator=( streambuf const& );
619+};
620+
621+///////////////////////////////////////////////////////////////////////////////
622+
623+std::streambuf* alloc_streambuf( std::streambuf *orig );
624+
625+int get_streambuf_index();
626+
627+///////////////////////////////////////////////////////////////////////////////
628+
629+/**
630+ * Attaches a utf8::streambuf to a stream. Unlike using a
631+ * utf8::streambuf directly, this function will create the streambuf,
632+ * attach it to the stream, and manage it for the lifetime of the stream
633+ * automatically.
634+ *
635+ * @param ios The stream to attach the utf8::streambuf to. If the stream
636+ * already has a utf8::streambuf attached to it, this function does
637+ * nothing.
638+ */
639+template<typename charT,typename Traits> inline
640+void attach( std::basic_ios<charT,Traits> &ios ) {
641+ int const index = get_streambuf_index();
642+ void *&pword = ios.pword( index );
643+ if ( !pword ) {
644+ std::streambuf *const buf = alloc_streambuf( ios.rdbuf() );
645+ ios.rdbuf( buf );
646+ pword = buf;
647+ ios.register_callback( internal::stream_callback, index );
648+ }
649+}
650+
651+/**
652+ * Detaches a previously attached utf8::streambuf from a stream. The streambuf
653+ * is destroyed and the stream's original streambuf is restored.
654+ *
655+ * @param ios The stream to detach the utf8::streambuf from. If the stream
656+ * doesn't have a utf8::streambuf attached to it, this function does nothing.
657+ */
658+template<typename charT,typename Traits> inline
659+void detach( std::basic_ios<charT,Traits> &ios ) {
660+ int const index = get_streambuf_index();
661+ if ( streambuf *const buf = static_cast<streambuf*>( ios.pword( index ) ) ) {
662+ ios.pword( index ) = 0;
663+ ios.rdbuf( buf->original() );
664+ internal::dealloc_streambuf( buf );
665+ }
666+}
667+
668+/**
669+ * Checks whether the given stream has a utf8::streambuf attached.
670+ *
671+ * @param ios The stream to check.
672+ * @return \c true only if a utf8::streambuf is attached.
673+ */
674+template<typename charT,typename Traits> inline
675+bool is_attached( std::basic_ios<charT,Traits> &ios ) {
676+ return !!ios.pword( get_streambuf_index() );
677+}
678+
679+/**
680+ * A %utf8::auto_attach is a class that attaches a utf8::streambuf to a stream
681+ * and automatically detaches it when the %auto_attach object is destroyed.
682+ * \code
683+ * void f( ostream &os ) {
684+ * utf8::auto_attach<ostream> const raii( os );
685+ * // ...
686+ * }
687+ * \endcode
688+ * A %utf8::auto_attach is useful for streams not created by you.
689+ *
690+ * @see http://en.wikipedia.org/wiki/Resource_Acquisition_Is_Initialization
691+ */
692+template<class StreamType>
693+class auto_attach {
694+public:
695+ /**
696+ * Constructs an %auto_attach object calling attach() on the given stream.
697+ *
698+ * @param stream The stream to attach the utf8::streambuf to. If the stream
699+ * already has a utf8::streambuf attached to it, this contructor does
700+ * nothing.
701+ */
702+ auto_attach( StreamType &stream ) : stream_( stream ) {
703+ attach( stream );
704+ }
705+
706+ /**
707+ * Destroys this %auto_attach object calling detach() on the previously
708+ * attached stream.
709+ */
710+ ~auto_attach() {
711+ detach( stream_ );
712+ }
713+
714+private:
715+ StreamType &stream_;
716+};
717+
718+///////////////////////////////////////////////////////////////////////////////
719+
720+/**
721+ * A %utf8::stream is used to wrap a C++ standard I/O stream with a
721+ * utf8::streambuf so that UTF-8 validation and the management of the
722+ * streambuf happen automatically.
724+ *
725+ * A %utf8::stream is useful for streams created by you.
726+ *
727+ * @tparam StreamType The I/O stream class type to wrap. It must be a concrete
728+ * stream class.
729+ */
730+template<class StreamType>
731+class stream : public StreamType {
732+public:
733+ /**
734+ * Constructs a %utf8::stream.
735+ */
736+ stream() :
737+#ifdef WIN32
738+# pragma warning( push )
739+# pragma warning( disable : 4355 )
740+#endif /* WIN32 */
741+ utf8_buf_( this->rdbuf() )
742+#ifdef WIN32
743+# pragma warning( pop )
744+#endif /* WIN32 */
745+ {
746+ init();
747+ }
748+
749+ /**
750+ * Constructs a %stream.
751+ *
752+ * @tparam StreamArgType The type of the first argument of \a StreamType's
753+ * constructor.
754+ * @param stream_arg The argument to pass as the first argument to
755+ * \a StreamType's constructor.
756+ */
757+ template<typename StreamArgType>
758+ stream( StreamArgType stream_arg ) :
759+ StreamType( stream_arg ),
760+#ifdef WIN32
761+# pragma warning( push )
762+# pragma warning( disable : 4355 )
763+#endif /* WIN32 */
764+ utf8_buf_( this->rdbuf() )
765+#ifdef WIN32
766+# pragma warning( pop )
767+#endif /* WIN32 */
768+ {
769+ init();
770+ }
771+
772+ /**
773+ * Constructs a %utf8::stream.
774+ *
775+ * @tparam StreamArgType The type of the first argument of \a StreamType's
776+ * constructor.
777+ * @param stream_arg The argument to pass as the first argument to
778+ * \a StreamType's constructor.
779+ * @param mode The open-mode to pass to \a StreamType's constructor.
780+ */
781+ template<typename StreamArgType>
782+ stream( StreamArgType stream_arg, std::ios_base::openmode mode ) :
783+ StreamType( stream_arg, mode ),
784+#ifdef WIN32
785+# pragma warning( push )
786+# pragma warning( disable : 4355 )
787+#endif /* WIN32 */
788+ utf8_buf_( this->rdbuf() )
789+#ifdef WIN32
790+# pragma warning( pop )
791+#endif /* WIN32 */
792+ {
793+ init();
794+ }
795+
796+private:
797+ streambuf utf8_buf_;
798+
799+ void init() {
800+ this->std::ios::rdbuf( &utf8_buf_ );
801+ }
802+};
803+
804+///////////////////////////////////////////////////////////////////////////////
805+
806+} // namespace utf8
807+} // namespace zorba
808+#endif /* ZORBA_UTF8_STREAMBUF_H */
809+/* vim:set et sw=2 ts=2: */
810
811=== modified file 'test/unit/CMakeLists.txt'
812--- test/unit/CMakeLists.txt 2013-01-11 01:34:56 +0000
813+++ test/unit/CMakeLists.txt 2013-01-25 22:32:27 +0000
814@@ -165,5 +165,6 @@
815 IF (NOT ZORBA_HAVE_UNORDERED_SET)
816 ZORBA_ADD_TEST("test/libunit/unordered_set" LibUnitTest unordered_set)
817 ENDIF (NOT ZORBA_HAVE_UNORDERED_SET)
818+ZORBA_ADD_TEST("test/libunit/utf8_streambuf" LibUnitTest utf8_streambuf)
819
820 # vim:set et sw=2 ts=2:

Subscribers

People subscribed via source and target branches