Merge lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba

Proposed by Paul J. Lucas
Status: Merged
Approved by: Paul J. Lucas
Approved revision: 11177
Merged at revision: 11201
Proposed branch: lp:~paul-lucas/zorba/feature-utf8_streambuf
Merge into: lp:zorba
Diff against target: 820 lines (+752/-0)
8 files modified
src/unit_tests/CMakeLists.txt (+1/-0)
src/unit_tests/test_utf8_streambuf.cpp (+166/-0)
src/unit_tests/unit_test_list.h (+1/-0)
src/unit_tests/unit_tests.cpp (+1/-0)
src/util/CMakeLists.txt (+1/-0)
src/util/utf8_streambuf.cpp (+259/-0)
src/util/utf8_streambuf.h (+322/-0)
test/unit/CMakeLists.txt (+1/-0)
To merge this branch: bzr merge lp:~paul-lucas/zorba/feature-utf8_streambuf
Reviewer Review Type Date Requested Status
Matthias Brantner Approve
Sorin Marian Nasoi Approve
Juan Zacarias Approve
Paul J. Lucas Approve
Review via email: mp+142440@code.launchpad.net

Commit message

Streambuf for validating UTF-8 on-the-fly.

Description of the change

Streambuf for validating UTF-8 on-the-fly.

To post a comment you must log in.
Revision history for this message
Paul J. Lucas (paul-lucas) :
review: Approve
Revision history for this message
Paul J. Lucas (paul-lucas) wrote :

Do you guys want this??

Please note that it does require some discipline to use due to the way exceptions are handled in streams.

Revision history for this message
Juan Zacarias (juan457) :
review: Approve
Revision history for this message
Sorin Marian Nasoi (sorin.marian.nasoi) :
review: Approve
Revision history for this message
Matthias Brantner (matthias-brantner) :
review: Approve
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Validation queue job feature-utf8_streambuf-2013-01-26T01-04-43.101Z is finished. The final status was:

All tests succeeded!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/unit_tests/CMakeLists.txt'
--- src/unit_tests/CMakeLists.txt 2013-01-24 02:27:45 +0000
+++ src/unit_tests/CMakeLists.txt 2013-01-25 22:32:27 +0000
@@ -23,6 +23,7 @@
23 test_uri.cpp23 test_uri.cpp
24 test_uuid.cpp24 test_uuid.cpp
25 unit_tests.cpp25 unit_tests.cpp
26 test_utf8_streambuf.cpp
26)27)
2728
28IF (ZORBA_WITH_FILE_ACCESS)29IF (ZORBA_WITH_FILE_ACCESS)
2930
=== added file 'src/unit_tests/test_utf8_streambuf.cpp'
--- src/unit_tests/test_utf8_streambuf.cpp 1970-01-01 00:00:00 +0000
+++ src/unit_tests/test_utf8_streambuf.cpp 2013-01-25 22:32:27 +0000
@@ -0,0 +1,166 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "stdafx.h"
18
19#include <fstream>
20#include <iostream>
21#include <sstream>
22#include <string>
23
24#include <zorba/zorba_exception.h>
25
26#include "util/utf8_streambuf.h"
27
28using namespace std;
29using namespace zorba;
30
31#define SMILEY_FACE "\xF0\x9F\x98\x8A"
32#define COPYRIGHT_UTF8 "\xC2\xA9"
33#define ONE_THIRD_UTF8 "\xE2\x85\x93"
34
35#define BAD_COPYRIGHT_1_UTF8 "\x42\xA9"
36#define BAD_COPYRIGHT_2_UTF8 "\xC2\x79"
37
38static char const *const tests_good[] = {
39 "Hello, world!",
40 "Copyright " COPYRIGHT_UTF8 " 2012",
41 ONE_THIRD_UTF8 " cup sugar",
42 "Smiley " SMILEY_FACE,
43 "Smiley 2 " SMILEY_FACE SMILEY_FACE,
44 SMILEY_FACE " Smiley",
45 SMILEY_FACE SMILEY_FACE " 2 Smiley",
46 0
47};
48
49static char const *const tests_bad[] = {
50 "Copyright " BAD_COPYRIGHT_1_UTF8 " 2012",
51 "Copyright " BAD_COPYRIGHT_2_UTF8 " 2012",
52 0
53};
54
55///////////////////////////////////////////////////////////////////////////////
56
57static int failures;
58
59static bool assert_true( int no, char const *expr, int line, bool result ) {
60 if ( !result ) {
61 cout << '#' << no << " FAILED, line " << line << ": " << expr << endl;
62 ++failures;
63 }
64 return result;
65}
66
67static void print_exception( int no, char const *expr, int line,
68 std::exception const &e ) {
69 assert_true( no, expr, line, false );
70 cout << "+ exception: " << e.what() << endl;
71}
72
73#define ASSERT_TRUE( NO, EXPR ) assert_true( NO, #EXPR, __LINE__, !!(EXPR) )
74
75#define ASSERT_TRUE_AND_NO_EXCEPTION( NO, EXPR ) \
76 try { ASSERT_TRUE( NO, EXPR ); } \
77 catch ( exception const &e ) { print_exception( NO, #EXPR, __LINE__, e ); } \
78 catch ( ... ) { assert_true( NO, #EXPR, __LINE__, false ); }
79
80#define ASSERT_EXCEPTION( NO, EXPR ) \
81 try { EXPR; assert_true( NO, #EXPR, __LINE__, false ); } \
82 catch ( ZorbaException const &e ) { } \
83 catch ( ... ) { assert_true( NO, #EXPR, __LINE__, false ); }
84
85///////////////////////////////////////////////////////////////////////////////
86
87static bool test_getline( char const *test ) {
88 istringstream iss( test );
89 utf8::streambuf utf_buf( iss.rdbuf() );
90 iss.ios::rdbuf( &utf_buf );
91 iss.exceptions( ios::badbit );
92
93 char buf[ 1024 ];
94 iss.getline( buf, sizeof buf );
95 if ( iss.gcount() ) {
96 string const s( buf, iss.gcount() );
97 return s == test;
98 }
99 return false;
100}
101
102static bool test_read( char const *test ) {
103 istringstream iss( test );
104 utf8::streambuf utf_buf( iss.rdbuf() );
105 iss.ios::rdbuf( &utf_buf );
106 iss.exceptions( ios::badbit );
107
108 char buf[ 1024 ];
109 iss.read( buf, sizeof buf );
110 if ( iss.gcount() ) {
111 string const s( buf, iss.gcount() );
112 return s == test;
113 }
114 return false;
115}
116
117static bool test_insertion( char const *test ) {
118 ostringstream oss;
119 utf8::streambuf utf_buf( oss.rdbuf(), true );
120 oss.ios::rdbuf( &utf_buf );
121 oss.exceptions( ios::badbit );
122
123 oss << test << flush;
124 string const s( oss.str() );
125 return s == test;
126}
127
128static bool test_put( char const *test ) {
129 ostringstream oss;
130 utf8::streambuf utf_buf( oss.rdbuf(), true );
131 oss.ios::rdbuf( &utf_buf );
132 oss.exceptions( ios::badbit );
133
134 for ( char const *c = test; *c; ++c )
135 oss.put( *c );
136
137 string const s( oss.str() );
138 return s == test;
139}
140
141///////////////////////////////////////////////////////////////////////////////
142
143namespace zorba {
144namespace UnitTests {
145
146int test_utf8_streambuf( int, char*[] ) {
147 int test_no = 0;
148 for ( char const *const *s = tests_good; *s; ++s, ++test_no ) {
149 ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_getline( *s ) );
150 ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_read( *s ) );
151 ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_insertion( *s ) );
152 ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_put( *s ) );
153 }
154 for ( char const *const *s = tests_bad; *s; ++s, ++test_no ) {
155 ASSERT_EXCEPTION( test_no, test_getline( *s ) );
156 ASSERT_EXCEPTION( test_no, test_read( *s ) );
157 ASSERT_EXCEPTION( test_no, test_insertion( *s ) );
158 ASSERT_EXCEPTION( test_no, test_put( *s ) );
159 }
160 cout << failures << " test(s) failed\n";
161 return failures ? 1 : 0;
162}
163
164} // namespace UnitTests
165} // namespace zorba
166/* vim:set et sw=2 ts=2: */
0167
=== modified file 'src/unit_tests/unit_test_list.h'
--- src/unit_tests/unit_test_list.h 2013-01-25 21:39:08 +0000
+++ src/unit_tests/unit_test_list.h 2013-01-25 22:32:27 +0000
@@ -63,6 +63,7 @@
63 int test_unordered_set( int, char*[] );63 int test_unordered_set( int, char*[] );
64#endif /* ZORBA_HAVE_UNORDERED_SET */64#endif /* ZORBA_HAVE_UNORDERED_SET */
6565
66 int test_utf8_streambuf( int, char*[] );
66 int test_uuid( int, char*[] );67 int test_uuid( int, char*[] );
6768
68 void initializeTestList();69 void initializeTestList();
6970
=== modified file 'src/unit_tests/unit_tests.cpp'
--- src/unit_tests/unit_tests.cpp 2013-01-25 21:39:08 +0000
+++ src/unit_tests/unit_tests.cpp 2013-01-25 22:32:27 +0000
@@ -64,6 +64,7 @@
64 libunittests["unique_ptr"] = test_unique_ptr;64 libunittests["unique_ptr"] = test_unique_ptr;
65#endif /* ZORBA_HAVE_UNIQUE_PTR */65#endif /* ZORBA_HAVE_UNIQUE_PTR */
6666
67 libunittests["utf8_streambuf"] = test_utf8_streambuf;
67 libunittests["uuid"] = test_uuid;68 libunittests["uuid"] = test_uuid;
6869
69#ifndef ZORBA_HAVE_UNORDERED_MAP70#ifndef ZORBA_HAVE_UNORDERED_MAP
7071
=== modified file 'src/util/CMakeLists.txt'
--- src/util/CMakeLists.txt 2013-01-15 19:16:16 +0000
+++ src/util/CMakeLists.txt 2013-01-25 22:32:27 +0000
@@ -31,6 +31,7 @@
31 unicode_categories.cpp31 unicode_categories.cpp
32 uri_util.cpp32 uri_util.cpp
33 utf8_util.cpp33 utf8_util.cpp
34 utf8_streambuf.cpp
34 xml_util.cpp35 xml_util.cpp
35 fx/fxcharheap.cpp36 fx/fxcharheap.cpp
36 string/empty_rep_base.cpp37 string/empty_rep_base.cpp
3738
=== added file 'src/util/utf8_streambuf.cpp'
--- src/util/utf8_streambuf.cpp 1970-01-01 00:00:00 +0000
+++ src/util/utf8_streambuf.cpp 2013-01-25 22:32:27 +0000
@@ -0,0 +1,259 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "stdafx.h"
18
19//#define ZORBA_DEBUG_UTF8_STREAMBUF
20#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
21# include <stdio.h>
22#endif
23
24#include <iomanip>
25#include <stdexcept>
26
27#include <zorba/config.h>
28#include <zorba/diagnostic_list.h>
29
30#include "diagnostics/diagnostic.h"
31#include "diagnostics/zorba_exception.h"
32#include "util/cxx_util.h"
33#include "util/oseparator.h"
34#include "util/string_util.h"
35#include "util/utf8_util.h"
36
37#include "utf8_streambuf.h"
38
39using namespace std;
40
41namespace zorba {
42namespace utf8 {
43
44///////////////////////////////////////////////////////////////////////////////
45
46inline void streambuf::buf_type::clear() {
47 char_len_ = 0;
48}
49
50void streambuf::buf_type::throw_invalid_utf8( storage_type *buf,
51 size_type len ) {
52 ostringstream oss;
53 oss << hex << setfill('0') << setw(2) << uppercase;
54 oseparator comma( ',' );
55
56 for ( size_type i = 0; i < len; ++i )
57 oss << comma << "0x" << (static_cast<unsigned>( buf[i] ) & 0xFF);
58
59 clear();
60 throw ZORBA_EXCEPTION(
61 zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE,
62 ERROR_PARAMS( oss.str() )
63 );
64}
65
66void streambuf::buf_type::validate( storage_type c, bool bump ) {
67 size_type char_len_copy = char_len_, cur_len_copy = cur_len_;
68
69 if ( !char_len_copy ) {
70 //
71 // This means we're (hopefully) at the first byte of a UTF-8 byte sequence
72 // comprising a character.
73 //
74 if ( !(char_len_copy = char_length( c )) )
75 throw_invalid_utf8( &c, 1 );
76 cur_len_copy = 0;
77 }
78
79 storage_type *const cur_byte_ptr = utf8_char_ + cur_len_copy;
80 storage_type const old_byte = *cur_byte_ptr;
81 *cur_byte_ptr = c;
82
83 if ( cur_len_copy++ && !is_continuation_byte( c ) )
84 throw_invalid_utf8( utf8_char_, cur_len_copy );
85
86 if ( bump ) {
87 char_len_ = (cur_len_copy == char_len_copy ? 0 : char_len_copy);
88 cur_len_ = cur_len_copy;
89 } else {
90 *cur_byte_ptr = old_byte;
91 }
92}
93
94///////////////////////////////////////////////////////////////////////////////
95
96inline void streambuf::clear() {
97 gbuf_.clear();
98 pbuf_.clear();
99}
100
101streambuf::streambuf( std::streambuf *orig, bool validate_put ) :
102 internal::proxy_streambuf( orig ),
103 validate_put_( validate_put )
104{
105 if ( !orig )
106 throw invalid_argument( "null streambuf" );
107 clear();
108}
109
110void streambuf::imbue( std::locale const &loc ) {
111 original()->pubimbue( loc );
112}
113
114void streambuf::resync() {
115 int_type c = original()->sgetc();
116 while ( !traits_type::eq_int_type( c, traits_type::eof() ) ) {
117 if ( is_start_byte( traits_type::to_char_type( c ) ) )
118 break;
119 c = original()->sbumpc();
120 }
121}
122
123streambuf::pos_type streambuf::seekoff( off_type o, ios_base::seekdir d,
124 ios_base::openmode m ) {
125 clear();
126 return original()->pubseekoff( o, d, m );
127}
128
129streambuf::pos_type streambuf::seekpos( pos_type p, ios_base::openmode m ) {
130 clear();
131 return original()->pubseekpos( p, m );
132}
133
134std::streambuf* streambuf::setbuf( char_type *p, streamsize s ) {
135 original()->pubsetbuf( p, s );
136 return this;
137}
138
139streamsize streambuf::showmanyc() {
140 return original()->in_avail();
141}
142
143int streambuf::sync() {
144 return original()->pubsync();
145}
146
147streambuf::int_type streambuf::overflow( int_type c ) {
148#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
149 printf( "overflow()\n" );
150#endif
151 if ( traits_type::eq_int_type( c, traits_type::eof() ) )
152 return traits_type::eof();
153 if ( validate_put_ )
154 pbuf_.validate( traits_type::to_char_type( c ), true );
155 original()->sputc( c );
156 return c;
157}
158
159streambuf::int_type streambuf::pbackfail( int_type c ) {
160 if ( !traits_type::eq_int_type( c, traits_type::eof() ) &&
161 gbuf_.cur_len_ &&
162 original()->sputbackc( traits_type::to_char_type( c ) ) ) {
163 --gbuf_.cur_len_;
164 return c;
165 }
166 return traits_type::eof();
167}
168
169streambuf::int_type streambuf::uflow() {
170#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
171 printf( "uflow()\n" );
172#endif
173 int_type const c = original()->sbumpc();
174 if ( traits_type::eq_int_type( c, traits_type::eof() ) )
175 return traits_type::eof();
176 gbuf_.validate( traits_type::to_char_type( c ) );
177 return c;
178}
179
180streambuf::int_type streambuf::underflow() {
181#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
182 printf( "underflow()\n" );
183#endif
184 int_type const c = original()->sgetc();
185 if ( traits_type::eq_int_type( c, traits_type::eof() ) )
186 return traits_type::eof();
187 gbuf_.validate( traits_type::to_char_type( c ), false );
188 return c;
189}
190
191streamsize streambuf::xsgetn( char_type *to, streamsize size ) {
192#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
193 printf( "xsgetn()\n" );
194#endif
195 streamsize return_size = 0;
196
197 if ( gbuf_.char_len_ ) {
198 streamsize const want = gbuf_.char_len_ - gbuf_.cur_len_;
199 streamsize const get = min( want, size );
200 streamsize const got = original()->sgetn( to, get );
201 for ( streamsize i = 0; i < got; ++i )
202 gbuf_.validate( to[i] );
203 to += got;
204 size -= got, return_size += got;
205 }
206
207 while ( size > 0 ) {
208 if ( streamsize const got = original()->sgetn( to, size ) ) {
209 for ( streamsize i = 0; i < got; ++i )
210 gbuf_.validate( to[i] );
211 to += got;
212 size -= got, return_size += got;
213 } else
214 break;
215 }
216 return return_size;
217}
218
219streamsize streambuf::xsputn( char_type const *from, streamsize size ) {
220#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
221 printf( "xsputn()\n" );
222#endif
223 if ( validate_put_ )
224 for ( streamsize i = 0; i < size; ++i )
225 pbuf_.validate( from[i] );
226 return original()->sputn( from, size );
227}
228
229///////////////////////////////////////////////////////////////////////////////
230
231// Both new & delete are done inside Zorba rather than in the header to
232// guarantee that they're cross-DLL-boundary safe on Windows.
233
234std::streambuf* alloc_streambuf( std::streambuf *orig ) {
235 return new utf8::streambuf( orig );
236}
237
238int get_streambuf_index() {
239 //
240 // This function is out-of-line because it has a static constant within it.
241 // It has a static constant within it to guarantee (1) initialization before
242 // use and (2) initialization happens exactly once.
243 //
244 // See: "Standard C++ IOStreams and Locales: Advanced Programmer's Guide and
245 // Reference," Angelika Langer and Klaus Kreft, Addison-Wesley, 2000, section
246 // 3.3.1.1: "Initializing and Maintaining the iword/pword Index."
247 //
248 // See: "The C++ Programming Language," Bjarne Stroustrup, Addison-Wesley,
249 // 2000, section 10.4.8: "Local Static Store."
250 //
251 static int const index = ios_base::xalloc();
252 return index;
253}
254
255///////////////////////////////////////////////////////////////////////////////
256
257} // namespace utf8
258} // namespace zorba
259/* vim:set et sw=2 ts=2: */
0260
=== added file 'src/util/utf8_streambuf.h'
--- src/util/utf8_streambuf.h 1970-01-01 00:00:00 +0000
+++ src/util/utf8_streambuf.h 2013-01-25 22:32:27 +0000
@@ -0,0 +1,322 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ZORBA_UTF8_STREAMBUF_H
18#define ZORBA_UTF8_STREAMBUF_H
19
20#include <zorba/internal/streambuf.h>
21
22#include "util/utf8_util.h"
23
24namespace zorba {
25namespace utf8 {
26
27///////////////////////////////////////////////////////////////////////////////
28
29/**
30 * A %utf8::streambuf is-a std::streambuf for validating UTF-8 on-the-fly.
31 * To use it, replace a stream's streambuf:
32 * \code
33 * istream is;
34 * // ...
35 * utf8::streambuf xbuf( is.rdbuf() );
36 * is.ios::rdbuf( &xbuf );
37 * \endcode
38 * Note that the %utf8::streambuf must exist for as long as it's being used by
39 * the stream. If you are replacing the streambuf for a stream you did not
40 * create, you should set it back to the original streambuf:
41 * \code
42 * void f( ostream &os ) {
43 * utf8::streambuf xbuf( os.rdbuf() );
44 * try {
45 * os.ios::rdbuf( &xbuf );
46 * // ...
47 * os.ios::rdbuf( xbuf.original() );
48 * }
49 * catch ( ... ) {
50 * os.ios::rdbuf( xbuf.original() );
51 * throw;
52 * }
53 * }
54 * \endcode
55 *
56 * If an invalid UTF-8 byte sequence is read, then the stream's \c badbit is
57 * set. Hence using a %utf8::streambuf requires rigorous error-checking.
58 *
59 * However, if exceptions are enabled for the stream, then
60 * \c ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE is thrown. (When enabling exceptions
61 * for a stream you didn't create, you should set the exception mask back to
62 * the original mask.)
63 * \code
64 * istream is;
65 * std::ios::iostate const orig_exceptions = is.exceptions();
66 * try {
67 * is.exceptions( orig_exceptions | ios::badbit );
68 * // ...
69 * is.exceptions( orig_exceptions );
70 * }
71 * catch ( ... ) {
72 * is.exceptions( orig_exceptions );
73 * throw;
74 * }
75 * \endcode
76 *
77 * While %utf8::streambuf does support seeking, the positions must always be on
78 * the first byte of a UTF-8 character.
79 */
80class streambuf : public internal::proxy_streambuf {
81public:
82 /**
83 * Constructs a %streambuf.
84 *
85 * @param orig The original streambuf to read/write from/to.
86 * @param validate_put If \c true, characters written are validated;
87 * if \c false, characters are written without validation, i.e., it's assumed
88 * that you're writing valid UTF-8.
89 * @throws std::invalid_argument if \a orig is \c null.
90 */
91 streambuf( std::streambuf *orig, bool validate_put = false );
92
93 /**
94 * If an invalid UTF-8 byte sequence was read, resynchronizes by skipping
95 * bytes until a new UTF-8 start byte is encountered.
96 */
97 void resync();
98
99protected:
100 void imbue( std::locale const& );
101 pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode );
102 pos_type seekpos( pos_type, std::ios_base::openmode );
103 std::streambuf* setbuf( char_type*, std::streamsize );
104 std::streamsize showmanyc();
105 int sync();
106 int_type overflow( int_type );
107 int_type pbackfail( int_type );
108 int_type uflow();
109 int_type underflow();
110 std::streamsize xsgetn( char_type*, std::streamsize );
111 std::streamsize xsputn( char_type const*, std::streamsize );
112
113private:
114 struct buf_type {
115 encoded_char_type utf8_char_;
116 size_type char_len_;
117 size_type cur_len_;
118
119 void clear();
120 void throw_invalid_utf8( storage_type *buf, size_type len );
121 void validate( storage_type, bool bump = true );
122 };
123
124 buf_type gbuf_, pbuf_;
125 bool const validate_put_;
126
127 void clear();
128
129 // forbid
130 streambuf( streambuf const& );
131 streambuf& operator=( streambuf const& );
132};
133
134///////////////////////////////////////////////////////////////////////////////
135
136std::streambuf* alloc_streambuf( std::streambuf *orig );
137
138int get_streambuf_index();
139
140///////////////////////////////////////////////////////////////////////////////
141
142/**
143 * Attaches a utf8::streambuf to a stream. Unlike using a
144 * utf8::streambuf directly, this function will create the streambuf,
145 * attach it to the stream, and manage it for the lifetime of the stream
146 * automatically.
147 *
148 * @param ios The stream to attach the utf8::streambuf to. If the stream
149 * already has a utf8::streambuf attached to it, this function does
150 * nothing.
151 */
152template<typename charT,typename Traits> inline
153void attach( std::basic_ios<charT,Traits> &ios ) {
154 int const index = get_streambuf_index();
155 void *&pword = ios.pword( index );
156 if ( !pword ) {
157 std::streambuf *const buf = alloc_streambuf( ios.rdbuf() );
158 ios.rdbuf( buf );
159 pword = buf;
160 ios.register_callback( internal::stream_callback, index );
161 }
162}
163
164/**
165 * Detaches a previously attached utf8::streambuf from a stream. The streambuf
166 * is destroyed and the stream's original streambuf is restored.
167 *
168 * @param ios The stream to detach the utf8::streambuf from. If the stream
169 * doesn't have a utf8::streambuf attached to it, this function does nothing.
170 */
171template<typename charT,typename Traits> inline
172void detach( std::basic_ios<charT,Traits> &ios ) {
173 int const index = get_streambuf_index();
174 if ( streambuf *const buf = static_cast<streambuf*>( ios.pword( index ) ) ) {
175 ios.pword( index ) = 0;
176 ios.rdbuf( buf->original() );
177 internal::dealloc_streambuf( buf );
178 }
179}
180
181/**
182 * Checks whether the given stream has a utf8::streambuf attached.
183 *
184 * @param ios The stream to check.
185 * @return \c true only if a utf8::streambuf is attached.
186 */
187template<typename charT,typename Traits> inline
188bool is_attached( std::basic_ios<charT,Traits> &ios ) {
189 return !!ios.pword( get_streambuf_index() );
190}
191
/**
 * A %utf8::auto_attach is a class that attaches a utf8::streambuf to a stream
 * and automatically detaches it when the %auto_attach object is destroyed.
 * \code
 *  void f( ostream &os ) {
 *    utf8::auto_attach<ostream> const raii( os );
 *    // ...
 *  }
 * \endcode
 * A %utf8::auto_attach is useful for streams not created by you.
 *
 * If the stream already had a utf8::streambuf attached when the %auto_attach
 * was constructed, the %auto_attach leaves it alone: it neither attaches a new
 * one nor detaches the existing one on destruction.  This allows
 * %auto_attach objects for the same stream to be nested.
 *
 * @see http://en.wikipedia.org/wiki/Resource_Acquisition_Is_Initialization
 */
template<class StreamType>
class auto_attach {
public:
  /**
   * Constructs an %auto_attach object calling attach() on the given stream.
   *
   * @param stream The stream to attach the utf8::streambuf to.  If the stream
   * already has a utf8::streambuf attached to it, this constructor does
   * nothing.
   */
  auto_attach( StreamType &stream ) :
    stream_( stream ),
    attached_here_( !is_attached( stream ) )
  {
    attach( stream );
  }

  /**
   * Destroys this %auto_attach object calling detach() on the stream, but
   * only if it was this object that attached the utf8::streambuf.
   */
  ~auto_attach() {
    if ( attached_here_ )
      detach( stream_ );
  }

private:
  StreamType &stream_;

  // True only if the stream had no utf8::streambuf attached at construction,
  // i.e., the attached streambuf is the one this object is responsible for.
  bool const attached_here_;
};
230
231///////////////////////////////////////////////////////////////////////////////
232
233/**
234 * A %utf8::stream is used to wrap a C++ standard I/O stream with a
235 * utf8::streambuf so that UTF-8 validation and the management of the
236 * streambuf happen automatically.
237 *
238 * A %utf8::stream is useful for streams created by you.
239 *
240 * @tparam StreamType The I/O stream class type to wrap. It must be a concrete
241 * stream class.
242 */
243template<class StreamType>
244class stream : public StreamType {
245public:
246 /**
247 * Constructs a %utf8::stream.
248 */
249 stream() :
250#ifdef WIN32
251# pragma warning( push )
252# pragma warning( disable : 4355 )
253#endif /* WIN32 */
254 utf8_buf_( this->rdbuf() )
255#ifdef WIN32
256# pragma warning( pop )
257#endif /* WIN32 */
258 {
259 init();
260 }
261
262 /**
263 * Constructs a %utf8::stream.
264 *
265 * @tparam StreamArgType The type of the first argument of \a StreamType's
266 * constructor.
267 * @param stream_arg The argument to pass as the first argument to
268 * \a StreamType's constructor.
269 */
270 template<typename StreamArgType>
271 stream( StreamArgType stream_arg ) :
272 StreamType( stream_arg ),
273#ifdef WIN32
274# pragma warning( push )
275# pragma warning( disable : 4355 )
276#endif /* WIN32 */
277 utf8_buf_( this->rdbuf() )
278#ifdef WIN32
279# pragma warning( pop )
280#endif /* WIN32 */
281 {
282 init();
283 }
284
285 /**
286 * Constructs a %utf8::stream.
287 *
288 * @tparam StreamArgType The type of the first argument of \a StreamType's
289 * constructor.
290 * @param stream_arg The argument to pass as the first argument to
291 * \a StreamType's constructor.
292 * @param mode The open-mode to pass to \a StreamType's constructor.
293 */
294 template<typename StreamArgType>
295 stream( StreamArgType stream_arg, std::ios_base::openmode mode ) :
296 StreamType( stream_arg, mode ),
297#ifdef WIN32
298# pragma warning( push )
299# pragma warning( disable : 4355 )
300#endif /* WIN32 */
301 utf8_buf_( this->rdbuf() )
302#ifdef WIN32
303# pragma warning( pop )
304#endif /* WIN32 */
305 {
306 init();
307 }
308
309private:
310 streambuf utf8_buf_;
311
312 void init() {
313 this->std::ios::rdbuf( &utf8_buf_ );
314 }
315};
316
317///////////////////////////////////////////////////////////////////////////////
318
319} // namespace utf8
320} // namespace zorba
321#endif /* ZORBA_UTF8_STREAMBUF_H */
322/* vim:set et sw=2 ts=2: */
0323
=== modified file 'test/unit/CMakeLists.txt'
--- test/unit/CMakeLists.txt 2013-01-11 01:34:56 +0000
+++ test/unit/CMakeLists.txt 2013-01-25 22:32:27 +0000
@@ -165,5 +165,6 @@
165IF (NOT ZORBA_HAVE_UNORDERED_SET)165IF (NOT ZORBA_HAVE_UNORDERED_SET)
166 ZORBA_ADD_TEST("test/libunit/unordered_set" LibUnitTest unordered_set)166 ZORBA_ADD_TEST("test/libunit/unordered_set" LibUnitTest unordered_set)
167ENDIF (NOT ZORBA_HAVE_UNORDERED_SET)167ENDIF (NOT ZORBA_HAVE_UNORDERED_SET)
168ZORBA_ADD_TEST("test/libunit/utf8_streambuf" LibUnitTest utf8_streambuf)
168169
169# vim:set et sw=2 ts=2:170# vim:set et sw=2 ts=2:

Subscribers

People subscribed via source and target branches