Merge lp:~zorba-coders/zorba/feature-transcode_streambuf into lp:zorba
- feature-transcode_streambuf
- Merge into trunk
Proposed by
Paul J. Lucas
Status: | Superseded | ||||
---|---|---|---|---|---|
Proposed branch: | lp:~zorba-coders/zorba/feature-transcode_streambuf | ||||
Merge into: | lp:zorba | ||||
Diff against target: |
2883 lines (+1857/-552) 35 files modified
ChangeLog (+4/-0) include/zorba/internal/proxy.h (+48/-0) include/zorba/pregenerated/diagnostic_list.h (+2/-0) include/zorba/transcode_stream.h (+213/-0) modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp (+337/-338) modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h (+164/-143) modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp (+71/-21) modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h (+10/-6) modules/com/zorba-xquery/www/modules/pregenerated/errors.xq (+4/-0) modules/org/expath/ns/file.xq.src/file.cpp (+25/-10) modules/org/expath/ns/file.xq.src/file_function.cpp (+0/-5) modules/org/expath/ns/file.xq.src/file_function.h (+5/-9) modules/org/expath/ns/file.xq.src/file_module.cpp (+2/-5) modules/org/expath/ns/file.xq.src/file_module.h (+13/-6) src/api/CMakeLists.txt (+1/-0) src/api/transcode_streambuf.cpp (+102/-0) src/diagnostics/diagnostic_en.xml (+4/-0) src/diagnostics/pregenerated/diagnostic_list.cpp (+3/-0) src/diagnostics/pregenerated/dict_en.cpp (+1/-0) src/unit_tests/CMakeLists.txt (+4/-6) src/unit_tests/test_icu_streambuf.cpp (+151/-0) src/unit_tests/unit_test_list.h (+5/-0) src/unit_tests/unit_tests.cpp (+3/-0) src/util/CMakeLists.txt (+6/-1) src/util/icu_streambuf.cpp (+300/-0) src/util/icu_streambuf.h (+140/-0) src/util/passthru_streambuf.cpp (+105/-0) src/util/passthru_streambuf.h (+76/-0) src/util/transcode_streambuf.h (+47/-0) test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res (+1/-0) test/rbkt/Queries/zorba/file/cp1252.txt (+1/-0) test/rbkt/Queries/zorba/file/cp1252.xq (+3/-0) test/rbkt/Queries/zorba/file/invalid_encoding.spec (+1/-0) test/rbkt/Queries/zorba/file/invalid_encoding.xq (+3/-0) test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq (+2/-2) |
||||
To merge this branch: | bzr merge lp:~zorba-coders/zorba/feature-transcode_streambuf | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Paul J. Lucas | Approve | ||
Matthias Brantner | Pending | ||
Review via email: mp+91980@code.launchpad.net |
This proposal has been superseded by a proposal from 2012-02-08.
Commit message
Added transcode_streambuf.
Description of the change
Added transcode_streambuf.
To post a comment you must log in.
Revision history for this message
Paul J. Lucas (paul-lucas) : | # |
review:
Approve
- 10651. By Matthias Brantner
-
mention http-client change in ChangeLog
- 10652. By Paul J. Lucas
-
Merge from trunk.
- 10653. By Paul J. Lucas
-
Added ZOSE0007_UNKNOWN_ENCODING.
- 10654. By Paul J. Lucas
-
s/ZOSE0007/ZXQP0006/
- 10655. By Paul J. Lucas
-
Merge from trunk.
- 10656. By Paul J. Lucas
-
Merge from trunk.
- 10657. By Paul J. Lucas
-
Added charset=utf-8.
- 10658. By Paul J. Lucas
-
Merge from trunk.
- 10659. By Paul J. Lucas
-
Removed http-client tag.
- 10660. By Paul J. Lucas
-
Merge from trunk.
Unmerged revisions
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'ChangeLog' |
2 | --- ChangeLog 2012-02-07 14:26:42 +0000 |
3 | +++ ChangeLog 2012-02-08 19:20:28 +0000 |
4 | @@ -28,6 +28,10 @@ |
5 | * zerr is not predeclared anymore to be http://www.zorba-xquery.com/errors |
6 | * Add new XQuery interface for the PHP bindings. |
7 | * Added API method Item::getNamespaceBindings(). |
8 | + * Added a transcoding streambuffer to the API which allows transcoding arbitrary encodings |
9 | + from and to UTF-8 |
10 | + * file:read-text is able to handle arbitrary encodings (fixes bug #867159) |
11 | + * http:send-request is able to handle arbitrary encodings |
12 | * Fixed bug #917981 (disallow declaring same module twice). |
13 | * Added API method StaticContext::getNamespaceBindings() (see bug #905035) |
14 | * Deprecated StaticContext:getNamespaceURIByPrefix() |
15 | |
16 | === added file 'include/zorba/internal/proxy.h' |
17 | --- include/zorba/internal/proxy.h 1970-01-01 00:00:00 +0000 |
18 | +++ include/zorba/internal/proxy.h 2012-02-08 19:20:28 +0000 |
19 | @@ -0,0 +1,48 @@ |
20 | +/* |
21 | + * Copyright 2006-2008 The FLWOR Foundation. |
22 | + * |
23 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
24 | + * you may not use this file except in compliance with the License. |
25 | + * You may obtain a copy of the License at |
26 | + * |
27 | + * http://www.apache.org/licenses/LICENSE-2.0 |
28 | + * |
29 | + * Unless required by applicable law or agreed to in writing, software |
30 | + * distributed under the License is distributed on an "AS IS" BASIS, |
31 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
32 | + * See the License for the specific language governing permissions and |
33 | + * limitations under the License. |
34 | + */ |
35 | + |
36 | +#ifndef ZORBA_INTERNAL_PROXY_H |
37 | +#define ZORBA_INTERNAL_PROXY_H |
38 | + |
39 | +namespace zorba { |
40 | +namespace internal { |
41 | +namespace ztd { |
42 | + |
43 | +/////////////////////////////////////////////////////////////////////////////// |
44 | + |
45 | +/** |
46 | + * \internal |
47 | + * A %proxy<T> is-a \c T that also contains a T* -- a pointer to the original. |
48 | + */ |
49 | +template<class OriginalType> |
50 | +class proxy : public OriginalType { |
51 | +public: |
52 | + proxy( OriginalType *p ) : original_( p ) { } |
53 | + |
54 | + OriginalType* original() const { |
55 | + return original_; |
56 | + } |
57 | +private: |
58 | + OriginalType *original_; |
59 | +}; |
60 | + |
61 | +/////////////////////////////////////////////////////////////////////////////// |
62 | + |
63 | +} // namespace ztd |
64 | +} // namespace internal |
65 | +} // namespace zorba |
66 | +#endif /* ZORBA_INTERNAL_PROXY_H */ |
67 | +/* vim:set et sw=2 ts=2: */ |
68 | |
69 | === modified file 'include/zorba/pregenerated/diagnostic_list.h' |
70 | --- include/zorba/pregenerated/diagnostic_list.h 2011-12-21 14:40:33 +0000 |
71 | +++ include/zorba/pregenerated/diagnostic_list.h 2012-02-08 19:20:28 +0000 |
72 | @@ -684,6 +684,8 @@ |
73 | |
74 | extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZOSE0005_DLL_LOAD_FAILED; |
75 | |
76 | +extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZOSE0006_TRANSCODING_ERROR; |
77 | + |
78 | extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZSTR0001_INDEX_ALREADY_EXISTS; |
79 | |
80 | extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZSTR0002_INDEX_DOES_NOT_EXIST; |
81 | |
82 | === added file 'include/zorba/transcode_stream.h' |
83 | --- include/zorba/transcode_stream.h 1970-01-01 00:00:00 +0000 |
84 | +++ include/zorba/transcode_stream.h 2012-02-08 19:20:28 +0000 |
85 | @@ -0,0 +1,213 @@ |
86 | +/* |
87 | + * Copyright 2006-2008 The FLWOR Foundation. |
88 | + * |
89 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
90 | + * you may not use this file except in compliance with the License. |
91 | + * You may obtain a copy of the License at |
92 | + * |
93 | + * http://www.apache.org/licenses/LICENSE-2.0 |
94 | + * |
95 | + * Unless required by applicable law or agreed to in writing, software |
96 | + * distributed under the License is distributed on an "AS IS" BASIS, |
97 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
98 | + * See the License for the specific language governing permissions and |
99 | + * limitations under the License. |
100 | + */ |
101 | + |
102 | +#ifndef ZORBA_TRANSCODE_STREAM_API_H |
103 | +#define ZORBA_TRANSCODE_STREAM_API_H |
104 | + |
105 | +#include <stdexcept> |
106 | +#include <streambuf> |
107 | +#include <string> |
108 | + |
109 | +#include <zorba/config.h> |
110 | +#include <zorba/internal/proxy.h> |
111 | +#include <zorba/internal/unique_ptr.h> |
112 | + |
113 | +namespace zorba { |
114 | + |
115 | +typedef internal::ztd::proxy<std::streambuf> proxy_streambuf; |
116 | + |
117 | +namespace transcode { |
118 | + |
119 | +/////////////////////////////////////////////////////////////////////////////// |
120 | + |
121 | +/** |
122 | + * A %transcode::streambuf is-a std::streambuf for transcoding character |
123 | + * encodings from/to UTF-8 on-the-fly. |
124 | + * |
125 | + * To use it, replace a stream's streambuf: |
126 | + * \code |
127 | + * istream is; |
128 | + * // ... |
129 | + * transcode::streambuf tbuf( "ISO-8859-1", is.rdbuf() ); |
130 | + * is.ios::rdbuf( &tbuf ); |
131 | + * \endcode |
132 | + * Note that the %transcode::streambuf must exist for as long as it's being used |
133 | + * by the stream. If you are replacing the streambuf for a stream you did not |
134 | + * create, you should set it back to the original streambuf: |
135 | + * \code |
136 | + * void f( ostream &os ) { |
137 | + * transcode::streambuf tbuf( "ISO-8859-1", os.rdbuf() ); |
138 | + * try { |
139 | + * os.ios::rdbuf( &tbuf ); |
140 | + * // ... |
141 | + * } |
142 | + * catch ( ... ) { |
143 | + * os.ios::rdbuf( tbuf.orig_streambuf() ); |
144 | + * throw; |
145 | + * } |
146 | + * } |
147 | + * \endcode |
148 | + * |
149 | + * While %transcode::streambuf does support seeking, the positions are relative |
150 | + * to the original byte stream. |
151 | + */ |
152 | +class ZORBA_DLL_PUBLIC streambuf : public std::streambuf { |
153 | +public: |
154 | + /** |
155 | + * Constructs a %transcode::streambuf. |
156 | + * |
157 | + * @param charset The name of the character encoding to convert from/to. |
158 | + * @param orig The original streambuf to read/write from/to. |
159 | + * @throws std::invalid_argument if either \a charset is not supported or |
160 | + * \a orig is null. |
161 | + */ |
162 | + streambuf( char const *charset, std::streambuf *orig ); |
163 | + |
164 | + /** |
165 | + * Destructs a %transcode::streambuf. |
166 | + */ |
167 | + ~streambuf(); |
168 | + |
169 | + /** |
170 | + * Gets the original streambuf. |
171 | + * |
172 | + * @return said streambuf. |
173 | + */ |
174 | + std::streambuf* orig_streambuf() const { |
175 | + return proxy_buf_->original(); |
176 | + } |
177 | + |
178 | +protected: |
179 | + void imbue( std::locale const& ); |
180 | + pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode ); |
181 | + pos_type seekpos( pos_type, std::ios_base::openmode ); |
182 | + std::streambuf* setbuf( char_type*, std::streamsize ); |
183 | + std::streamsize showmanyc(); |
184 | + int sync(); |
185 | + int_type overflow( int_type ); |
186 | + int_type pbackfail( int_type ); |
187 | + int_type uflow(); |
188 | + int_type underflow(); |
189 | + std::streamsize xsgetn( char_type*, std::streamsize ); |
190 | + std::streamsize xsputn( char_type const*, std::streamsize ); |
191 | + |
192 | +private: |
193 | + std::unique_ptr<proxy_streambuf> proxy_buf_; |
194 | + |
195 | + // forbid |
196 | + streambuf( streambuf const& ); |
197 | + streambuf& operator=( streambuf const& ); |
198 | +}; |
199 | + |
200 | +/////////////////////////////////////////////////////////////////////////////// |
201 | + |
202 | +/** |
203 | + * A %transcode::stream is used to wrap a C++ standard I/O stream with a |
204 | + * transcode::streambuf so that transcoding and the management of the streambuf |
205 | + * happens automatically. |
206 | + * |
207 | + * @tparam StreamType The I/O stream class type to wrap. It must be a concrete |
208 | + * stream class. |
209 | + */ |
210 | +template<class StreamType> |
211 | +class stream : public StreamType { |
212 | +public: |
213 | + /** |
214 | + * Constructs a %transcode::stream. |
215 | + * |
216 | + * @param charset The name of the character encoding to convert from/to. |
217 | + * @throws std::invalid_argument if \a charset is not supported. |
218 | + */ |
219 | + stream( char const *charset ) : |
220 | + tbuf_( charset, this->rdbuf() ) |
221 | + { |
222 | + init(); |
223 | + } |
224 | + |
225 | + /** |
226 | + * Constructs a %transcode::stream. |
227 | + * |
228 | + * @tparam StreamArgType The type of the first argument of \a StreamType's |
229 | + * constructor. |
230 | + * @param charset The name of the character encoding to convert from/to. |
231 | + * @param stream_arg The argument to pass as the first argument to |
232 | + * \a StreamType's constructor. |
233 | + * @throws std::invalid_argument if \a charset is not supported. |
234 | + */ |
235 | + template<typename StreamArgType> |
236 | + stream( char const *charset, StreamArgType stream_arg ) : |
237 | + StreamType( stream_arg ), |
238 | + tbuf_( charset, this->rdbuf() ) |
239 | + { |
240 | + init(); |
241 | + } |
242 | + |
243 | + /** |
244 | + * Constructs a %transcode::stream. |
245 | + * |
246 | + * @tparam StreamArgType The type of the first argument of \a StreamType's |
247 | + * constructor. |
248 | + * @param charset The name of the character encoding to convert from/to. |
249 | + * @param stream_arg The argument to pass as the first argument to |
250 | + * \a StreamType's constructor. |
251 | + * @param mode The open-mode to pass to \a StreamType's constructor. |
252 | + * @throws std::invalid_argument if \a charset is not supported. |
253 | + */ |
254 | + template<typename StreamArgType> |
255 | + stream( char const *charset, StreamArgType stream_arg, |
256 | + std::ios_base::openmode mode ) : |
257 | + StreamType( stream_arg, mode ), |
258 | + tbuf_( charset, this->rdbuf() ) |
259 | + { |
260 | + init(); |
261 | + } |
262 | + |
263 | +private: |
264 | + streambuf tbuf_; |
265 | + |
266 | + void init() { |
267 | + this->std::ios::rdbuf( &tbuf_ ); |
268 | + } |
269 | +}; |
270 | + |
271 | +/////////////////////////////////////////////////////////////////////////////// |
272 | + |
273 | +/** |
274 | + * Checks whether it would be necessary to transcode from the given character |
275 | + * encoding to UTF-8. |
276 | + * |
277 | + * @param charset The name of the character encoding to check. |
278 | + * @return \c true only if it would be necessary to transcode from the given |
279 | + * character encoding to UTF-8. |
280 | + */ |
281 | +ZORBA_DLL_PUBLIC |
282 | +bool is_necessary( char const *charset ); |
283 | + |
284 | +/** |
285 | + * Checks whether the given character set is supported for transcoding. |
286 | + * |
287 | + * @param charset The name of the character encoding to check. |
288 | + * @return \c true only if the character encoding is supported. |
289 | + */ |
290 | +ZORBA_DLL_PUBLIC |
291 | +bool is_supported( char const *charset ); |
292 | + |
293 | +/////////////////////////////////////////////////////////////////////////////// |
294 | + |
295 | +} // namespace transcode |
296 | +} // namespace zorba |
297 | +#endif /* ZORBA_TRANSCODE_STREAM_API_H */ |
298 | +/* vim:set et sw=2 ts=2: */ |
299 | |
300 | === modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp' |
301 | --- modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp 2011-07-29 08:12:36 +0000 |
302 | +++ modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp 2012-02-08 19:20:28 +0000 |
303 | @@ -21,6 +21,7 @@ |
304 | #include <iostream> |
305 | #include <cassert> |
306 | #ifndef WIN32 |
307 | +#include <cerrno> |
308 | #include <sys/time.h> |
309 | #endif /* WIN32 */ |
310 | |
311 | @@ -32,349 +33,347 @@ |
312 | using namespace std; |
313 | |
314 | namespace zorba { |
315 | - namespace curl { |
316 | - |
317 | - /////////////////////////////////////////////////////////////////////////////// |
318 | - |
319 | +namespace curl { |
320 | + |
321 | +/////////////////////////////////////////////////////////////////////////////// |
322 | + |
323 | #define ZORBA_CURL_ASSERT(expr) \ |
324 | -do { \ |
325 | -if ( CURLcode const code##__LINE__ = (expr) ) \ |
326 | -throw exception( #expr, "", code##__LINE__ ); \ |
327 | -} while (0) |
328 | - |
329 | + do { \ |
330 | + if ( CURLcode const code##__LINE__ = (expr) ) \ |
331 | + throw exception( #expr, "", code##__LINE__ ); \ |
332 | + } while (0) |
333 | + |
334 | #define ZORBA_CURLM_ASSERT(expr) \ |
335 | -do { \ |
336 | -if ( CURLMcode const code##__LINE__ = (expr) ) \ |
337 | -if ( code##__LINE__ != CURLM_CALL_MULTI_PERFORM ) \ |
338 | -throw exception( #expr, "", code##__LINE__ ); \ |
339 | -} while (0) |
340 | - |
341 | - exception::exception( char const *function, char const *uri, char const *msg ) : |
342 | - std::exception(), theMessage(msg) |
343 | - { |
344 | - } |
345 | - |
346 | - exception::exception( char const *function, char const *uri, CURLcode code ) : |
347 | - std::exception(), theMessage(curl_easy_strerror(code)) |
348 | - { |
349 | - } |
350 | - |
351 | - exception::exception( char const *function, char const *uri, CURLMcode code ) : |
352 | - std::exception(), theMessage(curl_multi_strerror(code)) |
353 | - { |
354 | - } |
355 | - |
356 | - const char* exception::what() const throw() { |
357 | - return theMessage; |
358 | - } |
359 | - |
360 | - |
361 | - /////////////////////////////////////////////////////////////////////////////// |
362 | - |
363 | - CURL* create( char const *uri, write_fn_t fn, void *data ) { |
364 | - // |
365 | - // Having cURL initialization wrapped by a class and using a singleton static |
366 | - // instance guarantees that cURL is initialized exactly once before use and |
367 | - // and also is cleaned-up at program termination (when destructors for static |
368 | - // objects are called). |
369 | - // |
370 | - struct curl_initializer { |
371 | - curl_initializer() { |
372 | - ZORBA_CURL_ASSERT( curl_global_init( CURL_GLOBAL_ALL ) ); |
373 | - } |
374 | - ~curl_initializer() { |
375 | - curl_global_cleanup(); |
376 | - } |
377 | - }; |
378 | - static curl_initializer initializer; |
379 | - |
380 | - CURL *const curl = curl_easy_init(); |
381 | - if ( !curl ) |
382 | - throw exception( "curl_easy_init()", uri, "" ); |
383 | - |
384 | - try { |
385 | - ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_URL, uri ) ); |
386 | - ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEDATA, data ) ); |
387 | - ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, fn ) ); |
388 | - |
389 | - // Tells cURL to follow redirects. CURLOPT_MAXREDIRS is by default set to -1 |
390 | - // thus cURL will do an infinite number of redirects. |
391 | - ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_FOLLOWLOCATION, 1 ) ); |
392 | - |
393 | + do { \ |
394 | + if ( CURLMcode const code##__LINE__ = (expr) ) \ |
395 | + if ( code##__LINE__ != CURLM_CALL_MULTI_PERFORM ) \ |
396 | + throw exception( #expr, "", code##__LINE__ ); \ |
397 | + } while (0) |
398 | + |
399 | +exception::exception( char const *function, char const *uri, char const *msg ) : |
400 | + std::exception(), msg_( msg ) |
401 | +{ |
402 | +} |
403 | + |
404 | +exception::exception( char const *function, char const *uri, CURLcode code ) : |
405 | + std::exception(), |
406 | + msg_( curl_easy_strerror( code ) ) |
407 | +{ |
408 | +} |
409 | + |
410 | +exception::exception( char const *function, char const *uri, CURLMcode code ) : |
411 | + std::exception(), |
412 | + msg_( curl_multi_strerror( code ) ) |
413 | +{ |
414 | +} |
415 | + |
416 | +exception::~exception() throw() { |
417 | + // out-of-line since it's virtual |
418 | +} |
419 | + |
420 | +const char* exception::what() const throw() { |
421 | + return msg_.c_str(); |
422 | +} |
423 | + |
424 | +/////////////////////////////////////////////////////////////////////////////// |
425 | + |
426 | +CURL* create( char const *uri, write_fn_t fn, void *data ) { |
427 | + // |
428 | + // Having cURL initialization wrapped by a class and using a singleton static |
429 | + // instance guarantees that cURL is initialized exactly once before use and |
430 | + // and also is cleaned-up at program termination (when destructors for static |
431 | + // objects are called). |
432 | + // |
433 | + struct curl_initializer { |
434 | + curl_initializer() { |
435 | + ZORBA_CURL_ASSERT( curl_global_init( CURL_GLOBAL_ALL ) ); |
436 | + } |
437 | + ~curl_initializer() { |
438 | + curl_global_cleanup(); |
439 | + } |
440 | + }; |
441 | + static curl_initializer initializer; |
442 | + |
443 | + CURL *const curl = curl_easy_init(); |
444 | + if ( !curl ) |
445 | + throw exception( "curl_easy_init()", uri, "" ); |
446 | + |
447 | + try { |
448 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_URL, uri ) ); |
449 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEDATA, data ) ); |
450 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, fn ) ); |
451 | + |
452 | + // Tells cURL to follow redirects. CURLOPT_MAXREDIRS is by default set to -1 |
453 | + // thus cURL will do an infinite number of redirects. |
454 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_FOLLOWLOCATION, 1 ) ); |
455 | + |
456 | #ifndef ZORBA_VERIFY_PEER_SSL_CERTIFICATE |
457 | - ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_SSL_VERIFYPEER, 0 ) ); |
458 | - // |
459 | - // CURLOPT_SSL_VERIFYHOST is left default, value 2, meaning verify that the |
460 | - // Common Name or Subject Alternate Name field in the certificate matches |
461 | - // the name of the server. |
462 | - // |
463 | - // Tested with https://www.npr.org/rss/rss.php?id=1001 |
464 | - // About using SSL certs in curl: http://curl.haxx.se/docs/sslcerts.html |
465 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_SSL_VERIFYPEER, 0 ) ); |
466 | + // |
467 | + // CURLOPT_SSL_VERIFYHOST is left default, value 2, meaning verify that the |
468 | + // Common Name or Subject Alternate Name field in the certificate matches |
469 | + // the name of the server. |
470 | + // |
471 | + // Tested with https://www.npr.org/rss/rss.php?id=1001 |
472 | + // About using SSL certs in curl: http://curl.haxx.se/docs/sslcerts.html |
473 | #else |
474 | # ifdef WIN32 |
475 | - // set the root CA certificates file path |
476 | - if ( GENV.g_curl_root_CA_certificates_path[0] ) |
477 | - ZORBA_CURL_ASSERT( |
478 | - curl_easy_setopt( |
479 | - curl, CURLOPT_CAINFO, GENV.g_curl_root_CA_certificates_path |
480 | - ) |
481 | - ); |
482 | + // set the root CA certificates file path |
483 | + if ( GENV.g_curl_root_CA_certificates_path[0] ) |
484 | + ZORBA_CURL_ASSERT( |
485 | + curl_easy_setopt( |
486 | + curl, CURLOPT_CAINFO, GENV.g_curl_root_CA_certificates_path |
487 | + ) |
488 | + ); |
489 | # endif /* WIN32 */ |
490 | #endif /* ZORBA_VERIFY_PEER_SSL_CERTIFICATE */ |
491 | - |
492 | - // |
493 | - // Some servers don't like requests that are made without a user-agent |
494 | - // field, so we provide one. |
495 | - // |
496 | - ZORBA_CURL_ASSERT( |
497 | - curl_easy_setopt( curl, CURLOPT_USERAGENT, "libcurl-agent/1.0" ) |
498 | - ); |
499 | - |
500 | - return curl; |
501 | - } |
502 | - catch ( ... ) { |
503 | - destroy( curl ); |
504 | - throw; |
505 | - } |
506 | - } |
507 | - |
508 | - void destroy( CURL *curl ) { |
509 | - if ( curl ) { |
510 | - curl_easy_reset( curl ); |
511 | - curl_easy_cleanup( curl ); |
512 | - } |
513 | - } |
514 | - |
515 | - /////////////////////////////////////////////////////////////////////////////// |
516 | - |
517 | - streambuf::streambuf() : theInformer(0), theOwnInformer(false) { |
518 | -#ifdef WIN32 |
519 | - theDummySocket = socket(AF_INET, SOCK_DGRAM, 0); |
520 | - if (theDummySocket == CURL_SOCKET_BAD || theDummySocket == INVALID_SOCKET) { |
521 | - std::cerr << "creating the socket failed" << std::endl; |
522 | - } |
523 | -#endif |
524 | - init(); |
525 | - } |
526 | - |
527 | - streambuf::streambuf( char const *uri ) : theInformer(0), theOwnInformer(false) { |
528 | -#ifdef WIN32 |
529 | - theDummySocket = socket(AF_INET, SOCK_DGRAM, 0); |
530 | - if (theDummySocket == CURL_SOCKET_BAD || theDummySocket == INVALID_SOCKET) { |
531 | - std::cerr << "creating the socket failed" << std::endl; |
532 | - } |
533 | -#endif |
534 | - init(); |
535 | - open( uri ); |
536 | - } |
537 | - |
538 | - int streambuf::multi_perform() { |
539 | - underflow(); |
540 | - CURLMsg* msg; |
541 | - int msgInQueue; |
542 | - int error = 0; |
543 | - while ((msg = curl_multi_info_read(curlm_, &msgInQueue))) { |
544 | - if (msg->msg == CURLMSG_DONE) { |
545 | - error = msg->data.result; |
546 | - } |
547 | - } |
548 | - return error; |
549 | - } |
550 | - |
551 | - streambuf::streambuf( CURL* aCurl) : theInformer(0), theOwnInformer(false) { |
552 | -#ifdef WIN32 |
553 | - theDummySocket = socket(AF_INET, SOCK_DGRAM, 0); |
554 | - if (theDummySocket == CURL_SOCKET_BAD || theDummySocket == INVALID_SOCKET) { |
555 | - std::cerr << "creating the socket failed" << std::endl; |
556 | - } |
557 | -#endif |
558 | - init(); |
559 | - curl_ = aCurl; |
560 | - ZORBA_CURL_ASSERT( curl_easy_setopt( aCurl, CURLOPT_WRITEDATA, this ) ); |
561 | - ZORBA_CURL_ASSERT( curl_easy_setopt( aCurl, CURLOPT_WRITEFUNCTION, curl_write_callback ) ); |
562 | - |
563 | - init_curlm(); |
564 | - } |
565 | - |
566 | - streambuf::~streambuf() { |
567 | - free( buf_ ); |
568 | - close(); |
569 | -#ifdef WIN32 |
570 | - closesocket(theDummySocket); |
571 | -#endif |
572 | - // If we have been assigned memory ownership of theInformer, delete it now. |
573 | - if (theOwnInformer) |
574 | - delete theInformer; |
575 | - } |
576 | - |
577 | - void streambuf::close() { |
578 | - if ( curl_ ) { |
579 | - if ( curlm_ ) { |
580 | - curl_multi_remove_handle( curlm_, curl_ ); |
581 | - curl_multi_cleanup( curlm_ ); |
582 | - curlm_ = 0; |
583 | - } |
584 | - destroy( curl_ ); |
585 | - curl_ = 0; |
586 | - } |
587 | - } |
588 | - |
589 | - void streambuf::curl_read() { |
590 | - buf_len_ = 0; |
591 | - while ( curl_running_ && !buf_len_ ) { |
592 | - fd_set fd_read, fd_write, fd_except; |
593 | - FD_ZERO( &fd_read ); |
594 | - FD_ZERO( &fd_write ); |
595 | - FD_ZERO( &fd_except ); |
596 | - int max_fd = -1; |
597 | -#ifdef WIN32 |
598 | - // Windows does not like a call to select where all arguments are 0. So |
599 | - // we just add a dummy socket to make the call to select happy. |
600 | - FD_SET (theDummySocket, &fd_read); |
601 | -#endif |
602 | - ZORBA_CURLM_ASSERT( |
603 | - curl_multi_fdset( curlm_, &fd_read, &fd_write, &fd_except, &max_fd ) |
604 | - ); |
605 | - |
606 | - // |
607 | - // Note that the fopen.c sample code is unnecessary at best or wrong at |
608 | - // worst; see: http://curl.haxx.se/mail/lib-2011-05/0011.html |
609 | - // |
610 | - timeval timeout; |
611 | - long curl_timeout_ms; |
612 | - ZORBA_CURLM_ASSERT( curl_multi_timeout( curlm_, &curl_timeout_ms ) ); |
613 | - if ( curl_timeout_ms > 0 ) { |
614 | - timeout.tv_sec = curl_timeout_ms / 1000; |
615 | - timeout.tv_usec = curl_timeout_ms % 1000 * 1000; |
616 | - } else { |
617 | - // |
618 | - // From curl_multi_timeout(3): |
619 | - // |
620 | - // Note: if libcurl returns a -1 timeout here, it just means that |
621 | - // libcurl currently has no stored timeout value. You must not wait |
622 | - // too long (more than a few seconds perhaps) before you call |
623 | - // curl_multi_perform() again. |
624 | - // |
625 | - // So we just pick some not-too-long default. |
626 | - // |
627 | - timeout.tv_sec = 1; |
628 | - timeout.tv_usec = 0; |
629 | - } |
630 | - |
631 | - switch ( select( max_fd + 1, &fd_read, &fd_write, &fd_except, &timeout ) ) { |
632 | - case -1: // select error |
633 | -#ifdef WIN32 |
634 | - std::cout << "Error = " << WSAGetLastError() << std::endl; |
635 | -#endif |
636 | - throw exception( "select()", "" ); |
637 | - case 0: // timeout |
638 | - // no break; |
639 | - default: |
640 | - CURLMcode code; |
641 | - do { |
642 | - code = curl_multi_perform( curlm_, &curl_running_ ); |
643 | - } while ( code == CURLM_CALL_MULTI_PERFORM ); |
644 | - ZORBA_CURLM_ASSERT( code ); |
645 | - } |
646 | - } |
647 | - if (theInformer) { |
648 | - theInformer->afterRead(); |
649 | - } |
650 | - } |
651 | - |
652 | - size_t streambuf::curl_write_callback( void *ptr, size_t size, size_t nmemb, |
653 | - void *data ) { |
654 | - size *= nmemb; |
655 | - streambuf *const that = static_cast<streambuf*>( data ); |
656 | - |
657 | - std::streamoff buf_free = that->buf_capacity_ - that->buf_len_; |
658 | - if (that->theInformer) { |
659 | - that->theInformer->beforeRead(); |
660 | - } |
661 | - if ( size > buf_free ) { |
662 | - std::streamoff new_capacity = that->buf_capacity_ + size - buf_free; |
663 | - if ( void *const new_buf = realloc( that->buf_, static_cast<size_t>(new_capacity) ) ) { |
664 | - that->buf_ = static_cast<char*>( new_buf ); |
665 | - that->buf_capacity_ = new_capacity; |
666 | - } else |
667 | - throw exception( "realloc()", "" ); |
668 | - } |
669 | - ::memcpy( that->buf_ + that->buf_len_, ptr, size ); |
670 | - that->buf_len_ += size; |
671 | - return size; |
672 | - } |
673 | - |
674 | - void streambuf::init() { |
675 | - buf_ = 0; |
676 | - buf_capacity_ = 0; |
677 | - buf_len_ = 0; |
678 | - curl_ = 0; |
679 | - curlm_ = 0; |
680 | - curl_running_ = 0; |
681 | - } |
682 | - |
683 | - void streambuf::init_curlm() { |
684 | - // |
685 | - // Lie about cURL running initially so the while-loop in curl_read() will run |
686 | - // at least once. |
687 | - // |
688 | - curl_running_ = 1; |
689 | - |
690 | - // |
691 | - // Set the "get" pointer to the end (gptr() == egptr()) so a call to |
692 | - // underflow() and initial data read will be triggered. |
693 | - // |
694 | - buf_len_ = buf_capacity_; |
695 | - setg( buf_, buf_ + buf_len_, buf_ + buf_capacity_ ); |
696 | - |
697 | - // |
698 | - // Clean-up has to be done here with try/catch (as opposed to relying on the |
699 | - // destructor) because open() can be called from the constructor. If an |
700 | - // exception is thrown, the constructor will not have completed, hence the |
701 | - // object will not have been fully constructed; therefore the destructor will |
702 | - // not be called. |
703 | - // |
704 | - try { |
705 | - if ( !(curlm_ = curl_multi_init()) ) |
706 | - throw exception( "curl_multi_init()", "" ); |
707 | - try { |
708 | - ZORBA_CURLM_ASSERT( curl_multi_add_handle( curlm_, curl_ ) ); |
709 | - } |
710 | - catch ( ... ) { |
711 | - curl_multi_cleanup( curlm_ ); |
712 | - curlm_ = 0; |
713 | - throw; |
714 | - } |
715 | - } |
716 | - catch ( ... ) { |
717 | - destroy( curl_ ); |
718 | - curl_ = 0; |
719 | - throw; |
720 | - } |
721 | - } |
722 | - |
723 | - void streambuf::open( char const *uri ) { |
724 | - curl_ = create( uri, curl_write_callback, this ); |
725 | - |
726 | - init_curlm(); |
727 | - } |
728 | - |
729 | - streamsize streambuf::showmanyc() { |
730 | - return egptr() - gptr(); |
731 | - } |
732 | - |
733 | - streambuf::int_type streambuf::underflow() { |
734 | - while ( true ) { |
735 | - if ( gptr() < egptr() ) |
736 | - return traits_type::to_int_type( *gptr() ); |
737 | - curl_read(); |
738 | - if ( !buf_len_ ) |
739 | - return traits_type::eof(); |
740 | - setg( buf_, buf_, buf_ + buf_len_ ); |
741 | - } |
742 | - } |
743 | - |
744 | - /////////////////////////////////////////////////////////////////////////////// |
745 | - |
746 | - } // namespace curl |
747 | + |
748 | + // |
749 | + // Some servers don't like requests that are made without a user-agent |
750 | + // field, so we provide one. |
751 | + // |
752 | + ZORBA_CURL_ASSERT( |
753 | + curl_easy_setopt( curl, CURLOPT_USERAGENT, "libcurl-agent/1.0" ) |
754 | + ); |
755 | + |
756 | + return curl; |
757 | + } |
758 | + catch ( ... ) { |
759 | + destroy( curl ); |
760 | + throw; |
761 | + } |
762 | +} |
763 | + |
764 | +void destroy( CURL *curl ) { |
765 | + if ( curl ) { |
766 | + curl_easy_reset( curl ); |
767 | + curl_easy_cleanup( curl ); |
768 | + } |
769 | +} |
770 | + |
771 | +/////////////////////////////////////////////////////////////////////////////// |
772 | + |
773 | +streambuf::streambuf() { |
774 | + init(); |
775 | +} |
776 | + |
777 | +streambuf::streambuf( char const *uri ) { |
778 | + init(); |
779 | + open( uri ); /* creates the cURL handle for uri and attaches it to a multi handle */ |
780 | +} |
781 | + |
782 | +streambuf::streambuf( CURL *curl ) { |
783 | + init(); |
784 | + curl_ = curl; /* adopt the caller's handle; close() destroys it later */ |
785 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEDATA, this ) ); /* NOTE(review): presumably throws on failure; the destructor then never runs, so the adopted handle would not be released -- confirm */ |
786 | + ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, curl_write_callback ) ); |
787 | + init_curlm(); /* on failure this destroys curl_ itself before rethrowing */ |
788 | +} |
789 | + |
790 | +streambuf::~streambuf() { |
791 | + free( buf_ ); /* buf_ is grown with realloc() in curl_write_callback() */ |
792 | + close(); /* releases the multi and easy handles, if any */ |
793 | +#ifdef WIN32 |
794 | + closesocket( dummy_socket_ ); /* the socket created in init() */ |
795 | +#endif |
796 | + // If we have been assigned memory ownership of theInformer (see |
797 | + if ( theOwnInformer ) /* setOwnInformer()), delete it now. */ |
798 | + delete theInformer; |
799 | +} |
800 | + |
801 | +void streambuf::close() { |
802 | +  if ( !curl_ ) |
803 | +    return;                             // nothing is open |
804 | +  if ( curlm_ ) {                       // detach from the multi handle first |
805 | +    curl_multi_remove_handle( curlm_, curl_ ); |
806 | +    curl_multi_cleanup( curlm_ ); |
807 | +    curlm_ = 0; |
808 | +  } |
809 | +  destroy( curl_ ); |
810 | +  curl_ = 0; |
811 | +} |
812 | + |
813 | +void streambuf::curl_read() { |
814 | +  buf_len_ = 0;  // drop old data; curl_write_callback() refills the buffer |
815 | +  while ( curl_running_ && !buf_len_ ) { |
816 | +    fd_set fd_read, fd_write, fd_except; |
817 | +    FD_ZERO( &fd_read ); |
818 | +    FD_ZERO( &fd_write ); |
819 | +    FD_ZERO( &fd_except ); |
820 | +    int max_fd = -1; |
821 | +#ifdef WIN32 |
822 | +    // |
823 | +    // Windows does not like a call to select where all arguments are 0, so we |
824 | +    // just add a dummy socket to make the call to select happy. |
825 | +    // |
826 | +    FD_SET( dummy_socket_, &fd_read ); |
827 | +#endif /* WIN32 */ |
828 | +    ZORBA_CURLM_ASSERT( |
829 | +      curl_multi_fdset( curlm_, &fd_read, &fd_write, &fd_except, &max_fd ) |
830 | +    ); |
831 | + |
832 | +    // |
833 | +    // Note that the fopen.c sample code is unnecessary at best or wrong at |
834 | +    // worst; see: http://curl.haxx.se/mail/lib-2011-05/0011.html |
835 | +    // |
836 | +    timeval timeout; |
837 | +    long curl_timeout_ms; |
838 | +    ZORBA_CURLM_ASSERT( curl_multi_timeout( curlm_, &curl_timeout_ms ) ); |
839 | +    if ( curl_timeout_ms >= 0 ) {  // 0 means "act now": poll without waiting |
840 | +      timeout.tv_sec = curl_timeout_ms / 1000; |
841 | +      timeout.tv_usec = curl_timeout_ms % 1000 * 1000; |
842 | +    } else { |
843 | +      // |
844 | +      // From curl_multi_timeout(3): |
845 | +      // |
846 | +      // Note: if libcurl returns a -1 timeout here, it just means that |
847 | +      // libcurl currently has no stored timeout value. You must not wait |
848 | +      // too long (more than a few seconds perhaps) before you call |
849 | +      // curl_multi_perform() again. |
850 | +      // |
851 | +      // So we just pick some not-too-long default. |
852 | +      // |
853 | +      timeout.tv_sec = 1; |
854 | +      timeout.tv_usec = 0; |
855 | +    } |
856 | + |
857 | +    switch ( select( max_fd + 1, &fd_read, &fd_write, &fd_except, &timeout ) ) { |
858 | +      case -1: // select error |
859 | +#ifdef WIN32 |
860 | +        char err_buf[16];  // large enough for any int plus the terminator |
861 | +        sprintf( err_buf, "%d", WSAGetLastError() ); |
862 | +        throw exception( "select()", "", err_buf ); |
863 | +#else |
864 | +        throw exception( "select()", "", strerror( errno ) ); |
865 | +#endif |
866 | +      case 0: // timeout |
867 | +        // no break; |
868 | +      default: |
869 | +        CURLMcode code; |
870 | +        do { |
871 | +          code = curl_multi_perform( curlm_, &curl_running_ ); |
872 | +        } while ( code == CURLM_CALL_MULTI_PERFORM ); |
873 | +        ZORBA_CURLM_ASSERT( code ); |
874 | +    } |
875 | +  } |
876 | +  if ( theInformer )  // report the end of this read to the observer, if any |
877 | +    theInformer->afterRead(); |
878 | +} |
879 | + |
880 | +size_t streambuf::curl_write_callback( void *ptr, size_t size, size_t nmemb, |
881 | + void *data ) { |
882 | + size *= nmemb; /* total number of bytes in this chunk */ |
883 | + streambuf *const that = static_cast<streambuf*>( data ); /* the streambuf registered via CURLOPT_WRITEDATA */ |
884 | + |
885 | + if ( that->theInformer ) |
886 | + that->theInformer->beforeRead(); |
887 | + |
888 | + size_t const buf_free = that->buf_capacity_ - that->buf_len_; |
889 | + if ( size > buf_free ) { |
890 | + streamoff new_capacity = that->buf_capacity_ + size - buf_free; /* == buf_len_ + size: grow just enough to append the chunk */ |
891 | + if ( void *const new_buf = |
892 | + realloc( that->buf_, static_cast<size_t>( new_capacity ) ) ) { |
893 | + that->buf_ = static_cast<char*>( new_buf ); |
894 | + that->buf_capacity_ = new_capacity; |
895 | + } else |
896 | + throw exception( "realloc()", "" ); /* NOTE(review): this propagates through libcurl's C frames -- confirm that is safe for the libcurl build in use */ |
897 | + } |
898 | + ::memcpy( that->buf_ + that->buf_len_, ptr, size ); /* append after the data already buffered */ |
899 | + that->buf_len_ += size; |
900 | + return size; /* the whole chunk was consumed */ |
901 | +} |
902 | + |
903 | +void streambuf::init() { |
904 | + buf_ = 0; /* no receive buffer allocated yet */ |
905 | + buf_capacity_ = 0; |
906 | + buf_len_ = 0; |
907 | + curl_ = 0; /* no easy handle, so is_open() reports false */ |
908 | + curlm_ = 0; |
909 | + curl_running_ = 0; /* curl_read()'s loop stays idle until init_curlm() sets this */ |
910 | + theInformer = 0; |
911 | + theOwnInformer = false; /* by default the destructor does not delete the informer */ |
912 | +#ifdef WIN32 |
913 | + dummy_socket_ = socket( AF_INET, SOCK_DGRAM, 0 ); /* placeholder descriptor for select() in curl_read() */ |
914 | + if ( dummy_socket_ == CURL_SOCKET_BAD || dummy_socket_ == INVALID_SOCKET ) |
915 | + throw exception( "socket()", "" ); |
916 | +#endif /* WIN32 */ |
917 | +} |
918 | + |
919 | +void streambuf::init_curlm() { |
920 | + // |
921 | + // Lie about cURL running initially so the while-loop in curl_read() will run |
922 | + // at least once. |
923 | + // |
924 | + curl_running_ = 1; |
925 | + |
926 | + // |
927 | + // Set the "get" pointer to the end (gptr() == egptr()) so a call to |
928 | + // underflow() and initial data read will be triggered. |
929 | + // |
930 | + buf_len_ = buf_capacity_; |
931 | + setg( buf_, buf_ + buf_len_, buf_ + buf_capacity_ ); |
932 | + |
933 | + // |
934 | + // Clean-up has to be done here with try/catch (as opposed to relying on the |
935 | + // destructor) because open() can be called from the constructor. If an |
936 | + // exception is thrown, the constructor will not have completed, hence the |
937 | + // object will not have been fully constructed; therefore the destructor will |
938 | + // not be called. |
939 | + // |
940 | + try { |
941 | + if ( !(curlm_ = curl_multi_init()) ) |
942 | + throw exception( "curl_multi_init()", "" ); |
943 | + try { |
944 | + ZORBA_CURLM_ASSERT( curl_multi_add_handle( curlm_, curl_ ) ); |
945 | + } |
946 | + catch ( ... ) { /* attaching failed: release the multi handle just created */ |
947 | + curl_multi_cleanup( curlm_ ); |
948 | + curlm_ = 0; |
949 | + throw; |
950 | + } |
951 | + } |
952 | + catch ( ... ) { /* any failure: release the easy handle too, leaving the streambuf closed */ |
953 | + destroy( curl_ ); |
954 | + curl_ = 0; |
955 | + throw; |
956 | + } |
957 | +} |
958 | + |
959 | +int streambuf::multi_perform() { |
960 | +  underflow();                      // drive the transfer until data or EOF |
961 | +  // Drain cURL's message queue; the last CURLMSG_DONE result wins. |
962 | +  int last_result = 0; |
963 | +  int msgs_left; |
964 | +  for ( CURLMsg *m; (m = curl_multi_info_read( curlm_, &msgs_left )); ) { |
965 | +    if ( m->msg == CURLMSG_DONE ) |
966 | +      last_result = m->data.result; |
967 | +  } |
968 | +  return last_result; |
969 | +} |
970 | + |
971 | +void streambuf::open( char const *uri ) { |
972 | +  close();  // re-opening must not leak the handles of a previous transfer |
973 | +  curl_ = create( uri, curl_write_callback, this ); |
974 | +  init_curlm();  // destroys curl_ again itself if the multi set-up fails |
975 | +} |
976 | + |
977 | +streamsize streambuf::showmanyc() { |
978 | + return egptr() - gptr(); /* bytes still unread in the current get area */ |
979 | +} |
980 | + |
981 | +streambuf::int_type streambuf::underflow() { |
982 | +  // Refill from cURL until the get area is non-empty or the transfer ends. |
983 | +  while ( gptr() >= egptr() ) { |
984 | +    curl_read(); |
985 | +    if ( buf_len_ == 0 ) |
986 | +      return traits_type::eof(); |
987 | +    setg( buf_, buf_, buf_ + buf_len_ ); |
988 | +  } |
989 | +  return traits_type::to_int_type( *gptr() ); |
990 | +} |
991 | + |
992 | +/////////////////////////////////////////////////////////////////////////////// |
993 | + |
994 | +} // namespace curl |
995 | } // namespace zorba |
996 | +/* vim:set et sw=2 ts=2: */ |
997 | |
998 | === modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h' |
999 | --- modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h 2011-07-29 08:12:36 +0000 |
1000 | +++ modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h 2012-02-08 19:20:28 +0000 |
1001 | @@ -19,154 +19,175 @@ |
1002 | |
1003 | #include <zorba/config.h> |
1004 | |
1005 | +#include <exception> |
1006 | #include <istream> |
1007 | -#include <exception> |
1008 | #include <streambuf> |
1009 | +#include <string> |
1010 | #include <curl/curl.h> |
1011 | |
1012 | namespace zorba { |
1013 | - |
1014 | - namespace http_client { |
1015 | - class InformDataRead; |
1016 | - } |
1017 | - |
1018 | - namespace curl { |
1019 | - |
1020 | - class exception : public std::exception { |
1021 | - public: |
1022 | - exception( char const *function, char const *uri, char const *msg = 0 ); |
1023 | - exception( char const *function, char const *uri, CURLcode code ); |
1024 | - exception( char const *function, char const *uri, CURLMcode code ); |
1025 | - public: |
1026 | - virtual const char* what() const throw(); |
1027 | - private: |
1028 | - const char* theMessage; |
1029 | - }; |
1030 | - |
1031 | - |
1032 | - |
1033 | - ////////// create & destroy /////////////////////////////////////////////////// |
1034 | - |
1035 | - /** |
1036 | - * The signature type of cURL's write function callback. |
1037 | - */ |
1038 | - typedef size_t (*write_fn_t)( void*, size_t, size_t, void* ); |
1039 | - |
1040 | - /** |
1041 | - * Creates a new, initialized cURL instance. |
1042 | - * |
1043 | - * @throws exception upon failure. |
1044 | - */ |
1045 | - CURL* create( char const *uri, write_fn_t fn, void *data ); |
1046 | - |
1047 | - /** |
1048 | - * Destroys a cURL instance. |
1049 | - * |
1050 | - * @param instance A cURL instance. If \c NULL, does nothing. |
1051 | - */ |
1052 | - void destroy( CURL *instance ); |
1053 | - |
1054 | - ////////// streambuf ////////////////////////////////////////////////////////// |
1055 | - |
1056 | - /** |
1057 | - * A curl::streambuf is-a std::streambuf for streaming the contents of URI |
1058 | - * using cURL. However, do not use this class directly. Use uri::streambuf |
1059 | - * instead. |
1060 | - */ |
1061 | - class streambuf : public std::streambuf { |
1062 | - public: |
1063 | - /** |
1064 | - * Constructs a %streambuf. |
1065 | - */ |
1066 | - streambuf(); |
1067 | - |
1068 | - /** |
1069 | - * Constructs a %streambuf and opens a connection to the server hosting the |
1070 | - * given URI for subsequent streaming. |
1071 | - * |
1072 | - * @param uri The URI to stream. |
1073 | - */ |
1074 | - streambuf( char const *uri ); |
1075 | - |
1076 | - /** |
1077 | - * In case we already have a curl object, which was set up somewhere else, we |
1078 | - * take it here as an arument. This takes ownership over the object. |
1079 | - */ |
1080 | - streambuf( CURL* aCurl ); |
1081 | - |
1082 | - /** |
1083 | - * Destroys a %streambuf. |
1084 | - */ |
1085 | - ~streambuf(); |
1086 | - |
1087 | - /** |
1088 | - * Opens a connection to the server hosting the given URI for subsequent |
1089 | - * streaming. |
1090 | - * |
1091 | - * @param uri The URI to stream. |
1092 | - * @throws exception upon failure. |
1093 | - */ |
1094 | - void open( char const *uri ); |
1095 | - |
1096 | - /** |
1097 | - * Tests whether the buffer is open. |
1098 | - * |
1099 | - * @return Returns \c true only if the buffer is open. |
1100 | - */ |
1101 | - bool is_open() const { |
1102 | - return !!curl_; |
1103 | - } |
1104 | - |
1105 | - /** |
1106 | - * Closes this %streambuf. |
1107 | - */ |
1108 | - void close(); |
1109 | - |
1110 | - /** |
1111 | - * Provide a InformDataRead that will get callbacks about read events. |
1112 | - */ |
1113 | - void setInformer(::zorba::http_client::InformDataRead* aInformer) { theInformer = aInformer; } |
1114 | - |
1115 | - /** |
1116 | - * Specify whether this streambuf has memory ownership over the |
1117 | - * InformDataRead it has been passed. You can use this if, for example, |
1118 | - * the lifetime of the streambuf will extend past the lifetime of the |
1119 | - * object which created the InformDataRead. |
1120 | - */ |
1121 | - void setOwnInformer(bool aOwnInformer) { theOwnInformer = aOwnInformer; } |
1122 | - |
1123 | - int multi_perform(); |
1124 | - |
1125 | - protected: |
1126 | - // inherited |
1127 | - std::streamsize showmanyc(); |
1128 | - int_type underflow(); |
1129 | - |
1130 | - private: |
1131 | - void curl_read(); |
1132 | - static size_t curl_write_callback( void*, size_t, size_t, void* ); |
1133 | - |
1134 | - void init(); |
1135 | - void init_curlm(); |
1136 | - |
1137 | - char *buf_; |
1138 | - std::streamsize buf_capacity_; |
1139 | - std::streamoff buf_len_; |
1140 | - |
1141 | - CURL *curl_; |
1142 | - CURLM *curlm_; |
1143 | - int curl_running_; |
1144 | - ::zorba::http_client::InformDataRead* theInformer; |
1145 | - bool theOwnInformer; |
1146 | - |
1147 | - // forbid |
1148 | - streambuf( streambuf const& ); |
1149 | - streambuf& operator=( streambuf const& ); |
1150 | + |
1151 | +namespace http_client { |
1152 | + class InformDataRead; |
1153 | +} |
1154 | + |
1155 | +namespace curl { |
1156 | + |
1157 | +/////////////////////////////////////////////////////////////////////////////// |
1158 | + |
1159 | +class exception : public std::exception { |
1160 | +public: |
1161 | + exception( char const *function, char const *uri, char const *msg = 0 ); /* failure described by optional free-form text */ |
1162 | + exception( char const *function, char const *uri, CURLcode code ); /* failure identified by a cURL easy-interface code */ |
1163 | + exception( char const *function, char const *uri, CURLMcode code ); /* failure identified by a cURL multi-interface code */ |
1164 | + ~exception() throw(); |
1165 | + |
1166 | + virtual const char* what() const throw(); |
1167 | + |
1168 | +private: |
1169 | + std::string msg_; /* presumably the text returned by what() -- confirm in the .cpp */ |
1170 | +}; |
1171 | + |
1172 | +////////// create & destroy /////////////////////////////////////////////////// |
1173 | + |
1174 | +/** |
1175 | + * The signature type of cURL's write function callback. |
1176 | + */ |
1177 | +typedef size_t (*write_fn_t)( void*, size_t, size_t, void* ); |
1178 | + |
1179 | +/** |
1180 | + * Creates a new, initialized cURL instance. |
1181 | + * |
1182 | + * @throws exception upon failure. |
1183 | + */ |
1184 | +CURL* create( char const *uri, write_fn_t fn, void *data ); |
1185 | + |
1186 | +/** |
1187 | + * Destroys a cURL instance. |
1188 | + * |
1189 | + * @param instance A cURL instance. If \c NULL, does nothing. |
1190 | + */ |
1191 | +void destroy( CURL *instance ); |
1192 | + |
1193 | +////////// streambuf ////////////////////////////////////////////////////////// |
1194 | + |
1195 | +/** |
1196 | + * A curl::streambuf is-a std::streambuf for streaming the contents of a URI |
1197 | + * using cURL. However, do not use this class directly. Use uri::streambuf |
1198 | + * instead. |
1199 | + */ |
1200 | +class streambuf : public std::streambuf { |
1201 | +public: |
1202 | + /** |
1203 | + * Constructs a %streambuf. |
1204 | + */ |
1205 | + streambuf(); |
1206 | + |
1207 | + /** |
1208 | + * Constructs a %streambuf and opens a connection to the server hosting the |
1209 | + * given URI for subsequent streaming. |
1210 | + * |
1211 | + * @param uri The URI to stream. |
1212 | + */ |
1213 | + streambuf( char const *uri ); |
1214 | + |
1215 | + /** |
1216 | + * Constructs a %streambuf using an existing CURL object. |
1217 | + * |
1218 | + * @param curl The CURL object to use. This %streambuf takes ownership of |
1219 | + * it. |
1220 | + */ |
1221 | + streambuf( CURL *curl ); |
1222 | + |
1223 | + /** |
1224 | + * Destroys a %streambuf. |
1225 | + */ |
1226 | + ~streambuf(); |
1227 | + |
1228 | + /** |
1229 | + * Opens a connection to the server hosting the given URI for subsequent |
1230 | + * streaming. |
1231 | + * |
1232 | + * @param uri The URI to stream. |
1233 | + * @throws exception upon failure. |
1234 | + */ |
1235 | + void open( char const *uri ); |
1236 | + |
1237 | + /** |
1238 | + * Tests whether the buffer is open. |
1239 | + * |
1240 | + * @return Returns \c true only if the buffer is open. |
1241 | + */ |
1242 | + bool is_open() const { |
1243 | + return !!curl_; |
1244 | + } |
1245 | + |
1246 | + /** |
1247 | + * Closes this %streambuf. |
1248 | + */ |
1249 | + void close(); |
1250 | + |
1251 | + /** |
1252 | + * Gets the CURL object in use. |
1253 | + * |
1254 | + * @return Returns the CURL object, or \c NULL if this %streambuf is not open. |
1255 | + */ |
1256 | + CURL* curl() const { |
1257 | + return curl_; |
1258 | + } |
1259 | + |
1260 | + /** |
1261 | + * Provide an InformDataRead that will get callbacks about read events. |
1262 | + */ |
1263 | + void setInformer( http_client::InformDataRead *aInformer ) { |
1264 | + theInformer = aInformer; |
1265 | + } |
1266 | + |
1267 | + /** |
1268 | + * Specify whether this streambuf has memory ownership over the |
1269 | + * InformDataRead it has been passed. You can use this if, for example, |
1270 | + * the lifetime of the streambuf will extend past the lifetime of the |
1271 | + * object which created the InformDataRead. |
1272 | + */ |
1273 | + void setOwnInformer( bool aOwnInformer ) { |
1274 | + theOwnInformer = aOwnInformer; |
1275 | + } |
1276 | + |
1277 | + int multi_perform(); /* reads via underflow(), then returns the result code of the last completed transfer (0 if none) */ |
1278 | + |
1279 | +protected: |
1280 | + // inherited from std::streambuf |
1281 | + std::streamsize showmanyc(); |
1282 | + int_type underflow(); |
1283 | + |
1284 | +private: |
1285 | + void curl_read(); |
1286 | + static size_t curl_write_callback( void*, size_t, size_t, void* ); |
1287 | + |
1288 | + void init(); |
1289 | + void init_curlm(); |
1290 | + |
1291 | + char *buf_; |
1292 | + std::streamsize buf_capacity_; |
1293 | + std::streamoff buf_len_; |
1294 | + |
1295 | + CURL *curl_; |
1296 | + CURLM *curlm_; |
1297 | + int curl_running_; |
1298 | + http_client::InformDataRead *theInformer; |
1299 | + bool theOwnInformer; |
1300 | + |
1301 | + // forbid copying |
1302 | + streambuf( streambuf const& ); |
1303 | + streambuf& operator=( streambuf const& ); |
1304 | #ifdef WIN32 |
1305 | - SOCKET theDummySocket; |
1306 | -#endif |
1307 | - }; |
1308 | - |
1309 | - } // namespace curl |
1310 | + SOCKET dummy_socket_; |
1311 | +#endif /* WIN32 */ |
1312 | +}; |
1313 | + |
1314 | +/////////////////////////////////////////////////////////////////////////////// |
1315 | + |
1316 | +} // namespace curl |
1317 | } // namespace zorba |
1318 | #endif /* ZORBA_CURL_UTIL_H */ |
1319 | +/* vim:set et sw=2 ts=2: */ |
1320 | |
1321 | === modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp' |
1322 | --- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp 2011-07-29 08:12:36 +0000 |
1323 | +++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp 2012-02-08 19:20:28 +0000 |
1324 | @@ -26,12 +26,44 @@ |
1325 | #include <zorba/error.h> |
1326 | #include <zorba/xquery_exception.h> |
1327 | #include <zorba/xquery_functions.h> |
1328 | +#include <zorba/transcode_stream.h> |
1329 | |
1330 | #include "http_response_parser.h" |
1331 | #include "http_request_handler.h" |
1332 | #include "curl_stream_buffer.h" |
1333 | |
1334 | -namespace zorba { namespace http_client { |
1335 | +namespace zorba { |
1336 | + |
1337 | +// Splits a Content-Type value into *mime_type and *charset (ISO-8859-1 when |
1338 | +// the value has no parameters).  Returns true iff *charset was assigned. |
1339 | +// NOTE(review): the parameter name is not checked, so e.g. "boundary=x" is |
1340 | +// taken as the charset -- confirm whether that is intended. |
1341 | +static bool parse_content_type( std::string const &s, std::string *mime_type, |
1342 | +                                std::string *charset ) { |
1343 | +  std::string::size_type pos = s.find( ';' ); |
1344 | +  *mime_type = s.substr( 0, pos ); |
1345 | +  if ( pos == std::string::npos ) { |
1346 | +    // The HTTP/1.1 spec says that the default charset is ISO-8859-1. |
1347 | +    *charset = "ISO-8859-1"; |
1348 | +    return true; |
1349 | +  } |
1350 | +  // Parse: charset="?XXXXX"?[ (comment)] -- look for '=' only after the ';'. |
1351 | +  if ( (pos = s.find( '=', pos )) == std::string::npos ) |
1352 | +    return false;                       // parameters, but none with a value |
1353 | +  std::string t = s.substr( pos + 1 ); |
1354 | +  if ( t.empty() ) |
1355 | +    return false; |
1356 | +  if ( t[0] == '"' ) { |
1357 | +    t.erase( 0, 1 ); |
1358 | +    if ( (pos = t.find( '"' )) != std::string::npos ) |
1359 | +      t.erase( pos ); |
1360 | +  } else if ( (pos = t.find_first_of( " ;" )) != std::string::npos ) |
1361 | +    t.erase( pos );                     // drop a trailing comment/parameter |
1362 | +  *charset = t; |
1363 | +  return true; |
1364 | +} |
1365 | + |
1366 | +namespace http_client { |
1367 | |
1368 | HttpResponseParser::HttpResponseParser(RequestHandler& aHandler, CURL* aCurl, |
1369 | ErrorThrower& aErrorThrower, |
1370 | @@ -60,19 +92,30 @@ |
1371 | if (lCode) |
1372 | return lCode; |
1373 | if (!theStatusOnly) { |
1374 | - std::auto_ptr<std::istream> lStream(new std::istream(theStreamBuffer)); |
1375 | + |
1376 | + if (!theOverridenContentType.empty()) { |
1377 | + parse_content_type( |
1378 | + theOverridenContentType, &theCurrentContentType, &theCurrentCharset |
1379 | + ); |
1380 | + } |
1381 | + |
1382 | + std::auto_ptr<std::istream> lStream; |
1383 | + if ( transcode::is_necessary( theCurrentCharset.c_str() ) ) { |
1384 | + lStream.reset( |
1385 | + new transcode::stream<std::istream>( |
1386 | + theCurrentCharset.c_str(), theStreamBuffer |
1387 | + ) |
1388 | + ); |
1389 | + } else |
1390 | + lStream.reset(new std::istream(theStreamBuffer)); |
1391 | + |
1392 | Item lItem; |
1393 | - if (theOverridenContentType != "") { |
1394 | - theCurrentContentType = theOverridenContentType; |
1395 | - } |
1396 | if (theCurrentContentType == "text/xml" || |
1397 | theCurrentContentType == "application/xml" || |
1398 | theCurrentContentType == "text/xml-external-parsed-entity" || |
1399 | theCurrentContentType == "application/xml-external-parsed-entity" || |
1400 | theCurrentContentType.find("+xml") == theCurrentContentType.size()-4) { |
1401 | lItem = createXmlItem(*lStream.get()); |
1402 | - } else if (theCurrentContentType.find("text/html") == 0) { |
1403 | - lItem = createTextItem(lStream.release()); |
1404 | } else if (theCurrentContentType.find("text/") == 0) { |
1405 | lItem = createTextItem(lStream.release()); |
1406 | } else { |
1407 | @@ -106,8 +149,8 @@ |
1408 | } |
1409 | theInsideRead = true; |
1410 | theHandler.beginResponse(theStatus, theMessage); |
1411 | - std::vector<std::pair<std::string, std::string> >::iterator lIter; |
1412 | - for (lIter = theHeaders.begin(); lIter != theHeaders.end(); ++lIter) { |
1413 | + for ( headers_type::const_iterator |
1414 | + lIter = theHeaders.begin(); lIter != theHeaders.end(); ++lIter) { |
1415 | theHandler.header(lIter->first, lIter->second); |
1416 | } |
1417 | if (!theStatusOnly) |
1418 | @@ -120,23 +163,20 @@ |
1419 | |
1420 | void HttpResponseParser::registerHandler() |
1421 | { |
1422 | - curl_easy_setopt(theCurl, CURLOPT_HEADERFUNCTION, |
1423 | - &HttpResponseParser::headerfunction); |
1424 | + curl_easy_setopt(theCurl, CURLOPT_HEADERFUNCTION, &curl_headerfunction); |
1425 | curl_easy_setopt(theCurl, CURLOPT_HEADERDATA, this); |
1426 | } |
1427 | |
1428 | - size_t HttpResponseParser::headerfunction(void *ptr, |
1429 | - size_t size, |
1430 | - size_t nmemb, |
1431 | - void *stream) |
1432 | + size_t HttpResponseParser::curl_headerfunction( void *ptr, size_t size, |
1433 | + size_t nmemb, void *data ) |
1434 | { |
1435 | size_t lSize = size*nmemb; |
1436 | size_t lResult = lSize; |
1437 | - HttpResponseParser* lParser = static_cast<HttpResponseParser*>(stream); |
1438 | + HttpResponseParser* lParser = static_cast<HttpResponseParser*>(data); |
1439 | if (lParser->theInsideRead) { |
1440 | lParser->theHandler.endBody(); |
1441 | + lParser->theInsideRead = false; |
1442 | } |
1443 | - lParser->theInsideRead = false; |
1444 | const char* lDataChar = (const char*) ptr; |
1445 | while (lSize != 0 && (lDataChar[lSize - 1] == 10 |
1446 | || lDataChar[lSize - 1] == 13)) { |
1447 | @@ -173,7 +213,9 @@ |
1448 | } |
1449 | String lNameS = fn::lower_case( lName ); |
1450 | if (lNameS == "content-type") { |
1451 | - lParser->theCurrentContentType = lValue.substr(0, lValue.find(';')); |
1452 | + parse_content_type( |
1453 | + lValue, &lParser->theCurrentContentType, &lParser->theCurrentCharset |
1454 | + ); |
1455 | } else if (lNameS == "content-id") { |
1456 | lParser->theId = lValue; |
1457 | } else if (lNameS == "content-description") { |
1458 | @@ -184,7 +226,7 @@ |
1459 | return lResult; |
1460 | } |
1461 | |
1462 | - void HttpResponseParser::parseStatusAndMessage(std::string aHeader) |
1463 | + void HttpResponseParser::parseStatusAndMessage(std::string const &aHeader) |
1464 | { |
1465 | std::string::size_type lPos = aHeader.find(' '); |
1466 | assert(lPos != std::string::npos); |
1467 | @@ -215,7 +257,12 @@ |
1468 | static void streamReleaser(std::istream* aStream) |
1469 | { |
1470 | // This istream contains our curl stream buffer, so we have to delete it too |
1471 | - delete aStream->rdbuf(); |
1472 | + std::streambuf *const sbuf = aStream->rdbuf(); |
1473 | + if ( transcode::streambuf *tbuf = |
1474 | + dynamic_cast<transcode::streambuf*>( sbuf ) ) |
1475 | + delete tbuf->orig_streambuf(); |
1476 | + else |
1477 | + delete sbuf; |
1478 | delete aStream; |
1479 | } |
1480 | |
1481 | @@ -265,4 +312,7 @@ |
1482 | return Item(); |
1483 | } |
1484 | } |
1485 | -}} |
1486 | + |
1487 | +} // namespace http_client |
1488 | +} // namespace zorba |
1489 | +/* vim:set et sw=2 ts=2: */ |
1490 | |
1491 | === modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h' |
1492 | --- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h 2011-07-29 08:12:36 +0000 |
1493 | +++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h 2012-02-08 19:20:28 +0000 |
1494 | @@ -31,6 +31,7 @@ |
1495 | namespace curl { |
1496 | class streambuf; |
1497 | } |
1498 | + |
1499 | namespace http_client { |
1500 | class RequestHandler; |
1501 | |
1502 | @@ -40,7 +41,9 @@ |
1503 | CURL* theCurl; |
1504 | ErrorThrower& theErrorThrower; |
1505 | std::string theCurrentContentType; |
1506 | - std::vector<std::pair<std::string, std::string> > theHeaders; |
1507 | + std::string theCurrentCharset; |
1508 | + typedef std::vector<std::pair<std::string, std::string> > headers_type; |
1509 | + headers_type theHeaders; |
1510 | int theStatus; |
1511 | std::string theMessage; |
1512 | zorba::curl::streambuf* theStreamBuffer; |
1513 | @@ -74,15 +77,16 @@ |
1514 | virtual void afterRead(); |
1515 | private: |
1516 | void registerHandler(); |
1517 | - void parseStatusAndMessage(std::string aHeader); |
1518 | + void parseStatusAndMessage(std::string const &aHeader); |
1519 | Item createXmlItem(std::istream& aStream); |
1520 | Item createHtmlItem(std::istream& aStream); |
1521 | Item createTextItem(std::istream* aStream); |
1522 | Item createBase64Item(std::istream& aStream); |
1523 | - public: //Handler |
1524 | - static size_t headerfunction( void *ptr, size_t size, size_t nmemb, |
1525 | - void *stream); |
1526 | + |
1527 | + static size_t curl_headerfunction( void*, size_t, size_t, void* ); |
1528 | }; |
1529 | -}} // namespace zorba, http_client |
1530 | + |
1531 | +} // namespace http_client |
1532 | +} // namespace zorba |
1533 | |
1534 | #endif //HTTP_RESPONSE_PARSER_H |
1535 | |
1536 | === modified file 'modules/com/zorba-xquery/www/modules/pregenerated/errors.xq' |
1537 | --- modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2011-12-21 14:40:33 +0000 |
1538 | +++ modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-02-08 19:20:28 +0000 |
1539 | @@ -664,6 +664,10 @@ |
1540 | |
1541 | (:~ |
1542 | :) |
1543 | +declare variable $zerr:ZOSE0006 as xs:QName := fn:QName($zerr:NS, "zerr:ZOSE0006"); |
1544 | + |
1545 | +(:~ |
1546 | +:) |
1547 | declare variable $zerr:ZSTR0001 as xs:QName := fn:QName($zerr:NS, "zerr:ZSTR0001"); |
1548 | |
1549 | (:~ |
1550 | |
1551 | === modified file 'modules/org/expath/ns/file.xq.src/file.cpp' |
1552 | --- modules/org/expath/ns/file.xq.src/file.cpp 2011-07-22 08:12:31 +0000 |
1553 | +++ modules/org/expath/ns/file.xq.src/file.cpp 2012-02-08 19:20:28 +0000 |
1554 | @@ -28,6 +28,7 @@ |
1555 | #include <zorba/singleton_item_sequence.h> |
1556 | #include <zorba/util/path.h> |
1557 | #include <zorba/user_exception.h> |
1558 | +#include <zorba/transcode_stream.h> |
1559 | |
1560 | #include "file_module.h" |
1561 | |
1562 | @@ -188,6 +189,7 @@ |
1563 | { |
1564 | String lFileStr = getFilePathString(aArgs, 0); |
1565 | File_t lFile = File::createFile(lFileStr.c_str()); |
1566 | + String lEncoding("UTF-8"); |
1567 | |
1568 | // preconditions |
1569 | if (!lFile->exists()) { |
1570 | @@ -198,18 +200,30 @@ |
1571 | } |
1572 | |
1573 | if (aArgs.size() == 2) { |
1574 | - // since Zorba currently only supports UTF-8 we only call this function |
1575 | - // to reject any other encoding requested bu the user |
1576 | - getEncodingArg(aArgs, 1); |
1577 | + lEncoding = getEncodingArg(aArgs, 1); |
1578 | } |
1579 | |
1580 | - std::auto_ptr<StreamableItemSequence> lSeq(new StreamableItemSequence()); |
1581 | - lFile->openInputStream(*lSeq->theStream, false, true); |
1582 | - |
1583 | - lSeq->theItem = theModule->getItemFactory()->createStreamableString( |
1584 | - *lSeq->theStream, &StreamableItemSequence::streamReleaser); |
1585 | - |
1586 | - return ItemSequence_t(lSeq.release()); |
1587 | + zorba::Item lResult; |
1588 | + std::unique_ptr<std::ifstream> lInStream; |
1589 | + if ( transcode::is_necessary( lEncoding.c_str() ) ) |
1590 | + { |
1591 | + try { |
1592 | + lInStream.reset( new transcode::stream<std::ifstream>(lEncoding.c_str()) ); |
1593 | + } catch (std::invalid_argument const& e) |
1594 | + { |
1595 | + raiseFileError("FOFL0006", "Unsupported encoding", lEncoding.c_str()); |
1596 | + } |
1597 | + } |
1598 | + else |
1599 | + { |
1600 | + lInStream.reset( new std::ifstream() ); |
1601 | + } |
1602 | + lFile->openInputStream(*lInStream.get(), false, true); |
1603 | + lResult = theModule->getItemFactory()->createStreamableString( |
1604 | + *lInStream.release(), &FileModule::streamReleaser |
1605 | + ); |
1606 | + return ItemSequence_t(new SingletonItemSequence(lResult)); |
1607 | + |
1608 | } |
1609 | |
1610 | //***************************************************************************** |
1611 | @@ -722,3 +736,4 @@ |
1612 | extern "C" DLL_EXPORT zorba::ExternalModule* createModule() { |
1613 | return new zorba::filemodule::FileModule(); |
1614 | } |
1615 | +/* vim:set et sw=2 ts=2: */ |
1616 | |
1617 | === modified file 'modules/org/expath/ns/file.xq.src/file_function.cpp' |
1618 | --- modules/org/expath/ns/file.xq.src/file_function.cpp 2011-07-13 01:56:45 +0000 |
1619 | +++ modules/org/expath/ns/file.xq.src/file_function.cpp 2012-02-08 19:20:28 +0000 |
1620 | @@ -141,11 +141,6 @@ |
1621 | arg_iter->close(); |
1622 | } |
1623 | |
1624 | - if (!(lEncoding == "UTF-8" || lEncoding == "UTF8")) { |
1625 | - // the rest are not supported encodings |
1626 | - raiseFileError("FOFL0006", "Unsupported encoding", lEncoding.c_str()); |
1627 | - } |
1628 | - |
1629 | return lEncoding; |
1630 | } |
1631 | |
1632 | |
1633 | === modified file 'modules/org/expath/ns/file.xq.src/file_function.h' |
1634 | --- modules/org/expath/ns/file.xq.src/file_function.h 2011-07-22 08:12:31 +0000 |
1635 | +++ modules/org/expath/ns/file.xq.src/file_function.h 2012-02-08 19:20:28 +0000 |
1636 | @@ -25,7 +25,9 @@ |
1637 | |
1638 | #include <fstream> |
1639 | |
1640 | -namespace zorba { namespace filemodule { |
1641 | +namespace zorba { |
1642 | + |
1643 | + namespace filemodule { |
1644 | |
1645 | class FileModule; |
1646 | |
1647 | @@ -136,18 +138,12 @@ |
1648 | next(Item& aResult); |
1649 | }; |
1650 | |
1651 | - Item theItem; |
1652 | - std::ifstream* theStream; |
1653 | + Item theItem; |
1654 | + std::ifstream* theStream; |
1655 | |
1656 | StreamableItemSequence() |
1657 | : theStream(new std::ifstream()) {} |
1658 | |
1659 | - static void |
1660 | - streamReleaser(std::istream* stream) |
1661 | - { |
1662 | - delete stream; |
1663 | - } |
1664 | - |
1665 | Iterator_t getIterator() |
1666 | { |
1667 | return new InternalIterator(this); |
1668 | |
1669 | === modified file 'modules/org/expath/ns/file.xq.src/file_module.cpp' |
1670 | --- modules/org/expath/ns/file.xq.src/file_module.cpp 2011-06-08 18:37:56 +0000 |
1671 | +++ modules/org/expath/ns/file.xq.src/file_module.cpp 2012-02-08 19:20:28 +0000 |
1672 | @@ -17,11 +17,10 @@ |
1673 | #include "file.h" |
1674 | #include "file_module.h" |
1675 | #include "file_function.h" |
1676 | +#include <cassert> |
1677 | |
1678 | namespace zorba { namespace filemodule { |
1679 | |
1680 | - ItemFactory* FileModule::theFactory = 0; |
1681 | - |
1682 | const char* FileModule::theNamespace = "http://expath.org/ns/file"; |
1683 | |
1684 | |
1685 | @@ -39,9 +38,7 @@ |
1686 | { |
1687 | ExternalFunction*& lFunc = theFunctions[aLocalname]; |
1688 | if (!lFunc) { |
1689 | - if (1 == 0) { |
1690 | - |
1691 | - } else if (aLocalname == "create-directory") { |
1692 | + if (aLocalname == "create-directory") { |
1693 | lFunc = new CreateDirectoryFunction(this); |
1694 | } else if (aLocalname == "delete-file-impl") { |
1695 | lFunc = new DeleteFileImplFunction(this); |
1696 | |
1697 | === modified file 'modules/org/expath/ns/file.xq.src/file_module.h' |
1698 | --- modules/org/expath/ns/file.xq.src/file_module.h 2011-06-08 18:37:56 +0000 |
1699 | +++ modules/org/expath/ns/file.xq.src/file_module.h 2012-02-08 19:20:28 +0000 |
1700 | @@ -27,7 +27,7 @@ |
1701 | class FileModule : public ExternalModule |
1702 | { |
1703 | private: |
1704 | - static ItemFactory* theFactory; |
1705 | + mutable ItemFactory* theFactory; |
1706 | |
1707 | public: |
1708 | static const char* theNamespace; |
1709 | @@ -43,10 +43,17 @@ |
1710 | }; |
1711 | |
1712 | typedef std::map<String, ExternalFunction*, ltstr> FuncMap_t; |
1713 | - |
1714 | FuncMap_t theFunctions; |
1715 | - |
1716 | + |
1717 | public: |
1718 | + static void |
1719 | + streamReleaser(std::istream* stream) |
1720 | + { |
1721 | + delete stream; |
1722 | + } |
1723 | + |
1724 | + FileModule() : theFactory(0) {} |
1725 | + |
1726 | virtual ~FileModule(); |
1727 | |
1728 | virtual String |
1729 | @@ -58,10 +65,10 @@ |
1730 | virtual void |
1731 | destroy(); |
1732 | |
1733 | - static ItemFactory* |
1734 | - getItemFactory() |
1735 | + ItemFactory* |
1736 | + getItemFactory() const |
1737 | { |
1738 | - if(!theFactory) |
1739 | + if (!theFactory) |
1740 | { |
1741 | theFactory = Zorba::getInstance(0)->getItemFactory(); |
1742 | } |
1743 | |
1744 | === modified file 'src/api/CMakeLists.txt' |
1745 | --- src/api/CMakeLists.txt 2011-08-31 13:17:59 +0000 |
1746 | +++ src/api/CMakeLists.txt 2012-02-08 19:20:28 +0000 |
1747 | @@ -55,6 +55,7 @@ |
1748 | zorba_functions.cpp |
1749 | annotationimpl.cpp |
1750 | auditimpl.cpp |
1751 | + transcode_streambuf.cpp |
1752 | ) |
1753 | |
1754 | IF (NOT ZORBA_NO_FULL_TEXT) |
1755 | |
1756 | === added file 'src/api/transcode_streambuf.cpp' |
1757 | --- src/api/transcode_streambuf.cpp 1970-01-01 00:00:00 +0000 |
1758 | +++ src/api/transcode_streambuf.cpp 2012-02-08 19:20:28 +0000 |
1759 | @@ -0,0 +1,102 @@ |
1760 | +/* |
1761 | + * Copyright 2006-2008 The FLWOR Foundation. |
1762 | + * |
1763 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
1764 | + * you may not use this file except in compliance with the License. |
1765 | + * You may obtain a copy of the License at |
1766 | + * |
1767 | + * http://www.apache.org/licenses/LICENSE-2.0 |
1768 | + * |
1769 | + * Unless required by applicable law or agreed to in writing, software |
1770 | + * distributed under the License is distributed on an "AS IS" BASIS, |
1771 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
1772 | + * See the License for the specific language governing permissions and |
1773 | + * limitations under the License. |
1774 | + */ |
1775 | + |
1776 | +#include <zorba/transcode_stream.h> |
1777 | + |
1778 | +#include "util/transcode_streambuf.h" |
1779 | + |
1780 | +using namespace std; |
1781 | + |
1782 | +namespace zorba { |
1783 | +namespace transcode { |
1784 | + |
1785 | +/////////////////////////////////////////////////////////////////////////////// |
1786 | + |
1787 | +streambuf::streambuf( char const *charset, std::streambuf *orig ) : |
1788 | + proxy_buf_( new internal::transcode::streambuf( charset, orig ) ) |
1789 | +{ |
1790 | +} |
1791 | + |
1792 | +streambuf::~streambuf() { |
1793 | + // out-of-line since it's virtual |
1794 | +} |
1795 | + |
1796 | +void streambuf::imbue( std::locale const &loc ) { |
1797 | + proxy_buf_->pubimbue( loc ); |
1798 | +} |
1799 | + |
1800 | +streambuf::pos_type streambuf::seekoff( off_type o, ios_base::seekdir d, |
1801 | + ios_base::openmode m ) { |
1802 | + return proxy_buf_->pubseekoff( o, d, m ); |
1803 | +} |
1804 | + |
1805 | +streambuf::pos_type streambuf::seekpos( pos_type p, ios_base::openmode m ) { |
1806 | + return proxy_buf_->pubseekpos( p, m ); |
1807 | +} |
1808 | + |
1809 | +std::streambuf* streambuf::setbuf( char_type *p, streamsize s ) { |
1810 | + proxy_buf_->pubsetbuf( p, s ); |
1811 | + return this; |
1812 | +} |
1813 | + |
1814 | +streamsize streambuf::showmanyc() { |
1815 | + return proxy_buf_->in_avail(); |
1816 | +} |
1817 | + |
1818 | +int streambuf::sync() { |
1819 | + return proxy_buf_->pubsync(); |
1820 | +} |
1821 | + |
1822 | +streambuf::int_type streambuf::overflow( int_type c ) { |
1823 | + return proxy_buf_->sputc( c ); |
1824 | +} |
1825 | + |
1826 | +streambuf::int_type streambuf::pbackfail( int_type c ) { |
1827 | + return proxy_buf_->sputbackc( traits_type::to_char_type( c ) ); |
1828 | +} |
1829 | + |
1830 | +streambuf::int_type streambuf::uflow() { |
1831 | + return proxy_buf_->sbumpc(); |
1832 | +} |
1833 | + |
1834 | +streambuf::int_type streambuf::underflow() { |
1835 | + return proxy_buf_->sgetc(); |
1836 | +} |
1837 | + |
1838 | +streamsize streambuf::xsgetn( char_type *to, streamsize size ) { |
1839 | + return proxy_buf_->sgetn( to, size ); |
1840 | +} |
1841 | + |
1842 | +streamsize streambuf::xsputn( char_type const *from, |
1843 | + streamsize size ) { |
1844 | + return proxy_buf_->sputn( from, size ); |
1845 | +} |
1846 | + |
1847 | +/////////////////////////////////////////////////////////////////////////////// |
1848 | + |
1849 | +bool is_necessary( char const *charset ) { |
1850 | + return internal::transcode::streambuf::is_necessary( charset ); |
1851 | +} |
1852 | + |
1853 | +bool is_supported( char const *charset ) { |
1854 | + return internal::transcode::streambuf::is_supported( charset ); |
1855 | +} |
1856 | + |
1857 | +/////////////////////////////////////////////////////////////////////////////// |
1858 | + |
1859 | +} // namespace transcode |
1860 | +} // namespace zorba |
1861 | +/* vim:set et sw=2 ts=2: */ |
1862 | |
1863 | === modified file 'src/diagnostics/diagnostic_en.xml' |
1864 | --- src/diagnostics/diagnostic_en.xml 2011-12-21 14:40:33 +0000 |
1865 | +++ src/diagnostics/diagnostic_en.xml 2012-02-08 19:20:28 +0000 |
1866 | @@ -2188,6 +2188,10 @@ |
1867 | <value>"$1": error loading dynamic library${: 2}</value> |
1868 | </diagnostic> |
1869 | |
1870 | + <diagnostic code="ZOSE0006" name="TRANSCODING_ERROR"> |
1871 | + <value>stream transcoding error ($1)</value> |
1872 | + </diagnostic> |
1873 | + |
1874 | <!--////////// Zorba Store Errors //////////////////////////////////////--> |
1875 | |
1876 | <diagnostic code="ZSTR0001" name="INDEX_ALREADY_EXISTS"> |
1877 | |
1878 | === modified file 'src/diagnostics/pregenerated/diagnostic_list.cpp' |
1879 | --- src/diagnostics/pregenerated/diagnostic_list.cpp 2011-12-21 14:40:33 +0000 |
1880 | +++ src/diagnostics/pregenerated/diagnostic_list.cpp 2012-02-08 19:20:28 +0000 |
1881 | @@ -1004,6 +1004,9 @@ |
1882 | ZorbaErrorCode ZOSE0005_DLL_LOAD_FAILED( "ZOSE0005" ); |
1883 | |
1884 | |
1885 | +ZorbaErrorCode ZOSE0006_TRANSCODING_ERROR( "ZOSE0006" ); |
1886 | + |
1887 | + |
1888 | ZorbaErrorCode ZSTR0001_INDEX_ALREADY_EXISTS( "ZSTR0001" ); |
1889 | |
1890 | |
1891 | |
1892 | === modified file 'src/diagnostics/pregenerated/dict_en.cpp' |
1893 | --- src/diagnostics/pregenerated/dict_en.cpp 2011-12-21 14:40:33 +0000 |
1894 | +++ src/diagnostics/pregenerated/dict_en.cpp 2012-02-08 19:20:28 +0000 |
1895 | @@ -337,6 +337,7 @@ |
1896 | { "ZOSE0003", "stream read failure" }, |
1897 | { "ZOSE0004", "${\"1\": }I/O error${: 2}" }, |
1898 | { "ZOSE0005", "\"$1\": error loading dynamic library${: 2}" }, |
1899 | + { "ZOSE0006", "stream transcoding error ($1)" }, |
1900 | { "ZSTR0001", "\"$1\": index already exists" }, |
1901 | { "ZSTR0002", "\"$1\": index does not exist" }, |
1902 | { "ZSTR0003", "\"$1\": partial key insertion into index \"$2\"" }, |
1903 | |
1904 | === modified file 'src/unit_tests/CMakeLists.txt' |
1905 | --- src/unit_tests/CMakeLists.txt 2012-02-02 09:56:52 +0000 |
1906 | +++ src/unit_tests/CMakeLists.txt 2012-02-08 19:20:28 +0000 |
1907 | @@ -11,7 +11,6 @@ |
1908 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
1909 | # See the License for the specific language governing permissions and |
1910 | # limitations under the License. |
1911 | - |
1912 | |
1913 | SET(UNIT_TEST_SRCS |
1914 | string_instantiate.cpp |
1915 | @@ -28,10 +27,9 @@ |
1916 | tokenizer.cpp) |
1917 | ENDIF (NOT ZORBA_NO_FULL_TEXT) |
1918 | |
1919 | -IF(ZORBA_WITH_DEBUGGER) |
1920 | - LIST(APPEND UNIT_TEST_SRCS |
1921 | -# test_debugger_protocol.cpp |
1922 | - ) |
1923 | -ENDIF(ZORBA_WITH_DEBUGGER) |
1924 | +IF (NOT ZORBA_NO_UNICODE) |
1925 | + LIST (APPEND UNIT_TEST_SRCS |
1926 | + test_icu_streambuf.cpp) |
1927 | +ENDIF (NOT ZORBA_NO_UNICODE) |
1928 | |
1929 | # vim:set et sw=2 tw=2: |
1930 | |
1931 | === added file 'src/unit_tests/test_icu_streambuf.cpp' |
1932 | --- src/unit_tests/test_icu_streambuf.cpp 1970-01-01 00:00:00 +0000 |
1933 | +++ src/unit_tests/test_icu_streambuf.cpp 2012-02-08 19:20:28 +0000 |
1934 | @@ -0,0 +1,151 @@ |
1935 | +/* |
1936 | + * Copyright 2006-2008 The FLWOR Foundation. |
1937 | + * |
1938 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
1939 | + * you may not use this file except in compliance with the License. |
1940 | + * You may obtain a copy of the License at |
1941 | + * |
1942 | + * http://www.apache.org/licenses/LICENSE-2.0 |
1943 | + * |
1944 | + * Unless required by applicable law or agreed to in writing, software |
1945 | + * distributed under the License is distributed on an "AS IS" BASIS, |
1946 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
1947 | + * See the License for the specific language governing permissions and |
1948 | + * limitations under the License. |
1949 | + */ |
1950 | + |
1951 | +#include <fstream> |
1952 | +#include <iostream> |
1953 | +#include <sstream> |
1954 | + |
1955 | +#include "util/transcode_streambuf.h" |
1956 | + |
1957 | +using namespace std; |
1958 | +using namespace zorba; |
1959 | + |
1960 | +#define COPYRIGHT_ISO "\xA9" |
1961 | +#define COPYRIGHT_UTF8 "\xC2\xA9" |
1962 | + |
1963 | +#define ONE_THIRD_UTF8 "\xE2\x85\x93" |
1964 | +#define ONE_THIRD_UTF16BE "\x21\x53" |
1965 | + |
1966 | +struct test { |
1967 | + char const *ext_charset; |
1968 | + char const *ext_str; |
1969 | + int ext_len; |
1970 | + char const *utf8_str; |
1971 | +}; |
1972 | + |
1973 | +static test const tests[] = { |
1974 | + /* 0 */ { "ISO-8859-1", "Copyright " COPYRIGHT_ISO " 2011", 0, "Copyright " COPYRIGHT_UTF8 " 2011" }, |
1975 | + /* 1 */ { "UTF-16BE", ONE_THIRD_UTF16BE "\0 \0c\0u\0p", 10, ONE_THIRD_UTF8 " cup" }, |
1976 | + { 0, 0, 0, 0 } |
1977 | +}; |
1978 | + |
1979 | +static string make_ext_str( test const *t ) { |
1980 | + if ( t->ext_len ) |
1981 | + return string( t->ext_str, t->ext_len ); |
1982 | + return string( t->ext_str ); |
1983 | +} |
1984 | + |
1985 | +/////////////////////////////////////////////////////////////////////////////// |
1986 | + |
1987 | +static int failures; |
1988 | + |
1989 | +static bool assert_true( int no, char const *expr, int line, bool result ) { |
1990 | + if ( !result ) { |
1991 | + cout << '#' << no << " FAILED, line " << line << ": " << expr << endl; |
1992 | + ++failures; |
1993 | + } |
1994 | + return result; |
1995 | +} |
1996 | + |
1997 | +static void print_exception( int no, char const *expr, int line, |
1998 | + std::exception const &e ) { |
1999 | + assert_true( no, expr, line, false ); |
2000 | + cout << "+ exception: " << e.what() << endl; |
2001 | +} |
2002 | + |
2003 | +#define ASSERT_TRUE( NO, EXPR ) assert_true( NO, #EXPR, __LINE__, !!(EXPR) ) |
2004 | + |
2005 | +#define ASSERT_TRUE_AND_NO_EXCEPTION( NO, EXPR ) \ |
2006 | + try { ASSERT_TRUE( NO, EXPR ); } \ |
2007 | + catch ( std::exception const &e ) { print_exception( NO, #EXPR, __LINE__, e ); } |
2008 | + |
2009 | +/////////////////////////////////////////////////////////////////////////////// |
2010 | + |
2011 | +static bool test_getline( test const *t ) { |
2012 | + string const ext_str( make_ext_str( t ) ); |
2013 | + istringstream iss( ext_str ); |
2014 | + icu_streambuf xbuf( t->ext_charset, iss.rdbuf() ); |
2015 | + iss.ios::rdbuf( &xbuf ); |
2016 | + |
2017 | + char utf8_buf[ 1024 ]; |
2018 | + iss.getline( utf8_buf, sizeof utf8_buf ); |
2019 | + if ( iss.gcount() ) { |
2020 | + string const utf8_str( utf8_buf ); |
2021 | + return utf8_str == t->utf8_str; |
2022 | + } |
2023 | + return false; |
2024 | +} |
2025 | + |
2026 | +static bool test_read( test const *t ) { |
2027 | + string const ext_str( make_ext_str( t ) ); |
2028 | + istringstream iss( ext_str ); |
2029 | + icu_streambuf xbuf( t->ext_charset, iss.rdbuf() ); |
2030 | + iss.ios::rdbuf( &xbuf ); |
2031 | + |
2032 | + char utf8_buf[ 1024 ]; |
2033 | + iss.read( utf8_buf, sizeof utf8_buf ); |
2034 | + if ( iss.gcount() ) { |
2035 | + string const utf8_str( utf8_buf, iss.gcount() ); |
2036 | + return utf8_str == t->utf8_str; |
2037 | + } |
2038 | + return false; |
2039 | +} |
2040 | + |
2041 | +static bool test_insertion( test const *t ) { |
2042 | + ostringstream oss; |
2043 | + icu_streambuf xbuf( t->ext_charset, oss.rdbuf() ); |
2044 | + oss.ios::rdbuf( &xbuf ); |
2045 | + |
2046 | + oss << t->utf8_str << flush; |
2047 | + string const ext_str( oss.str() ); |
2048 | + |
2049 | + string const expected_ext_str( make_ext_str( t ) ); |
2050 | + return ext_str == expected_ext_str; |
2051 | +} |
2052 | + |
2053 | +static bool test_put( test const *t ) { |
2054 | + ostringstream oss; |
2055 | + icu_streambuf xbuf( t->ext_charset, oss.rdbuf() ); |
2056 | + oss.ios::rdbuf( &xbuf ); |
2057 | + |
2058 | + for ( char const *c = t->utf8_str; *c; ++c ) |
2059 | + oss.put( *c ); |
2060 | + string const ext_str( oss.str() ); |
2061 | + |
2062 | + string const expected_ext_str( make_ext_str( t ) ); |
2063 | + return ext_str == expected_ext_str; |
2064 | +} |
2065 | + |
2066 | +/////////////////////////////////////////////////////////////////////////////// |
2067 | + |
2068 | +namespace zorba { |
2069 | +namespace UnitTests { |
2070 | + |
2071 | +int test_icu_streambuf( int, char*[] ) { |
2072 | + int test_no = 0; |
2073 | + for ( test const *t = tests; t->utf8_str; ++t, ++test_no ) { |
2074 | + ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_getline( t ) ); |
2075 | + ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_read( t ) ); |
2076 | + ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_insertion( t ) ); |
2077 | + ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_put( t ) ); |
2078 | + } |
2079 | + cout << failures << " test(s) failed\n"; |
2080 | + return failures ? 1 : 0; |
2081 | +} |
2082 | + |
2083 | +} // namespace UnitTests |
2084 | +} // namespace zorba |
2085 | +/* vim:set et sw=2 ts=2: */ |
2086 | |
2087 | === modified file 'src/unit_tests/unit_test_list.h' |
2088 | --- src/unit_tests/unit_test_list.h 2012-02-02 09:56:52 +0000 |
2089 | +++ src/unit_tests/unit_test_list.h 2012-02-08 19:20:28 +0000 |
2090 | @@ -17,6 +17,8 @@ |
2091 | #ifndef ZORBA_UNIT_TEST_LIST_H |
2092 | #define ZORBA_UNIT_TEST_LIST_H |
2093 | |
2094 | +#include <iostream> |
2095 | + |
2096 | #include <zorba/config.h> |
2097 | |
2098 | namespace zorba { |
2099 | @@ -34,6 +36,9 @@ |
2100 | /** |
2101 | * ADD NEW UNIT TESTS HERE |
2102 | */ |
2103 | +#ifndef ZORBA_NO_UNICODE |
2104 | + int test_icu_streambuf( int, char*[] ); |
2105 | +#endif /* ZORBA_NO_UNICODE */ |
2106 | |
2107 | void initializeTestList(); |
2108 | }; |
2109 | |
2110 | === modified file 'src/unit_tests/unit_tests.cpp' |
2111 | --- src/unit_tests/unit_tests.cpp 2012-02-02 09:56:52 +0000 |
2112 | +++ src/unit_tests/unit_tests.cpp 2012-02-08 19:20:28 +0000 |
2113 | @@ -39,6 +39,9 @@ |
2114 | void initializeTestList() { |
2115 | libunittests["string"] = test_string; |
2116 | libunittests["uri"] = runUriTest; |
2117 | +#ifndef ZORBA_NO_UNICODE |
2118 | + libunittests["icu_streambuf"] = test_icu_streambuf; |
2119 | +#endif /* ZORBA_NO_UNICODE */ |
2120 | libunittests["unique_ptr"] = test_unique_ptr; |
2121 | #ifndef ZORBA_NO_FULL_TEXT |
2122 | libunittests["stemmer"] = test_stemmer; |
2123 | |
2124 | === modified file 'src/util/CMakeLists.txt' |
2125 | --- src/util/CMakeLists.txt 2011-07-18 14:25:21 +0000 |
2126 | +++ src/util/CMakeLists.txt 2012-02-08 19:20:28 +0000 |
2127 | @@ -39,7 +39,12 @@ |
2128 | ENDIF(ZORBA_WITH_FILE_ACCESS) |
2129 | |
2130 | IF(ZORBA_NO_UNICODE) |
2131 | - LIST(APPEND UTIL_SRCS regex_ascii.cpp) |
2132 | + LIST(APPEND UTIL_SRCS |
2133 | + regex_ascii.cpp |
2134 | + passthru_streambuf.cpp) |
2135 | +ELSE(ZORBA_NO_UNICODE) |
2136 | + LIST(APPEND UTIL_SRCS |
2137 | + icu_streambuf.cpp) |
2138 | ENDIF(ZORBA_NO_UNICODE) |
2139 | |
2140 | HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx) |
2141 | |
2142 | === added file 'src/util/icu_streambuf.cpp' |
2143 | --- src/util/icu_streambuf.cpp 1970-01-01 00:00:00 +0000 |
2144 | +++ src/util/icu_streambuf.cpp 2012-02-08 19:20:28 +0000 |
2145 | @@ -0,0 +1,300 @@ |
2146 | +/* |
2147 | + * Copyright 2006-2008 The FLWOR Foundation. |
2148 | + * |
2149 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
2150 | + * you may not use this file except in compliance with the License. |
2151 | + * You may obtain a copy of the License at |
2152 | + * |
2153 | + * http://www.apache.org/licenses/LICENSE-2.0 |
2154 | + * |
2155 | + * Unless required by applicable law or agreed to in writing, software |
2156 | + * distributed under the License is distributed on an "AS IS" BASIS, |
2157 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
2158 | + * See the License for the specific language governing permissions and |
2159 | + * limitations under the License. |
2160 | + */ |
2161 | + |
2162 | +#define ZORBA_DEBUG_ICU_STREAMBUF 0 |
2163 | + |
2164 | +#ifdef ZORBA_DEBUG_ICU_STREAMBUF |
2165 | +# include <stdio.h> |
2166 | +#endif |
2167 | + |
2168 | +#include <algorithm> |
2169 | +#include <cassert> |
2170 | + |
2171 | +#include <zorba/diagnostic_list.h> |
2172 | + |
2173 | +#include "diagnostics/assert.h" |
2174 | +#include "diagnostics/diagnostic.h" |
2175 | +#include "diagnostics/zorba_exception.h" |
2176 | +#include "util/cxx_util.h" |
2177 | +#include "util/string_util.h" |
2178 | +#include "util/utf8_util.h" |
2179 | + |
2180 | +#include "icu_streambuf.h" |
2181 | + |
2182 | +using namespace std; |
2183 | + |
2184 | +namespace zorba { |
2185 | + |
2186 | +int const Small_External_Buf_Size = 6; |
2187 | +int const Large_External_Buf_Size = 4096; |
2188 | + |
2189 | +/////////////////////////////////////////////////////////////////////////////// |
2190 | + |
2191 | +inline void icu_streambuf::buf_type_base::reset() { |
2192 | + pivot_source_ = pivot_target_ = pivot_buf_; |
2193 | +} |
2194 | + |
2195 | +inline void icu_streambuf::resetg() { |
2196 | + setg( |
2197 | + g_.utf8_char_, g_.utf8_char_ + sizeof g_.utf8_char_, |
2198 | + g_.utf8_char_ + sizeof g_.utf8_char_ |
2199 | + ); |
2200 | +} |
2201 | + |
2202 | +icu_streambuf::icu_streambuf( char const *charset, streambuf *orig ) : |
2203 | + proxy_streambuf( orig ), |
2204 | + no_conv_( !is_necessary( charset ) ), |
2205 | + external_conv_( no_conv_ ? nullptr : create_conv( charset ) ), |
2206 | + utf8_conv_( no_conv_ ? nullptr : create_conv( "UTF-8" ) ) |
2207 | +{ |
2208 | + if ( !orig ) |
2209 | + throw invalid_argument( "null streambuf" ); |
2210 | + resetg(); |
2211 | +} |
2212 | + |
2213 | +icu_streambuf::~icu_streambuf() { |
2214 | + if ( external_conv_ ) |
2215 | + ucnv_close( external_conv_ ); |
2216 | + if ( utf8_conv_ ) |
2217 | + ucnv_close( utf8_conv_ ); |
2218 | +} |
2219 | + |
2220 | +void icu_streambuf::clear() { |
2221 | + if ( !no_conv_ ) { |
2222 | + ucnv_reset( external_conv_ ); |
2223 | + ucnv_reset( utf8_conv_ ); |
2224 | + g_.reset(); |
2225 | + p_.reset(); |
2226 | + resetg(); |
2227 | + } |
2228 | +} |
2229 | + |
2230 | +UConverter* icu_streambuf::create_conv( char const *charset ) { |
2231 | + UErrorCode err = U_ZERO_ERROR; |
2232 | + UConverter *const conv = ucnv_open( charset, &err ); |
2233 | + ucnv_setFromUCallBack( |
2234 | + conv, UCNV_FROM_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &err |
2235 | + ); |
2236 | + ucnv_setToUCallBack( |
2237 | + conv, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &err |
2238 | + ); |
2239 | + if ( !conv || U_FAILURE( err ) ) { |
2240 | + if ( conv ) |
2241 | + ucnv_close( conv ); |
2242 | + throw invalid_argument( charset ); |
2243 | + } |
2244 | + return conv; |
2245 | +} |
2246 | + |
2247 | +bool icu_streambuf::is_necessary( char const *charset ) { |
2248 | + // |
2249 | + // Checking for "US-ASCII" explicitly isn't necessary since ICU knows about |
2250 | + // aliases. |
2251 | + // |
2252 | + return ucnv_compareNames( charset, "ASCII" ) |
2253 | + && ucnv_compareNames( charset, "UTF-8" ); |
2254 | +} |
2255 | + |
2256 | +bool icu_streambuf::is_supported( char const *charset ) { |
2257 | + try { |
2258 | + ucnv_close( create_conv( charset ) ); |
2259 | + return true; |
2260 | + } |
2261 | + catch ( invalid_argument const& ) { |
2262 | + return false; |
2263 | + } |
2264 | +} |
2265 | + |
2266 | +icu_streambuf::pos_type icu_streambuf::seekoff( off_type o, ios_base::seekdir d, |
2267 | + ios_base::openmode m ) { |
2268 | + clear(); |
2269 | + return original()->pubseekoff( o, d, m ); |
2270 | +} |
2271 | + |
2272 | +icu_streambuf::pos_type icu_streambuf::seekpos( pos_type p, |
2273 | + ios_base::openmode m ) { |
2274 | + clear(); |
2275 | + return original()->pubseekpos( p, m ); |
2276 | +} |
2277 | + |
2278 | +streambuf* icu_streambuf::setbuf( char_type *p, streamsize s ) { |
2279 | + original()->pubsetbuf( p, s ); |
2280 | + return this; |
2281 | +} |
2282 | + |
2283 | +int icu_streambuf::sync() { |
2284 | + return original()->pubsync(); |
2285 | +} |
2286 | + |
2287 | +icu_streambuf::int_type icu_streambuf::overflow( int_type c ) { |
2288 | +#if ZORBA_DEBUG_ICU_STREAMBUF |
2289 | + printf( "overflow()\n" ); |
2290 | +#endif |
2291 | + if ( no_conv_ ) |
2292 | + return original()->sputc( c ); |
2293 | + |
2294 | + if ( traits_type::eq_int_type( c, traits_type::eof() ) ) |
2295 | + return traits_type::eof(); |
2296 | + |
2297 | + char_type const utf8_byte = traits_type::to_char_type( c ); |
2298 | + char_type const *from = &utf8_byte; |
2299 | + char ebuf[ Small_External_Buf_Size ], *to = ebuf; |
2300 | + |
2301 | + bool const ok = to_external( &from, from + 1, &to, to + sizeof ebuf ); |
2302 | + assert( ok ); |
2303 | + if ( streamsize const n = to - ebuf ) { |
2304 | + original()->sputn( ebuf, n ); |
2305 | + p_.reset(); |
2306 | + } |
2307 | + |
2308 | + return c; |
2309 | +} |
2310 | + |
2311 | +bool icu_streambuf::to_external( char_type const **from, |
2312 | + char_type const *from_end, char **to, |
2313 | + char const *to_end, bool flush ) { |
2314 | + UErrorCode err = U_ZERO_ERROR; |
2315 | + ucnv_convertEx( |
2316 | + external_conv_, utf8_conv_, to, to_end, from, from_end, |
2317 | + p_.pivot_buf_, &p_.pivot_source_, &p_.pivot_target_, |
2318 | + p_.pivot_buf_ + sizeof p_.pivot_buf_, |
2319 | + /*reset*/ false, flush, &err |
2320 | + ); |
2321 | + if ( err == U_TRUNCATED_CHAR_FOUND || err == U_BUFFER_OVERFLOW_ERROR ) |
2322 | + return false; |
2323 | + if ( U_FAILURE( err ) ) |
2324 | + throw ZORBA_EXCEPTION( |
2325 | + zerr::ZOSE0006_TRANSCODING_ERROR, ERROR_PARAMS( u_errorName( err ) ) |
2326 | + ); |
2327 | + return true; |
2328 | +} |
2329 | + |
2330 | +bool icu_streambuf::to_utf8( char const **from, char const *from_end, |
2331 | + char_type **to, char_type const *to_end, |
2332 | + bool flush ) { |
2333 | + UErrorCode err = U_ZERO_ERROR; |
2334 | + ucnv_convertEx( |
2335 | + utf8_conv_, external_conv_, to, to_end, from, from_end, |
2336 | + g_.pivot_buf_, &g_.pivot_source_, &g_.pivot_target_, |
2337 | + g_.pivot_buf_ + sizeof g_.pivot_buf_, |
2338 | + /*reset*/ false, flush, &err |
2339 | + ); |
2340 | + if ( err == U_TRUNCATED_CHAR_FOUND || err == U_BUFFER_OVERFLOW_ERROR ) |
2341 | + return false; |
2342 | + if ( U_FAILURE( err ) ) |
2343 | + throw ZORBA_EXCEPTION( |
2344 | + zerr::ZOSE0006_TRANSCODING_ERROR, ERROR_PARAMS( u_errorName( err ) ) |
2345 | + ); |
2346 | + return true; |
2347 | +} |
2348 | + |
2349 | +icu_streambuf::int_type icu_streambuf::underflow() { |
2350 | +#if ZORBA_DEBUG_ICU_STREAMBUF |
2351 | + printf( "underflow()\n" ); |
2352 | +#endif |
2353 | + if ( no_conv_ ) |
2354 | + return original()->sgetc(); |
2355 | + |
2356 | + if ( gptr() >= egptr() ) { |
2357 | + utf8::storage_type *to = g_.utf8_char_; |
2358 | + utf8::storage_type const *const to_end = to + sizeof g_.utf8_char_; |
2359 | + |
2360 | + while ( true ) { |
2361 | + int_type const c = original()->sbumpc(); |
2362 | + if ( traits_type::eq_int_type( c, traits_type::eof() ) ) |
2363 | + return traits_type::eof(); |
2364 | + |
2365 | + char const ebyte = traits_type::to_char_type( c ); |
2366 | + char const *from = &ebyte; |
2367 | + |
2368 | + to_utf8( &from, from + 1, &to, to_end ); |
2369 | + if ( to > g_.utf8_char_ ) { |
2370 | + setg( g_.utf8_char_, g_.utf8_char_, to ); |
2371 | + g_.reset(); |
2372 | + break; |
2373 | + } |
2374 | + } |
2375 | + } |
2376 | + return traits_type::to_int_type( *gptr() ); |
2377 | +} |
2378 | + |
2379 | +streamsize icu_streambuf::xsgetn( char_type *to, streamsize size ) { |
2380 | +#if ZORBA_DEBUG_ICU_STREAMBUF |
2381 | + printf( "xsgetn()\n" ); |
2382 | +#endif |
2383 | + if ( no_conv_ ) |
2384 | + return original()->sgetn( to, size ); |
2385 | + |
2386 | + streamsize return_size = 0; |
2387 | + char_type *const to_end = to + size; |
2388 | + |
2389 | + if ( streamsize const gsize = egptr() - gptr() ) { |
2390 | + // must first get any chars in g_.utf8_char_ |
2391 | + streamsize const n = min( gsize, size ); |
2392 | + traits_type::copy( to, gptr(), n ); |
2393 | + gbump( n ); |
2394 | + to += n; |
2395 | + size -= n, return_size += n; |
2396 | + } |
2397 | + |
2398 | + while ( size > 0 ) { |
2399 | + char ebuf[ Large_External_Buf_Size ]; |
2400 | + streamsize const get = min( (streamsize)(sizeof ebuf), size ); |
2401 | + if ( streamsize const got = original()->sgetn( ebuf, get ) ) { |
2402 | + char const *from = ebuf; |
2403 | + char_type const *const to_orig = to; |
2404 | + int_type const peek = original()->sgetc(); |
2405 | + bool const flush = traits_type::eq_int_type( peek, traits_type::eof() ); |
2406 | + to_utf8( &from, from + got, &to, to_end, flush ); |
2407 | + streamsize const n = to - to_orig; |
2408 | + size -= n, return_size += n; |
2409 | + if ( flush ) |
2410 | + break; |
2411 | + } else |
2412 | + break; |
2413 | + } |
2414 | + return return_size; |
2415 | +} |
2416 | + |
2417 | +streamsize icu_streambuf::xsputn( char_type const *from, streamsize size ) { |
2418 | +#if ZORBA_DEBUG_ICU_STREAMBUF |
2419 | + printf( "xsputn()\n" ); |
2420 | +#endif |
2421 | + if ( no_conv_ ) |
2422 | + return original()->sputn( from, size ); |
2423 | + |
2424 | + streamsize return_size = 0; |
2425 | + char_type const *const from_end = from + size; |
2426 | + char ebuf[ Large_External_Buf_Size ], *to = ebuf; |
2427 | + char const *const to_end = to + sizeof ebuf; |
2428 | + |
2429 | + while ( size > 0 ) { |
2430 | + char_type const *const from_orig = from; |
2431 | + to_external( &from, from_end, &to, to_end ); |
2432 | + streamsize n = to - ebuf; |
2433 | + if ( n && !original()->sputn( ebuf, n ) ) |
2434 | + break; |
2435 | + to = ebuf; |
2436 | + n = from - from_orig; |
2437 | + size -= n, return_size += n; |
2438 | + } |
2439 | + return return_size; |
2440 | +} |
2441 | + |
2442 | +/////////////////////////////////////////////////////////////////////////////// |
2443 | + |
2444 | +} // namespace zorba |
2445 | +/* vim:set et sw=2 ts=2: */ |
2446 | |
2447 | === added file 'src/util/icu_streambuf.h' |
2448 | --- src/util/icu_streambuf.h 1970-01-01 00:00:00 +0000 |
2449 | +++ src/util/icu_streambuf.h 2012-02-08 19:20:28 +0000 |
2450 | @@ -0,0 +1,140 @@ |
2451 | +/* |
2452 | + * Copyright 2006-2008 The FLWOR Foundation. |
2453 | + * |
2454 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
2455 | + * you may not use this file except in compliance with the License. |
2456 | + * You may obtain a copy of the License at |
2457 | + * |
2458 | + * http://www.apache.org/licenses/LICENSE-2.0 |
2459 | + * |
2460 | + * Unless required by applicable law or agreed to in writing, software |
2461 | + * distributed under the License is distributed on an "AS IS" BASIS, |
2462 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
2463 | + * See the License for the specific language governing permissions and |
2464 | + * limitations under the License. |
2465 | + */ |
2466 | + |
2467 | +#ifndef ZORBA_ICU_STREAMBUF_H |
2468 | +#define ZORBA_ICU_STREAMBUF_H |
2469 | + |
2470 | +#include <zorba/transcode_stream.h> |
2471 | + |
2472 | +#include "util/utf8_util.h" |
2473 | + |
2474 | +namespace zorba { |
2475 | + |
2476 | +/////////////////////////////////////////////////////////////////////////////// |
2477 | + |
2478 | +/** |
2479 | + * An %icu_streambuf is-a std::streambuf for transcoding character encodings |
2480 | + * from/to UTF-8 on-the-fly. |
2481 | + * |
2482 | + * To use it, replace a stream's streambuf: |
2483 | + * \code |
2484 | + * istream is; |
2485 | + * // ... |
2486 | + * icu_streambuf xbuf( "ISO-8859-1", is.rdbuf() ); |
2487 | + * is.ios::rdbuf( &xbuf ); |
2488 | + * \endcode |
2489 | + * Note that the %icu_streambuf must exist for as long as it's being used by |
2490 | + * the stream. If you are replacing the streambuf for a stream you did not |
2491 | + * create, you should set it back to the original streambuf: |
2492 | + * \code |
2493 | + * void f( ostream &os ) { |
2494 | + * icu_streambuf xbuf( "ISO-8859-1", os.rdbuf() ); |
2495 | + * try { |
2496 | + * os.ios::rdbuf( &xbuf ); |
2497 | + * // ... |
2498 | + * } |
2499 | + * catch ( ... ) { |
2500 | + * os.ios::rdbuf( xbuf.original() ); |
2501 | + * throw; |
2502 | + * } |
2503 | + * } |
2504 | + * \endcode |
2505 | + * |
2506 | + * While %icu_streambuf does support seeking, the positions are relative to the |
2507 | + * original byte stream. |
2508 | + */ |
2509 | +class icu_streambuf : public proxy_streambuf { |
2510 | +public: |
2511 | + /** |
2512 | + * Constructs an %icu_streambuf. |
2513 | + * |
2514 | + * @param charset The name of the character encoding to convert from/to. |
2515 | + * @param orig The original streambuf to read/write from/to. |
2516 | + */ |
2517 | + icu_streambuf( char const *charset, std::streambuf *orig ); |
2518 | + |
2519 | + /** |
2520 | + * Destructs an %icu_streambuf. |
2521 | + */ |
2522 | + ~icu_streambuf(); |
2523 | + |
2524 | + /** |
2525 | + * Checks whether it would be necessary to transcode from the given character |
2526 | + * encoding to UTF-8. |
2527 | + * |
2528 | + * @param charset The name of the character encoding to check. |
2529 | + * @return \c true only if it would be necessary to transcode from the given |
2530 | + * character encoding to UTF-8. |
2531 | + */ |
2532 | + static bool is_necessary( char const *charset ); |
2533 | + |
2534 | + /** |
2535 | + * Checks whether the given character set is supported for transcoding. |
2536 | + * |
2537 | + * @param charset The name of the character encoding to check. |
2538 | + * @return \c true only if the character encoding is supported. |
2539 | + */ |
2540 | + static bool is_supported( char const *charset ); |
2541 | + |
2542 | +protected: |
2543 | + pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode ); |
2544 | + pos_type seekpos( pos_type, std::ios_base::openmode ); |
2545 | + std::streambuf* setbuf( char_type*, std::streamsize ); |
2546 | + int sync(); |
2547 | + int_type overflow( int_type ); |
2548 | + int_type underflow(); |
2549 | + std::streamsize xsgetn( char_type*, std::streamsize ); |
2550 | + std::streamsize xsputn( char_type const*, std::streamsize ); |
2551 | + |
2552 | +private: |
2553 | + struct buf_type_base { |
2554 | + UChar pivot_buf_[ 4096 ], *pivot_source_, *pivot_target_; |
2555 | + |
2556 | + buf_type_base() { reset(); } |
2557 | + void reset(); |
2558 | + }; |
2559 | + |
2560 | + struct gbuf_type : buf_type_base { |
2561 | + utf8::encoded_char_type utf8_char_; |
2562 | + }; |
2563 | + gbuf_type g_; |
2564 | + |
2565 | + typedef buf_type_base pbuf_type; |
2566 | + pbuf_type p_; |
2567 | + |
2568 | + bool const no_conv_; // true = no conversion needed |
2569 | + UConverter *const external_conv_, *const utf8_conv_; |
2570 | + |
2571 | + void clear(); |
2572 | + static UConverter* create_conv( char const *charset ); |
2573 | + void resetg(); |
2574 | + |
2575 | + bool to_external( char_type const **from, char_type const *from_end, |
2576 | + char **to, char const *to_end, bool flush = false ); |
2577 | + |
2578 | + bool to_utf8( char const **from, char const *from_end, char_type **to, |
2579 | + char_type const *to_end, bool flush = false ); |
2580 | + |
2581 | + // forbid |
2582 | + icu_streambuf( icu_streambuf const& ); |
2583 | + icu_streambuf& operator=( icu_streambuf const& ); |
2584 | +}; |
2585 | + |
2586 | +/////////////////////////////////////////////////////////////////////////////// |
2587 | + |
2588 | +} // namespace zorba |
2589 | +#endif /* ZORBA_ICU_STREAMBUF_H */ |
2590 | +/* vim:set et sw=2 ts=2: */ |
2591 | |
2592 | === added file 'src/util/passthru_streambuf.cpp' |
2593 | --- src/util/passthru_streambuf.cpp 1970-01-01 00:00:00 +0000 |
2594 | +++ src/util/passthru_streambuf.cpp 2012-02-08 19:20:28 +0000 |
2595 | @@ -0,0 +1,105 @@ |
2596 | +/* |
2597 | + * Copyright 2006-2008 The FLWOR Foundation. |
2598 | + * |
2599 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
2600 | + * you may not use this file except in compliance with the License. |
2601 | + * You may obtain a copy of the License at |
2602 | + * |
2603 | + * http://www.apache.org/licenses/LICENSE-2.0 |
2604 | + * |
2605 | + * Unless required by applicable law or agreed to in writing, software |
2606 | + * distributed under the License is distributed on an "AS IS" BASIS, |
2607 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
2608 | + * See the License for the specific language governing permissions and |
2609 | + * limitations under the License. |
2610 | + */ |
2611 | + |
2612 | +#include "passthru_streambuf.h" |
2613 | + |
2614 | +using namespace std; |
2615 | + |
2616 | +namespace zorba { |
2617 | + |
2618 | +/////////////////////////////////////////////////////////////////////////////// |
2619 | + |
2620 | +passthru_streambuf::passthru_streambuf( char const*, streambuf *orig ) : |
2621 | + proxy_streambuf( orig ) |
2622 | +{ |
2623 | + if ( !orig ) |
2624 | + throw invalid_argument( "null streambuf" ); |
2625 | +} |
2626 | + |
2627 | +passthru_streambuf::~passthru_streambuf() { |
2628 | + // out-of-line since it's virtual |
2629 | +} |
2630 | + |
2631 | +void passthru_streambuf::imbue( std::locale const &loc ) { |
2632 | + original()->pubimbue( loc ); |
2633 | +} |
2634 | + |
2635 | +bool passthru_streambuf::is_necessary( char const *cc_charset ) { |
2636 | + zstring charset( cc_charset ); |
2637 | + ascii::trim_whitespace( charset ); |
2638 | + ascii::to_upper( charset ); |
2639 | + return charset != "ASCII" |
2640 | + && charset != "US-ASCII" |
2641 | + && charset != "UTF-8"; |
2642 | +} |
2643 | + |
2644 | +bool passthru_streambuf::is_supported( char const *cc_charset ) { |
2645 | + return !is_necessary( cc_charset ); // supported only if no transcoding needed |
2646 | +} |
2647 | + |
2648 | +passthru_streambuf::pos_type |
2649 | +passthru_streambuf::seekoff( off_type o, ios_base::seekdir d, |
2650 | + ios_base::openmode m ) { |
2651 | + return original()->pubseekoff( o, d, m ); |
2652 | +} |
2653 | + |
2654 | +passthru_streambuf::pos_type |
2655 | +passthru_streambuf::seekpos( pos_type p, ios_base::openmode m ) { |
2656 | + return original()->pubseekpos( p, m ); |
2657 | +} |
2658 | + |
2659 | +streambuf* passthru_streambuf::setbuf( char_type *p, streamsize s ) { |
2660 | + original()->pubsetbuf( p, s ); |
2661 | + return this; |
2662 | +} |
2663 | + |
2664 | +streamsize passthru_streambuf::showmanyc() { |
2665 | + return original()->in_avail(); |
2666 | +} |
2667 | + |
2668 | +int passthru_streambuf::sync() { |
2669 | + return original()->pubsync(); |
2670 | +} |
2671 | + |
2672 | +passthru_streambuf::int_type passthru_streambuf::overflow( int_type c ) { |
2673 | + return original()->sputc( c ); |
2674 | +} |
2675 | + |
2676 | +passthru_streambuf::int_type passthru_streambuf::pbackfail( int_type c ) { |
2677 | + return original()->sputbackc( traits_type::to_char_type( c ) ); |
2678 | +} |
2679 | + |
2680 | +passthru_streambuf::int_type passthru_streambuf::uflow() { |
2681 | + return original()->sbumpc(); |
2682 | +} |
2683 | + |
2684 | +passthru_streambuf::int_type passthru_streambuf::underflow() { |
2685 | + return original()->sgetc(); |
2686 | +} |
2687 | + |
2688 | +streamsize passthru_streambuf::xsgetn( char_type *to, streamsize size ) { |
2689 | + return original()->sgetn( to, size ); |
2690 | +} |
2691 | + |
2692 | +streamsize passthru_streambuf::xsputn( char_type const *from, |
2693 | + streamsize size ) { |
2694 | + return original()->sputn( from, size ); |
2695 | +} |
2696 | + |
2697 | +/////////////////////////////////////////////////////////////////////////////// |
2698 | + |
2699 | +} // namespace zorba |
2700 | +/* vim:set et sw=2 ts=2: */ |
2701 | |
2702 | === added file 'src/util/passthru_streambuf.h' |
2703 | --- src/util/passthru_streambuf.h 1970-01-01 00:00:00 +0000 |
2704 | +++ src/util/passthru_streambuf.h 2012-02-08 19:20:28 +0000 |
2705 | @@ -0,0 +1,76 @@ |
2706 | +/* |
2707 | + * Copyright 2006-2008 The FLWOR Foundation. |
2708 | + * |
2709 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
2710 | + * you may not use this file except in compliance with the License. |
2711 | + * You may obtain a copy of the License at |
2712 | + * |
2713 | + * http://www.apache.org/licenses/LICENSE-2.0 |
2714 | + * |
2715 | + * Unless required by applicable law or agreed to in writing, software |
2716 | + * distributed under the License is distributed on an "AS IS" BASIS, |
2717 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
2718 | + * See the License for the specific language governing permissions and |
2719 | + * limitations under the License. |
2720 | + */ |
2721 | + |
2722 | +#ifndef ZORBA_PASSTHRU_STREAMBUF_H |
2723 | +#define ZORBA_PASSTHRU_STREAMBUF_H |
2724 | + |
2725 | +#include <zorba/transcode_streambuf.h> |
2726 | + |
2727 | +namespace zorba { |
2728 | + |
2729 | +/////////////////////////////////////////////////////////////////////////////// |
2730 | + |
2731 | +/** |
2732 | + * A %passthru_streambuf is-a std::streambuf that forwards to the original. |
2733 | + */ |
2734 | +class passthru_streambuf : public proxy_streambuf { |
2735 | +public: |
2736 | + /** |
2737 | + * Constructs a %passthru_streambuf. |
2738 | + * |
2739 | + * @param charset The name of the character encoding (unused: no conversion). |
2740 | + * @param orig The original streambuf to read/write from/to. |
2741 | + */ |
2742 | + passthru_streambuf( char const *charset, std::streambuf *orig ); |
2743 | + |
2744 | + /** |
2745 | + * Destructs a %passthru_streambuf. |
2746 | + */ |
2747 | + ~passthru_streambuf(); |
2748 | + |
2749 | + /** |
2750 | + * Checks whether the given character set is supported for transcoding. |
2751 | + * |
2752 | + * @param charset The name of the character encoding to check. |
2753 | + * @return \c true only if the character encoding is supported. |
2754 | + */ |
2755 | + static bool is_supported( char const *charset ); |
2756 | + |
2757 | +protected: |
2758 | + void imbue( std::locale const& ); |
2759 | + pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode ); |
2760 | + pos_type seekpos( pos_type, std::ios_base::openmode ); |
2761 | + std::streambuf* setbuf( char_type*, std::streamsize ); |
2762 | + std::streamsize showmanyc(); |
2763 | + int sync(); |
2764 | + int_type overflow( int_type ); |
2765 | + int_type pbackfail( int_type ); |
2766 | + int_type uflow(); |
2767 | + int_type underflow(); |
2768 | + std::streamsize xsgetn( char_type*, std::streamsize ); |
2769 | + std::streamsize xsputn( char_type const*, std::streamsize ); |
2770 | + |
2771 | +private: |
2772 | + // forbid |
2773 | + passthru_streambuf( passthru_streambuf const& ); |
2774 | + passthru_streambuf& operator=( passthru_streambuf const& ); |
2775 | +}; |
2776 | + |
2777 | +/////////////////////////////////////////////////////////////////////////////// |
2778 | + |
2779 | +} // namespace zorba |
2780 | +#endif /* ZORBA_PASSTHRU_STREAMBUF_H */ |
2781 | +/* vim:set et sw=2 ts=2: */ |
2782 | |
2783 | === added file 'src/util/transcode_streambuf.h' |
2784 | --- src/util/transcode_streambuf.h 1970-01-01 00:00:00 +0000 |
2785 | +++ src/util/transcode_streambuf.h 2012-02-08 19:20:28 +0000 |
2786 | @@ -0,0 +1,47 @@ |
2787 | +/* |
2788 | + * Copyright 2006-2008 The FLWOR Foundation. |
2789 | + * |
2790 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
2791 | + * you may not use this file except in compliance with the License. |
2792 | + * You may obtain a copy of the License at |
2793 | + * |
2794 | + * http://www.apache.org/licenses/LICENSE-2.0 |
2795 | + * |
2796 | + * Unless required by applicable law or agreed to in writing, software |
2797 | + * distributed under the License is distributed on an "AS IS" BASIS, |
2798 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
2799 | + * See the License for the specific language governing permissions and |
2800 | + * limitations under the License. |
2801 | + */ |
2802 | + |
2803 | +#ifndef ZORBA_TRANSCODE_STREAMBUF_H |
2804 | +#define ZORBA_TRANSCODE_STREAMBUF_H |
2805 | + |
2806 | +#include <zorba/config.h> |
2807 | + |
2808 | +/////////////////////////////////////////////////////////////////////////////// |
2809 | + |
2810 | +#ifdef ZORBA_NO_UNICODE |
2811 | +# include "passthru_streambuf.h" |
2812 | +#else |
2813 | +# include "icu_streambuf.h" |
2814 | +#endif /* ZORBA_NO_UNICODE */ |
2815 | + |
2816 | +namespace zorba { |
2817 | +namespace internal { |
2818 | +namespace transcode { |
2819 | + |
2820 | +#ifdef ZORBA_NO_UNICODE |
2821 | +typedef passthru_streambuf streambuf; |
2822 | +#else |
2823 | +typedef icu_streambuf streambuf; |
2824 | +#endif /* ZORBA_NO_UNICODE */ |
2825 | + |
2826 | +} // namespace transcode |
2827 | +} // namespace internal |
2828 | +} // namespace zorba |
2829 | + |
2830 | +/////////////////////////////////////////////////////////////////////////////// |
2831 | + |
2832 | +#endif /* ZORBA_TRANSCODE_STREAMBUF_H */ |
2833 | +/* vim:set et sw=2 ts=2: */ |
2834 | |
2835 | === added file 'test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res' |
2836 | --- test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res 1970-01-01 00:00:00 +0000 |
2837 | +++ test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res 2012-02-08 19:20:28 +0000 |
2838 | @@ -0,0 +1,1 @@ |
2839 | +üäö |
2840 | |
2841 | === added file 'test/rbkt/Queries/zorba/file/cp1252.txt' |
2842 | --- test/rbkt/Queries/zorba/file/cp1252.txt 1970-01-01 00:00:00 +0000 |
2843 | +++ test/rbkt/Queries/zorba/file/cp1252.txt 2012-02-08 19:20:28 +0000 |
2844 | @@ -0,0 +1,1 @@ |
2845 | +üäö |
2846 | |
2847 | === added file 'test/rbkt/Queries/zorba/file/cp1252.xq' |
2848 | --- test/rbkt/Queries/zorba/file/cp1252.xq 1970-01-01 00:00:00 +0000 |
2849 | +++ test/rbkt/Queries/zorba/file/cp1252.xq 2012-02-08 19:20:28 +0000 |
2850 | @@ -0,0 +1,3 @@ |
2851 | +import module namespace f = "http://expath.org/ns/file"; |
2852 | + |
2853 | +f:read-text(fn:resolve-uri("cp1252.txt"), "CP1252") |
2854 | |
2855 | === added file 'test/rbkt/Queries/zorba/file/invalid_encoding.spec' |
2856 | --- test/rbkt/Queries/zorba/file/invalid_encoding.spec 1970-01-01 00:00:00 +0000 |
2857 | +++ test/rbkt/Queries/zorba/file/invalid_encoding.spec 2012-02-08 19:20:28 +0000 |
2858 | @@ -0,0 +1,1 @@ |
2859 | +Error: http://expath.org/ns/file:FOFL0006 |
2860 | |
2861 | === added file 'test/rbkt/Queries/zorba/file/invalid_encoding.xq' |
2862 | --- test/rbkt/Queries/zorba/file/invalid_encoding.xq 1970-01-01 00:00:00 +0000 |
2863 | +++ test/rbkt/Queries/zorba/file/invalid_encoding.xq 2012-02-08 19:20:28 +0000 |
2864 | @@ -0,0 +1,3 @@ |
2865 | +import module namespace f = "http://expath.org/ns/file"; |
2866 | + |
2867 | +f:read-text(fn:resolve-uri("cp1252.txt"), "FOO") |
2868 | |
2869 | === modified file 'test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq' |
2870 | --- test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq 2011-08-23 07:11:31 +0000 |
2871 | +++ test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq 2012-02-08 19:20:28 +0000 |
2872 | @@ -7,9 +7,9 @@ |
2873 | auth-method="Basic" |
2874 | send-authorization="true" |
2875 | username="zorba" |
2876 | - password="blub"/>; |
2877 | + password="blub" |
2878 | + override-media-type="application/xml; charset=utf-8"/>; |
2879 | |
2880 | variable $http-res := http:send-request($req, (), ()); |
2881 | |
2882 | $http-res[2] |
2883 | - |