1    	// wstring_convert implementation -*- C++ -*-
2    	
3    	// Copyright (C) 2015-2017 Free Software Foundation, Inc.
4    	//
5    	// This file is part of the GNU ISO C++ Library.  This library is free
6    	// software; you can redistribute it and/or modify it under the
7    	// terms of the GNU General Public License as published by the
8    	// Free Software Foundation; either version 3, or (at your option)
9    	// any later version.
10   	
11   	// This library is distributed in the hope that it will be useful,
12   	// but WITHOUT ANY WARRANTY; without even the implied warranty of
13   	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   	// GNU General Public License for more details.
15   	
16   	// Under Section 7 of GPL version 3, you are granted additional
17   	// permissions described in the GCC Runtime Library Exception, version
18   	// 3.1, as published by the Free Software Foundation.
19   	
20   	// You should have received a copy of the GNU General Public License and
21   	// a copy of the GCC Runtime Library Exception along with this program;
22   	// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23   	// <http://www.gnu.org/licenses/>.
24   	
25   	/** @file bits/locale_conv.h
26   	 *  This is an internal header file, included by other library headers.
27   	 *  Do not attempt to use it directly. @headername{locale}
28   	 */
29   	
30   	#ifndef _LOCALE_CONV_H
31   	#define _LOCALE_CONV_H 1
32   	
33   	#if __cplusplus < 201103L
34   	# include <bits/c++0x_warning.h>
35   	#else
36   	
37   	#include <streambuf>
38   	#include "stringfwd.h"
39   	#include "allocator.h"
40   	#include "codecvt.h"
41   	#include "unique_ptr.h"
42   	
43   	namespace std _GLIBCXX_VISIBILITY(default)
44   	{
45   	_GLIBCXX_BEGIN_NAMESPACE_VERSION
46   	
47   	  /**
48   	   * @addtogroup locales
49   	   * @{
50   	   */
51   	
52   	  template<typename _OutStr, typename _InChar, typename _Codecvt,
53   		   typename _State, typename _Fn>
54   	    bool
55   	    __do_str_codecvt(const _InChar* __first, const _InChar* __last,
56   			     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
57   			     size_t& __count, _Fn __fn)
58   	    {
59   	      if (__first == __last)
60   		{
61   		  __outstr.clear();
62   		  __count = 0;
63   		  return true;
64   		}
65   	
66   	      size_t __outchars = 0;
67   	      auto __next = __first;
68   	      const auto __maxlen = __cvt.max_length() + 1;
69   	
70   	      codecvt_base::result __result;
71   	      do
72   		{
73   		  __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
74   		  auto __outnext = &__outstr.front() + __outchars;
75   		  auto const __outlast = &__outstr.back() + 1;
76   		  __result = (__cvt.*__fn)(__state, __next, __last, __next,
77   						__outnext, __outlast, __outnext);
78   		  __outchars = __outnext - &__outstr.front();
79   		}
80   	      while (__result == codecvt_base::partial && __next != __last
81   		     && (__outstr.size() - __outchars) < __maxlen);
82   	
83   	      if (__result == codecvt_base::error)
84   		{
85   		  __count = __next - __first;
86   		  return false;
87   		}
88   	
89   	      if (__result == codecvt_base::noconv)
90   		{
91   		  __outstr.assign(__first, __last);
92   		  __count = __last - __first;
93   		}
94   	      else
95   		{
96   		  __outstr.resize(__outchars);
97   		  __count = __next - __first;
98   		}
99   	
100  	      return true;
101  	    }
102  	
103  	  // Convert narrow character string to wide.
104  	  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
105  	    inline bool
106  	    __str_codecvt_in(const char* __first, const char* __last,
107  			     basic_string<_CharT, _Traits, _Alloc>& __outstr,
108  			     const codecvt<_CharT, char, _State>& __cvt,
109  			     _State& __state, size_t& __count)
110  	    {
111  	      using _Codecvt = codecvt<_CharT, char, _State>;
112  	      using _ConvFn
113  		= codecvt_base::result
114  		  (_Codecvt::*)(_State&, const char*, const char*, const char*&,
115  				_CharT*, _CharT*, _CharT*&) const;
116  	      _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
117  	      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
118  				      __count, __fn);
119  	    }
120  	
121  	  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
122  	    inline bool
123  	    __str_codecvt_in(const char* __first, const char* __last,
124  			     basic_string<_CharT, _Traits, _Alloc>& __outstr,
125  			     const codecvt<_CharT, char, _State>& __cvt)
126  	    {
127  	      _State __state = {};
128  	      size_t __n;
129  	      return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
130  	    }
131  	
132  	  // Convert wide character string to narrow.
133  	  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
134  	    inline bool
135  	    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
136  			      basic_string<char, _Traits, _Alloc>& __outstr,
137  			      const codecvt<_CharT, char, _State>& __cvt,
138  			      _State& __state, size_t& __count)
139  	    {
140  	      using _Codecvt = codecvt<_CharT, char, _State>;
141  	      using _ConvFn
142  		= codecvt_base::result
143  		  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
144  				char*, char*, char*&) const;
145  	      _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
146  	      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
147  				      __count, __fn);
148  	    }
149  	
150  	  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
151  	    inline bool
152  	    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
153  			      basic_string<char, _Traits, _Alloc>& __outstr,
154  			      const codecvt<_CharT, char, _State>& __cvt)
155  	    {
156  	      _State __state = {};
157  	      size_t __n;
158  	      return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
159  	    }
160  	
161  	#ifdef _GLIBCXX_USE_WCHAR_T
162  	
163  	_GLIBCXX_BEGIN_NAMESPACE_CXX11
164  	
165  	  /// String conversions
166  	  template<typename _Codecvt, typename _Elem = wchar_t,
167  		   typename _Wide_alloc = allocator<_Elem>,
168  		   typename _Byte_alloc = allocator<char>>
169  	    class wstring_convert
170  	    {
171  	    public:
172  	      typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
173  	      typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
174  	      typedef typename _Codecvt::state_type 			   state_type;
175  	      typedef typename wide_string::traits_type::int_type	   int_type;
176  	
177  	      /** Default constructor.
178  	       *
179  	       * @param  __pcvt The facet to use for conversions.
180  	       *
181  	       * Takes ownership of @p __pcvt and will delete it in the destructor.
182  	       */
183  	      explicit
184  	      wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
185  	      {
186  		if (!_M_cvt)
187  		  __throw_logic_error("wstring_convert");
188  	      }
189  	
190  	      /** Construct with an initial converstion state.
191  	       *
192  	       * @param  __pcvt The facet to use for conversions.
193  	       * @param  __state Initial conversion state.
194  	       *
195  	       * Takes ownership of @p __pcvt and will delete it in the destructor.
196  	       * The object's conversion state will persist between conversions.
197  	       */
198  	      wstring_convert(_Codecvt* __pcvt, state_type __state)
199  	      : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
200  	      {
201  		if (!_M_cvt)
202  		  __throw_logic_error("wstring_convert");
203  	      }
204  	
205  	      /** Construct with error strings.
206  	       *
207  	       * @param  __byte_err A string to return on failed conversions.
208  	       * @param  __wide_err A wide string to return on failed conversions.
209  	       */
210  	      explicit
211  	      wstring_convert(const byte_string& __byte_err,
212  			      const wide_string& __wide_err = wide_string())
213  	      : _M_cvt(new _Codecvt),
214  		_M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
215  		_M_with_strings(true)
216  	      {
217  		if (!_M_cvt)
218  		  __throw_logic_error("wstring_convert");
219  	      }
220  	
221  	      ~wstring_convert() = default;
222  	
223  	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
224  	      // 2176. Special members for wstring_convert and wbuffer_convert
225  	      wstring_convert(const wstring_convert&) = delete;
226  	      wstring_convert& operator=(const wstring_convert&) = delete;
227  	
228  	      /// @{ Convert from bytes.
229  	      wide_string
230  	      from_bytes(char __byte)
231  	      {
232  		char __bytes[2] = { __byte };
233  		return from_bytes(__bytes, __bytes+1);
234  	      }
235  	
236  	      wide_string
237  	      from_bytes(const char* __ptr)
238  	      { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
239  	
240  	      wide_string
241  	      from_bytes(const byte_string& __str)
242  	      {
243  		auto __ptr = __str.data();
244  		return from_bytes(__ptr, __ptr + __str.size());
245  	      }
246  	
247  	      wide_string
248  	      from_bytes(const char* __first, const char* __last)
249  	      {
250  		if (!_M_with_cvtstate)
251  		  _M_state = state_type();
252  		wide_string __out{ _M_wide_err_string.get_allocator() };
253  		if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
254  				     _M_count))
255  		  return __out;
256  		if (_M_with_strings)
257  		  return _M_wide_err_string;
258  		__throw_range_error("wstring_convert::from_bytes");
259  	      }
260  	      /// @}
261  	
262  	      /// @{ Convert to bytes.
263  	      byte_string
264  	      to_bytes(_Elem __wchar)
265  	      {
266  		_Elem __wchars[2] = { __wchar };
267  		return to_bytes(__wchars, __wchars+1);
268  	      }
269  	
270  	      byte_string
271  	      to_bytes(const _Elem* __ptr)
272  	      {
273  		return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
274  	      }
275  	
276  	      byte_string
277  	      to_bytes(const wide_string& __wstr)
278  	      {
279  		auto __ptr = __wstr.data();
280  		return to_bytes(__ptr, __ptr + __wstr.size());
281  	      }
282  	
283  	      byte_string
284  	      to_bytes(const _Elem* __first, const _Elem* __last)
285  	      {
286  		if (!_M_with_cvtstate)
287  		  _M_state = state_type();
288  		byte_string __out{ _M_byte_err_string.get_allocator() };
289  		if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
290  				      _M_count))
291  		  return __out;
292  		if (_M_with_strings)
293  		  return _M_byte_err_string;
294  		__throw_range_error("wstring_convert::to_bytes");
295  	      }
296  	      /// @}
297  	
298  	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
299  	      // 2174. wstring_convert::converted() should be noexcept
300  	      /// The number of elements successfully converted in the last conversion.
301  	      size_t converted() const noexcept { return _M_count; }
302  	
303  	      /// The final conversion state of the last conversion.
304  	      state_type state() const { return _M_state; }
305  	
306  	    private:
307  	      unique_ptr<_Codecvt>	_M_cvt;
308  	      byte_string		_M_byte_err_string;
309  	      wide_string		_M_wide_err_string;
310  	      state_type		_M_state = state_type();
311  	      size_t			_M_count = 0;
312  	      bool			_M_with_cvtstate = false;
313  	      bool			_M_with_strings = false;
314  	    };
315  	
316  	_GLIBCXX_END_NAMESPACE_CXX11
317  	
318  	  /// Buffer conversions
319  	  template<typename _Codecvt, typename _Elem = wchar_t,
320  		   typename _Tr = char_traits<_Elem>>
321  	    class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
322  	    {
323  	      typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
324  	
325  	    public:
326  	      typedef typename _Codecvt::state_type state_type;
327  	
328  	      /** Default constructor.
329  	       *
330  	       * @param  __bytebuf The underlying byte stream buffer.
331  	       * @param  __pcvt    The facet to use for conversions.
332  	       * @param  __state   Initial conversion state.
333  	       *
334  	       * Takes ownership of @p __pcvt and will delete it in the destructor.
335  	       */
336  	      explicit
337  	      wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
338  			      state_type __state = state_type())
339  	      : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
340  	      {
341  		if (!_M_cvt)
342  		  __throw_logic_error("wbuffer_convert");
343  	
344  		_M_always_noconv = _M_cvt->always_noconv();
345  	
346  		if (_M_buf)
347  		  {
348  		    this->setp(_M_put_area, _M_put_area + _S_buffer_length);
349  		    this->setg(_M_get_area + _S_putback_length,
350  			       _M_get_area + _S_putback_length,
351  			       _M_get_area + _S_putback_length);
352  		  }
353  	      }
354  	
355  	      ~wbuffer_convert() = default;
356  	
357  	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
358  	      // 2176. Special members for wstring_convert and wbuffer_convert
359  	      wbuffer_convert(const wbuffer_convert&) = delete;
360  	      wbuffer_convert& operator=(const wbuffer_convert&) = delete;
361  	
362  	      streambuf* rdbuf() const noexcept { return _M_buf; }
363  	
364  	      streambuf*
365  	      rdbuf(streambuf *__bytebuf) noexcept
366  	      {
367  		auto __prev = _M_buf;
368  		_M_buf = __bytebuf;
369  		return __prev;
370  	      }
371  	
372  	      /// The conversion state following the last conversion.
373  	      state_type state() const noexcept { return _M_state; }
374  	
375  	    protected:
376  	      int
377  	      sync()
378  	      { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
379  	
380  	      typename _Wide_streambuf::int_type
381  	      overflow(typename _Wide_streambuf::int_type __out)
382  	      {
383  		if (!_M_buf || !_M_conv_put())
384  		  return _Tr::eof();
385  		else if (!_Tr::eq_int_type(__out, _Tr::eof()))
386  		  return this->sputc(__out);
387  		return _Tr::not_eof(__out);
388  	      }
389  	
390  	      typename _Wide_streambuf::int_type
391  	      underflow()
392  	      {
393  		if (!_M_buf)
394  		  return _Tr::eof();
395  	
396  		if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
397  		  return _Tr::to_int_type(*this->gptr());
398  		else
399  		  return _Tr::eof();
400  	      }
401  	
402  	      streamsize
403  	      xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
404  	      {
405  		if (!_M_buf || __n == 0)
406  		  return 0;
407  		streamsize __done = 0;
408  		do
409  		{
410  		  auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
411  						   __n - __done);
412  		  _Tr::copy(this->pptr(), __s + __done, __nn);
413  		  this->pbump(__nn);
414  		  __done += __nn;
415  		} while (__done < __n && _M_conv_put());
416  		return __done;
417  	      }
418  	
419  	    private:
420  	      // fill the get area from converted contents of the byte stream buffer
421  	      bool
422  	      _M_conv_get()
423  	      {
424  		const streamsize __pb1 = this->gptr() - this->eback();
425  		const streamsize __pb2 = _S_putback_length;
426  		const streamsize __npb = std::min(__pb1, __pb2);
427  	
428  		_Tr::move(_M_get_area + _S_putback_length - __npb,
429  			  this->gptr() - __npb, __npb);
430  	
431  		streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
432  		__nbytes = std::min(__nbytes, _M_buf->in_avail());
433  		if (__nbytes < 1)
434  		  __nbytes = 1;
435  		__nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
436  		if (__nbytes < 1)
437  		  return false;
438  		__nbytes += _M_unconv;
439  	
440  		// convert _M_get_buf into _M_get_area
441  	
442  		_Elem* __outbuf = _M_get_area + _S_putback_length;
443  		_Elem* __outnext = __outbuf;
444  		const char* __bnext = _M_get_buf;
445  	
446  		codecvt_base::result __result;
447  		if (_M_always_noconv)
448  		  __result = codecvt_base::noconv;
449  		else
450  		  {
451  		    _Elem* __outend = _M_get_area + _S_buffer_length;
452  	
453  		    __result = _M_cvt->in(_M_state,
454  					  __bnext, __bnext + __nbytes, __bnext,
455  					  __outbuf, __outend, __outnext);
456  		  }
457  	
458  		if (__result == codecvt_base::noconv)
459  		  {
460  		    // cast is safe because noconv means _Elem is same type as char
461  		    auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
462  		    _Tr::copy(__outbuf, __get_buf, __nbytes);
463  		    _M_unconv = 0;
464  		    return true;
465  		  }
466  	
467  		if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
468  		  char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
469  	
470  		this->setg(__outbuf, __outbuf, __outnext);
471  	
472  		return __result != codecvt_base::error;
473  	      }
474  	
475  	      // unused
476  	      bool
477  	      _M_put(...)
478  	      { return false; }
479  	
480  	      bool
481  	      _M_put(const char* __p, streamsize __n)
482  	      {
483  		if (_M_buf->sputn(__p, __n) < __n)
484  		  return false;
485  		return true;
486  	      }
487  	
488  	      // convert the put area and write to the byte stream buffer
489  	      bool
490  	      _M_conv_put()
491  	      {
492  		_Elem* const __first = this->pbase();
493  		const _Elem* const __last = this->pptr();
494  		const streamsize __pending = __last - __first;
495  	
496  		if (_M_always_noconv)
497  		  return _M_put(__first, __pending);
498  	
499  		char __outbuf[2 * _S_buffer_length];
500  	
501  		const _Elem* __next = __first;
502  		const _Elem* __start;
503  		do
504  		  {
505  		    __start = __next;
506  		    char* __outnext = __outbuf;
507  		    char* const __outlast = __outbuf + sizeof(__outbuf);
508  		    auto __result = _M_cvt->out(_M_state, __next, __last, __next,
509  						__outnext, __outlast, __outnext);
510  		    if (__result == codecvt_base::error)
511  		      return false;
512  		    else if (__result == codecvt_base::noconv)
513  		      return _M_put(__next, __pending);
514  	
515  		    if (!_M_put(__outbuf, __outnext - __outbuf))
516  		      return false;
517  		  }
518  		while (__next != __last && __next != __start);
519  	
520  		if (__next != __last)
521  		  _Tr::move(__first, __next, __last - __next);
522  	
523  		this->pbump(__first - __next);
524  		return __next != __first;
525  	      }
526  	
527  	      streambuf*		_M_buf;
528  	      unique_ptr<_Codecvt>	_M_cvt;
529  	      state_type		_M_state;
530  	
531  	      static const streamsize	_S_buffer_length = 32;
532  	      static const streamsize	_S_putback_length = 3;
533  	      _Elem                     _M_put_area[_S_buffer_length];
534  	      _Elem                     _M_get_area[_S_buffer_length];
535  	      streamsize		_M_unconv = 0;
536  	      char			_M_get_buf[_S_buffer_length-_S_putback_length];
537  	      bool			_M_always_noconv;
538  	    };
539  	
540  	#endif  // _GLIBCXX_USE_WCHAR_T
541  	
542  	  /// @} group locales
543  	
544  	_GLIBCXX_END_NAMESPACE_VERSION
545  	} // namespace
546  	
547  	#endif // __cplusplus
548  	
549  	#endif /* _LOCALE_CONV_H */
550