1[/ 2 Copyright 2006-2007 John Maddock. 3 Distributed under the Boost Software License, Version 1.0. 4 (See accompanying file LICENSE_1_0.txt or copy at 5 http://www.boost.org/LICENSE_1_0.txt). 6] 7 8[section:uni_iter Unicode Iterators] 9 10[h4 Synopsis] 11 12``#include <boost/regex/pending/unicode_iterator.hpp>`` 13 14 template <class BaseIterator, class U16Type = ::boost::uint16_t> 15 class u32_to_u16_iterator; 16 17 template <class BaseIterator, class U32Type = ::boost::uint32_t> 18 class u16_to_u32_iterator; 19 20 template <class BaseIterator, class U8Type = ::boost::uint8_t> 21 class u32_to_u8_iterator; 22 23 template <class BaseIterator, class U32Type = ::boost::uint32_t> 24 class u8_to_u32_iterator; 25 26 template <class BaseIterator> 27 class utf16_output_iterator; 28 29 template <class BaseIterator> 30 class utf8_output_iterator; 31 32 33[h4 Description] 34 35This header contains a selection of iterator adaptors that make a sequence of characters in one 36encoding "look like" a read-only sequence of characters in another encoding. 37 38 template <class BaseIterator, class U16Type = ::boost::uint16_t> 39 class u32_to_u16_iterator 40 { 41 u32_to_u16_iterator(); 42 u32_to_u16_iterator(BaseIterator start_position); 43 44 // Other standard BidirectionalIterator members here... 45 }; 46 47A Bidirectional iterator adapter that makes an underlying sequence of UTF32 characters look like 48a (read-only) sequence of UTF16 characters. The UTF16 characters are encoded in the platform's 49native byte order. 50 51 template <class BaseIterator, class U32Type = ::boost::uint32_t> 52 class u16_to_u32_iterator 53 { 54 u16_to_u32_iterator(); 55 u16_to_u32_iterator(BaseIterator start_position); 56 u16_to_u32_iterator(BaseIterator start_position, BaseIterator start_range, BaseIterator end_range); 57 58 // Other standard BidirectionalIterator members here... 
59 }; 60 61A Bidirectional iterator adapter that makes an underlying sequence of UTF16 characters 62(in the platform's native byte order) look like a (read-only) sequence of UTF32 characters. 63 64The three-arg constructor of this class takes the start and end of the underlying sequence 65as well as the position to start iteration from. This constructor validates that the 66underlying sequence has validly encoded endpoints: this prevents accidentally incrementing/decrementing 67past the end of the underlying sequence as a result of invalid UTF16 code sequences at the endpoints 68of the underlying range. 69 70 template <class BaseIterator, class U8Type = ::boost::uint8_t> 71 class u32_to_u8_iterator 72 { 73 u32_to_u8_iterator(); 74 u32_to_u8_iterator(BaseIterator start_position); 75 76 // Other standard BidirectionalIterator members here... 77 }; 78 79A Bidirectional iterator adapter that makes an underlying sequence of UTF32 characters look like 80a (read-only) sequence of UTF8 characters. 81 82 template <class BaseIterator, class U32Type = ::boost::uint32_t> 83 class u8_to_u32_iterator 84 { 85 u8_to_u32_iterator(); 86 u8_to_u32_iterator(BaseIterator start_position); 87 u8_to_u32_iterator(BaseIterator start_position, BaseIterator start_range, BaseIterator end_range); 88 89 // Other standard BidirectionalIterator members here... 90 }; 91 92A Bidirectional iterator adapter that makes an underlying sequence of UTF8 characters 93look like a (read-only) sequence of UTF32 characters. 94 95The three-arg constructor of this class takes the start and end of the underlying sequence 96as well as the position to start iteration from. This constructor validates that the 97underlying sequence has validly encoded endpoints: this prevents accidentally incrementing/decrementing 98past the end of the underlying sequence as a result of invalid UTF8 code sequences at the endpoints 99of the underlying range. 
100 101 template <class BaseIterator> 102 class utf16_output_iterator 103 { 104 utf16_output_iterator(const BaseIterator& b); 105 utf16_output_iterator(const utf16_output_iterator& that); 106 utf16_output_iterator& operator=(const utf16_output_iterator& that); 107 108 // Other standard OutputIterator members here... 109 }; 110 111Simple OutputIterator adapter - accepts UTF32 values as input, and forwards them to ['BaseIterator b] 112as UTF16. Both UTF32 and UTF16 values are in native byte order. 113 114 template <class BaseIterator> 115 class utf8_output_iterator 116 { 117 utf8_output_iterator(const BaseIterator& b); 118 utf8_output_iterator(const utf8_output_iterator& that); 119 utf8_output_iterator& operator=(const utf8_output_iterator& that); 120 121 // Other standard OutputIterator members here... 122 }; 123 124Simple OutputIterator adapter - accepts UTF32 values as input, and forwards them to ['BaseIterator b] 125as UTF8. The UTF32 input values must be in native byte order. 126 127[endsect] 128