1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED 9 #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED 10 11 #include <boost/locale/config.hpp> 12 #include <boost/cstdint.hpp> 13 #include <boost/assert.hpp> 14 #ifdef BOOST_MSVC 15 # pragma warning(push) 16 # pragma warning(disable : 4275 4251 4231 4660) 17 #endif 18 19 20 namespace boost { 21 22 namespace locale { 23 24 /// 25 /// \brief This namespase contains all operations required for boundary analysis of text 26 /// 27 namespace boundary { 28 /// 29 /// \defgroup boundary Boundary Analysis 30 /// 31 /// This module contains all operations required for boundary analysis of text: character, word, like and sentence boundaries 32 /// 33 /// @{ 34 /// 35 36 /// 37 /// This type describes a possible boundary analysis alternatives. 38 /// 39 enum boundary_type { 40 character, ///< Analyse the text for character boundaries 41 word, ///< Analyse the text for word boundaries 42 sentence, ///< Analyse the text for Find sentence boundaries 43 line ///< Analyse the text for positions suitable for line breaks 44 }; 45 46 /// 47 /// \brief Flags used with word boundary analysis -- the type of the word, line or sentence boundary found. 48 /// 49 /// It is a bit-mask that represents various combinations of rules used to select this specific boundary. 50 /// 51 typedef uint32_t rule_type; 52 53 /// 54 /// \anchor bl_boundary_word_rules 55 /// \name Flags that describe a type of word selected 56 /// @{ 57 static const rule_type 58 word_none = 0x0000F, ///< Not a word, like white space or punctuation mark 59 word_number = 0x000F0, ///< Word that appear to be a number 60 word_letter = 0x00F00, ///< Word that contains letters, excluding kana and ideographic characters 61 word_kana = 0x0F000, ///< Word that contains kana characters 62 word_ideo = 0xF0000, ///< Word that contains ideographic characters 63 word_any = 0xFFFF0, ///< Any word including numbers, 0 is special flag, equivalent to 15 64 word_letters = 0xFFF00, ///< Any word, excluding numbers but including letters, kana and ideograms. 65 word_kana_ideo = 0xFF000, ///< Word that includes kana or ideographic characters 66 word_mask = 0xFFFFF; ///< Full word mask - select all possible variants 67 /// @} 68 69 /// 70 /// \anchor bl_boundary_line_rules 71 /// \name Flags that describe a type of line break 72 /// @{ 73 static const rule_type 74 line_soft = 0x0F, ///< Soft line break: optional but not required 75 line_hard = 0xF0, ///< Hard line break: like break is required (as per CR/LF) 76 line_any = 0xFF, ///< Soft or Hard line break 77 line_mask = 0xFF; ///< Select all types of line breaks 78 79 /// @} 80 81 /// 82 /// \anchor bl_boundary_sentence_rules 83 /// \name Flags that describe a type of sentence break 84 /// 85 /// @{ 86 static const rule_type 87 sentence_term = 0x0F, ///< \brief The sentence was terminated with a sentence terminator 88 /// like ".", "!" possible followed by hard separator like CR, LF, PS 89 sentence_sep = 0xF0, ///< \brief The sentence does not contain terminator like ".", "!" but ended with hard separator 90 /// like CR, LF, PS or end of input. 91 sentence_any = 0xFF, ///< Either first or second sentence break type;. 92 sentence_mask = 0xFF; ///< Select all sentence breaking points 93 94 ///@} 95 96 /// 97 /// \name Flags that describe a type of character break. 98 /// 99 /// At this point break iterator does not distinguish different 100 /// kinds of characters so it is used for consistency. 101 ///@{ 102 static const rule_type 103 character_any = 0xF, ///< Not in use, just for consistency 104 character_mask = 0xF; ///< Select all character breaking points 105 106 ///@} 107 108 /// 109 /// This function returns the mask that covers all variants for specific boundary type 110 /// boundary_rule(boundary_type t)111 inline rule_type boundary_rule(boundary_type t) 112 { 113 switch(t) { 114 case character: return character_mask; 115 case word: return word_mask; 116 case sentence: return sentence_mask; 117 case line: return line_mask; 118 default: return 0; 119 } 120 } 121 122 /// 123 ///@} 124 /// 125 126 } // boundary 127 } // locale 128 } // boost 129 130 131 #ifdef BOOST_MSVC 132 #pragma warning(pop) 133 #endif 134 135 #endif 136 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 137