/* Charset handling while reading PO files.
   Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifndef _PO_CHARSET_H
#define _PO_CHARSET_H

#include <stdbool.h>
#include <stddef.h>

#if HAVE_ICONV
#include <iconv.h>
#endif

/* DLL_VARIABLE decorates the variables declared below.  It is expected to
   be supplied by the build configuration (presumably as an import/export
   attribute on platforms that need one -- confirm against the config
   header).  Default to nothing so that this header also parses when it is
   included without that configuration; an existing definition always
   takes precedence.  */
#ifndef DLL_VARIABLE
# define DLL_VARIABLE
#endif


#ifdef __cplusplus
extern "C" {
#endif


/* Canonicalize an encoding name.
   The results of this function are statically allocated and can be
   compared using ==.  */
extern const char *po_charset_canonicalize (const char *charset);

/* The canonicalized encoding name for ASCII.  */
extern DLL_VARIABLE const char *po_charset_ascii;

/* The canonicalized encoding name for UTF-8.  */
extern DLL_VARIABLE const char *po_charset_utf8;

/* Test for ASCII compatibility.
   CANON_CHARSET is a canonicalized encoding name.  */
extern bool po_charset_ascii_compatible (const char *canon_charset);

/* Test for a weird encoding, i.e. an encoding which has double-byte
   characters ending in 0x5C.
   CANON_CHARSET is a canonicalized encoding name.  */
extern bool po_is_charset_weird (const char *canon_charset);

/* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure.
   An encoding has CJK structure if every valid character stream is composed
   of single bytes in the range 0x{00..7F} and of byte pairs in the range
   0x{80..FF}{30..FF}.
   CANON_CHARSET is a canonicalized encoding name.  */
extern bool po_is_charset_weird_cjk (const char *canon_charset);

/* Returns a character iterator for a given encoding.
   Given a pointer into a string, the iterator returns the number of bytes
   occupied by the next single character.  If the piece of string is not
   valid or if *s == '\0', it returns 1.  */
typedef size_t (*character_iterator_t) (const char *s);
extern character_iterator_t
       po_charset_character_iterator (const char *canon_charset);


/* The PO file's encoding, as specified in the header entry.  */
extern DLL_VARIABLE const char *po_lex_charset;

#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8.  */
extern DLL_VARIABLE iconv_t po_lex_iconv;
#endif
/* If no converter is available, some information about the structure of the
   PO file's encoding.  */
extern DLL_VARIABLE bool po_lex_weird_cjk;

/* Initialize the PO file's encoding.  */
extern void po_lex_charset_init (void);

/* Set the PO file's encoding from the header entry.
   HEADER_ENTRY is the text of the PO file's header entry; FILENAME names
   the PO file being read.  */
extern void po_lex_charset_set (const char *header_entry,
                                const char *filename);

/* Finish up with the PO file's encoding.  */
extern void po_lex_charset_close (void);


#ifdef __cplusplus
}
#endif


#endif /* _PO_CHARSET_H */