1 /* GNU SED, a batch stream editor.
2 Copyright (C) 2003, 2006, 2009 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18 #include "sed.h"
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "localcharset.h"
23
24 int mb_cur_max; /* Set by initialize_mbcs: MB_CUR_MAX when HAVE_MBRTOWC is defined, otherwise 1. */
25 bool is_utf8; /* Set by initialize_mbcs: true when locale_charset () returns exactly "UTF-8". */
26
27 #ifdef HAVE_MBRTOWC
/* Add the byte CH to the multibyte character represented by the state
   CUR_STAT.

   Returns the length reported by mbrtowc once a character is completed
   (1 for an ordinary character, 0 if the completed character is the
   null character), or -2 if the character is yet to be completed.

   An invalid sequence is treated like a single-byte character: CUR_STAT
   is reset to the initial conversion state and 1 is returned.  */
int
brlen (int ch, mbstate_t *cur_stat)
{
  char c = (char) ch;

  /* mbrtowc with a null destination only measures the character, which
     is what mbrlen does when given an explicit state.  Keep the result
     as size_t so the error sentinels are compared without narrowing.  */
  size_t result = mbrtowc (NULL, &c, 1, cur_stat);

  /* An invalid sequence is treated like a single-byte character.  */
  if (result == (size_t) -1)
    {
      memset (cur_stat, 0, sizeof *cur_stat);
      return 1;
    }

  /* Incomplete character: more bytes are needed.  */
  if (result == (size_t) -2)
    return -2;

  /* Only one byte was supplied, so RESULT is 0 or 1 here.  */
  return (int) result;
}
49 #endif
50
51 void
initialize_mbcs()52 initialize_mbcs ()
53 {
54 /* For UTF-8, we know that the encoding is stateless. */
55 const char *codeset_name;
56
57 codeset_name = locale_charset ();
58 is_utf8 = (strcmp (codeset_name, "UTF-8") == 0);
59
60 #ifdef HAVE_MBRTOWC
61 mb_cur_max = MB_CUR_MAX;
62 #else
63 mb_cur_max = 1;
64 #endif
65 }
66
67