1 /* SPDX-License-Identifier: GPL-2.0+ */ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #ifndef __CHARSET_H_ 9 #define __CHARSET_H_ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 14 #define MAX_UTF8_PER_UTF16 3 15 16 /** 17 * console_read_unicode() - read Unicode code point from console 18 * 19 * @code: pointer to store Unicode code point 20 * Return: 0 = success 21 */ 22 int console_read_unicode(s32 *code); 23 24 /** 25 * utf8_get() - get next UTF-8 code point from buffer 26 * 27 * @src: pointer to current byte, updated to point to next byte 28 * Return: code point, or 0 for end of string, or -1 if no legal 29 * code point is found. In case of an error src points to 30 * the incorrect byte. 31 */ 32 s32 utf8_get(const char **src); 33 34 /** 35 * utf8_put() - write UTF-8 code point to buffer 36 * 37 * @code: code point 38 * @dst: pointer to destination buffer, updated to next position 39 * Return: -1 if the input parameters are invalid 40 */ 41 int utf8_put(s32 code, char **dst); 42 43 /** 44 * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 45 * to utf-16 46 * 47 * @src: utf-8 string 48 * @count: maximum number of code points to convert 49 * Return: length in u16 after conversion to utf-16 without the 50 * trailing \0. If an invalid UTF-8 sequence is hit one 51 * u16 will be reserved for a replacement character. 52 */ 53 size_t utf8_utf16_strnlen(const char *src, size_t count); 54 55 /** 56 * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 57 * 58 * @src: utf-8 string 59 * Return: length in u16 after conversion to utf-16 without the 60 * trailing \0. If an invalid UTF-8 sequence is hit one 61 * u16 will be reserved for a replacement character. 62 */ 63 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 64 65 /** 66 * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 67 * 68 * @dst: destination buffer 69 * @src: source buffer 70 * @count: maximum number of code points to copy 71 * Return: -1 if the input parameters are invalid 72 */ 73 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 74 75 /** 76 * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 77 * 78 * @dst: destination buffer 79 * @src: source buffer 80 * Return: -1 if the input parameters are invalid 81 */ 82 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 83 84 /** 85 * utf16_get() - get next UTF-16 code point from buffer 86 * 87 * @src: pointer to current word, updated to point to next word 88 * Return: code point, or 0 for end of string, or -1 if no legal 89 * code point is found. In case of an error src points to 90 * the incorrect word. 91 */ 92 s32 utf16_get(const u16 **src); 93 94 /** 95 * utf16_put() - write UTF-16 code point to buffer 96 * 97 * @code: code point 98 * @dst: pointer to destination buffer, updated to next position 99 * Return: -1 if the input parameters are invalid 100 */ 101 int utf16_put(s32 code, u16 **dst); 102 103 /** 104 * utf16_strnlen() - length of a truncated utf-16 string 105 * 106 * @src: utf-16 string 107 * @count: maximum number of code points to convert 108 * Return: length in code points. If an invalid UTF-16 sequence is 109 * hit one position will be reserved for a replacement 110 * character. 111 */ 112 size_t utf16_strnlen(const u16 *src, size_t count); 113 114 /** 115 * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 116 * to utf-8 117 * 118 * @src: utf-16 string 119 * @count: maximum number of code points to convert 120 * Return: length in bytes after conversion to utf-8 without the 121 * trailing \0. If an invalid UTF-16 sequence is hit one 122 * byte will be reserved for a replacement character. 123 */ 124 size_t utf16_utf8_strnlen(const u16 *src, size_t count); 125 126 /** 127 * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 128 * 129 * @src: utf-16 string 130 * Return: length in bytes after conversion to utf-8 without the 131 * trailing \0. If an invalid UTF-16 sequence is hit one 132 * byte will be reserved for a replacement character. 133 */ 134 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 135 136 /** 137 * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 138 * 139 * @dst: destination buffer 140 * @src: source buffer 141 * @count: maximum number of code points to copy 142 * Return: -1 if the input parameters are invalid 143 */ 144 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 145 146 /** 147 * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 148 * 149 * @dst: destination buffer 150 * @src: source buffer 151 * Return: -1 if the input parameters are invalid 152 */ 153 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 154 155 /** 156 * utf_to_lower() - convert a Unicode letter to lower case 157 * 158 * @code: letter to convert 159 * Return: lower case letter or unchanged letter 160 */ 161 s32 utf_to_lower(const s32 code); 162 163 /** 164 * utf_to_upper() - convert a Unicode letter to upper case 165 * 166 * @code: letter to convert 167 * Return: upper case letter or unchanged letter 168 */ 169 s32 utf_to_upper(const s32 code); 170 171 /* 172 * u16_strncmp() - compare two u16 string 173 * 174 * @s1: first string to compare 175 * @s2: second string to compare 176 * @n: maximum number of u16 to compare 177 * Return: 0 if the first n u16 are the same in s1 and s2 178 * < 0 if the first different u16 in s1 is less than the 179 * corresponding u16 in s2 180 * > 0 if the first different u16 in s1 is greater than the 181 * corresponding u16 in s2 182 */ 183 int u16_strncmp(const u16 *s1, const u16 *s2, size_t n); 184 #define u16_strcmp(s1, s2) u16_strncmp((s1), (s2), SIZE_MAX) 185 186 /** 187 * u16_strlen - count non-zero words 188 * 189 * This function matches wsclen() if the -fshort-wchar compiler flag is set. 190 * In the EFI context we explicitly need a function handling u16 strings. 191 * 192 * @in: null terminated u16 string 193 * ReturnValue: number of non-zero words. 194 * This is not the number of utf-16 letters! 195 */ 196 size_t u16_strlen(const void *in); 197 198 /** 199 * u16_strlen - count non-zero words 200 * 201 * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 202 * In the EFI context we explicitly need a function handling u16 strings. 203 * 204 * @in: null terminated u16 string 205 * @count: maximum number of words to count 206 * ReturnValue: number of non-zero words. 207 * This is not the number of utf-16 letters! 208 */ 209 size_t u16_strnlen(const u16 *in, size_t count); 210 211 /** 212 * u16_strcpy() - copy u16 string 213 * 214 * Copy u16 string pointed to by src, including terminating null word, to 215 * the buffer pointed to by dest. 216 * 217 * @dest: destination buffer 218 * @src: source buffer (null terminated) 219 * Return: 'dest' address 220 */ 221 u16 *u16_strcpy(u16 *dest, const u16 *src); 222 223 /** 224 * u16_strdup() - duplicate u16 string 225 * 226 * Copy u16 string pointed to by src, including terminating null word, to a 227 * newly allocated buffer. 228 * 229 * @src: source buffer (null terminated) 230 * Return: allocated new buffer on success, NULL on failure 231 */ 232 u16 *u16_strdup(const void *src); 233 234 /** 235 * utf16_to_utf8() - Convert an utf16 string to utf8 236 * 237 * Converts 'size' characters of the utf16 string 'src' to utf8 238 * written to the 'dest' buffer. 239 * 240 * NOTE that a single utf16 character can generate up to 3 utf8 241 * characters. See MAX_UTF8_PER_UTF16. 242 * 243 * @dest the destination buffer to write the utf8 characters 244 * @src the source utf16 string 245 * @size the number of utf16 characters to convert 246 * @return the pointer to the first unwritten byte in 'dest' 247 */ 248 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 249 250 #endif /* __CHARSET_H_ */ 251