1 /******************************************************************************
2 ** This file is an amalgamation of many separate C source files from SQLite
3 ** version 3.40.1. By combining all the individual C code files into this
4 ** single large file, the entire code can be compiled as a single translation
5 ** unit. This allows many compilers to do optimizations that would not be
6 ** possible if the files were compiled separately. Performance improvements
7 ** of 5% or more are commonly seen when SQLite is compiled as a single
8 ** translation unit.
9 **
10 ** This file is all you need to compile SQLite. To use SQLite in other
11 ** programs, you need this file and the "sqlite3.h" header file that defines
12 ** the programming interface to the SQLite library. (If you do not have
13 ** the "sqlite3.h" header file at hand, you will find a copy embedded within
14 ** the text of this file. Search for "Begin file sqlite3.h" to find the start
15 ** of the embedded sqlite3.h header file.) Additional code files may be needed
16 ** if you want a wrapper to interface SQLite with your choice of programming
17 ** language. The code for the "sqlite3" command-line shell is also in a
18 ** separate file. This file contains only code for the core SQLite library.
19 */
20 /*
21 ** 2019.09.02-Complete codec logic for encryption and decryption.
22 ** Huawei Technologies Co, Ltd.
23 */
24 /************** Begin file icu.c *********************************************/
25 /*
26 ** 2007 May 6
27 **
28 ** The author disclaims copyright to this source code. In place of
29 ** a legal notice, here is a blessing:
30 **
31 ** May you do good and not evil.
32 ** May you find forgiveness for yourself and forgive others.
33 ** May you share freely, never taking more than you give.
34 **
35 *************************************************************************
36 ** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
37 **
38 ** This file implements an integration between the ICU library
39 ** ("International Components for Unicode", an open-source library
40 ** for handling unicode data) and SQLite. The integration uses
41 ** ICU to provide the following to SQLite:
42 **
43 ** * An implementation of the SQL regexp() function (and hence REGEXP
44 ** operator) using the ICU uregex_XX() APIs.
45 **
46 ** * Implementations of the SQL scalar upper() and lower() functions
47 ** for case mapping.
48 **
49 ** * Integration of ICU and SQLite collation sequences.
50 **
51 ** * An implementation of the LIKE operator that uses ICU to
52 ** provide case-independent matching.
53 */
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <assert.h>
58 #include <stddef.h>
59
60 #include "sqlite3icu.h"
61 #include "sqlite3.h"
62
63 #ifdef HARMONY_OS
64 #include "common/unicode/putil.h"
65 #endif
66
67 #if !defined(SQLITE_CORE) \
68 || defined(SQLITE_ENABLE_ICU) \
69 || defined(SQLITE_ENABLE_ICU_COLLATIONS)
70
71 /* Include ICU headers */
72 #include <unicode/utypes.h>
73 #include <unicode/uregex.h>
74 #include <unicode/ustring.h>
75 #include <unicode/ucol.h>
76
77 #if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION)
78 /* This case when the file really is being compiled as a loadable
79 ** extension */
80 # define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0;
81 # define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v;
82 # define SQLITE_EXTENSION_INIT3 \
83 extern const sqlite3_api_routines *sqlite3_api;
84 #else
85 /* This case when the file is being statically linked into the
86 ** application */
87 # define SQLITE_EXTENSION_INIT1 /*no-op*/
88 # define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */
89 # define SQLITE_EXTENSION_INIT3 /*no-op*/
90 #endif
91
92 /* #include <assert.h> */
93
94 #ifndef SQLITE_CORE
95 /* #include "sqlite3ext.h" */
96 SQLITE_EXTENSION_INIT1
97 #else
98 /* #include "sqlite3.h" */
99 #endif
100
/*
** Symbol-visibility decoration for the public entry points of this file.
**
** When SQLITE_EXPORT_SYMBOLS is defined, EXPORT_SYMBOLS expands to the
** compiler-specific attribute that exports a symbol from a shared
** library (GCC-compatible compilers and MSVC are recognised).
**
** EXPORT_SYMBOLS is used unconditionally on declarations and definitions
** in this file, so it must always be defined.  If nothing has defined it
** by this point it falls back to an empty expansion.
*/
#ifdef SQLITE_EXPORT_SYMBOLS
# if defined(__GNUC__)
#  define EXPORT_SYMBOLS __attribute__ ((visibility ("default")))
# elif defined(_MSC_VER)
#  define EXPORT_SYMBOLS __declspec(dllexport)
# else
#  define EXPORT_SYMBOLS
# endif
#endif
#ifndef EXPORT_SYMBOLS
# define EXPORT_SYMBOLS
#endif
111
112 EXPORT_SYMBOLS SQLITE_API int sqlite3IcuInit(sqlite3 *db);
113 #ifdef SQLITE_ENABLE_ICU
114 EXPORT_SYMBOLS SQLITE_API void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
115 #endif
116 /*
117 ** This function is called when an ICU function called from within
118 ** the implementation of an SQL scalar function returns an error.
119 **
120 ** The scalar function context passed as the first argument is
121 ** loaded with an error message based on the following two args.
122 */
icuFunctionError(sqlite3_context * pCtx,const char * zName,UErrorCode e)123 static void icuFunctionError(
124 sqlite3_context *pCtx, /* SQLite scalar function context */
125 const char *zName, /* Name of ICU function that failed */
126 UErrorCode e /* Error code returned by ICU function */
127 ){
128 char zBuf[128];
129 sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
130 zBuf[127] = '\0';
131 sqlite3_result_error(pCtx, zBuf, -1);
132 }
133
134 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
135
136 /*
137 ** Maximum length (in bytes) of the pattern in a LIKE or GLOB
138 ** operator.
139 */
140 #ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
141 # define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
142 #endif
143
/*
** Wrapper around sqlite3_free() that is guaranteed to be a real function
** (never a macro), so that its address can be passed as a destructor
** callback.
*/
static void xFree(void *p){
  sqlite3_free(p);
}
150
/*
** Lookup table used to decode the first byte of a multi-byte UTF-8
** character.  The table is indexed by (lead byte - 0xc0) and yields the
** payload bits carried by that lead byte.  It is copied here from SQLite
** source code file utf8.c.
*/
static const unsigned char icuUtf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

/*
** Read one UTF-8 character from *zIn into c and advance zIn past it.
**
** zIn must be a modifiable pointer to unsigned bytes and c an unsigned
** integer lvalue.  A byte below 0xc0 is returned as is.  Otherwise the
** lead byte is translated through icuUtf8Trans1[] and every following
** continuation byte (10xxxxxx) contributes six more bits.  Both arguments
** are evaluated more than once.
**
** The body is wrapped in do{...}while(0) so that an invocation followed
** by a semicolon forms exactly one statement and is safe inside an
** unbraced if/else.
*/
#define SQLITE_ICU_READ_UTF8(zIn, c)                       \
  do{                                                      \
    (c) = *((zIn)++);                                      \
    if( (c)>=0xc0 ){                                       \
      (c) = icuUtf8Trans1[(c)-0xc0];                       \
      while( (*(zIn) & 0xc0)==0x80 ){                      \
        (c) = ((c)<<6) + (0x3f & *((zIn)++));              \
      }                                                    \
    }                                                      \
  }while(0)

/*
** Advance zIn past one UTF-8 character without decoding it.  zIn must
** not point at the nul terminator (checked by assert()).  The argument
** is evaluated more than once.
*/
#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
  do{                                                      \
    assert( *(zIn) );                                      \
    if( *((zIn)++)>=0xc0 ){                                \
      while( (*(zIn) & 0xc0)==0x80 ){ (zIn)++; }           \
    }                                                      \
  }while(0)
181
182
183 /*
184 ** Compare two UTF-8 strings for equality where the first string is
185 ** a "LIKE" expression. Return true (1) if they are the same and
186 ** false (0) if they are different.
187 */
icuLikeCompare(const uint8_t * zPattern,const uint8_t * zString,const UChar32 uEsc)188 static int icuLikeCompare(
189 const uint8_t *zPattern, /* LIKE pattern */
190 const uint8_t *zString, /* The UTF-8 string to compare against */
191 const UChar32 uEsc /* The escape character */
192 ){
193 static const uint32_t MATCH_ONE = (uint32_t)'_';
194 static const uint32_t MATCH_ALL = (uint32_t)'%';
195
196 int prevEscape = 0; /* True if the previous character was uEsc */
197
198 while( 1 ){
199
200 /* Read (and consume) the next character from the input pattern. */
201 uint32_t uPattern;
202 SQLITE_ICU_READ_UTF8(zPattern, uPattern);
203 if( uPattern==0 ) break;
204
205 /* There are now 4 possibilities:
206 **
207 ** 1. uPattern is an unescaped match-all character "%",
208 ** 2. uPattern is an unescaped match-one character "_",
209 ** 3. uPattern is an unescaped escape character, or
210 ** 4. uPattern is to be handled as an ordinary character
211 */
212 if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
213 /* Case 1. */
214 uint8_t c;
215
216 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
217 ** MATCH_ALL. For each MATCH_ONE, skip one character in the
218 ** test string.
219 */
220 while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
221 if( c==MATCH_ONE ){
222 if( *zString==0 ) return 0;
223 SQLITE_ICU_SKIP_UTF8(zString);
224 }
225 zPattern++;
226 }
227
228 if( *zPattern==0 ) return 1;
229
230 while( *zString ){
231 if( icuLikeCompare(zPattern, zString, uEsc) ){
232 return 1;
233 }
234 SQLITE_ICU_SKIP_UTF8(zString);
235 }
236 return 0;
237
238 }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
239 /* Case 2. */
240 if( *zString==0 ) return 0;
241 SQLITE_ICU_SKIP_UTF8(zString);
242
243 }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
244 /* Case 3. */
245 prevEscape = 1;
246
247 }else{
248 /* Case 4. */
249 uint32_t uString;
250 SQLITE_ICU_READ_UTF8(zString, uString);
251 uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT);
252 uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT);
253 if( uString!=uPattern ){
254 return 0;
255 }
256 prevEscape = 0;
257 }
258 }
259
260 return *zString==0;
261 }
262
263 /*
264 ** Implementation of the like() SQL function. This function implements
265 ** the build-in LIKE operator. The first argument to the function is the
266 ** pattern and the second argument is the string. So, the SQL statements:
267 **
268 ** A LIKE B
269 **
270 ** is implemented as like(B, A). If there is an escape character E,
271 **
272 ** A LIKE B ESCAPE E
273 **
274 ** is mapped to like(B, A, E).
275 */
icuLikeFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)276 static void icuLikeFunc(
277 sqlite3_context *context,
278 int argc,
279 sqlite3_value **argv
280 ){
281 const unsigned char *zA = sqlite3_value_text(argv[0]);
282 const unsigned char *zB = sqlite3_value_text(argv[1]);
283 UChar32 uEsc = 0;
284
285 /* Limit the length of the LIKE or GLOB pattern to avoid problems
286 ** of deep recursion and N*N behavior in patternCompare().
287 */
288 if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
289 sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
290 return;
291 }
292
293
294 if( argc==3 ){
295 /* The escape character string must consist of a single UTF-8 character.
296 ** Otherwise, return an error.
297 */
298 int nE= sqlite3_value_bytes(argv[2]);
299 const unsigned char *zE = sqlite3_value_text(argv[2]);
300 int i = 0;
301 if( zE==0 ) return;
302 U8_NEXT(zE, i, nE, uEsc);
303 if( i!=nE){
304 sqlite3_result_error(context,
305 "ESCAPE expression must be a single character", -1);
306 return;
307 }
308 }
309
310 if( zA && zB ){
311 sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
312 }
313 }
314
315 /*
316 ** Function to delete compiled regexp objects. Registered as
317 ** a destructor function with sqlite3_set_auxdata().
318 */
icuRegexpDelete(void * p)319 static void icuRegexpDelete(void *p){
320 URegularExpression *pExpr = (URegularExpression *)p;
321 uregex_close(pExpr);
322 }
323
324 /*
325 ** Implementation of SQLite REGEXP operator. This scalar function takes
326 ** two arguments. The first is a regular expression pattern to compile
327 ** the second is a string to match against that pattern. If either
328 ** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
329 ** is 1 if the string matches the pattern, or 0 otherwise.
330 **
331 ** SQLite maps the regexp() function to the regexp() operator such
332 ** that the following two are equivalent:
333 **
334 ** zString REGEXP zPattern
335 ** regexp(zPattern, zString)
336 **
337 ** Uses the following ICU regexp APIs:
338 **
339 ** uregex_open()
340 ** uregex_matches()
341 ** uregex_close()
342 */
icuRegexpFunc(sqlite3_context * p,int nArg,sqlite3_value ** apArg)343 static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
344 UErrorCode status = U_ZERO_ERROR;
345 URegularExpression *pExpr;
346 UBool res;
347 const UChar *zString = sqlite3_value_text16(apArg[1]);
348
349 (void)nArg; /* Unused parameter */
350
351 /* If the left hand side of the regexp operator is NULL,
352 ** then the result is also NULL.
353 */
354 if( !zString ){
355 return;
356 }
357
358 pExpr = sqlite3_get_auxdata(p, 0);
359 if( !pExpr ){
360 const UChar *zPattern = sqlite3_value_text16(apArg[0]);
361 if( !zPattern ){
362 return;
363 }
364 pExpr = uregex_open(zPattern, -1, 0, 0, &status);
365
366 if( U_SUCCESS(status) ){
367 sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
368 pExpr = sqlite3_get_auxdata(p, 0);
369 }
370 if( !pExpr ){
371 icuFunctionError(p, "uregex_open", status);
372 return;
373 }
374 }
375
376 /* Configure the text that the regular expression operates on. */
377 uregex_setText(pExpr, zString, -1, &status);
378 if( !U_SUCCESS(status) ){
379 icuFunctionError(p, "uregex_setText", status);
380 return;
381 }
382
383 /* Attempt the match */
384 res = uregex_matches(pExpr, 0, &status);
385 if( !U_SUCCESS(status) ){
386 icuFunctionError(p, "uregex_matches", status);
387 return;
388 }
389
390 /* Set the text that the regular expression operates on to a NULL
391 ** pointer. This is not really necessary, but it is tidier than
392 ** leaving the regular expression object configured with an invalid
393 ** pointer after this function returns.
394 */
395 uregex_setText(pExpr, 0, 0, &status);
396
397 /* Return 1 or 0. */
398 sqlite3_result_int(p, res ? 1 : 0);
399 }
400
401 /*
402 ** Implementations of scalar functions for case mapping - upper() and
403 ** lower(). Function upper() converts its input to upper-case (ABC).
404 ** Function lower() converts to lower-case (abc).
405 **
406 ** ICU provides two types of case mapping, "general" case mapping and
407 ** "language specific". Refer to ICU documentation for the differences
408 ** between the two.
409 **
410 ** To utilise "general" case mapping, the upper() or lower() scalar
411 ** functions are invoked with one argument:
412 **
413 ** upper('ABC') -> 'abc'
414 ** lower('abc') -> 'ABC'
415 **
416 ** To access ICU "language specific" case mapping, upper() or lower()
417 ** should be invoked with two arguments. The second argument is the name
418 ** of the locale to use. Passing an empty string ("") or SQL NULL value
419 ** as the second argument is the same as invoking the 1 argument version
420 ** of upper() or lower().
421 **
422 ** lower('I', 'en_us') -> 'i'
423 ** lower('I', 'tr_tr') -> '\u131' (small dotless i)
424 **
425 ** http://www.icu-project.org/userguide/posix.html#case_mappings
426 */
icuCaseFunc16(sqlite3_context * p,int nArg,sqlite3_value ** apArg)427 static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
428 const UChar *zInput; /* Pointer to input string */
429 UChar *zOutput = 0; /* Pointer to output buffer */
430 int nInput; /* Size of utf-16 input string in bytes */
431 int nOut; /* Size of output buffer in bytes */
432 int cnt;
433 int bToUpper; /* True for toupper(), false for tolower() */
434 UErrorCode status;
435 const char *zLocale = 0;
436
437 assert(nArg==1 || nArg==2);
438 bToUpper = (sqlite3_user_data(p)!=0);
439 if( nArg==2 ){
440 zLocale = (const char *)sqlite3_value_text(apArg[1]);
441 }
442
443 zInput = sqlite3_value_text16(apArg[0]);
444 if( !zInput ){
445 return;
446 }
447 nOut = nInput = sqlite3_value_bytes16(apArg[0]);
448 if( nOut==0 ){
449 sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
450 return;
451 }
452
453 for(cnt=0; cnt<2; cnt++){
454 UChar *zNew = sqlite3_realloc(zOutput, nOut);
455 if( zNew==0 ){
456 sqlite3_free(zOutput);
457 sqlite3_result_error_nomem(p);
458 return;
459 }
460 zOutput = zNew;
461 status = U_ZERO_ERROR;
462 if( bToUpper ){
463 nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
464 }else{
465 nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
466 }
467
468 if( U_SUCCESS(status) ){
469 sqlite3_result_text16(p, zOutput, nOut, xFree);
470 }else if( status==U_BUFFER_OVERFLOW_ERROR ){
471 assert( cnt==0 );
472 continue;
473 }else{
474 icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
475 }
476 return;
477 }
478 assert( 0 ); /* Unreachable */
479 }
480
481 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
482
483 /*
484 ** Collation sequence destructor function. The pCtx argument points to
485 ** a UCollator structure previously allocated using ucol_open().
486 */
icuCollationDel(void * pCtx)487 static void icuCollationDel(void *pCtx){
488 UCollator *p = (UCollator *)pCtx;
489 ucol_close(p);
490 }
491
492 /*
493 ** Collation sequence comparison function. The pCtx argument points to
494 ** a UCollator structure previously allocated using ucol_open().
495 */
icuCollationColl(void * pCtx,int nLeft,const void * zLeft,int nRight,const void * zRight)496 static int icuCollationColl(
497 void *pCtx,
498 int nLeft,
499 const void *zLeft,
500 int nRight,
501 const void *zRight
502 ){
503 UCollationResult res;
504 UCollator *p = (UCollator *)pCtx;
505 res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
506 switch( res ){
507 case UCOL_LESS: return -1;
508 case UCOL_GREATER: return +1;
509 case UCOL_EQUAL: return 0;
510 }
511 assert(!"Unexpected return value from ucol_strcoll()");
512 return 0;
513 }
514
515 /*
516 ** Implementation of the scalar function icu_load_collation().
517 **
518 ** This scalar function is used to add ICU collation based collation
519 ** types to an SQLite database connection. It is intended to be called
520 ** as follows:
521 **
522 ** SELECT icu_load_collation(<locale>, <collation-name>);
523 **
524 ** Where <locale> is a string containing an ICU locale identifier (i.e.
525 ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
526 ** collation sequence to create.
527 */
icuLoadCollation(sqlite3_context * p,int nArg,sqlite3_value ** apArg)528 static void icuLoadCollation(
529 sqlite3_context *p,
530 int nArg,
531 sqlite3_value **apArg
532 ){
533 sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
534 UErrorCode status = U_ZERO_ERROR;
535 const char *zLocale; /* Locale identifier - (eg. "jp_JP") */
536 const char *zName; /* SQL Collation sequence name (eg. "japanese") */
537 UCollator *pUCollator; /* ICU library collation object */
538 int rc; /* Return code from sqlite3_create_collation_x() */
539
540 assert(nArg==2);
541 (void)nArg; /* Unused parameter */
542 zLocale = (const char *)sqlite3_value_text(apArg[0]);
543 zName = (const char *)sqlite3_value_text(apArg[1]);
544
545 if( !zLocale || !zName ){
546 return;
547 }
548
549 pUCollator = ucol_open(zLocale, &status);
550 if( !U_SUCCESS(status) ){
551 icuFunctionError(p, "ucol_open", status);
552 return;
553 }
554 assert(p);
555
556 rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
557 icuCollationColl, icuCollationDel
558 );
559 if( rc!=SQLITE_OK ){
560 ucol_close(pUCollator);
561 sqlite3_result_error(p, "Error registering collation function", -1);
562 }
563 }
564
565 /*
566 ** Register the ICU extension functions with database db.
567 */
sqlite3IcuInit(sqlite3 * db)568 EXPORT_SYMBOLS SQLITE_API int sqlite3IcuInit(sqlite3 *db){
569 # define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
570 static const struct IcuScalar {
571 const char *zName; /* Function name */
572 unsigned char nArg; /* Number of arguments */
573 unsigned int enc; /* Optimal text encoding */
574 unsigned char iContext; /* sqlite3_user_data() context */
575 void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
576 } scalars[] = {
577 {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation},
578 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
579 {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS, 0, icuRegexpFunc},
580 {"lower", 1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
581 {"lower", 2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
582 {"upper", 1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
583 {"upper", 2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
584 {"lower", 1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
585 {"lower", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
586 {"upper", 1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
587 {"upper", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
588 {"like", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuLikeFunc},
589 {"like", 3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuLikeFunc},
590 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
591 };
592 #ifdef HARMONY_OS
593 extern void SetOhosIcuDirectory();
594 SetOhosIcuDirectory();
595 #endif
596 int rc = SQLITE_OK;
597 int i;
598
599 for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
600 const struct IcuScalar *p = &scalars[i];
601 rc = sqlite3_create_function(
602 db, p->zName, p->nArg, p->enc,
603 p->iContext ? (void*)db : (void*)0,
604 p->xFunc, 0, 0
605 );
606 }
607
608 return rc;
609 }
610
611 #if !SQLITE_CORE
612 #ifdef _WIN32
613 __declspec(dllexport)
614 #endif
sqlite3_icu_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)615 SQLITE_API int sqlite3_icu_init(
616 sqlite3 *db,
617 char **pzErrMsg,
618 const sqlite3_api_routines *pApi
619 ){
620 SQLITE_EXTENSION_INIT2(pApi)
621 return sqlite3IcuInit(db);
622 }
623 #endif
624
625 #endif
626
627 /************** End of icu.c *************************************************/
628 /************** Begin file fts3_icu.c ****************************************/
629 /*
630 ** 2007 June 22
631 **
632 ** The author disclaims copyright to this source code. In place of
633 ** a legal notice, here is a blessing:
634 **
635 ** May you do good and not evil.
636 ** May you find forgiveness for yourself and forgive others.
637 ** May you share freely, never taking more than you give.
638 **
639 *************************************************************************
640 ** This file implements a tokenizer for fts3 based on the ICU library.
641 */
642 /* #include "fts3Int.h" */
643 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
644 #ifdef SQLITE_ENABLE_ICU
645
646 /* #include <assert.h> */
647 /* #include <string.h> */
648 /* #include "fts3_tokenizer.h" */
649
650 #include <unicode/ubrk.h>
651 /* #include <unicode/ucol.h> */
652 /* #include <unicode/ustring.h> */
653 #include <unicode/utf16.h>
654
655 typedef struct IcuTokenizer IcuTokenizer;
656 typedef struct IcuCursor IcuCursor;
657
658 struct IcuTokenizer {
659 sqlite3_tokenizer base;
660 char *zLocale;
661 };
662
663 struct IcuCursor {
664 sqlite3_tokenizer_cursor base;
665
666 UBreakIterator *pIter; /* ICU break-iterator object */
667 int nChar; /* Number of UChar elements in pInput */
668 UChar *aChar; /* Copy of input using utf-16 encoding */
669 int *aOffset; /* Offsets of each character in utf-8 input */
670
671 int nBuffer;
672 char *zBuffer;
673
674 int iToken;
675 };
676
677 /*
678 ** Create a new tokenizer instance.
679 */
icuCreate(int argc,const char * const * argv,sqlite3_tokenizer ** ppTokenizer)680 static int icuCreate(
681 int argc, /* Number of entries in argv[] */
682 const char * const *argv, /* Tokenizer creation arguments */
683 sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
684 ){
685 IcuTokenizer *p;
686 int n = 0;
687
688 if( argc>0 ){
689 n = strlen(argv[0])+1;
690 }
691 p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
692 if( !p ){
693 return SQLITE_NOMEM;
694 }
695 memset(p, 0, sizeof(IcuTokenizer));
696
697 if( n ){
698 p->zLocale = (char *)&p[1];
699 memcpy(p->zLocale, argv[0], n);
700 }
701
702 *ppTokenizer = (sqlite3_tokenizer *)p;
703
704 return SQLITE_OK;
705 }
706
707 /*
708 ** Destroy a tokenizer
709 */
icuDestroy(sqlite3_tokenizer * pTokenizer)710 static int icuDestroy(sqlite3_tokenizer *pTokenizer){
711 IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
712 sqlite3_free(p);
713 return SQLITE_OK;
714 }
715
716 /*
717 ** Prepare to begin tokenizing a particular string. The input
718 ** string to be tokenized is pInput[0..nBytes-1]. A cursor
719 ** used to incrementally tokenize this string is returned in
720 ** *ppCursor.
721 */
icuOpen(sqlite3_tokenizer * pTokenizer,const char * zInput,int nInput,sqlite3_tokenizer_cursor ** ppCursor)722 static int icuOpen(
723 sqlite3_tokenizer *pTokenizer, /* The tokenizer */
724 const char *zInput, /* Input string */
725 int nInput, /* Length of zInput in bytes */
726 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
727 ){
728 IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
729 IcuCursor *pCsr;
730
731 const int32_t opt = U_FOLD_CASE_DEFAULT;
732 UErrorCode status = U_ZERO_ERROR;
733 int nChar;
734
735 UChar32 c;
736 int iInput = 0;
737 int iOut = 0;
738
739 *ppCursor = 0;
740
741 if( zInput==0 ){
742 nInput = 0;
743 zInput = "";
744 }else if( nInput<0 ){
745 nInput = strlen(zInput);
746 }
747 nChar = nInput+1;
748 pCsr = (IcuCursor *)sqlite3_malloc64(
749 sizeof(IcuCursor) + /* IcuCursor */
750 ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
751 (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
752 );
753 if( !pCsr ){
754 return SQLITE_NOMEM;
755 }
756 memset(pCsr, 0, sizeof(IcuCursor));
757 pCsr->aChar = (UChar *)&pCsr[1];
758 pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
759
760 pCsr->aOffset[iOut] = iInput;
761 U8_NEXT(zInput, iInput, nInput, c);
762 while( c>0 ){
763 int isError = 0;
764 c = u_foldCase(c, opt);
765 U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
766 if( isError ){
767 sqlite3_free(pCsr);
768 return SQLITE_ERROR;
769 }
770 pCsr->aOffset[iOut] = iInput;
771
772 if( iInput<nInput ){
773 U8_NEXT(zInput, iInput, nInput, c);
774 }else{
775 c = 0;
776 }
777 }
778
779 pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
780 if( !U_SUCCESS(status) ){
781 sqlite3_free(pCsr);
782 return SQLITE_ERROR;
783 }
784 pCsr->nChar = iOut;
785
786 ubrk_first(pCsr->pIter);
787 *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
788 return SQLITE_OK;
789 }
790
791 /*
792 ** Close a tokenization cursor previously opened by a call to icuOpen().
793 */
icuClose(sqlite3_tokenizer_cursor * pCursor)794 static int icuClose(sqlite3_tokenizer_cursor *pCursor){
795 IcuCursor *pCsr = (IcuCursor *)pCursor;
796 ubrk_close(pCsr->pIter);
797 sqlite3_free(pCsr->zBuffer);
798 sqlite3_free(pCsr);
799 return SQLITE_OK;
800 }
801
802 /*
803 ** Extract the next token from a tokenization cursor.
804 */
icuNext(sqlite3_tokenizer_cursor * pCursor,const char ** ppToken,int * pnBytes,int * piStartOffset,int * piEndOffset,int * piPosition)805 static int icuNext(
806 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
807 const char **ppToken, /* OUT: *ppToken is the token text */
808 int *pnBytes, /* OUT: Number of bytes in token */
809 int *piStartOffset, /* OUT: Starting offset of token */
810 int *piEndOffset, /* OUT: Ending offset of token */
811 int *piPosition /* OUT: Position integer of token */
812 ){
813 IcuCursor *pCsr = (IcuCursor *)pCursor;
814
815 int iStart = 0;
816 int iEnd = 0;
817 int nByte = 0;
818
819 while( iStart==iEnd ){
820 UChar32 c;
821
822 iStart = ubrk_current(pCsr->pIter);
823 iEnd = ubrk_next(pCsr->pIter);
824 if( iEnd==UBRK_DONE ){
825 return SQLITE_DONE;
826 }
827
828 while( iStart<iEnd ){
829 int iWhite = iStart;
830 U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
831 if( u_isspace(c) ){
832 iStart = iWhite;
833 }else{
834 break;
835 }
836 }
837 assert(iStart<=iEnd);
838 }
839
840 do {
841 UErrorCode status = U_ZERO_ERROR;
842 if( nByte ){
843 char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
844 if( !zNew ){
845 return SQLITE_NOMEM;
846 }
847 pCsr->zBuffer = zNew;
848 pCsr->nBuffer = nByte;
849 }
850
851 u_strToUTF8(
852 pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
853 &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
854 &status /* Output success/failure */
855 );
856 } while( nByte>pCsr->nBuffer );
857
858 *ppToken = pCsr->zBuffer;
859 *pnBytes = nByte;
860 *piStartOffset = pCsr->aOffset[iStart];
861 *piEndOffset = pCsr->aOffset[iEnd];
862 *piPosition = pCsr->iToken++;
863
864 return SQLITE_OK;
865 }
866
867 /*
868 ** The set of routines that implement the simple tokenizer
869 */
870 static const sqlite3_tokenizer_module icuTokenizerModule = {
871 0, /* iVersion */
872 icuCreate, /* xCreate */
873 icuDestroy, /* xCreate */
874 icuOpen, /* xOpen */
875 icuClose, /* xClose */
876 icuNext, /* xNext */
877 0, /* xLanguageid */
878 };
879
880 /*
881 ** Set *ppModule to point at the implementation of the ICU tokenizer.
882 */
sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const ** ppModule)883 EXPORT_SYMBOLS SQLITE_API void sqlite3Fts3IcuTokenizerModule(
884 sqlite3_tokenizer_module const**ppModule
885 ){
886 *ppModule = &icuTokenizerModule;
887 }
888
889 #endif /* defined(SQLITE_ENABLE_ICU) */
890 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
891
892 /************** End of fts3_icu.c ********************************************/