• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2 ** This file is an amalgamation of many separate C source files from SQLite
3 ** version 3.40.1.  By combining all the individual C code files into this
4 ** single large file, the entire code can be compiled as a single translation
5 ** unit.  This allows many compilers to do optimizations that would not be
6 ** possible if the files were compiled separately.  Performance improvements
7 ** of 5% or more are commonly seen when SQLite is compiled as a single
8 ** translation unit.
9 **
10 ** This file is all you need to compile SQLite.  To use SQLite in other
11 ** programs, you need this file and the "sqlite3.h" header file that defines
12 ** the programming interface to the SQLite library.  (If you do not have
13 ** the "sqlite3.h" header file at hand, you will find a copy embedded within
14 ** the text of this file.  Search for "Begin file sqlite3.h" to find the start
15 ** of the embedded sqlite3.h header file.) Additional code files may be needed
16 ** if you want a wrapper to interface SQLite with your choice of programming
17 ** language. The code for the "sqlite3" command-line shell is also in a
18 ** separate file. This file contains only code for the core SQLite library.
19 */
20 /*
21 ** 2019.09.02-Complete codec logic for encryption and decryption.
22 **            Huawei Technologies Co, Ltd.
23 */
24 /************** Begin file icu.c *********************************************/
25 /*
26 ** 2007 May 6
27 **
28 ** The author disclaims copyright to this source code.  In place of
29 ** a legal notice, here is a blessing:
30 **
31 **    May you do good and not evil.
32 **    May you find forgiveness for yourself and forgive others.
33 **    May you share freely, never taking more than you give.
34 **
35 *************************************************************************
36 ** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
37 **
38 ** This file implements an integration between the ICU library
39 ** ("International Components for Unicode", an open-source library
40 ** for handling unicode data) and SQLite. The integration uses
41 ** ICU to provide the following to SQLite:
42 **
43 **   * An implementation of the SQL regexp() function (and hence REGEXP
44 **     operator) using the ICU uregex_XX() APIs.
45 **
46 **   * Implementations of the SQL scalar upper() and lower() functions
47 **     for case mapping.
48 **
49 **   * Integration of ICU and SQLite collation sequences.
50 **
51 **   * An implementation of the LIKE operator that uses ICU to
52 **     provide case-independent matching.
53 */
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <assert.h>
58 #include <stddef.h>
59 
60 #include "sqlite3icu.h"
61 #include "sqlite3.h"
62 
63 #ifdef HARMONY_OS
64 #include "common/unicode/putil.h"
65 #endif
66 
67 #if !defined(SQLITE_CORE)                  \
68  || defined(SQLITE_ENABLE_ICU)             \
69  || defined(SQLITE_ENABLE_ICU_COLLATIONS)
70 
71 /* Include ICU headers */
72 #include <unicode/utypes.h>
73 #include <unicode/uregex.h>
74 #include <unicode/ustring.h>
75 #include <unicode/ucol.h>
76 
77 #if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION)
78   /* This case when the file really is being compiled as a loadable
79   ** extension */
80 # define SQLITE_EXTENSION_INIT1     const sqlite3_api_routines *sqlite3_api=0;
81 # define SQLITE_EXTENSION_INIT2(v)  sqlite3_api=v;
82 # define SQLITE_EXTENSION_INIT3     \
83     extern const sqlite3_api_routines *sqlite3_api;
84 #else
85   /* This case when the file is being statically linked into the
86   ** application */
87 # define SQLITE_EXTENSION_INIT1     /*no-op*/
88 # define SQLITE_EXTENSION_INIT2(v)  (void)v; /* unused parameter */
89 # define SQLITE_EXTENSION_INIT3     /*no-op*/
90 #endif
91 
92 /* #include <assert.h> */
93 
94 #ifndef SQLITE_CORE
95 /*   #include "sqlite3ext.h" */
96   SQLITE_EXTENSION_INIT1
97 #else
98 /*   #include "sqlite3.h" */
99 #endif
100 
101 // hw export the symbols
102 #ifdef SQLITE_EXPORT_SYMBOLS
103 #if defined(__GNUC__)
104 #  define EXPORT_SYMBOLS  __attribute__ ((visibility ("default")))
105 #elif defined(_MSC_VER)
106 #  define EXPORT_SYMBOLS  __declspec(dllexport)
107 #else
108 #  define EXPORT_SYMBOLS
109 #endif
110 #endif
111 
112 EXPORT_SYMBOLS SQLITE_API int sqlite3IcuInit(sqlite3 *db);
113 #ifdef SQLITE_ENABLE_ICU
114 EXPORT_SYMBOLS SQLITE_API void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
115 #endif
116 /*
117 ** This function is called when an ICU function called from within
118 ** the implementation of an SQL scalar function returns an error.
119 **
120 ** The scalar function context passed as the first argument is
121 ** loaded with an error message based on the following two args.
122 */
icuFunctionError(sqlite3_context * pCtx,const char * zName,UErrorCode e)123 static void icuFunctionError(
124   sqlite3_context *pCtx,       /* SQLite scalar function context */
125   const char *zName,           /* Name of ICU function that failed */
126   UErrorCode e                 /* Error code returned by ICU function */
127 ){
128   char zBuf[128];
129   sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
130   zBuf[127] = '\0';
131   sqlite3_result_error(pCtx, zBuf, -1);
132 }
133 
134 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
135 
/*
** Upper bound, in bytes, on the length of the pattern handed to the
** LIKE or GLOB operator.  A value supplied at compile time takes
** precedence over the default given here.
*/
#if !defined(SQLITE_MAX_LIKE_PATTERN_LENGTH)
# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
#endif
143 
/*
** A wrapper around sqlite3_free() that is guaranteed to be a real
** function (never a macro), so that its address can be handed out as a
** destructor callback.
*/
static void xFree(void *p)
{
  sqlite3_free(p);
}
150 
/*
** This lookup table is used to help decode the first byte of
** a multi-byte UTF8 character. It is copied here from SQLite source
** code file utf8.c.  The table is indexed by (first-byte - 0xc0).
*/
static const unsigned char icuUtf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

/*
** Decode the UTF-8 character at zIn into c and advance zIn past it.
**
** zIn must be a modifiable pointer to unsigned bytes and c a modifiable
** unsigned integer variable.  A first byte of 0xc0 or greater is mapped
** through icuUtf8Trans1[], after which every following continuation byte
** (10xxxxxx) contributes six more bits.  No validation is performed.
**
** Both arguments are evaluated more than once, so they must be free of
** side effects.  The body is wrapped in do{...}while(0) so that an
** invocation followed by a semicolon forms exactly one statement.
*/
#define SQLITE_ICU_READ_UTF8(zIn, c)                       \
  do{                                                      \
    (c) = *((zIn)++);                                      \
    if( (c)>=0xc0 ){                                       \
      (c) = icuUtf8Trans1[(c)-0xc0];                       \
      while( (*(zIn) & 0xc0)==0x80 ){                      \
        (c) = ((c)<<6) + (0x3f & *((zIn)++));              \
      }                                                    \
    }                                                      \
  }while(0)

/*
** Advance zIn past one UTF-8 character without decoding it.  zIn must
** not point at the terminating NUL byte (checked with assert()).  Same
** argument and single-statement rules as SQLITE_ICU_READ_UTF8.
*/
#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
  do{                                                      \
    assert( *(zIn) );                                      \
    if( *((zIn)++)>=0xc0 ){                                \
      while( (*(zIn) & 0xc0)==0x80 ){ (zIn)++; }           \
    }                                                      \
  }while(0)
181 
182 
183 /*
184 ** Compare two UTF-8 strings for equality where the first string is
185 ** a "LIKE" expression. Return true (1) if they are the same and
186 ** false (0) if they are different.
187 */
icuLikeCompare(const uint8_t * zPattern,const uint8_t * zString,const UChar32 uEsc)188 static int icuLikeCompare(
189   const uint8_t *zPattern,   /* LIKE pattern */
190   const uint8_t *zString,    /* The UTF-8 string to compare against */
191   const UChar32 uEsc         /* The escape character */
192 ){
193   static const uint32_t MATCH_ONE = (uint32_t)'_';
194   static const uint32_t MATCH_ALL = (uint32_t)'%';
195 
196   int prevEscape = 0;     /* True if the previous character was uEsc */
197 
198   while( 1 ){
199 
200     /* Read (and consume) the next character from the input pattern. */
201     uint32_t uPattern;
202     SQLITE_ICU_READ_UTF8(zPattern, uPattern);
203     if( uPattern==0 ) break;
204 
205     /* There are now 4 possibilities:
206     **
207     **     1. uPattern is an unescaped match-all character "%",
208     **     2. uPattern is an unescaped match-one character "_",
209     **     3. uPattern is an unescaped escape character, or
210     **     4. uPattern is to be handled as an ordinary character
211     */
212     if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
213       /* Case 1. */
214       uint8_t c;
215 
216       /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
217       ** MATCH_ALL. For each MATCH_ONE, skip one character in the
218       ** test string.
219       */
220       while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
221         if( c==MATCH_ONE ){
222           if( *zString==0 ) return 0;
223           SQLITE_ICU_SKIP_UTF8(zString);
224         }
225         zPattern++;
226       }
227 
228       if( *zPattern==0 ) return 1;
229 
230       while( *zString ){
231         if( icuLikeCompare(zPattern, zString, uEsc) ){
232           return 1;
233         }
234         SQLITE_ICU_SKIP_UTF8(zString);
235       }
236       return 0;
237 
238     }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
239       /* Case 2. */
240       if( *zString==0 ) return 0;
241       SQLITE_ICU_SKIP_UTF8(zString);
242 
243     }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
244       /* Case 3. */
245       prevEscape = 1;
246 
247     }else{
248       /* Case 4. */
249       uint32_t uString;
250       SQLITE_ICU_READ_UTF8(zString, uString);
251       uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT);
252       uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT);
253       if( uString!=uPattern ){
254         return 0;
255       }
256       prevEscape = 0;
257     }
258   }
259 
260   return *zString==0;
261 }
262 
263 /*
264 ** Implementation of the like() SQL function.  This function implements
265 ** the build-in LIKE operator.  The first argument to the function is the
266 ** pattern and the second argument is the string.  So, the SQL statements:
267 **
268 **       A LIKE B
269 **
270 ** is implemented as like(B, A). If there is an escape character E,
271 **
272 **       A LIKE B ESCAPE E
273 **
274 ** is mapped to like(B, A, E).
275 */
icuLikeFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)276 static void icuLikeFunc(
277   sqlite3_context *context,
278   int argc,
279   sqlite3_value **argv
280 ){
281   const unsigned char *zA = sqlite3_value_text(argv[0]);
282   const unsigned char *zB = sqlite3_value_text(argv[1]);
283   UChar32 uEsc = 0;
284 
285   /* Limit the length of the LIKE or GLOB pattern to avoid problems
286   ** of deep recursion and N*N behavior in patternCompare().
287   */
288   if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
289     sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
290     return;
291   }
292 
293 
294   if( argc==3 ){
295     /* The escape character string must consist of a single UTF-8 character.
296     ** Otherwise, return an error.
297     */
298     int nE= sqlite3_value_bytes(argv[2]);
299     const unsigned char *zE = sqlite3_value_text(argv[2]);
300     int i = 0;
301     if( zE==0 ) return;
302     U8_NEXT(zE, i, nE, uEsc);
303     if( i!=nE){
304       sqlite3_result_error(context,
305           "ESCAPE expression must be a single character", -1);
306       return;
307     }
308   }
309 
310   if( zA && zB ){
311     sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
312   }
313 }
314 
315 /*
316 ** Function to delete compiled regexp objects. Registered as
317 ** a destructor function with sqlite3_set_auxdata().
318 */
icuRegexpDelete(void * p)319 static void icuRegexpDelete(void *p){
320   URegularExpression *pExpr = (URegularExpression *)p;
321   uregex_close(pExpr);
322 }
323 
324 /*
325 ** Implementation of SQLite REGEXP operator. This scalar function takes
326 ** two arguments. The first is a regular expression pattern to compile
327 ** the second is a string to match against that pattern. If either
328 ** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
329 ** is 1 if the string matches the pattern, or 0 otherwise.
330 **
331 ** SQLite maps the regexp() function to the regexp() operator such
332 ** that the following two are equivalent:
333 **
334 **     zString REGEXP zPattern
335 **     regexp(zPattern, zString)
336 **
337 ** Uses the following ICU regexp APIs:
338 **
339 **     uregex_open()
340 **     uregex_matches()
341 **     uregex_close()
342 */
icuRegexpFunc(sqlite3_context * p,int nArg,sqlite3_value ** apArg)343 static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
344   UErrorCode status = U_ZERO_ERROR;
345   URegularExpression *pExpr;
346   UBool res;
347   const UChar *zString = sqlite3_value_text16(apArg[1]);
348 
349   (void)nArg;  /* Unused parameter */
350 
351   /* If the left hand side of the regexp operator is NULL,
352   ** then the result is also NULL.
353   */
354   if( !zString ){
355     return;
356   }
357 
358   pExpr = sqlite3_get_auxdata(p, 0);
359   if( !pExpr ){
360     const UChar *zPattern = sqlite3_value_text16(apArg[0]);
361     if( !zPattern ){
362       return;
363     }
364     pExpr = uregex_open(zPattern, -1, 0, 0, &status);
365 
366     if( U_SUCCESS(status) ){
367       sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
368       pExpr = sqlite3_get_auxdata(p, 0);
369     }
370     if( !pExpr ){
371       icuFunctionError(p, "uregex_open", status);
372       return;
373     }
374   }
375 
376   /* Configure the text that the regular expression operates on. */
377   uregex_setText(pExpr, zString, -1, &status);
378   if( !U_SUCCESS(status) ){
379     icuFunctionError(p, "uregex_setText", status);
380     return;
381   }
382 
383   /* Attempt the match */
384   res = uregex_matches(pExpr, 0, &status);
385   if( !U_SUCCESS(status) ){
386     icuFunctionError(p, "uregex_matches", status);
387     return;
388   }
389 
390   /* Set the text that the regular expression operates on to a NULL
391   ** pointer. This is not really necessary, but it is tidier than
392   ** leaving the regular expression object configured with an invalid
393   ** pointer after this function returns.
394   */
395   uregex_setText(pExpr, 0, 0, &status);
396 
397   /* Return 1 or 0. */
398   sqlite3_result_int(p, res ? 1 : 0);
399 }
400 
401 /*
402 ** Implementations of scalar functions for case mapping - upper() and
403 ** lower(). Function upper() converts its input to upper-case (ABC).
404 ** Function lower() converts to lower-case (abc).
405 **
406 ** ICU provides two types of case mapping, "general" case mapping and
407 ** "language specific". Refer to ICU documentation for the differences
408 ** between the two.
409 **
410 ** To utilise "general" case mapping, the upper() or lower() scalar
411 ** functions are invoked with one argument:
412 **
413 **     upper('ABC') -> 'abc'
414 **     lower('abc') -> 'ABC'
415 **
416 ** To access ICU "language specific" case mapping, upper() or lower()
417 ** should be invoked with two arguments. The second argument is the name
418 ** of the locale to use. Passing an empty string ("") or SQL NULL value
419 ** as the second argument is the same as invoking the 1 argument version
420 ** of upper() or lower().
421 **
422 **     lower('I', 'en_us') -> 'i'
423 **     lower('I', 'tr_tr') -> '\u131' (small dotless i)
424 **
425 ** http://www.icu-project.org/userguide/posix.html#case_mappings
426 */
icuCaseFunc16(sqlite3_context * p,int nArg,sqlite3_value ** apArg)427 static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
428   const UChar *zInput;            /* Pointer to input string */
429   UChar *zOutput = 0;             /* Pointer to output buffer */
430   int nInput;                     /* Size of utf-16 input string in bytes */
431   int nOut;                       /* Size of output buffer in bytes */
432   int cnt;
433   int bToUpper;                   /* True for toupper(), false for tolower() */
434   UErrorCode status;
435   const char *zLocale = 0;
436 
437   assert(nArg==1 || nArg==2);
438   bToUpper = (sqlite3_user_data(p)!=0);
439   if( nArg==2 ){
440     zLocale = (const char *)sqlite3_value_text(apArg[1]);
441   }
442 
443   zInput = sqlite3_value_text16(apArg[0]);
444   if( !zInput ){
445     return;
446   }
447   nOut = nInput = sqlite3_value_bytes16(apArg[0]);
448   if( nOut==0 ){
449     sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
450     return;
451   }
452 
453   for(cnt=0; cnt<2; cnt++){
454     UChar *zNew = sqlite3_realloc(zOutput, nOut);
455     if( zNew==0 ){
456       sqlite3_free(zOutput);
457       sqlite3_result_error_nomem(p);
458       return;
459     }
460     zOutput = zNew;
461     status = U_ZERO_ERROR;
462     if( bToUpper ){
463       nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
464     }else{
465       nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
466     }
467 
468     if( U_SUCCESS(status) ){
469       sqlite3_result_text16(p, zOutput, nOut, xFree);
470     }else if( status==U_BUFFER_OVERFLOW_ERROR ){
471       assert( cnt==0 );
472       continue;
473     }else{
474       icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
475     }
476     return;
477   }
478   assert( 0 );     /* Unreachable */
479 }
480 
481 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
482 
483 /*
484 ** Collation sequence destructor function. The pCtx argument points to
485 ** a UCollator structure previously allocated using ucol_open().
486 */
icuCollationDel(void * pCtx)487 static void icuCollationDel(void *pCtx){
488   UCollator *p = (UCollator *)pCtx;
489   ucol_close(p);
490 }
491 
492 /*
493 ** Collation sequence comparison function. The pCtx argument points to
494 ** a UCollator structure previously allocated using ucol_open().
495 */
icuCollationColl(void * pCtx,int nLeft,const void * zLeft,int nRight,const void * zRight)496 static int icuCollationColl(
497   void *pCtx,
498   int nLeft,
499   const void *zLeft,
500   int nRight,
501   const void *zRight
502 ){
503   UCollationResult res;
504   UCollator *p = (UCollator *)pCtx;
505   res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
506   switch( res ){
507     case UCOL_LESS:    return -1;
508     case UCOL_GREATER: return +1;
509     case UCOL_EQUAL:   return 0;
510   }
511   assert(!"Unexpected return value from ucol_strcoll()");
512   return 0;
513 }
514 
515 /*
516 ** Implementation of the scalar function icu_load_collation().
517 **
518 ** This scalar function is used to add ICU collation based collation
519 ** types to an SQLite database connection. It is intended to be called
520 ** as follows:
521 **
522 **     SELECT icu_load_collation(<locale>, <collation-name>);
523 **
524 ** Where <locale> is a string containing an ICU locale identifier (i.e.
525 ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
526 ** collation sequence to create.
527 */
icuLoadCollation(sqlite3_context * p,int nArg,sqlite3_value ** apArg)528 static void icuLoadCollation(
529   sqlite3_context *p,
530   int nArg,
531   sqlite3_value **apArg
532 ){
533   sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
534   UErrorCode status = U_ZERO_ERROR;
535   const char *zLocale;      /* Locale identifier - (eg. "jp_JP") */
536   const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
537   UCollator *pUCollator;    /* ICU library collation object */
538   int rc;                   /* Return code from sqlite3_create_collation_x() */
539 
540   assert(nArg==2);
541   (void)nArg; /* Unused parameter */
542   zLocale = (const char *)sqlite3_value_text(apArg[0]);
543   zName = (const char *)sqlite3_value_text(apArg[1]);
544 
545   if( !zLocale || !zName ){
546     return;
547   }
548 
549   pUCollator = ucol_open(zLocale, &status);
550   if( !U_SUCCESS(status) ){
551     icuFunctionError(p, "ucol_open", status);
552     return;
553   }
554   assert(p);
555 
556   rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
557       icuCollationColl, icuCollationDel
558   );
559   if( rc!=SQLITE_OK ){
560     ucol_close(pUCollator);
561     sqlite3_result_error(p, "Error registering collation function", -1);
562   }
563 }
564 
565 /*
566 ** Register the ICU extension functions with database db.
567 */
sqlite3IcuInit(sqlite3 * db)568 EXPORT_SYMBOLS SQLITE_API int sqlite3IcuInit(sqlite3 *db){
569 # define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
570   static const struct IcuScalar {
571     const char *zName;                        /* Function name */
572     unsigned char nArg;                       /* Number of arguments */
573     unsigned int enc;                         /* Optimal text encoding */
574     unsigned char iContext;                   /* sqlite3_user_data() context */
575     void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
576   } scalars[] = {
577     {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation},
578 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
579     {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS,         0, icuRegexpFunc},
580     {"lower",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
581     {"lower",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
582     {"upper",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
583     {"upper",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
584     {"lower",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
585     {"lower",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
586     {"upper",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
587     {"upper",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
588     {"like",   2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
589     {"like",   3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
590 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
591   };
592 #ifdef HARMONY_OS
593   extern void SetOhosIcuDirectory();
594   SetOhosIcuDirectory();
595 #endif
596   int rc = SQLITE_OK;
597   int i;
598 
599   for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
600     const struct IcuScalar *p = &scalars[i];
601     rc = sqlite3_create_function(
602         db, p->zName, p->nArg, p->enc,
603         p->iContext ? (void*)db : (void*)0,
604         p->xFunc, 0, 0
605     );
606   }
607 
608   return rc;
609 }
610 
611 #if !SQLITE_CORE
612 #ifdef _WIN32
613 __declspec(dllexport)
614 #endif
sqlite3_icu_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)615 SQLITE_API int sqlite3_icu_init(
616   sqlite3 *db,
617   char **pzErrMsg,
618   const sqlite3_api_routines *pApi
619 ){
620   SQLITE_EXTENSION_INIT2(pApi)
621   return sqlite3IcuInit(db);
622 }
623 #endif
624 
625 #endif
626 
627 /************** End of icu.c *************************************************/
628 /************** Begin file fts3_icu.c ****************************************/
629 /*
630 ** 2007 June 22
631 **
632 ** The author disclaims copyright to this source code.  In place of
633 ** a legal notice, here is a blessing:
634 **
635 **    May you do good and not evil.
636 **    May you find forgiveness for yourself and forgive others.
637 **    May you share freely, never taking more than you give.
638 **
639 *************************************************************************
640 ** This file implements a tokenizer for fts3 based on the ICU library.
641 */
642 /* #include "fts3Int.h" */
643 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
644 #ifdef SQLITE_ENABLE_ICU
645 
646 /* #include <assert.h> */
647 /* #include <string.h> */
648 /* #include "fts3_tokenizer.h" */
649 
650 #include <unicode/ubrk.h>
651 /* #include <unicode/ucol.h> */
652 /* #include <unicode/ustring.h> */
653 #include <unicode/utf16.h>
654 
655 typedef struct IcuTokenizer IcuTokenizer;
656 typedef struct IcuCursor IcuCursor;
657 
658 struct IcuTokenizer {
659   sqlite3_tokenizer base;
660   char *zLocale;
661 };
662 
663 struct IcuCursor {
664   sqlite3_tokenizer_cursor base;
665 
666   UBreakIterator *pIter;      /* ICU break-iterator object */
667   int nChar;                  /* Number of UChar elements in pInput */
668   UChar *aChar;               /* Copy of input using utf-16 encoding */
669   int *aOffset;               /* Offsets of each character in utf-8 input */
670 
671   int nBuffer;
672   char *zBuffer;
673 
674   int iToken;
675 };
676 
677 /*
678 ** Create a new tokenizer instance.
679 */
icuCreate(int argc,const char * const * argv,sqlite3_tokenizer ** ppTokenizer)680 static int icuCreate(
681   int argc,                            /* Number of entries in argv[] */
682   const char * const *argv,            /* Tokenizer creation arguments */
683   sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
684 ){
685   IcuTokenizer *p;
686   int n = 0;
687 
688   if( argc>0 ){
689     n = strlen(argv[0])+1;
690   }
691   p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
692   if( !p ){
693     return SQLITE_NOMEM;
694   }
695   memset(p, 0, sizeof(IcuTokenizer));
696 
697   if( n ){
698     p->zLocale = (char *)&p[1];
699     memcpy(p->zLocale, argv[0], n);
700   }
701 
702   *ppTokenizer = (sqlite3_tokenizer *)p;
703 
704   return SQLITE_OK;
705 }
706 
707 /*
708 ** Destroy a tokenizer
709 */
icuDestroy(sqlite3_tokenizer * pTokenizer)710 static int icuDestroy(sqlite3_tokenizer *pTokenizer){
711   IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
712   sqlite3_free(p);
713   return SQLITE_OK;
714 }
715 
716 /*
717 ** Prepare to begin tokenizing a particular string.  The input
718 ** string to be tokenized is pInput[0..nBytes-1].  A cursor
719 ** used to incrementally tokenize this string is returned in
720 ** *ppCursor.
721 */
icuOpen(sqlite3_tokenizer * pTokenizer,const char * zInput,int nInput,sqlite3_tokenizer_cursor ** ppCursor)722 static int icuOpen(
723   sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
724   const char *zInput,                    /* Input string */
725   int nInput,                            /* Length of zInput in bytes */
726   sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
727 ){
728   IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
729   IcuCursor *pCsr;
730 
731   const int32_t opt = U_FOLD_CASE_DEFAULT;
732   UErrorCode status = U_ZERO_ERROR;
733   int nChar;
734 
735   UChar32 c;
736   int iInput = 0;
737   int iOut = 0;
738 
739   *ppCursor = 0;
740 
741   if( zInput==0 ){
742     nInput = 0;
743     zInput = "";
744   }else if( nInput<0 ){
745     nInput = strlen(zInput);
746   }
747   nChar = nInput+1;
748   pCsr = (IcuCursor *)sqlite3_malloc64(
749       sizeof(IcuCursor) +                /* IcuCursor */
750       ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
751       (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
752   );
753   if( !pCsr ){
754     return SQLITE_NOMEM;
755   }
756   memset(pCsr, 0, sizeof(IcuCursor));
757   pCsr->aChar = (UChar *)&pCsr[1];
758   pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
759 
760   pCsr->aOffset[iOut] = iInput;
761   U8_NEXT(zInput, iInput, nInput, c);
762   while( c>0 ){
763     int isError = 0;
764     c = u_foldCase(c, opt);
765     U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
766     if( isError ){
767       sqlite3_free(pCsr);
768       return SQLITE_ERROR;
769     }
770     pCsr->aOffset[iOut] = iInput;
771 
772     if( iInput<nInput ){
773       U8_NEXT(zInput, iInput, nInput, c);
774     }else{
775       c = 0;
776     }
777   }
778 
779   pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
780   if( !U_SUCCESS(status) ){
781     sqlite3_free(pCsr);
782     return SQLITE_ERROR;
783   }
784   pCsr->nChar = iOut;
785 
786   ubrk_first(pCsr->pIter);
787   *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
788   return SQLITE_OK;
789 }
790 
791 /*
792 ** Close a tokenization cursor previously opened by a call to icuOpen().
793 */
icuClose(sqlite3_tokenizer_cursor * pCursor)794 static int icuClose(sqlite3_tokenizer_cursor *pCursor){
795   IcuCursor *pCsr = (IcuCursor *)pCursor;
796   ubrk_close(pCsr->pIter);
797   sqlite3_free(pCsr->zBuffer);
798   sqlite3_free(pCsr);
799   return SQLITE_OK;
800 }
801 
802 /*
803 ** Extract the next token from a tokenization cursor.
804 */
icuNext(sqlite3_tokenizer_cursor * pCursor,const char ** ppToken,int * pnBytes,int * piStartOffset,int * piEndOffset,int * piPosition)805 static int icuNext(
806   sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
807   const char **ppToken,               /* OUT: *ppToken is the token text */
808   int *pnBytes,                       /* OUT: Number of bytes in token */
809   int *piStartOffset,                 /* OUT: Starting offset of token */
810   int *piEndOffset,                   /* OUT: Ending offset of token */
811   int *piPosition                     /* OUT: Position integer of token */
812 ){
813   IcuCursor *pCsr = (IcuCursor *)pCursor;
814 
815   int iStart = 0;
816   int iEnd = 0;
817   int nByte = 0;
818 
819   while( iStart==iEnd ){
820     UChar32 c;
821 
822     iStart = ubrk_current(pCsr->pIter);
823     iEnd = ubrk_next(pCsr->pIter);
824     if( iEnd==UBRK_DONE ){
825       return SQLITE_DONE;
826     }
827 
828     while( iStart<iEnd ){
829       int iWhite = iStart;
830       U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
831       if( u_isspace(c) ){
832         iStart = iWhite;
833       }else{
834         break;
835       }
836     }
837     assert(iStart<=iEnd);
838   }
839 
840   do {
841     UErrorCode status = U_ZERO_ERROR;
842     if( nByte ){
843       char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
844       if( !zNew ){
845         return SQLITE_NOMEM;
846       }
847       pCsr->zBuffer = zNew;
848       pCsr->nBuffer = nByte;
849     }
850 
851     u_strToUTF8(
852         pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
853         &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
854         &status                                  /* Output success/failure */
855     );
856   } while( nByte>pCsr->nBuffer );
857 
858   *ppToken = pCsr->zBuffer;
859   *pnBytes = nByte;
860   *piStartOffset = pCsr->aOffset[iStart];
861   *piEndOffset = pCsr->aOffset[iEnd];
862   *piPosition = pCsr->iToken++;
863 
864   return SQLITE_OK;
865 }
866 
867 /*
868 ** The set of routines that implement the simple tokenizer
869 */
870 static const sqlite3_tokenizer_module icuTokenizerModule = {
871   0,                           /* iVersion    */
872   icuCreate,                   /* xCreate     */
873   icuDestroy,                  /* xCreate     */
874   icuOpen,                     /* xOpen       */
875   icuClose,                    /* xClose      */
876   icuNext,                     /* xNext       */
877   0,                           /* xLanguageid */
878 };
879 
880 /*
881 ** Set *ppModule to point at the implementation of the ICU tokenizer.
882 */
sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const ** ppModule)883 EXPORT_SYMBOLS SQLITE_API void sqlite3Fts3IcuTokenizerModule(
884   sqlite3_tokenizer_module const**ppModule
885 ){
886   *ppModule = &icuTokenizerModule;
887 }
888 
889 #endif /* defined(SQLITE_ENABLE_ICU) */
890 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
891 
892 /************** End of fts3_icu.c ********************************************/