• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From 53b13f3aa2f41a8d30eac3702fe066d3a4a616ee Mon Sep 17 00:00:00 2001
2From: MartinChoo <214582617@qq.com>
3Date: Wed, 23 Jul 2025 17:39:10 +0800
4Subject: [PATCH 02/12] Enable and optimize ICU
5
6---
7 src/sqlite3.c    | 1013 ++++------------------------------------------
8 src/sqlite3icu.c |  888 ++++++++++++++++++++++++++++++++++++++++
9 2 files changed, 962 insertions(+), 939 deletions(-)
10 create mode 100644 src/sqlite3icu.c
11
12diff --git a/src/sqlite3.c b/src/sqlite3.c
13index b132937..efc4cd4 100644
14--- a/src/sqlite3.c
15+++ b/src/sqlite3.c
16@@ -2502,6 +2502,7 @@ struct sqlite3_mem_methods {
17 #define SQLITE_CONFIG_SORTERREF_SIZE      28  /* int nByte */
18 #define SQLITE_CONFIG_MEMDB_MAXSIZE       29  /* sqlite3_int64 */
19 #define SQLITE_CONFIG_ROWID_IN_VIEW       30  /* int* */
20+#define SQLITE_CONFIG_ENABLE_ICU          41  /* boolean */
21
22 /*
23 ** CAPI3REF: Database Connection Configuration Options
24@@ -3289,6 +3290,7 @@ SQLITE_API void sqlite3_free_table(char **result);
25 #  define EXPORT_SYMBOLS
26 #endif
27 #endif
28+
29 /*
30 ** CAPI3REF: Formatted String Printing Functions
31 **
32@@ -178413,6 +178415,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db);
33 /************** End of fts3.h ************************************************/
34 /************** Continuing where we left off in main.c ***********************/
35 #endif
36+
37 #ifdef SQLITE_ENABLE_RTREE
38 /************** Include rtree.h in the middle of main.c **********************/
39 /************** Begin file rtree.h *******************************************/
40@@ -178475,13 +178478,54 @@ SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db);
41 extern "C" {
42 #endif  /* __cplusplus */
43
44-SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db);
45+SQLITE_PRIVATE int sqlite3IcuInitInner(sqlite3 *db);
46
47 #if 0
48 }  /* extern "C" */
49 #endif  /* __cplusplus */
50
51 /************** End of sqliteicu.h *******************************************/
52+#ifndef _WIN32
53+#include <dlfcn.h>
54+#endif
55+
56+typedef void (*sqlite3Fts3IcuTokenizerModule_ptr)(sqlite3_tokenizer_module const** ppModule);
57+typedef int (*sqlite3IcuInit_ptr)(sqlite3 *db);
58+static sqlite3Fts3IcuTokenizerModule_ptr tokenModulePtr = NULL;
59+static sqlite3IcuInit_ptr icuInitPtr = NULL;
60+static u32 icuEnable = 0u;
61+static u32 icuInit = 0u;
62+static void *g_library = NULL;
63+
64+/* Load libsqliteicu.z.so once and bind tokenModulePtr/icuInitPtr; SQLITE_ERROR if either step fails. */
65+int sqlite3IcuModuleInit(void){
66+  if( icuInit ) return SQLITE_OK;          /* already bound by an earlier call */
67+#ifndef _WIN32
68+  g_library = dlopen("libsqliteicu.z.so", RTLD_LAZY);
69+  if( g_library==NULL ){
70+    sqlite3_log(SQLITE_ERROR, "load icu so failed");
71+    return SQLITE_ERROR;
72+  }
73+  tokenModulePtr = (sqlite3Fts3IcuTokenizerModule_ptr)dlsym(g_library, "sqlite3Fts3IcuTokenizerModule");
74+  icuInitPtr = (sqlite3IcuInit_ptr)dlsym(g_library, "sqlite3IcuInit");
75+  if( tokenModulePtr==NULL || icuInitPtr==NULL ){
76+    sqlite3_log(SQLITE_ERROR, "load icu init function failed");
77+    tokenModulePtr = NULL; icuInitPtr = NULL;  /* never leave one symbol bound without the other */
78+    dlclose(g_library); g_library = NULL;      /* icuInit stays 0, so a retry would leak this handle */
79+    return SQLITE_ERROR;
80+  }
81+  icuInit = 1u;
82+#endif
83+  return SQLITE_OK;                        /* also reached on _WIN32, where nothing is loaded */
84+}
85+
86+/* Built-in extension entry: forwards db to the dynamically loaded sqlite3IcuInit; returns SQLITE_OK untouched when ICU is disabled. */
87+SQLITE_PRIVATE int sqlite3IcuInitInner(sqlite3 *db)
88+{
89+  if( !icuEnable ) return SQLITE_OK;
90+  /* icuInitPtr is still NULL if nothing was loaded (e.g. the _WIN32 build): report an error rather than call through it. */
91+  return icuInitPtr!=NULL ? icuInitPtr(db) : SQLITE_ERROR;
92+}
93 /************** Continuing where we left off in main.c ***********************/
94 #endif
95
96@@ -178521,7 +178565,7 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = {
97   sqlite3Fts5Init,
98 #endif
99 #if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS)
100-  sqlite3IcuInit,
101+  sqlite3IcuInitInner,
102 #endif
103 #ifdef SQLITE_ENABLE_RTREE
104   sqlite3RtreeInit,
105@@ -178913,6 +178957,19 @@ SQLITE_API int sqlite3_shutdown(void){
106 SQLITE_API int sqlite3_config(int op, ...){
107   va_list ap;
108   int rc = SQLITE_OK;
109+  va_start(ap, op);
110+
111+#if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS)
112+  if( op==SQLITE_CONFIG_ENABLE_ICU ){
113+    /* Boolean option: zero turns runtime ICU loading off, any other
114+    ** value turns it on.  Handled here, ahead of the switch below. */
115+    int iVal = va_arg(ap, int);
116+    icuEnable = iVal!=0 ? 1u : 0u;
117+    /* va_start() above must be matched by va_end() before returning. */
118+    va_end(ap);
119+    return rc;
120+  }
121+#endif /* SQLITE_ENABLE_ICU */
122
123   /* sqlite3_config() normally returns SQLITE_MISUSE if it is invoked while
124   ** the SQLite library is in use.  Except, a few selected opcodes
125@@ -178930,7 +178987,6 @@ SQLITE_API int sqlite3_config(int op, ...){
126     testcase( op==SQLITE_CONFIG_PCACHE_HDRSZ );
127   }
128
129-  va_start(ap, op);
130   switch( op ){
131
132     /* Mutex configuration options are only available in a threadsafe
133@@ -182053,6 +182109,12 @@ static int openDatabase(
134   sqlite3RegisterPerConnectionBuiltinFunctions(db);
135   rc = sqlite3_errcode(db);
136
137+#if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS)
138+  /* Keep any earlier error in rc; on load failure rc stays set, so the loop below is skipped as for a failing built-in extension. */
139+  if( rc==SQLITE_OK && icuEnable ){
140+    rc = sqlite3IcuModuleInit();
141+  }
142+#endif
143
144   /* Load compiled-in extensions */
145   for(i=0; rc==SQLITE_OK && i<ArraySize(sqlite3BuiltinExtensions); i++){
146@@ -184344,114 +184406,6 @@ SQLITE_EXTENSION_INIT3
147 ** the tokenization rules supplied by a specific sqlite3_tokenizer
148 ** object.
149 */
150-typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
151-typedef struct sqlite3_tokenizer sqlite3_tokenizer;
152-typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
153-
154-struct sqlite3_tokenizer_module {
155-
156-  /*
157-  ** Structure version. Should always be set to 0 or 1.
158-  */
159-  int iVersion;
160-
161-  /*
162-  ** Create a new tokenizer. The values in the argv[] array are the
163-  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
164-  ** TABLE statement that created the fts3 table. For example, if
165-  ** the following SQL is executed:
166-  **
167-  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
168-  **
169-  ** then argc is set to 2, and the argv[] array contains pointers
170-  ** to the strings "arg1" and "arg2".
171-  **
172-  ** This method should return either SQLITE_OK (0), or an SQLite error
173-  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
174-  ** to point at the newly created tokenizer structure. The generic
175-  ** sqlite3_tokenizer.pModule variable should not be initialized by
176-  ** this callback. The caller will do so.
177-  */
178-  int (*xCreate)(
179-    int argc,                           /* Size of argv array */
180-    const char *const*argv,             /* Tokenizer argument strings */
181-    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
182-  );
183-
184-  /*
185-  ** Destroy an existing tokenizer. The fts3 module calls this method
186-  ** exactly once for each successful call to xCreate().
187-  */
188-  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
189-
190-  /*
191-  ** Create a tokenizer cursor to tokenize an input buffer. The caller
192-  ** is responsible for ensuring that the input buffer remains valid
193-  ** until the cursor is closed (using the xClose() method).
194-  */
195-  int (*xOpen)(
196-    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
197-    const char *pInput, int nBytes,      /* Input buffer */
198-    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
199-  );
200-
201-  /*
202-  ** Destroy an existing tokenizer cursor. The fts3 module calls this
203-  ** method exactly once for each successful call to xOpen().
204-  */
205-  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
206-
207-  /*
208-  ** Retrieve the next token from the tokenizer cursor pCursor. This
209-  ** method should either return SQLITE_OK and set the values of the
210-  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
211-  ** the end of the buffer has been reached, or an SQLite error code.
212-  **
213-  ** *ppToken should be set to point at a buffer containing the
214-  ** normalized version of the token (i.e. after any case-folding and/or
215-  ** stemming has been performed). *pnBytes should be set to the length
216-  ** of this buffer in bytes. The input text that generated the token is
217-  ** identified by the byte offsets returned in *piStartOffset and
218-  ** *piEndOffset. *piStartOffset should be set to the index of the first
219-  ** byte of the token in the input buffer. *piEndOffset should be set
220-  ** to the index of the first byte just past the end of the token in
221-  ** the input buffer.
222-  **
223-  ** The buffer *ppToken is set to point at is managed by the tokenizer
224-  ** implementation. It is only required to be valid until the next call
225-  ** to xNext() or xClose().
226-  */
227-  /* TODO(shess) current implementation requires pInput to be
228-  ** nul-terminated.  This should either be fixed, or pInput/nBytes
229-  ** should be converted to zInput.
230-  */
231-  int (*xNext)(
232-    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
233-    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
234-    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
235-    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
236-    int *piPosition      /* OUT: Number of tokens returned before this one */
237-  );
238-
239-  /***********************************************************************
240-  ** Methods below this point are only available if iVersion>=1.
241-  */
242-
243-  /*
244-  ** Configure the language id of a tokenizer cursor.
245-  */
246-  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
247-};
248-
249-struct sqlite3_tokenizer {
250-  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
251-  /* Tokenizer implementations will typically add additional fields */
252-};
253-
254-struct sqlite3_tokenizer_cursor {
255-  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
256-  /* Tokenizer implementations will typically add additional fields */
257-};
258
259 int fts3_global_term_cnt(int iTerm, int iCol);
260 int fts3_term_cnt(int iTerm, int iCol);
261@@ -189003,9 +188957,6 @@ SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module co
262 #ifndef SQLITE_DISABLE_FTS3_UNICODE
263 SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
264 #endif
265-#ifdef SQLITE_ENABLE_ICU
266-SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
267-#endif
268
269 /*
270 ** Initialize the fts3 extension. If this extension is built as part
271@@ -189024,7 +188975,14 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
272
273 #ifdef SQLITE_ENABLE_ICU
274   const sqlite3_tokenizer_module *pIcu = 0;
275-  sqlite3Fts3IcuTokenizerModule(&pIcu);
276+  if( icuEnable ){
277+    if( tokenModulePtr!=NULL ){
278+      tokenModulePtr(&pIcu);
279+    }else{
280+      sqlite3_log(SQLITE_ERROR, "icu module ptr is null");
281+      return SQLITE_ERROR;
282+    }
283+  }
284 #endif
285
286 #ifndef SQLITE_DISABLE_FTS3_UNICODE
287@@ -189060,7 +189018,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
288      || sqlite3Fts3HashInsert(&pHash->hash, "unicode61", 10, (void *)pUnicode)
289 #endif
290 #ifdef SQLITE_ENABLE_ICU
291-     || (pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
292+     || (icuEnable && pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
293 #endif
294     ){
295       rc = SQLITE_NOMEM;
296@@ -213799,829 +213757,6 @@ SQLITE_API int sqlite3_rtree_init(
297 #endif
298
299 /************** End of rtree.c ***********************************************/
300-/************** Begin file icu.c *********************************************/
301-/*
302-** 2007 May 6
303-**
304-** The author disclaims copyright to this source code.  In place of
305-** a legal notice, here is a blessing:
306-**
307-**    May you do good and not evil.
308-**    May you find forgiveness for yourself and forgive others.
309-**    May you share freely, never taking more than you give.
310-**
311-*************************************************************************
312-** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
313-**
314-** This file implements an integration between the ICU library
315-** ("International Components for Unicode", an open-source library
316-** for handling unicode data) and SQLite. The integration uses
317-** ICU to provide the following to SQLite:
318-**
319-**   * An implementation of the SQL regexp() function (and hence REGEXP
320-**     operator) using the ICU uregex_XX() APIs.
321-**
322-**   * Implementations of the SQL scalar upper() and lower() functions
323-**     for case mapping.
324-**
325-**   * Integration of ICU and SQLite collation sequences.
326-**
327-**   * An implementation of the LIKE operator that uses ICU to
328-**     provide case-independent matching.
329-*/
330-
331-#if !defined(SQLITE_CORE)                  \
332- || defined(SQLITE_ENABLE_ICU)             \
333- || defined(SQLITE_ENABLE_ICU_COLLATIONS)
334-
335-/* Include ICU headers */
336-#include <unicode/utypes.h>
337-#include <unicode/uregex.h>
338-#include <unicode/ustring.h>
339-#include <unicode/ucol.h>
340-
341-/* #include <assert.h> */
342-
343-#ifndef SQLITE_CORE
344-/*   #include "sqlite3ext.h" */
345-  SQLITE_EXTENSION_INIT1
346-#else
347-/*   #include "sqlite3.h" */
348-#endif
349-
350-/*
351-** This function is called when an ICU function called from within
352-** the implementation of an SQL scalar function returns an error.
353-**
354-** The scalar function context passed as the first argument is
355-** loaded with an error message based on the following two args.
356-*/
357-static void icuFunctionError(
358-  sqlite3_context *pCtx,       /* SQLite scalar function context */
359-  const char *zName,           /* Name of ICU function that failed */
360-  UErrorCode e                 /* Error code returned by ICU function */
361-){
362-  char zBuf[128];
363-  sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
364-  zBuf[127] = '\0';
365-  sqlite3_result_error(pCtx, zBuf, -1);
366-}
367-
368-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
369-
370-/*
371-** Maximum length (in bytes) of the pattern in a LIKE or GLOB
372-** operator.
373-*/
374-#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
375-# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
376-#endif
377-
378-/*
379-** Version of sqlite3_free() that is always a function, never a macro.
380-*/
381-static void xFree(void *p){
382-  sqlite3_free(p);
383-}
384-
385-/*
386-** This lookup table is used to help decode the first byte of
387-** a multi-byte UTF8 character. It is copied here from SQLite source
388-** code file utf8.c.
389-*/
390-static const unsigned char icuUtf8Trans1[] = {
391-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
392-  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
393-  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
394-  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
395-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396-  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
398-  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
399-};
400-
401-#define SQLITE_ICU_READ_UTF8(zIn, c)                       \
402-  c = *(zIn++);                                            \
403-  if( c>=0xc0 ){                                           \
404-    c = icuUtf8Trans1[c-0xc0];                             \
405-    while( (*zIn & 0xc0)==0x80 ){                          \
406-      c = (c<<6) + (0x3f & *(zIn++));                      \
407-    }                                                      \
408-  }
409-
410-#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
411-  assert( *zIn );                                          \
412-  if( *(zIn++)>=0xc0 ){                                    \
413-    while( (*zIn & 0xc0)==0x80 ){zIn++;}                   \
414-  }
415-
416-
417-/*
418-** Compare two UTF-8 strings for equality where the first string is
419-** a "LIKE" expression. Return true (1) if they are the same and
420-** false (0) if they are different.
421-*/
422-static int icuLikeCompare(
423-  const uint8_t *zPattern,   /* LIKE pattern */
424-  const uint8_t *zString,    /* The UTF-8 string to compare against */
425-  const UChar32 uEsc         /* The escape character */
426-){
427-  static const uint32_t MATCH_ONE = (uint32_t)'_';
428-  static const uint32_t MATCH_ALL = (uint32_t)'%';
429-
430-  int prevEscape = 0;     /* True if the previous character was uEsc */
431-
432-  while( 1 ){
433-
434-    /* Read (and consume) the next character from the input pattern. */
435-    uint32_t uPattern;
436-    SQLITE_ICU_READ_UTF8(zPattern, uPattern);
437-    if( uPattern==0 ) break;
438-
439-    /* There are now 4 possibilities:
440-    **
441-    **     1. uPattern is an unescaped match-all character "%",
442-    **     2. uPattern is an unescaped match-one character "_",
443-    **     3. uPattern is an unescaped escape character, or
444-    **     4. uPattern is to be handled as an ordinary character
445-    */
446-    if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
447-      /* Case 1. */
448-      uint8_t c;
449-
450-      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
451-      ** MATCH_ALL. For each MATCH_ONE, skip one character in the
452-      ** test string.
453-      */
454-      while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
455-        if( c==MATCH_ONE ){
456-          if( *zString==0 ) return 0;
457-          SQLITE_ICU_SKIP_UTF8(zString);
458-        }
459-        zPattern++;
460-      }
461-
462-      if( *zPattern==0 ) return 1;
463-
464-      while( *zString ){
465-        if( icuLikeCompare(zPattern, zString, uEsc) ){
466-          return 1;
467-        }
468-        SQLITE_ICU_SKIP_UTF8(zString);
469-      }
470-      return 0;
471-
472-    }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
473-      /* Case 2. */
474-      if( *zString==0 ) return 0;
475-      SQLITE_ICU_SKIP_UTF8(zString);
476-
477-    }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
478-      /* Case 3. */
479-      prevEscape = 1;
480-
481-    }else{
482-      /* Case 4. */
483-      uint32_t uString;
484-      SQLITE_ICU_READ_UTF8(zString, uString);
485-      uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT);
486-      uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT);
487-      if( uString!=uPattern ){
488-        return 0;
489-      }
490-      prevEscape = 0;
491-    }
492-  }
493-
494-  return *zString==0;
495-}
496-
497-/*
498-** Implementation of the like() SQL function.  This function implements
499-** the build-in LIKE operator.  The first argument to the function is the
500-** pattern and the second argument is the string.  So, the SQL statements:
501-**
502-**       A LIKE B
503-**
504-** is implemented as like(B, A). If there is an escape character E,
505-**
506-**       A LIKE B ESCAPE E
507-**
508-** is mapped to like(B, A, E).
509-*/
510-static void icuLikeFunc(
511-  sqlite3_context *context,
512-  int argc,
513-  sqlite3_value **argv
514-){
515-  const unsigned char *zA = sqlite3_value_text(argv[0]);
516-  const unsigned char *zB = sqlite3_value_text(argv[1]);
517-  UChar32 uEsc = 0;
518-
519-  /* Limit the length of the LIKE or GLOB pattern to avoid problems
520-  ** of deep recursion and N*N behavior in patternCompare().
521-  */
522-  if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
523-    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
524-    return;
525-  }
526-
527-
528-  if( argc==3 ){
529-    /* The escape character string must consist of a single UTF-8 character.
530-    ** Otherwise, return an error.
531-    */
532-    int nE= sqlite3_value_bytes(argv[2]);
533-    const unsigned char *zE = sqlite3_value_text(argv[2]);
534-    int i = 0;
535-    if( zE==0 ) return;
536-    U8_NEXT(zE, i, nE, uEsc);
537-    if( i!=nE){
538-      sqlite3_result_error(context,
539-          "ESCAPE expression must be a single character", -1);
540-      return;
541-    }
542-  }
543-
544-  if( zA && zB ){
545-    sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
546-  }
547-}
548-
549-/*
550-** Function to delete compiled regexp objects. Registered as
551-** a destructor function with sqlite3_set_auxdata().
552-*/
553-static void icuRegexpDelete(void *p){
554-  URegularExpression *pExpr = (URegularExpression *)p;
555-  uregex_close(pExpr);
556-}
557-
558-/*
559-** Implementation of SQLite REGEXP operator. This scalar function takes
560-** two arguments. The first is a regular expression pattern to compile
561-** the second is a string to match against that pattern. If either
562-** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
563-** is 1 if the string matches the pattern, or 0 otherwise.
564-**
565-** SQLite maps the regexp() function to the regexp() operator such
566-** that the following two are equivalent:
567-**
568-**     zString REGEXP zPattern
569-**     regexp(zPattern, zString)
570-**
571-** Uses the following ICU regexp APIs:
572-**
573-**     uregex_open()
574-**     uregex_matches()
575-**     uregex_close()
576-*/
577-static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
578-  UErrorCode status = U_ZERO_ERROR;
579-  URegularExpression *pExpr;
580-  UBool res;
581-  const UChar *zString = sqlite3_value_text16(apArg[1]);
582-
583-  (void)nArg;  /* Unused parameter */
584-
585-  /* If the left hand side of the regexp operator is NULL,
586-  ** then the result is also NULL.
587-  */
588-  if( !zString ){
589-    return;
590-  }
591-
592-  pExpr = sqlite3_get_auxdata(p, 0);
593-  if( !pExpr ){
594-    const UChar *zPattern = sqlite3_value_text16(apArg[0]);
595-    if( !zPattern ){
596-      return;
597-    }
598-    pExpr = uregex_open(zPattern, -1, 0, 0, &status);
599-
600-    if( U_SUCCESS(status) ){
601-      sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
602-      pExpr = sqlite3_get_auxdata(p, 0);
603-    }
604-    if( !pExpr ){
605-      icuFunctionError(p, "uregex_open", status);
606-      return;
607-    }
608-  }
609-
610-  /* Configure the text that the regular expression operates on. */
611-  uregex_setText(pExpr, zString, -1, &status);
612-  if( !U_SUCCESS(status) ){
613-    icuFunctionError(p, "uregex_setText", status);
614-    return;
615-  }
616-
617-  /* Attempt the match */
618-  res = uregex_matches(pExpr, 0, &status);
619-  if( !U_SUCCESS(status) ){
620-    icuFunctionError(p, "uregex_matches", status);
621-    return;
622-  }
623-
624-  /* Set the text that the regular expression operates on to a NULL
625-  ** pointer. This is not really necessary, but it is tidier than
626-  ** leaving the regular expression object configured with an invalid
627-  ** pointer after this function returns.
628-  */
629-  uregex_setText(pExpr, 0, 0, &status);
630-
631-  /* Return 1 or 0. */
632-  sqlite3_result_int(p, res ? 1 : 0);
633-}
634-
635-/*
636-** Implementations of scalar functions for case mapping - upper() and
637-** lower(). Function upper() converts its input to upper-case (ABC).
638-** Function lower() converts to lower-case (abc).
639-**
640-** ICU provides two types of case mapping, "general" case mapping and
641-** "language specific". Refer to ICU documentation for the differences
642-** between the two.
643-**
644-** To utilise "general" case mapping, the upper() or lower() scalar
645-** functions are invoked with one argument:
646-**
647-**     upper('ABC') -> 'abc'
648-**     lower('abc') -> 'ABC'
649-**
650-** To access ICU "language specific" case mapping, upper() or lower()
651-** should be invoked with two arguments. The second argument is the name
652-** of the locale to use. Passing an empty string ("") or SQL NULL value
653-** as the second argument is the same as invoking the 1 argument version
654-** of upper() or lower().
655-**
656-**     lower('I', 'en_us') -> 'i'
657-**     lower('I', 'tr_tr') -> '\u131' (small dotless i)
658-**
659-** http://www.icu-project.org/userguide/posix.html#case_mappings
660-*/
661-static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
662-  const UChar *zInput;            /* Pointer to input string */
663-  UChar *zOutput = 0;             /* Pointer to output buffer */
664-  int nInput;                     /* Size of utf-16 input string in bytes */
665-  int nOut;                       /* Size of output buffer in bytes */
666-  int cnt;
667-  int bToUpper;                   /* True for toupper(), false for tolower() */
668-  UErrorCode status;
669-  const char *zLocale = 0;
670-
671-  assert(nArg==1 || nArg==2);
672-  bToUpper = (sqlite3_user_data(p)!=0);
673-  if( nArg==2 ){
674-    zLocale = (const char *)sqlite3_value_text(apArg[1]);
675-  }
676-
677-  zInput = sqlite3_value_text16(apArg[0]);
678-  if( !zInput ){
679-    return;
680-  }
681-  nOut = nInput = sqlite3_value_bytes16(apArg[0]);
682-  if( nOut==0 ){
683-    sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
684-    return;
685-  }
686-
687-  for(cnt=0; cnt<2; cnt++){
688-    UChar *zNew = sqlite3_realloc(zOutput, nOut);
689-    if( zNew==0 ){
690-      sqlite3_free(zOutput);
691-      sqlite3_result_error_nomem(p);
692-      return;
693-    }
694-    zOutput = zNew;
695-    status = U_ZERO_ERROR;
696-    if( bToUpper ){
697-      nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
698-    }else{
699-      nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
700-    }
701-
702-    if( U_SUCCESS(status) ){
703-      sqlite3_result_text16(p, zOutput, nOut, xFree);
704-    }else if( status==U_BUFFER_OVERFLOW_ERROR ){
705-      assert( cnt==0 );
706-      continue;
707-    }else{
708-      icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
709-    }
710-    return;
711-  }
712-  assert( 0 );     /* Unreachable */
713-}
714-
715-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
716-
717-/*
718-** Collation sequence destructor function. The pCtx argument points to
719-** a UCollator structure previously allocated using ucol_open().
720-*/
721-static void icuCollationDel(void *pCtx){
722-  UCollator *p = (UCollator *)pCtx;
723-  ucol_close(p);
724-}
725-
726-/*
727-** Collation sequence comparison function. The pCtx argument points to
728-** a UCollator structure previously allocated using ucol_open().
729-*/
730-static int icuCollationColl(
731-  void *pCtx,
732-  int nLeft,
733-  const void *zLeft,
734-  int nRight,
735-  const void *zRight
736-){
737-  UCollationResult res;
738-  UCollator *p = (UCollator *)pCtx;
739-  res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
740-  switch( res ){
741-    case UCOL_LESS:    return -1;
742-    case UCOL_GREATER: return +1;
743-    case UCOL_EQUAL:   return 0;
744-  }
745-  assert(!"Unexpected return value from ucol_strcoll()");
746-  return 0;
747-}
748-
749-/*
750-** Implementation of the scalar function icu_load_collation().
751-**
752-** This scalar function is used to add ICU collation based collation
753-** types to an SQLite database connection. It is intended to be called
754-** as follows:
755-**
756-**     SELECT icu_load_collation(<locale>, <collation-name>);
757-**
758-** Where <locale> is a string containing an ICU locale identifier (i.e.
759-** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
760-** collation sequence to create.
761-*/
762-static void icuLoadCollation(
763-  sqlite3_context *p,
764-  int nArg,
765-  sqlite3_value **apArg
766-){
767-  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
768-  UErrorCode status = U_ZERO_ERROR;
769-  const char *zLocale;      /* Locale identifier - (eg. "jp_JP") */
770-  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
771-  UCollator *pUCollator;    /* ICU library collation object */
772-  int rc;                   /* Return code from sqlite3_create_collation_x() */
773-
774-  assert(nArg==2);
775-  (void)nArg; /* Unused parameter */
776-  zLocale = (const char *)sqlite3_value_text(apArg[0]);
777-  zName = (const char *)sqlite3_value_text(apArg[1]);
778-
779-  if( !zLocale || !zName ){
780-    return;
781-  }
782-
783-  pUCollator = ucol_open(zLocale, &status);
784-  if( !U_SUCCESS(status) ){
785-    icuFunctionError(p, "ucol_open", status);
786-    return;
787-  }
788-  assert(p);
789-
790-  rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
791-      icuCollationColl, icuCollationDel
792-  );
793-  if( rc!=SQLITE_OK ){
794-    ucol_close(pUCollator);
795-    sqlite3_result_error(p, "Error registering collation function", -1);
796-  }
797-}
798-
799-/*
800-** Register the ICU extension functions with database db.
801-*/
802-SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){
803-# define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
804-  static const struct IcuScalar {
805-    const char *zName;                        /* Function name */
806-    unsigned char nArg;                       /* Number of arguments */
807-    unsigned int enc;                         /* Optimal text encoding */
808-    unsigned char iContext;                   /* sqlite3_user_data() context */
809-    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
810-  } scalars[] = {
811-    {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation},
812-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
813-    {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS,         0, icuRegexpFunc},
814-    {"lower",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
815-    {"lower",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
816-    {"upper",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
817-    {"upper",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
818-    {"lower",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
819-    {"lower",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
820-    {"upper",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
821-    {"upper",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
822-    {"like",   2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
823-    {"like",   3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
824-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
825-  };
826-  int rc = SQLITE_OK;
827-  int i;
828-
829-  for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
830-    const struct IcuScalar *p = &scalars[i];
831-    rc = sqlite3_create_function(
832-        db, p->zName, p->nArg, p->enc,
833-        p->iContext ? (void*)db : (void*)0,
834-        p->xFunc, 0, 0
835-    );
836-  }
837-
838-  return rc;
839-}
840-
841-#if !SQLITE_CORE
842-#ifdef _WIN32
843-__declspec(dllexport)
844-#endif
845-SQLITE_API int sqlite3_icu_init(
846-  sqlite3 *db,
847-  char **pzErrMsg,
848-  const sqlite3_api_routines *pApi
849-){
850-  SQLITE_EXTENSION_INIT2(pApi)
851-  return sqlite3IcuInit(db);
852-}
853-#endif
854-
855-#endif
856-
857-/************** End of icu.c *************************************************/
858-/************** Begin file fts3_icu.c ****************************************/
859-/*
860-** 2007 June 22
861-**
862-** The author disclaims copyright to this source code.  In place of
863-** a legal notice, here is a blessing:
864-**
865-**    May you do good and not evil.
866-**    May you find forgiveness for yourself and forgive others.
867-**    May you share freely, never taking more than you give.
868-**
869-*************************************************************************
870-** This file implements a tokenizer for fts3 based on the ICU library.
871-*/
872-/* #include "fts3Int.h" */
873-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
874-#ifdef SQLITE_ENABLE_ICU
875-
876-/* #include <assert.h> */
877-/* #include <string.h> */
878-/* #include "fts3_tokenizer.h" */
879-
880-#include <unicode/ubrk.h>
881-/* #include <unicode/ucol.h> */
882-/* #include <unicode/ustring.h> */
883-#include <unicode/utf16.h>
884-
885-typedef struct IcuTokenizer IcuTokenizer;
886-typedef struct IcuCursor IcuCursor;
887-
888-struct IcuTokenizer {
889-  sqlite3_tokenizer base;
890-  char *zLocale;
891-};
892-
893-struct IcuCursor {
894-  sqlite3_tokenizer_cursor base;
895-
896-  UBreakIterator *pIter;      /* ICU break-iterator object */
897-  int nChar;                  /* Number of UChar elements in pInput */
898-  UChar *aChar;               /* Copy of input using utf-16 encoding */
899-  int *aOffset;               /* Offsets of each character in utf-8 input */
900-
901-  int nBuffer;
902-  char *zBuffer;
903-
904-  int iToken;
905-};
906-
907-/*
908-** Create a new tokenizer instance.
909-*/
910-static int icuCreate(
911-  int argc,                            /* Number of entries in argv[] */
912-  const char * const *argv,            /* Tokenizer creation arguments */
913-  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
914-){
915-  IcuTokenizer *p;
916-  int n = 0;
917-
918-  if( argc>0 ){
919-    n = strlen(argv[0])+1;
920-  }
921-  p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
922-  if( !p ){
923-    return SQLITE_NOMEM;
924-  }
925-  memset(p, 0, sizeof(IcuTokenizer));
926-
927-  if( n ){
928-    p->zLocale = (char *)&p[1];
929-    memcpy(p->zLocale, argv[0], n);
930-  }
931-
932-  *ppTokenizer = (sqlite3_tokenizer *)p;
933-
934-  return SQLITE_OK;
935-}
936-
937-/*
938-** Destroy a tokenizer
939-*/
940-static int icuDestroy(sqlite3_tokenizer *pTokenizer){
941-  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
942-  sqlite3_free(p);
943-  return SQLITE_OK;
944-}
945-
946-/*
947-** Prepare to begin tokenizing a particular string.  The input
948-** string to be tokenized is pInput[0..nBytes-1].  A cursor
949-** used to incrementally tokenize this string is returned in
950-** *ppCursor.
951-*/
952-static int icuOpen(
953-  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
954-  const char *zInput,                    /* Input string */
955-  int nInput,                            /* Length of zInput in bytes */
956-  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
957-){
958-  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
959-  IcuCursor *pCsr;
960-
961-  const int32_t opt = U_FOLD_CASE_DEFAULT;
962-  UErrorCode status = U_ZERO_ERROR;
963-  int nChar;
964-
965-  UChar32 c;
966-  int iInput = 0;
967-  int iOut = 0;
968-
969-  *ppCursor = 0;
970-
971-  if( zInput==0 ){
972-    nInput = 0;
973-    zInput = "";
974-  }else if( nInput<0 ){
975-    nInput = strlen(zInput);
976-  }
977-  nChar = nInput+1;
978-  pCsr = (IcuCursor *)sqlite3_malloc64(
979-      sizeof(IcuCursor) +                /* IcuCursor */
980-      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
981-      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
982-  );
983-  if( !pCsr ){
984-    return SQLITE_NOMEM;
985-  }
986-  memset(pCsr, 0, sizeof(IcuCursor));
987-  pCsr->aChar = (UChar *)&pCsr[1];
988-  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
989-
990-  pCsr->aOffset[iOut] = iInput;
991-  U8_NEXT(zInput, iInput, nInput, c);
992-  while( c>0 ){
993-    int isError = 0;
994-    c = u_foldCase(c, opt);
995-    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
996-    if( isError ){
997-      sqlite3_free(pCsr);
998-      return SQLITE_ERROR;
999-    }
1000-    pCsr->aOffset[iOut] = iInput;
1001-
1002-    if( iInput<nInput ){
1003-      U8_NEXT(zInput, iInput, nInput, c);
1004-    }else{
1005-      c = 0;
1006-    }
1007-  }
1008-
1009-  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
1010-  if( !U_SUCCESS(status) ){
1011-    sqlite3_free(pCsr);
1012-    return SQLITE_ERROR;
1013-  }
1014-  pCsr->nChar = iOut;
1015-
1016-  ubrk_first(pCsr->pIter);
1017-  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
1018-  return SQLITE_OK;
1019-}
1020-
1021-/*
1022-** Close a tokenization cursor previously opened by a call to icuOpen().
1023-*/
1024-static int icuClose(sqlite3_tokenizer_cursor *pCursor){
1025-  IcuCursor *pCsr = (IcuCursor *)pCursor;
1026-  ubrk_close(pCsr->pIter);
1027-  sqlite3_free(pCsr->zBuffer);
1028-  sqlite3_free(pCsr);
1029-  return SQLITE_OK;
1030-}
1031-
1032-/*
1033-** Extract the next token from a tokenization cursor.
1034-*/
1035-static int icuNext(
1036-  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
1037-  const char **ppToken,               /* OUT: *ppToken is the token text */
1038-  int *pnBytes,                       /* OUT: Number of bytes in token */
1039-  int *piStartOffset,                 /* OUT: Starting offset of token */
1040-  int *piEndOffset,                   /* OUT: Ending offset of token */
1041-  int *piPosition                     /* OUT: Position integer of token */
1042-){
1043-  IcuCursor *pCsr = (IcuCursor *)pCursor;
1044-
1045-  int iStart = 0;
1046-  int iEnd = 0;
1047-  int nByte = 0;
1048-
1049-  while( iStart==iEnd ){
1050-    UChar32 c;
1051-
1052-    iStart = ubrk_current(pCsr->pIter);
1053-    iEnd = ubrk_next(pCsr->pIter);
1054-    if( iEnd==UBRK_DONE ){
1055-      return SQLITE_DONE;
1056-    }
1057-
1058-    while( iStart<iEnd ){
1059-      int iWhite = iStart;
1060-      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
1061-      if( u_isspace(c) ){
1062-        iStart = iWhite;
1063-      }else{
1064-        break;
1065-      }
1066-    }
1067-    assert(iStart<=iEnd);
1068-  }
1069-
1070-  do {
1071-    UErrorCode status = U_ZERO_ERROR;
1072-    if( nByte ){
1073-      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
1074-      if( !zNew ){
1075-        return SQLITE_NOMEM;
1076-      }
1077-      pCsr->zBuffer = zNew;
1078-      pCsr->nBuffer = nByte;
1079-    }
1080-
1081-    u_strToUTF8(
1082-        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
1083-        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
1084-        &status                                  /* Output success/failure */
1085-    );
1086-  } while( nByte>pCsr->nBuffer );
1087-
1088-  *ppToken = pCsr->zBuffer;
1089-  *pnBytes = nByte;
1090-  *piStartOffset = pCsr->aOffset[iStart];
1091-  *piEndOffset = pCsr->aOffset[iEnd];
1092-  *piPosition = pCsr->iToken++;
1093-
1094-  return SQLITE_OK;
1095-}
1096-
1097-/*
1098-** The set of routines that implement the simple tokenizer
1099-*/
1100-static const sqlite3_tokenizer_module icuTokenizerModule = {
1101-  0,                           /* iVersion    */
1102-  icuCreate,                   /* xCreate     */
1103-  icuDestroy,                  /* xCreate     */
1104-  icuOpen,                     /* xOpen       */
1105-  icuClose,                    /* xClose      */
1106-  icuNext,                     /* xNext       */
1107-  0,                           /* xLanguageid */
1108-};
1109-
1110-/*
1111-** Set *ppModule to point at the implementation of the ICU tokenizer.
1112-*/
1113-SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(
1114-  sqlite3_tokenizer_module const**ppModule
1115-){
1116-  *ppModule = &icuTokenizerModule;
1117-}
1118-
1119-#endif /* defined(SQLITE_ENABLE_ICU) */
1120-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
1121-
1122-/************** End of fts3_icu.c ********************************************/
1123 /************** Begin file sqlite3rbu.c **************************************/
1124 /*
1125 ** 2014 August 30
1126diff --git a/src/sqlite3icu.c b/src/sqlite3icu.c
1127new file mode 100644
1128index 0000000..b5944d5
1129--- /dev/null
1130+++ b/src/sqlite3icu.c
1131@@ -0,0 +1,888 @@
1132+/******************************************************************************
1133+** This file is an amalgamation of many separate C source files from SQLite
1134+** version 3.40.1.  By combining all the individual C code files into this
1135+** single large file, the entire code can be compiled as a single translation
1136+** unit.  This allows many compilers to do optimizations that would not be
1137+** possible if the files were compiled separately.  Performance improvements
1138+** of 5% or more are commonly seen when SQLite is compiled as a single
1139+** translation unit.
1140+**
1141+** This file is all you need to compile SQLite.  To use SQLite in other
1142+** programs, you need this file and the "sqlite3.h" header file that defines
1143+** the programming interface to the SQLite library.  (If you do not have
1144+** the "sqlite3.h" header file at hand, you will find a copy embedded within
1145+** the text of this file.  Search for "Begin file sqlite3.h" to find the start
1146+** of the embedded sqlite3.h header file.) Additional code files may be needed
1147+** if you want a wrapper to interface SQLite with your choice of programming
1148+** language. The code for the "sqlite3" command-line shell is also in a
1149+** separate file. This file contains only code for the core SQLite library.
1150+*/
1151+/*
1152+** 2019.09.02-Complete codec logic for encryption and decryption.
1153+**            Huawei Technologies Co, Ltd.
1154+*/
1155+/************** Begin file icu.c *********************************************/
1156+/*
1157+** 2007 May 6
1158+**
1159+** The author disclaims copyright to this source code.  In place of
1160+** a legal notice, here is a blessing:
1161+**
1162+**    May you do good and not evil.
1163+**    May you find forgiveness for yourself and forgive others.
1164+**    May you share freely, never taking more than you give.
1165+**
1166+*************************************************************************
1167+** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
1168+**
1169+** This file implements an integration between the ICU library
1170+** ("International Components for Unicode", an open-source library
1171+** for handling unicode data) and SQLite. The integration uses
1172+** ICU to provide the following to SQLite:
1173+**
1174+**   * An implementation of the SQL regexp() function (and hence REGEXP
1175+**     operator) using the ICU uregex_XX() APIs.
1176+**
1177+**   * Implementations of the SQL scalar upper() and lower() functions
1178+**     for case mapping.
1179+**
1180+**   * Integration of ICU and SQLite collation sequences.
1181+**
1182+**   * An implementation of the LIKE operator that uses ICU to
1183+**     provide case-independent matching.
1184+*/
1185+#include <stdio.h>
1186+#include <stdlib.h>
1187+#include <string.h>
1188+#include <assert.h>
1189+#include <stddef.h>
1190+
1191+#include "sqlite3icu.h"
1192+#include "sqlite3.h"
1193+
1194+#if !defined(SQLITE_CORE)                  \
1195+ || defined(SQLITE_ENABLE_ICU)             \
1196+ || defined(SQLITE_ENABLE_ICU_COLLATIONS)
1197+
1198+/* Include ICU headers */
1199+#include <unicode/utypes.h>
1200+#include <unicode/uregex.h>
1201+#include <unicode/ustring.h>
1202+#include <unicode/ucol.h>
1203+
1204+#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION)
1205+  /* This case when the file really is being compiled as a loadable
1206+  ** extension */
1207+# define SQLITE_EXTENSION_INIT1     const sqlite3_api_routines *sqlite3_api=0;
1208+# define SQLITE_EXTENSION_INIT2(v)  sqlite3_api=v;
1209+# define SQLITE_EXTENSION_INIT3     \
1210+    extern const sqlite3_api_routines *sqlite3_api;
1211+#else
1212+  /* This case when the file is being statically linked into the
1213+  ** application */
1214+# define SQLITE_EXTENSION_INIT1     /*no-op*/
1215+# define SQLITE_EXTENSION_INIT2(v)  (void)v; /* unused parameter */
1216+# define SQLITE_EXTENSION_INIT3     /*no-op*/
1217+#endif
1218+
1219+/* #include <assert.h> */
1220+
1221+#ifndef SQLITE_CORE
1222+/*   #include "sqlite3ext.h" */
1223+  SQLITE_EXTENSION_INIT1
1224+#else
1225+/*   #include "sqlite3.h" */
1226+#endif
1227+
1228+// export the symbols
1229+#ifdef SQLITE_EXPORT_SYMBOLS
1230+#if defined(__GNUC__)
1231+#  define EXPORT_SYMBOLS  __attribute__ ((visibility ("default")))
1232+#elif defined(_MSC_VER)
1233+#  define EXPORT_SYMBOLS  __declspec(dllexport)
1234+#else
1235+#  define EXPORT_SYMBOLS
1236+#endif
1237+#endif
1238+
1239+EXPORT_SYMBOLS SQLITE_API int sqlite3IcuInit(sqlite3 *db);
1240+#ifdef SQLITE_ENABLE_ICU
1241+EXPORT_SYMBOLS SQLITE_API void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
1242+#endif
1243+/*
1244+** This function is called when an ICU function called from within
1245+** the implementation of an SQL scalar function returns an error.
1246+**
1247+** The scalar function context passed as the first argument is
1248+** loaded with an error message based on the following two args.
1249+*/
1250+static void icuFunctionError(
1251+  sqlite3_context *pCtx,       /* SQLite scalar function context */
1252+  const char *zName,           /* Name of ICU function that failed */
1253+  UErrorCode e                 /* Error code returned by ICU function */
1254+){
1255+  char zBuf[128];
1256+  sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
1257+  zBuf[127] = '\0';
1258+  sqlite3_result_error(pCtx, zBuf, -1);
1259+}
1260+
1261+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
1262+
1263+/*
1264+** Maximum length (in bytes) of the pattern in a LIKE or GLOB
1265+** operator.
1266+*/
1267+#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
1268+# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
1269+#endif
1270+
1271+/*
1272+** Version of sqlite3_free() that is always a function, never a macro.
1273+*/
1274+static void xFree(void *p){
1275+  sqlite3_free(p);
1276+}
1277+
1278+/*
1279+** This lookup table is used to help decode the first byte of
1280+** a multi-byte UTF8 character. It is copied here from SQLite source
1281+** code file utf8.c.
1282+*/
1283+static const unsigned char icuUtf8Trans1[] = {
1284+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1285+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1286+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1287+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1288+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1289+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1290+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1291+  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1292+};
1293+
1294+#define SQLITE_ICU_READ_UTF8(zIn, c)                       \
1295+  c = *(zIn++);                                            \
1296+  if( c>=0xc0 ){                                           \
1297+    c = icuUtf8Trans1[c-0xc0];                             \
1298+    while( (*zIn & 0xc0)==0x80 ){                          \
1299+      c = (c<<6) + (0x3f & *(zIn++));                      \
1300+    }                                                      \
1301+  }
1302+
1303+#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
1304+  assert( *zIn );                                          \
1305+  if( *(zIn++)>=0xc0 ){                                    \
1306+    while( (*zIn & 0xc0)==0x80 ){zIn++;}                   \
1307+  }
1308+
1309+
1310+/*
1311+** Compare two UTF-8 strings for equality where the first string is
1312+** a "LIKE" expression. Return true (1) if they are the same and
1313+** false (0) if they are different.
1314+*/
1315+static int icuLikeCompare(
1316+  const uint8_t *zPattern,   /* LIKE pattern */
1317+  const uint8_t *zString,    /* The UTF-8 string to compare against */
1318+  const UChar32 uEsc         /* The escape character */
1319+){
1320+  static const uint32_t MATCH_ONE = (uint32_t)'_';
1321+  static const uint32_t MATCH_ALL = (uint32_t)'%';
1322+
1323+  int prevEscape = 0;     /* True if the previous character was uEsc */
1324+
1325+  while( 1 ){
1326+
1327+    /* Read (and consume) the next character from the input pattern. */
1328+    uint32_t uPattern;
1329+    SQLITE_ICU_READ_UTF8(zPattern, uPattern);
1330+    if( uPattern==0 ) break;
1331+
1332+    /* There are now 4 possibilities:
1333+    **
1334+    **     1. uPattern is an unescaped match-all character "%",
1335+    **     2. uPattern is an unescaped match-one character "_",
1336+    **     3. uPattern is an unescaped escape character, or
1337+    **     4. uPattern is to be handled as an ordinary character
1338+    */
1339+    if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
1340+      /* Case 1. */
1341+      uint8_t c;
1342+
1343+      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
1344+      ** MATCH_ALL. For each MATCH_ONE, skip one character in the
1345+      ** test string.
1346+      */
1347+      while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
1348+        if( c==MATCH_ONE ){
1349+          if( *zString==0 ) return 0;
1350+          SQLITE_ICU_SKIP_UTF8(zString);
1351+        }
1352+        zPattern++;
1353+      }
1354+
1355+      if( *zPattern==0 ) return 1;
1356+
1357+      while( *zString ){
1358+        if( icuLikeCompare(zPattern, zString, uEsc) ){
1359+          return 1;
1360+        }
1361+        SQLITE_ICU_SKIP_UTF8(zString);
1362+      }
1363+      return 0;
1364+
1365+    }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
1366+      /* Case 2. */
1367+      if( *zString==0 ) return 0;
1368+      SQLITE_ICU_SKIP_UTF8(zString);
1369+
1370+    }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
1371+      /* Case 3. */
1372+      prevEscape = 1;
1373+
1374+    }else{
1375+      /* Case 4. */
1376+      uint32_t uString;
1377+      SQLITE_ICU_READ_UTF8(zString, uString);
1378+      uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT);
1379+      uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT);
1380+      if( uString!=uPattern ){
1381+        return 0;
1382+      }
1383+      prevEscape = 0;
1384+    }
1385+  }
1386+
1387+  return *zString==0;
1388+}
1389+
1390+/*
1391+** Implementation of the like() SQL function.  This function implements
1392+** the built-in LIKE operator.  The first argument to the function is the
1393+** pattern and the second argument is the string.  So, the SQL statements:
1394+**
1395+**       A LIKE B
1396+**
1397+** is implemented as like(B, A). If there is an escape character E,
1398+**
1399+**       A LIKE B ESCAPE E
1400+**
1401+** is mapped to like(B, A, E).
1402+*/
1403+static void icuLikeFunc(
1404+  sqlite3_context *context,
1405+  int argc,
1406+  sqlite3_value **argv
1407+){
1408+  const unsigned char *zA = sqlite3_value_text(argv[0]);  /* Pattern */
1409+  const unsigned char *zB = sqlite3_value_text(argv[1]);  /* String to test */
1410+  UChar32 uEsc = 0;           /* Escape code point; 0 when no ESCAPE is given */
1411+
1412+  /* Limit the length of the LIKE pattern to avoid problems of deep
1413+  ** recursion and N*N behavior in icuLikeCompare().
1414+  */
1415+  if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
1416+    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
1417+    return;
1418+  }
1419+
1420+
1421+  if( argc==3 ){
1422+    /* The escape character string must consist of a single UTF-8 character.
1423+    ** Otherwise, return an error.
1424+    */
1425+    int nE= sqlite3_value_bytes(argv[2]);
1426+    const unsigned char *zE = sqlite3_value_text(argv[2]);
1427+    int i = 0;
1428+    if( zE==0 ) return;       /* No escape text: return without setting a result */
1429+    U8_NEXT(zE, i, nE, uEsc);
1430+    if( i!=nE){               /* One code point did not consume all nE bytes */
1431+      sqlite3_result_error(context,
1432+          "ESCAPE expression must be a single character", -1);
1433+      return;
1434+    }
1435+  }
1436+
1437+  if( zA && zB ){             /* If either text pointer is NULL no result is set */
1438+    sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
1439+  }
1440+}
1441+
1442+/*
1443+** Function to delete compiled regexp objects. Registered as
1444+** a destructor function with sqlite3_set_auxdata().
1445+*/
1446+static void icuRegexpDelete(void *p){
1447+  URegularExpression *pExpr = (URegularExpression *)p;
1448+  uregex_close(pExpr);
1449+}
1450+
1451+/*
1452+** Implementation of SQLite REGEXP operator. This scalar function takes
1453+** two arguments. The first is a regular expression pattern to compile;
1454+** the second is a string to match against that pattern. If either
1455+** argument is an SQL NULL, then NULL is returned. Otherwise, the result
1456+** is 1 if the string matches the pattern, or 0 otherwise.
1457+**
1458+** SQLite maps the REGEXP operator to the regexp() function such
1459+** that the following two are equivalent:
1460+**
1461+**     zString REGEXP zPattern
1462+**     regexp(zPattern, zString)
1463+**
1464+** Uses the following ICU regexp APIs:
1465+**
1466+**     uregex_open(), uregex_setText()
1467+**     uregex_matches()
1468+**     uregex_close()
1469+*/
1470+static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
1471+  UErrorCode status = U_ZERO_ERROR;
1472+  URegularExpression *pExpr;
1473+  UBool res;
1474+  const UChar *zString = sqlite3_value_text16(apArg[1]);
1475+
1476+  (void)nArg;  /* Unused parameter */
1477+
1478+  /* If the left hand side of the regexp operator is NULL,
1479+  ** then the result is also NULL.
1480+  */
1481+  if( !zString ){
1482+    return;
1483+  }
1484+
1485+  pExpr = sqlite3_get_auxdata(p, 0);   /* Reuse a regex stored in auxdata slot 0 */
1486+  if( !pExpr ){
1487+    const UChar *zPattern = sqlite3_value_text16(apArg[0]);
1488+    if( !zPattern ){                   /* NULL pattern: no result is set */
1489+      return;
1490+    }
1491+    pExpr = uregex_open(zPattern, -1, 0, 0, &status);
1492+
1493+    if( U_SUCCESS(status) ){
1494+      sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
1495+      pExpr = sqlite3_get_auxdata(p, 0);  /* NOTE(review): presumably re-read in case set_auxdata dropped it - confirm */
1496+    }
1497+    if( !pExpr ){
1498+      icuFunctionError(p, "uregex_open", status);
1499+      return;
1500+    }
1501+  }
1502+
1503+  /* Configure the text that the regular expression operates on. */
1504+  uregex_setText(pExpr, zString, -1, &status);
1505+  if( !U_SUCCESS(status) ){
1506+    icuFunctionError(p, "uregex_setText", status);
1507+    return;
1508+  }
1509+
1510+  /* Attempt the match */
1511+  res = uregex_matches(pExpr, 0, &status);
1512+  if( !U_SUCCESS(status) ){
1513+    icuFunctionError(p, "uregex_matches", status);
1514+    return;
1515+  }
1516+
1517+  /* Set the text that the regular expression operates on to a NULL
1518+  ** pointer. This is not really necessary, but it is tidier than
1519+  ** leaving the regular expression object configured with an invalid
1520+  ** pointer after this function returns.
1521+  */
1522+  uregex_setText(pExpr, 0, 0, &status);
1523+
1524+  /* Return 1 or 0. */
1525+  sqlite3_result_int(p, res ? 1 : 0);
1526+}
1527+
1528+/*
1529+** Implementations of scalar functions for case mapping - upper() and
1530+** lower(). Function upper() converts its input to upper-case (ABC).
1531+** Function lower() converts to lower-case (abc).
1532+**
1533+** ICU provides two types of case mapping, "general" case mapping and
1534+** "language specific". Refer to ICU documentation for the differences
1535+** between the two.
1536+**
1537+** To utilise "general" case mapping, the upper() or lower() scalar
1538+** functions are invoked with one argument:
1539+**
1540+**     upper('abc') -> 'ABC'
1541+**     lower('ABC') -> 'abc'
1542+**
1543+** To access ICU "language specific" case mapping, upper() or lower()
1544+** should be invoked with two arguments. The second argument is the name
1545+** of the locale to use. Passing an empty string ("") or SQL NULL value
1546+** as the second argument is the same as invoking the 1 argument version
1547+** of upper() or lower().
1548+**
1549+**     lower('I', 'en_us') -> 'i'
1550+**     lower('I', 'tr_tr') -> '\u131' (small dotless i)
1551+**
1552+** http://www.icu-project.org/userguide/posix.html#case_mappings
1553+*/
1554+static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
1555+  const UChar *zInput;            /* Pointer to input string */
1556+  UChar *zOutput = 0;             /* Pointer to output buffer */
1557+  int nInput;                     /* Size of utf-16 input string in bytes */
1558+  int nOut;                       /* Size of output buffer in bytes */
1559+  int cnt;                        /* Pass counter for the retry loop below */
1560+  int bToUpper;                   /* True for upper(), false for lower() */
1561+  UErrorCode status;
1562+  const char *zLocale = 0;        /* Locale name; stays 0 for the 1-arg form */
1563+
1564+  assert(nArg==1 || nArg==2);
1565+  bToUpper = (sqlite3_user_data(p)!=0);
1566+  if( nArg==2 ){
1567+    zLocale = (const char *)sqlite3_value_text(apArg[1]);
1568+  }
1569+
1570+  zInput = sqlite3_value_text16(apArg[0]);
1571+  if( !zInput ){
1572+    return;
1573+  }
1574+  nOut = nInput = sqlite3_value_bytes16(apArg[0]);
1575+  if( nOut==0 ){
1576+    sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
1577+    return;
1578+  }
1579+
1580+  for(cnt=0; cnt<2; cnt++){       /* Pass 2 retries with the size ICU reported */
1581+    UChar *zNew = sqlite3_realloc(zOutput, nOut);
1582+    if( zNew==0 ){
1583+      sqlite3_free(zOutput);
1584+      sqlite3_result_error_nomem(p);
1585+      return;
1586+    }
1587+    zOutput = zNew;
1588+    status = U_ZERO_ERROR;
1589+    if( bToUpper ){
1590+      nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
1591+    }else{
1592+      nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
1593+    }
1594+
1595+    if( U_SUCCESS(status) ){
1596+      sqlite3_result_text16(p, zOutput, nOut, xFree);
1597+    }else if( status==U_BUFFER_OVERFLOW_ERROR ){
1598+      assert( cnt==0 );
1599+      continue;
1600+    }else{
1601+      icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
1602+    }
1603+    return;
1604+  }
1605+  assert( 0 );     /* Unreachable */
1606+}
1607+
1608+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
1609+
1610+/*
1611+** Collation sequence destructor function. The pCtx argument points to
1612+** a UCollator structure previously allocated using ucol_open().
1613+*/
1614+static void icuCollationDel(void *pCtx){
1615+  UCollator *p = (UCollator *)pCtx;
1616+  ucol_close(p);
1617+}
1618+
1619+/*
1620+** Collation sequence comparison function. The pCtx argument points to
1621+** a UCollator structure previously allocated using ucol_open().
1622+*/
1623+static int icuCollationColl(
1624+  void *pCtx,
1625+  int nLeft,
1626+  const void *zLeft,
1627+  int nRight,
1628+  const void *zRight
1629+){
1630+  UCollationResult res;
1631+  UCollator *p = (UCollator *)pCtx;
1632+  res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
1633+  switch( res ){
1634+    case UCOL_LESS:    return -1;
1635+    case UCOL_GREATER: return +1;
1636+    case UCOL_EQUAL:   return 0;
1637+  }
1638+  assert(!"Unexpected return value from ucol_strcoll()");
1639+  return 0;
1640+}
1641+
1642+/*
1643+** Implementation of the scalar function icu_load_collation().
1644+**
1645+** This scalar function is used to add ICU collation based collation
1646+** types to an SQLite database connection. It is intended to be called
1647+** as follows:
1648+**
1649+**     SELECT icu_load_collation(<locale>, <collation-name>);
1650+**
1651+** Where <locale> is a string containing an ICU locale identifier (e.g.
1652+** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
1653+** collation sequence to create.
1654+*/
1655+static void icuLoadCollation(
1656+  sqlite3_context *p,
1657+  int nArg,
1658+  sqlite3_value **apArg
1659+){
1660+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
1661+  UErrorCode status = U_ZERO_ERROR;
1662+  const char *zLocale;      /* Locale identifier - (eg. "ja_JP") */
1663+  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
1664+  UCollator *pUCollator;    /* ICU library collation object */
1665+  int rc;                   /* Return code from sqlite3_create_collation_v2() */
1666+
1667+  assert(nArg==2);
1668+  (void)nArg; /* Only referenced by the assert() above */
1669+  zLocale = (const char *)sqlite3_value_text(apArg[0]);
1670+  zName = (const char *)sqlite3_value_text(apArg[1]);
1671+
1672+  if( !zLocale || !zName ){
1673+    return;
1674+  }
1675+
1676+  pUCollator = ucol_open(zLocale, &status);
1677+  if( !U_SUCCESS(status) ){
1678+    icuFunctionError(p, "ucol_open", status);
1679+    return;
1680+  }
1681+  assert(p);
1682+
1683+  rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
1684+      icuCollationColl, icuCollationDel
1685+  );
1686+  if( rc!=SQLITE_OK ){
1687+    ucol_close(pUCollator);   /* Registration failed, so release the collator here */
1688+    sqlite3_result_error(p, "Error registering collation function", -1);
1689+  }
1690+}
1691+
1692+/*
1693+** Register the ICU extension functions with database db.
1694+*/
1695+EXPORT_SYMBOLS SQLITE_API int sqlite3IcuInit(sqlite3 *db){
1696+# define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
1697+  static const struct IcuScalar {
1698+    const char *zName;                        /* Function name */
1699+    unsigned char nArg;                       /* Number of arguments */
1700+    unsigned int enc;                         /* Optimal text encoding */
1701+    unsigned char iContext;                   /* sqlite3_user_data() context */
1702+    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
1703+  } scalars[] = {
1704+    {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation},
1705+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
1706+    {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS,         0, icuRegexpFunc},
1707+    {"lower",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
1708+    {"lower",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
1709+    {"upper",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
1710+    {"upper",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
1711+    {"lower",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
1712+    {"lower",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
1713+    {"upper",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
1714+    {"upper",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
1715+    {"like",   2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
1716+    {"like",   3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
1717+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
1718+  };
1719+#ifdef OS_FEATURE
1720+  extern void SetOhosIcuDirectory();
1721+  SetOhosIcuDirectory();
1722+#endif
1723+  int rc = SQLITE_OK;
1724+  int i;
1725+
1726+  for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
1727+    const struct IcuScalar *p = &scalars[i];
1728+    rc = sqlite3_create_function(
1729+        db, p->zName, p->nArg, p->enc,
1730+        p->iContext ? (void*)db : (void*)0,
1731+        p->xFunc, 0, 0
1732+    );
1733+  }
1734+
1735+  return rc;
1736+}
1737+
1738+#if !SQLITE_CORE
1739+#ifdef _WIN32
1740+__declspec(dllexport)
1741+#endif
1742+SQLITE_API int sqlite3_icu_init(
1743+  sqlite3 *db,
1744+  char **pzErrMsg,
1745+  const sqlite3_api_routines *pApi
1746+){
1747+  SQLITE_EXTENSION_INIT2(pApi)
1748+  return sqlite3IcuInit(db);
1749+}
1750+#endif
1751+
1752+#endif
1753+
1754+/************** End of icu.c *************************************************/
1755+/************** Begin file fts3_icu.c ****************************************/
1756+/*
1757+** 2007 June 22
1758+**
1759+** The author disclaims copyright to this source code.  In place of
1760+** a legal notice, here is a blessing:
1761+**
1762+**    May you do good and not evil.
1763+**    May you find forgiveness for yourself and forgive others.
1764+**    May you share freely, never taking more than you give.
1765+**
1766+*************************************************************************
1767+** This file implements a tokenizer for fts3 based on the ICU library.
1768+*/
1769+/* #include "fts3Int.h" */
1770+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
1771+#ifdef SQLITE_ENABLE_ICU
1772+
1773+/* #include <assert.h> */
1774+/* #include <string.h> */
1775+/* #include "fts3_tokenizer.h" */
1776+
1777+#include <unicode/ubrk.h>
1778+/* #include <unicode/ucol.h> */
1779+/* #include <unicode/ustring.h> */
1780+#include <unicode/utf16.h>
1781+
1782+typedef struct IcuTokenizer IcuTokenizer;
1783+typedef struct IcuCursor IcuCursor;
1784+
1785+struct IcuTokenizer {
1786+  sqlite3_tokenizer base;     /* First member; icuCreate() casts the struct to sqlite3_tokenizer* */
1787+  char *zLocale;              /* Copy of argv[0] from icuCreate(), or NULL; given to ubrk_open() */
1788+};
1789+
1790+struct IcuCursor {
1791+  sqlite3_tokenizer_cursor base;  /* First member; icuOpen() casts the struct to the cursor type */
1792+
1793+  UBreakIterator *pIter;      /* ICU break-iterator object */
1794+  int nChar;                  /* Number of UChar elements used in aChar[] */
1795+  UChar *aChar;               /* Case-folded copy of input using utf-16 encoding */
1796+  int *aOffset;               /* aOffset[i] is the utf-8 byte offset for aChar[i] */
1797+
1798+  int nBuffer;                /* Size of zBuffer in bytes */
1799+  char *zBuffer;              /* Buffer holding the current token as utf-8 */
1800+
1801+  int iToken;                 /* Position value reported for the next token */
1802+};
1803+
1804+/*
1805+** Create a new tokenizer instance.  If argc>0, argv[0] is copied and kept as the locale.
1806+*/
1807+static int icuCreate(
1808+  int argc,                            /* Number of entries in argv[] */
1809+  const char * const *argv,            /* Tokenizer creation arguments */
1810+  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
1811+){
1812+  IcuTokenizer *p;
1813+  int n = 0;                           /* Bytes for the locale copy, nul included */
1814+
1815+  if( argc>0 ){
1816+    n = strlen(argv[0])+1;
1817+  }
1818+  p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);  /* Struct and locale share one allocation */
1819+  if( !p ){
1820+    return SQLITE_NOMEM;
1821+  }
1822+  memset(p, 0, sizeof(IcuTokenizer));  /* Leaves zLocale NULL when no argument was given */
1823+
1824+  if( n ){
1825+    p->zLocale = (char *)&p[1];        /* Locale string is stored directly after the struct */
1826+    memcpy(p->zLocale, argv[0], n);
1827+  }
1828+
1829+  *ppTokenizer = (sqlite3_tokenizer *)p;
1830+
1831+  return SQLITE_OK;
1832+}
1833+
1834+/*
1835+** Destroy a tokenizer allocated by icuCreate().  Always returns SQLITE_OK.
1836+*/
1837+static int icuDestroy(sqlite3_tokenizer *pTokenizer){
1838+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
1839+  sqlite3_free(p);                     /* The locale string lives in this same allocation */
1840+  return SQLITE_OK;
1841+}
1842+
1843+/*
1844+** Prepare to begin tokenizing the string zInput[0..nInput-1].  A cursor
1845+** used to incrementally tokenize this string is returned in *ppCursor.
1846+** Returns SQLITE_OK, SQLITE_NOMEM if the allocation fails, or SQLITE_ERROR
1847+** if appending to the utf-16 copy or ubrk_open() fails.
1848+*/
1849+static int icuOpen(
1850+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
1851+  const char *zInput,                    /* Input string; NULL is treated as "" */
1852+  int nInput,                            /* Length of zInput in bytes; <0 means use strlen() */
1853+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
1854+){
1855+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
1856+  IcuCursor *pCsr;
1857+
1858+  const int32_t opt = U_FOLD_CASE_DEFAULT;
1859+  UErrorCode status = U_ZERO_ERROR;
1860+  int nChar;                             /* Capacity of aChar[] passed to U16_APPEND */
1861+
1862+  UChar32 c;
1863+  int iInput = 0;                        /* Current byte offset into zInput */
1864+  int iOut = 0;                          /* Current index into aChar[] */
1865+
1866+  *ppCursor = 0;
1867+
1868+  if( zInput==0 ){
1869+    nInput = 0;
1870+    zInput = "";
1871+  }else if( nInput<0 ){
1872+    nInput = strlen(zInput);
1873+  }
1874+  nChar = nInput+1;
1875+  pCsr = (IcuCursor *)sqlite3_malloc64(
1876+      sizeof(IcuCursor) +                /* IcuCursor */
1877+      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
1878+      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
1879+  );
1880+  if( !pCsr ){
1881+    return SQLITE_NOMEM;
1882+  }
1883+  memset(pCsr, 0, sizeof(IcuCursor));
1884+  pCsr->aChar = (UChar *)&pCsr[1];       /* aChar[] starts right after the struct */
1885+  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];  /* aOffset[] follows the rounded-up aChar[] */
1886+
1887+  pCsr->aOffset[iOut] = iInput;
1888+  U8_NEXT(zInput, iInput, nInput, c);
1889+  while( c>0 ){                          /* Loop ends once c is zero or negative */
1890+    int isError = 0;
1891+    c = u_foldCase(c, opt);              /* The case-folded code point is what gets stored */
1892+    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
1893+    if( isError ){
1894+      sqlite3_free(pCsr);
1895+      return SQLITE_ERROR;
1896+    }
1897+    pCsr->aOffset[iOut] = iInput;        /* Byte offset in zInput matching aChar[iOut] */
1898+
1899+    if( iInput<nInput ){
1900+      U8_NEXT(zInput, iInput, nInput, c);
1901+    }else{
1902+      c = 0;                             /* Input exhausted; terminates the loop */
1903+    }
1904+  }
1905+
1906+  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
1907+  if( !U_SUCCESS(status) ){
1908+    sqlite3_free(pCsr);
1909+    return SQLITE_ERROR;
1910+  }
1911+  pCsr->nChar = iOut;
1912+
1913+  ubrk_first(pCsr->pIter);
1914+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
1915+  return SQLITE_OK;
1916+}
1917+
1918+/*
1919+** Close a tokenization cursor previously opened by a call to icuOpen().
1920+*/
1921+static int icuClose(sqlite3_tokenizer_cursor *pCursor){
1922+  IcuCursor *pCsr = (IcuCursor *)pCursor;
1923+  ubrk_close(pCsr->pIter);
1924+  sqlite3_free(pCsr->zBuffer);          /* Token buffer is a separate allocation made in icuNext() */
1925+  sqlite3_free(pCsr);                   /* aChar[] and aOffset[] live inside this allocation */
1926+  return SQLITE_OK;
1927+}
1928+
1929+/*
1930+** Extract the next token from a tokenization cursor.  Returns SQLITE_DONE when no tokens remain.
1931+*/
1932+static int icuNext(
1933+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
1934+  const char **ppToken,               /* OUT: *ppToken is the token text */
1935+  int *pnBytes,                       /* OUT: Number of bytes in token */
1936+  int *piStartOffset,                 /* OUT: Starting offset of token */
1937+  int *piEndOffset,                   /* OUT: Ending offset of token */
1938+  int *piPosition                     /* OUT: Position integer of token */
1939+){
1940+  IcuCursor *pCsr = (IcuCursor *)pCursor;
1941+
1942+  int iStart = 0;
1943+  int iEnd = 0;
1944+  int nByte = 0;
1945+
1946+  while( iStart==iEnd ){              /* Repeat while the segment is empty after space skipping */
1947+    UChar32 c;
1948+
1949+    iStart = ubrk_current(pCsr->pIter);
1950+    iEnd = ubrk_next(pCsr->pIter);
1951+    if( iEnd==UBRK_DONE ){
1952+      return SQLITE_DONE;
1953+    }
1954+
1955+    while( iStart<iEnd ){             /* Advance iStart past leading space characters */
1956+      int iWhite = iStart;
1957+      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
1958+      if( u_isspace(c) ){
1959+        iStart = iWhite;
1960+      }else{
1961+        break;
1962+      }
1963+    }
1964+    assert(iStart<=iEnd);
1965+  }
1966+
1967+  do {                                /* Retry the conversion until the token fits in zBuffer */
1968+    UErrorCode status = U_ZERO_ERROR;
1969+    if( nByte ){                      /* Non-zero only after a previous pass reported a size */
1970+      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
1971+      if( !zNew ){
1972+        return SQLITE_NOMEM;
1973+      }
1974+      pCsr->zBuffer = zNew;
1975+      pCsr->nBuffer = nByte;
1976+    }
1977+
1978+    u_strToUTF8(
1979+        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
1980+        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
1981+        &status                                  /* Output success/failure */
1982+    );
1983+  } while( nByte>pCsr->nBuffer );
1984+
1985+  *ppToken = pCsr->zBuffer;
1986+  *pnBytes = nByte;
1987+  *piStartOffset = pCsr->aOffset[iStart];   /* aOffset[] maps a utf-16 index to a utf-8 byte offset */
1988+  *piEndOffset = pCsr->aOffset[iEnd];
1989+  *piPosition = pCsr->iToken++;
1990+
1991+  return SQLITE_OK;
1992+}
1993+
1994+/*
1995+** The set of routines that implement the ICU tokenizer
1996+*/
1997+static const sqlite3_tokenizer_module icuTokenizerModule = {
1998+  0,                           /* iVersion    */
1999+  icuCreate,                   /* xCreate     */
2000+  icuDestroy,                  /* xDestroy    */
2001+  icuOpen,                     /* xOpen       */
2002+  icuClose,                    /* xClose      */
2003+  icuNext,                     /* xNext       */
2004+  0,                           /* xLanguageid */
2005+};
2006+
2007+/*
2008+** Set *ppModule to point at the implementation of the ICU tokenizer.
2009+*/
2010+EXPORT_SYMBOLS SQLITE_API void sqlite3Fts3IcuTokenizerModule(
2011+  sqlite3_tokenizer_module const**ppModule   /* OUT: Receives the address of icuTokenizerModule */
2012+){
2013+  *ppModule = &icuTokenizerModule;
2014+}
2015+
2016+#endif /* defined(SQLITE_ENABLE_ICU) */
2017+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
2018+
2019+/************** End of fts3_icu.c ********************************************/
2020\ No newline at end of file
2021--
20222.47.0.windows.2
2023
2024