0002-Enable-and-optimize-ICU.patch - OpenGrok cross reference for /third_party/sqlite/patch/0002-Enable-and-optimize-ICU.patch

Lines Matching +full:escape +full:- +full:string +full:- +full:regexp
6 ---
7  src/sqlite3.c    | 1013 ++++------------------------------------------
9  2 files changed, 962 insertions(+), 939 deletions(-)
12 diff --git a/src/sqlite3.c b/src/sqlite3.c
14 --- a/src/sqlite3.c
16 @@ -2502,6 +2502,7 @@ struct sqlite3_mem_methods {
24 @@ -3289,6 +3290,7 @@ SQLITE_API void sqlite3_free_table(char **result);
30  ** CAPI3REF: Formatted String Printing Functions
32 @@ -178413,6 +178415,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db);
40 @@ -178475,13 +178478,54 @@ SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db);
44 -SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db);
96 @@ -178521,7 +178565,7 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = {
100 -  sqlite3IcuInit,
105 @@ -178913,6 +178957,19 @@ SQLITE_API int sqlite3_shutdown(void){
125 @@ -178930,7 +178987,6 @@ SQLITE_API int sqlite3_config(int op, ...){
129 -  va_start(ap, op);
133 @@ -182053,6 +182109,12 @@ static int openDatabase(
144    /* Load compiled-in extensions */
146 @@ -184344,114 +184406,6 @@ SQLITE_EXTENSION_INIT3
150 -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
151 -typedef struct sqlite3_tokenizer sqlite3_tokenizer;
152 -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
153 -
154 -struct sqlite3_tokenizer_module {
155 -
156 -  /*
157 -  ** Structure version. Should always be set to 0 or 1.
158 -  */
159 -  int iVersion;
160 -
161 -  /*
162 -  ** Create a new tokenizer. The values in the argv[] array are the
163 -  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
164 -  ** TABLE statement that created the fts3 table. For example, if
165 -  ** the following SQL is executed:
166 -  **
167 -  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
168 -  **
169 -  ** then argc is set to 2, and the argv[] array contains pointers
170 -  ** to the strings "arg1" and "arg2".
171 -  **
172 -  ** This method should return either SQLITE_OK (0), or an SQLite error
173 -  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
174 -  ** to point at the newly created tokenizer structure. The generic
175 -  ** sqlite3_tokenizer.pModule variable should not be initialized by
176 -  ** this callback. The caller will do so.
177 -  */
178 -  int (*xCreate)(
179 -    int argc,                           /* Size of argv array */
180 -    const char *const*argv,             /* Tokenizer argument strings */
181 -    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
182 -  );
183 -
184 -  /*
185 -  ** Destroy an existing tokenizer. The fts3 module calls this method
186 -  ** exactly once for each successful call to xCreate().
187 -  */
188 -  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
189 -
190 -  /*
191 -  ** Create a tokenizer cursor to tokenize an input buffer. The caller
192 -  ** is responsible for ensuring that the input buffer remains valid
193 -  ** until the cursor is closed (using the xClose() method).
194 -  */
195 -  int (*xOpen)(
196 -    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
197 -    const char *pInput, int nBytes,      /* Input buffer */
198 -    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
199 -  );
200 -
201 -  /*
202 -  ** Destroy an existing tokenizer cursor. The fts3 module calls this
203 -  ** method exactly once for each successful call to xOpen().
204 -  */
205 -  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
206 -
207 -  /*
208 -  ** Retrieve the next token from the tokenizer cursor pCursor. This
209 -  ** method should either return SQLITE_OK and set the values of the
210 -  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
211 -  ** the end of the buffer has been reached, or an SQLite error code.
212 -  **
213 -  ** *ppToken should be set to point at a buffer containing the
214 -  ** normalized version of the token (i.e. after any case-folding and/or
215 -  ** stemming has been performed). *pnBytes should be set to the length
216 -  ** of this buffer in bytes. The input text that generated the token is
217 -  ** identified by the byte offsets returned in *piStartOffset and
218 -  ** *piEndOffset. *piStartOffset should be set to the index of the first
219 -  ** byte of the token in the input buffer. *piEndOffset should be set
220 -  ** to the index of the first byte just past the end of the token in
221 -  ** the input buffer.
222 -  **
223 -  ** The buffer *ppToken is set to point at is managed by the tokenizer
224 -  ** implementation. It is only required to be valid until the next call
225 -  ** to xNext() or xClose().
226 -  */
227 -  /* TODO(shess) current implementation requires pInput to be
228 -  ** nul-terminated.  This should either be fixed, or pInput/nBytes
229 -  ** should be converted to zInput.
230 -  */
231 -  int (*xNext)(
232 -    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
233 -    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
234 -    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
235 -    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
236 -    int *piPosition      /* OUT: Number of tokens returned before this one */
237 -  );
238 -
239 -  /***********************************************************************
240 -  ** Methods below this point are only available if iVersion>=1.
241 -  */
242 -
243 -  /*
244 -  ** Configure the language id of a tokenizer cursor.
245 -  */
246 -  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
247 -};
248 -
249 -struct sqlite3_tokenizer {
250 -  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
251 -  /* Tokenizer implementations will typically add additional fields */
252 -};
253 -
254 -struct sqlite3_tokenizer_cursor {
255 -  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
256 -  /* Tokenizer implementations will typically add additional fields */
257 -};
261 @@ -189003,9 +188957,6 @@ SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_mo…
265 -#ifdef SQLITE_ENABLE_ICU
266 -SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
267 -#endif
271 @@ -189024,7 +188975,14 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
275 -  sqlite3Fts3IcuTokenizerModule(&pIcu);
287 @@ -189060,7 +189018,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
288       || sqlite3Fts3HashInsert(&pHash->hash, "unicode61", 10, (void *)pUnicode)
291 -     || (pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
292 +     || (icuEnable && pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
296 @@ -213799,829 +213757,6 @@ SQLITE_API int sqlite3_rtree_init(
300 -/************** Begin file icu.c *********************************************/
301 -/*
302 -** 2007 May 6
303 -**
304 -** The author disclaims copyright to this source code.  In place of
305 -** a legal notice, here is a blessing:
306 -**
307 -**    May you do good and not evil.
308 -**    May you find forgiveness for yourself and forgive others.
309 -**    May you share freely, never taking more than you give.
310 -**
311 -*************************************************************************
312 -** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
313 -**
314 -** This file implements an integration between the ICU library
315 -** ("International Components for Unicode", an open-source library
316 -** for handling unicode data) and SQLite. The integration uses
317 -** ICU to provide the following to SQLite:
318 -**
319 -**   * An implementation of the SQL regexp() function (and hence REGEXP
320 -**     operator) using the ICU uregex_XX() APIs.
321 -**
322 -**   * Implementations of the SQL scalar upper() and lower() functions
323 -**     for case mapping.
324 -**
325 -**   * Integration of ICU and SQLite collation sequences.
326 -**
327 -**   * An implementation of the LIKE operator that uses ICU to
328 -**     provide case-independent matching.
329 -*/
330 -
331 -#if !defined(SQLITE_CORE)                  \
332 - || defined(SQLITE_ENABLE_ICU)             \
333 - || defined(SQLITE_ENABLE_ICU_COLLATIONS)
334 -
335 -/* Include ICU headers */
336 -#include <unicode/utypes.h>
337 -#include <unicode/uregex.h>
338 -#include <unicode/ustring.h>
339 -#include <unicode/ucol.h>
340 -
341 -/* #include <assert.h> */
342 -
343 -#ifndef SQLITE_CORE
344 -/*   #include "sqlite3ext.h" */
345 -  SQLITE_EXTENSION_INIT1
346 -#else
347 -/*   #include "sqlite3.h" */
348 -#endif
349 -
350 -/*
351 -** This function is called when an ICU function called from within
352 -** the implementation of an SQL scalar function returns an error.
353 -**
354 -** The scalar function context passed as the first argument is
355 -** loaded with an error message based on the following two args.
356 -*/
357 -static void icuFunctionError(
358 -  sqlite3_context *pCtx,       /* SQLite scalar function context */
359 -  const char *zName,           /* Name of ICU function that failed */
360 -  UErrorCode e                 /* Error code returned by ICU function */
361 -){
362 -  char zBuf[128];
363 -  sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
364 -  zBuf[127] = '\0';
365 -  sqlite3_result_error(pCtx, zBuf, -1);
366 -}
367 -
368 -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
369 -
370 -/*
371 -** Maximum length (in bytes) of the pattern in a LIKE or GLOB
372 -** operator.
373 -*/
374 -#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
375 -# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
376 -#endif
377 -
378 -/*
379 -** Version of sqlite3_free() that is always a function, never a macro.
380 -*/
381 -static void xFree(void *p){
382 -  sqlite3_free(p);
383 -}
384 -
385 -/*
386 -** This lookup table is used to help decode the first byte of
387 -** a multi-byte UTF8 character. It is copied here from SQLite source
388 -** code file utf8.c.
389 -*/
390 -static const unsigned char icuUtf8Trans1[] = {
391 -  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
392 -  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
393 -  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
394 -  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
395 -  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396 -  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397 -  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
398 -  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
399 -};
400 -
401 -#define SQLITE_ICU_READ_UTF8(zIn, c)                       \
402 -  c = *(zIn++);                                            \
403 -  if( c>=0xc0 ){                                           \
404 -    c = icuUtf8Trans1[c-0xc0];                             \
405 -    while( (*zIn & 0xc0)==0x80 ){                          \
406 -      c = (c<<6) + (0x3f & *(zIn++));                      \
407 -    }                                                      \
408 -  }
409 -
410 -#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
411 -  assert( *zIn );                                          \
412 -  if( *(zIn++)>=0xc0 ){                                    \
413 -    while( (*zIn & 0xc0)==0x80 ){zIn++;}                   \
414 -  }
415 -
416 -
417 -/*
418 -** Compare two UTF-8 strings for equality where the first string is
419 -** a "LIKE" expression. Return true (1) if they are the same and
420 -** false (0) if they are different.
421 -*/
422 -static int icuLikeCompare(
423 -  const uint8_t *zPattern,   /* LIKE pattern */
424 -  const uint8_t *zString,    /* The UTF-8 string to compare against */
425 -  const UChar32 uEsc         /* The escape character */
426 -){
427 -  static const uint32_t MATCH_ONE = (uint32_t)'_';
428 -  static const uint32_t MATCH_ALL = (uint32_t)'%';
429 -
430 -  int prevEscape = 0;     /* True if the previous character was uEsc */
431 -
432 -  while( 1 ){
433 -
434 -    /* Read (and consume) the next character from the input pattern. */
435 -    uint32_t uPattern;
436 -    SQLITE_ICU_READ_UTF8(zPattern, uPattern);
437 -    if( uPattern==0 ) break;
438 -
439 -    /* There are now 4 possibilities:
440 -    **
441 -    **     1. uPattern is an unescaped match-all character "%",
442 -    **     2. uPattern is an unescaped match-one character "_",
443 -    **     3. uPattern is an unescaped escape character, or
444 -    **     4. uPattern is to be handled as an ordinary character
445 -    */
446 -    if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
447 -      /* Case 1. */
448 -      uint8_t c;
449 -
450 -      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
451 -      ** MATCH_ALL. For each MATCH_ONE, skip one character in the
452 -      ** test string.
453 -      */
454 -      while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
455 -        if( c==MATCH_ONE ){
456 -          if( *zString==0 ) return 0;
457 -          SQLITE_ICU_SKIP_UTF8(zString);
458 -        }
459 -        zPattern++;
460 -      }
461 -
462 -      if( *zPattern==0 ) return 1;
463 -
464 -      while( *zString ){
465 -        if( icuLikeCompare(zPattern, zString, uEsc) ){
466 -          return 1;
467 -        }
468 -        SQLITE_ICU_SKIP_UTF8(zString);
469 -      }
470 -      return 0;
471 -
472 -    }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
473 -      /* Case 2. */
474 -      if( *zString==0 ) return 0;
475 -      SQLITE_ICU_SKIP_UTF8(zString);
476 -
477 -    }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
478 -      /* Case 3. */
479 -      prevEscape = 1;
480 -
481 -    }else{
482 -      /* Case 4. */
483 -      uint32_t uString;
484 -      SQLITE_ICU_READ_UTF8(zString, uString);
485 -      uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT);
486 -      uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT);
487 -      if( uString!=uPattern ){
488 -        return 0;
489 -      }
490 -      prevEscape = 0;
491 -    }
492 -  }
493 -
494 -  return *zString==0;
495 -}
496 -
497 -/*
498 -** Implementation of the like() SQL function.  This function implements
499 -** the build-in LIKE operator.  The first argument to the function is the
500 -** pattern and the second argument is the string.  So, the SQL statements:
501 -**
502 -**       A LIKE B
503 -**
504 -** is implemented as like(B, A). If there is an escape character E,
505 -**
506 -**       A LIKE B ESCAPE E
507 -**
508 -** is mapped to like(B, A, E).
509 -*/
510 -static void icuLikeFunc(
511 -  sqlite3_context *context,
512 -  int argc,
513 -  sqlite3_value **argv
514 -){
515 -  const unsigned char *zA = sqlite3_value_text(argv[0]);
516 -  const unsigned char *zB = sqlite3_value_text(argv[1]);
517 -  UChar32 uEsc = 0;
518 -
519 -  /* Limit the length of the LIKE or GLOB pattern to avoid problems
520 -  ** of deep recursion and N*N behavior in patternCompare().
521 -  */
522 -  if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
523 -    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
524 -    return;
525 -  }
526 -
527 -
528 -  if( argc==3 ){
529 -    /* The escape character string must consist of a single UTF-8 character.
530 -    ** Otherwise, return an error.
531 -    */
532 -    int nE= sqlite3_value_bytes(argv[2]);
533 -    const unsigned char *zE = sqlite3_value_text(argv[2]);
534 -    int i = 0;
535 -    if( zE==0 ) return;
536 -    U8_NEXT(zE, i, nE, uEsc);
537 -    if( i!=nE){
538 -      sqlite3_result_error(context,
539 -          "ESCAPE expression must be a single character", -1);
540 -      return;
541 -    }
542 -  }
543 -
544 -  if( zA && zB ){
545 -    sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
546 -  }
547 -}
548 -
549 -/*
550 -** Function to delete compiled regexp objects. Registered as
551 -** a destructor function with sqlite3_set_auxdata().
552 -*/
553 -static void icuRegexpDelete(void *p){
554 -  URegularExpression *pExpr = (URegularExpression *)p;
555 -  uregex_close(pExpr);
556 -}
557 -
558 -/*
559 -** Implementation of SQLite REGEXP operator. This scalar function takes
560 -** two arguments. The first is a regular expression pattern to compile
561 -** the second is a string to match against that pattern. If either
562 -** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
563 -** is 1 if the string matches the pattern, or 0 otherwise.
564 -**
565 -** SQLite maps the regexp() function to the regexp() operator such
566 -** that the following two are equivalent:
567 -**
568 -**     zString REGEXP zPattern
569 -**     regexp(zPattern, zString)
570 -**
571 -** Uses the following ICU regexp APIs:
572 -**
573 -**     uregex_open()
574 -**     uregex_matches()
575 -**     uregex_close()
576 -*/
577 -static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
578 -  UErrorCode status = U_ZERO_ERROR;
579 -  URegularExpression *pExpr;
580 -  UBool res;
581 -  const UChar *zString = sqlite3_value_text16(apArg[1]);
582 -
583 -  (void)nArg;  /* Unused parameter */
584 -
585 -  /* If the left hand side of the regexp operator is NULL,
586 -  ** then the result is also NULL.
587 -  */
588 -  if( !zString ){
589 -    return;
590 -  }
591 -
592 -  pExpr = sqlite3_get_auxdata(p, 0);
593 -  if( !pExpr ){
594 -    const UChar *zPattern = sqlite3_value_text16(apArg[0]);
595 -    if( !zPattern ){
596 -      return;
597 -    }
598 -    pExpr = uregex_open(zPattern, -1, 0, 0, &status);
599 -
600 -    if( U_SUCCESS(status) ){
601 -      sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
602 -      pExpr = sqlite3_get_auxdata(p, 0);
603 -    }
604 -    if( !pExpr ){
605 -      icuFunctionError(p, "uregex_open", status);
606 -      return;
607 -    }
608 -  }
609 -
610 -  /* Configure the text that the regular expression operates on. */
611 -  uregex_setText(pExpr, zString, -1, &status);
612 -  if( !U_SUCCESS(status) ){
613 -    icuFunctionError(p, "uregex_setText", status);
614 -    return;
615 -  }
616 -
617 -  /* Attempt the match */
618 -  res = uregex_matches(pExpr, 0, &status);
619 -  if( !U_SUCCESS(status) ){
620 -    icuFunctionError(p, "uregex_matches", status);
621 -    return;
622 -  }
623 -
624 -  /* Set the text that the regular expression operates on to a NULL
625 -  ** pointer. This is not really necessary, but it is tidier than
626 -  ** leaving the regular expression object configured with an invalid
627 -  ** pointer after this function returns.
628 -  */
629 -  uregex_setText(pExpr, 0, 0, &status);
630 -
631 -  /* Return 1 or 0. */
632 -  sqlite3_result_int(p, res ? 1 : 0);
633 -}
634 -
635 -/*
636 -** Implementations of scalar functions for case mapping - upper() and
637 -** lower(). Function upper() converts its input to upper-case (ABC).
638 -** Function lower() converts to lower-case (abc).
639 -**
640 -** ICU provides two types of case mapping, "general" case mapping and
641 -** "language specific". Refer to ICU documentation for the differences
642 -** between the two.
643 -**
644 -** To utilise "general" case mapping, the upper() or lower() scalar
645 -** functions are invoked with one argument:
646 -**
647 -**     upper('ABC') -> 'abc'
648 -**     lower('abc') -> 'ABC'
649 -**
650 -** To access ICU "language specific" case mapping, upper() or lower()
651 -** should be invoked with two arguments. The second argument is the name
652 -** of the locale to use. Passing an empty string ("") or SQL NULL value
653 -** as the second argument is the same as invoking the 1 argument version
654 -** of upper() or lower().
655 -**
656 -**     lower('I', 'en_us') -> 'i'
657 -**     lower('I', 'tr_tr') -> '\u131' (small dotless i)
658 -**
659 -** http://www.icu-project.org/userguide/posix.html#case_mappings
660 -*/
661 -static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
662 -  const UChar *zInput;            /* Pointer to input string */
663 -  UChar *zOutput = 0;             /* Pointer to output buffer */
664 -  int nInput;                     /* Size of utf-16 input string in bytes */
665 -  int nOut;                       /* Size of output buffer in bytes */
666 -  int cnt;
667 -  int bToUpper;                   /* True for toupper(), false for tolower() */
668 -  UErrorCode status;
669 -  const char *zLocale = 0;
670 -
671 -  assert(nArg==1 || nArg==2);
672 -  bToUpper = (sqlite3_user_data(p)!=0);
673 -  if( nArg==2 ){
674 -    zLocale = (const char *)sqlite3_value_text(apArg[1]);
675 -  }
676 -
677 -  zInput = sqlite3_value_text16(apArg[0]);
678 -  if( !zInput ){
679 -    return;
680 -  }
681 -  nOut = nInput = sqlite3_value_bytes16(apArg[0]);
682 -  if( nOut==0 ){
683 -    sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
684 -    return;
685 -  }
686 -
687 -  for(cnt=0; cnt<2; cnt++){
688 -    UChar *zNew = sqlite3_realloc(zOutput, nOut);
689 -    if( zNew==0 ){
690 -      sqlite3_free(zOutput);
691 -      sqlite3_result_error_nomem(p);
692 -      return;
693 -    }
694 -    zOutput = zNew;
695 -    status = U_ZERO_ERROR;
696 -    if( bToUpper ){
697 -      nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
698 -    }else{
699 -      nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
700 -    }
701 -
702 -    if( U_SUCCESS(status) ){
703 -      sqlite3_result_text16(p, zOutput, nOut, xFree);
704 -    }else if( status==U_BUFFER_OVERFLOW_ERROR ){
705 -      assert( cnt==0 );
706 -      continue;
707 -    }else{
708 -      icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
709 -    }
710 -    return;
711 -  }
712 -  assert( 0 );     /* Unreachable */
713 -}
714 -
715 -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
716 -
717 -/*
718 -** Collation sequence destructor function. The pCtx argument points to
719 -** a UCollator structure previously allocated using ucol_open().
720 -*/
721 -static void icuCollationDel(void *pCtx){
722 -  UCollator *p = (UCollator *)pCtx;
723 -  ucol_close(p);
724 -}
725 -
726 -/*
727 -** Collation sequence comparison function. The pCtx argument points to
728 -** a UCollator structure previously allocated using ucol_open().
729 -*/
730 -static int icuCollationColl(
731 -  void *pCtx,
732 -  int nLeft,
733 -  const void *zLeft,
734 -  int nRight,
735 -  const void *zRight
736 -){
737 -  UCollationResult res;
738 -  UCollator *p = (UCollator *)pCtx;
739 -  res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
740 -  switch( res ){
741 -    case UCOL_LESS:    return -1;
742 -    case UCOL_GREATER: return +1;
743 -    case UCOL_EQUAL:   return 0;
744 -  }
745 -  assert(!"Unexpected return value from ucol_strcoll()");
746 -  return 0;
747 -}
748 -
749 -/*
750 -** Implementation of the scalar function icu_load_collation().
751 -**
752 -** This scalar function is used to add ICU collation based collation
753 -** types to an SQLite database connection. It is intended to be called
754 -** as follows:
755 -**
756 -**     SELECT icu_load_collation(<locale>, <collation-name>);
757 -**
758 -** Where <locale> is a string containing an ICU locale identifier (i.e.
759 -** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
760 -** collation sequence to create.
761 -*/
762 -static void icuLoadCollation(
763 -  sqlite3_context *p,
764 -  int nArg,
765 -  sqlite3_value **apArg
766 -){
767 -  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
768 -  UErrorCode status = U_ZERO_ERROR;
769 -  const char *zLocale;      /* Locale identifier - (eg. "jp_JP") */
770 -  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
771 -  UCollator *pUCollator;    /* ICU library collation object */
772 -  int rc;                   /* Return code from sqlite3_create_collation_x() */
773 -
774 -  assert(nArg==2);
775 -  (void)nArg; /* Unused parameter */
776 -  zLocale = (const char *)sqlite3_value_text(apArg[0]);
777 -  zName = (const char *)sqlite3_value_text(apArg[1]);
778 -
779 -  if( !zLocale || !zName ){
780 -    return;
781 -  }
782 -
783 -  pUCollator = ucol_open(zLocale, &status);
784 -  if( !U_SUCCESS(status) ){
785 -    icuFunctionError(p, "ucol_open", status);
786 -    return;
787 -  }
788 -  assert(p);
789 -
790 -  rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
791 -      icuCollationColl, icuCollationDel
792 -  );
793 -  if( rc!=SQLITE_OK ){
794 -    ucol_close(pUCollator);
795 -    sqlite3_result_error(p, "Error registering collation function", -1);
796 -  }
797 -}
798 -
799 -/*
800 -** Register the ICU extension functions with database db.
801 -*/
802 -SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){
803 -# define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
804 -  static const struct IcuScalar {
805 -    const char *zName;                        /* Function name */
806 -    unsigned char nArg;                       /* Number of arguments */
807 -    unsigned int enc;                         /* Optimal text encoding */
808 -    unsigned char iContext;                   /* sqlite3_user_data() context */
809 -    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
810 -  } scalars[] = {
811 -    {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation},
812 -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
813 -    {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS,         0, icuRegexpFunc},
814 -    {"lower",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
815 -    {"lower",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       0, icuCaseFunc16},
816 -    {"upper",  1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
817 -    {"upper",  2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS,       1, icuCaseFunc16},
818 -    {"lower",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
819 -    {"lower",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuCaseFunc16},
820 -    {"upper",  1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
821 -    {"upper",  2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        1, icuCaseFunc16},
822 -    {"like",   2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
823 -    {"like",   3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS,        0, icuLikeFunc},
824 -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
825 -  };
826 -  int rc = SQLITE_OK;
827 -  int i;
828 -
829 -  for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
830 -    const struct IcuScalar *p = &scalars[i];
831 -    rc = sqlite3_create_function(
832 -        db, p->zName, p->nArg, p->enc,
833 -        p->iContext ? (void*)db : (void*)0,
834 -        p->xFunc, 0, 0
835 -    );
836 -  }
837 -
838 -  return rc;
839 -}
840 -
841 -#if !SQLITE_CORE
842 -#ifdef _WIN32
843 -__declspec(dllexport)
844 -#endif
845 -SQLITE_API int sqlite3_icu_init(
846 -  sqlite3 *db,
847 -  char **pzErrMsg,
848 -  const sqlite3_api_routines *pApi
849 -){
850 -  SQLITE_EXTENSION_INIT2(pApi)
851 -  return sqlite3IcuInit(db);
852 -}
853 -#endif
854 -
855 -#endif
856 -
857 -/************** End of icu.c *************************************************/
858 -/************** Begin file fts3_icu.c ****************************************/
859 -/*
860 -** 2007 June 22
861 -**
862 -** The author disclaims copyright to this source code.  In place of
863 -** a legal notice, here is a blessing:
864 -**
865 -**    May you do good and not evil.
866 -**    May you find forgiveness for yourself and forgive others.
867 -**    May you share freely, never taking more than you give.
868 -**
869 -*************************************************************************
870 -** This file implements a tokenizer for fts3 based on the ICU library.
871 -*/
872 -/* #include "fts3Int.h" */
873 -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
874 -#ifdef SQLITE_ENABLE_ICU
875 -
876 -/* #include <assert.h> */
877 -/* #include <string.h> */
878 -/* #include "fts3_tokenizer.h" */
879 -
880 -#include <unicode/ubrk.h>
881 -/* #include <unicode/ucol.h> */
882 -/* #include <unicode/ustring.h> */
883 -#include <unicode/utf16.h>
884 -
885 -typedef struct IcuTokenizer IcuTokenizer;
886 -typedef struct IcuCursor IcuCursor;
887 -
888 -struct IcuTokenizer {
889 -  sqlite3_tokenizer base;
890 -  char *zLocale;
891 -};
892 -
893 -struct IcuCursor {
894 -  sqlite3_tokenizer_cursor base;
895 -
896 -  UBreakIterator *pIter;      /* ICU break-iterator object */
897 -  int nChar;                  /* Number of UChar elements in pInput */
898 -  UChar *aChar;               /* Copy of input using utf-16 encoding */
899 -  int *aOffset;               /* Offsets of each character in utf-8 input */
900 -
901 -  int nBuffer;
902 -  char *zBuffer;
903 -
904 -  int iToken;
905 -};
906 -
907 -/*
908 -** Create a new tokenizer instance.
909 -*/
910 -static int icuCreate(
911 -  int argc,                            /* Number of entries in argv[] */
912 -  const char * const *argv,            /* Tokenizer creation arguments */
913 -  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
914 -){
915 -  IcuTokenizer *p;
916 -  int n = 0;
917 -
918 -  if( argc>0 ){
919 -    n = strlen(argv[0])+1;
920 -  }
921 -  p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
922 -  if( !p ){
923 -    return SQLITE_NOMEM;
924 -  }
925 -  memset(p, 0, sizeof(IcuTokenizer));
926 -
927 -  if( n ){
928 -    p->zLocale = (char *)&p[1];
929 -    memcpy(p->zLocale, argv[0], n);
930 -  }
931 -
932 -  *ppTokenizer = (sqlite3_tokenizer *)p;
933 -
934 -  return SQLITE_OK;
935 -}
936 -
937 -/*
938 -** Destroy a tokenizer
939 -*/
940 -static int icuDestroy(sqlite3_tokenizer *pTokenizer){
941 -  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
942 -  sqlite3_free(p);
943 -  return SQLITE_OK;
944 -}
945 -
946 -/*
947 -** Prepare to begin tokenizing a particular string.  The input
948 -** string to be tokenized is pInput[0..nBytes-1].  A cursor
949 -** used to incrementally tokenize this string is returned in
950 -** *ppCursor.
951 -*/
952 -static int icuOpen(
953 -  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
954 -  const char *zInput,                    /* Input string */
955 -  int nInput,                            /* Length of zInput in bytes */
956 -  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
957 -){
958 -  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
959 -  IcuCursor *pCsr;
960 -
961 -  const int32_t opt = U_FOLD_CASE_DEFAULT;
962 -  UErrorCode status = U_ZERO_ERROR;
963 -  int nChar;
964 -
965 -  UChar32 c;
966 -  int iInput = 0;
967 -  int iOut = 0;
968 -
969 -  *ppCursor = 0;
970 -
971 -  if( zInput==0 ){
972 -    nInput = 0;
973 -    zInput = "";
974 -  }else if( nInput<0 ){
975 -    nInput = strlen(zInput);
976 -  }
977 -  nChar = nInput+1;
978 -  pCsr = (IcuCursor *)sqlite3_malloc64(
979 -      sizeof(IcuCursor) +                /* IcuCursor */
980 -      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
981 -      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
982 -  );
983 -  if( !pCsr ){
984 -    return SQLITE_NOMEM;
985 -  }
986 -  memset(pCsr, 0, sizeof(IcuCursor));
987 -  pCsr->aChar = (UChar *)&pCsr[1];
988 -  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
989 -
990 -  pCsr->aOffset[iOut] = iInput;
991 -  U8_NEXT(zInput, iInput, nInput, c);
992 -  while( c>0 ){
993 -    int isError = 0;
994 -    c = u_foldCase(c, opt);
995 -    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
996 -    if( isError ){
997 -      sqlite3_free(pCsr);
998 -      return SQLITE_ERROR;
999 -    }
1000 -    pCsr->aOffset[iOut] = iInput;
1001 -
1002 -    if( iInput<nInput ){
1003 -      U8_NEXT(zInput, iInput, nInput, c);
1004 -    }else{
1005 -      c = 0;
1006 -    }
1007 -  }
1008 -
1009 -  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
1010 -  if( !U_SUCCESS(status) ){
1011 -    sqlite3_free(pCsr);
1012 -    return SQLITE_ERROR;
1013 -  }
1014 -  pCsr->nChar = iOut;
1015 -
1016 -  ubrk_first(pCsr->pIter);
1017 -  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
1018 -  return SQLITE_OK;
1019 -}
1020 -
1021 -/*
1022 -** Close a tokenization cursor previously opened by a call to icuOpen().
1023 -*/
1024 -static int icuClose(sqlite3_tokenizer_cursor *pCursor){
1025 -  IcuCursor *pCsr = (IcuCursor *)pCursor;
1026 -  ubrk_close(pCsr->pIter);
1027 -  sqlite3_free(pCsr->zBuffer);
1028 -  sqlite3_free(pCsr);
1029 -  return SQLITE_OK;
1030 -}
1031 -
1032 -/*
1033 -** Extract the next token from a tokenization cursor.
1034 -*/
1035 -static int icuNext(
1036 -  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
1037 -  const char **ppToken,               /* OUT: *ppToken is the token text */
1038 -  int *pnBytes,                       /* OUT: Number of bytes in token */
1039 -  int *piStartOffset,                 /* OUT: Starting offset of token */
1040 -  int *piEndOffset,                   /* OUT: Ending offset of token */
1041 -  int *piPosition                     /* OUT: Position integer of token */
1042 -){
1043 -  IcuCursor *pCsr = (IcuCursor *)pCursor;
1044 -
1045 -  int iStart = 0;
1046 -  int iEnd = 0;
1047 -  int nByte = 0;
1048 -
1049 -  while( iStart==iEnd ){
1050 -    UChar32 c;
1051 -
1052 -    iStart = ubrk_current(pCsr->pIter);
1053 -    iEnd = ubrk_next(pCsr->pIter);
1054 -    if( iEnd==UBRK_DONE ){
1055 -      return SQLITE_DONE;
1056 -    }
1057 -
1058 -    while( iStart<iEnd ){
1059 -      int iWhite = iStart;
1060 -      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
1061 -      if( u_isspace(c) ){
1062 -        iStart = iWhite;
1063 -      }else{
1064 -        break;
1065 -      }
1066 -    }
1067 -    assert(iStart<=iEnd);
1068 -  }
1069 -
1070 -  do {
1071 -    UErrorCode status = U_ZERO_ERROR;
1072 -    if( nByte ){
1073 -      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
1074 -      if( !zNew ){
1075 -        return SQLITE_NOMEM;
1076 -      }
1077 -      pCsr->zBuffer = zNew;
1078 -      pCsr->nBuffer = nByte;
1079 -    }
1080 -
1081 -    u_strToUTF8(
1082 -        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
1083 -        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
1084 -        &status                                  /* Output success/failure */
1085 -    );
1086 -  } while( nByte>pCsr->nBuffer );
1087 -
1088 -  *ppToken = pCsr->zBuffer;
1089 -  *pnBytes = nByte;
1090 -  *piStartOffset = pCsr->aOffset[iStart];
1091 -  *piEndOffset = pCsr->aOffset[iEnd];
1092 -  *piPosition = pCsr->iToken++;
1093 -
1094 -  return SQLITE_OK;
1095 -}
1096 -
1097 -/*
1098 -** The set of routines that implement the simple tokenizer
1099 -*/
1100 -static const sqlite3_tokenizer_module icuTokenizerModule = {
1101 -  0,                           /* iVersion    */
1102 -  icuCreate,                   /* xCreate     */
1103 -  icuDestroy,                  /* xCreate     */
1104 -  icuOpen,                     /* xOpen       */
1105 -  icuClose,                    /* xClose      */
1106 -  icuNext,                     /* xNext       */
1107 -  0,                           /* xLanguageid */
1108 -};
1109 -
1110 -/*
1111 -** Set *ppModule to point at the implementation of the ICU tokenizer.
1112 -*/
1113 -SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(
1114 -  sqlite3_tokenizer_module const**ppModule
1115 -){
1116 -  *ppModule = &icuTokenizerModule;
1117 -}
1118 -
1119 -#endif /* defined(SQLITE_ENABLE_ICU) */
1120 -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
1121 -
1122 -/************** End of fts3_icu.c ********************************************/
1126 diff --git a/src/sqlite3icu.c b/src/sqlite3icu.c
1129 --- /dev/null
1131 @@ -0,0 +1,888 @@
1148 +** language. The code for the "sqlite3" command-line shell is also in a
1152 +** 2019.09.02-Complete codec logic for encryption and decryption.
1170 +** ("International Components for Unicode", an open-source library
1174 +**   * An implementation of the SQL regexp() function (and hence REGEXP
1183 +**     provide case-independent matching.
1187 +#include <string.h>
1214 +# define SQLITE_EXTENSION_INIT1     /*no-op*/
1216 +# define SQLITE_EXTENSION_INIT3     /*no-op*/
1258 +  sqlite3_result_error(pCtx, zBuf, -1);
1280 +** a multi-byte UTF8 character. It is copied here from SQLite source
1297 +    c = icuUtf8Trans1[c-0xc0];                             \
1311 +** Compare two UTF-8 strings for equality where the first string is
1317 +  const uint8_t *zString,    /* The UTF-8 string to compare against */
1318 +  const UChar32 uEsc         /* The escape character */
1334 +    **     1. uPattern is an unescaped match-all character "%",
1335 +    **     2. uPattern is an unescaped match-one character "_",
1336 +    **     3. uPattern is an unescaped escape character, or
1345 +      ** test string.
1392 +** the built-in LIKE operator.  The first argument to the function is the
1393 +** pattern and the second argument is the string.  So, the SQL statements:
1397 +** is implemented as like(B, A). If there is an escape character E,
1399 +**       A LIKE B ESCAPE E
1416 +    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
1422 +    /* The escape character string must consist of a single UTF-8 character.
1432 +          "ESCAPE expression must be a single character", -1);
1443 +** Function to delete compiled regexp objects. Registered as
1452 +** Implementation of SQLite REGEXP operator. This scalar function takes
1454 +** the second is a string to match against that pattern. If either
1456 +** is 1 if the string matches the pattern, or 0 otherwise.
1458 +** SQLite maps the REGEXP operator to the regexp() function such
1461 +**     zString REGEXP zPattern
1462 +**     regexp(zPattern, zString)
1464 +** Uses the following ICU regexp APIs:
1478 +  /* If the left hand side of the regexp operator is NULL,
1491 +    pExpr = uregex_open(zPattern, -1, 0, 0, &status);
1504 +  uregex_setText(pExpr, zString, -1, &status);
1529 +** Implementations of scalar functions for case mapping - upper() and
1530 +** lower(). Function upper() converts its input to upper-case (ABC).
1531 +** Function lower() converts to lower-case (abc).
1540 +**     upper('abc') -> 'ABC'
1541 +**     lower('ABC') -> 'abc'
1545 +** of the locale to use. Passing an empty string ("") or SQL NULL value
1549 +**     lower('I', 'en_us') -> 'i'
1550 +**     lower('I', 'tr_tr') -> '\u131' (small dotless i)
1552 +** http://www.icu-project.org/userguide/posix.html#case_mappings
1555 +  const UChar *zInput;            /* Pointer to input string */
1557 +  int nInput;                     /* Size of utf-16 input string in bytes */
1634 +    case UCOL_LESS:    return -1;
1649 +**     SELECT icu_load_collation(<locale>, <collation-name>);
1651 +** Where <locale> is a string containing an ICU locale identifier (i.e.
1652 +** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
1662 +  const char *zLocale;      /* Locale identifier - (eg. "ja_JP") */
1688 +    sqlite3_result_error(p, "Error registering collation function", -1);
1706 +    {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS,         0, icuRegexpFunc},
1729 +        db, p->zName, p->nArg, p->enc,
1730 +        p->iContext ? (void*)db : (void*)0,
1731 +        p->xFunc, 0, 0
1774 +/* #include <string.h> */
1793 +  UBreakIterator *pIter;      /* ICU break-iterator object */
1795 +  UChar *aChar;               /* Copy of input using utf-16 encoding */
1796 +  int *aOffset;               /* Offsets of each character in utf-8 input */
1825 +    p->zLocale = (char *)&p[1];
1826 +    memcpy(p->zLocale, argv[0], n);
1844 +** Prepare to begin tokenizing a particular string.  The input
1845 +** string to be tokenized is zInput[0..nInput-1].  A cursor
1846 +** used to incrementally tokenize this string is returned in
1851 +  const char *zInput,                    /* Input string */
1884 +  pCsr->aChar = (UChar *)&pCsr[1];
1885 +  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
1887 +  pCsr->aOffset[iOut] = iInput;
1892 +    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
1897 +    pCsr->aOffset[iOut] = iInput;
1906 +  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
1911 +  pCsr->nChar = iOut;
1913 +  ubrk_first(pCsr->pIter);
1923 +  ubrk_close(pCsr->pIter);
1924 +  sqlite3_free(pCsr->zBuffer);
1949 +    iStart = ubrk_current(pCsr->pIter);
1950 +    iEnd = ubrk_next(pCsr->pIter);
1957 +      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
1970 +      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
1974 +      pCsr->zBuffer = zNew;
1975 +      pCsr->nBuffer = nByte;
1979 +        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
1980 +        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
1983 +  } while( nByte>pCsr->nBuffer );
1985 +  *ppToken = pCsr->zBuffer;
1987 +  *piStartOffset = pCsr->aOffset[iStart];
1988 +  *piEndOffset = pCsr->aOffset[iEnd];
1989 +  *piPosition = pCsr->iToken++;
2021 --