Lines Matching +full:escape +full:- +full:string +full:- +full:regexp
6 ---
7 src/sqlite3.c | 1013 ++++------------------------------------------
9 2 files changed, 962 insertions(+), 939 deletions(-)
12 diff --git a/src/sqlite3.c b/src/sqlite3.c
14 --- a/src/sqlite3.c
16 @@ -2502,6 +2502,7 @@ struct sqlite3_mem_methods {
24 @@ -3289,6 +3290,7 @@ SQLITE_API void sqlite3_free_table(char **result);
30 ** CAPI3REF: Formatted String Printing Functions
32 @@ -178413,6 +178415,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db);
40 @@ -178475,13 +178478,54 @@ SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db);
44 -SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db);
96 @@ -178521,7 +178565,7 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = {
100 - sqlite3IcuInit,
105 @@ -178913,6 +178957,19 @@ SQLITE_API int sqlite3_shutdown(void){
125 @@ -178930,7 +178987,6 @@ SQLITE_API int sqlite3_config(int op, ...){
129 - va_start(ap, op);
133 @@ -182053,6 +182109,12 @@ static int openDatabase(
144 /* Load compiled-in extensions */
146 @@ -184344,114 +184406,6 @@ SQLITE_EXTENSION_INIT3
150 -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
151 -typedef struct sqlite3_tokenizer sqlite3_tokenizer;
152 -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
153 -
154 -struct sqlite3_tokenizer_module {
155 -
156 - /*
157 - ** Structure version. Should always be set to 0 or 1.
158 - */
159 - int iVersion;
160 -
161 - /*
162 - ** Create a new tokenizer. The values in the argv[] array are the
163 - ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
164 - ** TABLE statement that created the fts3 table. For example, if
165 - ** the following SQL is executed:
166 - **
167 - ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
168 - **
169 - ** then argc is set to 2, and the argv[] array contains pointers
170 - ** to the strings "arg1" and "arg2".
171 - **
172 - ** This method should return either SQLITE_OK (0), or an SQLite error
173 - ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
174 - ** to point at the newly created tokenizer structure. The generic
175 - ** sqlite3_tokenizer.pModule variable should not be initialized by
176 - ** this callback. The caller will do so.
177 - */
178 - int (*xCreate)(
179 - int argc, /* Size of argv array */
180 - const char *const*argv, /* Tokenizer argument strings */
181 - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
182 - );
183 -
184 - /*
185 - ** Destroy an existing tokenizer. The fts3 module calls this method
186 - ** exactly once for each successful call to xCreate().
187 - */
188 - int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
189 -
190 - /*
191 - ** Create a tokenizer cursor to tokenize an input buffer. The caller
192 - ** is responsible for ensuring that the input buffer remains valid
193 - ** until the cursor is closed (using the xClose() method).
194 - */
195 - int (*xOpen)(
196 - sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
197 - const char *pInput, int nBytes, /* Input buffer */
198 - sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
199 - );
200 -
201 - /*
202 - ** Destroy an existing tokenizer cursor. The fts3 module calls this
203 - ** method exactly once for each successful call to xOpen().
204 - */
205 - int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
206 -
207 - /*
208 - ** Retrieve the next token from the tokenizer cursor pCursor. This
209 - ** method should either return SQLITE_OK and set the values of the
210 - ** "OUT" variables identified below, or SQLITE_DONE to indicate that
211 - ** the end of the buffer has been reached, or an SQLite error code.
212 - **
213 - ** *ppToken should be set to point at a buffer containing the
214 - ** normalized version of the token (i.e. after any case-folding and/or
215 - ** stemming has been performed). *pnBytes should be set to the length
216 - ** of this buffer in bytes. The input text that generated the token is
217 - ** identified by the byte offsets returned in *piStartOffset and
218 - ** *piEndOffset. *piStartOffset should be set to the index of the first
219 - ** byte of the token in the input buffer. *piEndOffset should be set
220 - ** to the index of the first byte just past the end of the token in
221 - ** the input buffer.
222 - **
223 - ** The buffer *ppToken is set to point at is managed by the tokenizer
224 - ** implementation. It is only required to be valid until the next call
225 - ** to xNext() or xClose().
226 - */
227 - /* TODO(shess) current implementation requires pInput to be
228 - ** nul-terminated. This should either be fixed, or pInput/nBytes
229 - ** should be converted to zInput.
230 - */
231 - int (*xNext)(
232 - sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
233 - const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
234 - int *piStartOffset, /* OUT: Byte offset of token in input buffer */
235 - int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
236 - int *piPosition /* OUT: Number of tokens returned before this one */
237 - );
238 -
239 - /***********************************************************************
240 - ** Methods below this point are only available if iVersion>=1.
241 - */
242 -
243 - /*
244 - ** Configure the language id of a tokenizer cursor.
245 - */
246 - int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
247 -};
248 -
249 -struct sqlite3_tokenizer {
250 - const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
251 - /* Tokenizer implementations will typically add additional fields */
252 -};
253 -
254 -struct sqlite3_tokenizer_cursor {
255 - sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
256 - /* Tokenizer implementations will typically add additional fields */
257 -};
261 @@ -189003,9 +188957,6 @@ SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_mo…
265 -#ifdef SQLITE_ENABLE_ICU
266 -SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
267 -#endif
271 @@ -189024,7 +188975,14 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
275 - sqlite3Fts3IcuTokenizerModule(&pIcu);
287 @@ -189060,7 +189018,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
288 || sqlite3Fts3HashInsert(&pHash->hash, "unicode61", 10, (void *)pUnicode)
291 - || (pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
292 + || (icuEnable && pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
296 @@ -213799,829 +213757,6 @@ SQLITE_API int sqlite3_rtree_init(
300 -/************** Begin file icu.c *********************************************/
301 -/*
302 -** 2007 May 6
303 -**
304 -** The author disclaims copyright to this source code. In place of
305 -** a legal notice, here is a blessing:
306 -**
307 -** May you do good and not evil.
308 -** May you find forgiveness for yourself and forgive others.
309 -** May you share freely, never taking more than you give.
310 -**
311 -*************************************************************************
312 -** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
313 -**
314 -** This file implements an integration between the ICU library
315 -** ("International Components for Unicode", an open-source library
316 -** for handling unicode data) and SQLite. The integration uses
317 -** ICU to provide the following to SQLite:
318 -**
319 -** * An implementation of the SQL regexp() function (and hence REGEXP
320 -** operator) using the ICU uregex_XX() APIs.
321 -**
322 -** * Implementations of the SQL scalar upper() and lower() functions
323 -** for case mapping.
324 -**
325 -** * Integration of ICU and SQLite collation sequences.
326 -**
327 -** * An implementation of the LIKE operator that uses ICU to
328 -** provide case-independent matching.
329 -*/
330 -
331 -#if !defined(SQLITE_CORE) \
332 - || defined(SQLITE_ENABLE_ICU) \
333 - || defined(SQLITE_ENABLE_ICU_COLLATIONS)
334 -
335 -/* Include ICU headers */
336 -#include <unicode/utypes.h>
337 -#include <unicode/uregex.h>
338 -#include <unicode/ustring.h>
339 -#include <unicode/ucol.h>
340 -
341 -/* #include <assert.h> */
342 -
343 -#ifndef SQLITE_CORE
344 -/* #include "sqlite3ext.h" */
345 - SQLITE_EXTENSION_INIT1
346 -#else
347 -/* #include "sqlite3.h" */
348 -#endif
349 -
350 -/*
351 -** This function is called when an ICU function called from within
352 -** the implementation of an SQL scalar function returns an error.
353 -**
354 -** The scalar function context passed as the first argument is
355 -** loaded with an error message based on the following two args.
356 -*/
357 -static void icuFunctionError(
358 - sqlite3_context *pCtx, /* SQLite scalar function context */
359 - const char *zName, /* Name of ICU function that failed */
360 - UErrorCode e /* Error code returned by ICU function */
361 -){
362 - char zBuf[128];
363 - sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
364 - zBuf[127] = '\0';
365 - sqlite3_result_error(pCtx, zBuf, -1);
366 -}
367 -
368 -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
369 -
370 -/*
371 -** Maximum length (in bytes) of the pattern in a LIKE or GLOB
372 -** operator.
373 -*/
374 -#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
375 -# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
376 -#endif
377 -
378 -/*
379 -** Version of sqlite3_free() that is always a function, never a macro.
380 -*/
381 -static void xFree(void *p){
382 - sqlite3_free(p);
383 -}
384 -
385 -/*
386 -** This lookup table is used to help decode the first byte of
387 -** a multi-byte UTF8 character. It is copied here from SQLite source
388 -** code file utf8.c.
389 -*/
390 -static const unsigned char icuUtf8Trans1[] = {
391 - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
392 - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
393 - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
394 - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
395 - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396 - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397 - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
398 - 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
399 -};
400 -
401 -#define SQLITE_ICU_READ_UTF8(zIn, c) \
402 - c = *(zIn++); \
403 - if( c>=0xc0 ){ \
404 - c = icuUtf8Trans1[c-0xc0]; \
405 - while( (*zIn & 0xc0)==0x80 ){ \
406 - c = (c<<6) + (0x3f & *(zIn++)); \
407 - } \
408 - }
409 -
410 -#define SQLITE_ICU_SKIP_UTF8(zIn) \
411 - assert( *zIn ); \
412 - if( *(zIn++)>=0xc0 ){ \
413 - while( (*zIn & 0xc0)==0x80 ){zIn++;} \
414 - }
415 -
416 -
417 -/*
418 -** Compare two UTF-8 strings for equality where the first string is
419 -** a "LIKE" expression. Return true (1) if they are the same and
420 -** false (0) if they are different.
421 -*/
422 -static int icuLikeCompare(
423 - const uint8_t *zPattern, /* LIKE pattern */
424 - const uint8_t *zString, /* The UTF-8 string to compare against */
425 - const UChar32 uEsc /* The escape character */
426 -){
427 - static const uint32_t MATCH_ONE = (uint32_t)'_';
428 - static const uint32_t MATCH_ALL = (uint32_t)'%';
429 -
430 - int prevEscape = 0; /* True if the previous character was uEsc */
431 -
432 - while( 1 ){
433 -
434 - /* Read (and consume) the next character from the input pattern. */
435 - uint32_t uPattern;
436 - SQLITE_ICU_READ_UTF8(zPattern, uPattern);
437 - if( uPattern==0 ) break;
438 -
439 - /* There are now 4 possibilities:
440 - **
441 - ** 1. uPattern is an unescaped match-all character "%",
442 - ** 2. uPattern is an unescaped match-one character "_",
443 - ** 3. uPattern is an unescaped escape character, or
444 - ** 4. uPattern is to be handled as an ordinary character
445 - */
446 - if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
447 - /* Case 1. */
448 - uint8_t c;
449 -
450 - /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
451 - ** MATCH_ALL. For each MATCH_ONE, skip one character in the
452 - ** test string.
453 - */
454 - while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
455 - if( c==MATCH_ONE ){
456 - if( *zString==0 ) return 0;
457 - SQLITE_ICU_SKIP_UTF8(zString);
458 - }
459 - zPattern++;
460 - }
461 -
462 - if( *zPattern==0 ) return 1;
463 -
464 - while( *zString ){
465 - if( icuLikeCompare(zPattern, zString, uEsc) ){
466 - return 1;
467 - }
468 - SQLITE_ICU_SKIP_UTF8(zString);
469 - }
470 - return 0;
471 -
472 - }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
473 - /* Case 2. */
474 - if( *zString==0 ) return 0;
475 - SQLITE_ICU_SKIP_UTF8(zString);
476 -
477 - }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
478 - /* Case 3. */
479 - prevEscape = 1;
480 -
481 - }else{
482 - /* Case 4. */
483 - uint32_t uString;
484 - SQLITE_ICU_READ_UTF8(zString, uString);
485 - uString = (uint32_t)u_foldCase((UChar32)uString, U_FOLD_CASE_DEFAULT);
486 - uPattern = (uint32_t)u_foldCase((UChar32)uPattern, U_FOLD_CASE_DEFAULT);
487 - if( uString!=uPattern ){
488 - return 0;
489 - }
490 - prevEscape = 0;
491 - }
492 - }
493 -
494 - return *zString==0;
495 -}
496 -
497 -/*
498 -** Implementation of the like() SQL function. This function implements
499 -** the built-in LIKE operator. The first argument to the function is the
500 -** pattern and the second argument is the string. So, the SQL statements:
501 -**
502 -** A LIKE B
503 -**
504 -** is implemented as like(B, A). If there is an escape character E,
505 -**
506 -** A LIKE B ESCAPE E
507 -**
508 -** is mapped to like(B, A, E).
509 -*/
510 -static void icuLikeFunc(
511 - sqlite3_context *context,
512 - int argc,
513 - sqlite3_value **argv
514 -){
515 - const unsigned char *zA = sqlite3_value_text(argv[0]);
516 - const unsigned char *zB = sqlite3_value_text(argv[1]);
517 - UChar32 uEsc = 0;
518 -
519 - /* Limit the length of the LIKE or GLOB pattern to avoid problems
520 - ** of deep recursion and N*N behavior in patternCompare().
521 - */
522 - if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
523 - sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
524 - return;
525 - }
526 -
527 -
528 - if( argc==3 ){
529 - /* The escape character string must consist of a single UTF-8 character.
530 - ** Otherwise, return an error.
531 - */
532 - int nE= sqlite3_value_bytes(argv[2]);
533 - const unsigned char *zE = sqlite3_value_text(argv[2]);
534 - int i = 0;
535 - if( zE==0 ) return;
536 - U8_NEXT(zE, i, nE, uEsc);
537 - if( i!=nE){
538 - sqlite3_result_error(context,
539 - "ESCAPE expression must be a single character", -1);
540 - return;
541 - }
542 - }
543 -
544 - if( zA && zB ){
545 - sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
546 - }
547 -}
548 -
549 -/*
550 -** Function to delete compiled regexp objects. Registered as
551 -** a destructor function with sqlite3_set_auxdata().
552 -*/
553 -static void icuRegexpDelete(void *p){
554 - URegularExpression *pExpr = (URegularExpression *)p;
555 - uregex_close(pExpr);
556 -}
557 -
558 -/*
559 -** Implementation of SQLite REGEXP operator. This scalar function takes
560 -** two arguments. The first is a regular expression pattern to compile;
561 -** the second is a string to match against that pattern. If either
562 -** argument is an SQL NULL, then NULL is returned. Otherwise, the result
563 -** is 1 if the string matches the pattern, or 0 otherwise.
564 -**
565 -** SQLite maps the regexp() function to the REGEXP operator such
566 -** that the following two are equivalent:
567 -**
568 -** zString REGEXP zPattern
569 -** regexp(zPattern, zString)
570 -**
571 -** Uses the following ICU regexp APIs:
572 -**
573 -** uregex_open()
574 -** uregex_matches()
575 -** uregex_close()
576 -*/
577 -static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
578 - UErrorCode status = U_ZERO_ERROR;
579 - URegularExpression *pExpr;
580 - UBool res;
581 - const UChar *zString = sqlite3_value_text16(apArg[1]);
582 -
583 - (void)nArg; /* Unused parameter */
584 -
585 - /* If the left hand side of the regexp operator is NULL,
586 - ** then the result is also NULL.
587 - */
588 - if( !zString ){
589 - return;
590 - }
591 -
592 - pExpr = sqlite3_get_auxdata(p, 0);
593 - if( !pExpr ){
594 - const UChar *zPattern = sqlite3_value_text16(apArg[0]);
595 - if( !zPattern ){
596 - return;
597 - }
598 - pExpr = uregex_open(zPattern, -1, 0, 0, &status);
599 -
600 - if( U_SUCCESS(status) ){
601 - sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
602 - pExpr = sqlite3_get_auxdata(p, 0);
603 - }
604 - if( !pExpr ){
605 - icuFunctionError(p, "uregex_open", status);
606 - return;
607 - }
608 - }
609 -
610 - /* Configure the text that the regular expression operates on. */
611 - uregex_setText(pExpr, zString, -1, &status);
612 - if( !U_SUCCESS(status) ){
613 - icuFunctionError(p, "uregex_setText", status);
614 - return;
615 - }
616 -
617 - /* Attempt the match */
618 - res = uregex_matches(pExpr, 0, &status);
619 - if( !U_SUCCESS(status) ){
620 - icuFunctionError(p, "uregex_matches", status);
621 - return;
622 - }
623 -
624 - /* Set the text that the regular expression operates on to a NULL
625 - ** pointer. This is not really necessary, but it is tidier than
626 - ** leaving the regular expression object configured with an invalid
627 - ** pointer after this function returns.
628 - */
629 - uregex_setText(pExpr, 0, 0, &status);
630 -
631 - /* Return 1 or 0. */
632 - sqlite3_result_int(p, res ? 1 : 0);
633 -}
634 -
635 -/*
636 -** Implementations of scalar functions for case mapping - upper() and
637 -** lower(). Function upper() converts its input to upper-case (ABC).
638 -** Function lower() converts to lower-case (abc).
639 -**
640 -** ICU provides two types of case mapping, "general" case mapping and
641 -** "language specific". Refer to ICU documentation for the differences
642 -** between the two.
643 -**
644 -** To utilise "general" case mapping, the upper() or lower() scalar
645 -** functions are invoked with one argument:
646 -**
647 -** upper('abc') -> 'ABC'
648 -** lower('ABC') -> 'abc'
649 -**
650 -** To access ICU "language specific" case mapping, upper() or lower()
651 -** should be invoked with two arguments. The second argument is the name
652 -** of the locale to use. Passing an empty string ("") or SQL NULL value
653 -** as the second argument is the same as invoking the 1 argument version
654 -** of upper() or lower().
655 -**
656 -** lower('I', 'en_us') -> 'i'
657 -** lower('I', 'tr_tr') -> '\u131' (small dotless i)
658 -**
659 -** http://www.icu-project.org/userguide/posix.html#case_mappings
660 -*/
661 -static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
662 - const UChar *zInput; /* Pointer to input string */
663 - UChar *zOutput = 0; /* Pointer to output buffer */
664 - int nInput; /* Size of utf-16 input string in bytes */
665 - int nOut; /* Size of output buffer in bytes */
666 - int cnt;
667 - int bToUpper; /* True for toupper(), false for tolower() */
668 - UErrorCode status;
669 - const char *zLocale = 0;
670 -
671 - assert(nArg==1 || nArg==2);
672 - bToUpper = (sqlite3_user_data(p)!=0);
673 - if( nArg==2 ){
674 - zLocale = (const char *)sqlite3_value_text(apArg[1]);
675 - }
676 -
677 - zInput = sqlite3_value_text16(apArg[0]);
678 - if( !zInput ){
679 - return;
680 - }
681 - nOut = nInput = sqlite3_value_bytes16(apArg[0]);
682 - if( nOut==0 ){
683 - sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
684 - return;
685 - }
686 -
687 - for(cnt=0; cnt<2; cnt++){
688 - UChar *zNew = sqlite3_realloc(zOutput, nOut);
689 - if( zNew==0 ){
690 - sqlite3_free(zOutput);
691 - sqlite3_result_error_nomem(p);
692 - return;
693 - }
694 - zOutput = zNew;
695 - status = U_ZERO_ERROR;
696 - if( bToUpper ){
697 - nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
698 - }else{
699 - nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
700 - }
701 -
702 - if( U_SUCCESS(status) ){
703 - sqlite3_result_text16(p, zOutput, nOut, xFree);
704 - }else if( status==U_BUFFER_OVERFLOW_ERROR ){
705 - assert( cnt==0 );
706 - continue;
707 - }else{
708 - icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
709 - }
710 - return;
711 - }
712 - assert( 0 ); /* Unreachable */
713 -}
714 -
715 -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
716 -
717 -/*
718 -** Collation sequence destructor function. The pCtx argument points to
719 -** a UCollator structure previously allocated using ucol_open().
720 -*/
721 -static void icuCollationDel(void *pCtx){
722 - UCollator *p = (UCollator *)pCtx;
723 - ucol_close(p);
724 -}
725 -
726 -/*
727 -** Collation sequence comparison function. The pCtx argument points to
728 -** a UCollator structure previously allocated using ucol_open().
729 -*/
730 -static int icuCollationColl(
731 - void *pCtx,
732 - int nLeft,
733 - const void *zLeft,
734 - int nRight,
735 - const void *zRight
736 -){
737 - UCollationResult res;
738 - UCollator *p = (UCollator *)pCtx;
739 - res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
740 - switch( res ){
741 - case UCOL_LESS: return -1;
742 - case UCOL_GREATER: return +1;
743 - case UCOL_EQUAL: return 0;
744 - }
745 - assert(!"Unexpected return value from ucol_strcoll()");
746 - return 0;
747 -}
748 -
749 -/*
750 -** Implementation of the scalar function icu_load_collation().
751 -**
752 -** This scalar function is used to add ICU collation based collation
753 -** types to an SQLite database connection. It is intended to be called
754 -** as follows:
755 -**
756 -** SELECT icu_load_collation(<locale>, <collation-name>);
757 -**
758 -** Where <locale> is a string containing an ICU locale identifier (i.e.
759 -** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
760 -** collation sequence to create.
761 -*/
762 -static void icuLoadCollation(
763 - sqlite3_context *p,
764 - int nArg,
765 - sqlite3_value **apArg
766 -){
767 - sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
768 - UErrorCode status = U_ZERO_ERROR;
769 - const char *zLocale; /* Locale identifier - (eg. "jp_JP") */
770 - const char *zName; /* SQL Collation sequence name (eg. "japanese") */
771 - UCollator *pUCollator; /* ICU library collation object */
772 - int rc; /* Return code from sqlite3_create_collation_x() */
773 -
774 - assert(nArg==2);
775 - (void)nArg; /* Unused parameter */
776 - zLocale = (const char *)sqlite3_value_text(apArg[0]);
777 - zName = (const char *)sqlite3_value_text(apArg[1]);
778 -
779 - if( !zLocale || !zName ){
780 - return;
781 - }
782 -
783 - pUCollator = ucol_open(zLocale, &status);
784 - if( !U_SUCCESS(status) ){
785 - icuFunctionError(p, "ucol_open", status);
786 - return;
787 - }
788 - assert(p);
789 -
790 - rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
791 - icuCollationColl, icuCollationDel
792 - );
793 - if( rc!=SQLITE_OK ){
794 - ucol_close(pUCollator);
795 - sqlite3_result_error(p, "Error registering collation function", -1);
796 - }
797 -}
798 -
799 -/*
800 -** Register the ICU extension functions with database db.
801 -*/
802 -SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){
803 -# define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
804 - static const struct IcuScalar {
805 - const char *zName; /* Function name */
806 - unsigned char nArg; /* Number of arguments */
807 - unsigned int enc; /* Optimal text encoding */
808 - unsigned char iContext; /* sqlite3_user_data() context */
809 - void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
810 - } scalars[] = {
811 - {"icu_load_collation",2,SQLITE_UTF8|SQLITE_DIRECTONLY,1, icuLoadCollation},
812 -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
813 - {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS, 0, icuRegexpFunc},
814 - {"lower", 1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
815 - {"lower", 2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
816 - {"upper", 1, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
817 - {"upper", 2, SQLITE_UTF16|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
818 - {"lower", 1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
819 - {"lower", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuCaseFunc16},
820 - {"upper", 1, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
821 - {"upper", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 1, icuCaseFunc16},
822 - {"like", 2, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuLikeFunc},
823 - {"like", 3, SQLITE_UTF8|SQLITEICU_EXTRAFLAGS, 0, icuLikeFunc},
824 -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
825 - };
826 - int rc = SQLITE_OK;
827 - int i;
828 -
829 - for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
830 - const struct IcuScalar *p = &scalars[i];
831 - rc = sqlite3_create_function(
832 - db, p->zName, p->nArg, p->enc,
833 - p->iContext ? (void*)db : (void*)0,
834 - p->xFunc, 0, 0
835 - );
836 - }
837 -
838 - return rc;
839 -}
840 -
841 -#if !SQLITE_CORE
842 -#ifdef _WIN32
843 -__declspec(dllexport)
844 -#endif
845 -SQLITE_API int sqlite3_icu_init(
846 - sqlite3 *db,
847 - char **pzErrMsg,
848 - const sqlite3_api_routines *pApi
849 -){
850 - SQLITE_EXTENSION_INIT2(pApi)
851 - return sqlite3IcuInit(db);
852 -}
853 -#endif
854 -
855 -#endif
856 -
857 -/************** End of icu.c *************************************************/
858 -/************** Begin file fts3_icu.c ****************************************/
859 -/*
860 -** 2007 June 22
861 -**
862 -** The author disclaims copyright to this source code. In place of
863 -** a legal notice, here is a blessing:
864 -**
865 -** May you do good and not evil.
866 -** May you find forgiveness for yourself and forgive others.
867 -** May you share freely, never taking more than you give.
868 -**
869 -*************************************************************************
870 -** This file implements a tokenizer for fts3 based on the ICU library.
871 -*/
872 -/* #include "fts3Int.h" */
873 -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
874 -#ifdef SQLITE_ENABLE_ICU
875 -
876 -/* #include <assert.h> */
877 -/* #include <string.h> */
878 -/* #include "fts3_tokenizer.h" */
879 -
880 -#include <unicode/ubrk.h>
881 -/* #include <unicode/ucol.h> */
882 -/* #include <unicode/ustring.h> */
883 -#include <unicode/utf16.h>
884 -
885 -typedef struct IcuTokenizer IcuTokenizer;
886 -typedef struct IcuCursor IcuCursor;
887 -
888 -struct IcuTokenizer {
889 - sqlite3_tokenizer base;
890 - char *zLocale;
891 -};
892 -
893 -struct IcuCursor {
894 - sqlite3_tokenizer_cursor base;
895 -
896 - UBreakIterator *pIter; /* ICU break-iterator object */
897 - int nChar; /* Number of UChar elements in pInput */
898 - UChar *aChar; /* Copy of input using utf-16 encoding */
899 - int *aOffset; /* Offsets of each character in utf-8 input */
900 -
901 - int nBuffer;
902 - char *zBuffer;
903 -
904 - int iToken;
905 -};
906 -
907 -/*
908 -** Create a new tokenizer instance.
909 -*/
910 -static int icuCreate(
911 - int argc, /* Number of entries in argv[] */
912 - const char * const *argv, /* Tokenizer creation arguments */
913 - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
914 -){
915 - IcuTokenizer *p;
916 - int n = 0;
917 -
918 - if( argc>0 ){
919 - n = strlen(argv[0])+1;
920 - }
921 - p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
922 - if( !p ){
923 - return SQLITE_NOMEM;
924 - }
925 - memset(p, 0, sizeof(IcuTokenizer));
926 -
927 - if( n ){
928 - p->zLocale = (char *)&p[1];
929 - memcpy(p->zLocale, argv[0], n);
930 - }
931 -
932 - *ppTokenizer = (sqlite3_tokenizer *)p;
933 -
934 - return SQLITE_OK;
935 -}
936 -
937 -/*
938 -** Destroy a tokenizer
939 -*/
940 -static int icuDestroy(sqlite3_tokenizer *pTokenizer){
941 - IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
942 - sqlite3_free(p);
943 - return SQLITE_OK;
944 -}
945 -
946 -/*
947 -** Prepare to begin tokenizing a particular string. The input
948 -** string to be tokenized is pInput[0..nBytes-1]. A cursor
949 -** used to incrementally tokenize this string is returned in
950 -** *ppCursor.
951 -*/
952 -static int icuOpen(
953 - sqlite3_tokenizer *pTokenizer, /* The tokenizer */
954 - const char *zInput, /* Input string */
955 - int nInput, /* Length of zInput in bytes */
956 - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
957 -){
958 - IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
959 - IcuCursor *pCsr;
960 -
961 - const int32_t opt = U_FOLD_CASE_DEFAULT;
962 - UErrorCode status = U_ZERO_ERROR;
963 - int nChar;
964 -
965 - UChar32 c;
966 - int iInput = 0;
967 - int iOut = 0;
968 -
969 - *ppCursor = 0;
970 -
971 - if( zInput==0 ){
972 - nInput = 0;
973 - zInput = "";
974 - }else if( nInput<0 ){
975 - nInput = strlen(zInput);
976 - }
977 - nChar = nInput+1;
978 - pCsr = (IcuCursor *)sqlite3_malloc64(
979 - sizeof(IcuCursor) + /* IcuCursor */
980 - ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
981 - (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
982 - );
983 - if( !pCsr ){
984 - return SQLITE_NOMEM;
985 - }
986 - memset(pCsr, 0, sizeof(IcuCursor));
987 - pCsr->aChar = (UChar *)&pCsr[1];
988 - pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
989 -
990 - pCsr->aOffset[iOut] = iInput;
991 - U8_NEXT(zInput, iInput, nInput, c);
992 - while( c>0 ){
993 - int isError = 0;
994 - c = u_foldCase(c, opt);
995 - U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
996 - if( isError ){
997 - sqlite3_free(pCsr);
998 - return SQLITE_ERROR;
999 - }
1000 - pCsr->aOffset[iOut] = iInput;
1001 -
1002 - if( iInput<nInput ){
1003 - U8_NEXT(zInput, iInput, nInput, c);
1004 - }else{
1005 - c = 0;
1006 - }
1007 - }
1008 -
1009 - pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
1010 - if( !U_SUCCESS(status) ){
1011 - sqlite3_free(pCsr);
1012 - return SQLITE_ERROR;
1013 - }
1014 - pCsr->nChar = iOut;
1015 -
1016 - ubrk_first(pCsr->pIter);
1017 - *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
1018 - return SQLITE_OK;
1019 -}
1020 -
1021 -/*
1022 -** Close a tokenization cursor previously opened by a call to icuOpen().
1023 -*/
1024 -static int icuClose(sqlite3_tokenizer_cursor *pCursor){
1025 - IcuCursor *pCsr = (IcuCursor *)pCursor;
1026 - ubrk_close(pCsr->pIter);
1027 - sqlite3_free(pCsr->zBuffer);
1028 - sqlite3_free(pCsr);
1029 - return SQLITE_OK;
1030 -}
1031 -
1032 -/*
1033 -** Extract the next token from a tokenization cursor.
1034 -*/
1035 -static int icuNext(
1036 - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
1037 - const char **ppToken, /* OUT: *ppToken is the token text */
1038 - int *pnBytes, /* OUT: Number of bytes in token */
1039 - int *piStartOffset, /* OUT: Starting offset of token */
1040 - int *piEndOffset, /* OUT: Ending offset of token */
1041 - int *piPosition /* OUT: Position integer of token */
1042 -){
1043 - IcuCursor *pCsr = (IcuCursor *)pCursor;
1044 -
1045 - int iStart = 0;
1046 - int iEnd = 0;
1047 - int nByte = 0;
1048 -
1049 - while( iStart==iEnd ){
1050 - UChar32 c;
1051 -
1052 - iStart = ubrk_current(pCsr->pIter);
1053 - iEnd = ubrk_next(pCsr->pIter);
1054 - if( iEnd==UBRK_DONE ){
1055 - return SQLITE_DONE;
1056 - }
1057 -
1058 - while( iStart<iEnd ){
1059 - int iWhite = iStart;
1060 - U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
1061 - if( u_isspace(c) ){
1062 - iStart = iWhite;
1063 - }else{
1064 - break;
1065 - }
1066 - }
1067 - assert(iStart<=iEnd);
1068 - }
1069 -
1070 - do {
1071 - UErrorCode status = U_ZERO_ERROR;
1072 - if( nByte ){
1073 - char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
1074 - if( !zNew ){
1075 - return SQLITE_NOMEM;
1076 - }
1077 - pCsr->zBuffer = zNew;
1078 - pCsr->nBuffer = nByte;
1079 - }
1080 -
1081 - u_strToUTF8(
1082 - pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
1083 - &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
1084 - &status /* Output success/failure */
1085 - );
1086 - } while( nByte>pCsr->nBuffer );
1087 -
1088 - *ppToken = pCsr->zBuffer;
1089 - *pnBytes = nByte;
1090 - *piStartOffset = pCsr->aOffset[iStart];
1091 - *piEndOffset = pCsr->aOffset[iEnd];
1092 - *piPosition = pCsr->iToken++;
1093 -
1094 - return SQLITE_OK;
1095 -}
1096 -
1097 -/*
1098 -** The set of routines that implement the simple tokenizer
1099 -*/
1100 -static const sqlite3_tokenizer_module icuTokenizerModule = {
1101 - 0, /* iVersion */
1102 - icuCreate, /* xCreate */
1103 - icuDestroy, /* xCreate */
1104 - icuOpen, /* xOpen */
1105 - icuClose, /* xClose */
1106 - icuNext, /* xNext */
1107 - 0, /* xLanguageid */
1108 -};
1109 -
1110 -/*
1111 -** Set *ppModule to point at the implementation of the ICU tokenizer.
1112 -*/
1113 -SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(
1114 - sqlite3_tokenizer_module const**ppModule
1115 -){
1116 - *ppModule = &icuTokenizerModule;
1117 -}
1118 -
1119 -#endif /* defined(SQLITE_ENABLE_ICU) */
1120 -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
1121 -
1122 -/************** End of fts3_icu.c ********************************************/
1126 diff --git a/src/sqlite3icu.c b/src/sqlite3icu.c
1129 --- /dev/null
1131 @@ -0,0 +1,888 @@
1148 +** language. The code for the "sqlite3" command-line shell is also in a
1152 +** 2019.09.02-Complete codec logic for encryption and decryption.
1170 +** ("International Components for Unicode", an open-source library
1174 +** * An implementation of the SQL regexp() function (and hence REGEXP
1183 +** provide case-independent matching.
1187 +#include <string.h>
1214 +# define SQLITE_EXTENSION_INIT1 /*no-op*/
1216 +# define SQLITE_EXTENSION_INIT3 /*no-op*/
1258 + sqlite3_result_error(pCtx, zBuf, -1);
1280 +** a multi-byte UTF8 character. It is copied here from SQLite source
1297 + c = icuUtf8Trans1[c-0xc0]; \
1311 +** Compare two UTF-8 strings for equality where the first string is
1317 + const uint8_t *zString, /* The UTF-8 string to compare against */
1318 + const UChar32 uEsc /* The escape character */
1334 + ** 1. uPattern is an unescaped match-all character "%",
1335 + ** 2. uPattern is an unescaped match-one character "_",
1336 + ** 3. uPattern is an unescaped escape character, or
1345 + ** test string.
1392 +** the built-in LIKE operator. The first argument to the function is the
1393 +** pattern and the second argument is the string. So, the SQL statements:
1397 +** is implemented as like(B, A). If there is an escape character E,
1399 +** A LIKE B ESCAPE E
1416 + sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
1422 + /* The escape character string must consist of a single UTF-8 character.
1432 + "ESCAPE expression must be a single character", -1);
1443 +** Function to delete compiled regexp objects. Registered as
1452 +** Implementation of SQLite REGEXP operator. This scalar function takes
1454 +** the second is a string to match against that pattern. If either
1456 +** is 1 if the string matches the pattern, or 0 otherwise.
1458 +** SQLite maps the REGEXP operator to the regexp() function such
1461 +** zString REGEXP zPattern
1462 +** regexp(zPattern, zString)
1464 +** Uses the following ICU regexp APIs:
1478 + /* If the left hand side of the regexp operator is NULL,
1491 + pExpr = uregex_open(zPattern, -1, 0, 0, &status);
1504 + uregex_setText(pExpr, zString, -1, &status);
1529 +** Implementations of scalar functions for case mapping - upper() and
1530 +** lower(). Function upper() converts its input to upper-case (ABC).
1531 +** Function lower() converts to lower-case (abc).
1540 +** upper('abc') -> 'ABC'
1541 +** lower('ABC') -> 'abc'
1545 +** of the locale to use. Passing an empty string ("") or SQL NULL value
1549 +** lower('I', 'en_us') -> 'i'
1550 +** lower('I', 'tr_tr') -> '\u131' (small dotless i)
1552 +** http://www.icu-project.org/userguide/posix.html#case_mappings
1555 + const UChar *zInput; /* Pointer to input string */
1557 + int nInput; /* Size of utf-16 input string in bytes */
1634 + case UCOL_LESS: return -1;
1649 +** SELECT icu_load_collation(<locale>, <collation-name>);
1651 +** Where <locale> is a string containing an ICU locale identifier (i.e.
1652 +** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
1662 + const char *zLocale; /* Locale identifier - (eg. "jp_JP") */
1688 + sqlite3_result_error(p, "Error registering collation function", -1);
1706 + {"regexp", 2, SQLITE_ANY|SQLITEICU_EXTRAFLAGS, 0, icuRegexpFunc},
1729 + db, p->zName, p->nArg, p->enc,
1730 + p->iContext ? (void*)db : (void*)0,
1731 + p->xFunc, 0, 0
1774 +/* #include <string.h> */
1793 + UBreakIterator *pIter; /* ICU break-iterator object */
1795 + UChar *aChar; /* Copy of input using utf-16 encoding */
1796 + int *aOffset; /* Offsets of each character in utf-8 input */
1825 + p->zLocale = (char *)&p[1];
1826 + memcpy(p->zLocale, argv[0], n);
1844 +** Prepare to begin tokenizing a particular string. The input
1845 +** string to be tokenized is pInput[0..nBytes-1]. A cursor
1846 +** used to incrementally tokenize this string is returned in
1851 + const char *zInput, /* Input string */
1884 + pCsr->aChar = (UChar *)&pCsr[1];
1885 + pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
1887 + pCsr->aOffset[iOut] = iInput;
1892 + U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
1897 + pCsr->aOffset[iOut] = iInput;
1906 + pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
1911 + pCsr->nChar = iOut;
1913 + ubrk_first(pCsr->pIter);
1923 + ubrk_close(pCsr->pIter);
1924 + sqlite3_free(pCsr->zBuffer);
1949 + iStart = ubrk_current(pCsr->pIter);
1950 + iEnd = ubrk_next(pCsr->pIter);
1957 + U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
1970 + char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
1974 + pCsr->zBuffer = zNew;
1975 + pCsr->nBuffer = nByte;
1979 + pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
1980 + &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
1983 + } while( nByte>pCsr->nBuffer );
1985 + *ppToken = pCsr->zBuffer;
1987 + *piStartOffset = pCsr->aOffset[iStart];
1988 + *piEndOffset = pCsr->aOffset[iEnd];
1989 + *piPosition = pCsr->iToken++;
2021 --