1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % %
6 % TTTTT OOO K K EEEEE N N %
7 % T O O K K E NN N %
8 % T O O KKK EEE N N N %
9 % T O O K K E N NN %
10 % T OOO K K EEEEE N N %
11 % %
12 % %
13 % MagickCore Token Methods %
14 % %
15 % Software Design %
16 % Cristy %
17 % January 1993 %
18 % %
19 % %
20 % Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization %
21 % dedicated to making software imaging solutions freely available. %
22 % %
23 % You may not use this file except in compliance with the License. You may %
24 % obtain a copy of the License at %
25 % %
26 % http://www.imagemagick.org/script/license.php %
27 % %
28 % Unless required by applicable law or agreed to in writing, software %
29 % distributed under the License is distributed on an "AS IS" BASIS, %
30 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31 % See the License for the specific language governing permissions and %
32 % limitations under the License. %
33 % %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39
40 /*
41 Include declarations.
42 */
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/memory_.h"
48 #include "MagickCore/string_.h"
49 #include "MagickCore/string-private.h"
50 #include "MagickCore/token.h"
51 #include "MagickCore/token-private.h"
52 #include "MagickCore/utility.h"
53 #include "MagickCore/utility-private.h"
54
55 /*
56 Typedef declarations.
57 */
58 struct _TokenInfo
59 {
60 int
61 state;
62
63 MagickStatusType
64 flag;
65
66 ssize_t
67 offset;
68
69 char
70 quote;
71
72 size_t
73 signature;
74 };
75
76 /*
77 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78 % %
79 % %
80 % %
81 % A c q u i r e T o k e n I n f o %
82 % %
83 % %
84 % %
85 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86 %
87 % AcquireTokenInfo() allocates the TokenInfo structure.
88 %
89 % The format of the AcquireTokenInfo method is:
90 %
91 % TokenInfo *AcquireTokenInfo()
92 %
93 */
AcquireTokenInfo(void)94 MagickExport TokenInfo *AcquireTokenInfo(void)
95 {
96 TokenInfo
97 *token_info;
98
99 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
100 if (token_info == (TokenInfo *) NULL)
101 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
102 token_info->signature=MagickCoreSignature;
103 return(token_info);
104 }
105
106 /*
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108 % %
109 % %
110 % %
111 % D e s t r o y T o k e n I n f o %
112 % %
113 % %
114 % %
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116 %
117 % DestroyTokenInfo() deallocates memory associated with a TokenInfo
118 % structure.
119 %
120 % The format of the DestroyTokenInfo method is:
121 %
122 % TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123 %
124 % A description of each parameter follows:
125 %
126 % o token_info: Specifies a pointer to a TokenInfo structure.
127 %
128 */
DestroyTokenInfo(TokenInfo * token_info)129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130 {
131 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132 assert(token_info != (TokenInfo *) NULL);
133 assert(token_info->signature == MagickCoreSignature);
134 token_info->signature=(~MagickCoreSignature);
135 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136 return(token_info);
137 }
138
139 /*
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141 % %
142 % %
143 % %
144 + G e t N e x t T o k e n %
145 % %
146 % %
147 % %
148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149 %
150 % GetNextToken() gets a token from the token stream. A token is defined as
151 % a sequence of characters delimited by whitespace (e.g. clip-path), a
152 % sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
153 % parentheses (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
154 % separator characters: ':', '=', ',', and ';'.
155 %
156 % The format of the GetNextToken method is:
157 %
158 % void GetNextToken(const char *start,const char **end,
159 % const size_t extent,char *token)
160 %
161 % A description of each parameter follows:
162 %
163 % o start: the start of the token sequence.
164 %
165 % o end: point to the end of the token sequence.
166 %
167 % o extent: maximum extent of the token.
168 %
169 % o token: copy the token to this buffer.
170 %
171 */
GetNextToken(const char * start,const char ** end,const size_t extent,char * token)172 MagickExport void GetNextToken(const char *start,const char **end,
173 const size_t extent,char *token)
174 {
175 double
176 value;
177
178 register const char
179 *p;
180
181 register ssize_t
182 i;
183
184 assert(start != (const char *) NULL);
185 assert(token != (char *) NULL);
186 i=0;
187 p=start;
188 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
189 p++;
190 switch (*p)
191 {
192 case '\0':
193 break;
194 case '"':
195 case '\'':
196 case '`':
197 case '{':
198 {
199 register char
200 escape;
201
202 switch (*p)
203 {
204 case '"': escape='"'; break;
205 case '\'': escape='\''; break;
206 case '`': escape='\''; break;
207 case '{': escape='}'; break;
208 default: escape=(*p); break;
209 }
210 for (p++; *p != '\0'; p++)
211 {
212 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
213 p++;
214 else
215 if (*p == escape)
216 {
217 p++;
218 break;
219 }
220 if (i < (ssize_t) (extent-1))
221 token[i++]=(*p);
222 }
223 break;
224 }
225 case '/':
226 {
227 if (i < (ssize_t) (extent-1))
228 token[i++]=(*p++);
229 if ((*p == '>') || (*p == '/'))
230 if (i < (ssize_t) (extent-1))
231 token[i++]=(*p++);
232 break;
233 }
234 default:
235 {
236 char
237 *q;
238
239 value=StringToDouble(p,&q);
240 (void) value;
241 if ((p != q) && (*p != ','))
242 {
243 for ( ; (p < q) && (*p != ','); p++)
244 if (i < (ssize_t) (extent-1))
245 token[i++]=(*p);
246 if (*p == '%')
247 if (i < (ssize_t) (extent-1))
248 token[i++]=(*p++);
249 break;
250 }
251 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
252 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
253 {
254 if (i < (ssize_t) (extent-1))
255 token[i++]=(*p++);
256 break;
257 }
258 for ( ; *p != '\0'; p++)
259 {
260 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
261 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
262 break;
263 if ((i > 0) && (*p == '<'))
264 break;
265 if (i < (ssize_t) (extent-1))
266 token[i++]=(*p);
267 if (*p == '>')
268 break;
269 if (*p == '(')
270 for (p++; *p != '\0'; p++)
271 {
272 if (i < (ssize_t) (extent-1))
273 token[i++]=(*p);
274 if ((*p == ')') && (*(p-1) != '\\'))
275 break;
276 }
277 }
278 break;
279 }
280 }
281 token[i]='\0';
282 if (LocaleNCompare(token,"url(",4) == 0)
283 {
284 ssize_t
285 offset;
286
287 offset=4;
288 if (token[offset] == '#')
289 offset++;
290 i=(ssize_t) strlen(token);
291 (void) CopyMagickString(token,token+offset,MagickPathExtent);
292 token[i-offset-1]='\0';
293 }
294 while (isspace((int) ((unsigned char) *p)) != 0)
295 p++;
296 if (end != (const char **) NULL)
297 *end=(const char *) p;
298 }
299
300 /*
301 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
302 % %
303 % %
304 % %
305 % G l o b E x p r e s s i o n %
306 % %
307 % %
308 % %
309 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
310 %
311 % GlobExpression() returns MagickTrue if the expression matches the pattern.
312 %
313 % The format of the GlobExpression function is:
314 %
315 % MagickBooleanType GlobExpression(const char *expression,
316 % const char *pattern,const MagickBooleanType case_insensitive)
317 %
318 % A description of each parameter follows:
319 %
320 % o expression: Specifies a pointer to a text string containing a file name.
321 %
322 % o pattern: Specifies a pointer to a text string containing a pattern.
323 %
324 % o case_insensitive: set to MagickTrue to ignore the case when matching
325 % an expression.
326 %
327 */
GlobExpression(const char * expression,const char * pattern,const MagickBooleanType case_insensitive)328 MagickExport MagickBooleanType GlobExpression(const char *expression,
329 const char *pattern,const MagickBooleanType case_insensitive)
330 {
331 MagickBooleanType
332 done,
333 match;
334
335 register const char
336 *p;
337
338 /*
339 Return on empty pattern or '*'.
340 */
341 if (pattern == (char *) NULL)
342 return(MagickTrue);
343 if (GetUTFCode(pattern) == 0)
344 return(MagickTrue);
345 if (LocaleCompare(pattern,"*") == 0)
346 return(MagickTrue);
347 p=pattern+strlen(pattern)-1;
348 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
349 {
350 ExceptionInfo
351 *exception;
352
353 ImageInfo
354 *image_info;
355
356 /*
357 Determine if pattern is a scene, i.e. img0001.pcd[2].
358 */
359 image_info=AcquireImageInfo();
360 (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
361 exception=AcquireExceptionInfo();
362 (void) SetImageInfo(image_info,0,exception);
363 exception=DestroyExceptionInfo(exception);
364 if (LocaleCompare(image_info->filename,pattern) != 0)
365 {
366 image_info=DestroyImageInfo(image_info);
367 return(MagickFalse);
368 }
369 image_info=DestroyImageInfo(image_info);
370 }
371 /*
372 Evaluate glob expression.
373 */
374 done=MagickFalse;
375 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
376 {
377 if (GetUTFCode(expression) == 0)
378 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
379 break;
380 switch (GetUTFCode(pattern))
381 {
382 case '*':
383 {
384 MagickBooleanType
385 status;
386
387 status=MagickFalse;
388 pattern+=GetUTFOctets(pattern);
389 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
390 {
391 status=GlobExpression(expression,pattern,case_insensitive);
392 expression+=GetUTFOctets(expression);
393 }
394 if (status != MagickFalse)
395 {
396 while (GetUTFCode(expression) != 0)
397 expression+=GetUTFOctets(expression);
398 while (GetUTFCode(pattern) != 0)
399 pattern+=GetUTFOctets(pattern);
400 }
401 break;
402 }
403 case '[':
404 {
405 int
406 c;
407
408 pattern+=GetUTFOctets(pattern);
409 for ( ; ; )
410 {
411 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
412 {
413 done=MagickTrue;
414 break;
415 }
416 if (GetUTFCode(pattern) == '\\')
417 {
418 pattern+=GetUTFOctets(pattern);
419 if (GetUTFCode(pattern) == 0)
420 {
421 done=MagickTrue;
422 break;
423 }
424 }
425 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
426 {
427 c=GetUTFCode(pattern);
428 pattern+=GetUTFOctets(pattern);
429 pattern+=GetUTFOctets(pattern);
430 if (GetUTFCode(pattern) == ']')
431 {
432 done=MagickTrue;
433 break;
434 }
435 if (GetUTFCode(pattern) == '\\')
436 {
437 pattern+=GetUTFOctets(pattern);
438 if (GetUTFCode(pattern) == 0)
439 {
440 done=MagickTrue;
441 break;
442 }
443 }
444 if ((GetUTFCode(expression) < c) ||
445 (GetUTFCode(expression) > GetUTFCode(pattern)))
446 {
447 pattern+=GetUTFOctets(pattern);
448 continue;
449 }
450 }
451 else
452 if (GetUTFCode(pattern) != GetUTFCode(expression))
453 {
454 pattern+=GetUTFOctets(pattern);
455 continue;
456 }
457 pattern+=GetUTFOctets(pattern);
458 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
459 {
460 if ((GetUTFCode(pattern) == '\\') &&
461 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
462 pattern+=GetUTFOctets(pattern);
463 pattern+=GetUTFOctets(pattern);
464 }
465 if (GetUTFCode(pattern) != 0)
466 {
467 pattern+=GetUTFOctets(pattern);
468 expression+=GetUTFOctets(expression);
469 }
470 break;
471 }
472 break;
473 }
474 case '?':
475 {
476 pattern+=GetUTFOctets(pattern);
477 expression+=GetUTFOctets(expression);
478 break;
479 }
480 case '{':
481 {
482 pattern+=GetUTFOctets(pattern);
483 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
484 {
485 p=expression;
486 match=MagickTrue;
487 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
488 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
489 (match != MagickFalse))
490 {
491 if (GetUTFCode(pattern) == '\\')
492 pattern+=GetUTFOctets(pattern);
493 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
494 MagickFalse;
495 p+=GetUTFOctets(p);
496 pattern+=GetUTFOctets(pattern);
497 }
498 if (GetUTFCode(pattern) == 0)
499 {
500 match=MagickFalse;
501 done=MagickTrue;
502 break;
503 }
504 else
505 if (match != MagickFalse)
506 {
507 expression=p;
508 while ((GetUTFCode(pattern) != '}') &&
509 (GetUTFCode(pattern) != 0))
510 {
511 pattern+=GetUTFOctets(pattern);
512 if (GetUTFCode(pattern) == '\\')
513 {
514 pattern+=GetUTFOctets(pattern);
515 if (GetUTFCode(pattern) == '}')
516 pattern+=GetUTFOctets(pattern);
517 }
518 }
519 }
520 else
521 {
522 while ((GetUTFCode(pattern) != '}') &&
523 (GetUTFCode(pattern) != ',') &&
524 (GetUTFCode(pattern) != 0))
525 {
526 pattern+=GetUTFOctets(pattern);
527 if (GetUTFCode(pattern) == '\\')
528 {
529 pattern+=GetUTFOctets(pattern);
530 if ((GetUTFCode(pattern) == '}') ||
531 (GetUTFCode(pattern) == ','))
532 pattern+=GetUTFOctets(pattern);
533 }
534 }
535 }
536 if (GetUTFCode(pattern) != 0)
537 pattern+=GetUTFOctets(pattern);
538 }
539 break;
540 }
541 case '\\':
542 {
543 pattern+=GetUTFOctets(pattern);
544 if (GetUTFCode(pattern) == 0)
545 break;
546 }
547 default:
548 {
549 if (case_insensitive != MagickFalse)
550 {
551 if (tolower((int) GetUTFCode(expression)) !=
552 tolower((int) GetUTFCode(pattern)))
553 {
554 done=MagickTrue;
555 break;
556 }
557 }
558 else
559 if (GetUTFCode(expression) != GetUTFCode(pattern))
560 {
561 done=MagickTrue;
562 break;
563 }
564 expression+=GetUTFOctets(expression);
565 pattern+=GetUTFOctets(pattern);
566 }
567 }
568 }
569 while (GetUTFCode(pattern) == '*')
570 pattern+=GetUTFOctets(pattern);
571 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
572 MagickTrue : MagickFalse;
573 return(match);
574 }
575
576 /*
577 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
578 % %
579 % %
580 % %
581 + I s G l o b %
582 % %
583 % %
584 % %
585 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
586 %
587 % IsGlob() returns MagickTrue if the path specification contains a globbing
588 % pattern.
589 %
590 % The format of the IsGlob method is:
591 %
592 % MagickBooleanType IsGlob(const char *path)
593 %
594 % A description of each parameter follows:
595 %
596 % o path: the path.
597 %
598 */
IsGlob(const char * path)599 MagickPrivate MagickBooleanType IsGlob(const char *path)
600 {
601 MagickBooleanType
602 status = MagickFalse;
603
604 register const char
605 *p;
606
607 if (IsPathAccessible(path) != MagickFalse)
608 return(MagickFalse);
609 for (p=path; *p != '\0'; p++)
610 {
611 switch (*p)
612 {
613 case '*':
614 case '?':
615 case '{':
616 case '}':
617 case '[':
618 case ']':
619 {
620 status=MagickTrue;
621 break;
622 }
623 default:
624 break;
625 }
626 }
627 return(status);
628 }
629
630 /*
631 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
632 % %
633 % %
634 % %
635 % T o k e n i z e r %
636 % %
637 % %
638 % %
639 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
640 %
641 % Tokenizer() is a generalized, finite state token parser. It extracts tokens
642 % one at a time from a string of characters. The characters used for white
643 % space, for break characters, and for quotes can be specified. Also,
644 % characters in the string can be preceded by a specifiable escape character
645 % which removes any special meaning the character may have.
646 %
647 % Here is some terminology:
648 %
649 % o token: A single unit of information in the form of a group of
650 % characters.
651 %
652 % o white space: Space that gets ignored (except within quotes or when
653 % escaped), like blanks and tabs. in addition, white space terminates a
654 % non-quoted token.
655 %
656 % o break set: One or more characters that separates non-quoted tokens.
657 % Commas are a common break character. The usage of break characters to
658 % signal the end of a token is the same as that of white space, except
659 % multiple break characters with nothing or only white space between
660 % generate a null token for each two break characters together.
661 %
662 % For example, if blank is set to be the white space and comma is set to
663 % be the break character, the line
664 %
665 % A, B, C , , DEF
666 %
667 % ... consists of 5 tokens:
668 %
669 % 1) "A"
670 % 2) "B"
671 % 3) "C"
672 % 4) "" (the null string)
673 % 5) "DEF"
674 %
675 % o Quote character: A character that, when surrounding a group of other
676 % characters, causes the group of characters to be treated as a single
677 % token, no matter how many white spaces or break characters exist in
678 % the group. Also, a token always terminates after the closing quote.
679 % For example, if ' is the quote character, blank is white space, and
680 % comma is the break character, the following string
681 %
682 % A, ' B, CD'EF GHI
683 %
684 % ... consists of 4 tokens:
685 %
686 % 1) "A"
687 % 2) " B, CD" (note the blanks & comma)
688 % 3) "EF"
689 % 4) "GHI"
690 %
691 % The quote characters themselves do not appear in the resultant
692 % tokens. The double quotes are delimiters i use here for
693 % documentation purposes only.
694 %
695 % o Escape character: A character which itself is ignored but which
696 % causes the next character to be used as is. ^ and \ are often used
697 % as escape characters. An escape in the last position of the string
698 % gets treated as a "normal" (i.e., non-quote, non-white, non-break,
699 % and non-escape) character. For example, assume white space, break
700 % character, and quote are the same as in the above examples, and
701 % further, assume that ^ is the escape character. Then, in the string
702 %
703 % ABC, ' DEF ^' GH' I ^ J K^ L ^
704 %
705 % ... there are 7 tokens:
706 %
707 % 1) "ABC"
708 % 2) " DEF ' GH"
709 % 3) "I"
710 % 4) " " (a lone blank)
711 % 5) "J"
712 % 6) "K L"
713 % 7) "^" (passed as is at end of line)
714 %
715 % The format of the Tokenizer method is:
716 %
717 % int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
718 % const size_t max_token_length,const char *line,const char *white,
719 % const char *break_set,const char *quote,const char escape,
720 % char *breaker,int *next,char *quoted)
721 %
722 % A description of each parameter follows:
723 %
724 % o flag: right now, only the low order 2 bits are used.
725 %
726 % 1 => convert non-quoted tokens to upper case
727 % 2 => convert non-quoted tokens to lower case
728 % 0 => do not convert non-quoted tokens
729 %
730 % o token: a character string containing the returned next token
731 %
732 % o max_token_length: the maximum size of "token". Characters beyond
733 % "max_token_length" are truncated.
734 %
735 % o line: the string to be parsed.
736 %
737 % o white: a string of the valid white spaces. example:
738 %
739 % char whitesp[]={" \t"};
740 %
741 % blank and tab will be valid white space.
742 %
743 % o break: a string of the valid break characters. example:
744 %
745 % char breakch[]={";,"};
746 %
747 % semicolon and comma will be valid break characters.
748 %
749 % o quote: a string of the valid quote characters. An example would be
750 %
751 % char quote[]={"'\""};
752 %
753 % (this causes single and double quotes to be valid) Note that a
754 % token starting with one of these characters needs the same quote
755 % character to terminate it.
756 %
757 % for example:
758 %
759 % "ABC '
760 %
761 % is unterminated, but
762 %
763 % "DEF" and 'GHI'
764 %
765 % are properly terminated. Note that different quote characters
766 % can appear on the same line; only for a given token do the quote
767 % characters have to be the same.
768 %
769 % o escape: the escape character (NOT a string ... only one
770 % allowed). Use zero if none is desired.
771 %
772 % o breaker: the break character used to terminate the current
773 % token. If the token was quoted, this will be the quote used. If
774 % the token is the last one on the line, this will be zero.
775 %
776 % o next: this variable points to the first character of the
777 % next token. it gets reset by "tokenizer" as it steps through the
778 % string. Set it to 0 upon initialization, and leave it alone
779 % after that. You can change it if you want to jump around in the
780 % string or re-parse from the beginning, but be careful.
781 %
782 % o quoted: set to MagickTrue if the token was quoted and MagickFalse
783 % if not. You may need this information (for example: in C, a
784 % string with quotes around it is a character string, while one
785 % without is an identifier).
786 %
787 % o result: 0 if we haven't reached EOS (end of string), and 1
788 % if we have.
789 %
790 */
791
/*
  Tokenizer() parser states.
*/
#define IN_WHITE  0  /* initial state: no token character seen yet */
#define IN_TOKEN  1  /* inside a non-quoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* token ended (closing quote or white space after it) */
796
/*
  sindex() returns the zero-based position of the first occurrence of
  character c in string, or -1 if c does not occur before the terminating
  NUL.  The terminator itself is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
807
/*
  StoreToken() appends character c to the token buffer at token_info->offset
  and advances the offset.  Characters that do not fit, leaving room for the
  terminating NUL the caller writes, are silently dropped.  Outside of a
  quoted token the low two bits of token_info->flag select case conversion:
  1 => upper case, 2 => lower case, anything else => unchanged.
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  register ssize_t
    i;

  /*
    A zero-length buffer has no room at all; test for it explicitly because
    max_token_length-1 would otherwise wrap around.
  */
  if ((max_token_length == 0) || (token_info->offset < 0) ||
      ((size_t) token_info->offset >= (max_token_length-1)))
    return;
  i=token_info->offset++;
  string[i]=(char) c;
  if (token_info->state == IN_QUOTE)
    return;
  switch (token_info->flag & 0x03)
  {
    case 1:
    {
      /*
        c was widened from a plain char and may be negative; the <ctype.h>
        functions require a value representable as unsigned char.
      */
      string[i]=(char) toupper((int) ((unsigned char) c));
      break;
    }
    case 2:
    {
      string[i]=(char) tolower((int) ((unsigned char) c));
      break;
    }
    default:
      break;
  }
}
837
Tokenizer(TokenInfo * token_info,const unsigned flag,char * token,const size_t max_token_length,const char * line,const char * white,const char * break_set,const char * quote,const char escape,char * breaker,int * next,char * quoted)838 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
839 char *token,const size_t max_token_length,const char *line,const char *white,
840 const char *break_set,const char *quote,const char escape,char *breaker,
841 int *next,char *quoted)
842 {
843 int
844 c;
845
846 register ssize_t
847 i;
848
849 *breaker='\0';
850 *quoted='\0';
851 if (line[*next] == '\0')
852 return(1);
853 token_info->state=IN_WHITE;
854 token_info->quote=(char) MagickFalse;
855 token_info->flag=flag;
856 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
857 {
858 c=(int) line[*next];
859 i=sindex(c,break_set);
860 if (i >= 0)
861 {
862 switch (token_info->state)
863 {
864 case IN_WHITE:
865 case IN_TOKEN:
866 case IN_OZONE:
867 {
868 (*next)++;
869 *breaker=break_set[i];
870 token[token_info->offset]='\0';
871 return(0);
872 }
873 case IN_QUOTE:
874 {
875 StoreToken(token_info,token,max_token_length,c);
876 break;
877 }
878 }
879 continue;
880 }
881 i=sindex(c,quote);
882 if (i >= 0)
883 {
884 switch (token_info->state)
885 {
886 case IN_WHITE:
887 {
888 token_info->state=IN_QUOTE;
889 token_info->quote=quote[i];
890 *quoted=(char) MagickTrue;
891 break;
892 }
893 case IN_QUOTE:
894 {
895 if (quote[i] != token_info->quote)
896 StoreToken(token_info,token,max_token_length,c);
897 else
898 {
899 token_info->state=IN_OZONE;
900 token_info->quote='\0';
901 }
902 break;
903 }
904 case IN_TOKEN:
905 case IN_OZONE:
906 {
907 *breaker=(char) c;
908 token[token_info->offset]='\0';
909 return(0);
910 }
911 }
912 continue;
913 }
914 i=sindex(c,white);
915 if (i >= 0)
916 {
917 switch (token_info->state)
918 {
919 case IN_WHITE:
920 case IN_OZONE:
921 break;
922 case IN_TOKEN:
923 {
924 token_info->state=IN_OZONE;
925 break;
926 }
927 case IN_QUOTE:
928 {
929 StoreToken(token_info,token,max_token_length,c);
930 break;
931 }
932 }
933 continue;
934 }
935 if (c == (int) escape)
936 {
937 if (line[(*next)+1] == '\0')
938 {
939 *breaker='\0';
940 StoreToken(token_info,token,max_token_length,c);
941 (*next)++;
942 token[token_info->offset]='\0';
943 return(0);
944 }
945 switch (token_info->state)
946 {
947 case IN_WHITE:
948 {
949 (*next)--;
950 token_info->state=IN_TOKEN;
951 break;
952 }
953 case IN_TOKEN:
954 case IN_QUOTE:
955 {
956 (*next)++;
957 c=(int) line[*next];
958 StoreToken(token_info,token,max_token_length,c);
959 break;
960 }
961 case IN_OZONE:
962 {
963 token[token_info->offset]='\0';
964 return(0);
965 }
966 }
967 continue;
968 }
969 switch (token_info->state)
970 {
971 case IN_WHITE:
972 {
973 token_info->state=IN_TOKEN;
974 StoreToken(token_info,token,max_token_length,c);
975 break;
976 }
977 case IN_TOKEN:
978 case IN_QUOTE:
979 {
980 StoreToken(token_info,token,max_token_length,c);
981 break;
982 }
983 case IN_OZONE:
984 {
985 token[token_info->offset]='\0';
986 return(0);
987 }
988 }
989 }
990 token[token_info->offset]='\0';
991 return(0);
992 }
993