1 /*
2 * Copyright 2011 - 2014
3 * Andr\xe9 Malo or his licensors, as applicable
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 #include "cext.h"
19 EXT_INIT_FUNC;
20
/*
 * Character type the minifier works on: Py_UNICODE when EXT3 is defined,
 * unsigned bytes otherwise.
 */
#ifdef EXT3
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif
/* Cast a character constant to the working character type */
#define U(c) ((rchar)(c))
27
/*
 * Parser state shared by all copy_* helpers during one rcssmin() run
 */
typedef struct {
    const rchar *start;      /* first source character */
    const rchar *sentinel;   /* one past the last source character */
    const rchar *tsentinel;  /* one past the last target buffer slot */
    Py_ssize_t at_group;     /* recognized @-groups whose '{' was not seen
                                yet (incremented by copy_at_group,
                                decremented on '{') */
    int in_macie5;           /* non-zero while inside a Mac IE5 comment hack */
    int in_rule;             /* nesting counter: bumped on '{' (when no
                                @-group is pending), dropped on '}' */
    int keep_bang_comments;  /* non-zero: copy comments starting with "/*!" */
} rcssmin_ctx_t;
37
/*
 * Tells copy_space() whether it may emit a separating space at all
 */
typedef enum {
    NEED_SPACE_MAYBE = 0,  /* emit one space if the neighbours require it */
    NEED_SPACE_NEVER       /* never emit a space */
} need_space_flag;
42
43
/*
 * Character class flags stored in rcssmin_charmask (tested through the
 * RCSSMIN_IS_* macros)
 */
#define RCSSMIN_DULL_BIT         (1 << 0)  /* copied verbatim by the main loop */
#define RCSSMIN_HEX_BIT          (1 << 1)  /* hex digit (used for escapes) */
#define RCSSMIN_ESC_BIT          (1 << 2)  /* copied as-is right after a backslash */
#define RCSSMIN_SPACE_BIT        (1 << 3)  /* whitespace */
#define RCSSMIN_STRING_DULL_BIT  (1 << 4)  /* plain character inside a quoted string */
#define RCSSMIN_NMCHAR_BIT       (1 << 5)  /* identifier character */
#define RCSSMIN_URI_DULL_BIT     (1 << 6)  /* plain character inside an unquoted URI */
#define RCSSMIN_PRE_CHAR_BIT     (1 << 7)  /* no space is emitted after such a character */
#define RCSSMIN_POST_CHAR_BIT    (1 << 8)  /* no space is emitted before such a character */
53
/*
 * Character class table for the character codes 0..127. Each entry is the
 * bitwise OR of the RCSSMIN_*_BIT flags applying to that character. Codes
 * above 127 are never looked up; the RCSSMIN_IS_* macros decide them
 * directly.
 */
static const unsigned short rcssmin_charmask[128] = {
     21,  21,  21,  21,  21,  21,  21,  21,  /*   0 ..   7 */
     21,  28,   8,  21,   8,   8,  21,  21,  /*   8 ..  15 */
     21,  21,  21,  21,  21,  21,  21,  21,  /*  16 ..  23 */
     21,  21,  21,  21,  21,  21,  21,  21,  /*  24 ..  31 */
     28, 469,   4,  85,  85,  85,  85,   4,  /*  32 ..  39 */
    149, 277,  85, 469, 469, 117,  85,  84,  /*  40 ..  47 */
    115, 115, 115, 115, 115, 115, 115, 115,  /*  48 ..  55 */
    115, 115, 468, 340,  85, 469, 468,  85,  /*  56 ..  63 */
     84, 115, 115, 115, 115, 115, 115, 117,  /*  64 ..  71 */
    117, 117, 117, 117, 117, 117, 117, 117,  /*  72 ..  79 */
    117, 117, 117, 117, 117, 117, 117, 117,  /*  80 ..  87 */
    117, 117, 117, 213,   4, 341,  85, 117,  /*  88 ..  95 */
     85, 115, 115, 115, 115, 115, 115, 117,  /*  96 .. 103 */
    117, 117, 117, 117, 117, 117, 117, 117,  /* 104 .. 111 */
    117, 117, 117, 117, 117, 116, 117, 117,  /* 112 .. 119 */
    117, 117, 117, 468,  85, 468,  85,  21   /* 120 .. 127 */
};
72
/*
 * Character class tests. Each macro evaluates its argument more than once,
 * so only pass side-effect free expressions.
 *
 * Characters above 127 are not in the table: they count as DULL, ESC,
 * STRING_DULL, NMCHAR and URI_DULL, but never as HEX, SPACE, PRE_CHAR or
 * POST_CHAR.
 */
#define RCSSMIN_IS_DULL(c) ((U(c) > 127) || \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_DULL_BIT))

#define RCSSMIN_IS_HEX(c) ((U(c) <= 127) && \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_HEX_BIT))

#define RCSSMIN_IS_ESC(c) ((U(c) > 127) || \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_ESC_BIT))

#define RCSSMIN_IS_SPACE(c) ((U(c) <= 127) && \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_SPACE_BIT))

#define RCSSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_STRING_DULL_BIT))

#define RCSSMIN_IS_NMCHAR(c) ((U(c) > 127) || \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_NMCHAR_BIT))

#define RCSSMIN_IS_URI_DULL(c) ((U(c) > 127) || \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_URI_DULL_BIT))

#define RCSSMIN_IS_PRE_CHAR(c) ((U(c) <= 127) && \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_PRE_CHAR_BIT))

#define RCSSMIN_IS_POST_CHAR(c) ((U(c) <= 127) && \
    (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_POST_CHAR_BIT))
99
100
/*
 * Pattern tables
 *
 * Two layouts are used:
 *
 * - plain patterns (url, ie7, macie5_init, macie5_exit) are matched or
 *   copied character by character (MATCH / COPY_PAT)
 * - case-insensitive patterns hold the lower-case spelling in the first
 *   half and the upper-case spelling in the second half; IMATCH passes
 *   the middle of the array as pattern end and copy_imatch() looks up the
 *   upper-case twin at the same offset behind it
 *
 * A commented-out first element marks a character the caller has consumed
 * before the pattern is applied.
 */
static const rchar pattern_url[] = {
    /*U('u'),*/ U('r'), U('l'), U('(')
};

static const rchar pattern_ie7[] = {
    /*U('>'),*/ U('/'), U('*'), U('*'), U('/')
};

static const rchar pattern_media[] = {
    U('m'), U('e'), U('d'), U('i'), U('a'),
    U('M'), U('E'), U('D'), U('I'), U('A')
};

static const rchar pattern_document[] = {
    U('d'), U('o'), U('c'), U('u'), U('m'), U('e'), U('n'), U('t'),
    U('D'), U('O'), U('C'), U('U'), U('M'), U('E'), U('N'), U('T')
};

static const rchar pattern_supports[] = {
    U('s'), U('u'), U('p'), U('p'), U('o'), U('r'), U('t'), U('s'),
    U('S'), U('U'), U('P'), U('P'), U('O'), U('R'), U('T'), U('S')
};

static const rchar pattern_keyframes[] = {
    U('k'), U('e'), U('y'), U('f'), U('r'), U('a'), U('m'), U('e'), U('s'),
    U('K'), U('E'), U('Y'), U('F'), U('R'), U('A'), U('M'), U('E'), U('S')
};

static const rchar pattern_vendor_o[] = {
    U('-'), U('o'), U('-'),
    U('-'), U('O'), U('-')
};

static const rchar pattern_vendor_moz[] = {
    U('-'), U('m'), U('o'), U('z'), U('-'),
    U('-'), U('M'), U('O'), U('Z'), U('-')
};

static const rchar pattern_vendor_webkit[] = {
    U('-'), U('w'), U('e'), U('b'), U('k'), U('i'), U('t'), U('-'),
    U('-'), U('W'), U('E'), U('B'), U('K'), U('I'), U('T'), U('-')
};

static const rchar pattern_vendor_ms[] = {
    U('-'), U('m'), U('s'), U('-'),
    U('-'), U('M'), U('S'), U('-')
};

/* ":first-l" prefix shared by :first-line and :first-letter (the ':' is
 * consumed by the caller) */
static const rchar pattern_first[] = {
    U('f'), U('i'), U('r'), U('s'), U('t'), U('-'), U('l'),
    U('F'), U('I'), U('R'), U('S'), U('T'), U('-'), U('L')
};

/* Remainder of "line" after pattern_first */
static const rchar pattern_line[] = {
    U('i'), U('n'), U('e'),
    U('I'), U('N'), U('E'),
};

/* Remainder of "letter" after pattern_first */
static const rchar pattern_letter[] = {
    U('e'), U('t'), U('t'), U('e'), U('r'),
    U('E'), U('T'), U('T'), U('E'), U('R')
};

/* Emitted by copy_space() in place of a comment ending in "\*" + "/" */
static const rchar pattern_macie5_init[] = {
    U('/'), U('*'), U('\\'), U('*'), U('/')
};

/* Emitted by copy_space() for the first regular comment seen while
 * in_macie5 is set */
static const rchar pattern_macie5_exit[] = {
    U('/'), U('*'), U('*'), U('/')
};
171
172 /*
173 * Match a pattern (and copy immediately to target)
174 */
175 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
176 #pragma GCC diagnostic push
177 #pragma GCC diagnostic ignored "-Wstrict-overflow"
178 #endif
179 static int
copy_match(const rchar * pattern,const rchar * psentinel,const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)180 copy_match(const rchar *pattern, const rchar *psentinel,
181 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
182 {
183 const rchar *source = *source_;
184 rchar *target = *target_;
185 rchar c;
186
187 while (pattern < psentinel
188 && source < ctx->sentinel && target < ctx->tsentinel
189 && ((c = *source++) == *pattern++))
190 *target++ = c;
191
192 *source_ = source;
193 *target_ = target;
194
195 return (pattern == psentinel);
196 }
197 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
198 #pragma GCC diagnostic pop
199 #endif
200
/*
 * Case sensitive match of pattern_<PAT> (the whole array) via copy_match().
 * source and target are pointers to the caller's position variables.
 */
#define MATCH(PAT, source, target, ctx) (                              \
    copy_match(pattern_##PAT,                                          \
               pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar),  \
               source, target, ctx)                                    \
)
206
207
208 /*
209 * Match a pattern (and copy immediately to target) - CI version
210 */
211 static int
copy_imatch(const rchar * pattern,const rchar * psentinel,const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)212 copy_imatch(const rchar *pattern, const rchar *psentinel,
213 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
214 {
215 const rchar *source = *source_, *pstart = pattern;
216 rchar *target = *target_;
217 rchar c;
218
219 while (pattern < psentinel
220 && source < ctx->sentinel && target < ctx->tsentinel
221 && ((c = *source++) == *pattern
222 || c == pstart[(pattern - pstart) + (psentinel - pstart)])) {
223 ++pattern;
224 *target++ = c;
225 }
226
227 *source_ = source;
228 *target_ = target;
229
230 return (pattern == psentinel);
231 }
232
/*
 * Case-insensitive match of pattern_<PAT> via copy_imatch(). Only the first
 * half of the array is passed as the pattern; the second half holds the
 * upper-case spelling.
 */
#define IMATCH(PAT, source, target, ctx) (                                 \
    copy_imatch(pattern_##PAT,                                             \
                pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar) / 2, \
                source, target, ctx)                                       \
)
238
239
240 /*
241 * Copy characters
242 */
243 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
244 #pragma GCC diagnostic push
245 #pragma GCC diagnostic ignored "-Wstrict-overflow"
246 #endif
247 static int
copy(const rchar * source,const rchar * sentinel,rchar ** target_,rcssmin_ctx_t * ctx)248 copy(const rchar *source, const rchar *sentinel, rchar **target_,
249 rcssmin_ctx_t *ctx)
250 {
251 rchar *target = *target_;
252
253 while (source < sentinel && target < ctx->tsentinel)
254 *target++ = *source++;
255
256 *target_ = target;
257
258 return (source == sentinel);
259 }
260 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
261 #pragma GCC diagnostic pop
262 #endif
263
/*
 * Append the complete pattern_<PAT> array to the target via copy().
 * Evaluates to 0 if the target buffer was too small.
 */
#define COPY_PAT(PAT, target, ctx) (                             \
    copy(pattern_##PAT,                                          \
         pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar),  \
         target, ctx)                                            \
)
269
270
/*
 * The ABORT macros work with known local variables!
 *
 * They expect `source`, `target`, `source_`, `target_` and `ctx` to exist
 * in the calling function and return from it.
 *
 * The local positions are written back to the caller only if the target
 * buffer is exhausted while source characters are still left. In every
 * other case *source_ and *target_ keep whatever the function stored there
 * before.
 *
 * ABORT is for functions returning void, RABORT(RET) for functions
 * returning a value.
 */
#define ABORT_(RET) do {                                         \
    if (source < ctx->sentinel && !(target < ctx->tsentinel)) {  \
        *source_ = source;                                       \
        *target_ = target;                                       \
    }                                                            \
    return RET;                                                  \
} while(0)


/* Expands to nothing, so ABORT_() yields a plain `return;` */
#define CRAPPY_C90_COMPATIBLE_EMPTY
#define ABORT ABORT_(CRAPPY_C90_COMPATIBLE_EMPTY)
#define RABORT(RET) ABORT_((RET))
286
287
/*
 * Copy escape
 *
 * Called with *source_ pointing right behind a backslash. The backslash is
 * emitted first, followed by the escaped part if there is one:
 *
 * - a character of the ESC class is copied verbatim
 * - a hex digit starts a hex escape: at most six hex digits are copied,
 *   and one whitespace character following them (CR LF counts as one) is
 *   replaced by a single ' '
 * - any other character is consumed but not copied
 *
 * The backslash is stored without checking tsentinel. The callers in this
 * file always have one free target slot when they call.
 *
 * The reached positions are always written back.
 */
static void
copy_escape(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
{
    const rchar *source = *source_, *hsentinel;
    rchar *target = *target_;
    rchar c;

    *target++ = U('\\');
    *target_ = target;

    if (source < ctx->sentinel && target < ctx->tsentinel) {
        c = *source++;
        if (RCSSMIN_IS_ESC(c)) {
            *target++ = c;
        }
        else if (RCSSMIN_IS_HEX(c)) {
            *target++ = c;

            /* 6 hex chars max, one we got already */
            if (ctx->sentinel - source > 5)
                hsentinel = source + 5;
            else
                hsentinel = ctx->sentinel;

            while (source < hsentinel && target < ctx->tsentinel
                   && (c = *source, RCSSMIN_IS_HEX(c))) {
                ++source;
                *target++ = c;
            }

            /* One optional space after */
            if (source < ctx->sentinel && target < ctx->tsentinel) {
                /* If the loop stopped at the digit limit, c still holds
                 * the last digit and has to be refreshed. Otherwise c
                 * already is the character at the current position. */
                if (source == hsentinel)
                    c = *source;
                if (RCSSMIN_IS_SPACE(c)) {
                    ++source;
                    *target++ = U(' ');
                    if (c == U('\r') && source < ctx->sentinel
                        && *source == U('\n'))
                        ++source;
                }
            }
        }
    }

    *target_ = target;
    *source_ = source;
}
339
340
341 /*
342 * Copy string
343 */
344 static void
copy_string(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)345 copy_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
346 {
347 const rchar *source = *source_;
348 rchar *target = *target_;
349 rchar c, quote = source[-1];
350
351 *target++ = quote;
352 *target_ = target;
353
354 while (source < ctx->sentinel && target < ctx->tsentinel) {
355 c = *target++ = *source++;
356 if (RCSSMIN_IS_STRING_DULL(c))
357 continue;
358
359 switch (c) {
360 case U('\''): case U('"'):
361 if (c == quote) {
362 *target_ = target;
363 *source_ = source;
364 return;
365 }
366 continue;
367
368 case U('\\'):
369 if (source < ctx->sentinel && target < ctx->tsentinel) {
370 c = *source++;
371 switch (c) {
372 case U('\r'):
373 if (source < ctx->sentinel && *source == U('\n'))
374 ++source;
375 /* fall through */
376
377 case U('\n'): case U('\f'):
378 --target;
379 break;
380
381 default:
382 *target++ = c;
383 }
384 }
385 continue;
386 }
387 break; /* forbidden characters */
388 }
389
390 ABORT;
391 }
392
393
/*
 * Copy URI string
 *
 * Entered right behind the opening quote of a quoted URI (the quote is
 * read from source[-1] and emitted first). Copies up to and including the
 * matching closing quote, with these changes:
 *
 * - all whitespace characters are dropped
 * - backslash + newline (CR LF, CR, LF or FF) is removed
 * - other escapes are handed to copy_escape()
 *
 * Returns 0 on success, with the positions written back. Returns -1 if a
 * forbidden character is met, the source ends or the target buffer fills
 * up. In that case only the emitted opening quote is committed to
 * *target_, unless the target buffer is exhausted (see ABORT_).
 */
static int
copy_uri_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
{
    const rchar *source = *source_;
    rchar *target = *target_;
    rchar c, quote = source[-1];

    *target++ = quote;
    *target_ = target;

    while (source < ctx->sentinel && target < ctx->tsentinel) {
        c = *source++;
        if (RCSSMIN_IS_SPACE(c))
            continue;
        *target++ = c;
        if (RCSSMIN_IS_STRING_DULL(c))
            continue;

        switch (c) {
        case U('\''): case U('"'):
            if (c == quote) {
                *target_ = target;
                *source_ = source;
                return 0;
            }
            continue;

        case U('\\'):
            if (source < ctx->sentinel && target < ctx->tsentinel) {
                /* peek only; copy_escape() consumes the character itself */
                c = *source;
                switch (c) {
                case U('\r'):
                    if ((source + 1) < ctx->sentinel && source[1] == U('\n'))
                        ++source;
                    /* fall through */

                case U('\n'): case U('\f'):
                    /* line continuation: drop backslash and newline */
                    --target;
                    ++source;
                    break;

                default:
                    /* take back the backslash, copy_escape() re-emits it */
                    --target;
                    copy_escape(&source, &target, ctx);
                }
            }
            continue;
        }

        break; /* forbidden characters */
    }

    RABORT(-1);
}
451
452
/*
 * Copy URI (unquoted)
 *
 * Entered right behind the first character of an unquoted URI. That
 * character (source[-1]) is copied as it is, without being classified.
 * The rest is copied up to, but not including, the closing ')':
 *
 * - all whitespace characters are dropped
 * - backslash + newline (CR LF, CR, LF or FF) is removed
 * - other escapes are handed to copy_escape()
 *
 * Returns 0 on success; the written-back positions then refer to the ')',
 * which is left for the caller. Returns -1 if a forbidden character is
 * met, the source ends or the target buffer fills up.
 */
static int
copy_uri_unquoted(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
{
    const rchar *source = *source_;
    rchar *target = *target_;
    rchar c;

    *target++ = source[-1];
    *target_ = target;

    while (source < ctx->sentinel && target < ctx->tsentinel) {
        c = *source++;
        if (RCSSMIN_IS_SPACE(c))
            continue;
        *target++ = c;
        if (RCSSMIN_IS_URI_DULL(c))
            continue;

        switch (c) {

        case U(')'):
            /* un-copy and un-read the ')' - the caller handles it */
            *target_ = target - 1;
            *source_ = source - 1;
            return 0;

        case U('\\'):
            if (source < ctx->sentinel && target < ctx->tsentinel) {
                /* peek only; copy_escape() consumes the character itself */
                c = *source;
                switch (c) {
                case U('\r'):
                    if ((source + 1) < ctx->sentinel && source[1] == U('\n'))
                        ++source;
                    /* fall through */

                case U('\n'): case U('\f'):
                    /* line continuation: drop backslash and newline */
                    --target;
                    ++source;
                    break;

                default:
                    /* take back the backslash, copy_escape() re-emits it */
                    --target;
                    copy_escape(&source, &target, ctx);
                }
            }
            continue;
        }

        break; /* forbidden characters */
    }

    RABORT(-1);
}
508
509
510 /*
511 * Copy url
512 */
513 static void
copy_url(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)514 copy_url(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
515 {
516 const rchar *source = *source_;
517 rchar *target = *target_;
518 rchar c;
519
520 *target++ = U('u');
521 *target_ = target;
522
523 /* Must not be inside an identifier */
524 if ((source != ctx->start + 1) && RCSSMIN_IS_NMCHAR(source[-2]))
525 return;
526
527 if (!MATCH(url, &source, &target, ctx)
528 || !(source < ctx->sentinel && target < ctx->tsentinel))
529 ABORT;
530
531 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source))
532 ++source;
533
534 if (!(source < ctx->sentinel))
535 ABORT;
536
537 c = *source++;
538 switch (c) {
539 case U('"'): case U('\''):
540 if (copy_uri_string(&source, &target, ctx) == -1)
541 ABORT;
542
543 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source))
544 ++source;
545 break;
546
547 default:
548 if (copy_uri_unquoted(&source, &target, ctx) == -1)
549 ABORT;
550 }
551
552 if (!(source < ctx->sentinel && target < ctx->tsentinel))
553 ABORT;
554
555 if ((*target++ = *source++) != U(')'))
556 ABORT;
557
558 *target_ = target;
559 *source_ = source;
560 }
561
562
563 /*
564 * Copy @-group
565 */
566 static void
copy_at_group(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)567 copy_at_group(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
568 {
569 const rchar *source = *source_;
570 rchar *target = *target_;
571
572 *target++ = U('@');
573 *target_ = target;
574
575 #define REMATCH(what) ( \
576 source = *source_, \
577 target = *target_, \
578 IMATCH(what, &source, &target, ctx) \
579 )
580 #define CMATCH(what) IMATCH(what, &source, &target, ctx)
581
582 if (( !CMATCH(media)
583 && !REMATCH(supports)
584 && !REMATCH(document)
585 && !REMATCH(keyframes)
586 && !(REMATCH(vendor_webkit) && CMATCH(keyframes))
587 && !(REMATCH(vendor_moz) && CMATCH(keyframes))
588 && !(REMATCH(vendor_o) && CMATCH(keyframes))
589 && !(REMATCH(vendor_ms) && CMATCH(keyframes)))
590 || !(source < ctx->sentinel && target < ctx->tsentinel)
591 || RCSSMIN_IS_NMCHAR(*source))
592 ABORT;
593
594 #undef CMATCH
595 #undef REMATCH
596
597 ++ctx->at_group;
598
599 *target_ = target;
600 *source_ = source;
601 }
602
603
604 /*
605 * Skip space
606 */
607 static const rchar *
skip_space(const rchar * source,rcssmin_ctx_t * ctx)608 skip_space(const rchar *source, rcssmin_ctx_t *ctx)
609 {
610 const rchar *begin = source;
611 int res;
612 rchar c;
613
614 while (source < ctx->sentinel) {
615 c = *source;
616 if (RCSSMIN_IS_SPACE(c)) {
617 ++source;
618 continue;
619 }
620 else if (c == U('/')) {
621 ++source;
622 if (!(source < ctx->sentinel && *source == U('*'))) {
623 --source;
624 break;
625 }
626 ++source;
627 res = 0;
628 while (source < ctx->sentinel) {
629 c = *source++;
630 if (c != U('*'))
631 continue;
632 if (!(source < ctx->sentinel))
633 return begin;
634 if (*source != U('/'))
635 continue;
636
637 /* Comment complete */
638 ++source;
639 res = 1;
640 break;
641 }
642 if (!res)
643 return begin;
644
645 continue;
646 }
647
648 break;
649 }
650
651 return source;
652 }
653
654
/*
 * Copy space
 *
 * Entered with *source_ one past the first character of a run of
 * whitespace and/or comments (the function steps back by one itself).
 *
 * With NEED_SPACE_MAYBE a single ' ' is emitted first if all of these hold:
 *
 * - the run does not start at the very beginning of the source
 * - the character before the run is not a PRE_CHAR
 * - the run (as seen by skip_space()) is followed by another character
 * - that character is not a POST_CHAR, or it is a ':' met outside of any
 *   rule block and with no @-group pending
 *
 * Then the whole run is consumed. Whitespace is dropped. Comments are
 * dropped as well, with these exceptions:
 *
 * - with keep_bang_comments set, a comment starting with "/ * !" (without
 *   the blanks) is copied verbatim; in_macie5 is set from whether it ends
 *   in a backslash before the closing marker
 * - a comment ending in a backslash before the closing marker (Mac IE5
 *   hack) is replaced by pattern_macie5_init, unless in_macie5 is already
 *   set; in_macie5 is set
 * - the first other comment seen while in_macie5 is set is replaced by
 *   pattern_macie5_exit and in_macie5 is cleared
 *
 * An unterminated comment aborts the function; nothing is committed then,
 * unless the target buffer is exhausted (see ABORT_).
 */
static void
copy_space(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx,
           need_space_flag need_space)
{
    const rchar *source = *source_, *end, *comment;
    rchar *target = *target_;
    int res;
    rchar c;

    /* step back to the first character of the run */
    --source;
    if (need_space == NEED_SPACE_MAYBE
            && source > ctx->start
            && !RCSSMIN_IS_PRE_CHAR(source[-1])
            && (end = skip_space(source, ctx)) < ctx->sentinel
            && (!RCSSMIN_IS_POST_CHAR(*end)
                || (*end == U(':') && !ctx->in_rule && !ctx->at_group))) {

        if (!(target < ctx->tsentinel))
            ABORT;
        *target++ = U(' ');
    }

    while (source < ctx->sentinel) {
        switch (c = *source) {

        /* comment */
        case U('/'):
            comment = source++;
            if (!((source < ctx->sentinel && *source == U('*')))) {
                /* lone slash: un-read it and end the run (this break
                 * leaves the switch, the one behind it leaves the loop) */
                --source;
                break;
            }
            ++source;
            res = 0;
            while (source < ctx->sentinel) {
                c = *source++;
                if (c != U('*'))
                    continue;
                if (!(source < ctx->sentinel))
                    ABORT;
                if (*source != U('/'))
                    continue;

                /* Comment complete */
                ++source;
                res = 1;

                if (ctx->keep_bang_comments && comment[2] == U('!')) {
                    /* source[-3] is the character before the closing
                     * star-slash */
                    ctx->in_macie5 = (source[-3] == U('\\'));
                    if (!copy(comment, source, &target, ctx))
                        ABORT;
                }
                else if (source[-3] == U('\\')) {
                    if (!ctx->in_macie5) {
                        if (!COPY_PAT(macie5_init, &target, ctx))
                            ABORT;
                    }
                    ctx->in_macie5 = 1;
                }
                else if (ctx->in_macie5) {
                    if (!COPY_PAT(macie5_exit, &target, ctx))
                        ABORT;
                    ctx->in_macie5 = 0;
                }
                /* else don't copy anything */
                break;
            }
            /* source exhausted inside the comment */
            if (!res)
                ABORT;
            continue;

        /* space */
        case U(' '): case U('\t'): case U('\r'): case U('\n'): case U('\f'):
            ++source;
            continue;
        }

        break;
    }

    *source_ = source;
    *target_ = target;
}
741
742
743 /*
744 * Copy space if comment
745 */
746 static int
copy_space_comment(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx,need_space_flag need_space)747 copy_space_comment(const rchar **source_, rchar **target_,
748 rcssmin_ctx_t *ctx, need_space_flag need_space)
749 {
750 const rchar *source = *source_;
751 rchar *target = *target_;
752
753 if (source < ctx->sentinel && *source == U('*')) {
754 copy_space(source_, target_, ctx, need_space);
755 if (*source_ > source)
756 return 0;
757 }
758 if (!(target < ctx->tsentinel))
759 RABORT(-1);
760
761 *target++ = source[-1];
762
763 /* *source_ = source; <-- unchanged */
764 *target_ = target;
765
766 return -1;
767 }
768
769
770 /*
771 * Copy space if exists
772 */
773 static int
copy_space_optional(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)774 copy_space_optional(const rchar **source_, rchar **target_,
775 rcssmin_ctx_t *ctx)
776 {
777 const rchar *source = *source_;
778
779 if (!(source < ctx->sentinel))
780 return -1;
781
782 if (*source == U('/')) {
783 *source_ = source + 1;
784 return copy_space_comment(source_, target_, ctx, NEED_SPACE_NEVER);
785 }
786 else if (RCSSMIN_IS_SPACE(*source)) {
787 *source_ = source + 1;
788 copy_space(source_, target_, ctx, NEED_SPACE_NEVER);
789 return 0;
790 }
791
792 return -1;
793 }
794
795
796 /*
797 * Copy :first-line|letter
798 */
799 static void
copy_first(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)800 copy_first(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
801 {
802 const rchar *source = *source_, *next, *source_fork;
803 rchar *target = *target_, *target_fork;
804
805 *target++ = U(':');
806 *target_ = target;
807
808 if (!IMATCH(first, &source, &target, ctx)
809 || !(source < ctx->sentinel && target < ctx->tsentinel))
810 ABORT;
811
812 source_fork = source;
813 target_fork = target;
814
815 if (!IMATCH(line, &source, &target, ctx)) {
816 source = source_fork;
817 target = target_fork;
818
819 if (!IMATCH(letter, &source, &target, ctx)
820 || !(source < ctx->sentinel && target < ctx->tsentinel))
821 ABORT;
822 }
823
824 next = skip_space(source, ctx);
825 if (!(next < ctx->sentinel && target < ctx->tsentinel
826 && (*next == U('{') || *next == U(','))))
827 ABORT;
828
829 *target++ = U(' ');
830 *target_ = target;
831 *source_ = source;
832 (void)copy_space_optional(source_, target_, ctx);
833 }
834
835
836 /*
837 * Copy IE7 hack
838 */
839 static void
copy_ie7hack(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)840 copy_ie7hack(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
841 {
842 const rchar *source = *source_;
843 rchar *target = *target_;
844
845 *target++ = U('>');
846 *target_ = target;
847
848 if (ctx->in_rule || ctx->at_group)
849 return; /* abort */
850
851 if (!MATCH(ie7, &source, &target, ctx))
852 ABORT;
853
854 ctx->in_macie5 = 0;
855
856 *target_ = target;
857 *source_ = source;
858
859 (void)copy_space_optional(source_, target_, ctx);
860 }
861
862
/*
 * Copy semicolon; miss out duplicates or even this one (before '}')
 *
 * Entered right behind a ';'. Works in two passes over the same input:
 *
 * 1. Look-ahead (skip_space() only): walk over all further semicolons
 *    separated by whitespace/comments. A single ';' is emitted unless the
 *    first other character is a '}' while ctx->in_rule is set.
 * 2. Consume: starting over, skip the semicolons and run the
 *    whitespace/comment runs through copy_space_optional(), so that
 *    comments which have to be kept are still emitted.
 *
 * NOTE(review): if the ';' is the very last source character, the first
 * loop never runs and no ';' is emitted, while a ';' followed only by
 * whitespace does emit one - confirm that this asymmetry is intended.
 */
static void
copy_semicolon(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
{
    const rchar *source = *source_, *begin, *end;
    rchar *target = *target_;

    begin = source;
    while (source < ctx->sentinel) {
        end = skip_space(source, ctx);
        if (!(end < ctx->sentinel)) {
            /* nothing but space/comments up to the end of the source */
            if (!(target < ctx->tsentinel))
                ABORT;
            *target++ = U(';');
            break;
        }
        switch (*end) {
        case U(';'):
            /* duplicate, keep looking */
            source = end + 1;
            continue;

        case U('}'):
            /* last semicolon of a rule block: leave it out */
            if (ctx->in_rule)
                break;

            /* fall through */
        default:
            if (!(target < ctx->tsentinel))
                ABORT;
            *target++ = U(';');
            break;
        }

        break;
    }

    /* second pass: actually consume what was only inspected so far */
    source = begin;
    *target_ = target;
    while (source < ctx->sentinel) {
        if (*source == U(';')) {
            ++source;
            continue;
        }

        if (copy_space_optional(&source, target_, ctx) == 0)
            continue;

        break;
    }

    *source_ = source;
}
917
918
/*
 * Main function
 *
 * Minifies slength characters at source into the buffer at target, which
 * has room for tlength characters. keep_bang_comments is stored in the
 * context and evaluated by copy_space().
 *
 * The return value determines the result length (kept in the target buffer).
 * However, if the target buffer is too small, the return value is greater
 * than tlength. The difference to tlength is the number of unconsumed source
 * characters at the time the buffer was full. In this case you should resize
 * the target buffer to the return value and call rcssmin again. Repeat as
 * often as needed.
 *
 * The loop reads one character at a time. Characters of the DULL class are
 * copied as they are, whitespace goes to copy_space(), everything else is
 * dispatched to the copy_* helper responsible for the construct this
 * character may start. The loop condition makes sure that at least one
 * target slot is free whenever a helper is called.
 */
static Py_ssize_t
rcssmin(const rchar *source, rchar *target, Py_ssize_t slength,
        Py_ssize_t tlength, int keep_bang_comments)
{
    rcssmin_ctx_t ctx_, *ctx = &ctx_;
    const rchar *tstart = target;
    rchar c;

    ctx->start = source;
    ctx->sentinel = source + slength;
    ctx->tsentinel = target + tlength;
    ctx->at_group = 0;
    ctx->in_macie5 = 0;
    ctx->in_rule = 0;
    ctx->keep_bang_comments = keep_bang_comments;

    while (source < ctx->sentinel && target < ctx->tsentinel) {
        c = *source++;
        if (RCSSMIN_IS_DULL(c)) {
            *target++ = c;
            continue;
        }
        else if (RCSSMIN_IS_SPACE(c)) {
            copy_space(&source, &target, ctx, NEED_SPACE_MAYBE);
            continue;
        }

        switch (c) {

        /* Escape */
        case U('\\'):
            copy_escape(&source, &target, ctx);
            continue;

        /* String */
        case U('"'): case U('\''):
            copy_string(&source, &target, ctx);
            continue;

        /* URL */
        case U('u'):
            copy_url(&source, &target, ctx);
            continue;

        /* IE7hack */
        case U('>'):
            copy_ie7hack(&source, &target, ctx);
            continue;

        /* @-group */
        case U('@'):
            copy_at_group(&source, &target, ctx);
            continue;

        /* ; */
        case U(';'):
            copy_semicolon(&source, &target, ctx);
            continue;

        /* :first-line|letter followed by [{,] */
        /* (apparently needed for IE6) */
        case U(':'):
            copy_first(&source, &target, ctx);
            continue;

        /* { */
        case U('{'):
            /* the block of a pending @-group does not count as a rule */
            if (ctx->at_group)
                --ctx->at_group;
            else
                ++ctx->in_rule;
            *target++ = c;
            continue;

        /* } */
        case U('}'):
            if (ctx->in_rule)
                --ctx->in_rule;
            *target++ = c;
            continue;

        /* space starting with comment */
        case U('/'):
            (void)copy_space_comment(&source, &target, ctx, NEED_SPACE_MAYBE);
            continue;

        /* Fallback: copy character. Better safe than sorry. Should not be
         * reached, though */
        default:
            *target++ = c;
            continue;
        }
    }

    /* characters written plus source characters left over */
    return
        (Py_ssize_t)(target - tstart) + (Py_ssize_t)(ctx->sentinel - source);
}
1026
1027
/* Docstring of the Python level cssmin() function */
PyDoc_STRVAR(rcssmin_cssmin__doc__,
"cssmin(style, keep_bang_comments=False)\n\
\n\
Minify CSS.\n\
\n\
:Note: This is a hand crafted C implementation built on the regex\n\
semantics.\n\
\n\
:Parameters:\n\
`style` : ``str``\n\
CSS to minify\n\
\n\
:Return: Minified style\n\
:Rtype: ``str``");

/*
 * Python entry point: cssmin(style, keep_bang_comments=False)
 *
 * Obtains the input as a character buffer, runs rcssmin() on it with the
 * GIL released and returns the minified result as a new object.
 *
 * - EXT2 builds: a unicode object is encoded to UTF-8 first and the result
 *   is decoded again; any other object is converted with PyObject_Str()
 * - EXT3 builds: the "U" format unit only accepts a unicode object, which
 *   is processed directly
 *
 * The result buffer starts with the size of the input. If rcssmin()
 * reports that it needs more room, the buffer is reallocated with the
 * reported size and the run is repeated.
 *
 * Returns a new reference, or NULL on error.
 */
static PyObject *
rcssmin_cssmin(PyObject *self, PyObject *args, PyObject *kwds)
{
    PyObject *style, *keep_bang_comments_ = NULL, *result;
    static char *kwlist[] = {"style", "keep_bang_comments", NULL};
    Py_ssize_t rlength, slength, length;
    int keep_bang_comments;
#ifdef EXT2
    int uni;
#define UOBJ "O"
#endif
#ifdef EXT3
#define UOBJ "U"
#endif

    if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
                                     &style, &keep_bang_comments_))
        return NULL;

    if (!keep_bang_comments_)
        keep_bang_comments = 0;
    else {
        keep_bang_comments = PyObject_IsTrue(keep_bang_comments_);
        if (keep_bang_comments == -1)
            return NULL;
    }

#ifdef EXT2
    /* both branches leave a new reference in style */
    if (PyUnicode_Check(style)) {
        if (!(style = PyUnicode_AsUTF8String(style)))
            return NULL;
        uni = 1;
    }
    else {
        if (!(style = PyObject_Str(style)))
            return NULL;
        uni = 0;
    }
#endif

#ifdef EXT3
    /* own a reference, so the Py_DECREF(style) calls below apply to both
     * build variants */
    Py_INCREF(style);
#define PyString_GET_SIZE PyUnicode_GET_SIZE
#define PyString_AS_STRING PyUnicode_AS_UNICODE
#define _PyString_Resize PyUnicode_Resize
#define PyString_FromStringAndSize PyUnicode_FromUnicode
#endif

    rlength = slength = PyString_GET_SIZE(style);

again:
    if (!(result = PyString_FromStringAndSize(NULL, rlength))) {
        Py_DECREF(style);
        return NULL;
    }
    Py_BEGIN_ALLOW_THREADS
    length = rcssmin((rchar *)PyString_AS_STRING(style),
                     (rchar *)PyString_AS_STRING(result),
                     slength, rlength, keep_bang_comments);
    Py_END_ALLOW_THREADS

    /* result buffer too small: retry with the size rcssmin() asked for */
    if (length > rlength) {
        Py_DECREF(result);
        rlength = length;
        goto again;
    }

    Py_DECREF(style);
    /* NOTE(review): rcssmin() as visible here never returns a negative
     * value, and no exception is set on this path - confirm whether this
     * branch is still needed */
    if (length < 0) {
        Py_DECREF(result);
        return NULL;
    }
    if (length != rlength && _PyString_Resize(&result, length) == -1)
        return NULL;

#ifdef EXT2
    if (uni) {
        style = PyUnicode_DecodeUTF8(PyString_AS_STRING(result),
                                     PyString_GET_SIZE(result), "strict");
        Py_DECREF(result);
        if (!style)
            return NULL;
        result = style;
    }
#endif
    return result;
}
1130
1131 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */
1132
/*
 * Method table of the module: exposes rcssmin_cssmin() as "cssmin",
 * accepting positional and keyword arguments.
 * (EXT_METHODS is not defined in this file - presumably it comes from
 * cext.h and expands to the table declaration; verify there.)
 */
EXT_METHODS = {
    {"cssmin",
        (PyCFunction)rcssmin_cssmin, METH_VARARGS | METH_KEYWORDS,
        rcssmin_cssmin__doc__},

    {NULL}  /* Sentinel */
};
1140
/* Module docstring */
PyDoc_STRVAR(EXT_DOCS_VAR,
"C implementation of rcssmin\n\
===========================\n\
\n\
C implementation of rcssmin.");


/*
 * Module definition built from name, method table and docstring.
 * (EXT_DEFINE is not defined in this file - presumably a cext.h macro
 * hiding the differences between the Python 2 and 3 module APIs; verify
 * there.)
 */
EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);
1149
/*
 * Module initialization: creates the module object and adds the
 * __author__ and __docformat__ attributes.
 * (The EXT_* macros are not defined in this file - presumably they come
 * from cext.h; their exact error handling has to be checked there.)
 */
EXT_INIT_FUNC {
    PyObject *m;

    /* Create the module and populate stuff */
    if (!(m = EXT_CREATE(&EXT_DEFINE_VAR)))
        EXT_INIT_ERROR(NULL);

    /* the author name is given as latin-1 bytes (\xe9 is e-acute) */
    EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1");
    EXT_ADD_STRING(m, "__docformat__", "restructuredtext en");

    EXT_INIT_RETURN(m);
}
1162
1163 /* ------------------------- END MODULE DEFINITION ------------------------- */
1164