// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
28 29 package org.owasp.html; 30 31 final class CssGrammar { 32 errorRecoveryUntilSemiOrCloseBracket( CssTokens.TokenIterator it)33 private static void errorRecoveryUntilSemiOrCloseBracket( 34 CssTokens.TokenIterator it) { 35 int bracketDepth = 0; 36 for (; it.hasNext(); it.advance()) { 37 switch (it.type()) { 38 case SEMICOLON: 39 it.advance(); 40 return; 41 case LEFT_CURLY: 42 case LEFT_PAREN: 43 case LEFT_SQUARE: 44 ++bracketDepth; 45 break; 46 case RIGHT_CURLY: 47 case RIGHT_PAREN: 48 case RIGHT_SQUARE: 49 --bracketDepth; 50 if (bracketDepth <= 0) { 51 if (bracketDepth != 0) { it.advance(); } 52 return; 53 } 54 break; 55 default: 56 break; 57 } 58 } 59 } 60 parsePropertyGroup(String css, PropertyHandler handler)61 static void parsePropertyGroup(String css, PropertyHandler handler) { 62 // Split tokens by semicolons/curly-braces, then by first colon, 63 // dropping spaces and comments to identify property names and token runs 64 // that form the value. 65 66 CssTokens tokens = CssTokens.lex(css); 67 CssTokens.TokenIterator it = tokens.iterator(); 68 propertyNameLoop: 69 while (it.hasTokenAfterSpace()) { 70 // Check that we have an identifier that might be a property name. 71 if (it.type() != CssTokens.TokenType.IDENT) { 72 errorRecoveryUntilSemiOrCloseBracket(it); 73 continue; 74 } 75 76 String name = it.next(); 77 78 // Look for a colon. 
79 if (!(it.hasTokenAfterSpace() && ":".equals(it.token()))) { 80 errorRecoveryUntilSemiOrCloseBracket(it); 81 continue propertyNameLoop; 82 } 83 it.advance(); 84 85 handler.startProperty(Strings.toLowerCase(name)); 86 parsePropertyValue(it, handler); 87 handler.endProperty(); 88 } 89 } 90 parsePropertyValue( CssTokens.TokenIterator it, PropertyHandler handler)91 private static void parsePropertyValue( 92 CssTokens.TokenIterator it, PropertyHandler handler) { 93 propertyValueLoop: 94 while (it.hasNext()) { 95 CssTokens.TokenType type = it.type(); 96 String token = it.token(); 97 switch (type) { 98 case SEMICOLON: 99 it.advance(); 100 break propertyValueLoop; 101 case FUNCTION: 102 CssTokens.TokenIterator actuals = it.spliceToEnd(); 103 handler.startFunction(token); 104 parsePropertyValue(actuals, handler); 105 handler.endFunction(token); 106 continue; // Skip the advance over token. 107 case IDENT: 108 handler.identifier(token); 109 break; 110 case HASH_UNRESTRICTED: 111 if (token.length() == 4 || token.length() == 7) { 112 handler.hash(token); 113 } 114 break; 115 case STRING: 116 handler.quotedString(token); 117 break; 118 case URL: 119 handler.url(token); 120 break; 121 case DIMENSION: 122 case NUMBER: 123 case PERCENTAGE: 124 handler.quantity(token); 125 break; 126 case AT: 127 case BAD_DIMENSION: 128 case COLUMN: 129 case DOT_IDENT: 130 case HASH_ID: 131 case MATCH: 132 case UNICODE_RANGE: 133 case WHITESPACE: 134 break; 135 case LEFT_CURLY: 136 case LEFT_PAREN: 137 case LEFT_SQUARE: 138 case RIGHT_CURLY: 139 case RIGHT_PAREN: 140 case RIGHT_SQUARE: 141 case COMMA: 142 case COLON: 143 case DELIM: 144 handler.punctuation(token); 145 break; 146 } 147 it.advance(); 148 } 149 } 150 151 /** 152 * Decodes any escape sequences and strips any quotes from the input. 
153 */ cssContent(String token)154 static String cssContent(String token) { 155 int n = token.length(); 156 int pos = 0; 157 StringBuilder sb = null; 158 if (n >= 2) { 159 char ch0 = token.charAt(0); 160 if (ch0 == '"' || ch0 == '\'') { 161 if (ch0 == token.charAt(n - 1)) { 162 pos = 1; 163 --n; 164 sb = new StringBuilder(n); 165 } 166 } 167 } 168 for (int esc; (esc = token.indexOf('\\', pos)) >= 0;) { 169 int end = esc + 2; 170 if (esc > n) { break; } 171 if (sb == null) { sb = new StringBuilder(n); } 172 sb.append(token, pos, esc); 173 int codepoint = token.charAt(end - 1); 174 if (isHex(codepoint)) { 175 // Parse \hhhhh<opt-break> where hhhhh is one or more hex digits 176 // and <opt-break> is an optional space or tab character that can be 177 // used to separate an escape sequence from a following literal hex 178 // digit. 179 while (end < n && isHex(token.charAt(end))) { ++end; } 180 try { 181 codepoint = Integer.parseInt(token.substring(esc + 1, end), 16); 182 } catch (RuntimeException ex) { 183 codepoint = 0xfffd; // Unknown codepoint. 184 } 185 if (end < n) { 186 char ch = token.charAt(end); 187 if (ch == ' ' || ch == '\t') { // Ignorable hex follower. 
188 ++end; 189 } 190 } 191 } 192 sb.appendCodePoint(codepoint); 193 pos = end; 194 } 195 if (sb == null) { return token; } 196 return sb.append(token, pos, n).toString(); 197 } 198 isHex(int codepoint)199 private static boolean isHex(int codepoint) { 200 return ('0' <= codepoint && codepoint <= '9') 201 || ('A' <= codepoint && codepoint <= 'F') 202 || ('a' <= codepoint && codepoint <= 'f'); 203 } 204 205 interface PropertyHandler { startProperty(String propertyName)206 void startProperty(String propertyName); quantity(String token)207 void quantity(String token); identifier(String token)208 void identifier(String token); hash(String token)209 void hash(String token); quotedString(String token)210 void quotedString(String token); url(String token)211 void url(String token); punctuation(String token)212 void punctuation(String token); startFunction(String token)213 void startFunction(String token); endFunction(String token)214 void endFunction(String token); endProperty()215 void endProperty(); 216 } 217 218 } 219