1 // ================================================================================================= 2 // ADOBE SYSTEMS INCORPORATED 3 // Copyright 2006 Adobe Systems Incorporated 4 // All Rights Reserved 5 // 6 // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms 7 // of the Adobe license agreement accompanying it. 8 // ================================================================================================= 9 10 package com.adobe.xmp.impl; 11 12 13 import com.adobe.xmp.XMPConst; 14 15 16 /** 17 * Utility functions for the XMPToolkit implementation. 18 * 19 * @since 06.06.2006 20 */ 21 public class Utils implements XMPConst 22 { 23 /** segments of a UUID */ 24 public static final int UUID_SEGMENT_COUNT = 4; 25 /** length of a UUID */ 26 public static final int UUID_LENGTH = 32 + UUID_SEGMENT_COUNT; 27 /** table of XML name start chars (<= 0xFF) */ 28 private static boolean[] xmlNameStartChars; 29 /** table of XML name chars (<= 0xFF) */ 30 private static boolean[] xmlNameChars; 31 /** init char tables */ 32 static 33 { initCharTables()34 initCharTables(); 35 } 36 37 38 /** 39 * Private constructor 40 */ Utils()41 private Utils() 42 { 43 // EMPTY 44 } 45 46 47 /** 48 * Normalize an xml:lang value so that comparisons are effectively case 49 * insensitive as required by RFC 3066 (which superceeds RFC 1766). The 50 * normalization rules: 51 * <ul> 52 * <li> The primary subtag is lower case, the suggested practice of ISO 639. 53 * <li> All 2 letter secondary subtags are upper case, the suggested 54 * practice of ISO 3166. 55 * <li> All other subtags are lower case. 56 * </ul> 57 * 58 * @param value 59 * raw value 60 * @return Returns the normalized value. 61 */ normalizeLangValue(String value)62 public static String normalizeLangValue(String value) 63 { 64 // don't normalize x-default 65 if (XMPConst.X_DEFAULT.equals(value)) 66 { 67 return value; 68 } 69 70 int subTag = 1; 71 StringBuffer buffer = new StringBuffer(); 72 73 for (int i = 0; i < value.length(); i++) 74 { 75 switch (value.charAt(i)) 76 { 77 case '-': 78 case '_': 79 // move to next subtag and convert underscore to hyphen 80 buffer.append('-'); 81 subTag++; 82 break; 83 case ' ': 84 // remove spaces 85 break; 86 default: 87 // convert second subtag to uppercase, all other to lowercase 88 if (subTag != 2) 89 { 90 buffer.append(Character.toLowerCase(value.charAt(i))); 91 } 92 else 93 { 94 buffer.append(Character.toUpperCase(value.charAt(i))); 95 } 96 } 97 98 } 99 return buffer.toString(); 100 } 101 102 103 /** 104 * Split the name and value parts for field and qualifier selectors: 105 * <ul> 106 * <li>[qualName="value"] - An element in an array of structs, chosen by a 107 * field value. 108 * <li>[?qualName="value"] - An element in an array, chosen by a qualifier 109 * value. 110 * </ul> 111 * The value portion is a string quoted by ''' or '"'. The value may contain 112 * any character including a doubled quoting character. The value may be 113 * empty. <em>Note:</em> It is assumed that the expression is formal 114 * correct 115 * 116 * @param selector 117 * the selector 118 * @return Returns an array where the first entry contains the name and the 119 * second the value. 120 */ splitNameAndValue(String selector)121 static String[] splitNameAndValue(String selector) 122 { 123 // get the name 124 int eq = selector.indexOf('='); 125 int pos = 1; 126 if (selector.charAt(pos) == '?') 127 { 128 pos++; 129 } 130 String name = selector.substring(pos, eq); 131 132 // get the value 133 pos = eq + 1; 134 char quote = selector.charAt(pos); 135 pos++; 136 int end = selector.length() - 2; // quote and ] 137 StringBuffer value = new StringBuffer(end - eq); 138 while (pos < end) 139 { 140 value.append(selector.charAt(pos)); 141 pos++; 142 if (selector.charAt(pos) == quote) 143 { 144 // skip one quote in value 145 pos++; 146 } 147 } 148 return new String[] { name, value.toString() }; 149 } 150 151 152 /** 153 * 154 * @param schema 155 * a schema namespace 156 * @param prop 157 * an XMP Property 158 * @return Returns true if the property is defined as "Internal 159 * Property", see XMP Specification. 160 */ isInternalProperty(String schema, String prop)161 static boolean isInternalProperty(String schema, String prop) 162 { 163 boolean isInternal = false; 164 165 if (NS_DC.equals(schema)) 166 { 167 if ("dc:format".equals(prop) || "dc:language".equals(prop)) 168 { 169 isInternal = true; 170 } 171 } 172 else if (NS_XMP.equals(schema)) 173 { 174 if ("xmp:BaseURL".equals(prop) || "xmp:CreatorTool".equals(prop) 175 || "xmp:Format".equals(prop) || "xmp:Locale".equals(prop) 176 || "xmp:MetadataDate".equals(prop) || "xmp:ModifyDate".equals(prop)) 177 { 178 isInternal = true; 179 } 180 } 181 else if (NS_PDF.equals(schema)) 182 { 183 if ("pdf:BaseURL".equals(prop) || "pdf:Creator".equals(prop) 184 || "pdf:ModDate".equals(prop) || "pdf:PDFVersion".equals(prop) 185 || "pdf:Producer".equals(prop)) 186 { 187 isInternal = true; 188 } 189 } 190 else if (NS_TIFF.equals(schema)) 191 { 192 isInternal = true; 193 if ("tiff:ImageDescription".equals(prop) || "tiff:Artist".equals(prop) 194 || "tiff:Copyright".equals(prop)) 195 { 196 isInternal = false; 197 } 198 } 199 else if (NS_EXIF.equals(schema)) 200 { 201 isInternal = true; 202 if ("exif:UserComment".equals(prop)) 203 { 204 isInternal = false; 205 } 206 } 207 else if (NS_EXIF_AUX.equals(schema)) 208 { 209 isInternal = true; 210 } 211 else if (NS_PHOTOSHOP.equals(schema)) 212 { 213 if ("photoshop:ICCProfile".equals(prop)) 214 { 215 isInternal = true; 216 } 217 } 218 else if (NS_CAMERARAW.equals(schema)) 219 { 220 if ("crs:Version".equals(prop) || "crs:RawFileName".equals(prop) 221 || "crs:ToneCurveName".equals(prop)) 222 { 223 isInternal = true; 224 } 225 } 226 else if (NS_ADOBESTOCKPHOTO.equals(schema)) 227 { 228 isInternal = true; 229 } 230 else if (NS_XMP_MM.equals(schema)) 231 { 232 isInternal = true; 233 } 234 else if (TYPE_TEXT.equals(schema)) 235 { 236 isInternal = true; 237 } 238 else if (TYPE_PAGEDFILE.equals(schema)) 239 { 240 isInternal = true; 241 } 242 else if (TYPE_GRAPHICS.equals(schema)) 243 { 244 isInternal = true; 245 } 246 else if (TYPE_IMAGE.equals(schema)) 247 { 248 isInternal = true; 249 } 250 else if (TYPE_FONT.equals(schema)) 251 { 252 isInternal = true; 253 } 254 255 return isInternal; 256 } 257 258 259 /** 260 * Check some requirements for an UUID: 261 * <ul> 262 * <li>Length of the UUID is 32</li> 263 * <li>The Delimiter count is 4 and all the 4 delimiter are on their right 264 * position (8,13,18,23)</li> 265 * </ul> 266 * 267 * 268 * @param uuid uuid to test 269 * @return true - this is a well formed UUID, false - UUID has not the expected format 270 */ 271 checkUUIDFormat(String uuid)272 static boolean checkUUIDFormat(String uuid) 273 { 274 boolean result = true; 275 int delimCnt = 0; 276 int delimPos = 0; 277 278 if (uuid == null) 279 { 280 return false; 281 } 282 283 for (delimPos = 0; delimPos < uuid.length(); delimPos++) 284 { 285 if (uuid.charAt(delimPos) == '-') 286 { 287 delimCnt++; 288 result = result && 289 (delimPos == 8 || delimPos == 13 || delimPos == 18 || delimPos == 23); 290 } 291 } 292 293 return result && UUID_SEGMENT_COUNT == delimCnt && UUID_LENGTH == delimPos; 294 } 295 296 297 /** 298 * Simple check for valid XMLNames. Within ASCII range<br> 299 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br> 300 * are accepted, above all characters (which is not entirely 301 * correct according to the XML Spec. 302 * 303 * @param name an XML Name 304 * @return Return <code>true</code> if the name is correct. 305 */ isXMLName(String name)306 public static boolean isXMLName(String name) 307 { 308 if (name.length() > 0 && !isNameStartChar(name.charAt(0))) 309 { 310 return false; 311 } 312 for (int i = 1; i < name.length(); i++) 313 { 314 if (!isNameChar(name.charAt(i))) 315 { 316 return false; 317 } 318 } 319 return true; 320 } 321 322 323 /** 324 * Checks if the value is a legal "unqualified" XML name, as 325 * defined in the XML Namespaces proposed recommendation. 326 * These are XML names, except that they must not contain a colon. 327 * @param name the value to check 328 * @return Returns true if the name is a valid "unqualified" XML name. 329 */ isXMLNameNS(String name)330 public static boolean isXMLNameNS(String name) 331 { 332 if (name.length() > 0 && (!isNameStartChar(name.charAt(0)) || name.charAt(0) == ':')) 333 { 334 return false; 335 } 336 for (int i = 1; i < name.length(); i++) 337 { 338 if (!isNameChar(name.charAt(i)) || name.charAt(i) == ':') 339 { 340 return false; 341 } 342 } 343 return true; 344 } 345 346 347 /** 348 * @param c a char 349 * @return Returns true if the char is an ASCII control char. 350 */ isControlChar(char c)351 static boolean isControlChar(char c) 352 { 353 return (c <= 0x1F || c == 0x7F) && 354 c != 0x09 && c != 0x0A && c != 0x0D; 355 } 356 357 358 /** 359 * Serializes the node value in XML encoding. Its used for tag bodies and 360 * attributes.<br> 361 * <em>Note:</em> The attribute is always limited by quotes, 362 * thats why <code>&apos;</code> is never serialized.<br> 363 * <em>Note:</em> Control chars are written unescaped, but if the user uses others than tab, LF 364 * and CR the resulting XML will become invalid. 365 * @param value a string 366 * @param forAttribute flag if string is attribute value (need to additional escape quotes) 367 * @param escapeWhitespaces Decides if LF, CR and TAB are escaped. 368 * @return Returns the value ready for XML output. 369 */ escapeXML(String value, boolean forAttribute, boolean escapeWhitespaces)370 public static String escapeXML(String value, boolean forAttribute, boolean escapeWhitespaces) 371 { 372 // quick check if character are contained that need special treatment 373 boolean needsEscaping = false; 374 for (int i = 0; i < value.length (); i++) 375 { 376 char c = value.charAt (i); 377 if ( 378 c == '<' || c == '>' || c == '&' || // XML chars 379 (escapeWhitespaces && (c == '\t' || c == '\n' || c == '\r')) || 380 (forAttribute && c == '"')) 381 { 382 needsEscaping = true; 383 break; 384 } 385 } 386 387 if (!needsEscaping) 388 { 389 // fast path 390 return value; 391 } 392 else 393 { 394 // slow path with escaping 395 StringBuffer buffer = new StringBuffer(value.length() * 4 / 3); 396 for (int i = 0; i < value.length (); i++) 397 { 398 char c = value.charAt (i); 399 if (!(escapeWhitespaces && (c == '\t' || c == '\n' || c == '\r'))) 400 { 401 switch (c) 402 { 403 // we do what "Canonical XML" expects 404 // AUDIT: ' not serialized as only outer qoutes are used 405 case '<': buffer.append("<"); continue; 406 case '>': buffer.append(">"); continue; 407 case '&': buffer.append("&"); continue; 408 case '"': buffer.append(forAttribute ? """ : "\""); continue; 409 default: buffer.append(c); continue; 410 } 411 } 412 else 413 { 414 // write control chars escaped, 415 // if there are others than tab, LF and CR the xml will become invalid. 416 buffer.append("&#x"); 417 buffer.append(Integer.toHexString(c).toUpperCase()); 418 buffer.append(';'); 419 } 420 } 421 return buffer.toString(); 422 } 423 } 424 425 426 /** 427 * Replaces the ASCII control chars with a space. 428 * 429 * @param value 430 * a node value 431 * @return Returns the cleaned up value 432 */ removeControlChars(String value)433 static String removeControlChars(String value) 434 { 435 StringBuffer buffer = new StringBuffer(value); 436 for (int i = 0; i < buffer.length(); i++) 437 { 438 if (isControlChar(buffer.charAt(i))) 439 { 440 buffer.setCharAt(i, ' '); 441 } 442 } 443 return buffer.toString(); 444 } 445 446 447 /** 448 * Simple check if a character is a valid XML start name char. 449 * Within ASCII range<br> 450 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br> 451 * are accepted, above all characters (which is not entirely 452 * correct according to the XML Spec) 453 * 454 * @param ch a character 455 * @return Returns true if the character is a valid first char of an XML name. 456 */ isNameStartChar(char ch)457 private static boolean isNameStartChar(char ch) 458 { 459 return ch > 0xFF || xmlNameStartChars[ch]; 460 } 461 462 463 /** 464 * Simple check if a character is a valid XML name char 465 * (every char except the first one). 466 * Within ASCII range<br> 467 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br> 468 * are accepted, above all characters (which is not entirely 469 * correct according to the XML Spec) 470 * 471 * @param ch a character 472 * @return Returns true if the character is a valid char of an XML name. 473 */ isNameChar(char ch)474 private static boolean isNameChar(char ch) 475 { 476 return ch > 0xFF || xmlNameChars[ch]; 477 } 478 479 480 /** 481 * Initializes the char tables for later use. 482 */ initCharTables()483 private static void initCharTables() 484 { 485 xmlNameChars = new boolean[0x0100]; 486 xmlNameStartChars = new boolean[0x0100]; 487 488 for (char ch = 0; ch < xmlNameChars.length; ch++) 489 { 490 xmlNameStartChars[ch] = 491 ('a' <= ch && ch <= 'z') || 492 ('A' <= ch && ch <= 'Z') || 493 ch == ':' || 494 ch == '_' || 495 (0xC0 <= ch && ch <= 0xD6) || 496 (0xD8 <= ch && ch <= 0xF6); 497 498 xmlNameChars[ch] = 499 ('a' <= ch && ch <= 'z') || 500 ('A' <= ch && ch <= 'Z') || 501 ('0' <= ch && ch <= '9') || 502 ch == ':' || 503 ch == '_' || 504 ch == '-' || 505 ch == '.' || 506 ch == 0xB7 || 507 (0xC0 <= ch && ch <= 0xD6) || 508 (0xD8 <= ch && ch <= 0xF6); 509 } 510 } 511 }