1 /* 2 * Copyright (C) 2013 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.doclava; 18 19 import java.io.*; 20 import java.text.BreakIterator; 21 import java.util.ArrayList; 22 import java.util.Collections; 23 import java.util.Comparator; 24 import java.util.List; 25 import java.util.regex.Pattern; 26 import java.util.regex.Matcher; 27 import java.io.File; 28 29 import com.google.clearsilver.jsilver.data.Data; 30 31 import org.ccil.cowan.tagsoup.*; 32 import org.xml.sax.XMLReader; 33 import org.xml.sax.InputSource; 34 import org.xml.sax.Attributes; 35 import org.xml.sax.helpers.DefaultHandler; 36 37 import org.w3c.dom.Node; 38 import org.w3c.dom.NodeList; 39 40 import javax.xml.transform.dom.DOMResult; 41 import javax.xml.transform.sax.SAXSource; 42 import javax.xml.transform.Transformer; 43 import javax.xml.transform.TransformerFactory; 44 import javax.xml.xpath.XPath; 45 import javax.xml.xpath.XPathConstants; 46 import javax.xml.xpath.XPathExpression; 47 import javax.xml.xpath.XPathFactory; 48 49 /** 50 * Metadata associated with a specific documentation page. Extracts 51 * metadata based on the page's declared hdf vars (meta.tags and others) 52 * as well as implicit data relating to the page, such as url, type, etc. 53 * Includes a Node class that represents the metadata and lets it attach 54 * to parent/child elements in the tree metadata nodes for all pages. 
55 * Node also includes methods for rendering the node tree to a json file 56 * in docs output, which is then used by JavaScript to load metadata 57 * objects into html pages. 58 */ 59 60 public class PageMetadata { 61 File mSource; 62 String mDest; 63 String mTagList; 64 static boolean sLowercaseTags = true; 65 static boolean sLowercaseKeywords = true; 66 //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/"; 67 /** 68 * regex pattern to match javadoc @link and similar tags. Extracts 69 * root symbol to $1. 70 */ 71 private static final Pattern JD_TAG_PATTERN = 72 Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D"); 73 PageMetadata(File source, String dest, List<Node> taglist)74 public PageMetadata(File source, String dest, List<Node> taglist) { 75 mSource = source; 76 mDest = dest; 77 78 if (dest != null) { 79 int len = dest.length(); 80 if (len > 1 && dest.charAt(len - 1) != '/') { 81 mDest = dest + '/'; 82 } else { 83 mDest = dest; 84 } 85 } 86 } 87 88 /** 89 * Given a list of metadata nodes organized by type, sort the 90 * root nodes by type name and render the types and their child 91 * metadata nodes to a json file in the out dir. 92 * 93 * @param rootTypeNodesList A list of root metadata nodes, each 94 * representing a type and it's member child pages. 
95 * @deprecated 96 */ WriteList(List<Node> rootTypeNodesList)97 public static void WriteList(List<Node> rootTypeNodesList) { 98 Collections.sort(rootTypeNodesList, BY_TYPE_NAME); 99 Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build(); 100 101 StringBuilder buf = new StringBuilder(); 102 // write the taglist to string format 103 pageMeta.renderTypeResources(buf); 104 pageMeta.renderTypesByTag(buf); 105 // write the taglist to js file 106 Data data = Doclava.makeHDF(); 107 data.setValue("reference_tree", buf.toString()); 108 ClearPage.write(data, "jd_lists_unified.cs", "jd_lists_unified.js"); 109 } 110 111 /** 112 * Given a list of metadata nodes organized by lang, sort the 113 * root nodes by type name and render the types and their child 114 * metadata nodes to separate lang-specific json files in the out dir. 115 * 116 * @param rootNodesList A list of root metadata nodes, each 117 * representing a type and it's member child pages. 118 */ WriteListByLang(List<Node> rootNodesList)119 public static void WriteListByLang(List<Node> rootNodesList) { 120 Collections.sort(rootNodesList, BY_LANG_NAME); 121 for (Node n : rootNodesList) { 122 String langFilename = ""; 123 String langname = n.getLang(); 124 langFilename = "_" + langname; 125 Collections.sort(n.getChildren(), BY_TYPE_NAME); 126 Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build(); 127 128 StringBuilder buf = new StringBuilder(); 129 // write the taglist to string format 130 pageMeta.renderLangResources(buf,langname); 131 //pageMeta.renderTypesByTag(buf); 132 // write the taglist to js file 133 Data data = Doclava.makeHDF(); 134 data.setValue("reference_tree", buf.toString()); 135 data.setValue("metadata.lang", langname); 136 String unifiedFilename = "jd_lists_unified" + langFilename + ".js"; 137 String extrasFilename = "jd_extras" + langFilename + ".js"; 138 // write out jd_lists_unified for each lang 139 ClearPage.write(data, 
"jd_lists_unified.cs", unifiedFilename); 140 // append jd_extras to jd_lists_unified for each lang, then delete. 141 appendExtrasMetadata(extrasFilename, unifiedFilename); 142 } 143 } 144 145 /** 146 * Given a list of metadata nodes organized by lang, sort the 147 * root nodes by type name and render the types and their child 148 * samples metadata nodes only to separate lang-specific json files 149 * in the out dir. Only used by devsite (ds) builds. 150 * 151 * @param rootNodesList A list of root metadata nodes, each 152 * representing a type and it's member child pages. 153 */ WriteSamplesListByLang(List<Node> rootNodesList)154 public static void WriteSamplesListByLang(List<Node> rootNodesList) { 155 Collections.sort(rootNodesList, BY_LANG_NAME); 156 for (Node n : rootNodesList) { 157 boolean langHasSamples = false; 158 String langFilename = ""; 159 String langname = n.getLang(); 160 langFilename = "_" + langname; 161 Collections.sort(n.getChildren(), BY_TYPE_NAME); 162 Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build(); 163 164 StringBuilder buf = new StringBuilder(); 165 // write the taglist to string format 166 langHasSamples = pageMeta.renderSamplesResources(buf,langname); 167 // write the taglist to js file 168 Data data = Doclava.makeHDF(); 169 data.setValue("reference_tree", buf.toString()); 170 data.setValue("metadata.lang", langname); 171 172 if (langHasSamples) { 173 data.setValue("samples_only", "1"); 174 // write out jd_lists_unified for each lang 175 String unifiedFilename = "android_samples_metadata" + langFilename + ".js"; 176 ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename); 177 } 178 } 179 } 180 181 /** 182 * Extract supported metadata values from a page and add them as 183 * a child node of a root node based on type. Some metadata values 184 * are normalized. Unsupported metadata fields are ignored. See 185 * Node for supported metadata fields and methods for accessing values. 
186 * 187 * @param docfile The file from which to extract metadata. 188 * @param dest The output path for the file, used to set link to page. 189 * @param filename The file from which to extract metadata. 190 * @param hdf Data object in which to store the metadata values. 191 * @param tagList The file from which to extract metadata. 192 */ setPageMetadata(String docfile, String dest, String filename, Data hdf, List<Node> tagList)193 public static void setPageMetadata(String docfile, String dest, String filename, 194 Data hdf, List<Node> tagList) { 195 //exclude this page if author does not want it included 196 boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions","")); 197 198 //check whether summary and image exist and if not, get them from itemprop/markup 199 Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", "")); 200 Boolean needsImage = "".equals(hdf.getValue("page.image", "")); 201 if ((needsSummary) || (needsImage)) { 202 //try to extract the metadata from itemprop and markup 203 inferMetadata(docfile, hdf, needsSummary, needsImage); 204 } 205 206 //extract available metadata and set it in a node 207 if (!excludeNode) { 208 Node pageMeta = new Node.Builder().build(); 209 pageMeta.setLabel(getTitleNormalized(hdf, "page.title")); 210 pageMeta.setCategory(hdf.getValue("page.category","")); 211 pageMeta.setSummary(hdf.getValue("page.metaDescription","")); 212 pageMeta.setLink(getPageUrlNormalized(filename)); 213 pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group")); 214 pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags")); 215 pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags")); 216 pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", ""))); 217 pageMeta.setLang(getLangStringNormalized(hdf, filename)); 218 pageMeta.setType(getStringValueNormalized(hdf, "page.type")); 219 pageMeta.setTimestamp(hdf.getValue("page.timestamp","")); 220 appendMetaNodeByLang(pageMeta, tagList); 221 } 222 } 
223 224 /** 225 * Attempt to infer page metadata based on the contents of the 226 * file. Load and parse the file as a dom tree. Select values 227 * in this order: 1. dom node specifically tagged with 228 * microdata (itemprop). 2. first qualitifed p or img node. 229 * 230 * @param docfile The file from which to extract metadata. 231 * @param hdf Data object in which to store the metadata values. 232 * @param needsSummary Whether to extract summary metadata. 233 * @param needsImage Whether to extract image metadata. 234 */ inferMetadata(String docfile, Data hdf, Boolean needsSummary, Boolean needsImage)235 public static void inferMetadata(String docfile, Data hdf, 236 Boolean needsSummary, Boolean needsImage) { 237 String sum = ""; 238 String imageUrl = ""; 239 String sumFrom = needsSummary ? "none" : "hdf"; 240 String imgFrom = needsImage ? "none" : "hdf"; 241 String filedata = hdf.getValue("commentText", ""); 242 if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n"); 243 244 try { 245 XPathFactory xpathFac = XPathFactory.newInstance(); 246 XPath xpath = xpathFac.newXPath(); 247 InputStream inputStream = new ByteArrayInputStream(filedata.getBytes()); 248 XMLReader reader = new Parser(); 249 reader.setFeature(Parser.namespacesFeature, false); 250 reader.setFeature(Parser.namespacePrefixesFeature, false); 251 reader.setFeature(Parser.ignoreBogonsFeature, true); 252 253 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 254 DOMResult result = new DOMResult(); 255 transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result); 256 org.w3c.dom.Node htmlNode = result.getNode(); 257 258 if (needsSummary) { 259 StringBuilder sumStrings = new StringBuilder(); 260 XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*" 261 + "[@itemprop='description'][1]//text()[string(.)]"); 262 org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode, 263 XPathConstants.NODESET); 
264 if (nodes.getLength() > 0) { 265 for (int i = 0; i < nodes.getLength(); i++) { 266 String tx = nodes.item(i).getNodeValue(); 267 sumStrings.append(tx); 268 sumFrom = "itemprop"; 269 } 270 } else { 271 XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../" 272 + "@class='notice-developers') and not(../@class='sidebox')" 273 + "and not(@class)]//text()"); 274 nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET); 275 if (nodes.getLength() > 0) { 276 for (int i = 0; i < nodes.getLength(); i++) { 277 String tx = nodes.item(i).getNodeValue(); 278 sumStrings.append(tx + " "); 279 sumFrom = "markup"; 280 } 281 } 282 } 283 //found a summary string, now normalize it 284 sum = sumStrings.toString().trim(); 285 if ((sum != null) && (!"".equals(sum))) { 286 sum = getSummaryNormalized(sum); 287 } 288 //normalized summary ended up being too short to be meaningful 289 if ("".equals(sum)) { 290 if (Doclava.META_DBG) System.out.println("Warning: description too short! (" 291 + sum.length() + "chars) ...\n\n"); 292 } 293 //summary looks good, store it to the file hdf data 294 hdf.setValue("page.metaDescription", sum); 295 } 296 if (needsImage) { 297 XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src"); 298 org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode, 299 XPathConstants.NODESET); 300 if (imgNodes.getLength() > 0) { 301 imageUrl = imgNodes.item(0).getNodeValue(); 302 imageUrl = getImageUrlNormalized(imageUrl); 303 imgFrom = "itemprop"; 304 } else { 305 XPathExpression FirstImgExpr = xpath.compile("//img/@src"); 306 imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET); 307 if (imgNodes.getLength() > 0) { 308 //iterate nodes looking for valid image url and normalize. 
309 for (int i = 0; i < imgNodes.getLength(); i++) { 310 String tx = imgNodes.item(i).getNodeValue(); 311 //qualify and normalize the image 312 imageUrl = getImageUrlNormalized(tx); 313 //this img src did not qualify, keep looking... 314 if ("".equals(imageUrl)) { 315 if (Doclava.META_DBG) System.out.println(" >>>>> Discarded image: " + tx); 316 continue; 317 } else { 318 imgFrom = "markup"; 319 break; 320 } 321 } 322 } 323 } 324 //img src url looks good, store it to the file hdf data 325 hdf.setValue("page.image", imageUrl); 326 } 327 if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl); 328 if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length() 329 + " chars\n\n" + sum + "\n"); 330 return; 331 332 } catch (Exception e) { 333 if (Doclava.META_DBG) System.out.println(" >>>>> Exception: " + e + "\n"); 334 } 335 } 336 337 /** 338 * Normalize a comma-delimited, multi-string value. Split on commas, remove 339 * quotes, trim whitespace, optionally make keywords/tags lowercase for 340 * easier matching. 341 * 342 * @param hdf Data object in which the metadata values are stored. 343 * @param tag The hdf var from which the metadata was extracted. 344 * @return A normalized string value for the specified tag. 
345 */ getPageTagsNormalized(Data hdf, String tag)346 public static String getPageTagsNormalized(Data hdf, String tag) { 347 348 String normTags = ""; 349 StringBuilder tags = new StringBuilder(); 350 String tagList = hdf.getValue(tag, ""); 351 if (tag.equals("meta.tags") && (tagList.equals(""))) { 352 //use keywords as tags if no meta tags are available 353 tagList = hdf.getValue("page.tags", ""); 354 } 355 if (!tagList.equals("")) { 356 tagList = tagList.replaceAll("\"", ""); 357 358 String[] tagParts = tagList.split("[,\u3001]"); 359 for (int iter = 0; iter < tagParts.length; iter++) { 360 tags.append("\""); 361 if (tag.equals("meta.tags") && sLowercaseTags) { 362 tagParts[iter] = tagParts[iter].toLowerCase(); 363 } else if (tag.equals("page.tags") && sLowercaseKeywords) { 364 tagParts[iter] = tagParts[iter].toLowerCase(); 365 } 366 if (tag.equals("meta.tags")) { 367 //tags.append("#"); //to match hashtag format used with yt/blogger resources 368 tagParts[iter] = tagParts[iter].replaceAll(" ",""); 369 } 370 tags.append(tagParts[iter].trim()); 371 tags.append("\""); 372 if (iter < tagParts.length - 1) { 373 tags.append(","); 374 } 375 } 376 } 377 //write this back to hdf to expose through js 378 if (tag.equals("meta.tags")) { 379 hdf.setValue(tag, tags.toString()); 380 } 381 return tags.toString(); 382 } 383 384 /** 385 * Normalize a string for which only a single value is supported. 386 * Extract the string up to the first comma, remove quotes, remove 387 * any forward-slash prefix, trim any whitespace, optionally make 388 * lowercase for easier matching. 389 * 390 * @param hdf Data object in which the metadata values are stored. 391 * @param tag The hdf var from which the metadata should be extracted. 392 * @return A normalized string value for the specified tag. 
393 */ getStringValueNormalized(Data hdf, String tag)394 public static String getStringValueNormalized(Data hdf, String tag) { 395 StringBuilder outString = new StringBuilder(); 396 String tagList = hdf.getValue(tag, ""); 397 tagList.replaceAll("\"", ""); 398 if ("".equals(tagList)) { 399 return tagList; 400 } else { 401 int end = tagList.indexOf(","); 402 if (end != -1) { 403 tagList = tagList.substring(0,end); 404 } 405 tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList; 406 if ("sample.group".equals(tag) && sLowercaseTags) { 407 tagList = tagList.toLowerCase(); 408 } 409 outString.append(tagList.trim()); 410 return outString.toString(); 411 } 412 } 413 414 /** 415 * Normalize a page title. Extract the string, remove quotes, remove 416 * markup, and trim any whitespace. 417 * 418 * @param hdf Data object in which the metadata values are stored. 419 * @param tag The hdf var from which the metadata should be extracted. 420 * @return A normalized string value for the specified tag. 421 */ getTitleNormalized(Data hdf, String tag)422 public static String getTitleNormalized(Data hdf, String tag) { 423 StringBuilder outTitle = new StringBuilder(); 424 String title = hdf.getValue(tag, ""); 425 if (!title.isEmpty()) { 426 title = escapeString(title); 427 if (title.indexOf("<span") != -1) { 428 String[] splitTitle = title.split("<span(.*?)</span>"); 429 title = splitTitle[0]; 430 for (int j = 1; j < splitTitle.length; j++) { 431 title.concat(splitTitle[j]); 432 } 433 } 434 outTitle.append(title.trim()); 435 } 436 return outTitle.toString(); 437 } 438 439 /** 440 * Extract and normalize a page's language string based on the 441 * lowercased dir path. Non-supported langs are ignored and assigned 442 * the default lang string of "en". 443 * 444 * @param filename A path string to the file relative to root. 445 * @return A normalized lang value. 
446 */ getLangStringNormalized(Data data, String filename)447 public static String getLangStringNormalized(Data data, String filename) { 448 String[] stripStr = filename.toLowerCase().split("\\/", 3); 449 String outFrag = "en"; 450 String pathCanonical = filename; 451 if (stripStr.length > 0) { 452 for (String t : DocFile.DEVSITE_VALID_LANGS) { 453 if ("intl".equals(stripStr[0])) { 454 if (t.equals(stripStr[1])) { 455 outFrag = stripStr[1]; 456 //extract the root url (exclusive of intl/nn) 457 pathCanonical = stripStr[2]; 458 break; 459 } 460 } 461 } 462 } 463 //extract the root url (exclusive of intl/nn) 464 data.setValue("path.canonical", pathCanonical); 465 return outFrag; 466 } 467 468 /** 469 * Normalize a page summary string and truncate as needed. Strings 470 * exceeding max_chars are truncated at the first word boundary 471 * following the max_size marker. Strings smaller than min_chars 472 * are discarded (as they are assumed to be too little context). 473 * 474 * @param s String extracted from the page as it's summary. 475 * @return A normalized string value. 
476 */ getSummaryNormalized(String s)477 public static String getSummaryNormalized(String s) { 478 String str = ""; 479 int max_chars = 250; 480 int min_chars = 50; 481 int marker = 0; 482 if (s.length() < min_chars) { 483 return str; 484 } else { 485 str = s.replaceAll("^\"|\"$", ""); 486 str = str.replaceAll("\\s+", " "); 487 str = JD_TAG_PATTERN.matcher(str).replaceAll("$1"); 488 str = escapeString(str); 489 BreakIterator bi = BreakIterator.getWordInstance(); 490 bi.setText(str); 491 if (str.length() > max_chars) { 492 marker = bi.following(max_chars); 493 } else { 494 marker = bi.last(); 495 } 496 str = str.substring(0, marker); 497 str = str.concat("\u2026" ); 498 } 499 return str; 500 } 501 escapeString(String s)502 public static String escapeString(String s) { 503 s = s.replaceAll("\"", """); 504 s = s.replaceAll("\'", "'"); 505 s = s.replaceAll("<", "<"); 506 s = s.replaceAll(">", ">"); 507 s = s.replaceAll("/", "/"); 508 return s; 509 } 510 511 //Disqualify img src urls that include these substrings 512 public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo", 513 "icon_play.png", "robot-tiny"}; 514 inList(String s, String[] list)515 public static boolean inList(String s, String[] list) { 516 for (String t : list) { 517 if (s.contains(t)) { 518 return true; 519 } 520 } 521 return false; 522 } 523 524 /** 525 * Normalize an img src url by removing docRoot and leading 526 * slash for local image references. These are added later 527 * in js to support offline mode and keep path reference 528 * format consistent with hrefs. 529 * 530 * @param url Abs or rel url sourced from img src. 
531 * @return Normalized url if qualified, else empty 532 */ getImageUrlNormalized(String url)533 public static String getImageUrlNormalized(String url) { 534 String absUrl = ""; 535 // validate to avoid choosing using specific images 536 if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) { 537 absUrl = url.replace("{@docRoot}", ""); 538 absUrl = absUrl.replaceFirst("^/(?!/)", ""); 539 } 540 return absUrl; 541 } 542 543 /** 544 * Normalize an href url by removing docRoot and leading 545 * slash for local image references. These are added later 546 * in js to support offline mode and keep path reference 547 * format consistent with hrefs. 548 * 549 * @param url Abs or rel page url sourced from href 550 * @return Normalized url, either abs or rel to root 551 */ getPageUrlNormalized(String url)552 public static String getPageUrlNormalized(String url) { 553 String absUrl = ""; 554 555 if ((url !=null) && (!url.equals(""))) { 556 absUrl = url.replace("{@docRoot}", ""); 557 if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) { 558 absUrl = absUrl.replaceFirst("^en/", ""); 559 } 560 absUrl = absUrl.replaceFirst("^/(?!/)", ""); 561 } 562 return absUrl; 563 } 564 565 /** 566 * Given a metadata node, add it as a child of a root node based on its 567 * type. If there is no root node that matches the node's type, create one 568 * and add the metadata node as a child node. 569 * 570 * @param gNode The node to attach to a root node or add as a new root node. 571 * @param rootList The current list of root nodes. 572 * @return The updated list of root nodes. 
573 */ appendMetaNodeByLang(Node gNode, List<Node> rootList)574 public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) { 575 576 String nodeLang = gNode.getLang(); 577 boolean matched = false; 578 for (Node n : rootList) { 579 if (n.getLang().equals(nodeLang)) { //find any matching lang node 580 appendMetaNodeByType(gNode,n.getChildren()); 581 //n.getChildren().add(gNode); 582 matched = true; 583 break; // add to the first root node only 584 } // tag did not match 585 } // end rootnodes matching iterator 586 if (!matched) { 587 List<Node> mlangList = new ArrayList<Node>(); // list of file objects that have a given lang 588 //mlangList.add(gNode); 589 Node tnode = new Node.Builder().setChildren(mlangList).setLang(nodeLang).build(); 590 rootList.add(tnode); 591 appendMetaNodeByType(gNode, mlangList); 592 } 593 return rootList; 594 } 595 596 /** 597 * Given a metadata node, add it as a child of a root node based on its 598 * type. If there is no root node that matches the node's type, create one 599 * and add the metadata node as a child node. 600 * 601 * @param gNode The node to attach to a root node or add as a new root node. 602 * @param rootList The current list of root nodes. 603 * @return The updated list of root nodes. 
604 */ appendMetaNodeByType(Node gNode, List<Node> rootList)605 public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) { 606 607 String nodeTags = gNode.getType(); 608 boolean matched = false; 609 for (Node n : rootList) { 610 if (n.getType().equals(nodeTags)) { //find any matching type node 611 n.getChildren().add(gNode); 612 matched = true; 613 break; // add to the first root node only 614 } // tag did not match 615 } // end rootnodes matching iterator 616 if (!matched) { 617 List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type 618 mtaglist.add(gNode); 619 Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build(); 620 rootList.add(tnode); 621 } 622 return rootList; 623 } 624 625 /** 626 * Given a metadata node, add it as a child of a root node based on its 627 * tag. If there is no root node matching the tag, create one for it 628 * and add the metadata node as a child node. 629 * 630 * @param gNode The node to attach to a root node or add as a new root node. 631 * @param rootTagNodesList The current list of root nodes. 632 * @return The updated list of root nodes. 
633 */ appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList)634 public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) { 635 636 for (int iter = 0; iter < gNode.getChildren().size(); iter++) { 637 if (gNode.getChildren().get(iter).getTags() != null) { 638 List<String> nodeTags = gNode.getChildren().get(iter).getTags(); 639 boolean matched = false; 640 for (String t : nodeTags) { //process each of the meta.tags 641 for (Node n : rootTagNodesList) { 642 if (n.getLabel().equals(t.toString())) { 643 n.getTags().add(String.valueOf(iter)); 644 matched = true; 645 break; // add to the first root node only 646 } // tag did not match 647 } // end rootnodes matching iterator 648 if (!matched) { 649 List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag 650 mtaglist.add(String.valueOf(iter)); 651 Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build(); 652 rootTagNodesList.add(tnode); 653 } 654 } 655 } 656 } 657 return rootTagNodesList; 658 } 659 660 /** 661 * Append the contents of jd_extras to jd_lists_unified for each language. 662 * 663 * @param extrasFilename The lang-specific extras file to append. 664 * @param unifiedFilename The lang-specific unified metadata file. 
665 */ appendExtrasMetadata(String extrasFilename, String unifiedFilename)666 public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) { 667 668 File f = new File(ClearPage.outputDir + "/" + extrasFilename); 669 if (f.exists() && !f.isDirectory()) { 670 ClearPage.copyFile(true, f, unifiedFilename, true); 671 } 672 } 673 674 public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() { 675 public int compare (Node one, Node other) { 676 return one.getLabel().compareTo(other.getLabel()); 677 } 678 }; 679 680 public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() { 681 public int compare (Node one, Node other) { 682 return one.getType().compareTo(other.getType()); 683 } 684 }; 685 686 public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() { 687 public int compare (Node one, Node other) { 688 return one.getLang().compareTo(other.getLang()); 689 } 690 }; 691 692 /** 693 * A node for storing page metadata. Use Builder.build() to instantiate. 
694 */ 695 public static class Node { 696 697 private String mLabel; // holds page.title or similar identifier 698 private String mCategory; // subtabs, example 'training' 'guides' 699 private String mSummary; // Summary for card or similar use 700 private String mLink; //link href for item click 701 private String mGroup; // from sample.group in _index.jd 702 private List<String> mKeywords; // from page.tags 703 private List<String> mTags; // from meta.tags 704 private String mImage; // holds an href, fully qualified or relative to root 705 private List<Node> mChildren; 706 private String mLang; 707 private String mType; // design, develop, distribute, youtube, blog, etc 708 private String mTimestamp; // optional timestamp eg 1447452827 709 Node(Builder builder)710 private Node(Builder builder) { 711 mLabel = builder.mLabel; 712 mCategory = builder.mCategory; 713 mSummary = builder.mSummary; 714 mLink = builder.mLink; 715 mGroup = builder.mGroup; 716 mKeywords = builder.mKeywords; 717 mTags = builder.mTags; 718 mImage = builder.mImage; 719 mChildren = builder.mChildren; 720 mLang = builder.mLang; 721 mType = builder.mType; 722 mTimestamp = builder.mTimestamp; 723 } 724 725 private static class Builder { 726 private String mLabel, mCategory, mSummary, mLink, mGroup, mImage, mLang, mType, mTimestamp; 727 private List<String> mKeywords = null; 728 private List<String> mTags = null; 729 private List<Node> mChildren = null; setLabel(String mLabel)730 public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;} setCategory(String mCategory)731 public Builder setCategory(String mCategory) { 732 this.mCategory = mCategory; return this; 733 } setSummary(String mSummary)734 public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;} setLink(String mLink)735 public Builder setLink(String mLink) {this.mLink = mLink; return this;} setGroup(String mGroup)736 public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;} 
setKeywords(List<String> mKeywords)737 public Builder setKeywords(List<String> mKeywords) { 738 this.mKeywords = mKeywords; return this; 739 } setTags(List<String> mTags)740 public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;} setImage(String mImage)741 public Builder setImage(String mImage) {this.mImage = mImage; return this;} setChildren(List<Node> mChildren)742 public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;} setLang(String mLang)743 public Builder setLang(String mLang) {this.mLang = mLang; return this;} setType(String mType)744 public Builder setType(String mType) {this.mType = mType; return this;} setTimestamp(String mTimestamp)745 public Builder setTimestamp(String mTimestamp) {this.mTimestamp = mTimestamp; return this;} build()746 public Node build() {return new Node(this);} 747 } 748 749 /** 750 * Render a tree of metadata nodes organized by type. 751 * @param buf Output buffer to render to. 752 */ renderTypeResources(StringBuilder buf)753 void renderTypeResources(StringBuilder buf) { 754 List<Node> list = mChildren; //list of type rootnodes 755 if (list == null || list.size() == 0) { 756 buf.append("null"); 757 } else { 758 final int n = list.size(); 759 for (int i = 0; i < n; i++) { 760 buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = ["); 761 list.get(i).renderTypes(buf); //render this type's children 762 buf.append("\n];\n\n"); 763 } 764 } 765 } 766 767 /** 768 * Render a tree of metadata nodes organized by lang. 769 * @param buf Output buffer to render to. 770 */ renderLangResources(StringBuilder buf, String langname)771 void renderLangResources(StringBuilder buf, String langname) { 772 List<Node> list = mChildren; //list of type rootnodes 773 if (list == null || list.size() == 0) { 774 buf.append("null"); 775 } else { 776 final int n = list.size(); 777 for (int i = 0; i < n; i++) { 778 buf.append("METADATA['" + langname + "']." 
+ list.get(i).mType + " = ["); 779 list.get(i).renderTypes(buf); //render this lang's children 780 buf.append("\n];\n\n"); 781 } 782 } 783 } 784 785 /** 786 * Render a tree of metadata nodes of type 'develop' to extract 787 * samples metadata. Only used by devsite (ds) builds. 788 * @param buf Output buffer to render to. 789 * @return true if samples were rendered to buf 790 */ renderSamplesResources(StringBuilder buf, String langname)791 boolean renderSamplesResources(StringBuilder buf, String langname) { 792 boolean langHasSamples = false; 793 List<Node> list = mChildren; //list of type rootnodes 794 if (list == null || list.size() == 0) { 795 buf.append("null"); 796 } else { 797 final int n = list.size(); 798 for (int i = 0; i < n; i++) { 799 //samples are always in type 'develop', so restrict 800 if ("develop".equals(list.get(i).mType)) { 801 //render this type's children 802 langHasSamples = list.get(i).renderTypeForSamples(buf); 803 } 804 } 805 } 806 return langHasSamples; 807 } 808 809 /** 810 * Render all metadata nodes for a specific type. 811 * @param buf Output buffer to render to. 
812 */ renderTypes(StringBuilder buf)813 void renderTypes(StringBuilder buf) { 814 List<Node> list = mChildren; 815 if (list == null || list.size() == 0) { 816 buf.append("nulltype"); 817 } else { 818 final int n = list.size(); 819 for (int i = 0; i < n; i++) { 820 buf.append("\n {\n"); 821 buf.append(" \"title\":\""); 822 renderStrWithUcs(buf, list.get(i).mLabel); 823 buf.append("\",\n" ); 824 buf.append(" \"summary\":\""); 825 renderStrWithUcs(buf, list.get(i).mSummary); 826 buf.append("\",\n" ); 827 buf.append(" \"url\":\"" + list.get(i).mLink + "\",\n" ); 828 if (!"".equals(list.get(i).mImage)) { 829 buf.append(" \"image\":\"" + list.get(i).mImage + "\",\n" ); 830 } 831 if (!"".equals(list.get(i).mGroup)) { 832 buf.append(" \"group\":\""); 833 renderStrWithUcs(buf, list.get(i).mGroup); 834 buf.append("\",\n" ); 835 } 836 if (!"".equals(list.get(i).mCategory)) { 837 buf.append(" \"category\":\"" + list.get(i).mCategory + "\",\n" ); 838 } 839 if ((list.get(i).mType != null) && (list.get(i).mType != "")) { 840 buf.append(" \"type\":\"" + list.get(i).mType + "\",\n"); 841 } 842 list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords"); 843 list.get(i).renderArrayType(buf, list.get(i).mTags, "tags"); 844 if (!"".equals(list.get(i).mTimestamp)) { 845 buf.append(" \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n"); 846 } 847 buf.append(" \"lang\":\"" + list.get(i).mLang + "\"" ); 848 buf.append("\n }"); 849 if (i != n - 1) { 850 buf.append(", "); 851 } 852 } 853 } 854 } 855 856 /** 857 * Render all metadata nodes for samples only. 858 * Only used by devsite (ds) builds. 859 * @param buf Output buffer to render to. 
860 * @return whether any samples were rendered to buf 861 */ renderTypeForSamples(StringBuilder buf)862 boolean renderTypeForSamples(StringBuilder buf) { 863 boolean typeHasSamples = false; 864 List<Node> list = mChildren; 865 if (list == null || list.size() == 0) { 866 buf.append("nulltype"); 867 } else { 868 final int n = list.size(); 869 for (int i = 0; i < n; i++) { 870 // valid samples must have category 'samples' 871 if ("samples".equals(list.get(i).mCategory)) { 872 typeHasSamples = true; 873 buf.append("\n {\n"); 874 buf.append(" \"title\":\""); 875 renderStrWithUcs(buf, list.get(i).mLabel); 876 buf.append("\",\n" ); 877 buf.append(" \"summary\":\""); 878 renderStrWithUcs(buf, list.get(i).mSummary); 879 buf.append("\",\n" ); 880 buf.append(" \"url\":\"" + list.get(i).mLink + "\",\n" ); 881 if (!"".equals(list.get(i).mImage)) { 882 buf.append(" \"image\":\"" + list.get(i).mImage + "\",\n" ); 883 } 884 if (!"".equals(list.get(i).mGroup)) { 885 buf.append(" \"group\":\""); 886 renderStrWithUcs(buf, list.get(i).mGroup); 887 buf.append("\",\n" ); 888 } 889 if (!"".equals(list.get(i).mCategory)) { 890 buf.append(" \"category\":\"" + list.get(i).mCategory + "\",\n" ); 891 } 892 if ((list.get(i).mType != null) && (list.get(i).mType != "")) { 893 buf.append(" \"type\":\"" + list.get(i).mType + "\",\n"); 894 } 895 list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords"); 896 list.get(i).renderArrayType(buf, list.get(i).mTags, "tags"); 897 if (!"".equals(list.get(i).mTimestamp)) { 898 buf.append(" \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n"); 899 } 900 buf.append(" \"lang\":\"" + list.get(i).mLang + "\"" ); 901 buf.append("\n }"); 902 if (i != n - 1) { 903 buf.append(", "); 904 } 905 } 906 } 907 } 908 return typeHasSamples; 909 } 910 911 /** 912 * Build and render a list of tags associated with each type. 913 * @param buf Output buffer to render to. 
914 */ renderTypesByTag(StringBuilder buf)915 void renderTypesByTag(StringBuilder buf) { 916 List<Node> list = mChildren; //list of rootnodes 917 if (list == null || list.size() == 0) { 918 buf.append("null"); 919 } else { 920 final int n = list.size(); 921 for (int i = 0; i < n; i++) { 922 buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {"); 923 List<Node> mTagList = new ArrayList(); //list of rootnodes 924 mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList); 925 list.get(i).renderTagIndices(buf, mTagList); 926 buf.append("\n};\n\n"); 927 } 928 } 929 } 930 931 /** 932 * Render a list of tags associated with a type, including the 933 * tag's indices in the type array. 934 * @param buf Output buffer to render to. 935 * @param tagList Node tree of types to render. 936 */ renderTagIndices(StringBuilder buf, List<Node> tagList)937 void renderTagIndices(StringBuilder buf, List<Node> tagList) { 938 List<Node> list = tagList; 939 if (list == null || list.size() == 0) { 940 buf.append(""); 941 } else { 942 final int n = list.size(); 943 for (int i = 0; i < n; i++) { 944 buf.append("\n " + list.get(i).mLabel + ":["); 945 renderArrayValue(buf, list.get(i).mTags); 946 buf.append("]"); 947 if (i != n - 1) { 948 buf.append(", "); 949 } 950 } 951 } 952 } 953 954 /** 955 * Render key:arrayvalue pair. 956 * @param buf Output buffer to render to. 957 * @param type The list value to render as an arrayvalue. 958 * @param key The key for the pair. 959 */ renderArrayType(StringBuilder buf, List<String> type, String key)960 void renderArrayType(StringBuilder buf, List<String> type, String key) { 961 buf.append(" \"" + key + "\": ["); 962 renderArrayValue(buf, type); 963 buf.append("],\n"); 964 } 965 966 /** 967 * Render an array value to buf, with special handling of unicode characters. 968 * @param buf Output buffer to render to. 969 * @param type The list value to render as an arrayvalue. 
*/
    void renderArrayValue(StringBuilder buf, List<String> type) {
      if (type == null) {
        return;
      }
      // Elements are written as-is (no quotes are added here), separated by ",".
      final int count = type.size();
      for (int i = 0; i < count; i++) {
        renderStrWithUcs(buf, type.get(i));
        if (i != count - 1) {
          buf.append(",");
        }
      }
    }

    /**
     * Render a string that can include ucs2 encoded characters.
     * Printable ASCII characters other than the backslash are appended
     * unchanged; every other UTF-16 code unit is appended as a four-digit
     * \\uXXXX escape. A null string appends nothing.
     * @param buf Output buffer to render to.
     * @param chars String to append to buf with any necessary encoding
     */
    void renderStrWithUcs(StringBuilder buf, String chars) {
      if (chars == null) {
        return;
      }
      final int length = chars.length();
      for (int i = 0; i < length; i++) {
        char c = chars.charAt(i);
        if (c >= ' ' && c <= '~' && c != '\\') {
          buf.append(c);
        } else {
          // Escape per UTF-16 code unit. A supplementary character therefore
          // becomes a surrogate pair of two escapes; formatting the whole
          // code point would give five hex digits, which is not a valid
          // four-digit escape.
          buf.append(String.format("\\u%04x", (int) c));
        }
      }
    }

    public String getLabel() {
      return mLabel;
    }

    public void setLabel(String label) {
      mLabel = label;
    }

    public String getCategory() {
      return mCategory;
    }

    public void setCategory(String title) {
      mCategory = title;
    }

    public String getSummary() {
      return mSummary;
    }

    public void setSummary(String summary) {
      mSummary = summary;
    }

    public String getLink() {
      return mLink;
    }

    public void setLink(String ref) {
      mLink = ref;
    }

    public String getGroup() {
      return mGroup;
    }

    public void setGroup(String group) {
      mGroup = group;
    }

    public List<String> getTags() {
      return mTags;
    }

    /**
     * Set the tags from a comma-separated string.
     * @param tags Comma-separated tags; null or "" clears the tags to null.
     */
    public void setTags(String tags) {
      mTags = splitCommaSeparated(tags);
    }

    public List<String> getKeywords() {
      return mKeywords;
    }

    /**
     * Set the keywords from a comma-separated string.
     * @param keywords Comma-separated keywords; null or "" clears the
     *     keywords to null.
     */
    public void setKeywords(String keywords) {
      mKeywords = splitCommaSeparated(keywords);
    }

    /**
     * Split a comma-separated string into a new mutable list.
     * @param csv The string to split on ','.
     * @return the parts in order, or null when csv is null or empty.
     */
    private List<String> splitCommaSeparated(String csv) {
      if (csv == null || "".equals(csv)) {
        return null;
      }
      List<String> parts = new ArrayList<String>();
      Collections.addAll(parts, csv.split(","));
      return parts;
    }

    public String getImage() {
      return mImage;
    }

    public void setImage(String ref) {
      mImage = ref;
    }

    public List<Node> getChildren() {
      return mChildren;
    }

    public void setChildren(List<Node> node) {
      mChildren = node;
    }

    public String getLang() {
      return mLang;
    }

    public void setLang(String lang) {
      mLang = lang;
    }

    public String getType() {
      return mType;
    }

    public String getTimestamp() {
      return mTimestamp;
    }

    public void setType(String type) {
      mType = type;
    }

    public void setTimestamp(String timestamp) {
      mTimestamp = timestamp;
    }
  }
}