• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.doclava;
18 
19 import java.io.*;
20 import java.text.BreakIterator;
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.Comparator;
24 import java.util.List;
25 import java.util.regex.Pattern;
26 import java.util.regex.Matcher;
27 import java.io.File;
28 
29 import com.google.clearsilver.jsilver.data.Data;
30 
31 import org.ccil.cowan.tagsoup.*;
32 import org.xml.sax.XMLReader;
33 import org.xml.sax.InputSource;
34 import org.xml.sax.Attributes;
35 import org.xml.sax.helpers.DefaultHandler;
36 
37 import org.w3c.dom.Node;
38 import org.w3c.dom.NodeList;
39 
40 import javax.xml.transform.dom.DOMResult;
41 import javax.xml.transform.sax.SAXSource;
42 import javax.xml.transform.Transformer;
43 import javax.xml.transform.TransformerFactory;
44 import javax.xml.xpath.XPath;
45 import javax.xml.xpath.XPathConstants;
46 import javax.xml.xpath.XPathExpression;
47 import javax.xml.xpath.XPathFactory;
48 
49 /**
50 * Metadata associated with a specific documentation page. Extracts
51 * metadata based on the page's declared hdf vars (meta.tags and others)
52 * as well as implicit data relating to the page, such as url, type, etc.
53 * Includes a Node class that represents the metadata and lets it attach
54 * to parent/child elements in the tree metadata nodes for all pages.
55 * Node also includes methods for rendering the node tree to a json file
56 * in docs output, which is then used by JavaScript to load metadata
57 * objects into html pages.
58 */
59 
60 public class PageMetadata {
61   File mSource;
62   String mDest;
63   String mTagList;
64   static boolean sLowercaseTags = true;
65   static boolean sLowercaseKeywords = true;
66   //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/";
67   /**
68    * regex pattern to match javadoc @link and similar tags. Extracts
69    * root symbol to $1.
70    */
71   private static final Pattern JD_TAG_PATTERN =
72       Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
73 
PageMetadata(File source, String dest, List<Node> taglist)74   public PageMetadata(File source, String dest, List<Node> taglist) {
75     mSource = source;
76     mDest = dest;
77 
78     if (dest != null) {
79       int len = dest.length();
80       if (len > 1 && dest.charAt(len - 1) != '/') {
81         mDest = dest + '/';
82       } else {
83         mDest = dest;
84       }
85     }
86   }
87 
88   /**
89   * Given a list of metadata nodes organized by type, sort the
90   * root nodes by type name and render the types and their child
91   * metadata nodes to a json file in the out dir.
92   *
93   * @param rootTypeNodesList A list of root metadata nodes, each
94   *        representing a type and it's member child pages.
95   */
WriteList(List<Node> rootTypeNodesList)96   public static void WriteList(List<Node> rootTypeNodesList) {
97 
98     Collections.sort(rootTypeNodesList, BY_TYPE_NAME);
99     Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build();
100 
101     StringBuilder buf = new StringBuilder();
102     // write the taglist to string format
103     pageMeta.renderTypeResources(buf);
104     pageMeta.renderTypesByTag(buf);
105     // write the taglist to js file
106     Data data = Doclava.makeHDF();
107     data.setValue("reference_tree", buf.toString());
108     ClearPage.write(data, "jd_lists_unified.cs", "jd_lists_unified.js");
109   }
110 
111   /**
112   * Given a list of metadata nodes organized by lang, sort the
113   * root nodes by type name and render the types and their child
114   * metadata nodes to separate lang-specific json files in the out dir.
115   *
116   * @param rootNodesList A list of root metadata nodes, each
117   *        representing a type and it's member child pages.
118   */
WriteListByLang(List<Node> rootNodesList)119   public static void WriteListByLang(List<Node> rootNodesList) {
120     Collections.sort(rootNodesList, BY_LANG_NAME);
121     for (Node n : rootNodesList) {
122       String langFilename = "";
123       String langname = n.getLang();
124       langFilename = "_" + langname;
125       Collections.sort(n.getChildren(), BY_TYPE_NAME);
126       Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build();
127 
128       StringBuilder buf = new StringBuilder();
129       // write the taglist to string format
130       pageMeta.renderLangResources(buf,langname);
131       //pageMeta.renderTypesByTag(buf);
132       // write the taglist to js file
133       Data data = Doclava.makeHDF();
134       data.setValue("reference_tree", buf.toString());
135       data.setValue("metadata.lang", langname);
136       String unifiedFilename = "jd_lists_unified" + langFilename + ".js";
137       String extrasFilename = "jd_extras" + langFilename + ".js";
138       // write out jd_lists_unified for each lang
139       ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename);
140       // append jd_extras to jd_lists_unified for each lang, then delete.
141       appendExtrasMetadata(extrasFilename, unifiedFilename);
142     }
143   }
144 
145   /**
146   * Extract supported metadata values from a page and add them as
147   * a child node of a root node based on type. Some metadata values
148   * are normalized. Unsupported metadata fields are ignored. See
149   * Node for supported metadata fields and methods for accessing values.
150   *
151   * @param docfile The file from which to extract metadata.
152   * @param dest The output path for the file, used to set link to page.
153   * @param filename The file from which to extract metadata.
154   * @param hdf Data object in which to store the metadata values.
155   * @param tagList The file from which to extract metadata.
156   */
setPageMetadata(String docfile, String dest, String filename, Data hdf, List<Node> tagList)157   public static void setPageMetadata(String docfile, String dest, String filename,
158       Data hdf, List<Node> tagList) {
159     //exclude this page if author does not want it included
160     boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
161 
162     //check whether summary and image exist and if not, get them from itemprop/markup
163     Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
164     Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
165     if ((needsSummary) || (needsImage)) {
166       //try to extract the metadata from itemprop and markup
167       inferMetadata(docfile, hdf, needsSummary, needsImage);
168     }
169 
170     //extract available metadata and set it in a node
171     if (!excludeNode) {
172       Node pageMeta = new Node.Builder().build();
173       pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
174       pageMeta.setCategory(hdf.getValue("page.category",""));
175       pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
176       pageMeta.setLink(getPageUrlNormalized(filename));
177       pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
178       pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
179       pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
180       pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
181       pageMeta.setLang(getLangStringNormalized(hdf, filename));
182       pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
183       pageMeta.setTimestamp(hdf.getValue("page.timestamp",""));
184       if (Doclava.USE_UPDATED_TEMPLATES) {
185         appendMetaNodeByLang(pageMeta, tagList);
186       } else {
187         appendMetaNodeByType(pageMeta, tagList);
188       }
189     }
190   }
191 
192   /**
193   * Attempt to infer page metadata based on the contents of the
194   * file. Load and parse the file as a dom tree. Select values
195   * in this order: 1. dom node specifically tagged with
196   * microdata (itemprop). 2. first qualitifed p or img node.
197   *
198   * @param docfile The file from which to extract metadata.
199   * @param hdf Data object in which to store the metadata values.
200   * @param needsSummary Whether to extract summary metadata.
201   * @param needsImage Whether to extract image metadata.
202   */
inferMetadata(String docfile, Data hdf, Boolean needsSummary, Boolean needsImage)203   public static void inferMetadata(String docfile, Data hdf,
204       Boolean needsSummary, Boolean needsImage) {
205     String sum = "";
206     String imageUrl = "";
207     String sumFrom = needsSummary ? "none" : "hdf";
208     String imgFrom = needsImage ? "none" : "hdf";
209     String filedata = hdf.getValue("commentText", "");
210     if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n");
211 
212     try {
213       XPathFactory xpathFac = XPathFactory.newInstance();
214       XPath xpath = xpathFac.newXPath();
215       InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
216       XMLReader reader = new Parser();
217       reader.setFeature(Parser.namespacesFeature, false);
218       reader.setFeature(Parser.namespacePrefixesFeature, false);
219       reader.setFeature(Parser.ignoreBogonsFeature, true);
220 
221       Transformer transformer = TransformerFactory.newInstance().newTransformer();
222       DOMResult result = new DOMResult();
223       transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
224       org.w3c.dom.Node htmlNode = result.getNode();
225 
226       if (needsSummary) {
227         StringBuilder sumStrings = new StringBuilder();
228         XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
229             + "[@itemprop='description'][1]//text()[string(.)]");
230         org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
231             XPathConstants.NODESET);
232         if (nodes.getLength() > 0) {
233           for (int i = 0; i < nodes.getLength(); i++) {
234             String tx = nodes.item(i).getNodeValue();
235             sumStrings.append(tx);
236             sumFrom = "itemprop";
237           }
238         } else {
239           XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
240               + "@class='notice-developers') and not(../@class='sidebox')"
241               + "and not(@class)]//text()");
242           nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
243           if (nodes.getLength() > 0) {
244             for (int i = 0; i < nodes.getLength(); i++) {
245               String tx = nodes.item(i).getNodeValue();
246               sumStrings.append(tx + " ");
247               sumFrom = "markup";
248             }
249           }
250         }
251         //found a summary string, now normalize it
252         sum = sumStrings.toString().trim();
253         if ((sum != null) && (!"".equals(sum))) {
254           sum = getSummaryNormalized(sum);
255         }
256         //normalized summary ended up being too short to be meaningful
257         if ("".equals(sum)) {
258            if (Doclava.META_DBG) System.out.println("Warning: description too short! ("
259             + sum.length() + "chars) ...\n\n");
260         }
261         //summary looks good, store it to the file hdf data
262         hdf.setValue("page.metaDescription", sum);
263       }
264       if (needsImage) {
265         XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
266         org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
267             XPathConstants.NODESET);
268         if (imgNodes.getLength() > 0) {
269           imageUrl = imgNodes.item(0).getNodeValue();
270           imgFrom = "itemprop";
271         } else {
272           XPathExpression FirstImgExpr = xpath.compile("//img/@src");
273           imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
274           if (imgNodes.getLength() > 0) {
275             //iterate nodes looking for valid image url and normalize.
276             for (int i = 0; i < imgNodes.getLength(); i++) {
277               String tx = imgNodes.item(i).getNodeValue();
278               //qualify and normalize the image
279               imageUrl = getImageUrlNormalized(tx);
280               //this img src did not qualify, keep looking...
281               if ("".equals(imageUrl)) {
282                 if (Doclava.META_DBG) System.out.println("    >>>>> Discarded image: " + tx);
283                 continue;
284               } else {
285                 imgFrom = "markup";
286                 break;
287               }
288             }
289           }
290         }
291         //img src url looks good, store it to the file hdf data
292         hdf.setValue("page.image", imageUrl);
293       }
294       if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
295       if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length()
296           + " chars\n\n" + sum + "\n");
297       return;
298 
299     } catch (Exception e) {
300       if (Doclava.META_DBG) System.out.println("    >>>>> Exception: " + e + "\n");
301     }
302   }
303 
304   /**
305   * Normalize a comma-delimited, multi-string value. Split on commas, remove
306   * quotes, trim whitespace, optionally make keywords/tags lowercase for
307   * easier matching.
308   *
309   * @param hdf Data object in which the metadata values are stored.
310   * @param tag The hdf var from which the metadata was extracted.
311   * @return A normalized string value for the specified tag.
312   */
getPageTagsNormalized(Data hdf, String tag)313   public static String getPageTagsNormalized(Data hdf, String tag) {
314 
315     String normTags = "";
316     StringBuilder tags = new StringBuilder();
317     String tagList = hdf.getValue(tag, "");
318     if (tag.equals("meta.tags") && (tagList.equals(""))) {
319       //use keywords as tags if no meta tags are available
320       tagList = hdf.getValue("page.tags", "");
321     }
322     if (!tagList.equals("")) {
323       tagList = tagList.replaceAll("\"", "");
324 
325       String[] tagParts = tagList.split("[,\u3001]");
326       for (int iter = 0; iter < tagParts.length; iter++) {
327         tags.append("\"");
328         if (tag.equals("meta.tags") && sLowercaseTags) {
329           tagParts[iter] = tagParts[iter].toLowerCase();
330         } else if (tag.equals("page.tags") && sLowercaseKeywords) {
331           tagParts[iter] = tagParts[iter].toLowerCase();
332         }
333         if (tag.equals("meta.tags")) {
334           //tags.append("#"); //to match hashtag format used with yt/blogger resources
335           tagParts[iter] = tagParts[iter].replaceAll(" ","");
336         }
337         tags.append(tagParts[iter].trim());
338         tags.append("\"");
339         if (iter < tagParts.length - 1) {
340           tags.append(",");
341         }
342       }
343     }
344     //write this back to hdf to expose through js
345     if (tag.equals("meta.tags")) {
346       hdf.setValue(tag, tags.toString());
347     }
348     return tags.toString();
349   }
350 
351   /**
352   * Normalize a string for which only a single value is supported.
353   * Extract the string up to the first comma, remove quotes, remove
354   * any forward-slash prefix, trim any whitespace, optionally make
355   * lowercase for easier matching.
356   *
357   * @param hdf Data object in which the metadata values are stored.
358   * @param tag The hdf var from which the metadata should be extracted.
359   * @return A normalized string value for the specified tag.
360   */
getStringValueNormalized(Data hdf, String tag)361   public static String getStringValueNormalized(Data hdf, String tag) {
362     StringBuilder outString =  new StringBuilder();
363     String tagList = hdf.getValue(tag, "");
364     tagList.replaceAll("\"", "");
365     if ("".equals(tagList)) {
366       return tagList;
367     } else {
368       int end = tagList.indexOf(",");
369       if (end != -1) {
370         tagList = tagList.substring(0,end);
371       }
372       tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList;
373       if ("sample.group".equals(tag) && sLowercaseTags) {
374         tagList = tagList.toLowerCase();
375       }
376       outString.append(tagList.trim());
377       return outString.toString();
378     }
379   }
380 
381   /**
382   * Normalize a page title. Extract the string, remove quotes, remove
383   * markup, and trim any whitespace.
384   *
385   * @param hdf Data object in which the metadata values are stored.
386   * @param tag The hdf var from which the metadata should be extracted.
387   * @return A normalized string value for the specified tag.
388   */
getTitleNormalized(Data hdf, String tag)389   public static String getTitleNormalized(Data hdf, String tag) {
390     StringBuilder outTitle =  new StringBuilder();
391     String title = hdf.getValue(tag, "");
392     if (!title.isEmpty()) {
393       title = escapeString(title);
394       if (title.indexOf("<span") != -1) {
395         String[] splitTitle = title.split("<span(.*?)</span>");
396         title = splitTitle[0];
397         for (int j = 1; j < splitTitle.length; j++) {
398           title.concat(splitTitle[j]);
399         }
400       }
401       outTitle.append(title.trim());
402     }
403     return outTitle.toString();
404   }
405 
406   /**
407   * Extract and normalize a page's language string based on the
408   * lowercased dir path. Non-supported langs are ignored and assigned
409   * the default lang string of "en".
410   *
411   * @param filename A path string to the file relative to root.
412   * @return A normalized lang value.
413   */
getLangStringNormalized(Data data, String filename)414   public static String getLangStringNormalized(Data data, String filename) {
415     String[] stripStr = filename.toLowerCase().split("\\/", 3);
416     String outFrag = "en";
417     String pathCanonical = filename;
418     if (stripStr.length > 0) {
419       for (String t : DocFile.DEVSITE_VALID_LANGS) {
420         if ("intl".equals(stripStr[0])) {
421           if (t.equals(stripStr[1])) {
422             outFrag = stripStr[1];
423             //extract the root url (exclusive of intl/nn)
424             pathCanonical = stripStr[2];
425             break;
426           }
427         }
428       }
429     }
430     //extract the root url (exclusive of intl/nn)
431     data.setValue("path.canonical", pathCanonical);
432     return outFrag;
433   }
434 
435   /**
436   * Normalize a page summary string and truncate as needed. Strings
437   * exceeding max_chars are truncated at the first word boundary
438   * following the max_size marker. Strings smaller than min_chars
439   * are discarded (as they are assumed to be too little context).
440   *
441   * @param s String extracted from the page as it's summary.
442   * @return A normalized string value.
443   */
getSummaryNormalized(String s)444   public static String getSummaryNormalized(String s) {
445     String str = "";
446     int max_chars = 250;
447     int min_chars = 50;
448     int marker = 0;
449     if (s.length() < min_chars) {
450       return str;
451     } else {
452       str = s.replaceAll("^\"|\"$", "");
453       str = str.replaceAll("\\s+", " ");
454       str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
455       str = escapeString(str);
456       BreakIterator bi = BreakIterator.getWordInstance();
457       bi.setText(str);
458       if (str.length() > max_chars) {
459         marker = bi.following(max_chars);
460       } else {
461         marker = bi.last();
462       }
463       str = str.substring(0, marker);
464       str = str.concat("\u2026" );
465     }
466     return str;
467   }
468 
escapeString(String s)469   public static String escapeString(String s) {
470     s = s.replaceAll("\"", "&quot;");
471     s = s.replaceAll("\'", "&#39;");
472     s = s.replaceAll("<", "&lt;");
473     s = s.replaceAll(">", "&gt;");
474     s = s.replaceAll("/", "&#47;");
475     return s;
476   }
477 
478   //Disqualify img src urls that include these substrings
479   public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
480       "icon_play.png", "robot-tiny"};
481 
inList(String s, String[] list)482   public static boolean inList(String s, String[] list) {
483     for (String t : list) {
484       if (s.contains(t)) {
485         return true;
486       }
487     }
488     return false;
489   }
490 
491   /**
492   * Normalize an img src url by removing docRoot and leading
493   * slash for local image references. These are added later
494   * in js to support offline mode and keep path reference
495   * format consistent with hrefs.
496   *
497   * @param url Abs or rel url sourced from img src.
498   * @return Normalized url if qualified, else empty
499   */
getImageUrlNormalized(String url)500   public static String getImageUrlNormalized(String url) {
501     String absUrl = "";
502     // validate to avoid choosing using specific images
503     if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) {
504       absUrl = url.replace("{@docRoot}", "");
505       absUrl = absUrl.replaceFirst("^/(?!/)", "");
506     }
507     return absUrl;
508   }
509 
510   /**
511   * Normalize an href url by removing docRoot and leading
512   * slash for local image references. These are added later
513   * in js to support offline mode and keep path reference
514   * format consistent with hrefs.
515   *
516   * @param url Abs or rel page url sourced from href
517   * @return Normalized url, either abs or rel to root
518   */
getPageUrlNormalized(String url)519   public static String getPageUrlNormalized(String url) {
520     String absUrl = "";
521 
522     if ((url !=null) && (!url.equals(""))) {
523       absUrl = url.replace("{@docRoot}", "");
524       if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) {
525         absUrl = absUrl.replaceFirst("^en/", "");
526       }
527       absUrl = absUrl.replaceFirst("^/(?!/)", "");
528     }
529     return absUrl;
530   }
531 
532   /**
533   * Given a metadata node, add it as a child of a root node based on its
534   * type. If there is no root node that matches the node's type, create one
535   * and add the metadata node as a child node.
536   *
537   * @param gNode The node to attach to a root node or add as a new root node.
538   * @param rootList The current list of root nodes.
539   * @return The updated list of root nodes.
540   */
appendMetaNodeByLang(Node gNode, List<Node> rootList)541   public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) {
542 
543     String nodeLang = gNode.getLang();
544     boolean matched = false;
545     for (Node n : rootList) {
546       if (n.getLang().equals(nodeLang)) {  //find any matching lang node
547         appendMetaNodeByType(gNode,n.getChildren());
548         //n.getChildren().add(gNode);
549         matched = true;
550         break; // add to the first root node only
551       } // tag did not match
552     } // end rootnodes matching iterator
553     if (!matched) {
554       List<Node> mlangList = new ArrayList<Node>(); // list of file objects that have a given lang
555       //mlangList.add(gNode);
556       Node tnode = new Node.Builder().setChildren(mlangList).setLang(nodeLang).build();
557       rootList.add(tnode);
558       appendMetaNodeByType(gNode, mlangList);
559     }
560     return rootList;
561   }
562 
563   /**
564   * Given a metadata node, add it as a child of a root node based on its
565   * type. If there is no root node that matches the node's type, create one
566   * and add the metadata node as a child node.
567   *
568   * @param gNode The node to attach to a root node or add as a new root node.
569   * @param rootList The current list of root nodes.
570   * @return The updated list of root nodes.
571   */
appendMetaNodeByType(Node gNode, List<Node> rootList)572   public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {
573 
574     String nodeTags = gNode.getType();
575     boolean matched = false;
576     for (Node n : rootList) {
577       if (n.getType().equals(nodeTags)) {  //find any matching type node
578         n.getChildren().add(gNode);
579         matched = true;
580         break; // add to the first root node only
581       } // tag did not match
582     } // end rootnodes matching iterator
583     if (!matched) {
584       List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type
585       mtaglist.add(gNode);
586       Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build();
587       rootList.add(tnode);
588     }
589     return rootList;
590   }
591 
592   /**
593   * Given a metadata node, add it as a child of a root node based on its
594   * tag. If there is no root node matching the tag, create one for it
595   * and add the metadata node as a child node.
596   *
597   * @param gNode The node to attach to a root node or add as a new root node.
598   * @param rootTagNodesList The current list of root nodes.
599   * @return The updated list of root nodes.
600   */
appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList)601   public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {
602 
603     for (int iter = 0; iter < gNode.getChildren().size(); iter++) {
604       if (gNode.getChildren().get(iter).getTags() != null) {
605         List<String> nodeTags = gNode.getChildren().get(iter).getTags();
606         boolean matched = false;
607         for (String t : nodeTags) { //process each of the meta.tags
608           for (Node n : rootTagNodesList) {
609             if (n.getLabel().equals(t.toString())) {
610               n.getTags().add(String.valueOf(iter));
611               matched = true;
612               break; // add to the first root node only
613             } // tag did not match
614           } // end rootnodes matching iterator
615           if (!matched) {
616             List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag
617             mtaglist.add(String.valueOf(iter));
618             Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build();
619             rootTagNodesList.add(tnode);
620           }
621         }
622       }
623     }
624     return rootTagNodesList;
625   }
626 
627   /**
628   * Append the contents of jd_extras to jd_lists_unified for each language.
629   *
630   * @param extrasFilename The lang-specific extras file to append.
631   * @param unifiedFilename The lang-specific unified metadata file.
632   */
appendExtrasMetadata(String extrasFilename, String unifiedFilename)633   public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) {
634 
635     File f = new File(ClearPage.outputDir + "/" + extrasFilename);
636     if (f.exists() && !f.isDirectory()) {
637       ClearPage.copyFile(true, f, unifiedFilename, true);
638     }
639   }
640 
641   public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
642     public int compare (Node one, Node other) {
643       return one.getLabel().compareTo(other.getLabel());
644     }
645   };
646 
647   public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
648     public int compare (Node one, Node other) {
649       return one.getType().compareTo(other.getType());
650     }
651   };
652 
653     public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() {
654     public int compare (Node one, Node other) {
655       return one.getLang().compareTo(other.getLang());
656     }
657   };
658 
659   /**
660   * A node for storing page metadata. Use Builder.build() to instantiate.
661   */
662   public static class Node {
663 
664     private String mLabel; // holds page.title or similar identifier
665     private String mCategory; // subtabs, example 'training' 'guides'
666     private String mSummary; // Summary for card or similar use
667     private String mLink; //link href for item click
668     private String mGroup; // from sample.group in _index.jd
669     private List<String> mKeywords; // from page.tags
670     private List<String> mTags; // from meta.tags
671     private String mImage; // holds an href, fully qualified or relative to root
672     private List<Node> mChildren;
673     private String mLang;
674     private String mType; // design, develop, distribute, youtube, blog, etc
675     private String mTimestamp; // optional timestamp eg 1447452827
676 
Node(Builder builder)677     private Node(Builder builder) {
678       mLabel = builder.mLabel;
679       mCategory = builder.mCategory;
680       mSummary = builder.mSummary;
681       mLink = builder.mLink;
682       mGroup = builder.mGroup;
683       mKeywords = builder.mKeywords;
684       mTags = builder.mTags;
685       mImage = builder.mImage;
686       mChildren = builder.mChildren;
687       mLang = builder.mLang;
688       mType = builder.mType;
689       mTimestamp = builder.mTimestamp;
690     }
691 
692     private static class Builder {
693       private String mLabel, mCategory, mSummary, mLink, mGroup, mImage, mLang, mType, mTimestamp;
694       private List<String> mKeywords = null;
695       private List<String> mTags = null;
696       private List<Node> mChildren = null;
setLabel(String mLabel)697       public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;}
setCategory(String mCategory)698       public Builder setCategory(String mCategory) {
699         this.mCategory = mCategory; return this;
700       }
setSummary(String mSummary)701       public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;}
setLink(String mLink)702       public Builder setLink(String mLink) {this.mLink = mLink; return this;}
setGroup(String mGroup)703       public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;}
setKeywords(List<String> mKeywords)704       public Builder setKeywords(List<String> mKeywords) {
705         this.mKeywords = mKeywords; return this;
706       }
setTags(List<String> mTags)707       public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;}
setImage(String mImage)708       public Builder setImage(String mImage) {this.mImage = mImage; return this;}
setChildren(List<Node> mChildren)709       public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;}
setLang(String mLang)710       public Builder setLang(String mLang) {this.mLang = mLang; return this;}
setType(String mType)711       public Builder setType(String mType) {this.mType = mType; return this;}
setTimestamp(String mTimestamp)712       public Builder setTimestamp(String mTimestamp) {this.mTimestamp = mTimestamp; return this;}
build()713       public Node build() {return new Node(this);}
714     }
715 
716     /**
717     * Render a tree of metadata nodes organized by type.
718     * @param buf Output buffer to render to.
719     */
renderTypeResources(StringBuilder buf)720     void renderTypeResources(StringBuilder buf) {
721       List<Node> list = mChildren; //list of type rootnodes
722       if (list == null || list.size() == 0) {
723         buf.append("null");
724       } else {
725         final int n = list.size();
726         for (int i = 0; i < n; i++) {
727           buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = [");
728           list.get(i).renderTypes(buf); //render this type's children
729           buf.append("\n];\n\n");
730         }
731       }
732     }
733 
734     /**
735     * Render a tree of metadata nodes organized by lang.
736     * @param buf Output buffer to render to.
737     */
renderLangResources(StringBuilder buf, String langname)738     void renderLangResources(StringBuilder buf, String langname) {
739       List<Node> list = mChildren; //list of type rootnodes
740       if (list == null || list.size() == 0) {
741         buf.append("null");
742       } else {
743         final int n = list.size();
744         for (int i = 0; i < n; i++) {
745           buf.append("METADATA['" + langname + "']." + list.get(i).mType + " = [");
746           list.get(i).renderTypes(buf); //render this lang's children
747           buf.append("\n];\n\n");
748         }
749       }
750     }
751     /**
752     * Render all metadata nodes for a specific type.
753     * @param buf Output buffer to render to.
754     */
renderTypes(StringBuilder buf)755     void renderTypes(StringBuilder buf) {
756       List<Node> list = mChildren;
757       if (list == null || list.size() == 0) {
758         buf.append("nulltype");
759       } else {
760         final int n = list.size();
761         for (int i = 0; i < n; i++) {
762           buf.append("\n      {\n");
763           buf.append("        \"title\":\"");
764           renderStrWithUcs(buf, list.get(i).mLabel);
765           buf.append("\",\n" );
766           buf.append("        \"summary\":\"");
767           renderStrWithUcs(buf, list.get(i).mSummary);
768           buf.append("\",\n" );
769           buf.append("        \"url\":\"" + list.get(i).mLink + "\",\n" );
770           if (!"".equals(list.get(i).mImage)) {
771             buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
772           }
773           if (!"".equals(list.get(i).mGroup)) {
774             buf.append("        \"group\":\"");
775             renderStrWithUcs(buf, list.get(i).mGroup);
776             buf.append("\",\n" );
777           }
778           if (!"".equals(list.get(i).mCategory)) {
779             buf.append("        \"category\":\"" + list.get(i).mCategory + "\",\n" );
780           }
781           if ((list.get(i).mType != null) && (list.get(i).mType != "")) {
782             buf.append("        \"type\":\"" + list.get(i).mType + "\",\n");
783           }
784           list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
785           list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
786           if (!"".equals(list.get(i).mTimestamp)) {
787             buf.append("        \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n");
788           }
789           buf.append("        \"lang\":\"" + list.get(i).mLang + "\"" );
790           buf.append("\n      }");
791           if (i != n - 1) {
792             buf.append(", ");
793           }
794         }
795       }
796     }
797 
798     /**
799     * Build and render a list of tags associated with each type.
800     * @param buf Output buffer to render to.
801     */
renderTypesByTag(StringBuilder buf)802     void renderTypesByTag(StringBuilder buf) {
803       List<Node> list = mChildren; //list of rootnodes
804       if (list == null || list.size() == 0) {
805         buf.append("null");
806       } else {
807         final int n = list.size();
808         for (int i = 0; i < n; i++) {
809         buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {");
810         List<Node> mTagList = new ArrayList(); //list of rootnodes
811         mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList);
812         list.get(i).renderTagIndices(buf, mTagList);
813           buf.append("\n};\n\n");
814         }
815       }
816     }
817 
818     /**
819     * Render a list of tags associated with a type, including the
820     * tag's indices in the type array.
821     * @param buf Output buffer to render to.
822     * @param tagList Node tree of types to render.
823     */
renderTagIndices(StringBuilder buf, List<Node> tagList)824     void renderTagIndices(StringBuilder buf, List<Node> tagList) {
825       List<Node> list = tagList;
826       if (list == null || list.size() == 0) {
827         buf.append("");
828       } else {
829         final int n = list.size();
830         for (int i = 0; i < n; i++) {
831           buf.append("\n    " + list.get(i).mLabel + ":[");
832           renderArrayValue(buf, list.get(i).mTags);
833           buf.append("]");
834           if (i != n - 1) {
835             buf.append(", ");
836           }
837         }
838       }
839     }
840 
841     /**
842     * Render key:arrayvalue pair.
843     * @param buf Output buffer to render to.
844     * @param type The list value to render as an arrayvalue.
845     * @param key The key for the pair.
846     */
renderArrayType(StringBuilder buf, List<String> type, String key)847     void renderArrayType(StringBuilder buf, List<String> type, String key) {
848       buf.append("        \"" + key + "\": [");
849       renderArrayValue(buf, type);
850       buf.append("],\n");
851     }
852 
853     /**
854     * Render an array value to buf, with special handling of unicode characters.
855     * @param buf Output buffer to render to.
856     * @param type The list value to render as an arrayvalue.
857     */
renderArrayValue(StringBuilder buf, List<String> type)858     void renderArrayValue(StringBuilder buf, List<String> type) {
859       List<String> list = type;
860       if (list != null) {
861         final int n = list.size();
862         for (int i = 0; i < n; i++) {
863           String tagval = list.get(i).toString();
864           renderStrWithUcs(buf,tagval);
865           if (i != n - 1) {
866             buf.append(",");
867           }
868         }
869       }
870     }
871 
872     /**
873     * Render a string that can include ucs2 encoded characters.
874     * @param buf Output buffer to render to.
875     * @param chars String to append to buf with any necessary encoding
876     */
renderStrWithUcs(StringBuilder buf, String chars)877     void renderStrWithUcs(StringBuilder buf, String chars) {
878       String strval = chars;
879       final int L = strval.length();
880       for (int t = 0; t < L; t++) {
881         char c = strval.charAt(t);
882         if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE ) {
883           // we have a UTF-16 multi-byte character
884           int codePoint = strval.codePointAt(t);
885           int charSize = Character.charCount(codePoint);
886           t += charSize - 1;
887           buf.append(String.format("\\u%04x",codePoint));
888         } else if (c >= ' ' && c <= '~' && c != '\\') {
889           buf.append(c);
890         } else {
891           // we are encoding a two byte character
892           buf.append(String.format("\\u%04x", (int) c));
893         }
894       }
895     }
896 
getLabel()897     public String getLabel() {
898       return mLabel;
899     }
900 
setLabel(String label)901     public void setLabel(String label) {
902        mLabel = label;
903     }
904 
getCategory()905     public String getCategory() {
906       return mCategory;
907     }
908 
setCategory(String title)909     public void setCategory(String title) {
910        mCategory = title;
911     }
912 
getSummary()913     public String getSummary() {
914       return mSummary;
915     }
916 
setSummary(String summary)917     public void setSummary(String summary) {
918        mSummary = summary;
919     }
920 
getLink()921     public String getLink() {
922       return mLink;
923     }
924 
setLink(String ref)925     public void setLink(String ref) {
926        mLink = ref;
927     }
928 
getGroup()929     public String getGroup() {
930       return mGroup;
931     }
932 
setGroup(String group)933     public void setGroup(String group) {
934       mGroup = group;
935     }
936 
getTags()937     public List<String> getTags() {
938         return mTags;
939     }
940 
setTags(String tags)941     public void setTags(String tags) {
942       if ("".equals(tags)) {
943         mTags = null;
944       } else {
945         List<String> tagList = new ArrayList();
946         String[] tagParts = tags.split(",");
947 
948         for (String t : tagParts) {
949           tagList.add(t);
950         }
951         mTags = tagList;
952       }
953     }
954 
getKeywords()955     public List<String> getKeywords() {
956         return mKeywords;
957     }
958 
setKeywords(String keywords)959     public void setKeywords(String keywords) {
960       if ("".equals(keywords)) {
961         mKeywords = null;
962       } else {
963         List<String> keywordList = new ArrayList();
964         String[] keywordParts = keywords.split(",");
965 
966         for (String k : keywordParts) {
967           keywordList.add(k);
968         }
969         mKeywords = keywordList;
970       }
971     }
972 
getImage()973     public String getImage() {
974         return mImage;
975     }
976 
setImage(String ref)977     public void setImage(String ref) {
978        mImage = ref;
979     }
980 
getChildren()981     public List<Node> getChildren() {
982         return mChildren;
983     }
984 
setChildren(List<Node> node)985     public void setChildren(List<Node> node) {
986         mChildren = node;
987     }
988 
getLang()989     public String getLang() {
990       return mLang;
991     }
992 
setLang(String lang)993     public void setLang(String lang) {
994       mLang = lang;
995     }
996 
getType()997     public String getType() {
998       return mType;
999     }
1000 
getTimestamp()1001     public String getTimestamp() {
1002       return mTimestamp;
1003     }
1004 
setType(String type)1005     public void setType(String type) {
1006       mType = type;
1007     }
1008 
setTimestamp(String timestamp)1009     public void setTimestamp(String timestamp) {
1010       mTimestamp = timestamp;
1011     }
1012   }
1013 }
1014