const fs = require("fs").promises;
const { marked } = require("marked");
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");
const markedAlert = require('marked-alert');

// Setup some options for our markdown renderer
marked.setOptions({
  renderer: new marked.Renderer(),

  // Add a code highlighter.
  // NOTE(review): whether the `highlight` option (and some of the flags below)
  // is honored depends on the installed version of marked — confirm.
  highlight: function (code, forlanguage) {
    const hljs = require("highlight.js");
    // Fall back to plain text when highlight.js does not know the language.
    const language = hljs.getLanguage(forlanguage) ? forlanguage : "plaintext";
    return hljs.highlight(code, { language }).value;
  },
  pedantic: false,
  gfm: true,
  breaks: false,
  sanitize: false,
  smartLists: true,
  smartypants: false,
  xhtml: false,
});

marked.use(markedAlert());

/**
 * Read the input .md file, and write to a corresponding .html file
 * next to it (same directory, same base name).
 *
 * Besides rendering the markdown, this post-processes the DOM: it fills in
 * the <head>, wraps the content in <div class="body">, prepends the shared
 * header, rewrites .md links to .html, turns "Table: " h6 headings into
 * table captions, and swaps explicit anchor names with generated ids.
 *
 * Reads 'gtag.html' and 'header.html' relative to the current working directory.
 *
 * @param {string} infile path to input file
 * @returns {Promise<string>} name of output file (for status update)
 */
async function renderit(infile) {
  const gtag = (await fs.readFile('gtag.html', 'utf-8')).trim();
  console.log(`Reading ${infile}`);
  const basename = path.basename(infile, ".md");
  const outfile = path.join(path.dirname(infile), `${basename}.html`);
  let f1 = await fs.readFile(infile, "utf-8");

  // oh the irony of removing a BOM before posting to unicode.org
  // After UTF-8 decoding the BOM is a single U+FEFF code unit (not 3 bytes),
  // so exactly one character must be dropped.
  if (f1.charCodeAt(0) === 0xfeff) {
    f1 = f1.substring(1);
  }

  // render to HTML
  const rawHtml = marked(f1);

  // now fix. Spin up a JSDOM so we can manipulate
  const dom = new JSDOM(rawHtml);
  const document = dom.window.document;

  // First the HEAD
  const head = document.getElementsByTagName("head")[0];

  // add CSS to HEAD
  head.innerHTML =
    head.innerHTML +
    gtag + '\n' +
    `<meta charset="utf-8">\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='../reports-v2.css'>\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='tr35.css'>\n`;

  // Assume there's not already a title and that we need to add one.
  if (document.getElementsByTagName("title").length >= 1) {
    console.log("Already had a <title>… not changing.");
  } else {
    const title = document.createElement("title");
    const firstH1 = document.getElementsByTagName("h1")[0];
    let titleText;
    if (firstH1) {
      titleText = firstH1.textContent.replace(')Part', ') Part');
    } else {
      // No heading to derive a title from: fall back to the file's base name
      // rather than failing with an opaque TypeError.
      console.error(`No <h1> found in ${infile}; using '${basename}' as <title>.`);
      titleText = basename;
    }
    title.appendChild(document.createTextNode(titleText));
    head.appendChild(title);
  }

  // calculate the header object
  const header = document.createElement("div");
  header.setAttribute("class", "header");

  // taken from prior TRs, read from the header in 'header.html'
  header.innerHTML = (await fs.readFile('header.html', 'utf-8')).trim();

  // Move all elements out of the top level body and into a subelement
  // The subelement is <div class="body"/>
  const body = document.getElementsByTagName("body")[0];
  const div = document.createElement("div");
  div.setAttribute("class", "body");
  let sawFirstTable = false;
  // Iterate over a snapshot: body.childNodes is a live list, and removing
  // nodes while iterating it directly would skip every other child.
  for (const e of Array.from(body.childNodes)) {
    body.removeChild(e);
    if (div.childNodes.length === 0 && e.tagName === 'P') {
      // update title element to <h2 class="uaxtitle"/>
      const newTitle = document.createElement('h2');
      newTitle.setAttribute("class", "uaxtitle");
      newTitle.appendChild(document.createTextNode(e.textContent));
      div.appendChild(newTitle);
    } else {
      if (!sawFirstTable && e.tagName === 'TABLE') {
        // Update first table to simple width=90%
        // The first table is the document header (Author, etc.)
        e.setAttribute("class", "simple");
        e.setAttribute("width", "90%");
        sawFirstTable = true;
      }
      div.appendChild(e);
    }
  }

  /**
   * create a <SCRIPT/> object.
   * Choose ONE of src or code.
   * @param {Object} obj
   * @param {string} [obj.src] source of script as url
   * @param {string} [obj.code] code for script as text
   * @returns {HTMLScriptElement} the new, not yet attached, script element
   */
  function getScript({ src, code }) {
    const script = document.createElement("script");
    if (src) {
      script.setAttribute("src", src);
    }
    if (code) {
      script.appendChild(document.createTextNode(code));
    }
    return script;
  }

  // body has no content left at this point.
  // Add all the pieces back.
  body.appendChild(getScript({ src: './js/anchor.min.js' }));
  body.appendChild(header);
  body.appendChild(div);

  // now, fix all links from ….md#… to ….html#…
  for (const e of document.getElementsByTagName("a")) {
    const href = e.getAttribute("href");
    let m;
    if ((m = /^(.*)\.md#(.*)$/.exec(href))) {
      e.setAttribute("href", `${m[1]}.html#${m[2]}`);
    } else if ((m = /^(.*)\.md$/.exec(href))) {
      e.setAttribute("href", `${m[1]}.html`);
    }
  }

  // put this last
  body.appendChild(getScript({
    // This invokes anchor.js
    code: `anchors.add('h1, h2, h3, h4, h5, h6, caption, dfn');`
  }));

  // Now, fixup captions
  // Look for: <h6>Table: …</h6> followed by <table>…</table>
  // Move the h6 inside the table, but as <caption/>
  const h6es = document.getElementsByTagName("h6");
  // h6es is a live collection, so removals are deferred until after the loop.
  const toRemove = [];
  for (const h6 of h6es) {
    if (!h6.innerHTML.startsWith("Table: ")) {
      console.error('Does not start with Table: ' + h6.innerHTML);
      continue; // no 'Table:' marker.
    }
    const next = h6.nextElementSibling;
    // next is null when the h6 is the last element of its parent.
    if (!next || next.tagName !== 'TABLE') {
      console.error('Not a following table for ' + h6.innerHTML);
      continue; // Next item is not a table. Maybe a PRE or something.
    }
    const caption = document.createElement("caption");
    for (const e of h6.childNodes) {
      caption.appendChild(e.cloneNode(true));
    }
    // Snapshot the attributes: h6.attributes is live, and removing entries
    // while iterating it directly would skip some of them.
    for (const p of Array.from(h6.attributes)) {
      caption.setAttribute(p.name, p.value);
      h6.removeAttribute(p.name); // so that it does not have a conflicting id
    }
    next.prepend(caption);
    toRemove.push(h6);
  }
  for (const h6 of toRemove) {
    h6.remove();
  }

  // Drop generated anchors where there is an explicit anchor
  const anchors = document.getElementsByTagName("a");
  for (const a of anchors) {
    // a needs to have a name
    const aname = a.getAttribute('name');
    if (!aname) continue;
    // parent needs to have a single child element and its own 'id'.
    const parent = a.parentElement;
    if (parent.childElementCount !== 1) continue;
    const parid = parent.getAttribute('id');
    if (!parid) continue;
    // Criteria met. swap the name and id
    parent.setAttribute('id', aname);
    a.setAttribute('name', parid);
  }

  // OK, done munging the DOM, write it out.
  console.log(`Writing ${outfile}`);

  // TODO: we assume that DOCTYPE is not written.
  await fs.writeFile(outfile, `<!DOCTYPE html>\n`
    + dom.serialize());
  return outfile;
}

/**
 * Convert all .md files found directly in ./dist, concurrently.
 * @returns {Promise<string[]>} list of output files
 */
async function fixall() {
  const outbox = "./dist";

  // TODO: move source file copy into JavaScript?
  // srcbox = '../../../docs/ldml';

  const fileList = (await fs.readdir(outbox))
    .filter((f) => /\.md$/.test(f))
    .map((f) => path.join(outbox, f));
  return Promise.all(fileList.map(renderit));
}

// Entry point: print the list of written files, or report the failure
// and make the process exit with a non-zero status.
fixall().then(
  (x) => console.dir(x),
  (e) => {
    console.error(e);
    process.exitCode = 1;
  }
);