• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// extract site frontmatter and read from /sitemap.tsv, save to json
2
3import * as fs from "node:fs/promises";
4import * as path from "node:path";
5import { default as process } from "node:process";
6import { default as matter } from "gray-matter";
7import { SitemapStream, streamToPromise } from "sitemap";
8import { Readable } from "node:stream";
9import { Dirent } from "node:fs";
10
11// utilities and constants
12
// files to skip while crawling: dependency dirs, the Jekyll build cache,
// and any previously generated sitemap output (name starts with "sitemap")
const SKIP_THESE = /(node_modules|\.jekyll-cache|^sitemap.*)/;

// final URL of site (used as the hostname for sitemap.xml and page URLs)
const SITE = "https://cldr.unicode.org";

// input file: tab-indented page hierarchy, validated by readTsvSiteMap()
const SITEMAPFILE = "sitemap.tsv";

// utility collator
// NOTE(review): `coll` is not referenced anywhere in this file — possibly
// dead code or reserved for future locale-aware sorting; confirm before removing.
const coll = new Intl.Collator(["und"]);
24
/**
 * Directory Crawler: process one markdown file — parse its frontmatter
 * and append an entry to the crawl results.
 * @param {string} d parent directory (currently unused; kept for interface parity)
 * @param {string} fullPath path to this file
 * @param {object} out output object; an entry is appended to out.all
 */
async function processFile(d, fullPath, out) {
  const f = await fs.readFile(fullPath, "utf-8");
  const m = matter(f);
  fullPath = fullPath.replace(/\\/g, '/'); // backslash with slash, for win
  if (m && m.data) {
    const { data } = m;
    out.all.push({ ...data, fullPath });
  } else {
    // No frontmatter: record the path alone (title is synthesized later).
    // Fix: this previously pushed to `out.app`, which is never initialized
    // (main() creates only out.all), so this branch would have thrown.
    out.all.push({ fullPath }); // synthesize data?
  }
}
42
/**
 * Directory Crawler: process one dirent — skip excluded names, recurse
 * into directories, and hand markdown files to processFile().
 * @param {string} d parent directory
 * @param {object} out output object
 * @param {Dirent} e directory entry
 * @returns
 */
async function processEntry(d, out, e) {
  // excluded names are skipped whether file or directory
  if (SKIP_THESE.test(e.name)) {
    return;
  }
  const entryPath = path.join(d, e.name);
  if (e.isDirectory()) {
    // descend into the subdirectory
    return await traverse(entryPath, out);
  }
  // only plain *.md files are of interest
  if (e.isFile() && /\.md$/.test(e.name)) {
    await processFile(d, entryPath, out);
  }
}
60
/**
 * Directory Crawler: kick off the crawl (or subcrawl) of a directory,
 * processing all entries concurrently.
 * @param {string} d path to directory
 * @param {object} out output struct
 */
async function traverse(d, out) {
  const entries = await fs.readdir(d, { withFileTypes: true });
  return Promise.all(entries.map((entry) => processEntry(d, out, entry)));
}
71
/** replace a/b/c.md with a/b — i.e. everything before the final slash,
 * or "" when the path has no slash */
function path2dir(p) {
  const lastSlash = p.lastIndexOf("/");
  return lastSlash === -1 ? "" : p.slice(0, lastSlash);
}
77
/** replace a/b/c.md with a/b/c.html; paths without a .md suffix pass through */
function md2html(p) {
  if (!p.endsWith(".md")) {
    return p;
  }
  return `${p.slice(0, -3)}.html`;
}
82
/** replace a/b/c.html with a/b/c.md; paths without a .html suffix pass through */
function html2md(p) {
  if (!p.endsWith(".html")) {
    return p;
  }
  return `${p.slice(0, -5)}.md`;
}
87
/** replace a/b/c.md with a/b/c — strip a trailing .md extension, if any */
function dropmd(p) {
  if (!p.endsWith(".md")) {
    return p;
  }
  return p.slice(0, -3);
}
92
/**
 * Build a run of tab characters.
 * @param {number} n number of tabs
 * @returns {string} string with n tabs ("" when n <= 0)
 */
function tabs(n) {
  // String.prototype.repeat throws a RangeError on negative counts;
  // clamp to 0 to preserve the original loop's behavior of returning "".
  return "\t".repeat(Math.max(0, n));
}
105
/** convert a markdown path to its final URL on the production site */
function mkurl(p) {
  const htmlPath = md2html(p);
  return [SITE, htmlPath].join("/");
}
110
/**
 * Write ./sitemap.xml from the crawled page list.
 * Each entry's lastmod comes from the source file's mtime on disk.
 * @param {object} out output object with .all as produced by traverse()
 */
async function writeXmlSiteMap(out) {
  // simple list of links; stat all source files in parallel
  // (removed unused `title` from the destructuring — only fullPath is needed)
  const links = await Promise.all(
    out.all.map(async ({ fullPath }) => {
      const stat = await fs.stat(fullPath);
      return {
        url: dropmd(`/${fullPath}`),
        lastmod: stat.mtime.toISOString(),
      };
    })
  );
  const stream = new SitemapStream({ hostname: SITE });
  const data = (
    await streamToPromise(Readable.from(links).pipe(stream))
  ).toString();
  await fs.writeFile("./sitemap.xml", data, "utf-8");
  console.log(`Wrote sitemap.xml with ${links.length} entries`);
}
129
/**
 * Read and validate the user-maintained sitemap.tsv (a tab-indented tree of
 * page paths, without the .md extension), building out.usermap: a map from
 * path to { title, parent, children }. Throws if any validation errors were
 * collected (duplicates, unknown files, bad indentation, or crawled pages
 * missing from the TSV).
 * @param {object} out crawl results; reads out.all, writes out.usermap
 * @throws {Error} when the TSV has one or more validation errors
 */
async function readTsvSiteMap(out) {
  console.log(`Reading ${SITEMAPFILE}`);
  const lines = (await fs.readFile(SITEMAPFILE, "utf-8")).split("\n"); // don't skip comment lines here so we can get line numbers.
  const errors = [];

  // user's specified map, keyed by extension-less path. Example shape:
  const usermap = {
    /*
    index: {
      parent: null,
      title: 'CLDR Site',
      children: [
        'cldr-spec',
        'downloads',
      ],
    },
    'cldr-spec': {
      parent: 'index',
      title: …,
      children: [
        'cldr-spec/collation-guidelines',
      ],
    },
    'cldr-spec/collation-guidelines': {
      parent: 'cldr-spec',
      title: …,
      children: null,
    },
  */
  };
  // stack of parents, in order: parents.length equals the expected
  // indent depth of the next line.
  let parents = [];
  let n = 0; // 1-based TSV line number, for error messages
  for (let line of lines) {
    n++;
    const location = `${SITEMAPFILE}:${n}: `; // for errors
    // skip comment or blank lines
    if (/^[ \t]*#/.test(line) || !line.trim()) continue;

    // # of leading tabs = nesting depth
    // (note: this local shadows the module-level tabs() helper)
    const tabs = /^[\t]*/.exec(line)[0].length;
    // rest of line: the actual path
    // (note: this local shadows the node:path module import in this scope)
    const path = line.slice(tabs).trim();
    if (usermap[path]) {
      errors.push(`${location} duplicate path: ${path}`);
      continue;
    }
    // each TSV entry must correspond to a crawled markdown file
    const foundItem = out.all.find(({ fullPath }) => fullPath === `${path}.md`);
    if (!foundItem) {
      errors.push(`${location} could not find file: ${path}.md`);
      continue;
    }
    if (!foundItem.title) {
      errors.push(`${location} missing title in ${path}.md`);
      // let this continue on
    }
    usermap[path] = {
      title: foundItem.title ?? path, // fall back to the path when untitled
    };
    const parentCount = parents.length;
    if (tabs < parentCount) {
      /**
       * index [1]
       *    foo [2]
       *
       */
      // outdent: pop back to the level this line sits at
      if (tabs == 0) {
        errors.push(`${location} can't have more than one root page!`);
        break;
      }
      // drop 'n' parents
      parents = parents.slice(0, tabs);
    } else if (tabs > parentCount) {
      // Error - wrong indent (a child may only be one level deeper;
      // tabs == parentCount is the normal "one level down" case)
      errors.push(
        `${location} indent too deep (expected ${parentCount} tabs at most)`
      );
      continue;
    }
    const parent = parents.slice(-1)[0] || null; // calculate parent (null for index page)
    usermap[path].parent = parent;
    if (parent) {
      // not for index
      usermap[parent].children = usermap[parent].children ?? [];
      usermap[parent].children.push(path);
    }
    parents.push(path); // for next time
  }
  out.usermap = usermap;
  // reverse check: every crawled page must appear somewhere in the TSV
  out.all.forEach(({ fullPath }) => {
    if (!usermap[dropmd(fullPath)]) {
      errors.push(`${SITEMAPFILE}: missing: ${dropmd(fullPath)}`);
    }
  });
  if (errors.length) {
    errors.forEach((l) => console.error(l));
    throw Error(`${errors.length} errors reading tsv`);
  } else {
    console.log(`${SITEMAPFILE} Valid.`);
  }
}
234
/** top level async: crawl the tree, emit sitemap.xml, validate the TSV,
 * then write the navigation tree JSON asset */
async function main() {
  const out = { all: [] };
  await fs.mkdir("assets/json/", { recursive: true });
  await traverse(".", out);
  await writeXmlSiteMap(out);
  await readTsvSiteMap(out);
  // write final json asset
  delete out.all; //not needed at this phase, so trim out of the deploy
  const treeJson = JSON.stringify(out, null, " ");
  await fs.writeFile("assets/json/tree.json", treeJson);
  console.log("Wrote assets/json/tree.json");
}
249
// kick off the crawl; report success, or log the failure and mark the
// process as failed (two-arg then() is deliberate: the error handler
// applies only to main() itself, not to the success handler)
const onDone = () => console.log("Done.");
const onFail = (e) => {
  console.error(e);
  process.exitCode = 1;
};
main().then(onDone, onFail);
257