• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package markdown
6
7import (
8	"bytes"
9	"strings"
10)
11
// A tableTrimmed is the text of one table line in the form returned by
// tableTrimOuter: surrounding blanks removed, then at most one leading
// and one trailing pipe removed.
12type tableTrimmed string
13
// isTableSpace reports whether c is a blank that may pad table cells:
// space, horizontal tab, vertical tab, or form feed.
// Newline and carriage return are not table blanks.
func isTableSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\v', '\f':
		return true
	}
	return false
}
17
// tableTrimSpace returns s without its leading and trailing table blanks
// (space, horizontal tab, vertical tab, form feed).
// Other white space, such as newlines, is left in place.
func tableTrimSpace(s string) string {
	return strings.Trim(s, " \t\v\f")
}
29
30func tableTrimOuter(row string) tableTrimmed {
31	row = tableTrimSpace(row)
32	if len(row) > 0 && row[0] == '|' {
33		row = row[1:]
34	}
35	if len(row) > 0 && row[len(row)-1] == '|' {
36		row = row[:len(row)-1]
37	}
38	return tableTrimmed(row)
39}
40
41func isTableStart(hdr1, delim1 string) bool {
42	// Scan potential delimiter string, counting columns.
43	// This happens on every line of text,
44	// so make it relatively quick - nothing expensive.
45	col := 0
46	delim := tableTrimOuter(delim1)
47	i := 0
48	for ; ; col++ {
49		for i < len(delim) && isTableSpace(delim[i]) {
50			i++
51		}
52		if i >= len(delim) {
53			break
54		}
55		if i < len(delim) && delim[i] == ':' {
56			i++
57		}
58		if i >= len(delim) || delim[i] != '-' {
59			return false
60		}
61		i++
62		for i < len(delim) && delim[i] == '-' {
63			i++
64		}
65		if i < len(delim) && delim[i] == ':' {
66			i++
67		}
68		for i < len(delim) && isTableSpace(delim[i]) {
69			i++
70		}
71		if i < len(delim) && delim[i] == '|' {
72			i++
73		}
74	}
75
76	if strings.TrimSpace(hdr1) == "|" {
77		// https://github.com/github/cmark-gfm/pull/127 and
78		// https://github.com/github/cmark-gfm/pull/128
79		// fixed a buffer overread by rejecting | by itself as a table line.
80		// That seems to violate the spec, but we will play along.
81		return false
82	}
83
84	return col == tableCount(tableTrimOuter(hdr1))
85}
86
87func tableCount(row tableTrimmed) int {
88	col := 1
89	prev := byte(0)
90	for i := 0; i < len(row); i++ {
91		c := row[i]
92		if c == '|' && prev != '\\' {
93			col++
94		}
95		prev = c
96	}
97	return col
98}
99
// A tableBuilder collects the lines of a table, each stored in
// tableTrimOuter form, until build turns them into a Table.
100type tableBuilder struct {
101	hdr   tableTrimmed // header line, set by start
102	delim tableTrimmed // delimiter line, set by start
103	rows  []tableTrimmed // body lines, in the order given to addRow
104}
105
106func (b *tableBuilder) start(hdr, delim string) {
107	b.hdr = tableTrimOuter(hdr)
108	b.delim = tableTrimOuter(delim)
109}
110
111func (b *tableBuilder) addRow(row string) {
112	b.rows = append(b.rows, tableTrimOuter(row))
113}
114
// A Table is a Block holding a table: one header row, a per-column
// alignment, and zero or more body rows.
115type Table struct {
116	Position
117	Header []*Text // header cells, one per column
118	Align  []string // per-column alignment: "left", "center", "right"; "" for unset
119	Rows   [][]*Text // body rows, each a list of cells
120}
121
122func (t *Table) PrintHTML(buf *bytes.Buffer) {
123	buf.WriteString("<table>\n")
124	buf.WriteString("<thead>\n")
125	buf.WriteString("<tr>\n")
126	for i, hdr := range t.Header {
127		buf.WriteString("<th")
128		if t.Align[i] != "" {
129			buf.WriteString(" align=\"")
130			buf.WriteString(t.Align[i])
131			buf.WriteString("\"")
132		}
133		buf.WriteString(">")
134		hdr.PrintHTML(buf)
135		buf.WriteString("</th>\n")
136	}
137	buf.WriteString("</tr>\n")
138	buf.WriteString("</thead>\n")
139	if len(t.Rows) > 0 {
140		buf.WriteString("<tbody>\n")
141		for _, row := range t.Rows {
142			buf.WriteString("<tr>\n")
143			for i, cell := range row {
144				buf.WriteString("<td")
145				if i < len(t.Align) && t.Align[i] != "" {
146					buf.WriteString(" align=\"")
147					buf.WriteString(t.Align[i])
148					buf.WriteString("\"")
149				}
150				buf.WriteString(">")
151				cell.PrintHTML(buf)
152				buf.WriteString("</td>\n")
153			}
154			buf.WriteString("</tr>\n")
155		}
156		buf.WriteString("</tbody>\n")
157	}
158	buf.WriteString("</table>\n")
159}
160
// printMarkdown is currently a no-op: the body is empty, so nothing is
// written to buf for a Table.
// NOTE(review): looks unimplemented rather than intentional — confirm.
161func (t *Table) printMarkdown(buf *bytes.Buffer, s mdState) {
162}
163
164func (b *tableBuilder) build(p buildState) Block {
165	pos := p.pos()
166	pos.StartLine-- // builder does not count header
167	pos.EndLine = pos.StartLine + 1 + len(b.rows)
168	t := &Table{
169		Position: pos,
170	}
171	width := tableCount(b.hdr)
172	t.Header = b.parseRow(p, b.hdr, pos.StartLine, width)
173	t.Align = b.parseAlign(b.delim, width)
174	t.Rows = make([][]*Text, len(b.rows))
175	for i, row := range b.rows {
176		t.Rows[i] = b.parseRow(p, row, pos.StartLine+2+i, width)
177	}
178	return t
179}
180
181func (b *tableBuilder) parseRow(p buildState, row tableTrimmed, line int, width int) []*Text {
182	out := make([]*Text, 0, width)
183	pos := Position{StartLine: line, EndLine: line}
184	start := 0
185	unesc := nop
186	for i := 0; i < len(row); i++ {
187		c := row[i]
188		if c == '\\' && i+1 < len(row) && row[i+1] == '|' {
189			unesc = tableUnescape
190			i++
191			continue
192		}
193		if c == '|' {
194			out = append(out, p.newText(pos, unesc(strings.Trim(string(row[start:i]), " \t\v\f"))))
195			if len(out) == width {
196				// Extra cells are discarded!
197				return out
198			}
199			start = i + 1
200			unesc = nop
201		}
202	}
203	out = append(out, p.newText(pos, unesc(strings.Trim(string(row[start:]), " \t\v\f"))))
204	for len(out) < width {
205		// Missing cells are considered empty.
206		out = append(out, p.newText(pos, ""))
207	}
208	return out
209}
210
// nop returns text unchanged. parseRow uses it in place of
// tableUnescape for cells that contain no escaped pipe.
211func nop(text string) string {
212	return text
213}
214
// tableUnescape returns text with every \| sequence replaced by a plain
// pipe. Backslashes not followed by a pipe are left as they are.
func tableUnescape(text string) string {
	return strings.ReplaceAll(text, `\|`, "|")
}
227
228func (b *tableBuilder) parseAlign(delim tableTrimmed, n int) []string {
229	align := make([]string, 0, tableCount(delim))
230	start := 0
231	for i := 0; i < len(delim); i++ {
232		if delim[i] == '|' {
233			align = append(align, tableAlign(string(delim[start:i])))
234			start = i + 1
235		}
236	}
237	align = append(align, tableAlign(string(delim[start:])))
238	return align
239}
240
// tableAlign returns the alignment requested by one delimiter-row cell:
// "left" for a leading colon, "right" for a trailing colon, "center"
// for both, and "" when there is neither. Surrounding table blanks are
// ignored. An empty or all-blank cell yields "".
func tableAlign(cell string) string {
	cell = strings.Trim(cell, " \t\v\f")
	// HasPrefix and HasSuffix are false for an empty cell, where
	// indexing cell[0] would panic.
	left := strings.HasPrefix(cell, ":")
	right := strings.HasSuffix(cell, ":")
	switch {
	case left && right:
		return "center"
	case left:
		return "left"
	case right:
		return "right"
	}
	return ""
}
255