• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2
3package reader
4
5import (
6	"bufio"
7	"fmt"
8	"io"
9	"strings"
10	"unicode"
11)
12
// TagValuePair is the exported form of a single parsed entry: the tag
// text and the value text, both kept as plain strings. ReadTagValues
// returns a slice of these.
type TagValuePair struct {
	Tag, Value string
}
18
19// ReadTagValues takes an io.Reader, scans it line by line and returns
20// a slice of {string, string} structs in the form {tag, value}.
21func ReadTagValues(content io.Reader) ([]TagValuePair, error) {
22	r := &tvReader{}
23
24	scanner := bufio.NewScanner(content)
25	for scanner.Scan() {
26		// read each line, one by one
27		err := r.readNextLine(scanner.Text())
28		if err != nil {
29			return nil, err
30		}
31	}
32	if err := scanner.Err(); err != nil {
33		return nil, err
34	}
35
36	// finalize and make sure all is well
37	tvList, err := r.finalize()
38	if err != nil {
39		return nil, err
40	}
41
42	// convert internal format to exported TagValueList
43	var exportedTVList []TagValuePair
44	for _, tv := range tvList {
45		tvPair := TagValuePair{Tag: tv.tag, Value: tv.value}
46		exportedTVList = append(exportedTVList, tvPair)
47	}
48
49	return exportedTVList, nil
50}
51
// tagvalue is the unexported counterpart of TagValuePair that tvReader
// accumulates while parsing. Field order matters: the reader builds
// values with positional literals (tag first, then value).
type tagvalue struct {
	tag, value string
}
56
57type tvReader struct {
58	midtext      bool
59	tvList       []tagvalue
60	currentLine  int
61	currentTag   string
62	currentValue string
63}
64
65func (reader *tvReader) finalize() ([]tagvalue, error) {
66	if reader.midtext {
67		return nil, fmt.Errorf("finalize called while still midtext parsing a text tag")
68	}
69	return reader.tvList, nil
70}
71
72func (reader *tvReader) readNextLine(line string) error {
73	reader.currentLine++
74
75	if reader.midtext {
76		return reader.readNextLineFromMidtext(line)
77	}
78
79	return reader.readNextLineFromReady(line)
80}
81
82func (reader *tvReader) readNextLineFromReady(line string) error {
83	// strip whitespace from beginning of line
84	line2 := strings.TrimLeftFunc(line, func(r rune) bool {
85		return unicode.IsSpace(r)
86	})
87
88	// ignore empty lines
89	if line2 == "" {
90		return nil
91	}
92
93	// ignore comment lines
94	if strings.HasPrefix(line2, "#") {
95		return nil
96	}
97
98	// split at colon
99	substrings := strings.SplitN(line2, ":", 2)
100	if len(substrings) == 1 {
101		// error if a colon isn't found
102		return fmt.Errorf("no colon found in '%s'", line)
103	}
104
105	// the first substring is the tag
106	reader.currentTag = strings.TrimSpace(substrings[0])
107
108	// determine whether the value contains (or starts) a <text> line
109	substrings = strings.SplitN(substrings[1], "<text>", 2)
110	if len(substrings) == 1 {
111		// no <text> tag found means this is a single-line value
112		// strip whitespace and use as a single line
113		reader.currentValue = strings.TrimSpace(substrings[0])
114	} else {
115		// there was a <text> tag; now decide whether it's multi-line
116		substrings = strings.SplitN(substrings[1], "</text>", 2)
117		if len(substrings) > 1 {
118			// there is also a </text> tag; take the middle part and
119			// set as value
120			reader.currentValue = substrings[0]
121		} else {
122			// there is no </text> tag on this line; switch to midtext
123			reader.currentValue = substrings[0] + "\n"
124			reader.midtext = true
125			return nil
126		}
127	}
128
129	// if we got here, the value was on a single line
130	// so go ahead and add it to the tag-value list
131	tv := tagvalue{reader.currentTag, reader.currentValue}
132	reader.tvList = append(reader.tvList, tv)
133
134	// and reset
135	reader.currentTag = ""
136	reader.currentValue = ""
137
138	return nil
139}
140
141func (reader *tvReader) readNextLineFromMidtext(line string) error {
142	// look for whether the line closes here
143	substrings := strings.SplitN(line, "</text>", 2)
144	if len(substrings) == 1 {
145		// doesn't contain </text>, so keep building the current value
146		reader.currentValue += line + "\n"
147		return nil
148	}
149
150	// contains </text>, so end and record this pair
151	reader.currentValue += substrings[0]
152	tv := tagvalue{reader.currentTag, reader.currentValue}
153	reader.tvList = append(reader.tvList, tv)
154
155	// and reset
156	reader.midtext = false
157	reader.currentTag = ""
158	reader.currentValue = ""
159
160	return nil
161}
162