1// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 2 3package reader 4 5import ( 6 "bufio" 7 "fmt" 8 "io" 9 "strings" 10 "unicode" 11) 12 13// TagValuePair is a convenience struct for a (tag, value) string pair. 14type TagValuePair struct { 15 Tag string 16 Value string 17} 18 19// ReadTagValues takes an io.Reader, scans it line by line and returns 20// a slice of {string, string} structs in the form {tag, value}. 21func ReadTagValues(content io.Reader) ([]TagValuePair, error) { 22 r := &tvReader{} 23 24 scanner := bufio.NewScanner(content) 25 for scanner.Scan() { 26 // read each line, one by one 27 err := r.readNextLine(scanner.Text()) 28 if err != nil { 29 return nil, err 30 } 31 } 32 if err := scanner.Err(); err != nil { 33 return nil, err 34 } 35 36 // finalize and make sure all is well 37 tvList, err := r.finalize() 38 if err != nil { 39 return nil, err 40 } 41 42 // convert internal format to exported TagValueList 43 var exportedTVList []TagValuePair 44 for _, tv := range tvList { 45 tvPair := TagValuePair{Tag: tv.tag, Value: tv.value} 46 exportedTVList = append(exportedTVList, tvPair) 47 } 48 49 return exportedTVList, nil 50} 51 52type tagvalue struct { 53 tag string 54 value string 55} 56 57type tvReader struct { 58 midtext bool 59 tvList []tagvalue 60 currentLine int 61 currentTag string 62 currentValue string 63} 64 65func (reader *tvReader) finalize() ([]tagvalue, error) { 66 if reader.midtext { 67 return nil, fmt.Errorf("finalize called while still midtext parsing a text tag") 68 } 69 return reader.tvList, nil 70} 71 72func (reader *tvReader) readNextLine(line string) error { 73 reader.currentLine++ 74 75 if reader.midtext { 76 return reader.readNextLineFromMidtext(line) 77 } 78 79 return reader.readNextLineFromReady(line) 80} 81 82func (reader *tvReader) readNextLineFromReady(line string) error { 83 // strip whitespace from beginning of line 84 line2 := strings.TrimLeftFunc(line, func(r rune) bool { 85 return unicode.IsSpace(r) 86 }) 87 88 // ignore empty lines 89 if line2 == "" { 90 return nil 91 } 92 93 // ignore comment lines 94 if strings.HasPrefix(line2, "#") { 95 return nil 96 } 97 98 // split at colon 99 substrings := strings.SplitN(line2, ":", 2) 100 if len(substrings) == 1 { 101 // error if a colon isn't found 102 return fmt.Errorf("no colon found in '%s'", line) 103 } 104 105 // the first substring is the tag 106 reader.currentTag = strings.TrimSpace(substrings[0]) 107 108 // determine whether the value contains (or starts) a <text> line 109 substrings = strings.SplitN(substrings[1], "<text>", 2) 110 if len(substrings) == 1 { 111 // no <text> tag found means this is a single-line value 112 // strip whitespace and use as a single line 113 reader.currentValue = strings.TrimSpace(substrings[0]) 114 } else { 115 // there was a <text> tag; now decide whether it's multi-line 116 substrings = strings.SplitN(substrings[1], "</text>", 2) 117 if len(substrings) > 1 { 118 // there is also a </text> tag; take the middle part and 119 // set as value 120 reader.currentValue = substrings[0] 121 } else { 122 // there is no </text> tag on this line; switch to midtext 123 reader.currentValue = substrings[0] + "\n" 124 reader.midtext = true 125 return nil 126 } 127 } 128 129 // if we got here, the value was on a single line 130 // so go ahead and add it to the tag-value list 131 tv := tagvalue{reader.currentTag, reader.currentValue} 132 reader.tvList = append(reader.tvList, tv) 133 134 // and reset 135 reader.currentTag = "" 136 reader.currentValue = "" 137 138 return nil 139} 140 141func (reader *tvReader) readNextLineFromMidtext(line string) error { 142 // look for whether the line closes here 143 substrings := strings.SplitN(line, "</text>", 2) 144 if len(substrings) == 1 { 145 // doesn't contain </text>, so keep building the current value 146 reader.currentValue += line + "\n" 147 return nil 148 } 149 150 // contains </text>, so end and record this pair 151 reader.currentValue += substrings[0] 152 tv := tagvalue{reader.currentTag, reader.currentValue} 153 reader.tvList = append(reader.tvList, tv) 154 155 // and reset 156 reader.midtext = false 157 reader.currentTag = "" 158 reader.currentValue = "" 159 160 return nil 161} 162