// Copyright 2019 The Go Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package span import ( "fmt" "unicode/utf16" "unicode/utf8" ) // ToUTF16Column calculates the utf16 column expressed by the point given the // supplied file contents. // This is used to convert from the native (always in bytes) column // representation and the utf16 counts used by some editors. func ToUTF16Column(p Point, content []byte) (int, error) { if content == nil { return -1, fmt.Errorf("ToUTF16Column: missing content") } if !p.HasPosition() { return -1, fmt.Errorf("ToUTF16Column: point is missing position") } if !p.HasOffset() { return -1, fmt.Errorf("ToUTF16Column: point is missing offset") } offset := p.Offset() // 0-based colZero := p.Column() - 1 // 0-based if colZero == 0 { // 0-based column 0, so it must be chr 1 return 1, nil } else if colZero < 0 { return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero) } // work out the offset at the start of the line using the column lineOffset := offset - colZero if lineOffset < 0 || offset > len(content) { return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) } // Use the offset to pick out the line start. // This cannot panic: offset > len(content) and lineOffset < offset. start := content[lineOffset:] // Now, truncate down to the supplied column. start = start[:colZero] // and count the number of utf16 characters // in theory we could do this by hand more efficiently... return len(utf16.Encode([]rune(string(start)))) + 1, nil } // FromUTF16Column advances the point by the utf16 character offset given the // supplied line contents. // This is used to convert from the utf16 counts used by some editors to the // native (always in bytes) column representation. func FromUTF16Column(p Point, chr int, content []byte) (Point, error) { if !p.HasOffset() { return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset") } // if chr is 1 then no adjustment needed if chr <= 1 { return p, nil } if p.Offset() >= len(content) { return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content)) } remains := content[p.Offset():] // scan forward the specified number of characters for count := 1; count < chr; count++ { if len(remains) <= 0 { return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content") } r, w := utf8.DecodeRune(remains) if r == '\n' { // Per the LSP spec: // // > If the character value is greater than the line length it // > defaults back to the line length. break } remains = remains[w:] if r >= 0x10000 { // a two point rune count++ // if we finished in a two point rune, do not advance past the first if count >= chr { break } } p.v.Column += w p.v.Offset += w } return p, nil }