1// Copyright 2019 The Go Authors. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package span 16 17import ( 18 "fmt" 19 "unicode/utf16" 20 "unicode/utf8" 21) 22 23// ToUTF16Column calculates the utf16 column expressed by the point given the 24// supplied file contents. 25// This is used to convert from the native (always in bytes) column 26// representation and the utf16 counts used by some editors. 27func ToUTF16Column(p Point, content []byte) (int, error) { 28 if content == nil { 29 return -1, fmt.Errorf("ToUTF16Column: missing content") 30 } 31 if !p.HasPosition() { 32 return -1, fmt.Errorf("ToUTF16Column: point is missing position") 33 } 34 if !p.HasOffset() { 35 return -1, fmt.Errorf("ToUTF16Column: point is missing offset") 36 } 37 offset := p.Offset() // 0-based 38 colZero := p.Column() - 1 // 0-based 39 if colZero == 0 { 40 // 0-based column 0, so it must be chr 1 41 return 1, nil 42 } else if colZero < 0 { 43 return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero) 44 } 45 // work out the offset at the start of the line using the column 46 lineOffset := offset - colZero 47 if lineOffset < 0 || offset > len(content) { 48 return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) 49 } 50 // Use the offset to pick out the line start. 51 // This cannot panic: offset > len(content) and lineOffset < offset. 52 start := content[lineOffset:] 53 54 // Now, truncate down to the supplied column. 55 start = start[:colZero] 56 57 // and count the number of utf16 characters 58 // in theory we could do this by hand more efficiently... 59 return len(utf16.Encode([]rune(string(start)))) + 1, nil 60} 61 62// FromUTF16Column advances the point by the utf16 character offset given the 63// supplied line contents. 64// This is used to convert from the utf16 counts used by some editors to the 65// native (always in bytes) column representation. 66func FromUTF16Column(p Point, chr int, content []byte) (Point, error) { 67 if !p.HasOffset() { 68 return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset") 69 } 70 // if chr is 1 then no adjustment needed 71 if chr <= 1 { 72 return p, nil 73 } 74 if p.Offset() >= len(content) { 75 return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content)) 76 } 77 remains := content[p.Offset():] 78 // scan forward the specified number of characters 79 for count := 1; count < chr; count++ { 80 if len(remains) <= 0 { 81 return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content") 82 } 83 r, w := utf8.DecodeRune(remains) 84 if r == '\n' { 85 // Per the LSP spec: 86 // 87 // > If the character value is greater than the line length it 88 // > defaults back to the line length. 89 break 90 } 91 remains = remains[w:] 92 if r >= 0x10000 { 93 // a two point rune 94 count++ 95 // if we finished in a two point rune, do not advance past the first 96 if count >= chr { 97 break 98 } 99 } 100 p.v.Column += w 101 p.v.Offset += w 102 } 103 return p, nil 104} 105