1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package trebuchet.util
18
19 import trebuchet.io.GenericByteBuffer
20 import trebuchet.io.StreamingReader
21
22 /**
23 * Partial implementation of the Boyer-Moore string search algorithm.
24 * Requires that the bytearray to look for is <= 127 in length due to the use of
25 * signed bytes for the jump table.
26 */
27 class StringSearch(val lookFor: String) {
<lambda>null28 val skipLut = ByteArray(256) { lookFor.length.toByte() }
29 val suffixSkip = ByteArray(lookFor.length)
30
31 private companion object {
isPrefixnull32 fun isPrefix(str: CharSequence, pos: Int): Boolean {
33 return (0 until (str.length - pos)).none { str[it] != str[pos + it] }
34 }
35
longestCommonSuffixnull36 fun longestCommonSuffix(word: CharSequence, pos: Int): Int {
37 var i: Int = 0
38 while (word[pos - i] == word[word.length - 1 - i] && i < pos) {
39 i++
40 }
41 return i
42 }
43 }
44
45 init {
46 val last = lookFor.length - 1
47 for (i in 0..last - 1) {
48 skipLut[lookFor[i].toInt() and 0xFF] = (last - i).toByte()
49 }
50
51 var lastPrefix = last
52 for (i in last downTo 0) {
53 if (isPrefix(lookFor, i + 1)) {
54 lastPrefix = i + 1
55 }
56 suffixSkip[i] = (lastPrefix + last - i).toByte()
57 }
58 for (i in 0 until last) {
59 val suffixLength = longestCommonSuffix(lookFor, i)
60 if(lookFor[i - suffixLength] != lookFor[last - suffixLength]) {
61 suffixSkip[last - suffixLength] = (suffixLength + last - i).toByte()
62 }
63 }
64 }
65
66 val length get() = lookFor.length
67
findnull68 fun find(reader: StreamingReader, startIndex: Long = 0, inEndIndex: Long = Long.MAX_VALUE): Long {
69 var index = startIndex + lookFor.length - 1
70 var endIndex = inEndIndex
71 while (index <= endIndex) {
72 if (index > reader.endIndex) {
73 if (!reader.loadIndex(index)) return -1
74 if (reader.reachedEof) {
75 endIndex = reader.endIndex
76 }
77 }
78 // Search the overlapping region slowly
79 while (reader.windowFor(index) !== reader.windowFor(index - lookFor.length + 1)) {
80 var lookForIndex = lookFor.length - 1
81 while (lookForIndex >= 0 && reader[index] == lookFor[lookForIndex].toByte()) {
82 index--
83 lookForIndex--
84 }
85 if (lookForIndex < 0) {
86 return index + 1
87 }
88 index += maxOf(skipLut[reader[index].toInt() and 0xFF], suffixSkip[lookForIndex])
89
90 if (index > reader.endIndex) {
91 if (!reader.loadIndex(index)) return -1
92 if (reader.reachedEof) {
93 endIndex = reader.endIndex
94 }
95 }
96 }
97 // Now search the non-overlapping quickly
98 val window = reader.windowFor(index)
99 index = findInWindow(window, index, endIndex)
100 if (index <= window.globalEndIndex) {
101 // Found a match
102 return index
103 }
104 }
105 return -1
106 }
107
findInLoadedRegionnull108 fun findInLoadedRegion(reader: StreamingReader, endIndex: Long = reader.endIndex): Long {
109 var index = reader.startIndex + lookFor.length - 1
110 while (index <= endIndex) {
111 // Search the overlapping region slowly
112 while (reader.windowFor(index) !== reader.windowFor(index - lookFor.length + 1)) {
113 var lookForIndex = lookFor.length - 1
114 while (lookForIndex >= 0 && reader[index] == lookFor[lookForIndex].toByte()) {
115 index--
116 lookForIndex--
117 }
118 if (lookForIndex < 0) {
119 return index + 1
120 }
121 index += maxOf(skipLut[reader[index].toInt() and 0xFF], suffixSkip[lookForIndex])
122 }
123 // Now search the non-overlapping quickly
124 val window = reader.windowFor(index)
125 index = findInWindow(window, index, endIndex)
126 if (index < window.globalEndIndex && index < endIndex) {
127 // Found a match
128 return index
129 }
130 }
131 return -1
132 }
133
findnull134 fun find(buffer: GenericByteBuffer, startIndex: Long = 0, endIndex: Long = buffer.length): Long {
135 var index = startIndex + lookFor.length - 1
136 while (index < endIndex) {
137 var lookForIndex = lookFor.length - 1
138 while (lookForIndex >= 0 && buffer[index] == lookFor[lookForIndex].toByte()) {
139 index--
140 lookForIndex--
141 }
142 if (lookForIndex < 0) {
143 index += 1
144 break
145 }
146 index += maxOf(skipLut[buffer[index].toInt() and 0xFF], suffixSkip[lookForIndex])
147 }
148 return index
149 }
150
findnull151 fun find(buffer: ByteArray, startIndex: Int = 0, endIndex: Int = buffer.size): Int {
152 var index = startIndex + lookFor.length - 1
153 while (index < endIndex) {
154 var lookForIndex = lookFor.length - 1
155 while (lookForIndex >= 0 && buffer[index] == lookFor[lookForIndex].toByte()) {
156 index--
157 lookForIndex--
158 }
159 if (lookForIndex < 0) {
160 index += 1
161 break
162 }
163 index += maxOf(skipLut[buffer[index].toInt() and 0xFF], suffixSkip[lookForIndex])
164 }
165 return index
166 }
167
findInWindownull168 private fun findInWindow(window: StreamingReader.Window, globalStartIndex: Long, globalEndIndex: Long): Long {
169 var index = (globalStartIndex - window.globalStartIndex).toInt()
170 val buffer = window.slice
171 val endIndex = (minOf(window.globalEndIndex, globalEndIndex) - window.globalStartIndex).toInt()
172 while (index <= endIndex) {
173 var lookForIndex = lookFor.length - 1
174 while (lookForIndex >= 0 && buffer[index] == lookFor[lookForIndex].toByte()) {
175 index--
176 lookForIndex--
177 }
178 if (lookForIndex < 0) {
179 index += 1
180 break
181 }
182 index += maxOf(skipLut[buffer[index].toInt() and 0xFF], suffixSkip[lookForIndex])
183 }
184 return index + window.globalStartIndex
185 }
186 }
187
searchFornull188 fun searchFor(str: String) = StringSearch(str)
189
190 fun GenericByteBuffer.indexOf(str: String, endIndex: Long = this.length): Long {
191 val stopAt = minOf(endIndex, this.length - 1)
192 if (this is StreamingReader) {
193 return StringSearch(str).findInLoadedRegion(this, stopAt)
194 }
195 return StringSearch(str).find(this, 0, stopAt)
196 }
197
GenericByteBuffernull198 fun GenericByteBuffer.contains(str: String, endIndex: Long = this.length): Boolean {
199 return this.indexOf(str, endIndex) != -1L
200 }