• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     https://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package trebuchet.util
18 
19 import trebuchet.io.GenericByteBuffer
20 import trebuchet.io.StreamingReader
21 
22 /**
23  * Partial implementation of the Boyer-Moore string search algorithm.
24  * Requires that the bytearray to look for is <= 127 in length due to the use of
25  * signed bytes for the jump table.
26  */
27 class StringSearch(val lookFor: String) {
<lambda>null28     val skipLut = ByteArray(256) { lookFor.length.toByte() }
29     val suffixSkip = ByteArray(lookFor.length)
30 
31     private companion object {
isPrefixnull32         fun isPrefix(str: CharSequence, pos: Int): Boolean {
33             return (0 until (str.length - pos)).none { str[it] != str[pos + it] }
34         }
35 
longestCommonSuffixnull36         fun longestCommonSuffix(word: CharSequence, pos: Int): Int {
37             var i: Int = 0
38             while (word[pos - i] == word[word.length - 1 - i] && i < pos) {
39                 i++
40             }
41             return i
42         }
43     }
44 
45     init {
46         val last = lookFor.length - 1
47         for (i in 0..last - 1) {
48             skipLut[lookFor[i].toInt() and 0xFF] = (last - i).toByte()
49         }
50 
51         var lastPrefix = last
52         for (i in last downTo 0) {
53             if (isPrefix(lookFor, i + 1)) {
54                 lastPrefix = i + 1
55             }
56             suffixSkip[i] = (lastPrefix + last - i).toByte()
57         }
58         for (i in 0 until last) {
59             val suffixLength = longestCommonSuffix(lookFor, i)
60             if(lookFor[i - suffixLength] != lookFor[last - suffixLength]) {
61                 suffixSkip[last - suffixLength] = (suffixLength + last - i).toByte()
62             }
63         }
64     }
65 
66     val length get() = lookFor.length
67 
findnull68     fun find(reader: StreamingReader, startIndex: Long = 0, inEndIndex: Long = Long.MAX_VALUE): Long {
69         var index = startIndex + lookFor.length - 1
70         var endIndex = inEndIndex
71         while (index <= endIndex) {
72             if (index > reader.endIndex) {
73                 if (!reader.loadIndex(index)) return -1
74                 if (reader.reachedEof) {
75                     endIndex = reader.endIndex
76                 }
77             }
78             // Search the overlapping region slowly
79             while (reader.windowFor(index) !== reader.windowFor(index - lookFor.length + 1)) {
80                 var lookForIndex = lookFor.length - 1
81                 while (lookForIndex >= 0 && reader[index] == lookFor[lookForIndex].toByte()) {
82                     index--
83                     lookForIndex--
84                 }
85                 if (lookForIndex < 0) {
86                     return index + 1
87                 }
88                 index += maxOf(skipLut[reader[index].toInt() and 0xFF], suffixSkip[lookForIndex])
89 
90                 if (index > reader.endIndex) {
91                     if (!reader.loadIndex(index)) return -1
92                     if (reader.reachedEof) {
93                         endIndex = reader.endIndex
94                     }
95                 }
96             }
97             // Now search the non-overlapping quickly
98             val window = reader.windowFor(index)
99             index = findInWindow(window, index, endIndex)
100             if (index <= window.globalEndIndex) {
101                 // Found a match
102                 return index
103             }
104         }
105         return -1
106     }
107 
findInLoadedRegionnull108     fun findInLoadedRegion(reader: StreamingReader, endIndex: Long = reader.endIndex): Long {
109         var index = reader.startIndex + lookFor.length - 1
110         while (index <= endIndex) {
111             // Search the overlapping region slowly
112             while (reader.windowFor(index) !== reader.windowFor(index - lookFor.length + 1)) {
113                 var lookForIndex = lookFor.length - 1
114                 while (lookForIndex >= 0 && reader[index] == lookFor[lookForIndex].toByte()) {
115                     index--
116                     lookForIndex--
117                 }
118                 if (lookForIndex < 0) {
119                     return index + 1
120                 }
121                 index += maxOf(skipLut[reader[index].toInt() and 0xFF], suffixSkip[lookForIndex])
122             }
123             // Now search the non-overlapping quickly
124             val window = reader.windowFor(index)
125             index = findInWindow(window, index, endIndex)
126             if (index < window.globalEndIndex && index < endIndex) {
127                 // Found a match
128                 return index
129             }
130         }
131         return -1
132     }
133 
findnull134     fun find(buffer: GenericByteBuffer, startIndex: Long = 0, endIndex: Long = buffer.length): Long {
135         var index = startIndex + lookFor.length - 1
136         while (index < endIndex) {
137             var lookForIndex = lookFor.length - 1
138             while (lookForIndex >= 0 && buffer[index] == lookFor[lookForIndex].toByte()) {
139                 index--
140                 lookForIndex--
141             }
142             if (lookForIndex < 0) {
143                 index += 1
144                 break
145             }
146             index += maxOf(skipLut[buffer[index].toInt() and 0xFF], suffixSkip[lookForIndex])
147         }
148         return index
149     }
150 
findnull151     fun find(buffer: ByteArray, startIndex: Int = 0, endIndex: Int = buffer.size): Int {
152         var index = startIndex + lookFor.length - 1
153         while (index < endIndex) {
154             var lookForIndex = lookFor.length - 1
155             while (lookForIndex >= 0 && buffer[index] == lookFor[lookForIndex].toByte()) {
156                 index--
157                 lookForIndex--
158             }
159             if (lookForIndex < 0) {
160                 index += 1
161                 break
162             }
163             index += maxOf(skipLut[buffer[index].toInt() and 0xFF], suffixSkip[lookForIndex])
164         }
165         return index
166     }
167 
findInWindownull168     private fun findInWindow(window: StreamingReader.Window, globalStartIndex: Long, globalEndIndex: Long): Long {
169         var index = (globalStartIndex - window.globalStartIndex).toInt()
170         val buffer = window.slice
171         val endIndex = (minOf(window.globalEndIndex, globalEndIndex) - window.globalStartIndex).toInt()
172         while (index <= endIndex) {
173             var lookForIndex = lookFor.length - 1
174             while (lookForIndex >= 0 && buffer[index] == lookFor[lookForIndex].toByte()) {
175                 index--
176                 lookForIndex--
177             }
178             if (lookForIndex < 0) {
179                 index += 1
180                 break
181             }
182             index += maxOf(skipLut[buffer[index].toInt() and 0xFF], suffixSkip[lookForIndex])
183         }
184         return index + window.globalStartIndex
185     }
186 }
187 
searchFornull188 fun searchFor(str: String) = StringSearch(str)
189 
190 fun GenericByteBuffer.indexOf(str: String, endIndex: Long = this.length): Long {
191     val stopAt = minOf(endIndex, this.length - 1)
192     if (this is StreamingReader) {
193         return StringSearch(str).findInLoadedRegion(this, stopAt)
194     }
195     return StringSearch(str).find(this, 0, stopAt)
196 }
197 
GenericByteBuffernull198 fun GenericByteBuffer.contains(str: String, endIndex: Long = this.length): Boolean {
199     return this.indexOf(str, endIndex) != -1L
200 }