• Home
  • Raw
  • Download

Lines Matching +full:stack +full:- +full:utils

8  *      http://www.apache.org/licenses/LICENSE-2.0
17 #include "utils/tflite/string_projection.h"
22 #include "utils/strings/utf8.h"
23 #include "utils/tflite/string_projection_base.h"
24 #include "utils/utf8/unilib-common.h"
44 constexpr int kInvalid = -1;
56 if (bytes_read <= 0 || bytes_read > text.length() - i) { in IsDigitString()
109 if (bytes_read <= 0 || bytes_read > len - i) break; in SplitByCharInternal()
110 tokens->emplace_back(input_ptr + i, bytes_read); in SplitByCharInternal()
111 if (max_tokens != kInvalid && tokens->size() == max_tokens) { in SplitByCharInternal()
172 (max_tokens == kAllTokens || tokens->size() < max_tokens - 1)) { in SplitBySpaceInternal()
173 auto length = end - start; in SplitBySpaceInternal()
175 tokens->emplace_back(input_ptr + start, length); in SplitBySpaceInternal()
181 auto length = end == kInvalid ? (last_index - start) : (end - start); in SplitBySpaceInternal()
183 tokens->emplace_back(input_ptr + start, length); in SplitBySpaceInternal()
253 if (i > 0 && input_ptr[i - 1] != ' ' && normalized.back() != ' ') { in NormalizeInternal()
277 // Tokenizes the input by separators_. Limit to max_tokens, when it is not -1.
301 (max_tokens == kAllTokens || tokens.size() < max_tokens - 1)) { in Tokenize()
302 auto length = end - start; in Tokenize()
314 auto length = end == kInvalid ? (last_index - start) : (end - start); in Tokenize()
328 // separators_. Don't search beyond input_ptr[length](non-inclusive). Return
329 // -1 if not found.
353 auto it = std::find_if_not(str->rbegin(), str->rend(), ::ispunct); in StripTrailingAsciiPunctuation()
354 str->erase(str->rend() - it); in StripTrailingAsciiPunctuation()
385 bool ShouldStepInRecursion(const std::vector<int>& stack, int stack_idx, in ShouldStepInRecursion() argument
387 // If current stack size and next word enumeration are within valid range. in ShouldStepInRecursion()
388 if (stack_idx < params.ngram_size && stack[stack_idx] + 1 < num_words) { in ShouldStepInRecursion()
389 // If this stack is empty, step in for first word enumeration. in ShouldStepInRecursion()
395 // next_word_idx = stack[stack_idx] + 1 in ShouldStepInRecursion()
396 // next_word_idx - stack[stack_idx-1] <= max_skip_size + 1 in ShouldStepInRecursion()
397 if (stack[stack_idx] - stack[stack_idx - 1] <= params.max_skip_size) { in ShouldStepInRecursion()
404 std::string JoinTokensBySpace(const std::vector<int>& stack, int stack_idx, in JoinTokensBySpace() argument
408 len += tokens[stack[i]].size(); in JoinTokensBySpace()
410 len += stack_idx - 1; in JoinTokensBySpace()
414 res.append(tokens[stack[0]]); in JoinTokensBySpace()
417 res.append(tokens[stack[i]]); in JoinTokensBySpace()
434 // Stack stores the index of word used to generate ngram. in ExtractSkipGramsImpl()
435 // The size of stack is the size of ngram. in ExtractSkipGramsImpl()
436 std::vector<int> stack(params.ngram_size + 1, 0); in ExtractSkipGramsImpl() local
437 // Stack index that indicates which depth the recursion is operating at. in ExtractSkipGramsImpl()
442 if (ShouldStepInRecursion(stack, stack_idx, num_words, params)) { in ExtractSkipGramsImpl()
445 // fill this word to stack, recurse into next depth. in ExtractSkipGramsImpl()
446 stack[stack_idx]++; in ExtractSkipGramsImpl()
448 stack[stack_idx] = stack[stack_idx - 1]; in ExtractSkipGramsImpl()
451 // Add n-gram to tensor buffer when the stack has filled with enough in ExtractSkipGramsImpl()
453 std::string ngram = JoinTokensBySpace(stack, stack_idx, tokens); in ExtractSkipGramsImpl()
454 if (blacklist->find(ngram) == blacklist->end()) { in ExtractSkipGramsImpl()
461 stack_idx--; in ExtractSkipGramsImpl()
475 : normalizer->Normalize(input, params.max_input_chars); in ExtractSkipGrams()
483 tokens = tokenizer->Tokenize(normalized.data(), normalized.size(), in ExtractSkipGrams()
514 // if true, remove repeated characters in tokens ('loool' -> 'lol').
534 input_ = &context->tensors[node->inputs->data[kInputMessage]]; in InitializeInput()
550 return context->tensors[node->inputs->data[kInputMessage]].dims; in GetInputShape()