/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "annotator/number/number_test-include.h"

#include <set>
#include <string>
#include <vector>

#include "annotator/collections.h"
#include "annotator/model_generated.h"
#include "annotator/types-test-util.h"
#include "annotator/types.h"
#include "utils/tokenizer-utils.h"
#include "utils/utf8/unicodetext.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace libtextclassifier3 {
namespace test_internal {

using ::testing::AllOf;
using ::testing::ElementsAre;
using ::testing::Field;
using ::testing::Matcher;
using ::testing::UnorderedElementsAre;

// Builds (once) and returns the annotator options shared by the tests below:
// plain numbers are enabled for the RAW usecase only, percentages for both RAW
// and SMART; integers get priority score -10, floats and percentages get 1.
const NumberAnnotatorOptions*
NumberAnnotatorTest::TestingNumberAnnotatorOptions() {
  // The buffer is created on first call and kept in a function-local static;
  // nothing in this function deletes it.
  static const flatbuffers::DetachedBuffer* options_data = []() {
    NumberAnnotatorOptionsT options;
    options.enabled = true;
    options.priority_score = -10.0;
    options.float_number_priority_score = 1.0;
    options.enabled_annotation_usecases =
        1 << AnnotationUsecase_ANNOTATION_USECASE_RAW;
    options.max_number_of_digits = 20;

    options.percentage_priority_score = 1.0;
    options.percentage_annotation_usecases =
        (1 << AnnotationUsecase_ANNOTATION_USECASE_RAW) +
        (1 << AnnotationUsecase_ANNOTATION_USECASE_SMART);
    // NOTE(review): the last entry repeats "%"; std::set drops the duplicate.
    // Confirm whether a different percent code point was intended.
    std::set<std::string> percent_suffixes({"パーセント", "percent", "pércént",
                                            "pc", "pct", "%", "٪", "﹪", "%"});
    // The suffixes are packed into a single string, each one followed by a
    // '\0' separator.
    for (const std::string& string_value : percent_suffixes) {
      options.percentage_pieces_string.append(string_value);
      options.percentage_pieces_string.push_back('\0');
    }

    flatbuffers::FlatBufferBuilder builder;
    builder.Finish(NumberAnnotatorOptions::Pack(builder, &options));
    return new flatbuffers::DetachedBuffer(builder.Release());
  }();

  return flatbuffers::GetRoot<NumberAnnotatorOptions>(options_data->data());
}

// Field-level matchers; each compares one member of `arg` for equality.
MATCHER_P(IsCorrectCollection, collection, "collection is " + collection) {
  return arg.collection == collection;
}

MATCHER_P(IsCorrectNumericValue, numeric_value,
          "numeric value is " + std::to_string(numeric_value)) {
  return arg.numeric_value == numeric_value;
}

MATCHER_P(IsCorrectNumericDoubleValue, numeric_double_value,
          "numeric double value is " + std::to_string(numeric_double_value)) {
  return arg.numeric_double_value == numeric_double_value;
}

MATCHER_P(IsCorrectScore, score, "score is " + std::to_string(score)) {
  return arg.score == score;
}

MATCHER_P(IsCorrectPriortyScore, priority_score,
          "priority score is " + std::to_string(priority_score)) {
  return arg.priority_score == priority_score;
}

MATCHER_P(IsCorrectSpan, span,
          "span is (" + std::to_string(span.first) + "," +
              std::to_string(span.second) + ")") {
  return arg.span == span;
}

// Applies `inner` to the `classification` member of `arg`.
MATCHER_P(Classification, inner, "") {
  return testing::ExplainMatchResult(inner, arg.classification,
                                     result_listener);
}

// Matches an annotated span that covers `codepoint_span` and carries exactly
// one classification with the given collection, integer value, double value,
// score and priority score. The defaults (-10 and 1) are the values used by
// the tests below for plain integer annotations.
static Matcher<AnnotatedSpan> IsAnnotatedSpan(
    const CodepointSpan& codepoint_span, const std::string& collection,
    const int int_value, const double double_value,
    const float priority_score = -10, const float score = 1) {
  return AllOf(
      IsCorrectSpan(codepoint_span),
      Classification(ElementsAre(AllOf(
          IsCorrectCollection(collection), IsCorrectNumericValue(int_value),
          IsCorrectNumericDoubleValue(double_value), IsCorrectScore(score),
          IsCorrectPriortyScore(priority_score)))));
}

TEST_F(NumberAnnotatorTest, ClassifiesAndParsesNumberCorrectly) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345 ..."), {4, 9},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_EQ(classification_result.collection, "number");
  EXPECT_EQ(classification_result.numeric_value, 12345);
  EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);
}

TEST_F(NumberAnnotatorTest, ClassifiesAndParsesNumberAsFloatCorrectly) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345.12345 ..."), {4, 15},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_EQ(classification_result.collection, "number");
  EXPECT_EQ(classification_result.numeric_value, 12345);
  EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345.12345);
}

TEST_F(NumberAnnotatorTest,
       ClassifiesAndParsesNumberAsFloatCorrectlyWithoutDecimals) {
  ClassificationResult classification_result;
  // The dot after a number is considered punctuation, not part of a floating
  // number.
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345. ..."), {4, 9},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345. ..."), {4, 10},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_EQ(classification_result.collection, "number");
  EXPECT_EQ(classification_result.numeric_value, 12345);
  EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);

  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345. ..."), {4, 9},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_EQ(classification_result.collection, "number");
  EXPECT_EQ(classification_result.numeric_value, 12345);
  EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);
}

TEST_F(NumberAnnotatorTest, FindsAllIntegerAndFloatNumbersInText) {
  std::vector<AnnotatedSpan> result;
  // In the context "68.9#" -> 68.9 is a number because # is punctuation.
  // In the context "68.9#?" -> 68.9 is not a number because it is followed by
  // two punctuation signs.
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("how much is 2 plus 5 divided by 7% minus 3.14 "
                        "what about 68.9# or 68.9#?"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(12, 13), "number",
                                  /*int_value=*/2, /*double_value=*/2.0),
                  IsAnnotatedSpan(CodepointSpan(19, 20), "number",
                                  /*int_value=*/5, /*double_value=*/5.0),
                  IsAnnotatedSpan(CodepointSpan(32, 33), "number",
                                  /*int_value=*/7, /*double_value=*/7.0),
                  IsAnnotatedSpan(CodepointSpan(32, 34), "percentage",
                                  /*int_value=*/7, /*double_value=*/7.0,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(41, 45), "number",
                                  /*int_value=*/3, /*double_value=*/3.14,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(57, 61), "number",
                                  /*int_value=*/68, /*double_value=*/68.9,
                                  /*priority_score=*/1)));
}

TEST_F(NumberAnnotatorTest, ClassifiesNonNumberCorrectly) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 123a45 ..."), {4, 10},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345..12345 ..."), {4, 16},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 12345a ..."), {4, 11},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, ClassifiesNumberSelectionCorrectly) {
  ClassificationResult classification_result;
  // Punctuation after a number is not part of the number.
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 14, ..."), {4, 6},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_EQ(classification_result.collection, "number");
  EXPECT_EQ(classification_result.numeric_value, 14);
  EXPECT_EQ(classification_result.numeric_double_value, 14);

  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 14, ..."), {4, 7},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, ClassifiesPercentageSignCorrectly) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 99% ..."), {4, 7},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_EQ(classification_result.collection, "percentage");
  EXPECT_EQ(classification_result.numeric_value, 99);
  EXPECT_EQ(classification_result.numeric_double_value, 99);
}

TEST_F(NumberAnnotatorTest, ClassifiesPercentageWordCorrectly) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("... 15 percent ..."), {4, 14},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_EQ(classification_result.collection, "percentage");
  EXPECT_EQ(classification_result.numeric_value, 15);
  EXPECT_EQ(classification_result.numeric_double_value, 15);
}

TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiPercentageIncorrectSuffix) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("15 café"), {0, 7},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiFrPercentageCorrectSuffix) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("25 pércént"), {0, 10},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_EQ(classification_result.collection, "percentage");
  EXPECT_EQ(classification_result.numeric_value, 25);
  EXPECT_EQ(classification_result.numeric_double_value, 25);
}

TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiJaPercentageCorrectSuffix) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("10パーセント"), {0, 7},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_EQ(classification_result.collection, "percentage");
  EXPECT_EQ(classification_result.numeric_value, 10);
  EXPECT_EQ(classification_result.numeric_double_value, 10);

  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("明日の降水確率は10パーセント 音量を12にセット"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(8, 10), "number",
                                  /*int_value=*/10, /*double_value=*/10.0),
                  IsAnnotatedSpan(CodepointSpan(8, 15), "percentage",
                                  /*int_value=*/10, /*double_value=*/10.0,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(20, 22), "number",
                                  /*int_value=*/12, /*double_value=*/12.0)));
}

TEST_F(NumberAnnotatorTest, FindsAllNumbersInText) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("... 12345 ... 9 is my number and 27% or 68# #38 #39 "
                        "but not $99."),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(
      result,
      UnorderedElementsAre(
          IsAnnotatedSpan(CodepointSpan(4, 9), "number",
                          /*int_value=*/12345, /*double_value=*/12345.0),
          IsAnnotatedSpan(CodepointSpan(14, 15), "number",
                          /*int_value=*/9, /*double_value=*/9.0),
          IsAnnotatedSpan(CodepointSpan(33, 35), "number",
                          /*int_value=*/27, /*double_value=*/27.0),
          IsAnnotatedSpan(CodepointSpan(33, 36), "percentage",
                          /*int_value=*/27, /*double_value=*/27.0,
                          /*priority_score=*/1),
          IsAnnotatedSpan(CodepointSpan(40, 42), "number",
                          /*int_value=*/68, /*double_value=*/68.0),
          IsAnnotatedSpan(CodepointSpan(45, 47), "number",
                          /*int_value=*/38, /*double_value=*/38.0),
          IsAnnotatedSpan(CodepointSpan(49, 51), "number",
                          /*int_value=*/39, /*double_value=*/39.0)));
}

TEST_F(NumberAnnotatorTest, FindsNoNumberInText) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("... 12345a ... 12345..12345 and 123a45 are not valid. "
                        "And -#5% is also bad."),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
  ASSERT_EQ(result.size(), 0);
}

TEST_F(NumberAnnotatorTest, FindsNumberWithPunctuation) {
  std::vector<AnnotatedSpan> result;
  // A number should be followed by only one punctuation sign => 15 is not a
  // number.
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText(
          "It's 12, 13, 14! Or 15??? For sure 16: 17; 18. and -19"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(5, 7), "number",
                                  /*int_value=*/12, /*double_value=*/12.0),
                  IsAnnotatedSpan(CodepointSpan(9, 11), "number",
                                  /*int_value=*/13, /*double_value=*/13.0),
                  IsAnnotatedSpan(CodepointSpan(13, 15), "number",
                                  /*int_value=*/14, /*double_value=*/14.0),
                  IsAnnotatedSpan(CodepointSpan(35, 37), "number",
                                  /*int_value=*/16, /*double_value=*/16.0),
                  IsAnnotatedSpan(CodepointSpan(39, 41), "number",
                                  /*int_value=*/17, /*double_value=*/17.0),
                  IsAnnotatedSpan(CodepointSpan(43, 45), "number",
                                  /*int_value=*/18, /*double_value=*/18.0),
                  IsAnnotatedSpan(CodepointSpan(51, 54), "number",
                                  /*int_value=*/-19, /*double_value=*/-19.0)));
}

TEST_F(NumberAnnotatorTest, FindsFloatNumberWithPunctuation) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("It's 12.123, 13.45, 14.54321! Or 15.1? Maybe 16.33: "
                        "17.21; but for sure 18.90."),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(5, 11), "number",
                                  /*int_value=*/12, /*double_value=*/12.123,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(13, 18), "number",
                                  /*int_value=*/13, /*double_value=*/13.45,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(20, 28), "number",
                                  /*int_value=*/14, /*double_value=*/14.54321,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(33, 37), "number",
                                  /*int_value=*/15, /*double_value=*/15.1,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(45, 50), "number",
                                  /*int_value=*/16, /*double_value=*/16.33,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(52, 57), "number",
                                  /*int_value=*/17, /*double_value=*/17.21,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(72, 77), "number",
                                  /*int_value=*/18, /*double_value=*/18.9,
                                  /*priority_score=*/1)));
}

TEST_F(NumberAnnotatorTest, HandlesNumbersAtBeginning) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("-5"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
      &result));

  EXPECT_THAT(result, UnorderedElementsAre(IsAnnotatedSpan(
                          CodepointSpan(0, 2), "number",
                          /*int_value=*/-5, /*double_value=*/-5)));
}

TEST_F(NumberAnnotatorTest, HandlesNegativeNumbers) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("Number -5 and -5% and not number --5%"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(7, 9), "number",
                                  /*int_value=*/-5, /*double_value=*/-5),
                  IsAnnotatedSpan(CodepointSpan(14, 16), "number",
                                  /*int_value=*/-5, /*double_value=*/-5),
                  IsAnnotatedSpan(CodepointSpan(14, 17), "percentage",
                                  /*int_value=*/-5, /*double_value=*/-5,
                                  /*priority_score=*/1)));
}

TEST_F(NumberAnnotatorTest, FindGoodPercentageContexts) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText(
          "5 percent, 10 pct, 25 pc and 17%, -5 percent, 10% are percentages"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(0, 1), "number",
                                  /*int_value=*/5, /*double_value=*/5),
                  IsAnnotatedSpan(CodepointSpan(0, 9), "percentage",
                                  /*int_value=*/5, /*double_value=*/5,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(11, 13), "number",
                                  /*int_value=*/10, /*double_value=*/10),
                  IsAnnotatedSpan(CodepointSpan(11, 17), "percentage",
                                  /*int_value=*/10, /*double_value=*/10,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(19, 21), "number",
                                  /*int_value=*/25, /*double_value=*/25),
                  IsAnnotatedSpan(CodepointSpan(19, 24), "percentage",
                                  /*int_value=*/25, /*double_value=*/25,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(29, 31), "number",
                                  /*int_value=*/17, /*double_value=*/17),
                  IsAnnotatedSpan(CodepointSpan(29, 32), "percentage",
                                  /*int_value=*/17, /*double_value=*/17,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(34, 36), "number",
                                  /*int_value=*/-5, /*double_value=*/-5),
                  IsAnnotatedSpan(CodepointSpan(34, 44), "percentage",
                                  /*int_value=*/-5, /*double_value=*/-5,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(46, 48), "number",
                                  /*int_value=*/10, /*double_value=*/10),
                  IsAnnotatedSpan(CodepointSpan(46, 49), "percentage",
                                  /*int_value=*/10, /*double_value=*/10,
                                  /*priority_score=*/1)));
}

TEST_F(NumberAnnotatorTest, FindSinglePercentageInContext) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("5%"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
      &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(0, 1), "number",
                                  /*int_value=*/5, /*double_value=*/5),
                  IsAnnotatedSpan(CodepointSpan(0, 2), "percentage",
                                  /*int_value=*/5, /*double_value=*/5,
                                  /*priority_score=*/1)));
}

TEST_F(NumberAnnotatorTest, IgnoreBadPercentageContexts) {
  std::vector<AnnotatedSpan> result;
  // A valid number is followed by only one punctuation element.
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText("10, pct, 25 prc, 5#: percentage are not percentages"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(0, 2), "number",
                                  /*int_value=*/10, /*double_value=*/10),
                  IsAnnotatedSpan(CodepointSpan(9, 11), "number",
                                  /*int_value=*/25, /*double_value=*/25)));
}

TEST_F(NumberAnnotatorTest, IgnoreBadPercentagePunctuationContexts) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText(
          "#!24% or :?33 percent are not valid percentages, nor numbers."),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_TRUE(result.empty());
}

TEST_F(NumberAnnotatorTest, FindPercentageInNonAsciiContext) {
  std::vector<AnnotatedSpan> result;
  EXPECT_TRUE(number_annotator_.FindAll(
      UTF8ToUnicodeText(
          "At the café 10% or 25 percent of people are nice. Only 10%!"),
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));

  EXPECT_THAT(result,
              UnorderedElementsAre(
                  IsAnnotatedSpan(CodepointSpan(12, 14), "number",
                                  /*int_value=*/10, /*double_value=*/10),
                  IsAnnotatedSpan(CodepointSpan(12, 15), "percentage",
                                  /*int_value=*/10, /*double_value=*/10,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(19, 21), "number",
                                  /*int_value=*/25, /*double_value=*/25),
                  IsAnnotatedSpan(CodepointSpan(19, 29), "percentage",
                                  /*int_value=*/25, /*double_value=*/25,
                                  /*priority_score=*/1),
                  IsAnnotatedSpan(CodepointSpan(55, 57), "number",
                                  /*int_value=*/10, /*double_value=*/10),
                  IsAnnotatedSpan(CodepointSpan(55, 58), "percentage",
                                  /*int_value=*/10, /*double_value=*/10,
                                  /*priority_score=*/1)));
}

TEST_F(NumberAnnotatorTest,
       WhenPercentSuffixWithAdditionalIgnoredCharactersDoesNotParseIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23#!? percent"), {0, 13},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest,
       WhenPercentSuffixWithAdditionalRandomTokensDoesNotParseIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23 asdf 3.14 pct asdf"), {0, 21},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest,
       WhenPercentSuffixWithAdditionalRandomPrefixSuffixDoesNotParseIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("abdf23 percentabdf"), {0, 18},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest,
       WhenPercentSuffixWithAdditionalRandomStringsDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("#?!23 percent#!?"), {0, 16},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenBothPercentSymbolAndSuffixDoesNotParseIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23% percent"), {0, 11},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest,
       WhenPercentSymbolWithAdditionalPrefixCharactersDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("#?23%"), {0, 5},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenNumberWithAdditionalCharactersDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23#!?"), {0, 5},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest,
       WhenPercentSymbolWithAdditionalCharactersDoesNotParsesIt) {
  ClassificationResult classification_result;
  // ! does not belong to the percentage annotation
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23%!"), {0, 3},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_EQ(classification_result.collection, "percentage");
  EXPECT_EQ(classification_result.numeric_value, 23);
  EXPECT_EQ(classification_result.numeric_double_value, 23);

  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23%!"), {0, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest,
       WhenAdditionalCharactersWithMisplacedPercentSymbolDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("23.:;%"), {0, 6},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenMultipleMinusSignsDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("--11"), {1, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_THAT(classification_result,
              AllOf(Field(&ClassificationResult::collection, "number"),
                    Field(&ClassificationResult::numeric_value, -11),
                    Field(&ClassificationResult::numeric_double_value, -11)));

  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("--11"), {0, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenMultipleMinusSignsPercentSignDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("--11%"), {1, 5},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_THAT(classification_result,
              AllOf(Field(&ClassificationResult::collection, "percentage"),
                    Field(&ClassificationResult::numeric_value, -11),
                    Field(&ClassificationResult::numeric_double_value, -11)));

  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("--11%"), {0, 5},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenPlusMinusSignsDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("+-11"), {1, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_THAT(classification_result,
              AllOf(Field(&ClassificationResult::collection, "number"),
                    Field(&ClassificationResult::numeric_value, -11),
                    Field(&ClassificationResult::numeric_double_value, -11)));

  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("+-11"), {0, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenMinusPlusSignsDoesNotParsesIt) {
  ClassificationResult classification_result;
  // + right before a number is not included in the number annotation
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("-+11"), {1, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("-+11"), {0, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenMinusSignSuffixDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("10-"), {0, 3},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenMultipleCharSuffixDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("10**"), {0, 2},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_THAT(classification_result,
              AllOf(Field(&ClassificationResult::collection, "number"),
                    Field(&ClassificationResult::numeric_value, 10),
                    Field(&ClassificationResult::numeric_double_value, 10)));

  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("10**"), {0, 3},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("10**"), {0, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenMultipleCharPrefixDoesNotParsesIt) {
  ClassificationResult classification_result;
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("**10"), {1, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
  EXPECT_FALSE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("**10"), {0, 4},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
}

TEST_F(NumberAnnotatorTest, WhenLowestSupportedNumberParsesIt) {
  ClassificationResult classification_result;
  EXPECT_TRUE(number_annotator_.ClassifyText(
      UTF8ToUnicodeText("-1000000000"), {0, 11},
      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

  EXPECT_THAT(
      classification_result,
      AllOf(Field(&ClassificationResult::collection, "number"),
            Field(&ClassificationResult::numeric_value, -1000000000),
            Field(&ClassificationResult::numeric_double_value, -1000000000)));
}

TEST_F(NumberAnnotatorTest, WhenLargestSupportedNumberParsesIt) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("1000000000"), {0, 10}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_THAT( classification_result, AllOf(Field(&ClassificationResult::collection, "number"), Field(&ClassificationResult::numeric_value, 1000000000), Field(&ClassificationResult::numeric_double_value, 1000000000))); } TEST_F(NumberAnnotatorTest, WhenLowestSupportedFloatNumberParsesIt) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("-999999999.999999999"), {0, 20}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_THAT(classification_result, AllOf(Field(&ClassificationResult::collection, "number"), Field(&ClassificationResult::numeric_value, -1000000000), Field(&ClassificationResult::numeric_double_value, -999999999.999999999))); } TEST_F(NumberAnnotatorTest, WhenLargestFloatSupportedNumberParsesIt) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("999999999.999999999"), {0, 19}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_THAT(classification_result, AllOf(Field(&ClassificationResult::collection, "number"), Field(&ClassificationResult::numeric_value, 1000000000), Field(&ClassificationResult::numeric_double_value, 999999999.999999999))); } TEST_F(NumberAnnotatorTest, WhenLargeNumberDoesNotParseIt) { ClassificationResult classification_result; EXPECT_FALSE(number_annotator_.ClassifyText( UTF8ToUnicodeText("1234567890123456789012345678901234567890"), {0, 40}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); } TEST_F(NumberAnnotatorTest, WhenMinusInTheMiddleDoesNotParseIt) { ClassificationResult classification_result; EXPECT_FALSE(number_annotator_.ClassifyText( UTF8ToUnicodeText("2016-2017"), {0, 9}, 
AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); } TEST_F(NumberAnnotatorTest, WhenSuffixWithoutNumberDoesNotParseIt) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("... % ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); ASSERT_EQ(result.size(), 0); } TEST_F(NumberAnnotatorTest, WhenPrefixWithoutNumberDoesNotParseIt) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("... $ ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); ASSERT_EQ(result.size(), 0); } TEST_F(NumberAnnotatorTest, WhenPrefixAndSuffixWithoutNumberDoesNotParseIt) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("... $% ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); ASSERT_EQ(result.size(), 0); } TEST_F(NumberAnnotatorTest, ForNumberAnnotationsSetsScoreAndPriorityScore) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 12345 ..."), {4, 9}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_EQ(classification_result.collection, "number"); EXPECT_EQ(classification_result.numeric_value, 12345); EXPECT_EQ(classification_result.numeric_double_value, 12345); EXPECT_EQ(classification_result.score, 1); EXPECT_EQ(classification_result.priority_score, -10); std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("Come at 9 or 10 ok?"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(8, 9), "number", /*int_value=*/9, /*double_value=*/9), IsAnnotatedSpan(CodepointSpan(13, 15), "number", /*int_value=*/10, /*double_value=*/10))); } TEST_F(NumberAnnotatorTest, ForFloatNumberAnnotationsSetsScoreAndPriorityScore) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 
12345.12345 ..."), {4, 15}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_EQ(classification_result.collection, "number"); EXPECT_EQ(classification_result.numeric_value, 12345); EXPECT_EQ(classification_result.numeric_double_value, 12345.12345); EXPECT_EQ(classification_result.score, 1); EXPECT_EQ(classification_result.priority_score, 1); std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("Results are between 12.5 and 13.5, right?"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(20, 24), "number", /*int_value=*/12, /*double_value=*/12.5, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(29, 33), "number", /*int_value=*/13, /*double_value=*/13.5, /*priority_score=*/1))); } TEST_F(NumberAnnotatorTest, ForPercentageAnnotationsSetsScoreAndPriorityScore) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 12345% ..."), {4, 10}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_EQ(classification_result.collection, "percentage"); EXPECT_EQ(classification_result.numeric_value, 12345); EXPECT_EQ(classification_result.numeric_double_value, 12345); EXPECT_EQ(classification_result.score, 1); EXPECT_EQ(classification_result.priority_score, 1); EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 
12345 percent ..."), {4, 17}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_EQ(classification_result.collection, "percentage"); EXPECT_EQ(classification_result.numeric_value, 12345); EXPECT_EQ(classification_result.numeric_double_value, 12345); EXPECT_EQ(classification_result.score, 1); EXPECT_EQ(classification_result.priority_score, 1); std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("Results are between 9% and 10 percent."), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(20, 21), "number", /*int_value=*/9, /*double_value=*/9), IsAnnotatedSpan(CodepointSpan(20, 22), "percentage", /*int_value=*/9, /*double_value=*/9, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(27, 29), "number", /*int_value=*/10, /*double_value=*/10), IsAnnotatedSpan(CodepointSpan(27, 37), "percentage", /*int_value=*/10, /*double_value=*/10, /*priority_score=*/1))); } TEST_F(NumberAnnotatorTest, NumberDisabledPercentageEnabledForSmartUsecase) { ClassificationResult classification_result; EXPECT_FALSE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 12345 ..."), {4, 9}, AnnotationUsecase_ANNOTATION_USECASE_SMART, &classification_result)); EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 12345% ..."), {4, 10}, AnnotationUsecase_ANNOTATION_USECASE_SMART, &classification_result)); EXPECT_EQ(classification_result.collection, "percentage"); EXPECT_EQ(classification_result.numeric_value, 12345); EXPECT_EQ(classification_result.numeric_double_value, 12345.0); EXPECT_EQ(classification_result.score, 1); EXPECT_EQ(classification_result.priority_score, 1); EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("... 
12345percent ..."), {4, 16}, AnnotationUsecase_ANNOTATION_USECASE_SMART, &classification_result)); EXPECT_EQ(classification_result.collection, "percentage"); EXPECT_EQ(classification_result.numeric_value, 12345); EXPECT_EQ(classification_result.numeric_double_value, 12345); EXPECT_EQ(classification_result.score, 1); EXPECT_EQ(classification_result.priority_score, 1); std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("Accuracy for experiment 3 is 9%."), AnnotationUsecase_ANNOTATION_USECASE_SMART, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(29, 31), "percentage", /*int_value=*/9, /*double_value=*/9.0, /*priority_score=*/1))); } TEST_F(NumberAnnotatorTest, MathOperatorsNotAnnotatedAsNumbersFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("how much is 2 + 2 or 5 - 96 * 89"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(12, 13), "number", /*int_value=*/2, /*double_value=*/2), IsAnnotatedSpan(CodepointSpan(16, 17), "number", /*int_value=*/2, /*double_value=*/2), IsAnnotatedSpan(CodepointSpan(21, 22), "number", /*int_value=*/5, /*double_value=*/5), IsAnnotatedSpan(CodepointSpan(25, 27), "number", /*int_value=*/96, /*double_value=*/96), IsAnnotatedSpan(CodepointSpan(30, 32), "number", /*int_value=*/89, /*double_value=*/89))); } TEST_F(NumberAnnotatorTest, MathOperatorsNotAnnotatedAsNumbersClassifyText) { ClassificationResult classification_result; EXPECT_FALSE(number_annotator_.ClassifyText( UTF8ToUnicodeText("2 + 2"), {2, 3}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_FALSE(number_annotator_.ClassifyText( UTF8ToUnicodeText("2 - 96 * 89"), {2, 3}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); } TEST_F(NumberAnnotatorTest, SlashSeparatesTwoNumbersFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( 
UTF8ToUnicodeText("what's 1 + 2/3 * 4/5 * 6 / 7"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(7, 8), "number", /*int_value=*/1, /*double_value=*/1), IsAnnotatedSpan(CodepointSpan(11, 12), "number", /*int_value=*/2, /*double_value=*/2), IsAnnotatedSpan(CodepointSpan(13, 14), "number", /*int_value=*/3, /*double_value=*/3), IsAnnotatedSpan(CodepointSpan(17, 18), "number", /*int_value=*/4, /*double_value=*/4), IsAnnotatedSpan(CodepointSpan(19, 20), "number", /*int_value=*/5, /*double_value=*/5), IsAnnotatedSpan(CodepointSpan(23, 24), "number", /*int_value=*/6, /*double_value=*/6), IsAnnotatedSpan(CodepointSpan(27, 28), "number", /*int_value=*/7, /*double_value=*/7))); } TEST_F(NumberAnnotatorTest, SlashSeparatesTwoNumbersClassifyText) { ClassificationResult classification_result; EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("what's 1 + 2/3 * 4"), {11, 12}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_EQ(classification_result.collection, "number"); EXPECT_EQ(classification_result.numeric_value, 2); EXPECT_EQ(classification_result.numeric_double_value, 2); EXPECT_EQ(classification_result.score, 1); EXPECT_TRUE(number_annotator_.ClassifyText( UTF8ToUnicodeText("what's 1 + 2/3 * 4"), {13, 14}, AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result)); EXPECT_EQ(classification_result.collection, "number"); EXPECT_EQ(classification_result.numeric_value, 3); EXPECT_EQ(classification_result.numeric_double_value, 3); EXPECT_EQ(classification_result.score, 1); } TEST_F(NumberAnnotatorTest, SlashDoesNotSeparatesTwoNumbersFindAll) { std::vector result; // 2 in the "2/" context is a number because / is punctuation EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("what's 2a2/3 or 2/s4 or 2/ or /3 or //3 or 2//"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre(IsAnnotatedSpan( CodepointSpan(24, 
25), "number", /*int_value=*/2, /*double_value=*/2))); } TEST_F(NumberAnnotatorTest, BracketsContextAnnotatedFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("The interval is: (12, 13) or [-12, -4.5)"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(18, 20), "number", /*int_value=*/12, /*double_value=*/12), IsAnnotatedSpan(CodepointSpan(22, 24), "number", /*int_value=*/13, /*double_value=*/13), IsAnnotatedSpan(CodepointSpan(30, 33), "number", /*int_value=*/-12, /*double_value=*/-12), IsAnnotatedSpan(CodepointSpan(35, 39), "number", /*int_value=*/-4, /*double_value=*/-4.5, /*priority_score=*/1))); } TEST_F(NumberAnnotatorTest, BracketsContextNotAnnotatedFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("The interval is: -(12, 138*)"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_TRUE(result.empty()); } TEST_F(NumberAnnotatorTest, FractionalNumberDotsFindAll) { std::vector result; // Dots source: https://unicode-search.net/unicode-namesearch.pl?term=period EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("3.1 3﹒2 3.3"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(0, 3), "number", /*int_value=*/3, /*double_value=*/3.1, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(4, 7), "number", /*int_value=*/3, /*double_value=*/3.2, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(8, 11), "number", /*int_value=*/3, /*double_value=*/3.3, /*priority_score=*/1))); } TEST_F(NumberAnnotatorTest, NonAsciiDigitsFindAll) { std::vector result; // Dots source: https://unicode-search.net/unicode-namesearch.pl?term=period // Digits source: https://unicode-search.net/unicode-namesearch.pl?term=digit EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("3 3﹒2 3.3%"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); 
EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(0, 1), "number", /*int_value=*/3, /*double_value=*/3), IsAnnotatedSpan(CodepointSpan(2, 5), "number", /*int_value=*/3, /*double_value=*/3.2, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(6, 9), "number", /*int_value=*/3, /*double_value=*/3.3, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(6, 10), "percentage", /*int_value=*/3, /*double_value=*/3.3, /*priority_score=*/1))); } TEST_F(NumberAnnotatorTest, AnnotatedZeroPrecededNumbersFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("Numbers: 0.9 or 09 or 09.9 or 032310"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(9, 12), "number", /*int_value=*/0, /*double_value=*/0.9, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(16, 18), "number", /*int_value=*/9, /*double_value=*/9), IsAnnotatedSpan(CodepointSpan(22, 26), "number", /*int_value=*/9, /*double_value=*/9.9, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(30, 36), "number", /*int_value=*/32310, /*double_value=*/32310))); } TEST_F(NumberAnnotatorTest, ZeroAfterDotFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("15.0 16.00"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(0, 4), "number", /*int_value=*/15, /*double_value=*/15), IsAnnotatedSpan(CodepointSpan(5, 10), "number", /*int_value=*/16, /*double_value=*/16))); } TEST_F(NumberAnnotatorTest, NineDotNineFindAll) { std::vector result; EXPECT_TRUE(number_annotator_.FindAll( UTF8ToUnicodeText("9.9 9.99 99.99 99.999 99.9999"), AnnotationUsecase_ANNOTATION_USECASE_RAW, &result)); EXPECT_THAT(result, UnorderedElementsAre( IsAnnotatedSpan(CodepointSpan(0, 3), "number", /*int_value=*/9, /*double_value=*/9.9, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(4, 8), "number", /*int_value=*/9, 
/*double_value=*/9.99, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(9, 14), "number", /*int_value=*/99, /*double_value=*/99.99, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(15, 21), "number", /*int_value=*/99, /*double_value=*/99.999, /*priority_score=*/1), IsAnnotatedSpan(CodepointSpan(22, 29), "number", /*int_value=*/99, /*double_value=*/99.9999, /*priority_score=*/1))); } } // namespace test_internal } // namespace libtextclassifier3