1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "utils/regex-match.h"
18
19 #include <memory>
20
21 #include "utils/jvm-test-utils.h"
22 #include "utils/utf8/unicodetext.h"
23 #include "utils/utf8/unilib.h"
24 #include "gmock/gmock.h"
25 #include "gtest/gtest.h"
26
27 namespace libtextclassifier3 {
28 namespace {
29
30 class RegexMatchTest : public testing::Test {
31 protected:
RegexMatchTest()32 RegexMatchTest() : unilib_(libtextclassifier3::CreateUniLibForTesting()) {}
33 std::unique_ptr<UniLib> unilib_;
34 };
35
36 #ifndef TC3_DISABLE_LUA
// A verifier script that simply returns true is expected to be accepted, even
// with an empty context string and no matcher.
TEST_F(RegexMatchTest, HandlesSimpleVerification) {
  const std::string empty_context = "";
  const std::string always_true_script = "return true;";

  const bool verified =
      VerifyMatch(empty_context, /*matcher=*/nullptr, always_true_script);

  EXPECT_TRUE(verified);
}
40 #endif // TC3_DISABLE_LUA
41
42 #ifndef TC3_DISABLE_LUA
// Matches a 16-digit run in the message and expects a custom Lua verifier,
// which runs its `luhn` function on the first capture group, to accept it.
TEST_F(RegexMatchTest, HandlesCustomVerification) {
  // Lua script handed to VerifyMatch; it reads the candidate digits from
  // match[1].text and returns the result of the checksum function.
  const std::string luhn_script = R"(
function luhn(candidate)
local sum = 0
local num_digits = string.len(candidate)
local parity = num_digits % 2
for pos = 1,num_digits do
d = tonumber(string.sub(candidate, pos, pos))
if pos % 2 ~= parity then
d = d * 2
end
if d > 9 then
d = d - 9
end
sum = sum + d
end
return (sum % 10) == 0
end
return luhn(match[1].text);
)";

  UnicodeText digits_regex = UTF8ToUnicodeText("(\\d{16})",
                                               /*do_copy=*/true);
  UnicodeText input = UTF8ToUnicodeText("cc: 4012888888881881",
                                        /*do_copy=*/true);

  // Compile the pattern and bind a matcher to the input text.
  const auto compiled_pattern = unilib_->CreateRegexPattern(digits_regex);
  ASSERT_FALSE(compiled_pattern == nullptr);
  const auto matcher = compiled_pattern->Matcher(input);
  ASSERT_FALSE(matcher == nullptr);

  // The verifier needs a matcher positioned on a successful match.
  int find_status = UniLib::RegexMatcher::kNoError;
  const bool found = matcher->Find(&find_status);
  ASSERT_TRUE(found);
  ASSERT_TRUE(find_status == UniLib::RegexMatcher::kNoError);

  const std::string context = input.ToUTF8String();
  EXPECT_TRUE(VerifyMatch(context, matcher.get(), luhn_script));
}
79 #endif // TC3_DISABLE_LUA
80
// Checks GetCapturingGroupText across two successive Find() calls on the same
// matcher. The pattern has two capturing groups in different alternatives, so
// each match is expected to populate exactly one of them:
//   * 1st match ("... give you up"):  group 1 has text, group 2 has no value.
//   * 2nd match ("... let you down"): group 1 has no value, group 2 has text.
// Group 0 is expected to hold the whole matched text in both cases.
TEST_F(RegexMatchTest, RetrievesMatchGroupTest) {
  UnicodeText pattern =
      UTF8ToUnicodeText("never gonna (?:give (you) up|let (you) down)",
                        /*do_copy=*/true);
  const std::unique_ptr<UniLib::RegexPattern> regex_pattern =
      unilib_->CreateRegexPattern(pattern);
  ASSERT_TRUE(regex_pattern != nullptr);
  // do_copy is passed explicitly, matching the other call sites in this file.
  UnicodeText message =
      UTF8ToUnicodeText("never gonna give you up - never gonna let you down",
                        /*do_copy=*/true);
  const std::unique_ptr<UniLib::RegexMatcher> matcher =
      regex_pattern->Matcher(message);
  ASSERT_TRUE(matcher != nullptr);
  int status = UniLib::RegexMatcher::kNoError;

  // First match: the "give (you) up" alternative.
  ASSERT_TRUE(matcher->Find(&status) &&
              status == UniLib::RegexMatcher::kNoError);
  {
    // Assert presence before reading the value so that a missing group is
    // reported as a clear failure instead of reading an empty result.
    auto whole_match = GetCapturingGroupText(matcher.get(), 0);
    ASSERT_TRUE(whole_match.has_value());
    EXPECT_THAT(whole_match.value(), testing::Eq("never gonna give you up"));

    auto first_group = GetCapturingGroupText(matcher.get(), 1);
    ASSERT_TRUE(first_group.has_value());
    EXPECT_THAT(first_group.value(), testing::Eq("you"));

    EXPECT_FALSE(GetCapturingGroupText(matcher.get(), 2).has_value());
  }

  // Second match: the "let (you) down" alternative.
  ASSERT_TRUE(matcher->Find(&status) &&
              status == UniLib::RegexMatcher::kNoError);
  {
    auto whole_match = GetCapturingGroupText(matcher.get(), 0);
    ASSERT_TRUE(whole_match.has_value());
    EXPECT_THAT(whole_match.value(), testing::Eq("never gonna let you down"));

    EXPECT_FALSE(GetCapturingGroupText(matcher.get(), 1).has_value());

    auto second_group = GetCapturingGroupText(matcher.get(), 2);
    ASSERT_TRUE(second_group.has_value());
    EXPECT_THAT(second_group.value(), testing::Eq("you"));
  }
}
111
112 } // namespace
113 } // namespace libtextclassifier3
114