• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "content/renderer/android/email_detector.h"
6 
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "content/public/renderer/android_content_detection_prefixes.h"
11 #include "net/base/escape.h"
12 #include "third_party/icu/source/i18n/unicode/regex.h"
13 
namespace {

// Upper bound, in characters, on the length of an email address.
const size_t kMaximumEmailLength = 254;

// Pattern used to recognise an email address inside surrounding text.
//
// It is intentionally stricter than RFC 2822: rarely used special characters
// are rejected so that ordinary text is less likely to be mistaken for an
// address. The pattern is anchored on word boundaries (\b) rather than on
// whitespace, so an address wrapped in punctuation or quote marks still
// matches.
//
// The character classes list upper-case letters only, so the pattern is meant
// to be applied with case-insensitive matching.
const char kEmailRegex[] =
    "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";

}  // anonymous namespace
27 
28 namespace content {
29 
EmailDetector()30 EmailDetector::EmailDetector() {
31 }
32 
GetMaximumContentLength()33 size_t EmailDetector::GetMaximumContentLength() {
34   return kMaximumEmailLength;
35 }
36 
GetIntentURL(const std::string & content_text)37 GURL EmailDetector::GetIntentURL(const std::string& content_text) {
38   if (content_text.empty())
39     return GURL();
40 
41   return GURL(kEmailPrefix +
42       net::EscapeQueryParamValue(content_text, true));
43 }
44 
FindContent(const base::string16::const_iterator & begin,const base::string16::const_iterator & end,size_t * start_pos,size_t * end_pos,std::string * content_text)45 bool EmailDetector::FindContent(const base::string16::const_iterator& begin,
46                                 const base::string16::const_iterator& end,
47                                 size_t* start_pos,
48                                 size_t* end_pos,
49                                 std::string* content_text) {
50   base::string16 utf16_input = base::string16(begin, end);
51   icu::UnicodeString pattern(kEmailRegex);
52   icu::UnicodeString input(utf16_input.data(), utf16_input.length());
53   UErrorCode status = U_ZERO_ERROR;
54   scoped_ptr<icu::RegexMatcher> matcher(
55       new icu::RegexMatcher(pattern,
56                             input,
57                             UREGEX_CASE_INSENSITIVE,
58                             status));
59   if (matcher->find()) {
60     *start_pos = matcher->start(status);
61     DCHECK(U_SUCCESS(status));
62     *end_pos = matcher->end(status);
63     DCHECK(U_SUCCESS(status));
64     icu::UnicodeString content_ustr(matcher->group(status));
65     DCHECK(U_SUCCESS(status));
66     base::UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(),
67         content_text);
68     return true;
69   }
70 
71   return false;
72 }
73 
74 }  // namespace content
75