/*
 * Copyright (C) 2011 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#undef WEBKIT_IMPLEMENTATION
#undef LOG

#include "base/utf_string_conversions.h"
#include "net/base/escape.h"
#include "PhoneEmailDetector.h"
#include "Settings.h"
#include "WebString.h"

#define LOG_TAG "PhoneNumberDetector"
#include <cutils/log.h>

// Template that FindPartialNumber() walks while scanning text. Meaning of the
// pattern characters, as interpreted by the switch in FindPartialNumber():
//   '2'  a digit in the range 2-9; also marks the start of the match
//   '1'  a digit in the range 1-9
//   '0'  any digit 0-9
//   ' '  zero or more space characters in the input
//   '('  optional opening parenthesis (remembered in mOpenParen)
//   ')'  closing parenthesis; must be present exactly when '(' was seen
//   any other character: optional punctuation, matched at most once
// The "\\" in the literal below is a single backslash pattern character.
#define PHONE_PATTERN "(200) /-.\\ 100 -. 0000"

// URI scheme prefixes stored in m_prefix by FindContent().
static const char kTelSchemaPrefix[] = "tel:";
static const char kEmailSchemaPrefix[] = "mailto:";

// Forward declarations of the matching helpers defined later in this file.
void FindReset(FindState* state);
void FindResetNumber(FindState* state);
FoundState FindPartialNumber(const UChar* chars, unsigned length,
                             FindState* s);
// NOTE(review): FindState is already named by the prototypes above, so it is
// presumably declared by an included header and this line is redundant - confirm.
struct FindState;

// E-mail matching helpers; declared static here, defined at the end of the file.
static FoundState FindPartialEMail(const UChar* , unsigned length, FindState* );
static bool IsDomainChar(UChar ch);
static bool IsMailboxChar(UChar ch);

// Constructs a detector with no match recorded and both detection kinds
// switched off. The flags are assigned in the body (rather than in the
// initializer list) so the result does not depend on member declaration order.
// Without this, FindContent() would read indeterminate flag values whenever it
// runs before IsEnabled() has stored the page settings.
PhoneEmailDetector::PhoneEmailDetector()
    : m_foundResult(FOUND_NONE)
{
    m_isPhoneDetectionEnabled = false;
    m_isEmailDetectionEnabled = false;
}

// Caches the telephone / e-mail format-detection flags from the settings
// returned by GetSettings() for this hit test and reports whether at least one
// of them is on. When no settings object is available the cached flags are
// left untouched and false is returned.
bool PhoneEmailDetector::IsEnabled(const WebKit::WebHitTestInfo& hit_test)
{
    WebCore::Settings* const pageSettings = GetSettings(hit_test);
    if (pageSettings) {
        m_isPhoneDetectionEnabled = pageSettings->formatDetectionTelephone();
        m_isEmailDetectionEnabled = pageSettings->formatDetectionEmail();
        return m_isPhoneDetectionEnabled || m_isEmailDetectionEnabled;
    }
    return false;
}

// Searches [begin, end) for a phone number first and, if no complete number
// was found, for an e-mail address. The matcher state is reset before each
// attempt. m_prefix records which scheme applies ("tel:" after a complete
// phone match, "mailto:" otherwise). *start_pos and *end_pos receive the
// start/end offsets stored in the matcher state. Returns true only when one
// of the matchers reported FOUND_COMPLETE.
bool PhoneEmailDetector::FindContent(const string16::const_iterator& begin,
                             const string16::const_iterator& end,
                             size_t* start_pos,
                             size_t* end_pos)
{
    // Pass 1: phone number (only when enabled).
    FindReset(&m_findState);
    m_foundResult = FOUND_NONE;
    if (m_isPhoneDetectionEnabled)
        m_foundResult = FindPartialNumber(begin, end - begin, &m_findState);

    if (m_foundResult != FOUND_COMPLETE) {
        // Pass 2: e-mail address, starting again from a clean state.
        FindReset(&m_findState);
        if (m_isEmailDetectionEnabled)
            m_foundResult = FindPartialEMail(begin, end - begin, &m_findState);
        m_prefix = kEmailSchemaPrefix;
    } else
        m_prefix = kTelSchemaPrefix;

    *start_pos = m_findState.mStartResult;
    *end_pos = m_findState.mEndResult;
    return m_foundResult == FOUND_COMPLETE;
}

// Returns the UTF-8 text of the last complete match: the digits collected in
// the matcher's store for a phone number, or the plain text of |range| for an
// e-mail address. Returns an empty string when there is no complete match.
std::string PhoneEmailDetector::GetContentText(const WebKit::WebRange& range)
{
    if (m_foundResult != FOUND_COMPLETE)
        return std::string();

    if (m_prefix == kTelSchemaPrefix)
        return UTF16ToUTF8(m_findState.mStore);
    return UTF16ToUTF8(range.toPlainText());
}

// Builds the intent URL for |content_text|: the scheme prefix chosen by the
// last FindContent() call followed by the escaped content text.
GURL PhoneEmailDetector::GetIntentURL(const std::string& content_text)
{
    const std::string escapedText = EscapeQueryParamValue(content_text, true);
    return GURL(m_prefix + escapedText);
}

// Puts |state| back into its initial condition: every byte zeroed, the
// "current character" primed with a space, and the phone-number pattern
// rewound via FindResetNumber().
void FindReset(FindState* state)
{
    memset(state, 0, sizeof(*state));
    state->mCurrent = ' ';
    FindResetNumber(state);
}

// Rewinds only the phone-number part of |state|: points the pattern cursor at
// the start of PHONE_PATTERN, points the store cursor at the start of the
// digit store, and clears the open-parenthesis flag.
void FindResetNumber(FindState* state)
{
    state->mPattern = const_cast<char*>(PHONE_PATTERN);
    state->mStorePtr = state->mStore;
    state->mOpenParen = false;
}

// Scans |chars| (|length| UTF-16 units) for a phone number shaped like
// PHONE_PATTERN, resuming from the pattern / store cursors saved in |s|.
//
// Matched digits are appended to s->mStore. On return the cursors are written
// back to |s|, s->mStartResult holds the offset of the first matched character
// and s->mEndResult the offset just past the last matched digit (0 when no
// digit was consumed by this call).
//
// Returns:
//   FOUND_COMPLETE  the whole pattern was consumed;
//   FOUND_NONE      the character before the match is a digit (or is '1'
//                   preceded by a digit), the scan stopped at the very start
//                   of the pattern or inside a digit group, or a mismatch
//                   occurred while s->mContinuationNode is set;
//   FOUND_PARTIAL   the scan stopped at any other point of the pattern.
FoundState FindPartialNumber(const UChar* chars, unsigned length,
    FindState* s)
{
    char* pattern = s->mPattern;
    UChar* store = s->mStorePtr;
    const UChar* start = chars;
    const UChar* end = chars + length;
    const UChar* lastDigit = 0;
    do {
        bool initialized = s->mInitialized;
        while (chars < end) {
            // Until a match has started, remember the two characters that
            // precede the current one; checkMatch uses them to reject numbers
            // that are glued to preceding digits.
            if (initialized == false) {
                s->mBackTwo = s->mBackOne;
                s->mBackOne = s->mCurrent;
            }
            UChar ch = s->mCurrent = *chars;
            do {
                char patternChar = *pattern;
                switch (patternChar) {
                    case '2':
                        if (initialized == false) {
                            s->mStartResult = chars - start;
                            initialized = true;
                        }
                        // fall through: '2' is also a digit slot
                    case '0':
                    case '1':
                        // The pattern character is the smallest digit accepted
                        // in this slot.
                        if (ch < patternChar || ch > '9')
                            goto resetPattern;
                        *store++ = ch;
                        pattern++;
                        lastDigit = chars;
                        goto nextChar;
                    case '\0':
                        // Pattern exhausted: a further digit means the number
                        // is too long to be a match.
                        if (WTF::isASCIIDigit(ch) == false) {
                            *store = '\0';
                            goto checkMatch;
                        }
                        goto resetPattern;
                    case ' ':
                        // Any run of spaces is consumed without advancing the
                        // pattern; anything else moves on to the next slot.
                        if (ch == patternChar)
                            goto nextChar;
                        break;
                    case '(':
                        if (ch == patternChar) {
                            s->mStartResult = chars - start;
                            initialized = true;
                            s->mOpenParen = true;
                        }
                        goto commonPunctuation;
                    case ')':
                        // ')' must appear exactly when '(' was seen.
                        if ((ch == patternChar) ^ s->mOpenParen)
                            goto resetPattern;
                        // fall through
                    default:
                    commonPunctuation:
                        // Optional punctuation: consume it if present,
                        // otherwise try the same character against the next
                        // pattern slot.
                        if (ch == patternChar) {
                            pattern++;
                            goto nextChar;
                        }
                }
            } while (++pattern); // never false
    nextChar:
            chars++;
        }
        break;
resetPattern:
        if (s->mContinuationNode)
            return FOUND_NONE;
        FindResetNumber(s);
        pattern = s->mPattern;
        store = s->mStorePtr;
    } while (++chars < end);
checkMatch:
    if (WTF::isASCIIDigit(s->mBackOne != '1' ? s->mBackOne : s->mBackTwo)) {
        return FOUND_NONE;
    }
    *store = '\0';
    s->mStorePtr = store;
    s->mPattern = pattern;
    // lastDigit stays null when this call consumed no digit (for example on
    // empty input); avoid doing pointer arithmetic on a null pointer.
    s->mEndResult = lastDigit ? lastDigit - start + 1 : 0;
    char pState = pattern[0];
    return pState == '\0' ? FOUND_COMPLETE : pState == '(' || (WTF::isASCIIDigit(pState) && WTF::isASCIIDigit(pattern[-1])) ?
        FOUND_NONE : FOUND_PARTIAL;
}

// Scans |chars| (|length| UTF-16 units) for an e-mail address.
//
// For every '@' the code first walks forward over the domain, looking for a
// '.' followed by a known top-level domain (a two-letter code from
// domainTwoLetter or one of the longer names in longDomainNames) that is not
// itself followed by more domain characters. It then walks backwards from the
// '@' over mailbox characters to find where the address starts.
//
// On success s->mStartResult / s->mEndResult hold the offsets of the first
// character of the address and of the position just past it, and
// FOUND_COMPLETE is returned. Otherwise FOUND_NONE is returned.
// s->mEndResult may already have been written for a candidate whose mailbox
// part was later rejected.
FoundState FindPartialEMail(const UChar* chars, unsigned length,
    FindState* s)
{
    // the following tables were generated by tests/browser/focusNavigation/BrowserDebug.cpp
    // hand-edit at your own risk
    // Bit n of entry k is set when the letter ('a' + n) may follow ('a' + k)
    // to form a two-letter top-level domain.
    static const int domainTwoLetter[] = {
        0x02df797c,  // a followed by: [cdefgilmnoqrstuwxz]
        0x036e73fb,  // b followed by: [abdefghijmnorstvwyz]
        0x03b67ded,  // c followed by: [acdfghiklmnorsuvxyz]
        0x02005610,  // d followed by: [ejkmoz]
        0x001e00d4,  // e followed by: [ceghrstu]
        0x00025700,  // f followed by: [ijkmor]
        0x015fb9fb,  // g followed by: [abdefghilmnpqrstuwy]
        0x001a3400,  // h followed by: [kmnrtu]
        0x000f7818,  // i followed by: [delmnoqrst]
        0x0000d010,  // j followed by: [emop]
        0x0342b1d0,  // k followed by: [eghimnprwyz]
        0x013e0507,  // l followed by: [abcikrstuvy]
        0x03fffccd,  // m followed by: [acdghklmnopqrstuvwxyz]
        0x0212c975,  // n followed by: [acefgilopruz]
        0x00001000,  // o followed by: [m]
        0x014e3cf1,  // p followed by: [aefghklmnrstwy]
        0x00000001,  // q followed by: [a]
        0x00504010,  // r followed by: [eouw]
        0x032a7fdf,  // s followed by: [abcdeghijklmnortvyz]
        0x026afeec,  // t followed by: [cdfghjklmnoprtvwz]
        0x03041441,  // u followed by: [agkmsyz]
        0x00102155,  // v followed by: [aceginu]
        0x00040020,  // w followed by: [fs]
        0x00000000,  // x
        0x00180010,  // y followed by: [etu]
        0x00401001,  // z followed by: [amw]
    };

    // Longer top-level domains, indexed by first letter. Each entry is a
    // sequence of <length byte><remaining letters> records, terminated by the
    // string's NUL.
    static char const* const longDomainNames[] = {
        "\x03" "ero" "\x03" "rpa",  // aero, arpa
        "\x02" "iz",  // biz
        "\x02" "at" "\x02" "om" "\x03" "oop",  // cat, com, coop
        NULL,  // d
        "\x02" "du",  // edu
        NULL,  // f
        "\x02" "ov",  // gov
        NULL,  // h
        "\x03" "nfo" "\x02" "nt",  // info, int
        "\x03" "obs",  // jobs
        NULL,  // k
        NULL,  // l
        "\x02" "il" "\x03" "obi" "\x05" "useum",  // mil, mobi, museum
        "\x03" "ame" "\x02" "et",  // name, net
        "\x02" "rg",  // org
        "\x02" "ro",  // pro
        NULL,  // q
        NULL,  // r
        NULL,  // s
        "\x05" "ravel",  // travel
        NULL,  // u
        NULL,  // v
        NULL,  // w
        NULL,  // x
        NULL,  // y
        NULL,  // z
    };

    const UChar* start = chars;
    const UChar* end = chars + length;
    while (chars < end) {
        UChar ch = *chars++;
        if (ch != '@')
            continue;
        const UChar* atLocation = chars - 1;
        // search for domain
        if (chars >= end)
            break; // '@' is the last character; there is no domain to read
        ch = *chars++ | 0x20; // convert uppercase to lower
        if (ch < 'a' || ch > 'z')
            continue;
        while (chars < end) {
            ch = *chars++;
            if (IsDomainChar(ch) == false)
                goto nextAt;
            if (ch != '.')
                continue;
            if (end - chars < 2)
                return FOUND_NONE; // fewer than two characters left; must be at least two
            UChar firstLetter = *chars++ | 0x20; // first letter of the domain
            firstLetter -= 'a';
            if (firstLetter > 'z' - 'a')
                continue; // non-letter followed '.'
            int secondLetterMask = domainTwoLetter[firstLetter];
            ch = *chars | 0x20; // second letter of the domain
            ch -= 'a';
            // Use '>' (not '>='), so that 'z' is accepted as a second letter:
            // the masks above encode domains such as .nz, .cz and .az.
            if (ch > 'z' - 'a')
                continue;
            bool secondMatch = (secondLetterMask & 1 << ch) != 0;
            const char* wordMatch = longDomainNames[firstLetter];
            int wordIndex = 0;
            while (wordMatch != NULL) {
                int len = *wordMatch++;
                char match;
                do {
                    match = wordMatch[wordIndex];
                    if (match < 0x20)
                        goto foundDomainStart; // reached next length byte or NUL: whole name matched
                    // Stay inside the buffer, and fold case like the first two
                    // letters so that "COM" is treated the same as "com".
                    if (chars + wordIndex >= end || (chars[wordIndex] | 0x20) != match)
                        break;
                    wordIndex++;
                } while (true);
                wordMatch += len;
                if (*wordMatch == '\0')
                    break;
                wordIndex = 0;
            }
            if (secondMatch) {
                wordIndex = 1;
        foundDomainStart:
                chars += wordIndex;
                // The candidate only counts if the domain really ends here.
                if (chars < end) {
                    ch = *chars;
                    if (ch != '.') {
                        if (IsDomainChar(ch))
                            goto nextDot;
                    } else if (chars + 1 < end && IsDomainChar(chars[1]))
                        goto nextDot;
                }
                // found domain. Search backwards from '@' for beginning of email address
                s->mEndResult = chars - start;
                chars = atLocation;
                if (chars <= start)
                    goto nextAt;
                ch = *--chars;
                if (ch == '.')
                    goto nextAt; // mailbox can't end in period
                do {
                    if (IsMailboxChar(ch) == false) {
                        chars++;
                        break;
                    }
                    if (chars == start)
                        break;
                    ch = *--chars;
                } while (true);
                UChar firstChar = *chars;
                if (firstChar == '.' || firstChar == '@') // mailbox can't start with period or be empty
                    goto nextAt;
                s->mStartResult = chars - start;
                return FOUND_COMPLETE;
            }
    nextDot:
            ;
        }
nextAt:
        // Resume the search for the next '@' right after the one just tried.
        chars = atLocation + 1;
    }
    return FOUND_NONE;
}

// Returns true when |ch| may appear in a domain name: 0-9, '.', '-', A-Z, a-z.
bool IsDomainChar(UChar ch)
{
    // One bit per character from 0x20 upwards, 32 characters per word.
    static const unsigned body[] = {0x03ff6000, 0x07fffffe, 0x07fffffe}; // 0-9 . - A-Z a-z
    ch -= 0x20;
    if (ch > 'z' - 0x20)
        return false; // outside the table (characters below 0x20 wrap to large values)
    const unsigned word = body[ch >> 5];
    const unsigned bit = ch & 0x1f;
    return ((word >> bit) & 1u) != 0;
}

// Returns true when |ch| may appear in the mailbox (local) part of an e-mail
// address.
bool IsMailboxChar(UChar ch)
{
    // According to http://en.wikipedia.org/wiki/Email_address
    // ! # $ % & ' * + - . / 0-9 = ?
    // A-Z ^ _
    // ` a-z { | } ~
    // One bit per character from 0x20 upwards, 32 characters per word.
    static const unsigned body[] = {0xa3ffecfa, 0xc7fffffe, 0x7fffffff};
    ch -= 0x20;
    if (ch > '~' - 0x20)
        return false; // outside the table (characters below 0x20 wrap to large values)
    // Shift an unsigned 1: '?' and '_' map to bit 31, which a signed int
    // shift would push into the sign bit.
    return (body[ch >> 5] & (1u << (ch & 0x1f))) != 0;
}