/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "keywordsUtil.h"

#include <gen/keywords.h>
#include <lexer/lexer.h>
#include <unicode/uchar.h>
#include <util/enumbitops.h>

namespace panda::es2panda::lexer {

/**
 * Bit flags describing how a single ASCII character may be used inside an identifier.
 * The values are combined with operator| and tested with operator&.
 */
enum class AsciiFlags : uint8_t {
    NONE = 0,
    ID_START = 1 << 0,     // may be the first character of an identifier
    ID_CONTINUE = 1 << 1,  // may follow the first character of an identifier
};

/**
 * Returns the union of two flag sets.
 */
constexpr AsciiFlags operator|(AsciiFlags a, AsciiFlags b)
{
    using utype = std::underlying_type_t<AsciiFlags>;
    return static_cast<AsciiFlags>(static_cast<utype>(a) | static_cast<utype>(b));
}

/**
 * Returns the intersection of two flag sets as the raw underlying integer,
 * so the result can be compared against 0 directly.
 * Declared constexpr (like operator|) so that mask tests are usable in constant expressions.
 */
constexpr std::underlying_type_t<AsciiFlags> operator&(AsciiFlags a, AsciiFlags b)
{
    using utype = std::underlying_type_t<AsciiFlags>;
    /* NOLINTNEXTLINE(hicpp-signed-bitwise) */
    return static_cast<utype>(static_cast<utype>(a) & static_cast<utype>(b));
}

/**
 * Identifier classification for the 128 ASCII characters, indexed by character code.
 * '$', '_' and the letters A-Z / a-z are flagged ID_START | ID_CONTINUE, the digits 0-9 are
 * flagged ID_CONTINUE only, and every other entry is NONE.
 * The entries are positional: the trailing comment on each line names the character at that index.
 */
constexpr std::array<AsciiFlags, 128> ASCII_FLAGS = {{
    AsciiFlags::NONE,                               /* NUL */
    AsciiFlags::NONE,                               /* SOH */
    AsciiFlags::NONE,                               /* STX */
    AsciiFlags::NONE,                               /* ETX */
    AsciiFlags::NONE,                               /* EOT */
    AsciiFlags::NONE,                               /* ENQ */
    AsciiFlags::NONE,                               /* ACK */
    AsciiFlags::NONE,                               /* BEL */
    AsciiFlags::NONE,                               /* BS */
    AsciiFlags::NONE,                               /* TAB */
    AsciiFlags::NONE,                               /* LF */
    AsciiFlags::NONE,                               /* VT */
    AsciiFlags::NONE,                               /* FF */
    AsciiFlags::NONE,                               /* CR */
    AsciiFlags::NONE,                               /* SO */
    AsciiFlags::NONE,                               /* SI */
    AsciiFlags::NONE,                               /* DLE */
    AsciiFlags::NONE,                               /* DC1 */
    AsciiFlags::NONE,                               /* DC2 */
    AsciiFlags::NONE,                               /* DC3 */
    AsciiFlags::NONE,                               /* DC4 */
    AsciiFlags::NONE,                               /* NAK */
    AsciiFlags::NONE,                               /* SYN */
    AsciiFlags::NONE,                               /* ETB */
    AsciiFlags::NONE,                               /* CAN */
    AsciiFlags::NONE,                               /* EM */
    AsciiFlags::NONE,                               /* SUB */
    AsciiFlags::NONE,                               /* ESC */
    AsciiFlags::NONE,                               /* FS */
    AsciiFlags::NONE,                               /* GS */
    AsciiFlags::NONE,                               /* RS */
    AsciiFlags::NONE,                               /* US */
    AsciiFlags::NONE,                               /* Space */
    AsciiFlags::NONE,                               /* ! */
    AsciiFlags::NONE,                               /* " */
    AsciiFlags::NONE,                               /* # */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* $ */
    AsciiFlags::NONE,                               /* % */
    AsciiFlags::NONE,                               /* & */
    AsciiFlags::NONE,                               /* ' */
    AsciiFlags::NONE,                               /* ( */
    AsciiFlags::NONE,                               /* ) */
    AsciiFlags::NONE,                               /* * */
    AsciiFlags::NONE,                               /* + */
    AsciiFlags::NONE,                               /* , */
    AsciiFlags::NONE,                               /* - */
    AsciiFlags::NONE,                               /* . */
    AsciiFlags::NONE,                               /* / */
    AsciiFlags::ID_CONTINUE,                        /* 0 */
    AsciiFlags::ID_CONTINUE,                        /* 1 */
    AsciiFlags::ID_CONTINUE,                        /* 2 */
    AsciiFlags::ID_CONTINUE,                        /* 3 */
    AsciiFlags::ID_CONTINUE,                        /* 4 */
    AsciiFlags::ID_CONTINUE,                        /* 5 */
    AsciiFlags::ID_CONTINUE,                        /* 6 */
    AsciiFlags::ID_CONTINUE,                        /* 7 */
    AsciiFlags::ID_CONTINUE,                        /* 8 */
    AsciiFlags::ID_CONTINUE,                        /* 9 */
    AsciiFlags::NONE,                               /* : */
    AsciiFlags::NONE,                               /* ; */
    AsciiFlags::NONE,                               /* < */
    AsciiFlags::NONE,                               /* = */
    AsciiFlags::NONE,                               /* > */
    AsciiFlags::NONE,                               /* ? */
    AsciiFlags::NONE,                               /* @ */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* A */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* B */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* C */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* D */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* E */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* F */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* G */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* H */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* I */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* J */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* K */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* L */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* M */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* N */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* O */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* P */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* Q */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* R */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* S */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* T */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* U */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* V */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* W */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* X */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* Y */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* Z */
    AsciiFlags::NONE,                               /* [ */
    AsciiFlags::NONE,                               /* \ */
    AsciiFlags::NONE,                               /* ] */
    AsciiFlags::NONE,                               /* ^ */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* _ */
    AsciiFlags::NONE,                               /* ` */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* a */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* b */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* c */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* d */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* e */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* f */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* g */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* h */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* i */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* j */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* k */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* l */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* m */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* n */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* o */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* p */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* q */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* r */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* s */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* t */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* u */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* v */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* w */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* x */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* y */
    AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE, /* z */
    AsciiFlags::NONE,                               /* { */
    AsciiFlags::NONE,                               /* | */
    AsciiFlags::NONE,                               /* } */
    AsciiFlags::NONE,                               /* ~ */
    AsciiFlags::NONE                                /* DEL */
}};

/**
 * Reports whether the code point may begin an identifier.
 * Values below LEX_ASCII_MAX_BITS are looked up in ASCII_FLAGS; everything else is answered
 * by the UCHAR_ID_START binary property from <unicode/uchar.h>.
 */
bool KeywordsUtil::IsIdentifierStart(char32_t cp)
{
    if (cp >= LEX_ASCII_MAX_BITS) {
        // A code point written as \u{xxxxx} can need more than 16 bits, so it is passed on
        // as a full 32-bit UChar32 instead of being narrowed to a 2-byte unit.
        return u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_ID_START);
    }

    const auto startBit = ASCII_FLAGS[cp] & AsciiFlags::ID_START;
    return startBit != 0;
}

/**
 * Reports whether the code point may appear after the first character of an identifier.
 * Values below LEX_ASCII_MAX_BITS are looked up in ASCII_FLAGS. Any other code point is accepted
 * when it has the UCHAR_ID_CONTINUE binary property or equals LEX_CHAR_ZWNJ / LEX_CHAR_ZWJ.
 */
bool KeywordsUtil::IsIdentifierPart(char32_t cp)
{
    if (cp >= LEX_ASCII_MAX_BITS) {
        // A code point written as \u{xxxxx} can need more than 16 bits, so it is passed on
        // as a full 32-bit UChar32 instead of being narrowed to a 2-byte unit.
        const auto wideCp = static_cast<UChar32>(cp);
        if (u_hasBinaryProperty(wideCp, UCHAR_ID_CONTINUE)) {
            return true;
        }

        // The two joiner characters are accepted in addition to the ID_Continue set.
        return cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ;
    }

    const auto continueBit = ASCII_FLAGS[cp] & AsciiFlags::ID_CONTINUE;
    return continueBit != 0;
}

/**
 * Begins scanning an identifier whose first code point is `cp`.
 * Reports "Expected an identifier" through the lexer when `cp` cannot start an identifier,
 * then stores `cp` in cp_ and scans the remainder using the keyword table that
 * KeywordsMap::Map returns for `cp`.
 */
void KeywordsUtil::ScanIdentifierStart(char32_t cp)
{
    const bool canStart = KeywordsUtil::IsIdentifierStart(cp);
    if (!canStart) {
        lexer_->ThrowError("Expected an identifier");
    }

    // Remember the first code point; ScanIdContinue reads it back when the token has an escape.
    cp_ = cp;

    ScanIdContinueMaybeKeyword(KeywordsMap::Map(cp));
}

/**
 * Consumes the rest of an identifier and stores it in the current token.
 *
 * Plain characters are skipped over and later copied from the source as whole runs;
 * each backslash escape is decoded through ScanUnicodeEscapeSequence() and appended as a code point.
 * The token becomes a LITERAL_IDENT with keyword type EOS. Its text is the assembled buffer
 * when the token has an escape, otherwise a view straight into the source.
 * Reports "Invalid identifier part" through the lexer when an escape decodes to a code point
 * that IsIdentifierPart() rejects.
 */
void KeywordsUtil::ScanIdContinue()
{
    util::UString ident(lexer_->Allocator());
    size_t startPos = lexer_->GetToken().Start().index;

    if (HasEscape()) {
        // The escape flag is already set on entry (presumably the identifier started with an
        // escape): seed the buffer with the stored first code point and copy raw text from here on.
        ident.Append(cp_);
        startPos = Iterator().Index();
    }

    // Start of the raw source run that has not been copied into `ident` yet.
    size_t pendingFrom = startPos;

    for (;;) {
        if (Iterator().Peek() != LEX_CHAR_BACKSLASH) {
            size_t width {};
            const auto next = Iterator().PeekCp(&width);
            if (!IsIdentifierPart(next)) {
                break;
            }

            Iterator().Forward(width);
            continue;
        }

        // Flush the raw run that precedes the backslash before decoding the escape.
        ident.Append(lexer_->SourceView(pendingFrom, Iterator().Index()));

        const auto decoded = ScanUnicodeEscapeSequence();
        if (!IsIdentifierPart(decoded)) {
            lexer_->ThrowError("Invalid identifier part");
        }

        pendingFrom = Iterator().Index();
        ident.Append(decoded);
    }

    auto &token = lexer_->GetToken();
    token.type_ = TokenType::LITERAL_IDENT;
    token.keywordType_ = TokenType::EOS;

    if (!HasEscape()) {
        // No escapes were seen: the identifier text is exactly the scanned source range.
        token.src_ = lexer_->SourceView(startPos, Iterator().Index());
        return;
    }

    // Append the raw run after the last escape and publish the assembled text.
    ident.Append(lexer_->SourceView(pendingFrom, Iterator().Index()));
    token.src_ = ident.View();
}

/**
 * Scans the rest of an identifier and, when the token contains an escape, checks whether the
 * decoded text spells one of the keywords in `map`.
 *
 * The lookup is skipped when the token has no escape or when `map` is empty.
 * Otherwise `map` is binary-searched by comparing the token's UTF-8 text with each entry's `str`,
 * so the entries are relied upon to be sorted in ascending order of `str`.
 * On an exact match Keywords::SetKeyword() is called for that entry and the search stops.
 *
 * @param map candidate keyword entries for this identifier
 */
void KeywordsUtil::ScanIdContinueMaybeKeyword(Span<const KeywordString> map)
{
    ScanIdContinue();

    if (!HasEscape() || map.empty()) {
        return;
    }

    const auto &str = lexer_->GetToken().Ident().Utf8();

    // Half-open search window [start, end).
    int start = 0;
    int end = static_cast<int>(map.size());

    while (start < end) {
        // Equivalent to (start + end) / 2 for non-negative bounds, without forming the sum.
        const int middle = start + (end - start) / 2;
        const auto &kws = map[middle];

        const int relation = str.compare(kws.str);
        if (relation == 0) {
            Keywords::SetKeyword(this, kws);
            // No other entry can compare equal, so there is nothing left to search.
            return;
        }

        if (relation > 0) {
            start = middle + 1;
        } else {
            end = middle;
        }
    }
}

/**
 * Handles a backslash escape inside an identifier. The iterator must be positioned on the backslash.
 *
 * Marks the current token with HAS_ESCAPE and steps past the backslash. If the next character
 * is LEX_CHAR_LOWERCASE_U, the lexer's ScanUnicodeEscapeSequence() produces the result;
 * otherwise util::StringView::Iterator::INVALID_CP is returned.
 */
char32_t KeywordsUtil::ScanUnicodeEscapeSequence()
{
    ASSERT(Iterator().Peek() == LEX_CHAR_BACKSLASH);

    // The flag is set before validation, so it stays set even when the escape is rejected below.
    lexer_->GetToken().flags_ |= lexer::TokenFlags::HAS_ESCAPE;

    // Skip the backslash itself.
    Iterator().Forward(1);

    if (Iterator().Peek() == LEX_CHAR_LOWERCASE_U) {
        return lexer_->ScanUnicodeEscapeSequence();
    }

    return util::StringView::Iterator::INVALID_CP;
}

}  // namespace panda::es2panda::lexer