Lines Matching full:token
94 std::vector<Token> Tokenize(const std::string& utf8_text) const { in Tokenize()
178 std::vector<Token> tokens = tokenizer.Tokenize("Hello world!"); in TEST()
181 ElementsAreArray({Token("Hello", 0, 5), Token("world!", 6, 12)})); in TEST()
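The Tokenize declaration at the top of the listing returns std::vector<Token> from UTF-8 input, and the expectations build Token(value, begin, end) objects and compare whole vectors with ASSERT_EQ and ElementsAreArray, which needs element equality. A minimal sketch of such a value type follows; the field names, constructor shape, and the operator<< overload are assumptions based only on how Token is used in these matches, not taken from the source file.

// A sketch of the Token value type these tests imply; names and the stream
// operator are assumptions for illustration.
#include <ostream>
#include <string>
#include <utility>

struct Token {
  std::string value;  // UTF-8 text of the token.
  int start = 0;      // Begin offset of the token in the input.
  int end = 0;        // End offset (exclusive) of the token in the input.

  Token() = default;
  Token(std::string v, int s, int e) : value(std::move(v)), start(s), end(e) {}

  // ASSERT_EQ on std::vector<Token> and gmock's ElementsAreArray both rely on
  // element equality.
  friend bool operator==(const Token& a, const Token& b) {
    return a.value == b.value && a.start == b.start && a.end == b.end;
  }

  // Not required by the matches; makes test failure messages readable.
  friend std::ostream& operator<<(std::ostream& os, const Token& t) {
    return os << "Token(\"" << t.value << "\", " << t.start << ", " << t.end
              << ")";
  }
};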
214 std::vector<Token>({Token("앨라배마", 0, 4), Token("주", 5, 6), in TEST()
215 Token("전화", 7, 10), Token("(123)", 10, 15), in TEST()
216 Token("456-789", 16, 23), in TEST()
217 Token("웹사이트", 23, 28)})); in TEST()
349 std::vector<Token> tokens; in TEST()
359 ElementsAreArray({Token("問", 0, 1), in TEST()
360 Token("少", 1, 2), in TEST()
361 Token("目", 2, 3), in TEST()
362 Token("hello", 4, 9), in TEST()
363 Token("木", 10, 11), in TEST()
364 Token("輸", 11, 12), in TEST()
365 Token("ย", 12, 13), in TEST()
366 Token("า", 13, 14), in TEST()
367 Token("ม", 14, 15), in TEST()
368 Token("き", 15, 16), in TEST()
369 Token("ゃ", 16, 17)})); in TEST()
379 std::vector<Token> tokens = tokenizer.Tokenize("พระบาท สมเด็จ พระ ปร มิ"); in TEST()
382 std::vector<Token>({Token("พระบาท", 0, 6), in TEST()
383 Token(" ", 6, 7), in TEST()
384 Token("สมเด็จ", 7, 13), in TEST()
385 Token(" ", 13, 14), in TEST()
386 Token("พระ", 14, 17), in TEST()
387 Token(" ", 17, 18), in TEST()
388 Token("ปร", 18, 20), in TEST()
389 Token(" ", 20, 21), in TEST()
390 Token("มิ", 21, 23)})); in TEST()
399 std::vector<Token> tokens = in TEST()
404 std::vector<Token>({Token("The", 0, 3), in TEST()
405 Token(" ", 3, 4), in TEST()
406 Token("interval", 4, 12), in TEST()
407 Token(" ", 12, 13), in TEST()
408 Token("is", 13, 15), in TEST()
409 Token(":", 15, 16), in TEST()
410 Token(" ", 16, 17), in TEST()
411 Token("-", 17, 18), in TEST()
412 Token("(", 18, 19), in TEST()
413 Token("12", 19, 21), in TEST()
414 Token(",", 21, 22), in TEST()
415 Token(" ", 22, 23), in TEST()
416 Token("138", 23, 26), in TEST()
417 Token("*", 26, 27), in TEST()
418 Token(")", 27, 28)})); in TEST()
427 std::vector<Token> tokens = tokenizer.Tokenize("3.1 3﹒2 3.3"); in TEST()
430 std::vector<Token>({Token("3.1", 0, 3), in TEST()
431 Token(" ", 3, 4), in TEST()
432 Token("3﹒2", 4, 7), in TEST()
433 Token(" ", 7, 8), in TEST()
434 Token("3.3", 8, 11)})); in TEST()
445 std::vector<Token> tokens = tokenizer.Tokenize("พระบาทสมเด็จพระปรมิ"); in TEST()
448 std::vector<Token>({Token("พระบาท", 0, 6), in TEST()
449 Token("สมเด็จ", 6, 12), in TEST()
450 Token("พระ", 12, 15), in TEST()
451 Token("ปร", 15, 17), in TEST()
452 Token("มิ", 17, 19)})); in TEST()
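The fragment above splits unspaced Thai text ("พระบาทสมเด็จพระปรมิ") into words, which is dictionary-driven behavior in the spirit of ICU word breaking. Nothing in the listing shows which segmenter the tested tokenizer actually uses, and real ICU output may differ from the expected tokens above; purely as a standalone point of comparison, a sketch using ICU's BreakIterator:

// Comparison sketch only: segment the same unspaced Thai string with ICU's
// word BreakIterator. This is an assumption for illustration; the matches do
// not show how the tested tokenizer performs the split. Build against ICU.
#include <iostream>
#include <memory>
#include <string>

#include <unicode/brkiter.h>
#include <unicode/locid.h>
#include <unicode/unistr.h>

int main() {
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::BreakIterator> breaker(
      icu::BreakIterator::createWordInstance(icu::Locale("th"), status));
  if (U_FAILURE(status)) return 1;

  const icu::UnicodeString text =
      icu::UnicodeString::fromUTF8("พระบาทสมเด็จพระปรมิ");
  breaker->setText(text);

  // Walk consecutive break positions; each [start, end) range is one piece.
  // Positions are UTF-16 code unit offsets (all characters here are BMP, so
  // they happen to coincide with character offsets).
  for (int32_t start = breaker->first(), end = breaker->next();
       end != icu::BreakIterator::DONE; start = end, end = breaker->next()) {
    std::string piece;
    text.tempSubStringBetween(start, end).toUTF8String(piece);
    std::cout << "[" << start << ", " << end << ") \"" << piece << "\"\n";
  }
  return 0;
}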
495 std::vector<Token> tokens = tokenizer.Tokenize( in TEST()
500 std::vector<Token>({Token("こんにちは", 0, 5), in TEST()
501 Token("Japanese-ląnguagę", 5, 22), in TEST()
502 Token("text", 23, 27), in TEST()
503 Token("你好", 28, 30), in TEST()
504 Token("世界", 30, 32), in TEST()
505 Token("http://www.google.com/", 33, 55)})); in TEST()
527 std::vector<Token>({Token("앨라배마123웹사이트", 0, 11)})); in TEST()
537 std::vector<Token>({Token("앨라배마", 0, 4), Token("123", 4, 7), in TEST()
538 Token("웹사이트", 7, 11)})); in TEST()
548 std::vector<Token> tokens = tokenizer.Tokenize("7% -3.14 68.9#? 7% $99 .18."); in TEST()
550 std::vector<Token>( in TEST()
551 {Token("7", 0, 1), Token("%", 1, 2), Token(" ", 2, 3), in TEST()
552 Token("-", 3, 4), Token("3.14", 4, 8), Token(" ", 8, 9), in TEST()
553 Token("68.9", 9, 13), Token("#", 13, 14), Token("?", 14, 15), in TEST()
554 Token(" ", 15, 16), Token("7", 16, 17), Token("%", 17, 18), in TEST()
555 Token(" ", 18, 19), Token("$", 19, 20), Token("99", 20, 22), in TEST()
556 Token(" ", 22, 23), Token(".", 23, 24), Token("18", 24, 26), in TEST()
557 Token(".", 26, 27)})); in TEST()
565 std::vector<Token> tokens = tokenizer.Tokenize("2 pércént 3パーセント"); in TEST()
566 ASSERT_EQ(tokens, std::vector<Token>({Token("2", 0, 1), Token(" ", 1, 2), in TEST()
567 Token("pércént", 2, 9), in TEST()
568 Token(" ", 9, 10), Token("3", 10, 11), in TEST()
569 Token("パーセント", 11, 16)})); in TEST()
577 std::vector<Token> tokens = tokenizer.Tokenize("3 3﹒2 3.3%"); in TEST()
579 std::vector<Token>({Token("3", 0, 1), Token(" ", 1, 2), in TEST()
580 Token("3﹒2", 2, 5), Token(" ", 5, 6), in TEST()
581 Token("3.3", 6, 9), Token("%", 9, 10)})); in TEST()
589 std::vector<Token> tokens = tokenizer.Tokenize("15.12.2019 january's 3.2"); in TEST()
591 std::vector<Token>( in TEST()
592 {Token("15", 0, 2), Token(".", 2, 3), Token("12", 3, 5), in TEST()
593 Token(".", 5, 6), Token("2019", 6, 10), Token(" ", 10, 11), in TEST()
594 Token("january", 11, 18), Token("'", 18, 19), in TEST()
595 Token("s", 19, 20), Token(" ", 20, 21), Token("3", 21, 22), in TEST()
596 Token(".", 22, 23), Token("2", 23, 24)})); in TEST()
604 std::vector<Token> tokens = tokenizer.Tokenize("The+2345++the +íí+"); in TEST()
606 std::vector<Token>({Token("The", 0, 3), Token("+", 3, 4), in TEST()
607 Token("2345", 4, 8), Token("+", 8, 9), in TEST()
608 Token("+", 9, 10), Token("the", 10, 13), in TEST()
609 Token(" ", 13, 14), Token("+", 14, 15), in TEST()
610 Token("íí", 15, 17), Token("+", 17, 18)})); in TEST()
618 std::vector<Token> tokens = tokenizer.Tokenize("2 3  4   5"); in TEST()
619 ASSERT_EQ(tokens, std::vector<Token>({Token("2", 0, 1), Token(" ", 1, 2), in TEST()
620 Token("3", 2, 3), Token("  ", 3, 5), in TEST()
621 Token("4", 5, 6), Token("   ", 6, 9), in TEST()
622 Token("5", 9, 10)})); in TEST()