• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//*******************************************************************************
2// Copyright (C) 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html
4//*
5//*   Copyright (C) 2002-2010, International Business Machines
6//*   Corporation and others.  All Rights Reserved.
7//*
8//*******************************************************************************
9
10casing:table(nofallback) {
11    Info {
12        Description { "This is test data file for string casing." }
13
14        LongDescription {
15            "each item is an array with\n"
16            "input string, result string, locale ID[, break iterator]\n"
17            "the break iterator (only for titlecasing) is specified as an int, same as in UBreakIteratorType:\n"
18            "0=UBRK_CHARACTER  1=UBRK_WORD  2=UBRK_LINE  3=UBRK_SENTENCE  4=UBRK_TITLE  -1=default (NULL=words) -2=no breaks (.*)\n"
19            "options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT\n"
20        }
21    }
22    TestData {
23      lowercasing {
24        Headers { "Input", "Output", "Locale" }
25        Cases {
26            { " tHe QUIcK bRoWn", " the quick brown", "" },
27            { "aBIΣßΣ/����", "abiσßς/����", "" },
28            { "aBIΣßΣ/����", "abıσßς/����", "tur" } // tur: 3-letter code for Turkish
29        }
30      }
31      uppercasing {
32        Headers { "Input", "Output", "Locale" }
33        Cases {
34            { " tHe QUIcK bRoWn", " THE QUICK BROWN", "" },
35            { "aBiσßς/ffi����", "ABIΣSSΣ/FFI����", "" },
36            { "aBiσßς/ffi����", "ABİΣSSΣ/FFI����", "az" } // az same casing as tr
37        }
38      }
39      titlecasing {
40        Headers { "Input", "Output", "Locale", "Type", "Options" }
41        Cases {
42            { "ʻaMeLikA huI Pū ʻʻʻiA", "ʻAmelika Hui Pū ʻʻʻIa", "", "-1", "" }, // titlecase first _cased_ letter, j4933
43            { " tHe QUIcK bRoWn", " The Quick Brown", "", "4", "" },
44            { "DŽDždžLJLjljNJNjnj", "DžDžDžLjLjLjNjNjNj", "", "0", "" }, // UBRK_CHARACTER
45            { "ljubav ljubav", "Ljubav Ljubav", "", "-1", "" }, // Lj vs. L+j
46            { "ijssel igloo IJMUIDEN", "Ijssel Igloo Ijmuiden", "", "1", "" }, // Dutch words with the default (empty) locale: only the first letter of "ij" is titlecased
47            { "ijssel igloo IJMUIDEN", "IJssel Igloo IJmuiden", "nl", "1", "" }, // Dutch (nl) titlecasing: word-initial "ij" is titlecased as "IJ"
48            { "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", "", "-1", "" },
49
50            { "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCat. A ʻDog! ʻEtc.", "", "-1", "" }, // default
51            { "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻcat. A ʻdog! ʻetc.", "", "-1", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
52            { "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCaT. A ʻdOg! ʻETc.", "", "3", "L" }, // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
53
54            { "ʻcAt! ʻeTc.", "ʻCat! ʻetc.", "", "-2", "" }, // -2 = trivial break iterator (no breaks: the whole string is one segment)
55            { "ʻcAt! ʻeTc.", "ʻcat! ʻetc.", "", "-2", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
56            { "ʻcAt! ʻeTc.", "ʻCAt! ʻeTc.", "", "-2", "L" }, // U_TITLECASE_NO_LOWERCASE
57            { "ʻcAt! ʻeTc.", "ʻcAt! ʻeTc.", "", "-2", "AL" }, // Both options
58
59            // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
60            // when TITLECASE_NO_LOWERCASE encounters a single-letter word
61            { "a b c", "A B C", "", "1", "L" } // U_TITLECASE_NO_LOWERCASE
62        }
63      }
64      casefolding {
65        Headers { "Input", "Output", "Options" }
66        Cases {
67            { "aBİIıϐßffi��", "abi̇iıβssffi��", "" },
68            { "aBİIıϐßffi��", "abiııβssffi��", "T" } // U_FOLD_CASE_EXCLUDE_SPECIAL_I
69        }
70      }
71    }
72}
73