1 /*
2 * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "break_lines.h"
28
29 #include "CharacterNames.h"
30 #include "TextBreakIterator.h"
31
32 #if PLATFORM(MAC)
33 #include <CoreServices/CoreServices.h>
34 #endif
35
36 namespace WebCore {
37
isBreakableSpace(UChar ch,bool treatNoBreakSpaceAsBreak)38 static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
39 {
40 switch (ch) {
41 case ' ':
42 case '\n':
43 case '\t':
44 return true;
45 case noBreakSpace:
46 return treatNoBreakSpaceAsBreak;
47 default:
48 return false;
49 }
50 }
51
// Indexed by the ASCII character that follows a question mark: a non-zero entry
// means a line break is allowed between the '?' and that character.
// This differs from the Unicode algorithm only in that Unicode does not break
// between a question mark and a vertical line (U+007C).
static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // 0x00: \t
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
    1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // 0x20: ! " ' ) , . /
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // 0x30: : ; ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // 0x50: ]
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1  // 0x70: }
};

// Number of entries in the table above; characters at or beyond this value are not covered by it.
static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize = sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(*internetExplorerLineBreaksAfterQuestionMarkTable);
66
shouldBreakAfter(UChar ch,UChar nextCh)67 static inline bool shouldBreakAfter(UChar ch, UChar nextCh)
68 {
69 switch (ch) {
70 // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false.
71 // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer.
72 case '?':
73 return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh];
74 // Internet Explorer always allows breaking after a hyphen.
75 case '-':
76 case softHyphen:
77 // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0
78 // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>.
79 // We may want to remove or conditionalize this workaround at some point.
80 case ideographicComma:
81 case ideographicFullStop:
82 #ifdef ANDROID_LAYOUT
83 // as '/' is used in uri which is always long, we would like to break it
84 case '/':
85 #endif
86 return true;
87 default:
88 return false;
89 }
90 }
91
needsLineBreakIterator(UChar ch)92 static inline bool needsLineBreakIterator(UChar ch)
93 {
94 return ch > 0x7F && ch != noBreakSpace;
95 }
96
97 #if PLATFORM(MAC) && defined(BUILDING_ON_TIGER)
lineBreakLocator()98 static inline TextBreakLocatorRef lineBreakLocator()
99 {
100 TextBreakLocatorRef locator = 0;
101 UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
102 return locator;
103 }
104 #endif
105
nextBreakablePosition(const UChar * str,int pos,int len,bool treatNoBreakSpaceAsBreak)106 int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak)
107 {
108 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
109 TextBreakIterator* breakIterator = 0;
110 #endif
111 int nextBreak = -1;
112
113 UChar lastCh = pos > 0 ? str[pos - 1] : 0;
114 for (int i = pos; i < len; i++) {
115 UChar ch = str[i];
116
117 if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch))
118 return i;
119
120 if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) {
121 if (nextBreak < i && i) {
122 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
123 if (!breakIterator)
124 breakIterator = lineBreakIterator(str, len);
125 if (breakIterator)
126 nextBreak = textBreakFollowing(breakIterator, i - 1);
127 #else
128 static TextBreakLocatorRef breakLocator = lineBreakLocator();
129 if (breakLocator) {
130 UniCharArrayOffset nextUCBreak;
131 if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0)
132 nextBreak = nextUCBreak;
133 }
134 #endif
135 }
136 if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak))
137 return i;
138 }
139
140 lastCh = ch;
141 }
142
143 return len;
144 }
145
146 } // namespace WebCore
147