• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "break_lines.h"
28 
29 #include "CharacterNames.h"
30 #include "TextBreakIterator.h"
31 
32 #if PLATFORM(MAC)
33 #include <CoreServices/CoreServices.h>
34 #endif
35 
36 namespace WebCore {
37 
isBreakableSpace(UChar ch,bool treatNoBreakSpaceAsBreak)38 static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
39 {
40     switch (ch) {
41         case ' ':
42         case '\n':
43         case '\t':
44             return true;
45         case noBreakSpace:
46             return treatNoBreakSpaceAsBreak;
47         default:
48             return false;
49     }
50 }
51 
// Lookup table indexed by the ASCII character that follows a question mark.
// A non-zero entry means a line break is allowed between the '?' and that
// character; the trailing comment on each row lists the characters whose
// entry is 0 (no break).
//
// This differs from the Unicode algorithm only in that Unicode does not break
// between a question mark and a vertical line (U+007C).
static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // 0x00-0x0F: \t
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
    1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // 0x20-0x2F: ! " ' ) , . /
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // 0x30-0x3F: : ; ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // 0x50-0x5F: ]
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1  // 0x70-0x7F: }
};

// Number of entries in the table above; used to bounds-check lookups.
static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize =
    sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(internetExplorerLineBreaksAfterQuestionMarkTable[0]);
66 
shouldBreakAfter(UChar ch,UChar nextCh)67 static inline bool shouldBreakAfter(UChar ch, UChar nextCh)
68 {
69     switch (ch) {
70         // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false.
71         // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer.
72         case '?':
73             return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh];
74         // Internet Explorer always allows breaking after a hyphen.
75         case '-':
76         case softHyphen:
77         // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0
78         // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>.
79         // We may want to remove or conditionalize this workaround at some point.
80         case ideographicComma:
81         case ideographicFullStop:
82 #ifdef ANDROID_LAYOUT
83         // as '/' is used in uri which is always long, we would like to break it
84         case '/':
85 #endif
86             return true;
87         default:
88             return false;
89     }
90 }
91 
needsLineBreakIterator(UChar ch)92 static inline bool needsLineBreakIterator(UChar ch)
93 {
94     return ch > 0x7F && ch != noBreakSpace;
95 }
96 
97 #if PLATFORM(MAC) && defined(BUILDING_ON_TIGER)
lineBreakLocator()98 static inline TextBreakLocatorRef lineBreakLocator()
99 {
100     TextBreakLocatorRef locator = 0;
101     UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
102     return locator;
103 }
104 #endif
105 
nextBreakablePosition(const UChar * str,int pos,int len,bool treatNoBreakSpaceAsBreak)106 int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak)
107 {
108 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
109     TextBreakIterator* breakIterator = 0;
110 #endif
111     int nextBreak = -1;
112 
113     UChar lastCh = pos > 0 ? str[pos - 1] : 0;
114     for (int i = pos; i < len; i++) {
115         UChar ch = str[i];
116 
117         if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch))
118             return i;
119 
120         if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) {
121             if (nextBreak < i && i) {
122 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
123                 if (!breakIterator)
124                     breakIterator = lineBreakIterator(str, len);
125                 if (breakIterator)
126                     nextBreak = textBreakFollowing(breakIterator, i - 1);
127 #else
128                 static TextBreakLocatorRef breakLocator = lineBreakLocator();
129                 if (breakLocator) {
130                     UniCharArrayOffset nextUCBreak;
131                     if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0)
132                         nextBreak = nextUCBreak;
133                 }
134 #endif
135             }
136             if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak))
137                 return i;
138         }
139 
140         lastCh = ch;
141     }
142 
143     return len;
144 }
145 
146 } // namespace WebCore
147