1 /*
2 * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22 #ifndef BidiResolver_h
23 #define BidiResolver_h
24
25 #include "platform/text/BidiCharacterRun.h"
26 #include "platform/text/BidiContext.h"
27 #include "platform/text/BidiRunList.h"
28 #include "platform/text/TextDirection.h"
29 #include "wtf/HashMap.h"
30 #include "wtf/Noncopyable.h"
31 #include "wtf/PassRefPtr.h"
32 #include "wtf/Vector.h"
33
34 namespace blink {
35
36 class RenderObject;
37
38 template <class Iterator> class MidpointState {
39 public:
MidpointState()40 MidpointState()
41 {
42 reset();
43 }
44
reset()45 void reset()
46 {
47 m_numMidpoints = 0;
48 m_currentMidpoint = 0;
49 m_betweenMidpoints = false;
50 }
51
startIgnoringSpaces(const Iterator & midpoint)52 void startIgnoringSpaces(const Iterator& midpoint)
53 {
54 ASSERT(!(m_numMidpoints % 2));
55 addMidpoint(midpoint);
56 }
57
stopIgnoringSpaces(const Iterator & midpoint)58 void stopIgnoringSpaces(const Iterator& midpoint)
59 {
60 ASSERT(m_numMidpoints % 2);
61 addMidpoint(midpoint);
62 }
63
64 // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or
65 // hard line breaks to ensure that they're not ignored.
ensureLineBoxInsideIgnoredSpaces(RenderObject * renderer)66 void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer)
67 {
68 Iterator midpoint(0, renderer, 0);
69 stopIgnoringSpaces(midpoint);
70 startIgnoringSpaces(midpoint);
71 }
72
73 // Adding a pair of midpoints before a character will split it out into a new line box.
ensureCharacterGetsLineBox(Iterator & textParagraphSeparator)74 void ensureCharacterGetsLineBox(Iterator& textParagraphSeparator)
75 {
76 startIgnoringSpaces(Iterator(0, textParagraphSeparator.object(), textParagraphSeparator.offset() - 1));
77 stopIgnoringSpaces(Iterator(0, textParagraphSeparator.object(), textParagraphSeparator.offset()));
78 }
79
checkMidpoints(Iterator & lBreak)80 void checkMidpoints(Iterator& lBreak)
81 {
82 // Check to see if our last midpoint is a start point beyond the line break. If so,
83 // shave it off the list, and shave off a trailing space if the previous end point doesn't
84 // preserve whitespace.
85 if (lBreak.object() && m_numMidpoints && !(m_numMidpoints % 2)) {
86 Iterator* midpointsIterator = m_midpoints.data();
87 Iterator& endpoint = midpointsIterator[m_numMidpoints - 2];
88 const Iterator& startpoint = midpointsIterator[m_numMidpoints - 1];
89 Iterator currpoint = endpoint;
90 while (!currpoint.atEnd() && currpoint != startpoint && currpoint != lBreak)
91 currpoint.increment();
92 if (currpoint == lBreak) {
93 // We hit the line break before the start point. Shave off the start point.
94 m_numMidpoints--;
95 if (endpoint.object()->style()->collapseWhiteSpace() && endpoint.object()->isText())
96 endpoint.setOffset(endpoint.offset() - 1);
97 }
98 }
99 }
100
midpoints()101 Vector<Iterator>& midpoints() { return m_midpoints; }
numMidpoints()102 const unsigned& numMidpoints() const { return m_numMidpoints; }
currentMidpoint()103 const unsigned& currentMidpoint() const { return m_currentMidpoint; }
incrementCurrentMidpoint()104 void incrementCurrentMidpoint() { m_currentMidpoint++; }
betweenMidpoints()105 const bool& betweenMidpoints() const { return m_betweenMidpoints; }
setBetweenMidpoints(bool betweenMidpoint)106 void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; }
107 private:
108 // The goal is to reuse the line state across multiple
109 // lines so we just keep an array around for midpoints and never clear it across multiple
110 // lines. We track the number of items and position using the two other variables.
111 Vector<Iterator> m_midpoints;
112 unsigned m_numMidpoints;
113 unsigned m_currentMidpoint;
114 bool m_betweenMidpoints;
115
addMidpoint(const Iterator & midpoint)116 void addMidpoint(const Iterator& midpoint)
117 {
118 if (m_midpoints.size() <= m_numMidpoints)
119 m_midpoints.grow(m_numMidpoints + 10);
120
121 Iterator* midpointsIterator = m_midpoints.data();
122 midpointsIterator[m_numMidpoints++] = midpoint;
123 }
124 };
125
126 // The BidiStatus at a given position (typically the end of a line) can
127 // be cached and then used to restart bidi resolution at that position.
128 struct BidiStatus {
BidiStatusBidiStatus129 BidiStatus()
130 : eor(WTF::Unicode::OtherNeutral)
131 , lastStrong(WTF::Unicode::OtherNeutral)
132 , last(WTF::Unicode::OtherNeutral)
133 {
134 }
135
136 // Creates a BidiStatus representing a new paragraph root with a default direction.
137 // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19.
BidiStatusBidiStatus138 BidiStatus(TextDirection textDirection, bool isOverride)
139 {
140 WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft;
141 eor = lastStrong = last = direction;
142 context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
143 }
144
BidiStatusBidiStatus145 BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
146 : eor(eorDir)
147 , lastStrong(lastStrongDir)
148 , last(lastDir)
149 , context(bidiContext)
150 {
151 }
152
153 WTF::Unicode::Direction eor;
154 WTF::Unicode::Direction lastStrong;
155 WTF::Unicode::Direction last;
156 RefPtr<BidiContext> context;
157 };
158
159 class BidiEmbedding {
160 public:
BidiEmbedding(WTF::Unicode::Direction direction,BidiEmbeddingSource source)161 BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source)
162 : m_direction(direction)
163 , m_source(source)
164 {
165 }
166
direction()167 WTF::Unicode::Direction direction() const { return m_direction; }
source()168 BidiEmbeddingSource source() const { return m_source; }
169 private:
170 WTF::Unicode::Direction m_direction;
171 BidiEmbeddingSource m_source;
172 };
173
174 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
175 {
176 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
177 }
178
179 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
180 {
181 return !(status1 == status2);
182 }
183
184 enum VisualDirectionOverride {
185 NoVisualOverride,
186 VisualLeftToRightOverride,
187 VisualRightToLeftOverride
188 };
189
190 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
191 // http://unicode.org/reports/tr9
192 template <class Iterator, class Run> class BidiResolver {
193 WTF_MAKE_NONCOPYABLE(BidiResolver);
194 public:
BidiResolver()195 BidiResolver()
196 : m_direction(WTF::Unicode::OtherNeutral)
197 , m_reachedEndOfLine(false)
198 , m_emptyRun(true)
199 , m_nestedIsolateCount(0)
200 , m_trailingSpaceRun(0)
201 {
202 }
203
204 #if ENABLE(ASSERT)
205 ~BidiResolver();
206 #endif
207
position()208 const Iterator& position() const { return m_current; }
position()209 Iterator& position() { return m_current; }
setPositionIgnoringNestedIsolates(const Iterator & position)210 void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
setPosition(const Iterator & position,unsigned nestedIsolatedCount)211 void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
212 {
213 m_current = position;
214 m_nestedIsolateCount = nestedIsolatedCount;
215 }
216
context()217 BidiContext* context() const { return m_status.context.get(); }
setContext(PassRefPtr<BidiContext> c)218 void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
219
setLastDir(WTF::Unicode::Direction lastDir)220 void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
setLastStrongDir(WTF::Unicode::Direction lastStrongDir)221 void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
setEorDir(WTF::Unicode::Direction eorDir)222 void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
223
dir()224 WTF::Unicode::Direction dir() const { return m_direction; }
setDir(WTF::Unicode::Direction d)225 void setDir(WTF::Unicode::Direction d) { m_direction = d; }
226
status()227 const BidiStatus& status() const { return m_status; }
setStatus(const BidiStatus s)228 void setStatus(const BidiStatus s)
229 {
230 ASSERT(s.context);
231 m_status = s;
232 m_paragraphDirectionality = s.context->dir() == WTF::Unicode::LeftToRight ? LTR : RTL;
233 }
234
midpointState()235 MidpointState<Iterator>& midpointState() { return m_midpointState; }
236
237 // The current algorithm handles nested isolates one layer of nesting at a time.
238 // But when we layout each isolated span, we will walk into (and ignore) all
239 // child isolated spans.
enterIsolate()240 void enterIsolate() { m_nestedIsolateCount++; }
exitIsolate()241 void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
inIsolate()242 bool inIsolate() const { return m_nestedIsolateCount; }
243
244 void embed(WTF::Unicode::Direction, BidiEmbeddingSource);
245 bool commitExplicitEmbedding(BidiRunList<Run>&);
246
247 void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false, bool reorderRuns = true);
248
runs()249 BidiRunList<Run>& runs() { return m_runs; }
250
251 // FIXME: This used to be part of deleteRuns() but was a layering violation.
252 // It's unclear if this is still needed.
markCurrentRunEmpty()253 void markCurrentRunEmpty() { m_emptyRun = true; }
254
isolatedRuns()255 Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
256
isEndOfLine(const Iterator & end)257 bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); }
258
259 TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0);
260
261 void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&);
262 MidpointState<Iterator> midpointStateForIsolatedRun(Run*);
263
endOfLine()264 Iterator endOfLine() const { return m_endOfLine; }
265
trailingSpaceRun()266 Run* trailingSpaceRun() const { return m_trailingSpaceRun; }
267
268 protected:
increment()269 void increment() { m_current.increment(); }
270 // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
271 // pass in some sort of Traits object which knows how to create runs for appending.
272 void appendRun(BidiRunList<Run>&);
273
addTrailingRun(BidiRunList<Run> &,int,int,Run *,BidiContext *,TextDirection)274 Run* addTrailingRun(BidiRunList<Run>&, int, int, Run*, BidiContext*, TextDirection) const { return 0; }
275 Iterator m_current;
276 // sor and eor are "start of run" and "end of run" respectively and correpond
277 // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
278 Iterator m_sor; // Points to the first character in the current run.
279 Iterator m_eor; // Points to the last character in the current run.
280 Iterator m_last;
281 BidiStatus m_status;
282 WTF::Unicode::Direction m_direction;
283 // m_endOfRunAtEndOfLine is "the position last eor in the end of line"
284 Iterator m_endOfRunAtEndOfLine;
285 Iterator m_endOfLine;
286 bool m_reachedEndOfLine;
287 Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator
288 bool m_emptyRun;
289
290 // FIXME: This should not belong to the resolver, but rather be passed
291 // into createBidiRunsForLine by the caller.
292 BidiRunList<Run> m_runs;
293
294 MidpointState<Iterator> m_midpointState;
295
296 unsigned m_nestedIsolateCount;
297 Vector<Run*> m_isolatedRuns;
298 Run* m_trailingSpaceRun;
299 TextDirection m_paragraphDirectionality;
300
301 private:
302 void raiseExplicitEmbeddingLevel(BidiRunList<Run>&, WTF::Unicode::Direction from, WTF::Unicode::Direction to);
303 void lowerExplicitEmbeddingLevel(BidiRunList<Run>&, WTF::Unicode::Direction from);
304 void checkDirectionInLowerRaiseEmbeddingLevel();
305
306 void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction);
307 void reorderRunsFromLevels(BidiRunList<Run>&) const;
308
needsToApplyL1Rule(BidiRunList<Run> &)309 bool needsToApplyL1Rule(BidiRunList<Run>&) { return false; }
findFirstTrailingSpaceAtRun(Run *)310 int findFirstTrailingSpaceAtRun(Run*) { return 0; }
311 // http://www.unicode.org/reports/tr9/#L1
312 void applyL1Rule(BidiRunList<Run>&);
313
314 Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
315 HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun;
316 };
317
318 #if ENABLE(ASSERT)
319 template <class Iterator, class Run>
~BidiResolver()320 BidiResolver<Iterator, Run>::~BidiResolver()
321 {
322 // The owner of this resolver should have handled the isolated runs.
323 ASSERT(m_isolatedRuns.isEmpty());
324 }
325 #endif
326
327 template <class Iterator, class Run>
appendRun(BidiRunList<Run> & runs)328 void BidiResolver<Iterator, Run>::appendRun(BidiRunList<Run>& runs)
329 {
330 if (!m_emptyRun && !m_eor.atEnd()) {
331 unsigned startOffset = m_sor.offset();
332 unsigned endOffset = m_eor.offset();
333
334 if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) {
335 m_reachedEndOfLine = true;
336 endOffset = m_endOfRunAtEndOfLine.offset();
337 }
338
339 if (endOffset >= startOffset)
340 runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
341
342 m_eor.increment();
343 m_sor = m_eor;
344 }
345
346 m_direction = WTF::Unicode::OtherNeutral;
347 m_status.eor = WTF::Unicode::OtherNeutral;
348 }
349
350 template <class Iterator, class Run>
embed(WTF::Unicode::Direction dir,BidiEmbeddingSource source)351 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source)
352 {
353 // Isolated spans compute base directionality during their own UBA run.
354 // Do not insert fake embed characters once we enter an isolated span.
355 ASSERT(!inIsolate());
356 using namespace WTF::Unicode;
357
358 ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride);
359 m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
360 }
361
362 template <class Iterator, class Run>
checkDirectionInLowerRaiseEmbeddingLevel()363 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
364 {
365 using namespace WTF::Unicode;
366
367 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
368 ASSERT(m_status.last != NonSpacingMark
369 && m_status.last != BoundaryNeutral
370 && m_status.last != RightToLeftEmbedding
371 && m_status.last != LeftToRightEmbedding
372 && m_status.last != RightToLeftOverride
373 && m_status.last != LeftToRightOverride
374 && m_status.last != PopDirectionalFormat);
375 if (m_direction == OtherNeutral)
376 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
377 }
378
379 template <class Iterator, class Run>
lowerExplicitEmbeddingLevel(BidiRunList<Run> & runs,WTF::Unicode::Direction from)380 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(BidiRunList<Run>& runs, WTF::Unicode::Direction from)
381 {
382 using namespace WTF::Unicode;
383
384 if (!m_emptyRun && m_eor != m_last) {
385 checkDirectionInLowerRaiseEmbeddingLevel();
386 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
387 if (from == LeftToRight) {
388 // bidi.sor ... bidi.eor ... bidi.last L
389 if (m_status.eor == EuropeanNumber) {
390 if (m_status.lastStrong != LeftToRight) {
391 m_direction = EuropeanNumber;
392 appendRun(runs);
393 }
394 } else if (m_status.eor == ArabicNumber) {
395 m_direction = ArabicNumber;
396 appendRun(runs);
397 } else if (m_status.lastStrong != LeftToRight) {
398 appendRun(runs);
399 m_direction = LeftToRight;
400 }
401 } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
402 appendRun(runs);
403 m_direction = RightToLeft;
404 }
405 m_eor = m_last;
406 }
407
408 appendRun(runs);
409 m_emptyRun = true;
410
411 // sor for the new run is determined by the higher level (rule X10)
412 setLastDir(from);
413 setLastStrongDir(from);
414 m_eor = Iterator();
415 }
416
417 template <class Iterator, class Run>
raiseExplicitEmbeddingLevel(BidiRunList<Run> & runs,WTF::Unicode::Direction from,WTF::Unicode::Direction to)418 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(BidiRunList<Run>& runs, WTF::Unicode::Direction from, WTF::Unicode::Direction to)
419 {
420 using namespace WTF::Unicode;
421
422 if (!m_emptyRun && m_eor != m_last) {
423 checkDirectionInLowerRaiseEmbeddingLevel();
424 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
425 if (to == LeftToRight) {
426 // bidi.sor ... bidi.eor ... bidi.last L
427 if (m_status.eor == EuropeanNumber) {
428 if (m_status.lastStrong != LeftToRight) {
429 m_direction = EuropeanNumber;
430 appendRun(runs);
431 }
432 } else if (m_status.eor == ArabicNumber) {
433 m_direction = ArabicNumber;
434 appendRun(runs);
435 } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
436 appendRun(runs);
437 m_direction = LeftToRight;
438 }
439 } else if (m_status.eor == ArabicNumber
440 || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
441 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
442 appendRun(runs);
443 m_direction = RightToLeft;
444 }
445 m_eor = m_last;
446 }
447
448 appendRun(runs);
449 m_emptyRun = true;
450
451 setLastDir(to);
452 setLastStrongDir(to);
453 m_eor = Iterator();
454 }
455
456 template <class Iterator, class Run>
applyL1Rule(BidiRunList<Run> & runs)457 void BidiResolver<Iterator, Run>::applyL1Rule(BidiRunList<Run>& runs)
458 {
459 ASSERT(runs.runCount());
460 if (!needsToApplyL1Rule(runs))
461 return;
462
463 Run* trailingSpaceRun = runs.logicallyLastRun();
464
465 int firstSpace = findFirstTrailingSpaceAtRun(trailingSpaceRun);
466 if (firstSpace == trailingSpaceRun->stop())
467 return;
468
469 bool shouldReorder = trailingSpaceRun != (m_paragraphDirectionality == LTR ? runs.lastRun() : runs.firstRun());
470 if (firstSpace != trailingSpaceRun->start()) {
471 BidiContext* baseContext = context();
472 while (BidiContext* parent = baseContext->parent())
473 baseContext = parent;
474
475 m_trailingSpaceRun = addTrailingRun(runs, firstSpace, trailingSpaceRun->m_stop, trailingSpaceRun, baseContext, m_paragraphDirectionality);
476 ASSERT(m_trailingSpaceRun);
477 trailingSpaceRun->m_stop = firstSpace;
478 return;
479 }
480 if (!shouldReorder) {
481 m_trailingSpaceRun = trailingSpaceRun;
482 return;
483 }
484
485 if (m_paragraphDirectionality == LTR) {
486 runs.moveRunToEnd(trailingSpaceRun);
487 trailingSpaceRun->m_level = 0;
488 } else {
489 runs.moveRunToBeginning(trailingSpaceRun);
490 trailingSpaceRun->m_level = 1;
491 }
492 m_trailingSpaceRun = trailingSpaceRun;
493 }
494
495 template <class Iterator, class Run>
commitExplicitEmbedding(BidiRunList<Run> & runs)496 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding(BidiRunList<Run>& runs)
497 {
498 // When we're "inIsolate()" we're resolving the parent context which
499 // ignores (skips over) the isolated content, including embedding levels.
500 // We should never accrue embedding levels while skipping over isolated content.
501 ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
502
503 using namespace WTF::Unicode;
504
505 unsigned char fromLevel = context()->level();
506 RefPtr<BidiContext> toContext = context();
507
508 for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
509 BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
510 if (embedding.direction() == PopDirectionalFormat) {
511 if (BidiContext* parentContext = toContext->parent())
512 toContext = parentContext;
513 } else {
514 Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight;
515 bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride;
516 unsigned char level = toContext->level();
517 if (direction == RightToLeft)
518 level = nextGreaterOddLevel(level);
519 else
520 level = nextGreaterEvenLevel(level);
521 if (level < BidiContext::kMaxLevel)
522 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
523 }
524 }
525
526 unsigned char toLevel = toContext->level();
527
528 if (toLevel > fromLevel)
529 raiseExplicitEmbeddingLevel(runs, fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
530 else if (toLevel < fromLevel)
531 lowerExplicitEmbeddingLevel(runs, fromLevel % 2 ? RightToLeft : LeftToRight);
532
533 setContext(toContext);
534
535 m_currentExplicitEmbeddingSequence.clear();
536
537 return fromLevel != toLevel;
538 }
539
540 template <class Iterator, class Run>
updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)541 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)
542 {
543 using namespace WTF::Unicode;
544 switch (dirCurrent) {
545 case EuropeanNumberTerminator:
546 if (m_status.last != EuropeanNumber)
547 m_status.last = EuropeanNumberTerminator;
548 break;
549 case EuropeanNumberSeparator:
550 case CommonNumberSeparator:
551 case SegmentSeparator:
552 case WhiteSpaceNeutral:
553 case OtherNeutral:
554 switch (m_status.last) {
555 case LeftToRight:
556 case RightToLeft:
557 case RightToLeftArabic:
558 case EuropeanNumber:
559 case ArabicNumber:
560 m_status.last = dirCurrent;
561 break;
562 default:
563 m_status.last = OtherNeutral;
564 }
565 break;
566 case NonSpacingMark:
567 case BoundaryNeutral:
568 case RightToLeftEmbedding:
569 case LeftToRightEmbedding:
570 case RightToLeftOverride:
571 case LeftToRightOverride:
572 case PopDirectionalFormat:
573 // ignore these
574 break;
575 case EuropeanNumber:
576 // fall through
577 default:
578 m_status.last = dirCurrent;
579 }
580 }
581
582 template <class Iterator, class Run>
reorderRunsFromLevels(BidiRunList<Run> & runs)583 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels(BidiRunList<Run>& runs) const
584 {
585 unsigned char levelLow = BidiContext::kMaxLevel;
586 unsigned char levelHigh = 0;
587 for (Run* run = runs.firstRun(); run; run = run->next()) {
588 levelHigh = std::max(run->level(), levelHigh);
589 levelLow = std::min(run->level(), levelLow);
590 }
591
592 // This implements reordering of the line (L2 according to Bidi spec):
593 // http://unicode.org/reports/tr9/#L2
594 // L2. From the highest level found in the text to the lowest odd level on each line,
595 // reverse any contiguous sequence of characters that are at that level or higher.
596
597 // Reversing is only done up to the lowest odd level.
598 if (!(levelLow % 2))
599 levelLow++;
600
601 unsigned count = runs.runCount() - 1;
602
603 while (levelHigh >= levelLow) {
604 unsigned i = 0;
605 Run* run = runs.firstRun();
606 while (i < count) {
607 for (;i < count && run && run->level() < levelHigh; i++)
608 run = run->next();
609 unsigned start = i;
610 for (;i <= count && run && run->level() >= levelHigh; i++)
611 run = run->next();
612 unsigned end = i - 1;
613 runs.reverseRuns(start, end);
614 }
615 levelHigh--;
616 }
617 }
618
619 template <class Iterator, class Run>
determineParagraphDirectionality(bool * hasStrongDirectionality)620 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality)
621 {
622 while (!m_current.atEnd()) {
623 if (inIsolate()) {
624 increment();
625 continue;
626 }
627 if (m_current.atParagraphSeparator())
628 break;
629 UChar32 current = m_current.current();
630 if (UNLIKELY(U16_IS_SURROGATE(current))) {
631 increment();
632 // If this not the high part of the surrogate pair, then drop it and move to the next.
633 if (!U16_IS_SURROGATE_LEAD(current))
634 continue;
635 UChar high = static_cast<UChar>(current);
636 if (m_current.atEnd())
637 continue;
638 UChar low = m_current.current();
639 // Verify the low part. If invalid, then assume an invalid surrogate pair and retry.
640 if (!U16_IS_TRAIL(low))
641 continue;
642 current = U16_GET_SUPPLEMENTARY(high, low);
643 }
644 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current);
645 if (charDirection == WTF::Unicode::LeftToRight) {
646 if (hasStrongDirectionality)
647 *hasStrongDirectionality = true;
648 return LTR;
649 }
650 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
651 if (hasStrongDirectionality)
652 *hasStrongDirectionality = true;
653 return RTL;
654 }
655 increment();
656 }
657 if (hasStrongDirectionality)
658 *hasStrongDirectionality = false;
659 return LTR;
660 }
661
662 template <class Iterator, class Run>
createBidiRunsForLine(const Iterator & end,VisualDirectionOverride override,bool hardLineBreak,bool reorderRuns)663 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak, bool reorderRuns)
664 {
665 using namespace WTF::Unicode;
666
667 ASSERT(m_direction == OtherNeutral);
668 m_trailingSpaceRun = 0;
669
670 m_endOfLine = end;
671
672 if (override != NoVisualOverride) {
673 m_emptyRun = false;
674 m_sor = m_current;
675 m_eor = Iterator();
676 while (m_current != end && !m_current.atEnd()) {
677 m_eor = m_current;
678 increment();
679 }
680 m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft;
681 appendRun(m_runs);
682 m_runs.setLogicallyLastRun(m_runs.lastRun());
683 if (override == VisualRightToLeftOverride && m_runs.runCount())
684 m_runs.reverseRuns(0, m_runs.runCount() - 1);
685 return;
686 }
687
688 m_emptyRun = true;
689
690 m_eor = Iterator();
691
692 m_last = m_current;
693 bool lastLineEnded = false;
694 BidiResolver<Iterator, Run> stateAtEnd;
695
696 while (true) {
697 if (inIsolate() && m_emptyRun) {
698 m_sor = m_current;
699 m_emptyRun = false;
700 }
701
702 if (!lastLineEnded && isEndOfLine(end)) {
703 if (m_emptyRun)
704 break;
705
706 stateAtEnd.m_status = m_status;
707 stateAtEnd.m_sor = m_sor;
708 stateAtEnd.m_eor = m_eor;
709 stateAtEnd.m_last = m_last;
710 stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
711 stateAtEnd.m_lastBeforeET = m_lastBeforeET;
712 stateAtEnd.m_emptyRun = m_emptyRun;
713 m_endOfRunAtEndOfLine = m_last;
714 lastLineEnded = true;
715 }
716 Direction dirCurrent;
717 if (lastLineEnded && (hardLineBreak || m_current.atEnd())) {
718 BidiContext* c = context();
719 if (hardLineBreak) {
720 // A deviation from the Unicode Bidi Algorithm in order to match
721 // WinIE and user expectations: hard line breaks reset bidi state
722 // coming from unicode bidi control characters, but not those from
723 // DOM nodes with specified directionality
724 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
725
726 dirCurrent = stateAtEnd.context()->dir();
727 stateAtEnd.setEorDir(dirCurrent);
728 stateAtEnd.setLastDir(dirCurrent);
729 stateAtEnd.setLastStrongDir(dirCurrent);
730 } else {
731 while (c->parent())
732 c = c->parent();
733 dirCurrent = c->dir();
734 }
735 } else {
736 dirCurrent = m_current.direction();
737 if (context()->override()
738 && dirCurrent != RightToLeftEmbedding
739 && dirCurrent != LeftToRightEmbedding
740 && dirCurrent != RightToLeftOverride
741 && dirCurrent != LeftToRightOverride
742 && dirCurrent != PopDirectionalFormat)
743 dirCurrent = context()->dir();
744 else if (dirCurrent == NonSpacingMark)
745 dirCurrent = m_status.last;
746 }
747
748 // We ignore all character directionality while in unicode-bidi: isolate spans.
749 // We'll handle ordering the isolated characters in a second pass.
750 if (inIsolate())
751 dirCurrent = OtherNeutral;
752
753 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
754 switch (dirCurrent) {
755
756 // embedding and overrides (X1-X9 in the Bidi specs)
757 case RightToLeftEmbedding:
758 case LeftToRightEmbedding:
759 case RightToLeftOverride:
760 case LeftToRightOverride:
761 case PopDirectionalFormat:
762 embed(dirCurrent, FromUnicode);
763 commitExplicitEmbedding(m_runs);
764 break;
765
766 // strong types
767 case LeftToRight:
768 switch (m_status.last) {
769 case RightToLeft:
770 case RightToLeftArabic:
771 case EuropeanNumber:
772 case ArabicNumber:
773 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
774 appendRun(m_runs);
775 break;
776 case LeftToRight:
777 break;
778 case EuropeanNumberSeparator:
779 case EuropeanNumberTerminator:
780 case CommonNumberSeparator:
781 case BoundaryNeutral:
782 case BlockSeparator:
783 case SegmentSeparator:
784 case WhiteSpaceNeutral:
785 case OtherNeutral:
786 if (m_status.eor == EuropeanNumber) {
787 if (m_status.lastStrong != LeftToRight) {
788 // the numbers need to be on a higher embedding level, so let's close that run
789 m_direction = EuropeanNumber;
790 appendRun(m_runs);
791 if (context()->dir() != LeftToRight) {
792 // the neutrals take the embedding direction, which is R
793 m_eor = m_last;
794 m_direction = RightToLeft;
795 appendRun(m_runs);
796 }
797 }
798 } else if (m_status.eor == ArabicNumber) {
799 // Arabic numbers are always on a higher embedding level, so let's close that run
800 m_direction = ArabicNumber;
801 appendRun(m_runs);
802 if (context()->dir() != LeftToRight) {
803 // the neutrals take the embedding direction, which is R
804 m_eor = m_last;
805 m_direction = RightToLeft;
806 appendRun(m_runs);
807 }
808 } else if (m_status.lastStrong != LeftToRight) {
809 // last stuff takes embedding dir
810 if (context()->dir() == RightToLeft) {
811 m_eor = m_last;
812 m_direction = RightToLeft;
813 }
814 appendRun(m_runs);
815 }
816 default:
817 break;
818 }
819 m_eor = m_current;
820 m_status.eor = LeftToRight;
821 m_status.lastStrong = LeftToRight;
822 m_direction = LeftToRight;
823 break;
824 case RightToLeftArabic:
825 case RightToLeft:
826 switch (m_status.last) {
827 case LeftToRight:
828 case EuropeanNumber:
829 case ArabicNumber:
830 appendRun(m_runs);
831 case RightToLeft:
832 case RightToLeftArabic:
833 break;
834 case EuropeanNumberSeparator:
835 case EuropeanNumberTerminator:
836 case CommonNumberSeparator:
837 case BoundaryNeutral:
838 case BlockSeparator:
839 case SegmentSeparator:
840 case WhiteSpaceNeutral:
841 case OtherNeutral:
842 if (m_status.eor == EuropeanNumber) {
843 if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
844 m_eor = m_last;
845 appendRun(m_runs);
846 } else if (m_status.eor == ArabicNumber) {
847 appendRun(m_runs);
848 } else if (m_status.lastStrong == LeftToRight) {
849 if (context()->dir() == LeftToRight)
850 m_eor = m_last;
851 appendRun(m_runs);
852 }
853 default:
854 break;
855 }
856 m_eor = m_current;
857 m_status.eor = RightToLeft;
858 m_status.lastStrong = dirCurrent;
859 m_direction = RightToLeft;
860 break;
861
862 // weak types:
863
864 case EuropeanNumber:
865 if (m_status.lastStrong != RightToLeftArabic) {
866 // if last strong was AL change EN to AN
867 switch (m_status.last) {
868 case EuropeanNumber:
869 case LeftToRight:
870 break;
871 case RightToLeft:
872 case RightToLeftArabic:
873 case ArabicNumber:
874 m_eor = m_last;
875 appendRun(m_runs);
876 m_direction = EuropeanNumber;
877 break;
878 case EuropeanNumberSeparator:
879 case CommonNumberSeparator:
880 if (m_status.eor == EuropeanNumber)
881 break;
882 case EuropeanNumberTerminator:
883 case BoundaryNeutral:
884 case BlockSeparator:
885 case SegmentSeparator:
886 case WhiteSpaceNeutral:
887 case OtherNeutral:
888 if (m_status.eor == EuropeanNumber) {
889 if (m_status.lastStrong == RightToLeft) {
890 // ENs on both sides behave like Rs, so the neutrals should be R.
891 // Terminate the EN run.
892 appendRun(m_runs);
893 // Make an R run.
894 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
895 m_direction = RightToLeft;
896 appendRun(m_runs);
897 // Begin a new EN run.
898 m_direction = EuropeanNumber;
899 }
900 } else if (m_status.eor == ArabicNumber) {
901 // Terminate the AN run.
902 appendRun(m_runs);
903 if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
904 // Make an R run.
905 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
906 m_direction = RightToLeft;
907 appendRun(m_runs);
908 // Begin a new EN run.
909 m_direction = EuropeanNumber;
910 }
911 } else if (m_status.lastStrong == RightToLeft) {
912 // Extend the R run to include the neutrals.
913 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
914 m_direction = RightToLeft;
915 appendRun(m_runs);
916 // Begin a new EN run.
917 m_direction = EuropeanNumber;
918 }
919 default:
920 break;
921 }
922 m_eor = m_current;
923 m_status.eor = EuropeanNumber;
924 if (m_direction == OtherNeutral)
925 m_direction = LeftToRight;
926 break;
927 }
928 case ArabicNumber:
929 dirCurrent = ArabicNumber;
930 switch (m_status.last) {
931 case LeftToRight:
932 if (context()->dir() == LeftToRight)
933 appendRun(m_runs);
934 break;
935 case ArabicNumber:
936 break;
937 case RightToLeft:
938 case RightToLeftArabic:
939 case EuropeanNumber:
940 m_eor = m_last;
941 appendRun(m_runs);
942 break;
943 case CommonNumberSeparator:
944 if (m_status.eor == ArabicNumber)
945 break;
946 case EuropeanNumberSeparator:
947 case EuropeanNumberTerminator:
948 case BoundaryNeutral:
949 case BlockSeparator:
950 case SegmentSeparator:
951 case WhiteSpaceNeutral:
952 case OtherNeutral:
953 if (m_status.eor == ArabicNumber
954 || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
955 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
956 // Terminate the run before the neutrals.
957 appendRun(m_runs);
958 // Begin an R run for the neutrals.
959 m_direction = RightToLeft;
960 } else if (m_direction == OtherNeutral) {
961 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
962 }
963 m_eor = m_last;
964 appendRun(m_runs);
965 default:
966 break;
967 }
968 m_eor = m_current;
969 m_status.eor = ArabicNumber;
970 if (m_direction == OtherNeutral)
971 m_direction = ArabicNumber;
972 break;
973 case EuropeanNumberSeparator:
974 case CommonNumberSeparator:
975 break;
976 case EuropeanNumberTerminator:
977 if (m_status.last == EuropeanNumber) {
978 dirCurrent = EuropeanNumber;
979 m_eor = m_current;
980 m_status.eor = dirCurrent;
981 } else if (m_status.last != EuropeanNumberTerminator) {
982 m_lastBeforeET = m_emptyRun ? m_eor : m_last;
983 }
984 break;
985
986 // boundary neutrals should be ignored
987 case BoundaryNeutral:
988 if (m_eor == m_last)
989 m_eor = m_current;
990 break;
991 // neutrals
992 case BlockSeparator:
993 // ### what do we do with newline and paragraph seperators that come to here?
994 break;
995 case SegmentSeparator:
996 // ### implement rule L1
997 break;
998 case WhiteSpaceNeutral:
999 break;
1000 case OtherNeutral:
1001 break;
1002 default:
1003 break;
1004 }
1005
1006 if (lastLineEnded && m_eor == m_current) {
1007 if (!m_reachedEndOfLine) {
1008 m_eor = m_endOfRunAtEndOfLine;
1009 switch (m_status.eor) {
1010 case LeftToRight:
1011 case RightToLeft:
1012 case ArabicNumber:
1013 m_direction = m_status.eor;
1014 break;
1015 case EuropeanNumber:
1016 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
1017 break;
1018 default:
1019 ASSERT_NOT_REACHED();
1020 }
1021 appendRun(m_runs);
1022 }
1023 m_current = end;
1024 m_status = stateAtEnd.m_status;
1025 m_sor = stateAtEnd.m_sor;
1026 m_eor = stateAtEnd.m_eor;
1027 m_last = stateAtEnd.m_last;
1028 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
1029 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
1030 m_emptyRun = stateAtEnd.m_emptyRun;
1031 m_direction = OtherNeutral;
1032 break;
1033 }
1034
1035 updateStatusLastFromCurrentDirection(dirCurrent);
1036 m_last = m_current;
1037
1038 if (m_emptyRun) {
1039 m_sor = m_current;
1040 m_emptyRun = false;
1041 }
1042
1043 increment();
1044 if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
1045 bool committed = commitExplicitEmbedding(m_runs);
1046 if (committed && lastLineEnded) {
1047 m_current = end;
1048 m_status = stateAtEnd.m_status;
1049 m_sor = stateAtEnd.m_sor;
1050 m_eor = stateAtEnd.m_eor;
1051 m_last = stateAtEnd.m_last;
1052 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
1053 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
1054 m_emptyRun = stateAtEnd.m_emptyRun;
1055 m_direction = OtherNeutral;
1056 break;
1057 }
1058 }
1059 }
1060
1061 m_runs.setLogicallyLastRun(m_runs.lastRun());
1062 if (reorderRuns)
1063 reorderRunsFromLevels(m_runs);
1064 m_endOfRunAtEndOfLine = Iterator();
1065 m_endOfLine = Iterator();
1066
1067 if (!hardLineBreak && m_runs.runCount())
1068 applyL1Rule(m_runs);
1069 }
1070
1071 template <class Iterator, class Run>
setMidpointStateForIsolatedRun(Run * run,const MidpointState<Iterator> & midpoint)1072 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint)
1073 {
1074 ASSERT(!m_midpointStateForIsolatedRun.contains(run));
1075 m_midpointStateForIsolatedRun.add(run, midpoint);
1076 }
1077
1078 template<class Iterator, class Run>
midpointStateForIsolatedRun(Run * run)1079 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run)
1080 {
1081 return m_midpointStateForIsolatedRun.take(run);
1082 }
1083
1084
1085 } // namespace blink
1086
1087 #endif // BidiResolver_h
1088