1 /*
2 * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22 #ifndef BidiResolver_h
23 #define BidiResolver_h
24
25 #include "platform/text/BidiContext.h"
26 #include "platform/text/BidiRunList.h"
27 #include "platform/text/TextDirection.h"
28 #include "wtf/HashMap.h"
29 #include "wtf/Noncopyable.h"
30 #include "wtf/PassRefPtr.h"
31 #include "wtf/Vector.h"
32
33 namespace WebCore {
34
35 template <class Iterator> struct MidpointState {
MidpointStateMidpointState36 MidpointState()
37 {
38 reset();
39 }
40
resetMidpointState41 void reset()
42 {
43 numMidpoints = 0;
44 currentMidpoint = 0;
45 betweenMidpoints = false;
46 }
47
48 // The goal is to reuse the line state across multiple
49 // lines so we just keep an array around for midpoints and never clear it across multiple
50 // lines. We track the number of items and position using the two other variables.
51 Vector<Iterator> midpoints;
52 unsigned numMidpoints;
53 unsigned currentMidpoint;
54 bool betweenMidpoints;
55 };
56
57 // The BidiStatus at a given position (typically the end of a line) can
58 // be cached and then used to restart bidi resolution at that position.
59 struct BidiStatus {
BidiStatusBidiStatus60 BidiStatus()
61 : eor(WTF::Unicode::OtherNeutral)
62 , lastStrong(WTF::Unicode::OtherNeutral)
63 , last(WTF::Unicode::OtherNeutral)
64 {
65 }
66
67 // Creates a BidiStatus representing a new paragraph root with a default direction.
68 // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19.
BidiStatusBidiStatus69 BidiStatus(TextDirection textDirection, bool isOverride)
70 {
71 WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft;
72 eor = lastStrong = last = direction;
73 context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
74 }
75
BidiStatusBidiStatus76 BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
77 : eor(eorDir)
78 , lastStrong(lastStrongDir)
79 , last(lastDir)
80 , context(bidiContext)
81 {
82 }
83
84 WTF::Unicode::Direction eor;
85 WTF::Unicode::Direction lastStrong;
86 WTF::Unicode::Direction last;
87 RefPtr<BidiContext> context;
88 };
89
90 class BidiEmbedding {
91 public:
BidiEmbedding(WTF::Unicode::Direction direction,BidiEmbeddingSource source)92 BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source)
93 : m_direction(direction)
94 , m_source(source)
95 {
96 }
97
direction()98 WTF::Unicode::Direction direction() const { return m_direction; }
source()99 BidiEmbeddingSource source() const { return m_source; }
100 private:
101 WTF::Unicode::Direction m_direction;
102 BidiEmbeddingSource m_source;
103 };
104
105 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
106 {
107 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
108 }
109
110 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
111 {
112 return !(status1 == status2);
113 }
114
115 struct BidiCharacterRun {
BidiCharacterRunBidiCharacterRun116 BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
117 : m_override(context->override())
118 , m_next(0)
119 , m_start(start)
120 , m_stop(stop)
121 {
122 ASSERT(m_start <= m_stop);
123 if (dir == WTF::Unicode::OtherNeutral)
124 dir = context->dir();
125
126 m_level = context->level();
127
128 // add level of run (cases I1 & I2)
129 if (m_level % 2) {
130 if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
131 m_level++;
132 } else {
133 if (dir == WTF::Unicode::RightToLeft)
134 m_level++;
135 else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
136 m_level += 2;
137 }
138 }
139
startBidiCharacterRun140 int start() const { return m_start; }
stopBidiCharacterRun141 int stop() const { return m_stop; }
levelBidiCharacterRun142 unsigned char level() const { return m_level; }
reversedBidiCharacterRun143 bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
dirOverrideBidiCharacterRun144 bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
145
nextBidiCharacterRun146 BidiCharacterRun* next() const { return m_next; }
setNextBidiCharacterRun147 void setNext(BidiCharacterRun* next) { m_next = next; }
148
149 // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173
150 bool m_override : 1;
151 bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
152 bool m_startsSegment : 1; // Same comment as m_hasHyphen.
153 unsigned char m_level;
154 BidiCharacterRun* m_next;
155 int m_start;
156 int m_stop;
157 };
158
// Selects whether createBidiRunsForLine() runs the normal algorithm or emits
// a single run with a forced visual direction.
enum VisualDirectionOverride {
    NoVisualOverride = 0, // No forced direction.
    VisualLeftToRightOverride = 1, // Force a left-to-right run.
    VisualRightToLeftOverride = 2 // Force a right-to-left run.
};
164
165 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
166 // http://unicode.org/reports/tr9
167 template <class Iterator, class Run> class BidiResolver {
168 WTF_MAKE_NONCOPYABLE(BidiResolver);
169 public:
BidiResolver()170 BidiResolver()
171 : m_direction(WTF::Unicode::OtherNeutral)
172 , m_reachedEndOfLine(false)
173 , m_emptyRun(true)
174 , m_nestedIsolateCount(0)
175 {
176 }
177
178 #ifndef NDEBUG
179 ~BidiResolver();
180 #endif
181
position()182 const Iterator& position() const { return m_current; }
position()183 Iterator& position() { return m_current; }
setPositionIgnoringNestedIsolates(const Iterator & position)184 void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
setPosition(const Iterator & position,unsigned nestedIsolatedCount)185 void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
186 {
187 m_current = position;
188 m_nestedIsolateCount = nestedIsolatedCount;
189 }
190
context()191 BidiContext* context() const { return m_status.context.get(); }
setContext(PassRefPtr<BidiContext> c)192 void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
193
setLastDir(WTF::Unicode::Direction lastDir)194 void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
setLastStrongDir(WTF::Unicode::Direction lastStrongDir)195 void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
setEorDir(WTF::Unicode::Direction eorDir)196 void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
197
dir()198 WTF::Unicode::Direction dir() const { return m_direction; }
setDir(WTF::Unicode::Direction d)199 void setDir(WTF::Unicode::Direction d) { m_direction = d; }
200
status()201 const BidiStatus& status() const { return m_status; }
setStatus(const BidiStatus s)202 void setStatus(const BidiStatus s)
203 {
204 ASSERT(s.context);
205 m_status = s;
206 }
207
midpointState()208 MidpointState<Iterator>& midpointState() { return m_midpointState; }
209
210 // The current algorithm handles nested isolates one layer of nesting at a time.
211 // But when we layout each isolated span, we will walk into (and ignore) all
212 // child isolated spans.
enterIsolate()213 void enterIsolate() { m_nestedIsolateCount++; }
exitIsolate()214 void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
inIsolate()215 bool inIsolate() const { return m_nestedIsolateCount; }
216
217 void embed(WTF::Unicode::Direction, BidiEmbeddingSource);
218 bool commitExplicitEmbedding();
219
220 void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false);
221
runs()222 BidiRunList<Run>& runs() { return m_runs; }
223
224 // FIXME: This used to be part of deleteRuns() but was a layering violation.
225 // It's unclear if this is still needed.
markCurrentRunEmpty()226 void markCurrentRunEmpty() { m_emptyRun = true; }
227
isolatedRuns()228 Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
229
isEndOfLine(const Iterator & end)230 bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); }
231
232 TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0);
233
234 void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&);
235 MidpointState<Iterator> midpointStateForIsolatedRun(Run*);
236
endOfLine()237 Iterator endOfLine() const { return m_endOfLine; }
238
239 protected:
increment()240 void increment() { m_current.increment(); }
241 // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
242 // pass in some sort of Traits object which knows how to create runs for appending.
243 void appendRun();
244
245 Iterator m_current;
246 // sor and eor are "start of run" and "end of run" respectively and correpond
247 // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
248 Iterator m_sor; // Points to the first character in the current run.
249 Iterator m_eor; // Points to the last character in the current run.
250 Iterator m_last;
251 BidiStatus m_status;
252 WTF::Unicode::Direction m_direction;
253 // m_endOfRunAtEndOfLine is "the position last eor in the end of line"
254 Iterator m_endOfRunAtEndOfLine;
255 Iterator m_endOfLine;
256 bool m_reachedEndOfLine;
257 Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator
258 bool m_emptyRun;
259
260 // FIXME: This should not belong to the resolver, but rather be passed
261 // into createBidiRunsForLine by the caller.
262 BidiRunList<Run> m_runs;
263
264 MidpointState<Iterator> m_midpointState;
265
266 unsigned m_nestedIsolateCount;
267 Vector<Run*> m_isolatedRuns;
268
269 private:
270 void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
271 void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
272 void checkDirectionInLowerRaiseEmbeddingLevel();
273
274 void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction);
275 void reorderRunsFromLevels();
276
277 Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
278 HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun;
279 };
280
281 #ifndef NDEBUG
282 template <class Iterator, class Run>
~BidiResolver()283 BidiResolver<Iterator, Run>::~BidiResolver()
284 {
285 // The owner of this resolver should have handled the isolated runs.
286 ASSERT(m_isolatedRuns.isEmpty());
287 }
288 #endif
289
290 template <class Iterator, class Run>
appendRun()291 void BidiResolver<Iterator, Run>::appendRun()
292 {
293 if (!m_emptyRun && !m_eor.atEnd()) {
294 unsigned startOffset = m_sor.offset();
295 unsigned endOffset = m_eor.offset();
296
297 if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) {
298 m_reachedEndOfLine = true;
299 endOffset = m_endOfRunAtEndOfLine.offset();
300 }
301
302 if (endOffset >= startOffset)
303 m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
304
305 m_eor.increment();
306 m_sor = m_eor;
307 }
308
309 m_direction = WTF::Unicode::OtherNeutral;
310 m_status.eor = WTF::Unicode::OtherNeutral;
311 }
312
313 template <class Iterator, class Run>
embed(WTF::Unicode::Direction dir,BidiEmbeddingSource source)314 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source)
315 {
316 // Isolated spans compute base directionality during their own UBA run.
317 // Do not insert fake embed characters once we enter an isolated span.
318 ASSERT(!inIsolate());
319 using namespace WTF::Unicode;
320
321 ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride);
322 m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
323 }
324
325 template <class Iterator, class Run>
checkDirectionInLowerRaiseEmbeddingLevel()326 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
327 {
328 using namespace WTF::Unicode;
329
330 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
331 ASSERT(m_status.last != NonSpacingMark
332 && m_status.last != BoundaryNeutral
333 && m_status.last != RightToLeftEmbedding
334 && m_status.last != LeftToRightEmbedding
335 && m_status.last != RightToLeftOverride
336 && m_status.last != LeftToRightOverride
337 && m_status.last != PopDirectionalFormat);
338 if (m_direction == OtherNeutral)
339 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
340 }
341
342 template <class Iterator, class Run>
lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)343 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)
344 {
345 using namespace WTF::Unicode;
346
347 if (!m_emptyRun && m_eor != m_last) {
348 checkDirectionInLowerRaiseEmbeddingLevel();
349 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
350 if (from == LeftToRight) {
351 // bidi.sor ... bidi.eor ... bidi.last L
352 if (m_status.eor == EuropeanNumber) {
353 if (m_status.lastStrong != LeftToRight) {
354 m_direction = EuropeanNumber;
355 appendRun();
356 }
357 } else if (m_status.eor == ArabicNumber) {
358 m_direction = ArabicNumber;
359 appendRun();
360 } else if (m_status.lastStrong != LeftToRight) {
361 appendRun();
362 m_direction = LeftToRight;
363 }
364 } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
365 appendRun();
366 m_direction = RightToLeft;
367 }
368 m_eor = m_last;
369 }
370
371 appendRun();
372 m_emptyRun = true;
373
374 // sor for the new run is determined by the higher level (rule X10)
375 setLastDir(from);
376 setLastStrongDir(from);
377 m_eor = Iterator();
378 }
379
380 template <class Iterator, class Run>
raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from,WTF::Unicode::Direction to)381 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to)
382 {
383 using namespace WTF::Unicode;
384
385 if (!m_emptyRun && m_eor != m_last) {
386 checkDirectionInLowerRaiseEmbeddingLevel();
387 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
388 if (to == LeftToRight) {
389 // bidi.sor ... bidi.eor ... bidi.last L
390 if (m_status.eor == EuropeanNumber) {
391 if (m_status.lastStrong != LeftToRight) {
392 m_direction = EuropeanNumber;
393 appendRun();
394 }
395 } else if (m_status.eor == ArabicNumber) {
396 m_direction = ArabicNumber;
397 appendRun();
398 } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
399 appendRun();
400 m_direction = LeftToRight;
401 }
402 } else if (m_status.eor == ArabicNumber
403 || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
404 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
405 appendRun();
406 m_direction = RightToLeft;
407 }
408 m_eor = m_last;
409 }
410
411 appendRun();
412 m_emptyRun = true;
413
414 setLastDir(to);
415 setLastStrongDir(to);
416 m_eor = Iterator();
417 }
418
419 template <class Iterator, class Run>
commitExplicitEmbedding()420 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding()
421 {
422 // When we're "inIsolate()" we're resolving the parent context which
423 // ignores (skips over) the isolated content, including embedding levels.
424 // We should never accrue embedding levels while skipping over isolated content.
425 ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
426
427 using namespace WTF::Unicode;
428
429 unsigned char fromLevel = context()->level();
430 RefPtr<BidiContext> toContext = context();
431
432 for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
433 BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
434 if (embedding.direction() == PopDirectionalFormat) {
435 if (BidiContext* parentContext = toContext->parent())
436 toContext = parentContext;
437 } else {
438 Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight;
439 bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride;
440 unsigned char level = toContext->level();
441 if (direction == RightToLeft)
442 level = nextGreaterOddLevel(level);
443 else
444 level = nextGreaterEvenLevel(level);
445 if (level < BidiContext::kMaxLevel)
446 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
447 }
448 }
449
450 unsigned char toLevel = toContext->level();
451
452 if (toLevel > fromLevel)
453 raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
454 else if (toLevel < fromLevel)
455 lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
456
457 setContext(toContext);
458
459 m_currentExplicitEmbeddingSequence.clear();
460
461 return fromLevel != toLevel;
462 }
463
464 template <class Iterator, class Run>
updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)465 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)
466 {
467 using namespace WTF::Unicode;
468 switch (dirCurrent) {
469 case EuropeanNumberTerminator:
470 if (m_status.last != EuropeanNumber)
471 m_status.last = EuropeanNumberTerminator;
472 break;
473 case EuropeanNumberSeparator:
474 case CommonNumberSeparator:
475 case SegmentSeparator:
476 case WhiteSpaceNeutral:
477 case OtherNeutral:
478 switch (m_status.last) {
479 case LeftToRight:
480 case RightToLeft:
481 case RightToLeftArabic:
482 case EuropeanNumber:
483 case ArabicNumber:
484 m_status.last = dirCurrent;
485 break;
486 default:
487 m_status.last = OtherNeutral;
488 }
489 break;
490 case NonSpacingMark:
491 case BoundaryNeutral:
492 case RightToLeftEmbedding:
493 case LeftToRightEmbedding:
494 case RightToLeftOverride:
495 case LeftToRightOverride:
496 case PopDirectionalFormat:
497 // ignore these
498 break;
499 case EuropeanNumber:
500 // fall through
501 default:
502 m_status.last = dirCurrent;
503 }
504 }
505
506 template <class Iterator, class Run>
reorderRunsFromLevels()507 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels()
508 {
509 unsigned char levelLow = BidiContext::kMaxLevel;
510 unsigned char levelHigh = 0;
511 for (Run* run = m_runs.firstRun(); run; run = run->next()) {
512 levelHigh = std::max(run->level(), levelHigh);
513 levelLow = std::min(run->level(), levelLow);
514 }
515
516 // This implements reordering of the line (L2 according to Bidi spec):
517 // http://unicode.org/reports/tr9/#L2
518 // L2. From the highest level found in the text to the lowest odd level on each line,
519 // reverse any contiguous sequence of characters that are at that level or higher.
520
521 // Reversing is only done up to the lowest odd level.
522 if (!(levelLow % 2))
523 levelLow++;
524
525 unsigned count = m_runs.runCount() - 1;
526
527 while (levelHigh >= levelLow) {
528 unsigned i = 0;
529 Run* run = m_runs.firstRun();
530 while (i < count) {
531 for (;i < count && run && run->level() < levelHigh; i++)
532 run = run->next();
533 unsigned start = i;
534 for (;i <= count && run && run->level() >= levelHigh; i++)
535 run = run->next();
536 unsigned end = i - 1;
537 m_runs.reverseRuns(start, end);
538 }
539 levelHigh--;
540 }
541 }
542
543 template <class Iterator, class Run>
determineParagraphDirectionality(bool * hasStrongDirectionality)544 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality)
545 {
546 while (!m_current.atEnd()) {
547 if (inIsolate()) {
548 increment();
549 continue;
550 }
551 if (m_current.atParagraphSeparator())
552 break;
553 UChar32 current = m_current.current();
554 if (UNLIKELY(U16_IS_SURROGATE(current))) {
555 increment();
556 // If this not the high part of the surrogate pair, then drop it and move to the next.
557 if (!U16_IS_SURROGATE_LEAD(current))
558 continue;
559 UChar high = static_cast<UChar>(current);
560 if (m_current.atEnd())
561 continue;
562 UChar low = m_current.current();
563 // Verify the low part. If invalid, then assume an invalid surrogate pair and retry.
564 if (!U16_IS_TRAIL(low))
565 continue;
566 current = U16_GET_SUPPLEMENTARY(high, low);
567 }
568 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current);
569 if (charDirection == WTF::Unicode::LeftToRight) {
570 if (hasStrongDirectionality)
571 *hasStrongDirectionality = true;
572 return LTR;
573 }
574 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
575 if (hasStrongDirectionality)
576 *hasStrongDirectionality = true;
577 return RTL;
578 }
579 increment();
580 }
581 if (hasStrongDirectionality)
582 *hasStrongDirectionality = false;
583 return LTR;
584 }
585
586 template <class Iterator, class Run>
createBidiRunsForLine(const Iterator & end,VisualDirectionOverride override,bool hardLineBreak)587 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
588 {
589 using namespace WTF::Unicode;
590
591 ASSERT(m_direction == OtherNeutral);
592
593 m_endOfLine = end;
594
595 if (override != NoVisualOverride) {
596 m_emptyRun = false;
597 m_sor = m_current;
598 m_eor = Iterator();
599 while (m_current != end && !m_current.atEnd()) {
600 m_eor = m_current;
601 increment();
602 }
603 m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft;
604 appendRun();
605 m_runs.setLogicallyLastRun(m_runs.lastRun());
606 if (override == VisualRightToLeftOverride && m_runs.runCount())
607 m_runs.reverseRuns(0, m_runs.runCount() - 1);
608 return;
609 }
610
611 m_emptyRun = true;
612
613 m_eor = Iterator();
614
615 m_last = m_current;
616 bool lastLineEnded = false;
617 BidiResolver<Iterator, Run> stateAtEnd;
618
619 while (true) {
620 if (inIsolate() && m_emptyRun) {
621 m_sor = m_current;
622 m_emptyRun = false;
623 }
624
625 if (!lastLineEnded && isEndOfLine(end)) {
626 if (m_emptyRun)
627 break;
628
629 stateAtEnd.m_status = m_status;
630 stateAtEnd.m_sor = m_sor;
631 stateAtEnd.m_eor = m_eor;
632 stateAtEnd.m_last = m_last;
633 stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
634 stateAtEnd.m_lastBeforeET = m_lastBeforeET;
635 stateAtEnd.m_emptyRun = m_emptyRun;
636 m_endOfRunAtEndOfLine = m_last;
637 lastLineEnded = true;
638 }
639 Direction dirCurrent;
640 if (lastLineEnded && (hardLineBreak || m_current.atEnd())) {
641 BidiContext* c = context();
642 if (hardLineBreak) {
643 // A deviation from the Unicode Bidi Algorithm in order to match
644 // WinIE and user expectations: hard line breaks reset bidi state
645 // coming from unicode bidi control characters, but not those from
646 // DOM nodes with specified directionality
647 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
648
649 dirCurrent = stateAtEnd.context()->dir();
650 stateAtEnd.setEorDir(dirCurrent);
651 stateAtEnd.setLastDir(dirCurrent);
652 stateAtEnd.setLastStrongDir(dirCurrent);
653 } else {
654 while (c->parent())
655 c = c->parent();
656 dirCurrent = c->dir();
657 }
658 } else {
659 dirCurrent = m_current.direction();
660 if (context()->override()
661 && dirCurrent != RightToLeftEmbedding
662 && dirCurrent != LeftToRightEmbedding
663 && dirCurrent != RightToLeftOverride
664 && dirCurrent != LeftToRightOverride
665 && dirCurrent != PopDirectionalFormat)
666 dirCurrent = context()->dir();
667 else if (dirCurrent == NonSpacingMark)
668 dirCurrent = m_status.last;
669 }
670
671 // We ignore all character directionality while in unicode-bidi: isolate spans.
672 // We'll handle ordering the isolated characters in a second pass.
673 if (inIsolate())
674 dirCurrent = OtherNeutral;
675
676 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
677 switch (dirCurrent) {
678
679 // embedding and overrides (X1-X9 in the Bidi specs)
680 case RightToLeftEmbedding:
681 case LeftToRightEmbedding:
682 case RightToLeftOverride:
683 case LeftToRightOverride:
684 case PopDirectionalFormat:
685 embed(dirCurrent, FromUnicode);
686 commitExplicitEmbedding();
687 break;
688
689 // strong types
690 case LeftToRight:
691 switch (m_status.last) {
692 case RightToLeft:
693 case RightToLeftArabic:
694 case EuropeanNumber:
695 case ArabicNumber:
696 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
697 appendRun();
698 break;
699 case LeftToRight:
700 break;
701 case EuropeanNumberSeparator:
702 case EuropeanNumberTerminator:
703 case CommonNumberSeparator:
704 case BoundaryNeutral:
705 case BlockSeparator:
706 case SegmentSeparator:
707 case WhiteSpaceNeutral:
708 case OtherNeutral:
709 if (m_status.eor == EuropeanNumber) {
710 if (m_status.lastStrong != LeftToRight) {
711 // the numbers need to be on a higher embedding level, so let's close that run
712 m_direction = EuropeanNumber;
713 appendRun();
714 if (context()->dir() != LeftToRight) {
715 // the neutrals take the embedding direction, which is R
716 m_eor = m_last;
717 m_direction = RightToLeft;
718 appendRun();
719 }
720 }
721 } else if (m_status.eor == ArabicNumber) {
722 // Arabic numbers are always on a higher embedding level, so let's close that run
723 m_direction = ArabicNumber;
724 appendRun();
725 if (context()->dir() != LeftToRight) {
726 // the neutrals take the embedding direction, which is R
727 m_eor = m_last;
728 m_direction = RightToLeft;
729 appendRun();
730 }
731 } else if (m_status.lastStrong != LeftToRight) {
732 // last stuff takes embedding dir
733 if (context()->dir() == RightToLeft) {
734 m_eor = m_last;
735 m_direction = RightToLeft;
736 }
737 appendRun();
738 }
739 default:
740 break;
741 }
742 m_eor = m_current;
743 m_status.eor = LeftToRight;
744 m_status.lastStrong = LeftToRight;
745 m_direction = LeftToRight;
746 break;
747 case RightToLeftArabic:
748 case RightToLeft:
749 switch (m_status.last) {
750 case LeftToRight:
751 case EuropeanNumber:
752 case ArabicNumber:
753 appendRun();
754 case RightToLeft:
755 case RightToLeftArabic:
756 break;
757 case EuropeanNumberSeparator:
758 case EuropeanNumberTerminator:
759 case CommonNumberSeparator:
760 case BoundaryNeutral:
761 case BlockSeparator:
762 case SegmentSeparator:
763 case WhiteSpaceNeutral:
764 case OtherNeutral:
765 if (m_status.eor == EuropeanNumber) {
766 if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
767 m_eor = m_last;
768 appendRun();
769 } else if (m_status.eor == ArabicNumber) {
770 appendRun();
771 } else if (m_status.lastStrong == LeftToRight) {
772 if (context()->dir() == LeftToRight)
773 m_eor = m_last;
774 appendRun();
775 }
776 default:
777 break;
778 }
779 m_eor = m_current;
780 m_status.eor = RightToLeft;
781 m_status.lastStrong = dirCurrent;
782 m_direction = RightToLeft;
783 break;
784
785 // weak types:
786
787 case EuropeanNumber:
788 if (m_status.lastStrong != RightToLeftArabic) {
789 // if last strong was AL change EN to AN
790 switch (m_status.last) {
791 case EuropeanNumber:
792 case LeftToRight:
793 break;
794 case RightToLeft:
795 case RightToLeftArabic:
796 case ArabicNumber:
797 m_eor = m_last;
798 appendRun();
799 m_direction = EuropeanNumber;
800 break;
801 case EuropeanNumberSeparator:
802 case CommonNumberSeparator:
803 if (m_status.eor == EuropeanNumber)
804 break;
805 case EuropeanNumberTerminator:
806 case BoundaryNeutral:
807 case BlockSeparator:
808 case SegmentSeparator:
809 case WhiteSpaceNeutral:
810 case OtherNeutral:
811 if (m_status.eor == EuropeanNumber) {
812 if (m_status.lastStrong == RightToLeft) {
813 // ENs on both sides behave like Rs, so the neutrals should be R.
814 // Terminate the EN run.
815 appendRun();
816 // Make an R run.
817 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
818 m_direction = RightToLeft;
819 appendRun();
820 // Begin a new EN run.
821 m_direction = EuropeanNumber;
822 }
823 } else if (m_status.eor == ArabicNumber) {
824 // Terminate the AN run.
825 appendRun();
826 if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
827 // Make an R run.
828 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
829 m_direction = RightToLeft;
830 appendRun();
831 // Begin a new EN run.
832 m_direction = EuropeanNumber;
833 }
834 } else if (m_status.lastStrong == RightToLeft) {
835 // Extend the R run to include the neutrals.
836 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
837 m_direction = RightToLeft;
838 appendRun();
839 // Begin a new EN run.
840 m_direction = EuropeanNumber;
841 }
842 default:
843 break;
844 }
845 m_eor = m_current;
846 m_status.eor = EuropeanNumber;
847 if (m_direction == OtherNeutral)
848 m_direction = LeftToRight;
849 break;
850 }
851 case ArabicNumber:
852 dirCurrent = ArabicNumber;
853 switch (m_status.last) {
854 case LeftToRight:
855 if (context()->dir() == LeftToRight)
856 appendRun();
857 break;
858 case ArabicNumber:
859 break;
860 case RightToLeft:
861 case RightToLeftArabic:
862 case EuropeanNumber:
863 m_eor = m_last;
864 appendRun();
865 break;
866 case CommonNumberSeparator:
867 if (m_status.eor == ArabicNumber)
868 break;
869 case EuropeanNumberSeparator:
870 case EuropeanNumberTerminator:
871 case BoundaryNeutral:
872 case BlockSeparator:
873 case SegmentSeparator:
874 case WhiteSpaceNeutral:
875 case OtherNeutral:
876 if (m_status.eor == ArabicNumber
877 || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
878 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
879 // Terminate the run before the neutrals.
880 appendRun();
881 // Begin an R run for the neutrals.
882 m_direction = RightToLeft;
883 } else if (m_direction == OtherNeutral) {
884 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
885 }
886 m_eor = m_last;
887 appendRun();
888 default:
889 break;
890 }
891 m_eor = m_current;
892 m_status.eor = ArabicNumber;
893 if (m_direction == OtherNeutral)
894 m_direction = ArabicNumber;
895 break;
896 case EuropeanNumberSeparator:
897 case CommonNumberSeparator:
898 break;
899 case EuropeanNumberTerminator:
900 if (m_status.last == EuropeanNumber) {
901 dirCurrent = EuropeanNumber;
902 m_eor = m_current;
903 m_status.eor = dirCurrent;
904 } else if (m_status.last != EuropeanNumberTerminator) {
905 m_lastBeforeET = m_emptyRun ? m_eor : m_last;
906 }
907 break;
908
909 // boundary neutrals should be ignored
910 case BoundaryNeutral:
911 if (m_eor == m_last)
912 m_eor = m_current;
913 break;
914 // neutrals
915 case BlockSeparator:
916 // ### what do we do with newline and paragraph seperators that come to here?
917 break;
918 case SegmentSeparator:
919 // ### implement rule L1
920 break;
921 case WhiteSpaceNeutral:
922 break;
923 case OtherNeutral:
924 break;
925 default:
926 break;
927 }
928
929 if (lastLineEnded && m_eor == m_current) {
930 if (!m_reachedEndOfLine) {
931 m_eor = m_endOfRunAtEndOfLine;
932 switch (m_status.eor) {
933 case LeftToRight:
934 case RightToLeft:
935 case ArabicNumber:
936 m_direction = m_status.eor;
937 break;
938 case EuropeanNumber:
939 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
940 break;
941 default:
942 ASSERT_NOT_REACHED();
943 }
944 appendRun();
945 }
946 m_current = end;
947 m_status = stateAtEnd.m_status;
948 m_sor = stateAtEnd.m_sor;
949 m_eor = stateAtEnd.m_eor;
950 m_last = stateAtEnd.m_last;
951 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
952 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
953 m_emptyRun = stateAtEnd.m_emptyRun;
954 m_direction = OtherNeutral;
955 break;
956 }
957
958 updateStatusLastFromCurrentDirection(dirCurrent);
959 m_last = m_current;
960
961 if (m_emptyRun) {
962 m_sor = m_current;
963 m_emptyRun = false;
964 }
965
966 increment();
967 if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
968 bool committed = commitExplicitEmbedding();
969 if (committed && lastLineEnded) {
970 m_current = end;
971 m_status = stateAtEnd.m_status;
972 m_sor = stateAtEnd.m_sor;
973 m_eor = stateAtEnd.m_eor;
974 m_last = stateAtEnd.m_last;
975 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
976 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
977 m_emptyRun = stateAtEnd.m_emptyRun;
978 m_direction = OtherNeutral;
979 break;
980 }
981 }
982 }
983
984 m_runs.setLogicallyLastRun(m_runs.lastRun());
985 reorderRunsFromLevels();
986 m_endOfRunAtEndOfLine = Iterator();
987 m_endOfLine = Iterator();
988 }
989
990 template <class Iterator, class Run>
setMidpointStateForIsolatedRun(Run * run,const MidpointState<Iterator> & midpoint)991 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint)
992 {
993 ASSERT(!m_midpointStateForIsolatedRun.contains(run));
994 m_midpointStateForIsolatedRun.add(run, midpoint);
995 }
996
997 template<class Iterator, class Run>
midpointStateForIsolatedRun(Run * run)998 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run)
999 {
1000 return m_midpointStateForIsolatedRun.take(run);
1001 }
1002
1003
1004 } // namespace WebCore
1005
1006 #endif // BidiResolver_h
1007