• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3  * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc.  All right reserved.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB.  If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  */
21 
22 #ifndef BidiResolver_h
23 #define BidiResolver_h
24 
25 #include "platform/text/BidiContext.h"
26 #include "platform/text/BidiRunList.h"
27 #include "platform/text/TextDirection.h"
28 #include "wtf/HashMap.h"
29 #include "wtf/Noncopyable.h"
30 #include "wtf/PassRefPtr.h"
31 #include "wtf/Vector.h"
32 
33 namespace WebCore {
34 
35 template <class Iterator> struct MidpointState {
MidpointStateMidpointState36     MidpointState()
37     {
38         reset();
39     }
40 
resetMidpointState41     void reset()
42     {
43         numMidpoints = 0;
44         currentMidpoint = 0;
45         betweenMidpoints = false;
46     }
47 
48     // The goal is to reuse the line state across multiple
49     // lines so we just keep an array around for midpoints and never clear it across multiple
50     // lines. We track the number of items and position using the two other variables.
51     Vector<Iterator> midpoints;
52     unsigned numMidpoints;
53     unsigned currentMidpoint;
54     bool betweenMidpoints;
55 };
56 
57 // The BidiStatus at a given position (typically the end of a line) can
58 // be cached and then used to restart bidi resolution at that position.
59 struct BidiStatus {
BidiStatusBidiStatus60     BidiStatus()
61         : eor(WTF::Unicode::OtherNeutral)
62         , lastStrong(WTF::Unicode::OtherNeutral)
63         , last(WTF::Unicode::OtherNeutral)
64     {
65     }
66 
67     // Creates a BidiStatus representing a new paragraph root with a default direction.
68     // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19.
BidiStatusBidiStatus69     BidiStatus(TextDirection textDirection, bool isOverride)
70     {
71         WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft;
72         eor = lastStrong = last = direction;
73         context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
74     }
75 
BidiStatusBidiStatus76     BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
77         : eor(eorDir)
78         , lastStrong(lastStrongDir)
79         , last(lastDir)
80         , context(bidiContext)
81     {
82     }
83 
84     WTF::Unicode::Direction eor;
85     WTF::Unicode::Direction lastStrong;
86     WTF::Unicode::Direction last;
87     RefPtr<BidiContext> context;
88 };
89 
90 class BidiEmbedding {
91 public:
BidiEmbedding(WTF::Unicode::Direction direction,BidiEmbeddingSource source)92     BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source)
93     : m_direction(direction)
94     , m_source(source)
95     {
96     }
97 
direction()98     WTF::Unicode::Direction direction() const { return m_direction; }
source()99     BidiEmbeddingSource source() const { return m_source; }
100 private:
101     WTF::Unicode::Direction m_direction;
102     BidiEmbeddingSource m_source;
103 };
104 
105 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
106 {
107     return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
108 }
109 
110 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
111 {
112     return !(status1 == status2);
113 }
114 
115 struct BidiCharacterRun {
BidiCharacterRunBidiCharacterRun116     BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
117         : m_override(context->override())
118         , m_next(0)
119         , m_start(start)
120         , m_stop(stop)
121     {
122         ASSERT(m_start <= m_stop);
123         if (dir == WTF::Unicode::OtherNeutral)
124             dir = context->dir();
125 
126         m_level = context->level();
127 
128         // add level of run (cases I1 & I2)
129         if (m_level % 2) {
130             if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
131                 m_level++;
132         } else {
133             if (dir == WTF::Unicode::RightToLeft)
134                 m_level++;
135             else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
136                 m_level += 2;
137         }
138     }
139 
startBidiCharacterRun140     int start() const { return m_start; }
stopBidiCharacterRun141     int stop() const { return m_stop; }
levelBidiCharacterRun142     unsigned char level() const { return m_level; }
reversedBidiCharacterRun143     bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
dirOverrideBidiCharacterRun144     bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
145 
nextBidiCharacterRun146     BidiCharacterRun* next() const { return m_next; }
setNextBidiCharacterRun147     void setNext(BidiCharacterRun* next) { m_next = next; }
148 
149     // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173
150     bool m_override : 1;
151     bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
152     bool m_startsSegment : 1; // Same comment as m_hasHyphen.
153     unsigned char m_level;
154     BidiCharacterRun* m_next;
155     int m_start;
156     int m_stop;
157 };
158 
// Selects whether createBidiRunsForLine() resolves the line normally or
// treats it as one visually ordered run in a fixed direction.
enum VisualDirectionOverride {
    NoVisualOverride = 0, // Regular bidi resolution.
    VisualLeftToRightOverride = 1, // Whole line becomes one left-to-right run.
    VisualRightToLeftOverride = 2 // Whole line becomes one right-to-left run.
};
164 
165 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
166 // http://unicode.org/reports/tr9
167 template <class Iterator, class Run> class BidiResolver {
168     WTF_MAKE_NONCOPYABLE(BidiResolver);
169 public:
BidiResolver()170     BidiResolver()
171         : m_direction(WTF::Unicode::OtherNeutral)
172         , m_reachedEndOfLine(false)
173         , m_emptyRun(true)
174         , m_nestedIsolateCount(0)
175     {
176     }
177 
178 #ifndef NDEBUG
179     ~BidiResolver();
180 #endif
181 
position()182     const Iterator& position() const { return m_current; }
position()183     Iterator& position() { return m_current; }
setPositionIgnoringNestedIsolates(const Iterator & position)184     void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
setPosition(const Iterator & position,unsigned nestedIsolatedCount)185     void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
186     {
187         m_current = position;
188         m_nestedIsolateCount = nestedIsolatedCount;
189     }
190 
context()191     BidiContext* context() const { return m_status.context.get(); }
setContext(PassRefPtr<BidiContext> c)192     void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
193 
setLastDir(WTF::Unicode::Direction lastDir)194     void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
setLastStrongDir(WTF::Unicode::Direction lastStrongDir)195     void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
setEorDir(WTF::Unicode::Direction eorDir)196     void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
197 
dir()198     WTF::Unicode::Direction dir() const { return m_direction; }
setDir(WTF::Unicode::Direction d)199     void setDir(WTF::Unicode::Direction d) { m_direction = d; }
200 
status()201     const BidiStatus& status() const { return m_status; }
setStatus(const BidiStatus s)202     void setStatus(const BidiStatus s)
203     {
204         ASSERT(s.context);
205         m_status = s;
206     }
207 
midpointState()208     MidpointState<Iterator>& midpointState() { return m_midpointState; }
209 
210     // The current algorithm handles nested isolates one layer of nesting at a time.
211     // But when we layout each isolated span, we will walk into (and ignore) all
212     // child isolated spans.
enterIsolate()213     void enterIsolate() { m_nestedIsolateCount++; }
exitIsolate()214     void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
inIsolate()215     bool inIsolate() const { return m_nestedIsolateCount; }
216 
217     void embed(WTF::Unicode::Direction, BidiEmbeddingSource);
218     bool commitExplicitEmbedding();
219 
220     void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false);
221 
runs()222     BidiRunList<Run>& runs() { return m_runs; }
223 
224     // FIXME: This used to be part of deleteRuns() but was a layering violation.
225     // It's unclear if this is still needed.
markCurrentRunEmpty()226     void markCurrentRunEmpty() { m_emptyRun = true; }
227 
isolatedRuns()228     Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
229 
isEndOfLine(const Iterator & end)230     bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); }
231 
232     TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0);
233 
234     void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&);
235     MidpointState<Iterator> midpointStateForIsolatedRun(Run*);
236 
endOfLine()237     Iterator endOfLine() const { return m_endOfLine; }
238 
239 protected:
increment()240     void increment() { m_current.increment(); }
241     // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
242     // pass in some sort of Traits object which knows how to create runs for appending.
243     void appendRun();
244 
245     Iterator m_current;
246     // sor and eor are "start of run" and "end of run" respectively and correpond
247     // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
248     Iterator m_sor; // Points to the first character in the current run.
249     Iterator m_eor; // Points to the last character in the current run.
250     Iterator m_last;
251     BidiStatus m_status;
252     WTF::Unicode::Direction m_direction;
253     // m_endOfRunAtEndOfLine is "the position last eor in the end of line"
254     Iterator m_endOfRunAtEndOfLine;
255     Iterator m_endOfLine;
256     bool m_reachedEndOfLine;
257     Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator
258     bool m_emptyRun;
259 
260     // FIXME: This should not belong to the resolver, but rather be passed
261     // into createBidiRunsForLine by the caller.
262     BidiRunList<Run> m_runs;
263 
264     MidpointState<Iterator> m_midpointState;
265 
266     unsigned m_nestedIsolateCount;
267     Vector<Run*> m_isolatedRuns;
268 
269 private:
270     void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
271     void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
272     void checkDirectionInLowerRaiseEmbeddingLevel();
273 
274     void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction);
275     void reorderRunsFromLevels();
276 
277     Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
278     HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun;
279 };
280 
281 #ifndef NDEBUG
282 template <class Iterator, class Run>
~BidiResolver()283 BidiResolver<Iterator, Run>::~BidiResolver()
284 {
285     // The owner of this resolver should have handled the isolated runs.
286     ASSERT(m_isolatedRuns.isEmpty());
287 }
288 #endif
289 
290 template <class Iterator, class Run>
appendRun()291 void BidiResolver<Iterator, Run>::appendRun()
292 {
293     if (!m_emptyRun && !m_eor.atEnd()) {
294         unsigned startOffset = m_sor.offset();
295         unsigned endOffset = m_eor.offset();
296 
297         if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) {
298             m_reachedEndOfLine = true;
299             endOffset = m_endOfRunAtEndOfLine.offset();
300         }
301 
302         if (endOffset >= startOffset)
303             m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
304 
305         m_eor.increment();
306         m_sor = m_eor;
307     }
308 
309     m_direction = WTF::Unicode::OtherNeutral;
310     m_status.eor = WTF::Unicode::OtherNeutral;
311 }
312 
313 template <class Iterator, class Run>
embed(WTF::Unicode::Direction dir,BidiEmbeddingSource source)314 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source)
315 {
316     // Isolated spans compute base directionality during their own UBA run.
317     // Do not insert fake embed characters once we enter an isolated span.
318     ASSERT(!inIsolate());
319     using namespace WTF::Unicode;
320 
321     ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride);
322     m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
323 }
324 
325 template <class Iterator, class Run>
checkDirectionInLowerRaiseEmbeddingLevel()326 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
327 {
328     using namespace WTF::Unicode;
329 
330     ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
331     ASSERT(m_status.last != NonSpacingMark
332         && m_status.last != BoundaryNeutral
333         && m_status.last != RightToLeftEmbedding
334         && m_status.last != LeftToRightEmbedding
335         && m_status.last != RightToLeftOverride
336         && m_status.last != LeftToRightOverride
337         && m_status.last != PopDirectionalFormat);
338     if (m_direction == OtherNeutral)
339         m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
340 }
341 
342 template <class Iterator, class Run>
lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)343 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)
344 {
345     using namespace WTF::Unicode;
346 
347     if (!m_emptyRun && m_eor != m_last) {
348         checkDirectionInLowerRaiseEmbeddingLevel();
349         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
350         if (from == LeftToRight) {
351             // bidi.sor ... bidi.eor ... bidi.last L
352             if (m_status.eor == EuropeanNumber) {
353                 if (m_status.lastStrong != LeftToRight) {
354                     m_direction = EuropeanNumber;
355                     appendRun();
356                 }
357             } else if (m_status.eor == ArabicNumber) {
358                 m_direction = ArabicNumber;
359                 appendRun();
360             } else if (m_status.lastStrong != LeftToRight) {
361                 appendRun();
362                 m_direction = LeftToRight;
363             }
364         } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
365             appendRun();
366             m_direction = RightToLeft;
367         }
368         m_eor = m_last;
369     }
370 
371     appendRun();
372     m_emptyRun = true;
373 
374     // sor for the new run is determined by the higher level (rule X10)
375     setLastDir(from);
376     setLastStrongDir(from);
377     m_eor = Iterator();
378 }
379 
380 template <class Iterator, class Run>
raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from,WTF::Unicode::Direction to)381 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to)
382 {
383     using namespace WTF::Unicode;
384 
385     if (!m_emptyRun && m_eor != m_last) {
386         checkDirectionInLowerRaiseEmbeddingLevel();
387         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
388         if (to == LeftToRight) {
389             // bidi.sor ... bidi.eor ... bidi.last L
390             if (m_status.eor == EuropeanNumber) {
391                 if (m_status.lastStrong != LeftToRight) {
392                     m_direction = EuropeanNumber;
393                     appendRun();
394                 }
395             } else if (m_status.eor == ArabicNumber) {
396                 m_direction = ArabicNumber;
397                 appendRun();
398             } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
399                 appendRun();
400                 m_direction = LeftToRight;
401             }
402         } else if (m_status.eor == ArabicNumber
403             || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
404             || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
405             appendRun();
406             m_direction = RightToLeft;
407         }
408         m_eor = m_last;
409     }
410 
411     appendRun();
412     m_emptyRun = true;
413 
414     setLastDir(to);
415     setLastStrongDir(to);
416     m_eor = Iterator();
417 }
418 
419 template <class Iterator, class Run>
commitExplicitEmbedding()420 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding()
421 {
422     // When we're "inIsolate()" we're resolving the parent context which
423     // ignores (skips over) the isolated content, including embedding levels.
424     // We should never accrue embedding levels while skipping over isolated content.
425     ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
426 
427     using namespace WTF::Unicode;
428 
429     unsigned char fromLevel = context()->level();
430     RefPtr<BidiContext> toContext = context();
431 
432     for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
433         BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
434         if (embedding.direction() == PopDirectionalFormat) {
435             if (BidiContext* parentContext = toContext->parent())
436                 toContext = parentContext;
437         } else {
438             Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight;
439             bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride;
440             unsigned char level = toContext->level();
441             if (direction == RightToLeft)
442                 level = nextGreaterOddLevel(level);
443             else
444                 level = nextGreaterEvenLevel(level);
445             if (level < BidiContext::kMaxLevel)
446                 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
447         }
448     }
449 
450     unsigned char toLevel = toContext->level();
451 
452     if (toLevel > fromLevel)
453         raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
454     else if (toLevel < fromLevel)
455         lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
456 
457     setContext(toContext);
458 
459     m_currentExplicitEmbeddingSequence.clear();
460 
461     return fromLevel != toLevel;
462 }
463 
464 template <class Iterator, class Run>
updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)465 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)
466 {
467     using namespace WTF::Unicode;
468     switch (dirCurrent) {
469     case EuropeanNumberTerminator:
470         if (m_status.last != EuropeanNumber)
471             m_status.last = EuropeanNumberTerminator;
472         break;
473     case EuropeanNumberSeparator:
474     case CommonNumberSeparator:
475     case SegmentSeparator:
476     case WhiteSpaceNeutral:
477     case OtherNeutral:
478         switch (m_status.last) {
479         case LeftToRight:
480         case RightToLeft:
481         case RightToLeftArabic:
482         case EuropeanNumber:
483         case ArabicNumber:
484             m_status.last = dirCurrent;
485             break;
486         default:
487             m_status.last = OtherNeutral;
488         }
489         break;
490     case NonSpacingMark:
491     case BoundaryNeutral:
492     case RightToLeftEmbedding:
493     case LeftToRightEmbedding:
494     case RightToLeftOverride:
495     case LeftToRightOverride:
496     case PopDirectionalFormat:
497         // ignore these
498         break;
499     case EuropeanNumber:
500         // fall through
501     default:
502         m_status.last = dirCurrent;
503     }
504 }
505 
506 template <class Iterator, class Run>
reorderRunsFromLevels()507 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels()
508 {
509     unsigned char levelLow = BidiContext::kMaxLevel;
510     unsigned char levelHigh = 0;
511     for (Run* run = m_runs.firstRun(); run; run = run->next()) {
512         levelHigh = std::max(run->level(), levelHigh);
513         levelLow = std::min(run->level(), levelLow);
514     }
515 
516     // This implements reordering of the line (L2 according to Bidi spec):
517     // http://unicode.org/reports/tr9/#L2
518     // L2. From the highest level found in the text to the lowest odd level on each line,
519     // reverse any contiguous sequence of characters that are at that level or higher.
520 
521     // Reversing is only done up to the lowest odd level.
522     if (!(levelLow % 2))
523         levelLow++;
524 
525     unsigned count = m_runs.runCount() - 1;
526 
527     while (levelHigh >= levelLow) {
528         unsigned i = 0;
529         Run* run = m_runs.firstRun();
530         while (i < count) {
531             for (;i < count && run && run->level() < levelHigh; i++)
532                 run = run->next();
533             unsigned start = i;
534             for (;i <= count && run && run->level() >= levelHigh; i++)
535                 run = run->next();
536             unsigned end = i - 1;
537             m_runs.reverseRuns(start, end);
538         }
539         levelHigh--;
540     }
541 }
542 
543 template <class Iterator, class Run>
determineParagraphDirectionality(bool * hasStrongDirectionality)544 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality)
545 {
546     while (!m_current.atEnd()) {
547         if (inIsolate()) {
548             increment();
549             continue;
550         }
551         if (m_current.atParagraphSeparator())
552             break;
553         UChar32 current = m_current.current();
554         if (UNLIKELY(U16_IS_SURROGATE(current))) {
555             increment();
556             // If this not the high part of the surrogate pair, then drop it and move to the next.
557             if (!U16_IS_SURROGATE_LEAD(current))
558                 continue;
559             UChar high = static_cast<UChar>(current);
560             if (m_current.atEnd())
561                 continue;
562             UChar low = m_current.current();
563             // Verify the low part. If invalid, then assume an invalid surrogate pair and retry.
564             if (!U16_IS_TRAIL(low))
565                 continue;
566             current = U16_GET_SUPPLEMENTARY(high, low);
567         }
568         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current);
569         if (charDirection == WTF::Unicode::LeftToRight) {
570             if (hasStrongDirectionality)
571                 *hasStrongDirectionality = true;
572             return LTR;
573         }
574         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
575             if (hasStrongDirectionality)
576                 *hasStrongDirectionality = true;
577             return RTL;
578         }
579         increment();
580     }
581     if (hasStrongDirectionality)
582         *hasStrongDirectionality = false;
583     return LTR;
584 }
585 
586 template <class Iterator, class Run>
createBidiRunsForLine(const Iterator & end,VisualDirectionOverride override,bool hardLineBreak)587 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
588 {
589     using namespace WTF::Unicode;
590 
591     ASSERT(m_direction == OtherNeutral);
592 
593     m_endOfLine = end;
594 
595     if (override != NoVisualOverride) {
596         m_emptyRun = false;
597         m_sor = m_current;
598         m_eor = Iterator();
599         while (m_current != end && !m_current.atEnd()) {
600             m_eor = m_current;
601             increment();
602         }
603         m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft;
604         appendRun();
605         m_runs.setLogicallyLastRun(m_runs.lastRun());
606         if (override == VisualRightToLeftOverride && m_runs.runCount())
607             m_runs.reverseRuns(0, m_runs.runCount() - 1);
608         return;
609     }
610 
611     m_emptyRun = true;
612 
613     m_eor = Iterator();
614 
615     m_last = m_current;
616     bool lastLineEnded = false;
617     BidiResolver<Iterator, Run> stateAtEnd;
618 
619     while (true) {
620         if (inIsolate() && m_emptyRun) {
621             m_sor = m_current;
622             m_emptyRun = false;
623         }
624 
625         if (!lastLineEnded && isEndOfLine(end)) {
626             if (m_emptyRun)
627                 break;
628 
629             stateAtEnd.m_status = m_status;
630             stateAtEnd.m_sor = m_sor;
631             stateAtEnd.m_eor = m_eor;
632             stateAtEnd.m_last = m_last;
633             stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
634             stateAtEnd.m_lastBeforeET = m_lastBeforeET;
635             stateAtEnd.m_emptyRun = m_emptyRun;
636             m_endOfRunAtEndOfLine = m_last;
637             lastLineEnded = true;
638         }
639         Direction dirCurrent;
640         if (lastLineEnded && (hardLineBreak || m_current.atEnd())) {
641             BidiContext* c = context();
642             if (hardLineBreak) {
643                 // A deviation from the Unicode Bidi Algorithm in order to match
644                 // WinIE and user expectations: hard line breaks reset bidi state
645                 // coming from unicode bidi control characters, but not those from
646                 // DOM nodes with specified directionality
647                 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
648 
649                 dirCurrent = stateAtEnd.context()->dir();
650                 stateAtEnd.setEorDir(dirCurrent);
651                 stateAtEnd.setLastDir(dirCurrent);
652                 stateAtEnd.setLastStrongDir(dirCurrent);
653             } else {
654                 while (c->parent())
655                     c = c->parent();
656                 dirCurrent = c->dir();
657             }
658         } else {
659             dirCurrent = m_current.direction();
660             if (context()->override()
661                 && dirCurrent != RightToLeftEmbedding
662                 && dirCurrent != LeftToRightEmbedding
663                 && dirCurrent != RightToLeftOverride
664                 && dirCurrent != LeftToRightOverride
665                 && dirCurrent != PopDirectionalFormat)
666                 dirCurrent = context()->dir();
667             else if (dirCurrent == NonSpacingMark)
668                 dirCurrent = m_status.last;
669         }
670 
671         // We ignore all character directionality while in unicode-bidi: isolate spans.
672         // We'll handle ordering the isolated characters in a second pass.
673         if (inIsolate())
674             dirCurrent = OtherNeutral;
675 
676         ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
677         switch (dirCurrent) {
678 
679         // embedding and overrides (X1-X9 in the Bidi specs)
680         case RightToLeftEmbedding:
681         case LeftToRightEmbedding:
682         case RightToLeftOverride:
683         case LeftToRightOverride:
684         case PopDirectionalFormat:
685             embed(dirCurrent, FromUnicode);
686             commitExplicitEmbedding();
687             break;
688 
689         // strong types
690         case LeftToRight:
691             switch (m_status.last) {
692             case RightToLeft:
693             case RightToLeftArabic:
694             case EuropeanNumber:
695             case ArabicNumber:
696                 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
697                     appendRun();
698                 break;
699             case LeftToRight:
700                 break;
701             case EuropeanNumberSeparator:
702             case EuropeanNumberTerminator:
703             case CommonNumberSeparator:
704             case BoundaryNeutral:
705             case BlockSeparator:
706             case SegmentSeparator:
707             case WhiteSpaceNeutral:
708             case OtherNeutral:
709                 if (m_status.eor == EuropeanNumber) {
710                     if (m_status.lastStrong != LeftToRight) {
711                         // the numbers need to be on a higher embedding level, so let's close that run
712                         m_direction = EuropeanNumber;
713                         appendRun();
714                         if (context()->dir() != LeftToRight) {
715                             // the neutrals take the embedding direction, which is R
716                             m_eor = m_last;
717                             m_direction = RightToLeft;
718                             appendRun();
719                         }
720                     }
721                 } else if (m_status.eor == ArabicNumber) {
722                     // Arabic numbers are always on a higher embedding level, so let's close that run
723                     m_direction = ArabicNumber;
724                     appendRun();
725                     if (context()->dir() != LeftToRight) {
726                         // the neutrals take the embedding direction, which is R
727                         m_eor = m_last;
728                         m_direction = RightToLeft;
729                         appendRun();
730                     }
731                 } else if (m_status.lastStrong != LeftToRight) {
732                     // last stuff takes embedding dir
733                     if (context()->dir() == RightToLeft) {
734                         m_eor = m_last;
735                         m_direction = RightToLeft;
736                     }
737                     appendRun();
738                 }
739             default:
740                 break;
741             }
742             m_eor = m_current;
743             m_status.eor = LeftToRight;
744             m_status.lastStrong = LeftToRight;
745             m_direction = LeftToRight;
746             break;
747         case RightToLeftArabic:
748         case RightToLeft:
749             switch (m_status.last) {
750             case LeftToRight:
751             case EuropeanNumber:
752             case ArabicNumber:
753                 appendRun();
754             case RightToLeft:
755             case RightToLeftArabic:
756                 break;
757             case EuropeanNumberSeparator:
758             case EuropeanNumberTerminator:
759             case CommonNumberSeparator:
760             case BoundaryNeutral:
761             case BlockSeparator:
762             case SegmentSeparator:
763             case WhiteSpaceNeutral:
764             case OtherNeutral:
765                 if (m_status.eor == EuropeanNumber) {
766                     if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
767                         m_eor = m_last;
768                     appendRun();
769                 } else if (m_status.eor == ArabicNumber) {
770                     appendRun();
771                 } else if (m_status.lastStrong == LeftToRight) {
772                     if (context()->dir() == LeftToRight)
773                         m_eor = m_last;
774                     appendRun();
775                 }
776             default:
777                 break;
778             }
779             m_eor = m_current;
780             m_status.eor = RightToLeft;
781             m_status.lastStrong = dirCurrent;
782             m_direction = RightToLeft;
783             break;
784 
785             // weak types:
786 
787         case EuropeanNumber:
788             if (m_status.lastStrong != RightToLeftArabic) {
789                 // if last strong was AL change EN to AN
790                 switch (m_status.last) {
791                 case EuropeanNumber:
792                 case LeftToRight:
793                     break;
794                 case RightToLeft:
795                 case RightToLeftArabic:
796                 case ArabicNumber:
797                     m_eor = m_last;
798                     appendRun();
799                     m_direction = EuropeanNumber;
800                     break;
801                 case EuropeanNumberSeparator:
802                 case CommonNumberSeparator:
803                     if (m_status.eor == EuropeanNumber)
804                         break;
805                 case EuropeanNumberTerminator:
806                 case BoundaryNeutral:
807                 case BlockSeparator:
808                 case SegmentSeparator:
809                 case WhiteSpaceNeutral:
810                 case OtherNeutral:
811                     if (m_status.eor == EuropeanNumber) {
812                         if (m_status.lastStrong == RightToLeft) {
813                             // ENs on both sides behave like Rs, so the neutrals should be R.
814                             // Terminate the EN run.
815                             appendRun();
816                             // Make an R run.
817                             m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
818                             m_direction = RightToLeft;
819                             appendRun();
820                             // Begin a new EN run.
821                             m_direction = EuropeanNumber;
822                         }
823                     } else if (m_status.eor == ArabicNumber) {
824                         // Terminate the AN run.
825                         appendRun();
826                         if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
827                             // Make an R run.
828                             m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
829                             m_direction = RightToLeft;
830                             appendRun();
831                             // Begin a new EN run.
832                             m_direction = EuropeanNumber;
833                         }
834                     } else if (m_status.lastStrong == RightToLeft) {
835                         // Extend the R run to include the neutrals.
836                         m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
837                         m_direction = RightToLeft;
838                         appendRun();
839                         // Begin a new EN run.
840                         m_direction = EuropeanNumber;
841                     }
842                 default:
843                     break;
844                 }
845                 m_eor = m_current;
846                 m_status.eor = EuropeanNumber;
847                 if (m_direction == OtherNeutral)
848                     m_direction = LeftToRight;
849                 break;
850             }
851         case ArabicNumber:
852             dirCurrent = ArabicNumber;
853             switch (m_status.last) {
854             case LeftToRight:
855                 if (context()->dir() == LeftToRight)
856                     appendRun();
857                 break;
858             case ArabicNumber:
859                 break;
860             case RightToLeft:
861             case RightToLeftArabic:
862             case EuropeanNumber:
863                 m_eor = m_last;
864                 appendRun();
865                 break;
866             case CommonNumberSeparator:
867                 if (m_status.eor == ArabicNumber)
868                     break;
869             case EuropeanNumberSeparator:
870             case EuropeanNumberTerminator:
871             case BoundaryNeutral:
872             case BlockSeparator:
873             case SegmentSeparator:
874             case WhiteSpaceNeutral:
875             case OtherNeutral:
876                 if (m_status.eor == ArabicNumber
877                     || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
878                     || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
879                     // Terminate the run before the neutrals.
880                     appendRun();
881                     // Begin an R run for the neutrals.
882                     m_direction = RightToLeft;
883                 } else if (m_direction == OtherNeutral) {
884                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
885                 }
886                 m_eor = m_last;
887                 appendRun();
888             default:
889                 break;
890             }
891             m_eor = m_current;
892             m_status.eor = ArabicNumber;
893             if (m_direction == OtherNeutral)
894                 m_direction = ArabicNumber;
895             break;
896         case EuropeanNumberSeparator:
897         case CommonNumberSeparator:
898             break;
899         case EuropeanNumberTerminator:
900             if (m_status.last == EuropeanNumber) {
901                 dirCurrent = EuropeanNumber;
902                 m_eor = m_current;
903                 m_status.eor = dirCurrent;
904             } else if (m_status.last != EuropeanNumberTerminator) {
905                 m_lastBeforeET = m_emptyRun ? m_eor : m_last;
906             }
907             break;
908 
909         // boundary neutrals should be ignored
910         case BoundaryNeutral:
911             if (m_eor == m_last)
912                 m_eor = m_current;
913             break;
914             // neutrals
915         case BlockSeparator:
916             // ### what do we do with newline and paragraph separators that come to here?
917             break;
918         case SegmentSeparator:
919             // ### implement rule L1
920             break;
921         case WhiteSpaceNeutral:
922             break;
923         case OtherNeutral:
924             break;
925         default:
926             break;
927         }
928 
929         if (lastLineEnded && m_eor == m_current) {
930             if (!m_reachedEndOfLine) {
931                 m_eor = m_endOfRunAtEndOfLine;
932                 switch (m_status.eor) {
933                 case LeftToRight:
934                 case RightToLeft:
935                 case ArabicNumber:
936                     m_direction = m_status.eor;
937                     break;
938                 case EuropeanNumber:
939                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
940                     break;
941                 default:
942                     ASSERT_NOT_REACHED();
943                 }
944                 appendRun();
945             }
946             m_current = end;
947             m_status = stateAtEnd.m_status;
948             m_sor = stateAtEnd.m_sor;
949             m_eor = stateAtEnd.m_eor;
950             m_last = stateAtEnd.m_last;
951             m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
952             m_lastBeforeET = stateAtEnd.m_lastBeforeET;
953             m_emptyRun = stateAtEnd.m_emptyRun;
954             m_direction = OtherNeutral;
955             break;
956         }
957 
958         updateStatusLastFromCurrentDirection(dirCurrent);
959         m_last = m_current;
960 
961         if (m_emptyRun) {
962             m_sor = m_current;
963             m_emptyRun = false;
964         }
965 
966         increment();
967         if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
968             bool committed = commitExplicitEmbedding();
969             if (committed && lastLineEnded) {
970                 m_current = end;
971                 m_status = stateAtEnd.m_status;
972                 m_sor = stateAtEnd.m_sor;
973                 m_eor = stateAtEnd.m_eor;
974                 m_last = stateAtEnd.m_last;
975                 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
976                 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
977                 m_emptyRun = stateAtEnd.m_emptyRun;
978                 m_direction = OtherNeutral;
979                 break;
980             }
981         }
982     }
983 
984     m_runs.setLogicallyLastRun(m_runs.lastRun());
985     reorderRunsFromLevels();
986     m_endOfRunAtEndOfLine = Iterator();
987     m_endOfLine = Iterator();
988 }
989 
990 template <class Iterator, class Run>
setMidpointStateForIsolatedRun(Run * run,const MidpointState<Iterator> & midpoint)991 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint)
992 {
993     ASSERT(!m_midpointStateForIsolatedRun.contains(run));
994     m_midpointStateForIsolatedRun.add(run, midpoint);
995 }
996 
997 template<class Iterator, class Run>
midpointStateForIsolatedRun(Run * run)998 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run)
999 {
1000     return m_midpointStateForIsolatedRun.take(run);
1001 }
1002 
1003 
1004 } // namespace WebCore
1005 
1006 #endif // BidiResolver_h
1007