• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Copyright (c) Tor Norbye.
19  *
20  * Licensed under the Apache License, Version 2.0 (the "License");
21  * you may not use this file except in compliance with the License.
22  * You may obtain a copy of the License at
23  *
24  *     http://www.apache.org/licenses/LICENSE-2.0
25  *
26  * Unless required by applicable law or agreed to in writing, software
27  * distributed under the License is distributed on an "AS IS" BASIS,
28  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29  * See the License for the specific language governing permissions and
30  * limitations under the License.
31  */
32 
33 package com.facebook.ktfmt.kdoc
34 
35 import kotlin.math.min
36 
37 class Paragraph(private val task: FormattingTask) {
38   private val options: KDocFormattingOptions
39     get() = task.options
40   var content = StringBuilder()
41   val text
42     get() = content.toString()
43   var prev: Paragraph? = null
44   var next: Paragraph? = null
45 
46   /** If true, this paragraph should be preceded by a blank line. */
47   var separate = false
48 
49   /**
50    * If true, this paragraph is a continuation of the previous paragraph (so should be indented with
51    * the hanging indent, including line 1)
52    */
53   var continuation = false
54 
55   /**
56    * Whether this paragraph is allowed to be empty. Paragraphs are normally merged if this is not
57    * set. This allows the line breaker to call [ParagraphListBuilder.newParagraph] repeatedly
58    * without introducing more than one new paragraph. But for preformatted text we do want to be
59    * able to express repeated blank lines.
60    */
61   var allowEmpty = false
62 
63   /** Is this paragraph preformatted? */
64   var preformatted = false
65 
66   /** Is this paragraph a block paragraph? If so, it must start on its own line. */
67   var block = false
68 
69   /** Is this paragraph specifying a kdoc tag like @param? */
70   var doc = false
71 
72   /**
73    * Is this line quoted? (In the future make this an int such that we can support additional
74    * levels.)
75    */
76   var quoted = false
77 
78   /** Is this line part of a table? */
79   var table = false
80 
81   /** Is this a separator line? */
82   var separator = false
83 
84   /** Should this paragraph use a hanging indent? (Implies [block] as well). */
85   var hanging = false
86     set(value) {
87       block = true
88       field = value
89     }
90 
91   var originalIndent = 0
92 
93   // The indent to use for all lines in the paragraph.
94   var indent = ""
95 
96   // The indent to use for all lines in the paragraph if [hanging] is true,
97   // or the second and subsequent lines if [hanging] is false
98   var hangingIndent = ""
99 
isEmptynull100   fun isEmpty(): Boolean {
101     return content.isEmpty()
102   }
103 
cleanupnull104   fun cleanup() {
105     val original = text
106 
107     if (preformatted) {
108       return
109     }
110 
111     var s = original
112     if (options.convertMarkup) {
113       s = convertMarkup(text)
114     }
115     if (!options.allowParamBrackets) {
116       s = rewriteParams(s)
117     }
118 
119     if (s != original) {
120       content.clear()
121       content.append(s)
122     }
123   }
124 
rewriteParamsnull125   private fun rewriteParams(s: String): String {
126     var start = 0
127     val length = s.length
128     while (start < length && s[start].isWhitespace()) {
129       start++
130     }
131     if (s.startsWith("@param", start)) {
132       start += "@param".length
133       while (start < length && s[start].isWhitespace()) {
134         start++
135       }
136       if (start < length && s[start++] == '[') {
137         while (start < length && s[start].isWhitespace()) {
138           start++
139         }
140         var end = start
141         while (end < length && s[end].isJavaIdentifierPart()) {
142           end++
143         }
144         if (end > start) {
145           val name = s.substring(start, end)
146           while (end < length && s[end].isWhitespace()) {
147             end++
148           }
149           if (end < length && s[end++] == ']') {
150             while (end < length && s[end].isWhitespace()) {
151               end++
152             }
153             return "@param $name ${s.substring(end)}"
154           }
155         }
156       }
157     }
158 
159     return s
160   }
161 
convertMarkupnull162   private fun convertMarkup(s: String): String {
163     // Whether the tag starts with a capital letter and needs to be cleaned, e.g. `@See` -> `@see`.
164     // (isKDocTag only allows the first letter to be capitalized.)
165     val convertKDocTag = s.isKDocTag() && s[1].isUpperCase()
166 
167     if (!convertKDocTag && s.none { it == '<' || it == '&' || it == '{' }) {
168       return s
169     }
170 
171     val sb = StringBuilder(s.length)
172     var i = 0
173     val n = s.length
174 
175     if (convertKDocTag) {
176       sb.append('@').append(s[1].lowercaseChar())
177       i += 2
178     }
179 
180     var code = false
181     var brackets = 0
182     while (i < n) {
183       val c = s[i++]
184       if (c == '\\') {
185         sb.append(c)
186         if (i < n - 1) {
187           sb.append(s[i++])
188         }
189         continue
190       } else if (c == '`') {
191         code = !code
192         sb.append(c)
193         continue
194       } else if (c == '[') {
195         brackets++
196         sb.append(c)
197         continue
198       } else if (c == ']') {
199         brackets--
200         sb.append(c)
201         continue
202       } else if (code || brackets > 0) {
203         sb.append(c)
204         continue
205       } else if (c == '<') {
206         if (s.startsWith("b>", i, false) || s.startsWith("/b>", i, false)) {
207           // "<b>" or </b> -> "**"
208           sb.append('*').append('*')
209           if (s[i] == '/') i++
210           i += 2
211           continue
212         }
213         if (s.startsWith("i>", i, false) || s.startsWith("/i>", i, false)) {
214           // "<i>" or </i> -> "*"
215           sb.append('*')
216           if (s[i] == '/') i++
217           i += 2
218           continue
219         }
220         if (s.startsWith("em>", i, false) || s.startsWith("/em>", i, false)) {
221           // "<em>" or </em> -> "_"
222           sb.append('_')
223           if (s[i] == '/') i++
224           i += 3
225           continue
226         }
227         // (We don't convert <pre> here because those tags appear in paragraphs
228         // marked preformatted, and preformatted paragraphs are never passed to
229         // convertTags)
230       } else if (c == '&') {
231         if (s.startsWith("lt;", i, true)) { // "&lt;" -> "<"
232           sb.append('<')
233           i += 3
234           continue
235         }
236         if (s.startsWith("gt;", i, true)) { // "&gt;" -> ">"
237           sb.append('>')
238           i += 3
239           continue
240         }
241       } else if (c == '{') {
242         if (s.startsWith("@param", i, true)) {
243           val curr = i + 6
244           var end = s.indexOf('}', curr)
245           if (end == -1) {
246             end = n
247           }
248           sb.append('[')
249           sb.append(s.substring(curr, end).trim())
250           sb.append(']')
251           i = end + 1
252           continue
253         } else if (s.startsWith("@link", i, true)
254         // @linkplain is similar to @link, but kdoc does *not* render a [symbol]
255         // into a {@linkplain} in HTML, so converting these would change the output.
256         && !s.startsWith("@linkplain", i, true)) {
257           // {@link} or {@linkplain}
258           sb.append('[')
259           var curr = i + 5
260           while (curr < n) {
261             val ch = s[curr++]
262             if (ch.isWhitespace()) {
263               break
264             }
265             if (ch == '}') {
266               curr--
267               break
268             }
269           }
270           var skip = false
271           while (curr < n) {
272             val ch = s[curr]
273             if (ch == '}') {
274               sb.append(']')
275               curr++
276               break
277             } else if (ch == '(') {
278               skip = true
279             } else if (!skip) {
280               if (ch == '#') {
281                 if (!sb.endsWith('[')) {
282                   sb.append('.')
283                 }
284               } else {
285                 sb.append(ch)
286               }
287             }
288             curr++
289           }
290           i = curr
291           continue
292         }
293       }
294       sb.append(c)
295     }
296 
297     return sb.toString()
298   }
299 
reflownull300   fun reflow(firstLineMaxWidth: Int, maxLineWidth: Int): List<String> {
301     val lineWidth = maxLineWidth - getIndentSize(indent, options)
302     val hangingIndentSize = getIndentSize(hangingIndent, options) - if (quoted) 2 else 0 // "> "
303     if (text.length < (firstLineMaxWidth - hangingIndentSize)) {
304       return listOf(text.collapseSpaces())
305     }
306     // Split text into words
307     val words: List<String> = computeWords()
308 
309     // See divide & conquer algorithm listed here: https://xxyxyz.org/line-breaking/
310     if (words.size == 1) {
311       return listOf(words[0])
312     }
313 
314     if (firstLineMaxWidth < maxLineWidth) {
315       // We have ragged text. We'll just greedily place the first
316       // words on the first line, and then optimize the rest.
317       val line = StringBuilder()
318       val firstLineWidth = firstLineMaxWidth - getIndentSize(indent, options)
319       for (i in words.indices) {
320         val word = words[i]
321         if (line.isEmpty()) {
322           if (word.length + task.type.lineOverhead() > firstLineMaxWidth) {
323             // can't fit anything on the first line: just flow to
324             // full width and caller will need to insert comment on
325             // the next line.
326             return reflow(words, lineWidth, hangingIndentSize)
327           }
328           line.append(word)
329         } else if (line.length + word.length + 1 <= firstLineWidth) {
330           line.append(' ')
331           line.append(word)
332         } else {
333           // Break the rest
334           val remainingWords = words.subList(i, words.size)
335           val reflownRemaining = reflow(remainingWords, lineWidth, hangingIndentSize)
336           return listOf(line.toString()) + reflownRemaining
337         }
338       }
339       // We fit everything on the first line
340       return listOf(line.toString())
341     }
342 
343     return reflow(words, lineWidth, hangingIndentSize)
344   }
345 
reflownull346   private fun reflow(words: List<String>, lineWidth: Int, hangingIndentSize: Int): List<String> {
347     if (options.alternate ||
348         !options.optimal ||
349         hanging && hangingIndentSize > 0 ||
350         // An unbreakable long word may make other lines shorter and won't look good
351         words.any { it.length > lineWidth }) {
352       // Switch to greedy if explicitly turned on, and for hanging indent
353       // paragraphs, since the current implementation doesn't have support
354       // for a different maximum length on the first line from the rest
355       // and there were various cases where this ended up with bad results.
356       // This is typically used in list items (and kdoc sections) which tend
357       // to be short -- and for 2-3 lines the gains of optimal line breaking
358       // isn't worth the cases where we have really unbalanced looking text
359       return reflowGreedy(lineWidth, options, words)
360     }
361 
362     val lines = reflowOptimal(lineWidth - hangingIndentSize, words)
363     if (lines.size <= 2) {
364       // Just 2 lines? We prefer long+short instead of half+half.
365       return reflowGreedy(lineWidth, options, words)
366     } else {
367       // We could just return [lines] here, but the straightforward algorithm
368       // doesn't do a great job with short paragraphs where the last line is
369       // short; it over-corrects and shortens everything else in order to balance
370       // out the last line.
371 
372       val maxLine: (String) -> Int = {
373         // Ignore lines that are unbreakable
374         if (it.indexOf(' ') == -1) {
375           0
376         } else {
377           it.length
378         }
379       }
380       val longestLine = lines.maxOf(maxLine)
381       var lastWord = words.size - 1
382       while (lastWord > 0) {
383         // We can afford to do this because we're only repeating it for a single
384         // line's worth of words and because comments tend to be relatively short
385         // anyway
386         val newLines = reflowOptimal(lineWidth - hangingIndentSize, words.subList(0, lastWord))
387         if (newLines.size < lines.size) {
388           val newLongestLine = newLines.maxOf(maxLine)
389           if (newLongestLine > longestLine &&
390               newLines.subList(0, newLines.size - 1).any { it.length > longestLine }) {
391             return newLines +
392                 reflowGreedy(
393                     lineWidth - hangingIndentSize, options, words.subList(lastWord, words.size))
394           }
395           break
396         }
397         lastWord--
398       }
399 
400       return lines
401     }
402   }
403 
404   /**
405    * Returns true if it's okay to break at the current word.
406    *
407    * We need to check for this, because a word can have a different meaning at the beginning of a
408    * line than in the middle somewhere, so if it just so happens to be at the break boundary, we
409    * need to make sure we don't make it the first word on the next line since that would change the
410    * documentation.
411    */
canBreakAtnull412   private fun canBreakAt(word: String): Boolean {
413     // Can we start a new line with this without interpreting it in a special
414     // way?
415 
416     if (word.startsWith("#") ||
417         word.startsWith("```") ||
418         word.isDirectiveMarker() ||
419         word.startsWith("@") || // interpreted as a tag
420         word.isTodo() ||
421         word.startsWith(">")) {
422       return false
423     }
424 
425     if (!word.first().isLetter()) {
426       val wordWithSpace = "$word " // for regex matching in below checks
427       if (wordWithSpace.isListItem() && !word.equals("<li>", true) || wordWithSpace.isQuoted()) {
428         return false
429       }
430     }
431 
432     return true
433   }
434 
435   /**
436    * Split [text] up into individual "words"; in the case where some words are not allowed to span
437    * lines, it will combine these into single word. For example, if we have a sentence which ends
438    * with a number, e.g. "the sum is 5.", we want to make sure "5." is never placed at the beginning
439    * of a new line (which would turn it into a list item), so for this we'll compute the word list
440    * "the", "sum", "is 5.".
441    */
computeWordsnull442   fun computeWords(): List<String> {
443     val words = text.split(Regex("\\s+")).filter { it.isNotBlank() }.map { it.trim() }
444     if (words.size == 1) {
445       return words
446     }
447 
448     if (task.type != CommentType.KDOC) {
449       // In block comments and line comments we feel free to break anywhere
450       // between words; there isn't a special meaning assigned to certain words
451       // if they appear first on a line like there is in KDoc/Markdown.
452       return words
453     }
454 
455     // See if any of the words should never be broken up. We do that for list
456     // separators and a few others. We never want to put "1." at the beginning
457     // of a line as an overflow.
458 
459     val combined = ArrayList<String>(words.size)
460 
461     var from = 0
462     val end = words.size
463     while (from < end) {
464       val start =
465           if (from == 0 && (quoted || hanging && !text.isKDocTag())) {
466             from + 2
467           } else {
468             from + 1
469           }
470       var to = words.size
471       for (i in start until words.size) {
472         val next = words[i]
473         if (next.startsWith("[") && !next.startsWith("[[")) {
474           // find end
475           var j = -1
476           for (k in i until words.size) {
477             if (']' in words[k]) {
478               j = k
479               break
480             }
481           }
482           if (j != -1) {
483             // combine everything in the string; we can't break link text
484             if (start == from + 1 && canBreakAt(words[start])) {
485               combined.add(words[from])
486               from = start
487             }
488             // Maybe not break; what if the next word isn't okay?
489             to = j + 1
490             if (to == words.size || canBreakAt(words[to])) {
491               break
492             }
493           } // else: unterminated [, ignore
494         } else if (canBreakAt(next)) {
495           to = i
496           break
497         }
498       }
499 
500       if (to == from + 1) {
501         combined.add(words[from])
502       } else if (to > from) {
503         combined.add(words.subList(from, to).joinToString(" "))
504       }
505       from = to
506     }
507 
508     return combined
509   }
510 
511   private data class Quadruple(val i0: Int, val j0: Int, val i1: Int, val j1: Int)
512 
reflowOptimalnull513   private fun reflowOptimal(maxLineWidth: Int, words: List<String>): List<String> {
514     val count = words.size
515     val lines = ArrayList<String>()
516 
517     val offsets = ArrayList<Int>()
518     offsets.add(0)
519 
520     for (boxWidth in words.map { it.length }.toList()) {
521       offsets.add(offsets.last() + min(boxWidth, maxLineWidth))
522     }
523 
524     val big = 10 shl 20
525     val minimum = IntArray(count + 1) { big }
526     val breaks = IntArray(count + 1)
527     minimum[0] = 0
528 
529     fun cost(i: Int, j: Int): Int {
530       val width = offsets[j] - offsets[i] + j - i - 1
531       return if (width <= maxLineWidth) {
532         val squared = (maxLineWidth - width) * (maxLineWidth - width)
533         minimum[i] + squared
534       } else {
535         big
536       }
537     }
538 
539     fun search(pi0: Int, pj0: Int, pi1: Int, pj1: Int) {
540       val stack = ArrayDeque<Quadruple>()
541       stack.add(Quadruple(pi0, pj0, pi1, pj1))
542 
543       while (stack.isNotEmpty()) {
544         val (i0, j0, i1, j1) = stack.removeLast()
545         if (j0 < j1) {
546           val j = (j0 + j1) / 2
547 
548           for (i in i0 until i1) {
549             val c = cost(i, j)
550             if (c <= minimum[j]) {
551               minimum[j] = c
552               breaks[j] = i
553             }
554           }
555           stack.add(Quadruple(breaks[j], j + 1, i1, j1))
556           stack.add(Quadruple(i0, j0, breaks[j] + 1, j))
557         }
558       }
559     }
560 
561     var n = count + 1
562     var i = 0
563     var offset = 0
564 
565     while (true) {
566       val r = min(n, 1 shl (i + 1))
567       val edge = (1 shl i) + offset
568       search(0 + offset, edge, edge, r + offset)
569       val x = minimum[r - 1 + offset]
570       var flag = true
571       for (j in (1 shl i) until (r - 1)) {
572         val y = cost(j + offset, r - 1 + offset)
573         if (y <= x) {
574           n -= j
575           i = 0
576           offset += j
577           flag = false
578           break
579         }
580       }
581       if (flag) {
582         if (r == n) break
583         i++
584       }
585     }
586 
587     var j = count
588     while (j > 0) {
589       i = breaks[j]
590       val sb = StringBuilder()
591       for (w in i until j) {
592         sb.append(words[w])
593         if (w < j - 1) {
594           sb.append(' ')
595         }
596       }
597       lines.add(sb.toString())
598       j = i
599     }
600 
601     lines.reverse()
602     return lines
603   }
604 
reflowGreedynull605   private fun reflowGreedy(
606       lineWidth: Int,
607       options: KDocFormattingOptions,
608       words: List<String>
609   ): List<String> {
610     // Greedy implementation
611 
612     var width = lineWidth
613     if (options.hangingIndent > 0 && hanging && continuation) {
614       width -= getIndentSize(hangingIndent, options)
615     }
616 
617     val lines = mutableListOf<String>()
618     var column = 0
619     val sb = StringBuilder()
620     for (word in words) {
621       when {
622         sb.isEmpty() -> {
623           sb.append(word)
624           column += word.length
625         }
626         column + word.length + 1 <= width -> {
627           sb.append(' ').append(word)
628           column += word.length + 1
629         }
630         else -> {
631           width = lineWidth
632           if (options.hangingIndent > 0 && hanging) {
633             width -= getIndentSize(hangingIndent, options)
634           }
635           lines.add(sb.toString())
636           sb.setLength(0)
637           sb.append(word)
638           column = sb.length
639         }
640       }
641     }
642     if (sb.isNotEmpty()) {
643       lines.add(sb.toString())
644     }
645     return lines
646   }
647 
toStringnull648   override fun toString(): String {
649     return "$content, separate=$separate, block=$block, hanging=$hanging, preformatted=$preformatted, quoted=$quoted, continuation=$continuation, allowempty=$allowEmpty, separator=$separator"
650   }
651 }
652