1 /* 2 * Portions Copyright (c) Meta Platforms, Inc. and affiliates. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Copyright (c) Tor Norbye. 19 * 20 * Licensed under the Apache License, Version 2.0 (the "License"); 21 * you may not use this file except in compliance with the License. 22 * You may obtain a copy of the License at 23 * 24 * http://www.apache.org/licenses/LICENSE-2.0 25 * 26 * Unless required by applicable law or agreed to in writing, software 27 * distributed under the License is distributed on an "AS IS" BASIS, 28 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 * See the License for the specific language governing permissions and 30 * limitations under the License. 31 */ 32 33 package com.facebook.ktfmt.kdoc 34 35 import kotlin.math.min 36 37 class Paragraph(private val task: FormattingTask) { 38 private val options: KDocFormattingOptions 39 get() = task.options 40 var content = StringBuilder() 41 val text 42 get() = content.toString() 43 var prev: Paragraph? = null 44 var next: Paragraph? = null 45 46 /** If true, this paragraph should be preceded by a blank line. */ 47 var separate = false 48 49 /** 50 * If true, this paragraph is a continuation of the previous paragraph (so should be indented with 51 * the hanging indent, including line 1) 52 */ 53 var continuation = false 54 55 /** 56 * Whether this paragraph is allowed to be empty. Paragraphs are normally merged if this is not 57 * set. This allows the line breaker to call [ParagraphListBuilder.newParagraph] repeatedly 58 * without introducing more than one new paragraph. But for preformatted text we do want to be 59 * able to express repeated blank lines. 60 */ 61 var allowEmpty = false 62 63 /** Is this paragraph preformatted? */ 64 var preformatted = false 65 66 /** Is this paragraph a block paragraph? If so, it must start on its own line. */ 67 var block = false 68 69 /** Is this paragraph specifying a kdoc tag like @param? */ 70 var doc = false 71 72 /** 73 * Is this line quoted? (In the future make this an int such that we can support additional 74 * levels.) 75 */ 76 var quoted = false 77 78 /** Is this line part of a table? */ 79 var table = false 80 81 /** Is this a separator line? */ 82 var separator = false 83 84 /** Should this paragraph use a hanging indent? (Implies [block] as well). */ 85 var hanging = false 86 set(value) { 87 block = true 88 field = value 89 } 90 91 var originalIndent = 0 92 93 // The indent to use for all lines in the paragraph. 94 var indent = "" 95 96 // The indent to use for all lines in the paragraph if [hanging] is true, 97 // or the second and subsequent lines if [hanging] is false 98 var hangingIndent = "" 99 isEmptynull100 fun isEmpty(): Boolean { 101 return content.isEmpty() 102 } 103 cleanupnull104 fun cleanup() { 105 val original = text 106 107 if (preformatted) { 108 return 109 } 110 111 var s = original 112 if (options.convertMarkup) { 113 s = convertMarkup(text) 114 } 115 if (!options.allowParamBrackets) { 116 s = rewriteParams(s) 117 } 118 119 if (s != original) { 120 content.clear() 121 content.append(s) 122 } 123 } 124 rewriteParamsnull125 private fun rewriteParams(s: String): String { 126 var start = 0 127 val length = s.length 128 while (start < length && s[start].isWhitespace()) { 129 start++ 130 } 131 if (s.startsWith("@param", start)) { 132 start += "@param".length 133 while (start < length && s[start].isWhitespace()) { 134 start++ 135 } 136 if (start < length && s[start++] == '[') { 137 while (start < length && s[start].isWhitespace()) { 138 start++ 139 } 140 var end = start 141 while (end < length && s[end].isJavaIdentifierPart()) { 142 end++ 143 } 144 if (end > start) { 145 val name = s.substring(start, end) 146 while (end < length && s[end].isWhitespace()) { 147 end++ 148 } 149 if (end < length && s[end++] == ']') { 150 while (end < length && s[end].isWhitespace()) { 151 end++ 152 } 153 return "@param $name ${s.substring(end)}" 154 } 155 } 156 } 157 } 158 159 return s 160 } 161 convertMarkupnull162 private fun convertMarkup(s: String): String { 163 // Whether the tag starts with a capital letter and needs to be cleaned, e.g. `@See` -> `@see`. 164 // (isKDocTag only allows the first letter to be capitalized.) 165 val convertKDocTag = s.isKDocTag() && s[1].isUpperCase() 166 167 if (!convertKDocTag && s.none { it == '<' || it == '&' || it == '{' }) { 168 return s 169 } 170 171 val sb = StringBuilder(s.length) 172 var i = 0 173 val n = s.length 174 175 if (convertKDocTag) { 176 sb.append('@').append(s[1].lowercaseChar()) 177 i += 2 178 } 179 180 var code = false 181 var brackets = 0 182 while (i < n) { 183 val c = s[i++] 184 if (c == '\\') { 185 sb.append(c) 186 if (i < n - 1) { 187 sb.append(s[i++]) 188 } 189 continue 190 } else if (c == '`') { 191 code = !code 192 sb.append(c) 193 continue 194 } else if (c == '[') { 195 brackets++ 196 sb.append(c) 197 continue 198 } else if (c == ']') { 199 brackets-- 200 sb.append(c) 201 continue 202 } else if (code || brackets > 0) { 203 sb.append(c) 204 continue 205 } else if (c == '<') { 206 if (s.startsWith("b>", i, false) || s.startsWith("/b>", i, false)) { 207 // "<b>" or </b> -> "**" 208 sb.append('*').append('*') 209 if (s[i] == '/') i++ 210 i += 2 211 continue 212 } 213 if (s.startsWith("i>", i, false) || s.startsWith("/i>", i, false)) { 214 // "<i>" or </i> -> "*" 215 sb.append('*') 216 if (s[i] == '/') i++ 217 i += 2 218 continue 219 } 220 if (s.startsWith("em>", i, false) || s.startsWith("/em>", i, false)) { 221 // "<em>" or </em> -> "_" 222 sb.append('_') 223 if (s[i] == '/') i++ 224 i += 3 225 continue 226 } 227 // (We don't convert <pre> here because those tags appear in paragraphs 228 // marked preformatted, and preformatted paragraphs are never passed to 229 // convertTags) 230 } else if (c == '&') { 231 if (s.startsWith("lt;", i, true)) { // "<" -> "<" 232 sb.append('<') 233 i += 3 234 continue 235 } 236 if (s.startsWith("gt;", i, true)) { // ">" -> ">" 237 sb.append('>') 238 i += 3 239 continue 240 } 241 } else if (c == '{') { 242 if (s.startsWith("@param", i, true)) { 243 val curr = i + 6 244 var end = s.indexOf('}', curr) 245 if (end == -1) { 246 end = n 247 } 248 sb.append('[') 249 sb.append(s.substring(curr, end).trim()) 250 sb.append(']') 251 i = end + 1 252 continue 253 } else if (s.startsWith("@link", i, true) 254 // @linkplain is similar to @link, but kdoc does *not* render a [symbol] 255 // into a {@linkplain} in HTML, so converting these would change the output. 256 && !s.startsWith("@linkplain", i, true)) { 257 // {@link} or {@linkplain} 258 sb.append('[') 259 var curr = i + 5 260 while (curr < n) { 261 val ch = s[curr++] 262 if (ch.isWhitespace()) { 263 break 264 } 265 if (ch == '}') { 266 curr-- 267 break 268 } 269 } 270 var skip = false 271 while (curr < n) { 272 val ch = s[curr] 273 if (ch == '}') { 274 sb.append(']') 275 curr++ 276 break 277 } else if (ch == '(') { 278 skip = true 279 } else if (!skip) { 280 if (ch == '#') { 281 if (!sb.endsWith('[')) { 282 sb.append('.') 283 } 284 } else { 285 sb.append(ch) 286 } 287 } 288 curr++ 289 } 290 i = curr 291 continue 292 } 293 } 294 sb.append(c) 295 } 296 297 return sb.toString() 298 } 299 reflownull300 fun reflow(firstLineMaxWidth: Int, maxLineWidth: Int): List<String> { 301 val lineWidth = maxLineWidth - getIndentSize(indent, options) 302 val hangingIndentSize = getIndentSize(hangingIndent, options) - if (quoted) 2 else 0 // "> " 303 if (text.length < (firstLineMaxWidth - hangingIndentSize)) { 304 return listOf(text.collapseSpaces()) 305 } 306 // Split text into words 307 val words: List<String> = computeWords() 308 309 // See divide & conquer algorithm listed here: https://xxyxyz.org/line-breaking/ 310 if (words.size == 1) { 311 return listOf(words[0]) 312 } 313 314 if (firstLineMaxWidth < maxLineWidth) { 315 // We have ragged text. We'll just greedily place the first 316 // words on the first line, and then optimize the rest. 317 val line = StringBuilder() 318 val firstLineWidth = firstLineMaxWidth - getIndentSize(indent, options) 319 for (i in words.indices) { 320 val word = words[i] 321 if (line.isEmpty()) { 322 if (word.length + task.type.lineOverhead() > firstLineMaxWidth) { 323 // can't fit anything on the first line: just flow to 324 // full width and caller will need to insert comment on 325 // the next line. 326 return reflow(words, lineWidth, hangingIndentSize) 327 } 328 line.append(word) 329 } else if (line.length + word.length + 1 <= firstLineWidth) { 330 line.append(' ') 331 line.append(word) 332 } else { 333 // Break the rest 334 val remainingWords = words.subList(i, words.size) 335 val reflownRemaining = reflow(remainingWords, lineWidth, hangingIndentSize) 336 return listOf(line.toString()) + reflownRemaining 337 } 338 } 339 // We fit everything on the first line 340 return listOf(line.toString()) 341 } 342 343 return reflow(words, lineWidth, hangingIndentSize) 344 } 345 reflownull346 private fun reflow(words: List<String>, lineWidth: Int, hangingIndentSize: Int): List<String> { 347 if (options.alternate || 348 !options.optimal || 349 hanging && hangingIndentSize > 0 || 350 // An unbreakable long word may make other lines shorter and won't look good 351 words.any { it.length > lineWidth }) { 352 // Switch to greedy if explicitly turned on, and for hanging indent 353 // paragraphs, since the current implementation doesn't have support 354 // for a different maximum length on the first line from the rest 355 // and there were various cases where this ended up with bad results. 356 // This is typically used in list items (and kdoc sections) which tend 357 // to be short -- and for 2-3 lines the gains of optimal line breaking 358 // isn't worth the cases where we have really unbalanced looking text 359 return reflowGreedy(lineWidth, options, words) 360 } 361 362 val lines = reflowOptimal(lineWidth - hangingIndentSize, words) 363 if (lines.size <= 2) { 364 // Just 2 lines? We prefer long+short instead of half+half. 365 return reflowGreedy(lineWidth, options, words) 366 } else { 367 // We could just return [lines] here, but the straightforward algorithm 368 // doesn't do a great job with short paragraphs where the last line is 369 // short; it over-corrects and shortens everything else in order to balance 370 // out the last line. 371 372 val maxLine: (String) -> Int = { 373 // Ignore lines that are unbreakable 374 if (it.indexOf(' ') == -1) { 375 0 376 } else { 377 it.length 378 } 379 } 380 val longestLine = lines.maxOf(maxLine) 381 var lastWord = words.size - 1 382 while (lastWord > 0) { 383 // We can afford to do this because we're only repeating it for a single 384 // line's worth of words and because comments tend to be relatively short 385 // anyway 386 val newLines = reflowOptimal(lineWidth - hangingIndentSize, words.subList(0, lastWord)) 387 if (newLines.size < lines.size) { 388 val newLongestLine = newLines.maxOf(maxLine) 389 if (newLongestLine > longestLine && 390 newLines.subList(0, newLines.size - 1).any { it.length > longestLine }) { 391 return newLines + 392 reflowGreedy( 393 lineWidth - hangingIndentSize, options, words.subList(lastWord, words.size)) 394 } 395 break 396 } 397 lastWord-- 398 } 399 400 return lines 401 } 402 } 403 404 /** 405 * Returns true if it's okay to break at the current word. 406 * 407 * We need to check for this, because a word can have a different meaning at the beginning of a 408 * line than in the middle somewhere, so if it just so happens to be at the break boundary, we 409 * need to make sure we don't make it the first word on the next line since that would change the 410 * documentation. 411 */ canBreakAtnull412 private fun canBreakAt(word: String): Boolean { 413 // Can we start a new line with this without interpreting it in a special 414 // way? 415 416 if (word.startsWith("#") || 417 word.startsWith("```") || 418 word.isDirectiveMarker() || 419 word.startsWith("@") || // interpreted as a tag 420 word.isTodo() || 421 word.startsWith(">")) { 422 return false 423 } 424 425 if (!word.first().isLetter()) { 426 val wordWithSpace = "$word " // for regex matching in below checks 427 if (wordWithSpace.isListItem() && !word.equals("<li>", true) || wordWithSpace.isQuoted()) { 428 return false 429 } 430 } 431 432 return true 433 } 434 435 /** 436 * Split [text] up into individual "words"; in the case where some words are not allowed to span 437 * lines, it will combine these into single word. For example, if we have a sentence which ends 438 * with a number, e.g. "the sum is 5.", we want to make sure "5." is never placed at the beginning 439 * of a new line (which would turn it into a list item), so for this we'll compute the word list 440 * "the", "sum", "is 5.". 441 */ computeWordsnull442 fun computeWords(): List<String> { 443 val words = text.split(Regex("\\s+")).filter { it.isNotBlank() }.map { it.trim() } 444 if (words.size == 1) { 445 return words 446 } 447 448 if (task.type != CommentType.KDOC) { 449 // In block comments and line comments we feel free to break anywhere 450 // between words; there isn't a special meaning assigned to certain words 451 // if they appear first on a line like there is in KDoc/Markdown. 452 return words 453 } 454 455 // See if any of the words should never be broken up. We do that for list 456 // separators and a few others. We never want to put "1." at the beginning 457 // of a line as an overflow. 458 459 val combined = ArrayList<String>(words.size) 460 461 var from = 0 462 val end = words.size 463 while (from < end) { 464 val start = 465 if (from == 0 && (quoted || hanging && !text.isKDocTag())) { 466 from + 2 467 } else { 468 from + 1 469 } 470 var to = words.size 471 for (i in start until words.size) { 472 val next = words[i] 473 if (next.startsWith("[") && !next.startsWith("[[")) { 474 // find end 475 var j = -1 476 for (k in i until words.size) { 477 if (']' in words[k]) { 478 j = k 479 break 480 } 481 } 482 if (j != -1) { 483 // combine everything in the string; we can't break link text 484 if (start == from + 1 && canBreakAt(words[start])) { 485 combined.add(words[from]) 486 from = start 487 } 488 // Maybe not break; what if the next word isn't okay? 489 to = j + 1 490 if (to == words.size || canBreakAt(words[to])) { 491 break 492 } 493 } // else: unterminated [, ignore 494 } else if (canBreakAt(next)) { 495 to = i 496 break 497 } 498 } 499 500 if (to == from + 1) { 501 combined.add(words[from]) 502 } else if (to > from) { 503 combined.add(words.subList(from, to).joinToString(" ")) 504 } 505 from = to 506 } 507 508 return combined 509 } 510 511 private data class Quadruple(val i0: Int, val j0: Int, val i1: Int, val j1: Int) 512 reflowOptimalnull513 private fun reflowOptimal(maxLineWidth: Int, words: List<String>): List<String> { 514 val count = words.size 515 val lines = ArrayList<String>() 516 517 val offsets = ArrayList<Int>() 518 offsets.add(0) 519 520 for (boxWidth in words.map { it.length }.toList()) { 521 offsets.add(offsets.last() + min(boxWidth, maxLineWidth)) 522 } 523 524 val big = 10 shl 20 525 val minimum = IntArray(count + 1) { big } 526 val breaks = IntArray(count + 1) 527 minimum[0] = 0 528 529 fun cost(i: Int, j: Int): Int { 530 val width = offsets[j] - offsets[i] + j - i - 1 531 return if (width <= maxLineWidth) { 532 val squared = (maxLineWidth - width) * (maxLineWidth - width) 533 minimum[i] + squared 534 } else { 535 big 536 } 537 } 538 539 fun search(pi0: Int, pj0: Int, pi1: Int, pj1: Int) { 540 val stack = ArrayDeque<Quadruple>() 541 stack.add(Quadruple(pi0, pj0, pi1, pj1)) 542 543 while (stack.isNotEmpty()) { 544 val (i0, j0, i1, j1) = stack.removeLast() 545 if (j0 < j1) { 546 val j = (j0 + j1) / 2 547 548 for (i in i0 until i1) { 549 val c = cost(i, j) 550 if (c <= minimum[j]) { 551 minimum[j] = c 552 breaks[j] = i 553 } 554 } 555 stack.add(Quadruple(breaks[j], j + 1, i1, j1)) 556 stack.add(Quadruple(i0, j0, breaks[j] + 1, j)) 557 } 558 } 559 } 560 561 var n = count + 1 562 var i = 0 563 var offset = 0 564 565 while (true) { 566 val r = min(n, 1 shl (i + 1)) 567 val edge = (1 shl i) + offset 568 search(0 + offset, edge, edge, r + offset) 569 val x = minimum[r - 1 + offset] 570 var flag = true 571 for (j in (1 shl i) until (r - 1)) { 572 val y = cost(j + offset, r - 1 + offset) 573 if (y <= x) { 574 n -= j 575 i = 0 576 offset += j 577 flag = false 578 break 579 } 580 } 581 if (flag) { 582 if (r == n) break 583 i++ 584 } 585 } 586 587 var j = count 588 while (j > 0) { 589 i = breaks[j] 590 val sb = StringBuilder() 591 for (w in i until j) { 592 sb.append(words[w]) 593 if (w < j - 1) { 594 sb.append(' ') 595 } 596 } 597 lines.add(sb.toString()) 598 j = i 599 } 600 601 lines.reverse() 602 return lines 603 } 604 reflowGreedynull605 private fun reflowGreedy( 606 lineWidth: Int, 607 options: KDocFormattingOptions, 608 words: List<String> 609 ): List<String> { 610 // Greedy implementation 611 612 var width = lineWidth 613 if (options.hangingIndent > 0 && hanging && continuation) { 614 width -= getIndentSize(hangingIndent, options) 615 } 616 617 val lines = mutableListOf<String>() 618 var column = 0 619 val sb = StringBuilder() 620 for (word in words) { 621 when { 622 sb.isEmpty() -> { 623 sb.append(word) 624 column += word.length 625 } 626 column + word.length + 1 <= width -> { 627 sb.append(' ').append(word) 628 column += word.length + 1 629 } 630 else -> { 631 width = lineWidth 632 if (options.hangingIndent > 0 && hanging) { 633 width -= getIndentSize(hangingIndent, options) 634 } 635 lines.add(sb.toString()) 636 sb.setLength(0) 637 sb.append(word) 638 column = sb.length 639 } 640 } 641 } 642 if (sb.isNotEmpty()) { 643 lines.add(sb.toString()) 644 } 645 return lines 646 } 647 toStringnull648 override fun toString(): String { 649 return "$content, separate=$separate, block=$block, hanging=$hanging, preformatted=$preformatted, quoted=$quoted, continuation=$continuation, allowempty=$allowEmpty, separator=$separator" 650 } 651 } 652