• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.tools.metalava.model.source.utils
18 
19 import org.intellij.lang.annotations.Language
20 
/**
 * Converts from package.html content to a package-info.java javadoc string.
 *
 * Returns an empty string when [packageHtml] is `null` or blank, or when the body extracted from
 * it is blank. Otherwise the trimmed body is wrapped in a javadoc comment with every body line
 * prefixed by an asterisk.
 */
@Language("JAVA")
fun packageHtmlToJavadoc(@Language("HTML") packageHtml: String?): String {
    if (packageHtml.isNullOrBlank()) {
        return ""
    }

    val body = getBodyContents(packageHtml).trim()
    if (body.isBlank()) {
        return ""
    }

    // Combine into comment lines prefixed by an asterisk, and make sure we don't have
    // end-comment markers in the HTML that would escape out of the javadoc comment: the slash
    // is rewritten as the HTML character reference &#47; which renders identically.
    val comment = body.lines().joinToString(separator = "\n") { " * $it" }.replace("*/", "*&#47;")
    @Suppress("DanglingJavadoc") return "/**\n$comment\n */\n"
}
38 
/**
 * Returns the body content from the given HTML document. Attempts to tokenize the HTML properly
 * such that it doesn't get confused by comments or text that looks like tags.
 *
 * The scan returns as soon as one of these closing tags is reached:
 * - `</body>` after a `<body ...>` was seen: the text between the two is returned.
 * - `</html>` after an `<html ...>` was seen: the text between the two is returned.
 *
 * Tag names are matched case-insensitively and by prefix. If the end of the document is reached
 * without either of the above happening, [html] is returned unchanged.
 */
@Suppress("LocalVariableName")
private fun getBodyContents(html: String): String {
    val length = html.length
    val STATE_TEXT = 1
    val STATE_SLASH = 2
    val STATE_ATTRIBUTE_NAME = 3
    val STATE_IN_TAG = 4
    val STATE_BEFORE_ATTRIBUTE = 5
    val STATE_ATTRIBUTE_BEFORE_EQUALS = 6
    val STATE_ATTRIBUTE_AFTER_EQUALS = 7
    val STATE_ATTRIBUTE_VALUE_NONE = 8
    val STATE_ATTRIBUTE_VALUE_SINGLE = 9
    val STATE_ATTRIBUTE_VALUE_DOUBLE = 10
    val STATE_CLOSE_TAG = 11
    val STATE_ENDING_TAG = 12

    // Offsets of the first character after the opening <body ...> / <html ...> tag, or -1 while
    // the corresponding opening tag has not been seen.
    var bodyStart = -1
    var htmlStart = -1

    var state = STATE_TEXT
    var offset = 0
    // Offset of the element name in the most recent opening tag.
    var tagStart = -1
    // Offset of the element name in the most recent closing tag, i.e. just after "</".
    var tagEndStart = -1
    var prev = -1
    loop@ while (offset < length) {
        if (offset == prev) {
            // Purely here to prevent potential bugs in the state machine from looping
            // infinitely
            offset++
            if (offset == length) {
                break
            }
        }
        prev = offset

        val c = html[offset]
        when (state) {
            STATE_TEXT -> {
                // Text outside of tags is ignored; only a '<' is interesting.
                if (c == '<') {
                    state = STATE_SLASH
                }
                offset++
            }
            STATE_SLASH -> {
                // c is the first character after a '<'.
                if (c == '!' || c == '?') {
                    // Comment, CDATA section, other declaration (e.g. doctype) or XML prologue:
                    // skip to just past its terminator, or to the end of the document if it is
                    // unterminated.
                    val (searchFrom, terminator) =
                        when {
                            html.startsWith("!--", offset) -> offset + 3 to "-->"
                            html.startsWith("![CDATA[", offset) -> offset + 8 to "]]>"
                            else -> offset + 2 to ">"
                        }
                    val end = html.indexOf(terminator, searchFrom)
                    offset = if (end == -1) length else end + terminator.length
                    state = STATE_TEXT
                    continue@loop
                }
                if (c == '/') {
                    state = STATE_CLOSE_TAG
                    offset++
                    tagEndStart = offset
                    continue@loop
                }
                // Anything else starts an element name. The character itself is consumed here
                // and is not examined by STATE_IN_TAG.
                state = STATE_IN_TAG
                tagStart = offset
                offset++
            }
            STATE_CLOSE_TAG -> {
                if (c == '>') {
                    state = STATE_TEXT
                    val contentEnd = tagEndStart - 2 // offset of the "</"
                    if (bodyStart != -1 && html.startsWith("body", tagEndStart, true)) {
                        return html.substring(bodyStart, contentEnd)
                    }
                    // Only usable if the opening <html> tag was actually seen; otherwise
                    // htmlStart is -1 and substring would throw.
                    if (htmlStart != -1 && html.startsWith("html", tagEndStart, true)) {
                        return html.substring(htmlStart, contentEnd)
                    }
                }
                offset++
            }
            STATE_IN_TAG -> {
                val whitespace = Character.isWhitespace(c)
                if (whitespace || c == '>') {
                    // The element name is complete: if it is one of the tags of interest,
                    // record where its content starts (just after the next '>').
                    if (html.startsWith("body", tagStart, true)) {
                        bodyStart = html.indexOf('>', offset) + 1
                    }
                    if (html.startsWith("html", tagStart, true)) {
                        htmlStart = html.indexOf('>', offset) + 1
                    }
                }

                when {
                    whitespace -> state = STATE_BEFORE_ATTRIBUTE
                    c == '>' -> state = STATE_TEXT
                    c == '/' -> state = STATE_ENDING_TAG
                }
                offset++
            }
            STATE_ENDING_TAG -> {
                // Inside a self-closing tag such as <br/>, after the '/'. Such a tag has no
                // content and no separate closing tag, so it never ends the body; just wait for
                // the '>'.
                if (c == '>') {
                    state = STATE_TEXT
                }
                offset++
            }
            STATE_BEFORE_ATTRIBUTE -> {
                when {
                    c == '>' -> state = STATE_TEXT
                    c == '/' -> {
                        // we expect an '>' next to close the tag
                    }
                    !Character.isWhitespace(c) -> state = STATE_ATTRIBUTE_NAME
                }
                offset++
            }
            STATE_ATTRIBUTE_NAME -> {
                when {
                    c == '>' -> state = STATE_TEXT
                    c == '=' -> state = STATE_ATTRIBUTE_AFTER_EQUALS
                    Character.isWhitespace(c) -> state = STATE_ATTRIBUTE_BEFORE_EQUALS
                }
                offset++
            }
            STATE_ATTRIBUTE_BEFORE_EQUALS -> {
                when {
                    c == '=' -> state = STATE_ATTRIBUTE_AFTER_EQUALS
                    c == '>' -> state = STATE_TEXT
                    // Attribute value not specified (used for some boolean attributes)
                    !Character.isWhitespace(c) -> state = STATE_ATTRIBUTE_NAME
                }
                offset++
            }
            STATE_ATTRIBUTE_AFTER_EQUALS -> {
                when {
                    // a='b'
                    c == '\'' -> state = STATE_ATTRIBUTE_VALUE_SINGLE
                    // a="b"
                    c == '"' -> state = STATE_ATTRIBUTE_VALUE_DOUBLE
                    // a=b
                    !Character.isWhitespace(c) -> state = STATE_ATTRIBUTE_VALUE_NONE
                }
                offset++
            }
            STATE_ATTRIBUTE_VALUE_SINGLE -> {
                if (c == '\'') {
                    state = STATE_BEFORE_ATTRIBUTE
                }
                offset++
            }
            STATE_ATTRIBUTE_VALUE_DOUBLE -> {
                if (c == '"') {
                    state = STATE_BEFORE_ATTRIBUTE
                }
                offset++
            }
            STATE_ATTRIBUTE_VALUE_NONE -> {
                if (c == '>') {
                    state = STATE_TEXT
                } else if (Character.isWhitespace(c)) {
                    state = STATE_BEFORE_ATTRIBUTE
                }
                offset++
            }
            else -> assert(false) { state }
        }
    }

    // No usable <body>/<html> element was found: fall back to the whole document.
    return html
}
267