1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package com.android.tools.metalava.model.source.utils
18
19 import org.intellij.lang.annotations.Language
20
/**
 * Converts from package.html content to a package-info.java javadoc string.
 *
 * The body of the HTML document (as extracted by [getBodyContents]) is trimmed and every line is
 * prefixed with an asterisk so that it forms a javadoc block comment.
 *
 * @param packageHtml the contents of a `package.html` file, or null if there is none.
 * @return the javadoc comment, terminated by a newline, or the empty string if [packageHtml] is
 *   null or blank, or if its extracted body is blank.
 */
@Language("JAVA")
fun packageHtmlToJavadoc(@Language("HTML") packageHtml: String?): String {
    if (packageHtml == null || packageHtml.isBlank()) {
        return ""
    }

    val body = getBodyContents(packageHtml).trim()
    if (body.isBlank()) {
        return ""
    }

    // Combine into comment lines prefixed by asterisk, and make sure we don't have end-comment
    // markers in the HTML that will escape out of the javadoc comment: the slash of any such
    // marker is replaced by its HTML entity so the text renders the same but no longer ends the
    // comment.
    val comment = body.lines().joinToString(separator = "\n") { " * $it" }.replace("*/", "*&#47;")
    @Suppress("DanglingJavadoc") return "/**\n$comment\n */\n"
}
38
/**
 * Returns the body content from the given HTML document. Attempts to tokenize the HTML properly
 * such that it doesn't get confused by comments or text that looks like tags.
 *
 * Scanning stops at the first closing `body` tag that follows an opening `body` tag, or at the
 * first closing `html` tag that follows an opening `html` tag, whichever is encountered first; the
 * text between the opening and the closing tag is returned. Tag names are matched as
 * case-insensitive prefixes. Closing tags without a matching opening tag are ignored.
 *
 * @param html the HTML document text.
 * @return the text between the matched tags, or [html] unchanged if no such pair is found.
 */
@Suppress("LocalVariableName")
private fun getBodyContents(html: String): String {
    val length = html.length
    val STATE_TEXT = 1
    val STATE_SLASH = 2
    val STATE_ATTRIBUTE_NAME = 3
    val STATE_IN_TAG = 4
    val STATE_BEFORE_ATTRIBUTE = 5
    val STATE_ATTRIBUTE_BEFORE_EQUALS = 6
    val STATE_ATTRIBUTE_AFTER_EQUALS = 7
    val STATE_ATTRIBUTE_VALUE_NONE = 8
    val STATE_ATTRIBUTE_VALUE_SINGLE = 9
    val STATE_ATTRIBUTE_VALUE_DOUBLE = 10
    val STATE_CLOSE_TAG = 11
    val STATE_ENDING_TAG = 12

    // Offset just past the first '>' found at or after the end of the most recent opening
    // body/html tag name; -1 until such a tag has been seen.
    var bodyStart = -1
    var htmlStart = -1

    var state = STATE_TEXT
    var offset = 0
    // Offset of the first character after '<' in the opening tag currently being scanned.
    var tagStart = -1
    // Offset of the first character after "</" in the most recent closing tag.
    var tagEndStart = -1

    // Returns the offset just past the first [terminator] found at or after [from], or the end
    // of the input if there is none.
    fun skipPast(terminator: String, from: Int): Int {
        val end = html.indexOf(terminator, from)
        return if (end == -1) length else end + terminator.length
    }

    while (offset < length) {
        val c = html[offset]
        when (state) {
            STATE_TEXT -> {
                // Other text is just ignored
                if (c == '<') {
                    state = STATE_SLASH
                }
                offset++
            }
            STATE_SLASH -> {
                // c is the character immediately following '<'.
                when (c) {
                    '!' -> {
                        offset =
                            when {
                                // Comment
                                html.startsWith("!--", offset) -> skipPast("-->", offset + 3)
                                html.startsWith("![CDATA[", offset) -> skipPast("]]>", offset + 8)
                                // Some other declaration, such as a doctype.
                                else -> skipPast(">", offset + 2)
                            }
                        state = STATE_TEXT
                    }
                    '?' -> {
                        // XML Prologue
                        offset = skipPast(">", offset + 2)
                        state = STATE_TEXT
                    }
                    '/' -> {
                        state = STATE_CLOSE_TAG
                        offset++
                        tagEndStart = offset
                    }
                    else -> {
                        // First character of an opening tag; remember where the name starts and
                        // continue with the following character.
                        state = STATE_IN_TAG
                        tagStart = offset
                        offset++
                    }
                }
            }
            STATE_CLOSE_TAG -> {
                if (c == '>') {
                    state = STATE_TEXT
                    // The content ends just before the "</" of this closing tag.
                    val contentEnd = tagEndStart - 2
                    if (bodyStart != -1 && html.startsWith("body", tagEndStart, true)) {
                        return html.substring(bodyStart, contentEnd)
                    }
                    // Only return if the opening tag was seen; otherwise there is no valid
                    // start offset to extract from.
                    if (htmlStart != -1 && html.startsWith("html", tagEndStart, true)) {
                        return html.substring(htmlStart, contentEnd)
                    }
                }
                offset++
            }
            STATE_IN_TAG -> {
                val whitespace = Character.isWhitespace(c)
                if (whitespace || c == '>') {
                    // End of the tag name. Note that the content start is taken to be just
                    // past the next '>' in the text, even if that '>' is inside a quoted
                    // attribute value.
                    if (html.startsWith("body", tagStart, true)) {
                        bodyStart = html.indexOf('>', offset) + 1
                    }
                    if (html.startsWith("html", tagStart, true)) {
                        htmlStart = html.indexOf('>', offset) + 1
                    }
                }

                when {
                    whitespace -> state = STATE_BEFORE_ATTRIBUTE
                    c == '>' -> state = STATE_TEXT
                    c == '/' -> state = STATE_ENDING_TAG
                }
                offset++
            }
            STATE_ENDING_TAG -> {
                // Inside a self-closing tag, after the '/'; skip to the closing '>'. A
                // self-closing tag never terminates the body or html content.
                if (c == '>') {
                    state = STATE_TEXT
                }
                offset++
            }
            STATE_BEFORE_ATTRIBUTE -> {
                if (c == '>') {
                    state = STATE_TEXT
                } else if (c == '/') {
                    // we expect an '>' next to close the tag
                } else if (!Character.isWhitespace(c)) {
                    state = STATE_ATTRIBUTE_NAME
                }
                offset++
            }
            STATE_ATTRIBUTE_NAME -> {
                when {
                    c == '>' -> state = STATE_TEXT
                    c == '=' -> state = STATE_ATTRIBUTE_AFTER_EQUALS
                    Character.isWhitespace(c) -> state = STATE_ATTRIBUTE_BEFORE_EQUALS
                }
                offset++
            }
            STATE_ATTRIBUTE_BEFORE_EQUALS -> {
                if (c == '=') {
                    state = STATE_ATTRIBUTE_AFTER_EQUALS
                } else if (c == '>') {
                    state = STATE_TEXT
                } else if (!Character.isWhitespace(c)) {
                    // Attribute value not specified (used for some boolean attributes)
                    state = STATE_ATTRIBUTE_NAME
                }
                offset++
            }
            STATE_ATTRIBUTE_AFTER_EQUALS -> {
                if (c == '\'') {
                    // a='b'
                    state = STATE_ATTRIBUTE_VALUE_SINGLE
                } else if (c == '"') {
                    // a="b"
                    state = STATE_ATTRIBUTE_VALUE_DOUBLE
                } else if (!Character.isWhitespace(c)) {
                    // a=b
                    state = STATE_ATTRIBUTE_VALUE_NONE
                }
                offset++
            }
            STATE_ATTRIBUTE_VALUE_SINGLE -> {
                if (c == '\'') {
                    state = STATE_BEFORE_ATTRIBUTE
                }
                offset++
            }
            STATE_ATTRIBUTE_VALUE_DOUBLE -> {
                if (c == '"') {
                    state = STATE_BEFORE_ATTRIBUTE
                }
                offset++
            }
            STATE_ATTRIBUTE_VALUE_NONE -> {
                if (c == '>') {
                    state = STATE_TEXT
                } else if (Character.isWhitespace(c)) {
                    state = STATE_BEFORE_ATTRIBUTE
                }
                offset++
            }
            else -> error("Unexpected state $state")
        }
    }

    return html
}
267