/*
 * Copyright 2016 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.googlejavaformat.java.javadoc;

/**
 * Javadoc token. Our idea of what constitutes a token is often larger or smaller than what you'd
 * naturally expect. The decision is usually pragmatic rather than theoretical. Most of the details
 * are in {@link JavadocLexer}.
 *
 * <p>A token is an immutable pair of a {@link Type} and the text it carries.
 */
final class Token {
  /**
   * Javadoc token type.
   *
   * <p>The general idea is that every token that requires special handling (extra line breaks,
   * indentation, forcing or forbidding whitespace) from {@link JavadocWriter} gets its own type.
   * But I haven't been super careful about it, so I'd imagine that we could merge or remove some of
   * these if we wanted. (For example, PARAGRAPH_CLOSE_TAG and LIST_ITEM_CLOSE_TAG could share a
   * common IGNORABLE token type. But their corresponding OPEN tags exist, so I've kept the CLOSE
   * tags.)
   *
   * <p>Note, though, that tokens of the same type may still have been handled differently by {@link
   * JavadocLexer} when it created them. For example, LITERAL is used for both plain text and inline
   * tags, even though the two affect the lexer's state differently.
   */
  enum Type {
    /** The delimiter that opens a Javadoc comment, {@code /**}. */
    BEGIN_JAVADOC,
    /** The delimiter that closes a Javadoc comment, <code>*&#47;</code>. */
    END_JAVADOC,
    /** The {@code @foo} that begins a block Javadoc tag like {@code @throws}. */
    FOOTER_JAVADOC_TAG_START,
    LIST_OPEN_TAG,
    LIST_CLOSE_TAG,
    LIST_ITEM_OPEN_TAG,
    LIST_ITEM_CLOSE_TAG,
    HEADER_OPEN_TAG,
    HEADER_CLOSE_TAG,
    PARAGRAPH_OPEN_TAG,
    PARAGRAPH_CLOSE_TAG,
    // TODO(cpovirk): Support <div> (probably identically to <blockquote>).
    BLOCKQUOTE_OPEN_TAG,
    BLOCKQUOTE_CLOSE_TAG,
    PRE_OPEN_TAG,
    PRE_CLOSE_TAG,
    CODE_OPEN_TAG,
    CODE_CLOSE_TAG,
    TABLE_OPEN_TAG,
    TABLE_CLOSE_TAG,
    /** {@code <!-- MOE:begin_intracomment_strip -->} */
    MOE_BEGIN_STRIP_COMMENT,
    /** {@code <!-- MOE:end_intracomment_strip -->} */
    MOE_END_STRIP_COMMENT,
    HTML_COMMENT,
    // TODO(cpovirk): Support <hr> (probably a blank line before and after).
    BR_TAG,
    /**
     * Whitespace that is not in a {@code <pre>} or {@code <table>} section. Whitespace includes
     * leading newlines, asterisks, and tabs and spaces. In the output, it is translated to newlines
     * (with leading spaces and asterisks) or spaces.
     */
    WHITESPACE,
    /**
     * A newline in a {@code <pre>} or {@code <table>} section. We preserve user formatting in these
     * sections, including newlines.
     */
    FORCED_NEWLINE,
    /**
     * Token that permits but does not force a line break. The way that we accomplish this is
     * somewhat indirect: As far as {@link JavadocWriter} is concerned, this token is meaningless.
     * But its mere existence prevents {@link JavadocLexer} from joining two {@link #LITERAL} tokens
     * that would otherwise be adjacent. Since this token is not real whitespace, the writer may end
     * up writing the literals together with no space between, just as if they'd been joined.
     * However, if they don't fit together on the line, the writer will write the first one, start a
     * new line, and write the second. Hence, the token acts as an optional line break.
     */
    OPTIONAL_LINE_BREAK,
    /**
     * Anything else: {@code foo}, {@code <b>}, {@code {@code foo}} etc. {@link JavadocLexer}
     * sometimes creates adjacent literal tokens, which it then merges into a single, larger literal
     * token before returning its output.
     *
     * <p>This also includes whitespace in a {@code <pre>} or {@code <table>} section. We preserve
     * user formatting in these sections, including arbitrary numbers of spaces. By treating such
     * whitespace as a literal, we can merge it with adjacent literals, preventing us from
     * autowrapping inside these sections -- and doing so naively, to boot. The wrapped line would
     * have no indentation after "* " or, possibly worse, it might begin with an arbitrary amount of
     * whitespace that didn't fit on the previous line. Of course, by doing this, we're potentially
     * creating lines of more than 100 characters. But it seems fair to call in the humans to
     * resolve such problems.
     */
    LITERAL,
    ;
  }

  /** The kind of token this is. */
  private final Type type;

  /** The text carried by this token. */
  private final String value;

  /**
   * Creates a token of the given type carrying the given text.
   *
   * <p>Neither argument is validated here; a {@code null} value will make {@link #length()} throw
   * {@link NullPointerException}.
   *
   * @param type the kind of token
   * @param value the text of the token
   */
  Token(Type type, String value) {
    this.type = type;
    this.value = value;
  }

  /** Returns the type this token was created with. */
  Type getType() {
    return type;
  }

  /** Returns the text this token was created with. */
  String getValue() {
    return value;
  }

  /**
   * Returns the length of this token's text, as reported by {@link String#length()} (that is, in
   * UTF-16 code units).
   */
  int length() {
    return value.length();
  }

  /**
   * Returns a debugging representation of the form {@code "\nTYPE: value"}.
   *
   * <p>The leading newline is part of the returned string (presumably so that a printed collection
   * of tokens shows one token per line).
   */
  @Override
  public String toString() {
    return "\n" + getType() + ": " + getValue();
  }
}