1 package org.jsoup.nodes; 2 3 import org.jsoup.helper.Validate; 4 import org.jsoup.internal.StringUtil; 5 6 import java.io.IOException; 7 8 /** 9 A text node. 10 11 @author Jonathan Hedley, jonathan@hedley.net */ 12 public class TextNode extends LeafNode { 13 /** 14 Create a new TextNode representing the supplied (unencoded) text). 15 16 @param text raw text 17 @see #createFromEncoded(String) 18 */ TextNode(String text)19 public TextNode(String text) { 20 value = text; 21 } 22 nodeName()23 public String nodeName() { 24 return "#text"; 25 } 26 27 /** 28 * Get the text content of this text node. 29 * @return Unencoded, normalised text. 30 * @see TextNode#getWholeText() 31 */ text()32 public String text() { 33 return StringUtil.normaliseWhitespace(getWholeText()); 34 } 35 36 /** 37 * Set the text content of this text node. 38 * @param text unencoded text 39 * @return this, for chaining 40 */ text(String text)41 public TextNode text(String text) { 42 coreValue(text); 43 return this; 44 } 45 46 /** 47 Get the (unencoded) text of this text node, including any newlines and spaces present in the original. 48 @return text 49 */ getWholeText()50 public String getWholeText() { 51 return coreValue(); 52 } 53 54 /** 55 Test if this text node is blank -- that is, empty or only whitespace (including newlines). 56 @return true if this document is empty or only whitespace, false if it contains any text content. 57 */ isBlank()58 public boolean isBlank() { 59 return StringUtil.isBlank(coreValue()); 60 } 61 62 /** 63 * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the 64 * original text up to the offset, and will have a new text node sibling containing the text after the offset. 65 * @param offset string offset point to split node at. 66 * @return the newly created text node containing the text after the offset. 67 */ splitText(int offset)68 public TextNode splitText(int offset) { 69 final String text = coreValue(); 70 Validate.isTrue(offset >= 0, "Split offset must be not be negative"); 71 Validate.isTrue(offset < text.length(), "Split offset must not be greater than current text length"); 72 73 String head = text.substring(0, offset); 74 String tail = text.substring(offset); 75 text(head); 76 TextNode tailNode = new TextNode(tail); 77 if (parentNode != null) 78 parentNode.addChildren(siblingIndex()+1, tailNode); 79 80 return tailNode; 81 } 82 83 @Override 84 void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 85 final boolean prettyPrint = out.prettyPrint(); 86 final Element parent = parentNode instanceof Element ? ((Element) parentNode) : null; 87 final boolean normaliseWhite = prettyPrint && !Element.preserveWhitespace(parentNode); 88 final boolean trimLikeBlock = parent != null && (parent.tag().isBlock() || parent.tag().formatAsBlock()); 89 boolean trimLeading = false, trimTrailing = false; 90 91 if (normaliseWhite) { 92 trimLeading = (trimLikeBlock && siblingIndex == 0) || parentNode instanceof Document; 93 trimTrailing = trimLikeBlock && nextSibling() == null; 94 95 // if this text is just whitespace, and the next node will cause an indent, skip this text: 96 Node next = nextSibling(); 97 Node prev = previousSibling(); 98 boolean isBlank = isBlank(); 99 boolean couldSkip = (next instanceof Element && ((Element) next).shouldIndent(out)) // next will indent 100 || (next instanceof TextNode && (((TextNode) next).isBlank())) // next is blank text, from re-parenting 101 || (prev instanceof Element && (((Element) prev).isBlock() || prev.nameIs("br"))) // br is a bit special - make sure we don't get a dangling blank line, but not a block otherwise wraps in head 102 ; 103 if (couldSkip && isBlank) return; 104 105 if ( 106 (prev == null && parent != null && parent.tag().formatAsBlock() && !isBlank) || 107 (out.outline() && siblingNodes().size() > 0 && !isBlank) || 108 (prev != null && prev.nameIs("br")) // special case wrap on inline <br> - doesn't make sense as a block tag 109 ) 110 indent(accum, depth, out); 111 } 112 Entities.escape(accum, coreValue(), out, false, normaliseWhite, trimLeading, trimTrailing)113 Entities.escape(accum, coreValue(), out, false, normaliseWhite, trimLeading, trimTrailing); 114 } 115 116 @Override outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out)117 void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {} 118 119 @Override toString()120 public String toString() { 121 return outerHtml(); 122 } 123 124 @Override clone()125 public TextNode clone() { 126 return (TextNode) super.clone(); 127 } 128 129 /** 130 * Create a new TextNode from HTML encoded (aka escaped) data. 131 * @param encodedText Text containing encoded HTML (e.g. {@code <}) 132 * @return TextNode containing unencoded data (e.g. {@code <}) 133 */ createFromEncoded(String encodedText)134 public static TextNode createFromEncoded(String encodedText) { 135 String text = Entities.unescape(encodedText); 136 return new TextNode(text); 137 } 138 normaliseWhitespace(String text)139 static String normaliseWhitespace(String text) { 140 text = StringUtil.normaliseWhitespace(text); 141 return text; 142 } 143 stripLeadingWhitespace(String text)144 static String stripLeadingWhitespace(String text) { 145 return text.replaceFirst("^\\s+", ""); 146 } 147 lastCharIsWhitespace(StringBuilder sb)148 static boolean lastCharIsWhitespace(StringBuilder sb) { 149 return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' '; 150 } 151 } 152