1 /*
2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "WebEntities.h"
33
34 #include <string.h>
35
36 #include "PlatformString.h"
37 #include "StringBuilder.h"
38 #include <wtf/HashMap.h>
39
40 #include "WebString.h"
41
42 using namespace WebCore;
43
44 namespace {
45 // Note that this file is also included by HTMLTokenizer.cpp so we are getting
46 // two copies of the data in memory. We can fix this by changing the script
47 // that generated the array to create a static const that is its length, but
48 // this is low priority since the data is less than 4K. We use anonymous
49 // namespace to prevent name collisions.
50 #include "HTMLEntityNames.c" // NOLINT
51 }
52
53 namespace WebKit {
54
populateMap(WTF::HashMap<int,WebCore::String> & map,const Entity * entities,size_t entitiesCount,bool standardHTML)55 void populateMap(WTF::HashMap<int, WebCore::String>& map,
56 const Entity* entities,
57 size_t entitiesCount,
58 bool standardHTML)
59 {
60 ASSERT(map.isEmpty());
61 const Entity* entity = &entities[0];
62 for (size_t i = 0; i < entitiesCount; i++, entity++) {
63 int code = entity->code;
64 String name = entity->name;
65 // For consistency, use the lowe case for entities that have both.
66 if (map.contains(code) && map.get(code) == name.lower())
67 continue;
68 // Don't register %, ⊅ and &supl;.
69 if (standardHTML && (code == '%' || code == 0x2285 || code == 0x00b9))
70 continue;
71 map.set(code, name);
72 }
73 if (standardHTML)
74 map.set(static_cast<int>(0x0027), String("#39"));
75 }
76
77 static const Entity xmlBuiltInEntityCodes[] = {
78 { "lt", 0x003c },
79 { "gt", 0x003e },
80 { "amp", 0x0026 },
81 { "apos", 0x0027 },
82 { "quot", 0x0022 }
83 };
84
WebEntities(bool xmlEntities)85 WebEntities::WebEntities(bool xmlEntities)
86 {
87 if (xmlEntities)
88 populateMap(m_entitiesMap,
89 xmlBuiltInEntityCodes,
90 sizeof(xmlBuiltInEntityCodes) / sizeof(Entity),
91 false);
92 else
93 populateMap(m_entitiesMap,
94 wordlist,
95 sizeof(wordlist) / sizeof(Entity),
96 true);
97 }
98
entityNameByCode(int code) const99 String WebEntities::entityNameByCode(int code) const
100 {
101 if (m_entitiesMap.contains(code))
102 return m_entitiesMap.get(code);
103 return "";
104 }
105
// Returns a copy of |value| in which every character that has a registered
// entity name is replaced by "&name;". All other characters are copied
// through unchanged.
String WebEntities::convertEntitiesInString(const String& value) const
{
    const UChar* const begin = value.characters();
    const UChar* const end = begin + value.length();

    // Start of the pending run of characters that need no replacement.
    const UChar* runStart = begin;

    // FIXME: Optimize - create StringBuilder only if value has any entities.
    StringBuilder result;
    for (const UChar* cur = begin; cur != end; ++cur) {
        const UChar ch = *cur;

        // U+0000 must never be used as a lookup key: WTF::HashMap with
        // integer keys reserves 0 as its "empty bucket" marker, so the lookup
        // asserts in debug builds and can report a bogus match (producing
        // "&;") in release builds. NUL has no entity, so treat it as ordinary
        // text.
        if (!ch || !m_entitiesMap.contains(ch))
            continue;

        // Flush the plain text that precedes this character.
        if (cur > runStart)
            result.append(String(runStart, cur - runStart));

        result.append("&");
        result.append(m_entitiesMap.get(ch));
        result.append(";");

        runStart = cur + 1;
    }

    // Flush whatever plain text follows the last replaced character.
    if (end > runStart)
        result.append(String(runStart, end - runStart));

    return result.toString();
}
132
133 } // namespace WebKit
134