1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2003-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File prscmnts.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 08/22/2003 ram Creation.
15 *******************************************************************************
16 */
17
18 // Safer use of UnicodeString.
19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
20 # define UNISTR_FROM_CHAR_EXPLICIT explicit
21 #endif
22
23 // Less important, but still a good idea.
24 #ifndef UNISTR_FROM_STRING_EXPLICIT
25 # define UNISTR_FROM_STRING_EXPLICIT explicit
26 #endif
27
28 #include "unicode/regex.h"
29 #include "unicode/unistr.h"
30 #include "unicode/parseerr.h"
31 #include "prscmnts.h"
32 #include <stdio.h>
33 #include <stdlib.h>
34
35 U_NAMESPACE_USE
36
37 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
38
39 #define MAX_SPLIT_STRINGS 20
40
41 const char *patternStrings[UPC_LIMIT]={
42 "^translate\\s*(.*)",
43 "^note\\s*(.*)"
44 };
45
46 U_CFUNC int32_t
removeText(UChar * source,int32_t srcLen,UnicodeString patString,uint32_t options,UnicodeString replaceText,UErrorCode * status)47 removeText(UChar *source, int32_t srcLen,
48 UnicodeString patString,uint32_t options,
49 UnicodeString replaceText, UErrorCode *status){
50
51 if(status == NULL || U_FAILURE(*status)){
52 return 0;
53 }
54
55 UnicodeString src(source, srcLen);
56
57 RegexMatcher myMatcher(patString, src, options, *status);
58 if(U_FAILURE(*status)){
59 return 0;
60 }
61 UnicodeString dest;
62
63
64 dest = myMatcher.replaceAll(replaceText,*status);
65
66
67 return dest.extract(source, srcLen, *status);
68
69 }
70 U_CFUNC int32_t
trim(UChar * src,int32_t srcLen,UErrorCode * status)71 trim(UChar *src, int32_t srcLen, UErrorCode *status){
72 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
73 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
74 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remove trailing spcaes
75 return srcLen;
76 }
77
78 U_CFUNC int32_t
removeCmtText(UChar * source,int32_t srcLen,UErrorCode * status)79 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
80 srcLen = trim(source, srcLen, status);
81 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the beginning of the line
82 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
83 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
84 }
85
86 U_CFUNC int32_t
getText(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UnicodeString patternString,UErrorCode * status)87 getText(const UChar* source, int32_t srcLen,
88 UChar** dest, int32_t destCapacity,
89 UnicodeString patternString,
90 UErrorCode* status){
91
92 if(status == NULL || U_FAILURE(*status)){
93 return 0;
94 }
95
96 UnicodeString stringArray[MAX_SPLIT_STRINGS];
97 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
98 UnicodeString src (source,srcLen);
99
100 if (U_FAILURE(*status)) {
101 return 0;
102 }
103 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
104
105 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
106 if (U_FAILURE(*status)) {
107 return 0;
108 }
109 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
110 matcher.reset(stringArray[i]);
111 if(matcher.lookingAt(*status)){
112 UnicodeString out = matcher.group(1, *status);
113
114 return out.extract(*dest, destCapacity,*status);
115 }
116 }
117 return 0;
118 }
119
120
121 #define AT_SIGN 0x0040
122
123 U_CFUNC int32_t
getDescription(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)124 getDescription( const UChar* source, int32_t srcLen,
125 UChar** dest, int32_t destCapacity,
126 UErrorCode* status){
127 if(status == NULL || U_FAILURE(*status)){
128 return 0;
129 }
130
131 UnicodeString stringArray[MAX_SPLIT_STRINGS];
132 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
133 UnicodeString src(source, srcLen);
134
135 if (U_FAILURE(*status)) {
136 return 0;
137 }
138 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
139
140 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
141 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
142 return trim(*dest, destLen, status);
143 }
144 return 0;
145 }
146
147 U_CFUNC int32_t
getCount(const UChar * source,int32_t srcLen,UParseCommentsOption option,UErrorCode * status)148 getCount(const UChar* source, int32_t srcLen,
149 UParseCommentsOption option, UErrorCode *status){
150
151 if(status == NULL || U_FAILURE(*status)){
152 return 0;
153 }
154
155 UnicodeString stringArray[MAX_SPLIT_STRINGS];
156 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
157 UnicodeString src (source, srcLen);
158
159
160 if (U_FAILURE(*status)) {
161 return 0;
162 }
163 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
164
165 UnicodeString patternString(patternStrings[option]);
166 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
167 if (U_FAILURE(*status)) {
168 return 0;
169 }
170 int32_t count = 0;
171 for(int32_t i=0; i<retLen; i++){
172 matcher.reset(stringArray[i]);
173 if(matcher.lookingAt(*status)){
174 count++;
175 }
176 }
177 if(option == UPC_TRANSLATE && count > 1){
178 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
179 exit(U_UNSUPPORTED_ERROR);
180 }
181 return count;
182 }
183
184 U_CFUNC int32_t
getAt(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,int32_t index,UParseCommentsOption option,UErrorCode * status)185 getAt(const UChar* source, int32_t srcLen,
186 UChar** dest, int32_t destCapacity,
187 int32_t index,
188 UParseCommentsOption option,
189 UErrorCode* status){
190
191 if(status == NULL || U_FAILURE(*status)){
192 return 0;
193 }
194
195 UnicodeString stringArray[MAX_SPLIT_STRINGS];
196 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
197 UnicodeString src (source, srcLen);
198
199
200 if (U_FAILURE(*status)) {
201 return 0;
202 }
203 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
204
205 UnicodeString patternString(patternStrings[option]);
206 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
207 if (U_FAILURE(*status)) {
208 return 0;
209 }
210 int32_t count = 0;
211 for(int32_t i=0; i<retLen; i++){
212 matcher.reset(stringArray[i]);
213 if(matcher.lookingAt(*status)){
214 if(count == index){
215 UnicodeString out = matcher.group(1, *status);
216 return out.extract(*dest, destCapacity,*status);
217 }
218 count++;
219
220 }
221 }
222 return 0;
223
224 }
225
226 U_CFUNC int32_t
getTranslate(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)227 getTranslate( const UChar* source, int32_t srcLen,
228 UChar** dest, int32_t destCapacity,
229 UErrorCode* status){
230 UnicodeString notePatternString("^translate\\s*?(.*)");
231
232 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
233 return trim(*dest, destLen, status);
234 }
235
236 U_CFUNC int32_t
getNote(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)237 getNote(const UChar* source, int32_t srcLen,
238 UChar** dest, int32_t destCapacity,
239 UErrorCode* status){
240
241 UnicodeString notePatternString("^note\\s*?(.*)");
242 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
243 return trim(*dest, destLen, status);
244
245 }
246
247 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
248
249