1 /*
2 *******************************************************************************
3 * Copyright (C) 2003-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File prscmnts.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 08/22/2003 ram Creation.
13 *******************************************************************************
14 */
15
16 // Safer use of UnicodeString.
17 #ifndef UNISTR_FROM_CHAR_EXPLICIT
18 # define UNISTR_FROM_CHAR_EXPLICIT explicit
19 #endif
20
21 // Less important, but still a good idea.
22 #ifndef UNISTR_FROM_STRING_EXPLICIT
23 # define UNISTR_FROM_STRING_EXPLICIT explicit
24 #endif
25
26 #include "unicode/regex.h"
27 #include "unicode/unistr.h"
28 #include "unicode/parseerr.h"
29 #include "prscmnts.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32
33 U_NAMESPACE_USE
34
35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
36
37 #define MAX_SPLIT_STRINGS 20
38
39 const char *patternStrings[UPC_LIMIT]={
40 "^translate\\s*(.*)",
41 "^note\\s*(.*)"
42 };
43
44 U_CFUNC int32_t
removeText(UChar * source,int32_t srcLen,UnicodeString patString,uint32_t options,UnicodeString replaceText,UErrorCode * status)45 removeText(UChar *source, int32_t srcLen,
46 UnicodeString patString,uint32_t options,
47 UnicodeString replaceText, UErrorCode *status){
48
49 if(status == NULL || U_FAILURE(*status)){
50 return 0;
51 }
52
53 UnicodeString src(source, srcLen);
54
55 RegexMatcher myMatcher(patString, src, options, *status);
56 if(U_FAILURE(*status)){
57 return 0;
58 }
59 UnicodeString dest;
60
61
62 dest = myMatcher.replaceAll(replaceText,*status);
63
64
65 return dest.extract(source, srcLen, *status);
66
67 }
68 U_CFUNC int32_t
trim(UChar * src,int32_t srcLen,UErrorCode * status)69 trim(UChar *src, int32_t srcLen, UErrorCode *status){
70 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
71 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
72 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
73 return srcLen;
74 }
75
76 U_CFUNC int32_t
removeCmtText(UChar * source,int32_t srcLen,UErrorCode * status)77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
78 srcLen = trim(source, srcLen, status);
79 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line
80 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
81 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
82 }
83
84 U_CFUNC int32_t
getText(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UnicodeString patternString,UErrorCode * status)85 getText(const UChar* source, int32_t srcLen,
86 UChar** dest, int32_t destCapacity,
87 UnicodeString patternString,
88 UErrorCode* status){
89
90 if(status == NULL || U_FAILURE(*status)){
91 return 0;
92 }
93
94 UnicodeString stringArray[MAX_SPLIT_STRINGS];
95 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
96 UnicodeString src (source,srcLen);
97
98 if (U_FAILURE(*status)) {
99 return 0;
100 }
101 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
102
103 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
104 if (U_FAILURE(*status)) {
105 return 0;
106 }
107 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
108 matcher.reset(stringArray[i]);
109 if(matcher.lookingAt(*status)){
110 UnicodeString out = matcher.group(1, *status);
111
112 return out.extract(*dest, destCapacity,*status);
113 }
114 }
115 return 0;
116 }
117
118
119 #define AT_SIGN 0x0040
120
121 U_CFUNC int32_t
getDescription(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)122 getDescription( const UChar* source, int32_t srcLen,
123 UChar** dest, int32_t destCapacity,
124 UErrorCode* status){
125 if(status == NULL || U_FAILURE(*status)){
126 return 0;
127 }
128
129 UnicodeString stringArray[MAX_SPLIT_STRINGS];
130 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
131 UnicodeString src(source, srcLen);
132
133 if (U_FAILURE(*status)) {
134 return 0;
135 }
136 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
137
138 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
139 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
140 return trim(*dest, destLen, status);
141 }
142 return 0;
143 }
144
145 U_CFUNC int32_t
getCount(const UChar * source,int32_t srcLen,UParseCommentsOption option,UErrorCode * status)146 getCount(const UChar* source, int32_t srcLen,
147 UParseCommentsOption option, UErrorCode *status){
148
149 if(status == NULL || U_FAILURE(*status)){
150 return 0;
151 }
152
153 UnicodeString stringArray[MAX_SPLIT_STRINGS];
154 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
155 UnicodeString src (source, srcLen);
156
157
158 if (U_FAILURE(*status)) {
159 return 0;
160 }
161 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
162
163 UnicodeString patternString(patternStrings[option]);
164 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
165 if (U_FAILURE(*status)) {
166 return 0;
167 }
168 int32_t count = 0;
169 for(int32_t i=0; i<retLen; i++){
170 matcher.reset(stringArray[i]);
171 if(matcher.lookingAt(*status)){
172 count++;
173 }
174 }
175 if(option == UPC_TRANSLATE && count > 1){
176 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
177 exit(U_UNSUPPORTED_ERROR);
178 }
179 return count;
180 }
181
182 U_CFUNC int32_t
getAt(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,int32_t index,UParseCommentsOption option,UErrorCode * status)183 getAt(const UChar* source, int32_t srcLen,
184 UChar** dest, int32_t destCapacity,
185 int32_t index,
186 UParseCommentsOption option,
187 UErrorCode* status){
188
189 if(status == NULL || U_FAILURE(*status)){
190 return 0;
191 }
192
193 UnicodeString stringArray[MAX_SPLIT_STRINGS];
194 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
195 UnicodeString src (source, srcLen);
196
197
198 if (U_FAILURE(*status)) {
199 return 0;
200 }
201 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
202
203 UnicodeString patternString(patternStrings[option]);
204 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
205 if (U_FAILURE(*status)) {
206 return 0;
207 }
208 int32_t count = 0;
209 for(int32_t i=0; i<retLen; i++){
210 matcher.reset(stringArray[i]);
211 if(matcher.lookingAt(*status)){
212 if(count == index){
213 UnicodeString out = matcher.group(1, *status);
214 return out.extract(*dest, destCapacity,*status);
215 }
216 count++;
217
218 }
219 }
220 return 0;
221
222 }
223
224 U_CFUNC int32_t
getTranslate(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)225 getTranslate( const UChar* source, int32_t srcLen,
226 UChar** dest, int32_t destCapacity,
227 UErrorCode* status){
228 UnicodeString notePatternString("^translate\\s*?(.*)");
229
230 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
231 return trim(*dest, destLen, status);
232 }
233
234 U_CFUNC int32_t
getNote(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)235 getNote(const UChar* source, int32_t srcLen,
236 UChar** dest, int32_t destCapacity,
237 UErrorCode* status){
238
239 UnicodeString notePatternString("^note\\s*?(.*)");
240 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
241 return trim(*dest, destLen, status);
242
243 }
244
245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
246
247