1 /*
2 *******************************************************************************
3 * Copyright (C) 2003-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File prscmnts.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 08/22/2003 ram Creation.
13 *******************************************************************************
14 */
15
16 #include "unicode/regex.h"
17 #include "unicode/unistr.h"
18 #include "unicode/parseerr.h"
19 #include "prscmnts.h"
20 #include <stdio.h>
21 #include <stdlib.h>
22
23 U_NAMESPACE_USE
24
25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
26
/*
 * Size of the UnicodeString arrays that receive the fields produced when a
 * comment is split on '@', and the capacity handed to RegexPattern::split().
 */
#define MAX_SPLIT_STRINGS 20

/*
 * Tag-matching regular expressions, indexed by a UParseCommentsOption value
 * (see getCount() and getAt()). Each expression is applied with lookingAt()
 * to one '@'-delimited field; capture group 1 is the text following the tag.
 * NOTE(review): the order presumably mirrors the enum (translate first, then
 * note) -- the enum itself is declared in prscmnts.h; confirm when adding
 * entries.
 */
const char *patternStrings[UPC_LIMIT]={
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};
33
34 U_CFUNC int32_t
removeText(UChar * source,int32_t srcLen,UnicodeString patString,uint32_t options,UnicodeString replaceText,UErrorCode * status)35 removeText(UChar *source, int32_t srcLen,
36 UnicodeString patString,uint32_t options,
37 UnicodeString replaceText, UErrorCode *status){
38
39 if(status == NULL || U_FAILURE(*status)){
40 return 0;
41 }
42
43 UnicodeString src(source, srcLen);
44
45 RegexMatcher myMatcher(patString, src, options, *status);
46 if(U_FAILURE(*status)){
47 return 0;
48 }
49 UnicodeString dest;
50
51
52 dest = myMatcher.replaceAll(replaceText,*status);
53
54
55 return dest.extract(source, srcLen, *status);
56
57 }
58 U_CFUNC int32_t
trim(UChar * src,int32_t srcLen,UErrorCode * status)59 trim(UChar *src, int32_t srcLen, UErrorCode *status){
60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
63 return srcLen;
64 }
65
66 U_CFUNC int32_t
removeCmtText(UChar * source,int32_t srcLen,UErrorCode * status)67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
68 srcLen = trim(source, srcLen, status);
69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line
70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
72 }
73
74 U_CFUNC int32_t
getText(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UnicodeString patternString,UErrorCode * status)75 getText(const UChar* source, int32_t srcLen,
76 UChar** dest, int32_t destCapacity,
77 UnicodeString patternString,
78 UErrorCode* status){
79
80 if(status == NULL || U_FAILURE(*status)){
81 return 0;
82 }
83
84 UnicodeString stringArray[MAX_SPLIT_STRINGS];
85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status);
86 UnicodeString src (source,srcLen);
87
88 if (U_FAILURE(*status)) {
89 return 0;
90 }
91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
92
93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
94 if (U_FAILURE(*status)) {
95 return 0;
96 }
97 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
98 matcher.reset(stringArray[i]);
99 if(matcher.lookingAt(*status)){
100 UnicodeString out = matcher.group(1, *status);
101
102 return out.extract(*dest, destCapacity,*status);
103 }
104 }
105 return 0;
106 }
107
108
109 #define AT_SIGN 0x0040
110
111 U_CFUNC int32_t
getDescription(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)112 getDescription( const UChar* source, int32_t srcLen,
113 UChar** dest, int32_t destCapacity,
114 UErrorCode* status){
115 if(status == NULL || U_FAILURE(*status)){
116 return 0;
117 }
118
119 UnicodeString stringArray[MAX_SPLIT_STRINGS];
120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
121 UnicodeString src(source, srcLen);
122
123 if (U_FAILURE(*status)) {
124 return 0;
125 }
126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
127
128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
130 return trim(*dest, destLen, status);
131 }
132 return 0;
133 }
134
135 U_CFUNC int32_t
getCount(const UChar * source,int32_t srcLen,UParseCommentsOption option,UErrorCode * status)136 getCount(const UChar* source, int32_t srcLen,
137 UParseCommentsOption option, UErrorCode *status){
138
139 if(status == NULL || U_FAILURE(*status)){
140 return 0;
141 }
142
143 UnicodeString stringArray[MAX_SPLIT_STRINGS];
144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
145 UnicodeString src (source, srcLen);
146
147
148 if (U_FAILURE(*status)) {
149 return 0;
150 }
151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
152
153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
154 if (U_FAILURE(*status)) {
155 return 0;
156 }
157 int32_t count = 0;
158 for(int32_t i=0; i<retLen; i++){
159 matcher.reset(stringArray[i]);
160 if(matcher.lookingAt(*status)){
161 count++;
162 }
163 }
164 if(option == UPC_TRANSLATE && count > 1){
165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
166 exit(U_UNSUPPORTED_ERROR);
167 }
168 return count;
169 }
170
171 U_CFUNC int32_t
getAt(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,int32_t index,UParseCommentsOption option,UErrorCode * status)172 getAt(const UChar* source, int32_t srcLen,
173 UChar** dest, int32_t destCapacity,
174 int32_t index,
175 UParseCommentsOption option,
176 UErrorCode* status){
177
178 if(status == NULL || U_FAILURE(*status)){
179 return 0;
180 }
181
182 UnicodeString stringArray[MAX_SPLIT_STRINGS];
183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
184 UnicodeString src (source, srcLen);
185
186
187 if (U_FAILURE(*status)) {
188 return 0;
189 }
190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
191
192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
193 if (U_FAILURE(*status)) {
194 return 0;
195 }
196 int32_t count = 0;
197 for(int32_t i=0; i<retLen; i++){
198 matcher.reset(stringArray[i]);
199 if(matcher.lookingAt(*status)){
200 if(count == index){
201 UnicodeString out = matcher.group(1, *status);
202 return out.extract(*dest, destCapacity,*status);
203 }
204 count++;
205
206 }
207 }
208 return 0;
209
210 }
211
212 U_CFUNC int32_t
getTranslate(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)213 getTranslate( const UChar* source, int32_t srcLen,
214 UChar** dest, int32_t destCapacity,
215 UErrorCode* status){
216 UnicodeString notePatternString = "^translate\\s*?(.*)";
217
218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
219 return trim(*dest, destLen, status);
220 }
221
222 U_CFUNC int32_t
getNote(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)223 getNote(const UChar* source, int32_t srcLen,
224 UChar** dest, int32_t destCapacity,
225 UErrorCode* status){
226
227 UnicodeString notePatternString = "^note\\s*?(.*)";
228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
229 return trim(*dest, destLen, status);
230
231 }
232
233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
234
235