• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *   Copyright (C) 2003-2007, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  *******************************************************************************
6  *
7  * File prscmnts.cpp
8  *
9  * Modification History:
10  *
11  *   Date          Name        Description
12  *   08/22/2003    ram         Creation.
13  *******************************************************************************
14  */
15 
16 #include "unicode/regex.h"
17 #include "unicode/unistr.h"
18 #include "unicode/parseerr.h"
19 #include "prscmnts.h"
20 #include <stdio.h>
21 #include <stdlib.h>
22 
23 U_NAMESPACE_USE
24 
25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
26 
27 #define MAX_SPLIT_STRINGS 20
28 
29 const char *patternStrings[UPC_LIMIT]={
30     "^translate\\s*(.*)",
31     "^note\\s*(.*)"
32 };
33 
34 U_CFUNC int32_t
removeText(UChar * source,int32_t srcLen,UnicodeString patString,uint32_t options,UnicodeString replaceText,UErrorCode * status)35 removeText(UChar *source, int32_t srcLen,
36            UnicodeString patString,uint32_t options,
37            UnicodeString replaceText, UErrorCode *status){
38 
39     if(status == NULL || U_FAILURE(*status)){
40         return 0;
41     }
42 
43     UnicodeString src(source, srcLen);
44 
45     RegexMatcher    myMatcher(patString, src, options, *status);
46     if(U_FAILURE(*status)){
47         return 0;
48     }
49     UnicodeString dest;
50 
51 
52     dest = myMatcher.replaceAll(replaceText,*status);
53 
54 
55     return dest.extract(source, srcLen, *status);
56 
57 }
58 U_CFUNC int32_t
trim(UChar * src,int32_t srcLen,UErrorCode * status)59 trim(UChar *src, int32_t srcLen, UErrorCode *status){
60      srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
61      srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
62      srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
63      return srcLen;
64 }
65 
66 U_CFUNC int32_t
removeCmtText(UChar * source,int32_t srcLen,UErrorCode * status)67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
68     srcLen = trim(source, srcLen, status);
69     UnicodeString     patString = "^\\s*?\\*\\s*?";     // remove pattern like " * " at the begining of the line
70     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
71     return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
72 }
73 
74 U_CFUNC int32_t
getText(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UnicodeString patternString,UErrorCode * status)75 getText(const UChar* source, int32_t srcLen,
76         UChar** dest, int32_t destCapacity,
77         UnicodeString patternString,
78         UErrorCode* status){
79 
80     if(status == NULL || U_FAILURE(*status)){
81         return 0;
82     }
83 
84     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
85     RegexPattern      *pattern = RegexPattern::compile("@", 0, *status);
86     UnicodeString src (source,srcLen);
87 
88     if (U_FAILURE(*status)) {
89         return 0;
90     }
91     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
92 
93     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
94     if (U_FAILURE(*status)) {
95         return 0;
96     }
97     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
98         matcher.reset(stringArray[i]);
99         if(matcher.lookingAt(*status)){
100             UnicodeString out = matcher.group(1, *status);
101 
102             return out.extract(*dest, destCapacity,*status);
103         }
104     }
105     return 0;
106 }
107 
108 
109 #define AT_SIGN  0x0040
110 
111 U_CFUNC int32_t
getDescription(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)112 getDescription( const UChar* source, int32_t srcLen,
113                 UChar** dest, int32_t destCapacity,
114                 UErrorCode* status){
115     if(status == NULL || U_FAILURE(*status)){
116         return 0;
117     }
118 
119     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
120     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
121     UnicodeString src(source, srcLen);
122 
123     if (U_FAILURE(*status)) {
124         return 0;
125     }
126     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
127 
128     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
129         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
130         return trim(*dest, destLen, status);
131     }
132     return 0;
133 }
134 
135 U_CFUNC int32_t
getCount(const UChar * source,int32_t srcLen,UParseCommentsOption option,UErrorCode * status)136 getCount(const UChar* source, int32_t srcLen,
137          UParseCommentsOption option, UErrorCode *status){
138 
139     if(status == NULL || U_FAILURE(*status)){
140         return 0;
141     }
142 
143     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
144     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
145     UnicodeString src (source, srcLen);
146 
147 
148     if (U_FAILURE(*status)) {
149         return 0;
150     }
151     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
152 
153     RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
154     if (U_FAILURE(*status)) {
155         return 0;
156     }
157     int32_t count = 0;
158     for(int32_t i=0; i<retLen; i++){
159         matcher.reset(stringArray[i]);
160         if(matcher.lookingAt(*status)){
161             count++;
162         }
163     }
164     if(option == UPC_TRANSLATE && count > 1){
165         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
166         exit(U_UNSUPPORTED_ERROR);
167     }
168     return count;
169 }
170 
171 U_CFUNC int32_t
getAt(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,int32_t index,UParseCommentsOption option,UErrorCode * status)172 getAt(const UChar* source, int32_t srcLen,
173         UChar** dest, int32_t destCapacity,
174         int32_t index,
175         UParseCommentsOption option,
176         UErrorCode* status){
177 
178     if(status == NULL || U_FAILURE(*status)){
179         return 0;
180     }
181 
182     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
183     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
184     UnicodeString src (source, srcLen);
185 
186 
187     if (U_FAILURE(*status)) {
188         return 0;
189     }
190     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
191 
192     RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
193     if (U_FAILURE(*status)) {
194         return 0;
195     }
196     int32_t count = 0;
197     for(int32_t i=0; i<retLen; i++){
198         matcher.reset(stringArray[i]);
199         if(matcher.lookingAt(*status)){
200             if(count == index){
201                 UnicodeString out = matcher.group(1, *status);
202                 return out.extract(*dest, destCapacity,*status);
203             }
204             count++;
205 
206         }
207     }
208     return 0;
209 
210 }
211 
212 U_CFUNC int32_t
getTranslate(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)213 getTranslate( const UChar* source, int32_t srcLen,
214               UChar** dest, int32_t destCapacity,
215               UErrorCode* status){
216     UnicodeString     notePatternString = "^translate\\s*?(.*)";
217 
218     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
219     return trim(*dest, destLen, status);
220 }
221 
222 U_CFUNC int32_t
getNote(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)223 getNote(const UChar* source, int32_t srcLen,
224         UChar** dest, int32_t destCapacity,
225         UErrorCode* status){
226 
227     UnicodeString     notePatternString = "^note\\s*?(.*)";
228     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
229     return trim(*dest, destLen, status);
230 
231 }
232 
233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
234 
235