1 /*
2 * Copyright 2003. Vladimir Prus
3 * Distributed under the Boost Software License, Version 1.0.
4 * (See accompanying file LICENSE_1_0.txt or copy at
5 * http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #include "../mem.h"
9 #include "../native.h"
10 #include "../jam_strings.h"
11 #include "../subst.h"
12
/*
rule split ( string separator )
{
    local result ;
    local s = $(string) ;

    local match = 1 ;
    while $(match)
    {
        match = [ MATCH ^(.*)($(separator))(.*) : $(s) ] ;
        if $(match)
        {
            match += "" ;  # in case 3rd item was empty - works around MATCH bug
            result = $(match[3]) $(result) ;
            s = $(match[1]) ;
        }
    }
    return $(s) $(result) ;
}
*/
33
regex_split(FRAME * frame,int flags)34 LIST * regex_split( FRAME * frame, int flags )
35 {
36 LIST * args = lol_get( frame->args, 0 );
37 OBJECT * s;
38 OBJECT * separator;
39 regexp * re;
40 const char * pos, * prev;
41 LIST * result = L0;
42 LISTITER iter = list_begin( args );
43 s = list_item( iter );
44 separator = list_item( list_next( iter ) );
45
46 re = regex_compile( separator );
47
48 prev = pos = object_str( s );
49 while ( regexec( re, pos ) )
50 {
51 result = list_push_back( result, object_new_range( prev, re->startp[ 0 ] - prev ) );
52 prev = re->endp[ 0 ];
53 /* Handle empty matches */
54 if ( *pos == '\0' )
55 break;
56 else if ( pos == re->endp[ 0 ] )
57 pos++;
58 else
59 pos = re->endp[ 0 ];
60 }
61
62 result = list_push_back( result, object_new( pos ) );
63
64 return result;
65 }
66
/*
rule replace (
    string  # The string to modify.
    match  # The characters to replace.
    replacement  # The string to replace with.
    )
{
    local result = "" ;
    local parts = 1 ;
    while $(parts)
    {
        parts = [ MATCH ^(.*)($(match))(.*) : $(string) ] ;
        if $(parts)
        {
            parts += "" ;
            result = "$(replacement)$(parts[3])$(result)" ;
            string = $(parts[1]) ;
        }
    }
    string ?= "" ;
    result = "$(string)$(result)" ;
    return $(result) ;
}
*/
91
regex_replace(FRAME * frame,int flags)92 LIST * regex_replace( FRAME * frame, int flags )
93 {
94 LIST * args = lol_get( frame->args, 0 );
95 OBJECT * s;
96 OBJECT * match;
97 OBJECT * replacement;
98 regexp * re;
99 const char * pos;
100 string buf[ 1 ];
101 LIST * result;
102 LISTITER iter = list_begin( args );
103 s = list_item( iter );
104 iter = list_next( iter );
105 match = list_item( iter );
106 iter = list_next( iter );
107 replacement = list_item(iter );
108
109 re = regex_compile( match );
110
111 string_new( buf );
112
113 pos = object_str( s );
114 while ( regexec( re, pos ) )
115 {
116 string_append_range( buf, pos, re->startp[ 0 ] );
117 string_append( buf, object_str( replacement ) );
118 /* Handle empty matches */
119 if ( *pos == '\0' )
120 break;
121 else if ( pos == re->endp[ 0 ] )
122 string_push_back( buf, *pos++ );
123 else
124 pos = re->endp[ 0 ];
125 }
126 string_append( buf, pos );
127
128 result = list_new( object_new( buf->value ) );
129
130 string_free( buf );
131
132 return result;
133 }
134
/*
rule transform ( list * : pattern : indices * )
{
    indices ?= 1 ;
    local result ;
    for local e in $(list)
    {
        local m = [ MATCH $(pattern) : $(e) ] ;
        if $(m)
        {
            result += $(m[$(indices)]) ;
        }
    }
    return $(result) ;
}
*/
151
regex_transform(FRAME * frame,int flags)152 LIST * regex_transform( FRAME * frame, int flags )
153 {
154 LIST * const l = lol_get( frame->args, 0 );
155 LIST * const pattern = lol_get( frame->args, 1 );
156 LIST * const indices_list = lol_get( frame->args, 2 );
157 int * indices = 0;
158 int size;
159 LIST * result = L0;
160
161 if ( !list_empty( indices_list ) )
162 {
163 int * p;
164 LISTITER iter = list_begin( indices_list );
165 LISTITER const end = list_end( indices_list );
166 size = list_length( indices_list );
167 indices = (int *)BJAM_MALLOC( size * sizeof( int ) );
168 for ( p = indices; iter != end; iter = list_next( iter ) )
169 *p++ = atoi( object_str( list_item( iter ) ) );
170 }
171 else
172 {
173 size = 1;
174 indices = (int *)BJAM_MALLOC( sizeof( int ) );
175 *indices = 1;
176 }
177
178 {
179 /* Result is cached and intentionally never freed */
180 regexp * const re = regex_compile( list_front( pattern ) );
181
182 LISTITER iter = list_begin( l );
183 LISTITER const end = list_end( l );
184
185 string buf[ 1 ];
186 string_new( buf );
187
188 for ( ; iter != end; iter = list_next( iter ) )
189 {
190 if ( regexec( re, object_str( list_item( iter ) ) ) )
191 {
192 int i = 0;
193 for ( ; i < size; ++i )
194 {
195 int const index = indices[ i ];
196 /* Skip empty submatches. Not sure it is right in all cases,
197 * but surely is right for the case for which this routine
198 * is optimized -- header scanning.
199 */
200 if ( re->startp[ index ] != re->endp[ index ] )
201 {
202 string_append_range( buf, re->startp[ index ],
203 re->endp[ index ] );
204 result = list_push_back( result, object_new( buf->value
205 ) );
206 string_truncate( buf, 0 );
207 }
208 }
209 }
210 }
211 string_free( buf );
212 }
213
214 BJAM_FREE( indices );
215 return result;
216 }
217
218
init_regex()219 void init_regex()
220 {
221 {
222 char const * args[] = { "string", "separator", 0 };
223 declare_native_rule( "regex", "split", args, regex_split, 1 );
224 }
225 {
226 char const * args[] = { "string", "match", "replacement", 0 };
227 declare_native_rule( "regex", "replace", args, regex_replace, 1 );
228 }
229 {
230 char const * args[] = { "list", "*", ":", "pattern", ":", "indices", "*", 0 };
231 declare_native_rule( "regex", "transform", args, regex_transform, 2 );
232 }
233 }
234