• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2003. Vladimir Prus
3  * Distributed under the Boost Software License, Version 1.0.
4  * (See accompanying file LICENSE_1_0.txt or copy at
5  * http://www.boost.org/LICENSE_1_0.txt)
6  */
7 
8 #include "../mem.h"
9 #include "../native.h"
10 #include "../jam_strings.h"
11 #include "../subst.h"
12 
13 /*
14 rule split ( string separator )
15 {
16     local result ;
17     local s = $(string) ;
18 
19     local match = 1 ;
20     while $(match)
21     {
22         match = [ MATCH ^(.*)($(separator))(.*) : $(s) ] ;
23         if $(match)
24         {
25             match += "" ;  # in case 3rd item was empty - works around MATCH bug
26             result = $(match[3]) $(result) ;
27             s = $(match[1]) ;
28         }
29     }
30     return $(s) $(result) ;
31 }
32 */
33 
regex_split(FRAME * frame,int flags)34 LIST * regex_split( FRAME * frame, int flags )
35 {
36     LIST * args = lol_get( frame->args, 0 );
37     OBJECT * s;
38     OBJECT * separator;
39     regexp * re;
40     const char * pos, * prev;
41     LIST * result = L0;
42     LISTITER iter = list_begin( args );
43     s = list_item( iter );
44     separator = list_item( list_next( iter ) );
45 
46     re = regex_compile( separator );
47 
48     prev = pos = object_str( s );
49     while ( regexec( re, pos ) )
50     {
51         result = list_push_back( result, object_new_range( prev, re->startp[ 0 ] - prev ) );
52         prev = re->endp[ 0 ];
53         /* Handle empty matches */
54         if ( *pos == '\0' )
55             break;
56         else if ( pos == re->endp[ 0 ] )
57             pos++;
58         else
59             pos = re->endp[ 0 ];
60     }
61 
62     result = list_push_back( result, object_new( pos ) );
63 
64     return result;
65 }
66 
67 /*
68 rule replace (
69     string  # The string to modify.
70     match  # The characters to replace.
71     replacement  # The string to replace with.
72     )
73 {
74     local result = "" ;
75     local parts = 1 ;
76     while $(parts)
77     {
78         parts = [ MATCH ^(.*)($(match))(.*) : $(string) ] ;
79         if $(parts)
80         {
81             parts += "" ;
82             result = "$(replacement)$(parts[3])$(result)" ;
83             string = $(parts[1]) ;
84         }
85     }
86     string ?= "" ;
87     result = "$(string)$(result)" ;
88     return $(result) ;
89 }
90 */
91 
regex_replace(FRAME * frame,int flags)92 LIST * regex_replace( FRAME * frame, int flags )
93 {
94     LIST * args = lol_get( frame->args, 0 );
95     OBJECT * s;
96     OBJECT * match;
97     OBJECT * replacement;
98     regexp * re;
99     const char * pos;
100     string buf[ 1 ];
101     LIST * result;
102     LISTITER iter = list_begin( args );
103     s = list_item( iter );
104     iter = list_next( iter );
105     match = list_item( iter );
106     iter = list_next( iter );
107     replacement = list_item(iter );
108 
109     re = regex_compile( match );
110 
111     string_new( buf );
112 
113     pos = object_str( s );
114     while ( regexec( re, pos ) )
115     {
116         string_append_range( buf, pos, re->startp[ 0 ] );
117         string_append( buf, object_str( replacement ) );
118         /* Handle empty matches */
119         if ( *pos == '\0' )
120             break;
121         else if ( pos == re->endp[ 0 ] )
122             string_push_back( buf, *pos++ );
123         else
124             pos = re->endp[ 0 ];
125     }
126     string_append( buf, pos );
127 
128     result = list_new( object_new( buf->value ) );
129 
130     string_free( buf );
131 
132     return result;
133 }
134 
135 /*
136 rule transform ( list * : pattern : indices * )
137 {
138     indices ?= 1 ;
139     local result ;
140     for local e in $(list)
141     {
142         local m = [ MATCH $(pattern) : $(e) ] ;
143         if $(m)
144         {
145             result += $(m[$(indices)]) ;
146         }
147     }
148     return $(result) ;
149 }
150 */
151 
regex_transform(FRAME * frame,int flags)152 LIST * regex_transform( FRAME * frame, int flags )
153 {
154     LIST * const l = lol_get( frame->args, 0 );
155     LIST * const pattern = lol_get( frame->args, 1 );
156     LIST * const indices_list = lol_get( frame->args, 2 );
157     int * indices = 0;
158     int size;
159     LIST * result = L0;
160 
161     if ( !list_empty( indices_list ) )
162     {
163         int * p;
164         LISTITER iter = list_begin( indices_list );
165         LISTITER const end = list_end( indices_list );
166         size = list_length( indices_list );
167         indices = (int *)BJAM_MALLOC( size * sizeof( int ) );
168         for ( p = indices; iter != end; iter = list_next( iter ) )
169             *p++ = atoi( object_str( list_item( iter ) ) );
170     }
171     else
172     {
173         size = 1;
174         indices = (int *)BJAM_MALLOC( sizeof( int ) );
175         *indices = 1;
176     }
177 
178     {
179         /* Result is cached and intentionally never freed */
180         regexp * const re = regex_compile( list_front( pattern ) );
181 
182         LISTITER iter = list_begin( l );
183         LISTITER const end = list_end( l );
184 
185         string buf[ 1 ];
186         string_new( buf );
187 
188         for ( ; iter != end; iter = list_next( iter ) )
189         {
190             if ( regexec( re, object_str( list_item( iter ) ) ) )
191             {
192                 int i = 0;
193                 for ( ; i < size; ++i )
194                 {
195                     int const index = indices[ i ];
196                     /* Skip empty submatches. Not sure it is right in all cases,
197                      * but surely is right for the case for which this routine
198                      * is optimized -- header scanning.
199                      */
200                     if ( re->startp[ index ] != re->endp[ index ] )
201                     {
202                         string_append_range( buf, re->startp[ index ],
203                             re->endp[ index ] );
204                         result = list_push_back( result, object_new( buf->value
205                             ) );
206                         string_truncate( buf, 0 );
207                     }
208                 }
209             }
210         }
211         string_free( buf );
212     }
213 
214     BJAM_FREE( indices );
215     return result;
216 }
217 
218 
init_regex()219 void init_regex()
220 {
221     {
222         char const * args[] = { "string", "separator", 0  };
223         declare_native_rule( "regex", "split", args, regex_split, 1 );
224     }
225     {
226         char const * args[] = { "string", "match", "replacement", 0  };
227         declare_native_rule( "regex", "replace", args, regex_replace, 1 );
228     }
229     {
230         char const * args[] = { "list", "*", ":", "pattern", ":", "indices", "*", 0 };
231         declare_native_rule( "regex", "transform", args, regex_transform, 2 );
232     }
233 }
234