1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 package org.apache.bcel.util; 19 20 import java.util.ArrayList; 21 import java.util.HashMap; 22 import java.util.Iterator; 23 import java.util.List; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.regex.Matcher; 27 import java.util.regex.Pattern; 28 29 import org.apache.bcel.Const; 30 import org.apache.bcel.generic.ClassGenException; 31 import org.apache.bcel.generic.InstructionHandle; 32 import org.apache.bcel.generic.InstructionList; 33 34 /** 35 * InstructionFinder is a tool to search for given instructions patterns, i.e., 36 * match sequences of instructions in an instruction list via regular 37 * expressions. This can be used, e.g., in order to implement a peep hole 38 * optimizer that looks for code patterns and replaces them with faster 39 * equivalents. 40 * 41 * <p> 42 * This class internally uses the java.util.regex 43 * package to search for regular expressions. 44 * 45 * A typical application would look like this: 46 * 47 * <pre> 48 * 49 * 50 * InstructionFinder f = new InstructionFinder(il); 51 * String pat = "IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)"; 52 * 53 * for (Iterator i = f.search(pat, constraint); i.hasNext(); ) { 54 * InstructionHandle[] match = (InstructionHandle[])i.next(); 55 * ... 56 * il.delete(match[1], match[5]); 57 * ... 58 * } 59 * 60 * 61 * </pre> 62 * 63 * @version $Id$ 64 * @see org.apache.bcel.generic.Instruction 65 * @see InstructionList 66 */ 67 public class InstructionFinder { 68 69 private static final int OFFSET = 32767; // char + OFFSET is outside of LATIN-1 70 private static final int NO_OPCODES = 256; // Potential number, some are not used 71 private static final Map<String, String> map = new HashMap<>(); 72 private final InstructionList il; 73 private String il_string; // instruction list as string 74 private InstructionHandle[] handles; // map instruction 75 76 77 // list to array 78 /** 79 * @param il 80 * instruction list to search for given patterns 81 */ InstructionFinder(final InstructionList il)82 public InstructionFinder(final InstructionList il) { 83 this.il = il; 84 reread(); 85 } 86 87 88 /** 89 * Reread the instruction list, e.g., after you've altered the list upon a 90 * match. 91 */ reread()92 public final void reread() { 93 final int size = il.getLength(); 94 final char[] buf = new char[size]; // Create a string with length equal to il length 95 handles = il.getInstructionHandles(); 96 // Map opcodes to characters 97 for (int i = 0; i < size; i++) { 98 buf[i] = makeChar(handles[i].getInstruction().getOpcode()); 99 } 100 il_string = new String(buf); 101 } 102 103 104 /** 105 * Map symbolic instruction names like "getfield" to a single character. 106 * 107 * @param pattern 108 * instruction pattern in lower case 109 * @return encoded string for a pattern such as "BranchInstruction". 110 */ mapName( final String pattern )111 private static String mapName( final String pattern ) { 112 final String result = map.get(pattern); 113 if (result != null) { 114 return result; 115 } 116 for (short i = 0; i < NO_OPCODES; i++) { 117 if (pattern.equals(Const.getOpcodeName(i))) { 118 return "" + makeChar(i); 119 } 120 } 121 throw new RuntimeException("Instruction unknown: " + pattern); 122 } 123 124 125 /** 126 * Replace symbolic names of instructions with the appropiate character and 127 * remove all white space from string. Meta characters such as +, * are 128 * ignored. 129 * 130 * @param pattern 131 * The pattern to compile 132 * @return translated regular expression string 133 */ compilePattern( final String pattern )134 private static String compilePattern( final String pattern ) { 135 //Bug: BCEL-77 - Instructions are assumed to be english, to avoid odd Locale issues 136 final String lower = pattern.toLowerCase(Locale.ENGLISH); 137 final StringBuilder buf = new StringBuilder(); 138 final int size = pattern.length(); 139 for (int i = 0; i < size; i++) { 140 char ch = lower.charAt(i); 141 if (Character.isLetterOrDigit(ch)) { 142 final StringBuilder name = new StringBuilder(); 143 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) { 144 name.append(ch); 145 if (++i < size) { 146 ch = lower.charAt(i); 147 } else { 148 break; 149 } 150 } 151 i--; 152 buf.append(mapName(name.toString())); 153 } else if (!Character.isWhitespace(ch)) { 154 buf.append(ch); 155 } 156 } 157 return buf.toString(); 158 } 159 160 161 /** 162 * @return the matched piece of code as an array of instruction (handles) 163 */ getMatch( final int matched_from, final int match_length )164 private InstructionHandle[] getMatch( final int matched_from, final int match_length ) { 165 final InstructionHandle[] match = new InstructionHandle[match_length]; 166 System.arraycopy(handles, matched_from, match, 0, match_length); 167 return match; 168 } 169 170 171 /** 172 * Search for the given pattern in the instruction list. You can search for 173 * any valid opcode via its symbolic name, e.g. "istore". You can also use a 174 * super class or an interface name to match a whole set of instructions, e.g. 175 * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all 176 * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp" 177 * for "if_icmpxx", "if_acmp" for "if_acmpxx". 178 * 179 * Consecutive instruction names must be separated by white space which will 180 * be removed during the compilation of the pattern. 181 * 182 * For the rest the usual pattern matching rules for regular expressions 183 * apply. 184 * <P> 185 * Example pattern: 186 * 187 * <pre> 188 * search("BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*"); 189 * </pre> 190 * 191 * <p> 192 * If you alter the instruction list upon a match such that other matching 193 * areas are affected, you should call reread() to update the finder and call 194 * search() again, because the matches are cached. 195 * 196 * @param pattern 197 * the instruction pattern to search for, where case is ignored 198 * @param from 199 * where to start the search in the instruction list 200 * @param constraint 201 * optional CodeConstraint to check the found code pattern for 202 * user-defined constraints 203 * @return iterator of matches where e.nextElement() returns an array of 204 * instruction handles describing the matched area 205 */ search( final String pattern, final InstructionHandle from, final CodeConstraint constraint )206 public final Iterator<InstructionHandle[]> search( final String pattern, final InstructionHandle from, final CodeConstraint constraint ) { 207 final String search = compilePattern(pattern); 208 int start = -1; 209 for (int i = 0; i < handles.length; i++) { 210 if (handles[i] == from) { 211 start = i; // Where to start search from (index) 212 break; 213 } 214 } 215 if (start == -1) { 216 throw new ClassGenException("Instruction handle " + from 217 + " not found in instruction list."); 218 } 219 final Pattern regex = Pattern.compile(search); 220 final List<InstructionHandle[]> matches = new ArrayList<>(); 221 final Matcher matcher = regex.matcher(il_string); 222 while (start < il_string.length() && matcher.find(start)) { 223 final int startExpr = matcher.start(); 224 final int endExpr = matcher.end(); 225 final int lenExpr = endExpr - startExpr; 226 final InstructionHandle[] match = getMatch(startExpr, lenExpr); 227 if ((constraint == null) || constraint.checkCode(match)) { 228 matches.add(match); 229 } 230 start = endExpr; 231 } 232 return matches.iterator(); 233 } 234 235 236 /** 237 * Start search beginning from the start of the given instruction list. 238 * 239 * @param pattern 240 * the instruction pattern to search for, where case is ignored 241 * @return iterator of matches where e.nextElement() returns an array of 242 * instruction handles describing the matched area 243 */ search( final String pattern )244 public final Iterator<InstructionHandle[]> search( final String pattern ) { 245 return search(pattern, il.getStart(), null); 246 } 247 248 249 /** 250 * Start search beginning from `from'. 251 * 252 * @param pattern 253 * the instruction pattern to search for, where case is ignored 254 * @param from 255 * where to start the search in the instruction list 256 * @return iterator of matches where e.nextElement() returns an array of 257 * instruction handles describing the matched area 258 */ search( final String pattern, final InstructionHandle from )259 public final Iterator<InstructionHandle[]> search( final String pattern, final InstructionHandle from ) { 260 return search(pattern, from, null); 261 } 262 263 264 /** 265 * Start search beginning from the start of the given instruction list. Check 266 * found matches with the constraint object. 267 * 268 * @param pattern 269 * the instruction pattern to search for, case is ignored 270 * @param constraint 271 * constraints to be checked on matching code 272 * @return instruction handle or `null' if the match failed 273 */ search( final String pattern, final CodeConstraint constraint )274 public final Iterator<InstructionHandle[]> search( final String pattern, final CodeConstraint constraint ) { 275 return search(pattern, il.getStart(), constraint); 276 } 277 278 279 /** 280 * Convert opcode number to char. 281 */ makeChar( final short opcode )282 private static char makeChar( final short opcode ) { 283 return (char) (opcode + OFFSET); 284 } 285 286 287 /** 288 * @return the inquired instruction list 289 */ getInstructionList()290 public final InstructionList getInstructionList() { 291 return il; 292 } 293 294 /** 295 * Code patterns found may be checked using an additional user-defined 296 * constraint object whether they really match the needed criterion. I.e., 297 * check constraints that can not expressed with regular expressions. 298 * 299 */ 300 public interface CodeConstraint { 301 302 /** 303 * @param match 304 * array of instructions matching the requested pattern 305 * @return true if the matched area is really useful 306 */ checkCode( InstructionHandle[] match )307 boolean checkCode( InstructionHandle[] match ); 308 } 309 310 // Initialize pattern map 311 static { 312 map.put("arithmeticinstruction","(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)"); 313 map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial|invokedynamic)"); 314 map.put("arrayinstruction", "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)"); 315 map.put("gotoinstruction", "(goto|goto_w)"); 316 map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)"); 317 map.put("localvariableinstruction","(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)"); 318 map.put("loadinstruction", "(fload|dload|lload|iload|aload)"); 319 map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)"); 320 map.put("cpinstruction", "(ldc2_w|invokeinterface|invokedynamic|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)"); 321 map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)"); 322 map.put("branchinstruction", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)"); 323 map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)"); 324 map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)"); 325 map.put("select", "(tableswitch|lookupswitch)"); 326 map.put("ifinstruction", "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)"); 327 map.put("jsrinstruction", "(jsr|jsr_w)"); 328 map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)"); 329 map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)"); 330 map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)"); 331 map.put("typedinstruction", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)"); 332 map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)"); 333 map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)"); 334 map.put("indexedinstruction", "(lload|lstore|fload|ldc2_w|invokeinterface|invokedynamic|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)"); 335 map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)"); 336 map.put("stackproducer", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|invokedynamic|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)"); 337 map.put("stackconsumer", "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)"); 338 map.put("exceptionthrower","(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|invokedynamic|ldc|invokestatic|daload)"); 339 map.put("loadclass", "(multianewarray|invokeinterface|invokedynamic|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)"); 340 map.put("instructiontargeter", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)"); 341 // Some aliases 342 map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)"); 343 map.put("if_acmp", "(if_acmpeq|if_acmpne)"); 344 map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)"); 345 // Precompile some aliases first 346 map.put("iconst", precompile(Const.ICONST_0, Const.ICONST_5, Const.ICONST_M1)); 347 map.put("lconst", new String(new char[] { '(', makeChar(Const.LCONST_0), '|', makeChar(Const.LCONST_1), ')' })); 348 map.put("dconst", new String(new char[] { '(', makeChar(Const.DCONST_0), '|', makeChar(Const.DCONST_1), ')' })); 349 map.put("fconst", new String(new char[] { '(', makeChar(Const.FCONST_0), '|', makeChar(Const.FCONST_1), '|', makeChar(Const.FCONST_2), ')' })); 350 map.put("lload", precompile(Const.LLOAD_0, Const.LLOAD_3, Const.LLOAD)); 351 map.put("iload", precompile(Const.ILOAD_0, Const.ILOAD_3, Const.ILOAD)); 352 map.put("dload", precompile(Const.DLOAD_0, Const.DLOAD_3, Const.DLOAD)); 353 map.put("fload", precompile(Const.FLOAD_0, Const.FLOAD_3, Const.FLOAD)); 354 map.put("aload", precompile(Const.ALOAD_0, Const.ALOAD_3, Const.ALOAD)); 355 map.put("lstore", precompile(Const.LSTORE_0, Const.LSTORE_3, Const.LSTORE)); 356 map.put("istore", precompile(Const.ISTORE_0, Const.ISTORE_3, Const.ISTORE)); 357 map.put("dstore", precompile(Const.DSTORE_0, Const.DSTORE_3, Const.DSTORE)); 358 map.put("fstore", precompile(Const.FSTORE_0, Const.FSTORE_3, Const.FSTORE)); 359 map.put("astore", precompile(Const.ASTORE_0, Const.ASTORE_3, Const.ASTORE)); 360 // Compile strings 361 for (final Map.Entry<String, String> entry : map.entrySet()) { 362 final String key = entry.getKey(); 363 final String value = entry.getValue(); 364 final char ch = value.charAt(1); // Omit already precompiled patterns 365 if (ch < OFFSET) { map.put(key, compilePattern(value))366 map.put(key, compilePattern(value)); // precompile all patterns 367 } 368 } 369 // Add instruction alias to match anything 370 final StringBuilder buf = new StringBuilder("("); 371 for (short i = 0; i < NO_OPCODES; i++) { 372 if (Const.getNoOfOperands(i) != Const.UNDEFINED) { // Not an invalid opcode makeChar(i)373 buf.append(makeChar(i)); 374 if (i < NO_OPCODES - 1) { 375 buf.append('|'); 376 } 377 } 378 } 379 buf.append(')'); 380 map.put("instruction", buf.toString()); 381 } 382 383 precompile( final short from, final short to, final short extra )384 private static String precompile( final short from, final short to, final short extra ) { 385 final StringBuilder buf = new StringBuilder("("); 386 for (short i = from; i <= to; i++) { 387 buf.append(makeChar(i)); 388 buf.append('|'); 389 } 390 buf.append(makeChar(extra)); 391 buf.append(")"); 392 return buf.toString(); 393 } 394 395 396 /* 397 * Internal debugging routines. 398 */ 399 // private static final String pattern2string( String pattern ) { 400 // return pattern2string(pattern, true); 401 // } 402 403 404 // private static final String pattern2string( String pattern, boolean make_string ) { 405 // StringBuffer buf = new StringBuffer(); 406 // for (int i = 0; i < pattern.length(); i++) { 407 // char ch = pattern.charAt(i); 408 // if (ch >= OFFSET) { 409 // if (make_string) { 410 // buf.append(Constants.getOpcodeName(ch - OFFSET)); 411 // } else { 412 // buf.append((ch - OFFSET)); 413 // } 414 // } else { 415 // buf.append(ch); 416 // } 417 // } 418 // return buf.toString(); 419 // } 420 } 421