package org.unicode.cldr.draft;

import java.text.ParsePosition;
import java.util.Arrays;
import java.util.List;

import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

/**
 * Table-driven state machine that walks a character sequence one code point at a time.
 * Each state owns a {@link UnicodeMap} from code point to {@link StateAction}; the action
 * says which state comes next, whether to push a return state, whether to consume the
 * current code point, and which (optional) callback action to report to the
 * {@link StateObjectBuilder}.
 *
 * @param <T> type of the object produced by a successful parse
 */
public class StateMachine<T> {
    static boolean SHOW_STATE_TRANSITIONS = false; // Utility.getProperty("transitions", false);

    /** State in which every parse begins. */
    private static final short START = 0;
    /** Pseudo-state: stop parsing and return the builder's result. */
    static final short EXIT = -1;
    /** Pseudo-state: continue in the state most recently pushed on the builder's stack. */
    static final short POP = -2;
    /** Pseudo-state: abort parsing with an {@link IllegalArgumentException}. */
    static final short ERROR = -3;
    /** Marker for "no state set". */
    static final short UNDEFINED = -4;

    /** Code point substituted once the end of the input has been reached. */
    private static final int END_OF_INPUT = 0xFFFF;

    private final UnicodeMap[] stateToData;
    private final StateObjectBuilderFactory<T> factory;
    private final String[] stateNames;
    private final String[] actionNames;

    /**
     * Creates a state machine from its transition tables.
     *
     * @param stateToData one map per state, indexed by state number; values are {@link StateAction}s
     * @param factory     supplies a fresh {@link StateObjectBuilder} for every parse
     * @param stateNames  display names of the states, indexed by state number
     * @param actionNames display names of the actions, indexed by action number
     */
    StateMachine(List<UnicodeMap> stateToData, StateObjectBuilderFactory<T> factory,
        List<String> stateNames, List<String> actionNames) {
        this.stateToData = stateToData.toArray(new UnicodeMap[stateToData.size()]);
        this.stateNames = stateNames.toArray(new String[stateNames.size()]);
        this.actionNames = actionNames.toArray(new String[actionNames.size()]);
        this.factory = factory;
    }

    /**
     * Internal object that contains the row of a state machine: what to do for a
     * given (state, code point) pair.
     */
    public static class StateAction {
        boolean advanceToNextCodePoint = false;
        short nextState = StateMachine.UNDEFINED;
        short pushState = StateMachine.UNDEFINED;
        short action = -1;

        /**
         * Two actions are equal when all four fields match. Returns {@code false}
         * (rather than throwing) for {@code null} or an object of another type.
         */
        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (!(other instanceof StateAction)) {
                return false;
            }
            StateAction that = (StateAction) other;
            return advanceToNextCodePoint == that.advanceToNextCodePoint
                && nextState == that.nextState
                && pushState == that.pushState
                && action == that.action;
        }

        /** Hash over the same four fields compared by {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            int result = advanceToNextCodePoint ? 1 : 0;
            result = 31 * result + nextState;
            result = 31 * result + pushState;
            result = 31 * result + action;
            return result;
        }
    }

    /** Supplies the per-parse builder objects used by {@link StateMachine#parse}. */
    public interface StateObjectBuilderFactory<T> {
        public StateObjectBuilder<T> getInstance();
    }

    /**
     * Per-parse working object: holds the input, the stack of pushed states, and the
     * hooks ({@link #init}, {@link #handle}, {@link #getResult}) that subclasses override
     * to build the parse result.
     */
    public static class StateObjectBuilder<T> {
        protected CharSequence string;
        private StateMachine<T> stateMachine;
        private short[] stateStack = new short[100];
        private int stackSize = 0;

        /** Pushes a return state, growing the stack when it is full. */
        private final void push(short state) {
            if (stackSize == stateStack.length) {
                stateStack = Arrays.copyOf(stateStack, stateStack.length * 2);
            }
            stateStack[stackSize++] = state;
        }

        /** Removes and returns the most recently pushed state. */
        private final short pop() {
            return stateStack[--stackSize];
        }

        /**
         * Called once at the start of a parse, before any code point is processed.
         *
         * @param string       the text being parsed
         * @param stateMachine the machine driving this builder
         * @param start        index in {@code string} at which parsing begins
         */
        protected void init(CharSequence string, StateMachine<T> stateMachine, int start) {
            this.string = string;
            this.stateMachine = stateMachine;
        }

        /** Returns the parse result; this base implementation returns {@code null}. */
        protected T getResult() {
            return null;
        }

        /** Returns the display name of an action, as known to the owning state machine. */
        protected String getActionName(short action) {
            return stateMachine.getActionName(action);
        }

        /**
         * Called for every transition whose action number is non-negative.
         * This base implementation does nothing.
         *
         * @param position index of the current code point in the input
         * @param action   the transition row being executed
         */
        protected void handle(int position, StateAction action) {

        }

        /** Lists the pushed states, most recently pushed first, as {@code [a, b, c]}. */
        @Override
        public String toString() {
            StringBuilder result = new StringBuilder("[");
            for (int i = stackSize - 1; i >= 0; --i) {
                if (i != stackSize - 1) {
                    result.append(", ");
                }
                result.append(stateMachine.getStateName(stateStack[i]));
            }
            return result.append("]").toString();
        }
    }

    /**
     * Formats a transition row as <code>{[+]nextState[ ^pushState][ actionName]}</code>,
     * where {@code +} marks rows that consume the current code point.
     */
    public String toString(StateAction action) {
        return "{"
            + (action.advanceToNextCodePoint ? "+" : "")
            + getStateName(action.nextState)
            + (action.pushState == StateMachine.UNDEFINED ? "" : " ^" + getStateName(action.pushState))
            + (action.action < 0 ? "" : " " + getActionName(action.action))
            + "}";
    }

    /**
     * Returns the display name of a state or pseudo-state. Values with no name
     * (including {@link #UNDEFINED}) are rendered as their number instead of failing.
     */
    private String getStateName(short nextState) {
        switch (nextState) {
        case POP:
            return "pop";
        case EXIT:
            return "exit";
        case ERROR:
            return "errorDeath";
        default:
            return nameOrIndex(stateNames, nextState);
        }
    }

    /**
     * Returns the display name of an action; numbers with no name (such as the
     * "no action" value -1) are rendered as their number instead of failing.
     */
    private String getActionName(short action) {
        return nameOrIndex(actionNames, action);
    }

    /** Looks up {@code names[index]}, falling back to the decimal index when there is no such entry. */
    private static String nameOrIndex(String[] names, int index) {
        if (names == null || index < 0 || index >= names.length) {
            return String.valueOf(index);
        }
        return names[index];
    }

    /** Returns the action names as a fixed-size list backed by this machine's array. */
    public List<String> getActionNames() {
        return Arrays.asList(actionNames);
    }

    /**
     * Dumps the whole transition table: one header line per state followed by one
     * tab-indented line per distinct action with the set of code points that trigger it.
     */
    @Override
    public String toString() {
        StringBuilder output = new StringBuilder();
        int i = 0;
        for (UnicodeMap unicodeMap : stateToData) {
            output.append(nameOrIndex(stateNames, i)).append(":\n");
            if (unicodeMap == null) {
                output.append("\tnull\n");
            } else {
                for (Object action : unicodeMap.getAvailableValues()) {
                    UnicodeSet sources = unicodeMap.keySet(action);
                    output.append('\t').append(sources.toPattern(false))
                        .append('\t').append(toString((StateAction) action))
                        .append('\n');
                }
            }
            i++;
        }
        return output.toString();
    }

    /**
     * Runs the state machine over {@code string}, starting at the index held by
     * {@code parsePosition}.
     *
     * @param string        the text to parse
     * @param parsePosition supplies the start index; on success its index is set to the
     *                      position where parsing stopped, on failure its error index is set
     * @return the result produced by the builder obtained from the factory
     * @throws StringIndexOutOfBoundsException if the start index is outside {@code [0, string.length())}
     * @throws IllegalArgumentException        if the machine reaches the {@link #ERROR} state; the
     *                                         message is the name of the action on the failing row
     */
    public T parse(CharSequence string, ParsePosition parsePosition) {
        int i = parsePosition.getIndex();
        if (i < 0 || i >= string.length()) {
            throw new StringIndexOutOfBoundsException(i);
        }
        int cp;
        short state = START;
        StateObjectBuilder<T> stateObject = factory.getInstance();
        stateObject.init(string, this, i);
        // Start at the requested index, not at the beginning of the string.
        cp = Character.codePointAt(string, i);
        if (SHOW_STATE_TRANSITIONS) {
            System.out.println("@Fetched: " + UTF16.valueOf(cp));
        }
        while (true) {
            // NOTE(review): presumably getValue yields null for a code point with no
            // transition in this state, which would surface as a NullPointerException
            // below - confirm the tables are always total.
            StateAction action = (StateAction) stateToData[state].getValue(cp);
            if (action.pushState >= 0) {
                stateObject.push(action.pushState);
                if (SHOW_STATE_TRANSITIONS) {
                    System.out.println("\t@Pushed " + stateObject);
                }
            }
            if (action.action >= 0) {
                stateObject.handle(i, action);
            }
            switch (state = action.nextState) {
            default:
                if (SHOW_STATE_TRANSITIONS) {
                    System.out.println("\t@NextState " + getStateName(state));
                }
                break;
            case POP:
                if (SHOW_STATE_TRANSITIONS) {
                    System.out.println("\t@Popping " + stateObject);
                }
                state = stateObject.pop();
                break;
            case EXIT:
                parsePosition.setIndex(i);
                return stateObject.getResult();
            case ERROR:
                parsePosition.setErrorIndex(i);
                throw new IllegalArgumentException(getActionName(action.action));
            }
            if (action.advanceToNextCodePoint) {
                i += UTF16.getCharCount(cp);
                // Past the end of the input the machine is fed a sentinel code point.
                cp = i < string.length() ? Character.codePointAt(string, i) : END_OF_INPUT;
                if (SHOW_STATE_TRANSITIONS) {
                    System.out.println("@Fetched: " + UTF16.valueOf(cp));
                }
            }
        }
    }
}