1 // properties.cc
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Functions for updating property bits for various FST operations and
20 // string names of the properties.
21
22 #include <fst/properties.h>
23
24 #include <stddef.h>
25 #include <vector>
26 using std::vector;
27
28 namespace fst {
29
30 // These functions determine the properties associated with the FST
31 // result of various finite-state operations. The property arguments
32 // correspond to the operation's FST arguments. The properties
33 // returned assume the operation modifies its first argument.
34 // Bitwise-and this result with kCopyProperties for the case when a
35 // new (possibly delayed) FST is instead constructed.
36
37 // Properties for a concatenatively-closed FST.
ClosureProperties(uint64 inprops,bool star,bool delayed)38 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
39 uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
40 if (!delayed)
41 outprops |= (kExpanded | kMutable | kCoAccessible |
42 kNotTopSorted | kNotString) & inprops;
43 if (!delayed || inprops & kAccessible)
44 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
45 kNotILabelSorted | kNotOLabelSorted | kWeighted |
46 kNotAccessible | kNotCoAccessible) & inprops;
47 return outprops;
48 }
49
50 // Properties for a complemented FST.
ComplementProperties(uint64 inprops)51 uint64 ComplementProperties(uint64 inprops) {
52 uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
53 kNoIEpsilons | kNoOEpsilons |
54 kIDeterministic | kODeterministic | kAccessible;
55 outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
56 inprops;
57 if (inprops & kAccessible)
58 outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
59 return outprops;
60 }
61
62 // Properties for a composed FST.
ComposeProperties(uint64 inprops1,uint64 inprops2)63 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
64 uint64 outprops = kError & (inprops1 | inprops2);
65 if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
66 outprops |= kAcceptor | kAccessible;
67 outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
68 kInitialAcyclic) & inprops1 & inprops2;
69 if (kNoIEpsilons & inprops1 & inprops2)
70 outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
71 } else {
72 outprops |= kAccessible;
73 outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
74 inprops1 & inprops2;
75 if (kNoIEpsilons & inprops1 & inprops2)
76 outprops |= kIDeterministic & inprops1 & inprops2;
77 }
78 return outprops;
79 }
80
81 // Properties for a concatenated FST.
ConcatProperties(uint64 inprops1,uint64 inprops2,bool delayed)82 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
83 uint64 outprops =
84 (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
85 outprops |= kError & (inprops1 | inprops2);
86
87 bool empty1 = delayed; // Can fst1 be the empty machine?
88 bool empty2 = delayed; // Can fst2 be the empty machine?
89
90 if (!delayed) {
91 outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
92 outprops |= (kNotTopSorted | kNotString) & inprops2;
93 }
94 if (!empty1)
95 outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
96 if (!delayed || inprops1 & kAccessible)
97 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
98 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
99 kNotOLabelSorted | kWeighted | kCyclic |
100 kNotAccessible | kNotCoAccessible) & inprops1;
101 if ((inprops1 & (kAccessible | kCoAccessible)) ==
102 (kAccessible | kCoAccessible) && !empty1) {
103 outprops |= kAccessible & inprops2;
104 if (!empty2)
105 outprops |= kCoAccessible & inprops2;
106 if (!delayed || inprops2 & kAccessible)
107 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
108 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
109 kNotOLabelSorted | kWeighted | kCyclic |
110 kNotAccessible | kNotCoAccessible) & inprops2;
111 }
112 return outprops;
113 }
114
115 // Properties for a determinized FST.
DeterminizeProperties(uint64 inprops,bool has_subsequential_label)116 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
117 uint64 outprops = kAccessible;
118 if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
119 outprops |= kIDeterministic;
120 outprops |= (kError | kAcceptor | kNoEpsilons | kAcyclic |
121 kInitialAcyclic | kCoAccessible | kString) & inprops;
122 if (inprops & kAccessible)
123 outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
124 kCyclic) & inprops;
125 if (inprops & kAcceptor)
126 outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
127 if ((inprops & kNoIEpsilons) && has_subsequential_label)
128 outprops |= kNoIEpsilons;
129 return outprops;
130 }
131
132 // Properties for factored weight FST.
FactorWeightProperties(uint64 inprops)133 uint64 FactorWeightProperties(uint64 inprops) {
134 uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
135 kAcyclic | kAccessible | kCoAccessible) & inprops;
136 if (inprops & kAccessible)
137 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
138 kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
139 kNotILabelSorted | kNotOLabelSorted)
140 & inprops;
141 return outprops;
142 }
143
144 // Properties for an inverted FST.
InvertProperties(uint64 inprops)145 uint64 InvertProperties(uint64 inprops) {
146 uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
147 kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
148 kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
149 kTopSorted | kNotTopSorted |
150 kAccessible | kNotAccessible |
151 kCoAccessible | kNotCoAccessible |
152 kString | kNotString) & inprops;
153 if (kIDeterministic & inprops)
154 outprops |= kODeterministic;
155 if (kNonIDeterministic & inprops)
156 outprops |= kNonODeterministic;
157 if (kODeterministic & inprops)
158 outprops |= kIDeterministic;
159 if (kNonODeterministic & inprops)
160 outprops |= kNonIDeterministic;
161
162 if (kIEpsilons & inprops)
163 outprops |= kOEpsilons;
164 if (kNoIEpsilons & inprops)
165 outprops |= kNoOEpsilons;
166 if (kOEpsilons & inprops)
167 outprops |= kIEpsilons;
168 if (kNoOEpsilons & inprops)
169 outprops |= kNoIEpsilons;
170
171 if (kILabelSorted & inprops)
172 outprops |= kOLabelSorted;
173 if (kNotILabelSorted & inprops)
174 outprops |= kNotOLabelSorted;
175 if (kOLabelSorted & inprops)
176 outprops |= kILabelSorted;
177 if (kNotOLabelSorted & inprops)
178 outprops |= kNotILabelSorted;
179 return outprops;
180 }
181
182 // Properties for a projected FST.
ProjectProperties(uint64 inprops,bool project_input)183 uint64 ProjectProperties(uint64 inprops, bool project_input) {
184 uint64 outprops = kAcceptor;
185 outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
186 kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
187 kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
188 kCoAccessible | kNotCoAccessible |
189 kString | kNotString) & inprops;
190 if (project_input) {
191 outprops |= (kIDeterministic | kNonIDeterministic |
192 kIEpsilons | kNoIEpsilons |
193 kILabelSorted | kNotILabelSorted) & inprops;
194
195 if (kIDeterministic & inprops)
196 outprops |= kODeterministic;
197 if (kNonIDeterministic & inprops)
198 outprops |= kNonODeterministic;
199
200 if (kIEpsilons & inprops)
201 outprops |= kOEpsilons | kEpsilons;
202 if (kNoIEpsilons & inprops)
203 outprops |= kNoOEpsilons | kNoEpsilons;
204
205 if (kILabelSorted & inprops)
206 outprops |= kOLabelSorted;
207 if (kNotILabelSorted & inprops)
208 outprops |= kNotOLabelSorted;
209 } else {
210 outprops |= (kODeterministic | kNonODeterministic |
211 kOEpsilons | kNoOEpsilons |
212 kOLabelSorted | kNotOLabelSorted) & inprops;
213
214 if (kODeterministic & inprops)
215 outprops |= kIDeterministic;
216 if (kNonODeterministic & inprops)
217 outprops |= kNonIDeterministic;
218
219 if (kOEpsilons & inprops)
220 outprops |= kIEpsilons | kEpsilons;
221 if (kNoOEpsilons & inprops)
222 outprops |= kNoIEpsilons | kNoEpsilons;
223
224 if (kOLabelSorted & inprops)
225 outprops |= kILabelSorted;
226 if (kNotOLabelSorted & inprops)
227 outprops |= kNotILabelSorted;
228 }
229 return outprops;
230 }
231
232 // Properties for a randgen FST.
RandGenProperties(uint64 inprops,bool weighted)233 uint64 RandGenProperties(uint64 inprops, bool weighted) {
234 uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
235 outprops |= inprops & kError;
236 if (weighted) {
237 outprops |= kTopSorted;
238 outprops |= (kAcceptor | kNoEpsilons |
239 kNoIEpsilons | kNoOEpsilons |
240 kIDeterministic | kODeterministic |
241 kILabelSorted | kOLabelSorted) & inprops;
242 } else {
243 outprops |= kUnweighted;
244 outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
245 }
246 return outprops;
247 }
248
249 // Properties for a replace FST.
ReplaceProperties(const vector<uint64> & inprops,ssize_t root,bool epsilon_on_replace,bool no_empty_fsts)250 uint64 ReplaceProperties(const vector<uint64>& inprops,
251 ssize_t root,
252 bool epsilon_on_replace,
253 bool no_empty_fsts) {
254 if (inprops.size() == 0)
255 return kNullProperties;
256 uint64 outprops = 0;
257 for (size_t i = 0; i < inprops.size(); ++i)
258 outprops |= kError & inprops[i];
259 uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
260 for (size_t i = 0; i < inprops.size(); ++i)
261 access_props &= (inprops[i] & (kAccessible | kCoAccessible));
262 if (access_props == (kAccessible | kCoAccessible)) {
263 outprops |= access_props;
264 if (inprops[root] & kInitialCyclic)
265 outprops |= kInitialCyclic;
266 uint64 props = 0;
267 bool string = true;
268 for (size_t i = 0; i < inprops.size(); ++i) {
269 if (epsilon_on_replace == false)
270 props |= kNotAcceptor & inprops[i];
271 props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
272 kIEpsilons | kOEpsilons | kWeighted | kCyclic |
273 kNotTopSorted | kNotString) & inprops[i];
274 if (!(inprops[i] & kString))
275 string = false;
276 }
277 outprops |= props;
278 if (string)
279 outprops |= kString;
280 }
281 bool acceptor = epsilon_on_replace;
282 bool ideterministic = !epsilon_on_replace;
283 bool no_iepsilons = !epsilon_on_replace;
284 bool acyclic = true;
285 bool unweighted = true;
286 for (size_t i = 0; i < inprops.size(); ++i) {
287 if (!(inprops[i] & kAcceptor))
288 acceptor = false;
289 if (!(inprops[i] & kIDeterministic))
290 ideterministic = false;
291 if (!(inprops[i] & kNoIEpsilons))
292 no_iepsilons = false;
293 if (!(inprops[i] & kAcyclic))
294 acyclic = false;
295 if (!(inprops[i] & kUnweighted))
296 unweighted = false;
297 }
298 if (acceptor)
299 outprops |= kAcceptor;
300 if (ideterministic)
301 outprops |= kIDeterministic;
302 if (no_iepsilons)
303 outprops |= kNoIEpsilons;
304 if (acyclic)
305 outprops |= kAcyclic;
306 if (unweighted)
307 outprops |= kUnweighted;
308 if (inprops[root] & kInitialAcyclic)
309 outprops |= kInitialAcyclic;
310 return outprops;
311 }
312
313 // Properties for a relabeled FST.
RelabelProperties(uint64 inprops)314 uint64 RelabelProperties(uint64 inprops) {
315 uint64 outprops = (kExpanded | kMutable | kError |
316 kWeighted | kUnweighted |
317 kCyclic | kAcyclic |
318 kInitialCyclic | kInitialAcyclic |
319 kTopSorted | kNotTopSorted |
320 kAccessible | kNotAccessible |
321 kCoAccessible | kNotCoAccessible |
322 kString | kNotString) & inprops;
323 return outprops;
324 }
325
326 // Properties for a reversed FST. (the superinitial state limits this set)
ReverseProperties(uint64 inprops)327 uint64 ReverseProperties(uint64 inprops) {
328 uint64 outprops =
329 (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
330 kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
331 kCyclic | kAcyclic) & inprops;
332 return outprops;
333 }
334
335 // Properties for re-weighted FST.
ReweightProperties(uint64 inprops)336 uint64 ReweightProperties(uint64 inprops) {
337 uint64 outprops = inprops & kWeightInvariantProperties;
338 outprops = outprops & ~kCoAccessible;
339 return outprops;
340 }
341
342 // Properties for an epsilon-removed FST.
RmEpsilonProperties(uint64 inprops,bool delayed)343 uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
344 uint64 outprops = kNoEpsilons;
345 outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
346 if (inprops & kAcceptor)
347 outprops |= kNoIEpsilons | kNoOEpsilons;
348 if (!delayed) {
349 outprops |= kExpanded | kMutable;
350 outprops |= kTopSorted & inprops;
351 }
352 if (!delayed || inprops & kAccessible)
353 outprops |= kNotAcceptor & inprops;
354 return outprops;
355 }
356
357 // Properties for shortest path. This function computes how the properties
358 // of the output of shortest path need to be updated, given that 'props' is
359 // already known.
ShortestPathProperties(uint64 props)360 uint64 ShortestPathProperties(uint64 props) {
361 return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
362 }
363
364 // Properties for a synchronized FST.
SynchronizeProperties(uint64 inprops)365 uint64 SynchronizeProperties(uint64 inprops) {
366 uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
367 kCoAccessible | kUnweighted) & inprops;
368 if (inprops & kAccessible)
369 outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
370 return outprops;
371 }
372
373 // Properties for a unioned FST.
UnionProperties(uint64 inprops1,uint64 inprops2,bool delayed)374 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
375 uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
376 & inprops1 & inprops2;
377 outprops |= kError & (inprops1 | inprops2);
378
379 bool empty1 = delayed; // Can fst1 be the empty machine?
380 bool empty2 = delayed; // Can fst2 be the empty machine?
381 if (!delayed) {
382 outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
383 outprops |= (kNotTopSorted | kNotString) & inprops2;
384 }
385 if (!empty1 && !empty2) {
386 outprops |= kEpsilons | kIEpsilons | kOEpsilons;
387 outprops |= kCoAccessible & inprops1 & inprops2;
388 }
389 // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
390 if (!delayed || inprops1 & kAccessible)
391 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
392 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
393 kNotOLabelSorted | kWeighted | kCyclic |
394 kNotAccessible) & inprops1;
395 if (!delayed || inprops2 & kAccessible)
396 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
397 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
398 kNotOLabelSorted | kWeighted | kCyclic |
399 kNotAccessible | kNotCoAccessible) & inprops2;
400 return outprops;
401 }
402
403
404 // Property string names (indexed by bit position).
405 const char *PropertyNames[] = {
406 // binary
407 "expanded", "mutable", "error", "", "", "", "", "",
408 "", "", "", "", "", "", "", "",
409 // trinary
410 "acceptor", "not acceptor",
411 "input deterministic", "non input deterministic",
412 "output deterministic", "non output deterministic",
413 "input/output epsilons", "no input/output epsilons",
414 "input epsilons", "no input epsilons",
415 "output epsilons", "no output epsilons",
416 "input label sorted", "not input label sorted",
417 "output label sorted", "not output label sorted",
418 "weighted", "unweighted",
419 "cyclic", "acyclic",
420 "cyclic at initial state", "acyclic at initial state",
421 "top sorted", "not top sorted",
422 "accessible", "not accessible",
423 "coaccessible", "not coaccessible",
424 "string", "not string",
425 };
426
427 } // namespace fst
428