• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2User name to file name conversion.
3This was taken from the UFO 3 spec.
4"""
5
# Restrictions are taken mostly from
# https://docs.microsoft.com/en-gb/windows/win32/fileio/naming-a-file#naming-conventions.
#
# The following characters are treated as illegal in file names:
#
# 1. Integer value zero, sometimes referred to as the ASCII NUL character.
# 2. Characters whose integer representations are in the range 1 to 31,
#    inclusive.
# 3. Various characters that (mostly) Windows and POSIX-y filesystems don't
#    allow, plus "(" and ")", as per the specification.
# Characters that are replaced with "_" in file names: every code point
# from 0 through 31, the listed punctuation, and DEL (0x7f).
illegalCharacters = {chr(codePoint) for codePoint in range(0x20)}
illegalCharacters |= set('"*+/:<>?[\\]()|')
illegalCharacters.add("\x7f")

# Reserved file names, stored in lower case: a handful of fixed names plus
# "com1".."com9" and "lpt1".."lpt9".
reservedFileNames = {"aux", "clock$", "con", "nul", "prn"}
reservedFileNames.update(
    device + str(number) for device in ("com", "lpt") for number in range(1, 10)
)

# Maximum length of a generated file name, in characters.
maxFileNameLength = 255
89
90
class NameTranslationError(Exception):
    """Raised when no unique file name can be produced for a user name."""
93
94
def userNameToFileName(userName: str, existing=(), prefix="", suffix="") -> str:
    """
    Convert a user name into a file name following the UFO 3 algorithm.

    The conversion proceeds in this order:

    1. A leading "." is replaced with "_" (only when no prefix is given).
    2. Every character in ``illegalCharacters`` is replaced with "_".
    3. Every character that differs from its lower-case form gets a "_"
       appended, so names differing only by case stay distinct on
       case-insensitive file systems.
    4. The name is clipped so that prefix + name + suffix fits within
       ``maxFileNameLength``.
    5. Each "."-separated part that matches a reserved file name
       (case-insensitively) is prefixed with "_".
    6. If the lower-cased result is already in `existing`, the clash
       handlers are used to find a unique name.

    `userName` is the name to convert; it must be a ``str``.
    `existing` should be a set-like object holding the lower-cased file
    names that are already taken.
    `prefix` and `suffix` are added before and after the converted name.

    Returns the complete file name, including prefix and suffix.

    Raises ``ValueError`` if `userName` is not a string. A
    ``NameTranslationError`` may propagate from the clash handlers when
    no unique name can be found.

    >>> userNameToFileName("a") == "a"
    True
    >>> userNameToFileName("A") == "A_"
    True
    >>> userNameToFileName("AE") == "A_E_"
    True
    >>> userNameToFileName("Ae") == "A_e"
    True
    >>> userNameToFileName("ae") == "ae"
    True
    >>> userNameToFileName("aE") == "aE_"
    True
    >>> userNameToFileName("a.alt") == "a.alt"
    True
    >>> userNameToFileName("A.alt") == "A_.alt"
    True
    >>> userNameToFileName("A.Alt") == "A_.A_lt"
    True
    >>> userNameToFileName("A.aLt") == "A_.aL_t"
    True
    >>> userNameToFileName(u"A.alT") == "A_.alT_"
    True
    >>> userNameToFileName("T_H") == "T__H_"
    True
    >>> userNameToFileName("T_h") == "T__h"
    True
    >>> userNameToFileName("t_h") == "t_h"
    True
    >>> userNameToFileName("F_F_I") == "F__F__I_"
    True
    >>> userNameToFileName("f_f_i") == "f_f_i"
    True
    >>> userNameToFileName("Aacute_V.swash") == "A_acute_V_.swash"
    True
    >>> userNameToFileName(".notdef") == "_notdef"
    True
    >>> userNameToFileName("con") == "_con"
    True
    >>> userNameToFileName("CON") == "C_O_N_"
    True
    >>> userNameToFileName("con.alt") == "_con.alt"
    True
    >>> userNameToFileName("alt.con") == "alt._con"
    True
    >>> userNameToFileName("") == ""
    True
    """
    # the incoming name must be a string
    if not isinstance(userName, str):
        raise ValueError("The value for userName must be a string.")
    # replace an initial period with an _ if no prefix is to be added;
    # startswith() is used instead of indexing so that an empty name does
    # not raise IndexError
    if not prefix and userName.startswith("."):
        userName = "_" + userName[1:]
    # filter the user name
    filteredUserName = []
    for character in userName:
        # replace illegal characters with _
        if character in illegalCharacters:
            character = "_"
        # add _ to all non-lower characters
        elif character != character.lower():
            character += "_"
        filteredUserName.append(character)
    userName = "".join(filteredUserName)
    # clip so that prefix + name + suffix fits in the maximum length; the
    # slice length is clamped at zero because a negative value would make
    # the slice drop characters from the end instead of emptying the name
    sliceLength = max(maxFileNameLength - len(prefix) - len(suffix), 0)
    userName = userName[:sliceLength]
    # guard reserved file names in every "."-separated part
    parts = [
        "_" + part if part.lower() in reservedFileNames else part
        for part in userName.split(".")
    ]
    userName = ".".join(parts)
    # test for clash; `existing` is compared against the lower-cased name
    fullName = prefix + userName + suffix
    if fullName.lower() in existing:
        fullName = handleClash1(userName, existing, prefix, suffix)
    # finished
    return fullName
181
def handleClash1(userName, existing=[], prefix="", suffix=""):
    """
    Resolve a name clash by appending a zero-padded, 15-digit counter.

    `existing` should hold the lower-cased versions of all existing file
    names. The first counter value whose lower-cased full name is not in
    `existing` wins; if every value is taken, ``handleClash2`` is used as
    the next fallback.

    >>> prefix = ("0" * 5) + "."
    >>> suffix = "." + ("0" * 10)
    >>> existing = ["a" * 5]

    >>> e = list(existing)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000001.0000000000')
    True

    >>> e = list(existing)
    >>> e.append(prefix + "aaaaa" + "1".zfill(15) + suffix)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000002.0000000000')
    True

    >>> e = list(existing)
    >>> e.append(prefix + "AAAAA" + "2".zfill(15) + suffix)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000001.0000000000')
    True
    """
    # number of characters by which prefix + user name + 15-digit counter
    # + suffix would exceed the maximum length
    overflow = len(prefix) + len(userName) + len(suffix) + 15 - maxFileNameLength
    if overflow > 0:
        # slice the surplus characters off the end of the user name
        userName = userName[:-overflow]
    # try counters in ascending order until a free name turns up
    for counter in range(1, 999999999999999):
        candidate = prefix + userName + str(counter).zfill(15) + suffix
        if candidate.lower() not in existing:
            return candidate
    # every counter clashed, go to the next fallback
    return handleClash2(existing, prefix, suffix)
237
def handleClash2(existing=(), prefix="", suffix=""):
    """
    Last-resort clash handler: use a bare number as the file name.

    `existing` should hold the lower-cased versions of all existing file
    names. Counters are tried in ascending order starting at 1, up to the
    largest number whose digits still fit between `prefix` and `suffix`
    within ``maxFileNameLength``.

    Returns the first ``prefix + str(counter) + suffix`` whose lower-cased
    form is not in `existing`.

    Raises ``NameTranslationError`` if no unique name can be found,
    including when `prefix` and `suffix` leave no room for a single digit.

    >>> prefix = ("0" * 5) + "."
    >>> suffix = "." + ("0" * 10)
    >>> existing = [prefix + str(i) + suffix for i in range(100)]

    >>> e = list(existing)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.100.0000000000')
    True

    >>> e = list(existing)
    >>> e.remove(prefix + "1" + suffix)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.1.0000000000')
    True

    >>> e = list(existing)
    >>> e.remove(prefix + "2" + suffix)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.2.0000000000')
    True
    """
    # number of characters left for the counter's digits
    maxLength = maxFileNameLength - len(prefix) - len(suffix)
    # without room for at least one digit there is nothing to try; this
    # also avoids int("") raising an unrelated ValueError
    if maxLength > 0:
        # the largest number that fits in the available room
        maxValue = int("9" * maxLength)
        # try every number up to and including maxValue
        for counter in range(1, maxValue + 1):
            fullName = prefix + str(counter) + suffix
            if fullName.lower() not in existing:
                return fullName
    # raise an error if nothing has been found
    raise NameTranslationError("No unique name could be found.")
284
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings.
    from doctest import testmod

    testmod()
288