'use strict';

// See /FileAPI/file/resources/echo-content-escaped.py
//
// Replicates the escaping applied by that echo script so expected
// strings can be compared against its response body.
//
// `string` is an isomorphic-decoded string (one code unit per byte),
// not a byte sequence. The result is `string` with:
// - every backslash doubled;
// - every code unit outside printable ASCII (0x20-0x7E) rewritten as a
//   lowercase `\xNN` escape, zero-padded to at least two hex digits;
// - every escaped CR LF pair (`\x0d\x0a`) turned back into a literal
//   "\r\n", so CRLF line breaks stay real line breaks.
function escapeString(string) {
  return string
    // Backslashes go first so the escapes introduced below stay unambiguous.
    .replace(/\\/g, "\\\\")
    .replace(
      /[^\x20-\x7E]/g,
      (x) => `\\x${x.charCodeAt(0).toString(16).padStart(2, "0")}`,
    )
    .replace(/\\x0d\\x0a/g, "\r\n");
}

// Rationale for this particular test character sequence, which is
// used in filenames and also in file contents:
//
// - ABC~ ensures the string starts with something we can read to
//   ensure it is from the correct source; ~ is used because even
//   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
//   interpret it differently.
// - ‾¥ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; it is also the "simplest" case
//   needing substitution in ISO-2022-JP
// - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some
//   variants and helps diagnose problems due to filesystem encoding
//   or locale; on the web it is distinct when decoding but unified
//   when encoding
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - • is inside Windows-1252 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
//   output easier to spot
// - 星 is inside ISO-2022-JP on a Kanji page and makes correct
//   output easier to spot
// - 🌟 (U+1F31F) is outside the BMP and makes incorrect surrogate pair
//   substitution detectable and ensures substitutions work
//   correctly immediately after Kanji 2-byte ISO-2022-JP
// - 星 repeated here ensures the correct codec state is used
//   after a non-BMP substitution
// - ★ repeated here also makes correct output easier to spot
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes and also ensures
//   substitutions work correctly immediately after non-Kanji
//   2-byte ISO-2022-JP
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - • is inside Windows-1252 and again helps diagnose problems
//   due to filesystem encoding or locale
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some
//   variants and helps diagnose problems due to filesystem encoding
//   or locale; on the web it is distinct when decoding but unified
//   when encoding
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; again it is a "simple"
//   substitution case
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¥‾ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ~XYZ ensures earlier errors don't lead to misencoding of
//   simple ASCII
//
// Overall the near-symmetry makes common I18N mistakes like
// off-by-1-after-non-BMP easier to spot. All the characters
// are also allowed in Windows Unicode filenames.
//
// The two middle dots (U+FF65, U+30FB) and the non-BMP star (U+1F31F)
// are written as escapes: the dots are visually near-identical and all
// three are easily damaged by tools that normalize or re-encode the
// file. The UTF-8 encoding of this string must equal kTestFallbackUtf8.
const kTestChars =
  'ABC~‾¥≈¤\uFF65\u30FB•∙·☼★星\u{1F31F}星★☼·∙•\u30FB\uFF65¤≈¥‾~XYZ';

// The kTestFallback* strings represent the expected byte sequence from
// encoding kTestChars with the given encoding with "html" replacement
// mode, isomorphic-decoded. That means, characters that can't be
// encoded in that encoding get HTML-escaped, but no further
// `escapeString`-like escapes are needed.

// UTF-8 can encode every character of kTestChars, so this is simply
// its UTF-8 byte sequence with each byte written as one \xNN code unit.
const kTestFallbackUtf8 = (
  "ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
    "\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
    "\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
    "\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
);

// Expected ISO-2022-JP form of kTestChars. The literal spells out the
// escape sequences (\x1B(J, \x1B$B, \x1B(B) and the bytes of the
// encodable characters; every remaining non-ASCII character is then
// replaced by its decimal numeric character reference. The /u flag
// matters: it makes the non-BMP star (U+1F31F, written as an escape so
// it cannot be damaged by re-encoding) match as one code point and
// yield a single &#127775; rather than two surrogate references.
const kTestFallbackIso2022jp = (
  ("ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B\u{1F31F}" +
    "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ")
    .replace(/[^\0-\x7F]/gu, (x) => `&#${x.codePointAt(0)};`)
);

// Expected windows-1252 form of kTestChars. Characters the encoding can
// represent are written as their single byte (\xA5, \xA4, \x95, \xB7);
// every code point above 0xFF is replaced by its decimal numeric
// character reference. The two middle dots (U+FF65, U+30FB) and the
// non-BMP star (U+1F31F) are written as escapes so they cannot be
// confused with each other or damaged by re-encoding of this file.
const kTestFallbackWindows1252 = (
  ("ABC~‾\xA5≈\xA4\uFF65\u30FB\x95∙\xB7☼★星\u{1F31F}" +
    "星★☼\xB7∙\x95\u30FB\uFF65\xA4≈\xA5‾~XYZ").replace(
    /[^\0-\xFF]/gu,
    (x) => `&#${x.codePointAt(0)};`,
  )
);

// Expected x-user-defined form of kTestChars: ASCII is kept as is and
// every other code point is replaced by its decimal numeric character
// reference. The /u flag makes a non-BMP character match as a single
// code point instead of two separate surrogates.
const kTestFallbackXUserDefined = kTestChars.replace(
  /[^\0-\x7F]/gu,
  (x) => `&#${x.codePointAt(0)};`,
);

// formPostFileUploadTest - verifies multipart upload structure and
// numeric character reference replacement for filenames, field names,
// and field values using form submission.
//
// Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
// upload POST with controls and non-ASCII bytes escaped. This is done
// because navigations whose response body contains [\0\b\v] may get
// treated as a download, which is not what we want. Use the
// `escapeString` function to replicate that kind of escape (note that
// it takes an isomorphic-decoded string, not a byte sequence).
//
// Fields in the parameter object:
//
// - fileNameSource: purely explanatory and gives a clue about which
//   character encoding is the source for the non-7-bit-ASCII parts of
//   the fileBaseName, or Unicode if no smaller-than-Unicode source
//   contains all the characters. Used in the test name.
// - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
//   used for the constructed test file. Used in the test name.
// - formEncoding: the acceptCharset of the form used to submit the
//   test file. Used in the test name.
// - expectedEncodedBaseName: the expected formEncoding-encoded
//   version of fileBaseName, isomorphic-decoded. That means, characters
//   that can't be encoded in that encoding get HTML-escaped, but no
//   further `escapeString`-like escapes are needed.
//
// Registers one promise_test; nothing is returned.
const formPostFileUploadTest = ({
  fileNameSource,
  fileBaseName,
  formEncoding,
  expectedEncodedBaseName,
}) => {
  promise_test(async testCase => {

    // document.body is needed below, so wait for the page to finish
    // loading first.
    if (document.readyState !== 'complete') {
      await new Promise(resolve => addEventListener('load', resolve));
    }

    // The form is submitted into this iframe so that the echoed request
    // body can be read back from its document.
    const formTargetFrame = Object.assign(document.createElement('iframe'), {
      name: 'formtargetframe',
    });
    document.body.append(formTargetFrame);
    testCase.add_cleanup(() => {
      document.body.removeChild(formTargetFrame);
    });

    const form = Object.assign(document.createElement('form'), {
      acceptCharset: formEncoding,
      action: '/FileAPI/file/resources/echo-content-escaped.py',
      method: 'POST',
      enctype: 'multipart/form-data',
      target: formTargetFrame.name,
    });
    document.body.append(form);
    testCase.add_cleanup(() => {
      document.body.removeChild(form);
    });

    // Used to verify that the browser agrees with the test about
    // which form charset is used.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: '_charset_',
    }));

    // Used to verify that the browser agrees with the test about
    // field value replacement and encoding independently of file system
    // idiosyncrasies.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: 'filename',
      value: fileBaseName,
    }));

    // Same, but with name and value reversed to ensure field names
    // get the same treatment.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: fileBaseName,
      value: 'filename',
    }));

    const fileInput = Object.assign(document.createElement('input'), {
      type: 'file',
      name: 'file',
    });
    form.append(fileInput);

    // Removes c:\fakepath\ or other pseudofolder and returns just the
    // final component of filePath; allows both / and \ as segment
    // delimiters.
    const baseNameOfFilePath = filePath => filePath.split(/[\/\\]/).pop();
    await new Promise(resolve => {
      const dataTransfer = new DataTransfer();
      dataTransfer.items.add(
          new File([kTestChars], fileBaseName, {type: 'text/plain'}));
      fileInput.files = dataTransfer.files;
      // For historical reasons .value will be prefixed with
      // c:\fakepath\, but the basename should match the file name
      // exposed through the newer .files[0].name API. This check
      // verifies that assumption.
      assert_equals(
          baseNameOfFilePath(fileInput.files[0].name),
          baseNameOfFilePath(fileInput.value),
          `The basename of the field's value should match its files[0].name`);
      // Install the handler before submitting so the response load can
      // never be missed.
      formTargetFrame.onload = resolve;
      form.submit();
    });

    const formDataText = formTargetFrame.contentDocument.body.textContent;
    const formDataLines = formDataText.split('\n');
    // Drop the empty element produced by a trailing newline.
    if (formDataLines.length && !formDataLines[formDataLines.length - 1]) {
      formDataLines.pop();
    }
    assert_greater_than(
        formDataLines.length,
        2,
        `${fileBaseName}: multipart form data must have at least 3 lines: ${
             JSON.stringify(formDataText)
           }`);
    // The first echoed line is the multipart boundary; the last line
    // must be that boundary followed by "--".
    const boundary = formDataLines[0];
    assert_equals(
        formDataLines[formDataLines.length - 1],
        boundary + '--',
        `${fileBaseName}: multipart form data must end with ${boundary}--: ${
             JSON.stringify(formDataText)
           }`);

    // Field values have their line breaks normalized to CRLF. Field
    // names additionally have CR, LF and double quotes percent-encoded.
    // Filenames get the percent-encoding but no line break
    // normalization.
    const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
    const asName = asValue.replace(/[\r\n"]/g, encodeURIComponent);
    const asFilename = expectedEncodedBaseName.replace(/[\r\n"]/g, encodeURIComponent);

    // The response body from echo-content-escaped.py has controls and non-ASCII
    // bytes escaped, so any caller-provided field that might contain such bytes
    // must be passed to `escapeString`, after any other expected
    // transformations.
    const expectedText = [
      boundary,
      'Content-Disposition: form-data; name="_charset_"',
      '',
      formEncoding,
      boundary,
      'Content-Disposition: form-data; name="filename"',
      '',
      // Unlike for names and filenames, multipart/form-data values don't escape
      // \r\n linebreaks, and when they're read from an iframe they become \n.
      escapeString(asValue).replace(/\r\n/g, "\n"),
      boundary,
      `Content-Disposition: form-data; name="${escapeString(asName)}"`,
      '',
      'filename',
      boundary,
      `Content-Disposition: form-data; name="file"; ` +
          `filename="${escapeString(asFilename)}"`,
      'Content-Type: text/plain',
      '',
      // The file body is always kTestChars, which the expected text
      // lists as its UTF-8 bytes regardless of formEncoding.
      escapeString(kTestFallbackUtf8),
      boundary + '--',
    ].join('\n');

    // A prefix match tolerates anything echoed after the closing
    // boundary, such as a trailing newline.
    assert_true(
        formDataText.startsWith(expectedText),
        `Unexpected multipart-shaped form data received:\n${
             formDataText
           }\nExpected:\n${expectedText}`);
  }, `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`);
};
