'use strict';

// See /FileAPI/file/resources/echo-content-escaped.py
//
// Replicates that script's escaping on an isomorphic-decoded string:
// backslashes are doubled, every character outside printable ASCII
// (0x20-0x7E) becomes a lowercase `\xHH` escape, and escaped CRLF
// pairs are then turned back into literal "\r\n".
function escapeString(string) {
  return string
    .replace(/\\/g, "\\\\")
    .replace(
      /[^\x20-\x7E]/g,
      (x) => `\\x${x.charCodeAt(0).toString(16).padStart(2, "0")}`,
    )
    .replace(/\\x0d\\x0a/g, "\r\n");
}

// Rationale for this particular test character sequence, which is
// used in filenames and also in file contents:
//
// - ABC~ ensures the string starts with something we can read to
//   ensure it is from the correct source; ~ is used because even
//   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
//   interpret it differently.
// - ‾¥ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; it is also the "simplest" case
//   needing substitution in ISO-2022-JP
// - U+FF65 (halfwidth katakana middle dot) is inside a single-byte
//   range of ISO-2022-JP in some variants and helps diagnose problems
//   due to filesystem encoding or locale; on the web it is distinct
//   when decoding but unified when encoding
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - • is inside Windows-1252 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
//   output easier to spot
// - 星 is inside ISO-2022-JP on a Kanji page and makes correct
//   output easier to spot
// - U+1F31F (glowing star) is outside the BMP and makes incorrect
//   surrogate pair substitution detectable and ensures substitutions
//   work correctly immediately after Kanji 2-byte ISO-2022-JP
// - 星 repeated here ensures the correct codec state is used
//   after a non-BMP substitution
// - ★ repeated here also makes correct output easier to spot
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes and also ensures
//   substitutions work correctly immediately after non-Kanji
//   2-byte ISO-2022-JP
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - • is inside Windows-1252 and again helps diagnose problems
//   due to filesystem encoding or locale
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - U+FF65 (halfwidth katakana middle dot) is inside a single-byte
//   range of ISO-2022-JP in some variants and helps diagnose problems
//   due to filesystem encoding or locale; on the web it is distinct
//   when decoding but unified when encoding
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; again it is a "simple"
//   substitution case
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¥‾ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ~XYZ ensures earlier errors don't lead to misencoding of
//   simple ASCII
//
// Overall the near-symmetry makes common I18N mistakes like
// off-by-1-after-non-BMP easier to spot. All the characters
// are also allowed in Windows Unicode filenames.
//
// U+FF65 and U+1F31F are spelled as escapes on purpose: the former is
// visually indistinguishable from U+30FB and the latter is easily
// dropped by tools that only handle the BMP. The sequence must encode
// to exactly the bytes listed in kTestFallbackUtf8.
const kTestChars =
  'ABC~‾¥≈¤\uFF65・•∙·☼★星\u{1F31F}星★☼·∙•・\uFF65¤≈¥‾~XYZ';

// The kTestFallback* strings represent the expected byte sequence from
// encoding kTestChars with the given encoding with "html" replacement
// mode, isomorphic-decoded. That means, characters that can't be
// encoded in that encoding get HTML-escaped, but no further
// `escapeString`-like escapes are needed.
const kTestFallbackUtf8 = (
  "ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
    "\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
    "\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
    "\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
);

// The non-BMP character sits between the two Kanji runs, so it is
// appended to the first half (after the switch back to ASCII) and is
// turned into a numeric character reference by the replace below.
const kTestFallbackIso2022jp = (
  ("ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B\u{1F31F}" +
    "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ")
    .replace(/[^\0-\x7F]/gu, (x) => `&#${x.codePointAt(0)};`)
);

// Characters that windows-1252 can encode are given as \xNN bytes;
// everything left above U+00FF becomes a numeric character reference.
const kTestFallbackWindows1252 = (
  "ABC~‾\xA5≈\xA4\uFF65\u30FB\x95∙\xB7☼★星\u{1F31F}星★☼\xB7∙\x95\u30FB\uFF65\xA4≈\xA5‾~XYZ".replace(
    /[^\0-\xFF]/gu,
    (x) => `&#${x.codePointAt(0)};`,
  )
);

// Every non-ASCII character of kTestChars becomes a numeric character
// reference.
const kTestFallbackXUserDefined = kTestChars.replace(
  /[^\0-\x7F]/gu,
  (x) => `&#${x.codePointAt(0)};`,
);

// formPostFileUploadTest - verifies multipart upload structure and
// numeric character reference replacement for filenames, field names,
// and field values using form submission.
//
// Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
// upload POST with controls and non-ASCII bytes escaped. This is done
// because navigations whose response body contains [\0\b\v] may get
// treated as a download, which is not what we want. Use the
// `escapeString` function to replicate that kind of escape (note that
// it takes an isomorphic-decoded string, not a byte sequence).
//
// Fields in the parameter object:
//
// - fileNameSource: purely explanatory and gives a clue about which
//   character encoding is the source for the non-7-bit-ASCII parts of
//   the fileBaseName, or Unicode if no smaller-than-Unicode source
//   contains all the characters. Used in the test name.
// - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
//   used for the constructed test file. Used in the test name.
// - formEncoding: the acceptCharset of the form used to submit the
//   test file. Used in the test name.
// - expectedEncodedBaseName: the expected formEncoding-encoded
//   version of fileBaseName, isomorphic-decoded. That means, characters
//   that can't be encoded in that encoding get HTML-escaped, but no
//   further `escapeString`-like escapes are needed.
//
// Registers one promise_test that submits a multipart/form-data form
// into an iframe and compares the echoed request body with the body
// expected for the given encoding.
const formPostFileUploadTest = ({
  fileNameSource,
  fileBaseName,
  formEncoding,
  expectedEncodedBaseName,
}) => {
  const testName =
    `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`;

  promise_test(async testCase => {
    // Creates an element and assigns the given properties in key order.
    const createElement = (tagName, properties) =>
      Object.assign(document.createElement(tagName), properties);

    // Wait for the window's load event unless loading already finished.
    if (document.readyState !== 'complete') {
      await new Promise(resolve => addEventListener('load', resolve));
    }

    // The iframe receives the form submission's response.
    const resultFrame = createElement('iframe', {
      name: 'formtargetframe',
    });
    document.body.append(resultFrame);
    testCase.add_cleanup(() => {
      document.body.removeChild(resultFrame);
    });

    const uploadForm = createElement('form', {
      acceptCharset: formEncoding,
      action: '/FileAPI/file/resources/echo-content-escaped.py',
      method: 'POST',
      enctype: 'multipart/form-data',
      target: resultFrame.name,
    });
    document.body.append(uploadForm);
    testCase.add_cleanup(() => {
      document.body.removeChild(uploadForm);
    });

    // `_charset_` lets the test check that the browser agrees about
    // which form charset is in use.
    const charsetField = createElement('input', {
      type: 'hidden',
      name: '_charset_',
    });
    uploadForm.append(charsetField);

    // A plain field whose value is the file name: checks value
    // replacement and encoding independently of file system
    // idiosyncrasies.
    const valueField = createElement('input', {
      type: 'hidden',
      name: 'filename',
      value: fileBaseName,
    });
    uploadForm.append(valueField);

    // The mirror image (file name as the field *name*) so that field
    // names are checked the same way.
    const nameField = createElement('input', {
      type: 'hidden',
      name: fileBaseName,
      value: 'filename',
    });
    uploadForm.append(nameField);

    const filePicker = createElement('input', {
      type: 'file',
      name: 'file',
    });
    uploadForm.append(filePicker);

    // Returns the last path segment, treating both / and \ as
    // separators; this drops c:\fakepath\ or any other pseudofolder.
    const lastPathSegment = path => path.split(/[\/\\]/).pop();

    // Attach the test file through a DataTransfer.
    const transfer = new DataTransfer();
    transfer.items.add(
      new File([kTestChars], fileBaseName, {type: 'text/plain'}));
    filePicker.files = transfer.files;

    // For historical reasons .value is prefixed with c:\fakepath\, but
    // its basename should equal the name exposed by .files[0].name.
    assert_equals(
      lastPathSegment(filePicker.files[0].name),
      lastPathSegment(filePicker.value),
      `The basename of the field's value should match its files[0].name`);

    // Submit and wait until the target frame reports a load.
    await new Promise(resolve => {
      uploadForm.submit();
      resultFrame.onload = resolve;
    });

    const echoedBody = resultFrame.contentDocument.body.textContent;
    const echoedLines = echoedBody.split('\n');
    // Drop the empty entry produced by a trailing newline.
    if (echoedLines.length > 0 && echoedLines[echoedLines.length - 1] === '') {
      echoedLines.pop();
    }
    const echoedBodyJson = JSON.stringify(echoedBody);
    assert_greater_than(
      echoedLines.length,
      2,
      `${fileBaseName}: multipart form data must have at least 3 lines: ${echoedBodyJson}`);

    const boundary = echoedLines[0];
    const closingBoundary = boundary + '--';
    assert_equals(
      echoedLines[echoedLines.length - 1],
      closingBoundary,
      `${fileBaseName}: multipart form data must end with ${boundary}--: ${echoedBodyJson}`);

    // Percent-encodes CR, LF and double quotes, as expected in names
    // and filenames.
    const percentEncodeSpecials = text =>
      text.replace(/[\r\n"]/g, encodeURIComponent);

    // Values and names have line breaks normalized to CRLF first; the
    // filename is used without that normalization.
    const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
    const asName = percentEncodeSpecials(asValue);
    const asFilename = percentEncodeSpecials(expectedEncodedBaseName);

    // echo-content-escaped.py escapes controls and non-ASCII bytes in
    // its response, so every caller-provided piece that may contain
    // such bytes goes through `escapeString` after its other expected
    // transformations.
    const charsetPart = [
      boundary,
      'Content-Disposition: form-data; name="_charset_"',
      '',
      formEncoding,
    ];
    const valuePart = [
      boundary,
      'Content-Disposition: form-data; name="filename"',
      '',
      // Values keep their \r\n line breaks unescaped (unlike names and
      // filenames); read back from the iframe they appear as \n.
      escapeString(asValue).replace(/\r\n/g, "\n"),
    ];
    const namePart = [
      boundary,
      `Content-Disposition: form-data; name="${escapeString(asName)}"`,
      '',
      'filename',
    ];
    const filePart = [
      boundary,
      `Content-Disposition: form-data; name="file"; filename="${escapeString(asFilename)}"`,
      'Content-Type: text/plain',
      '',
      escapeString(kTestFallbackUtf8),
    ];
    const expectedText = [
      ...charsetPart,
      ...valuePart,
      ...namePart,
      ...filePart,
      closingBoundary,
    ].join('\n');

    assert_true(
      echoedBody.startsWith(expectedText),
      `Unexpected multipart-shaped form data received:\n${echoedBody}\nExpected:\n${expectedText}`);
  }, testName);
};