From 03405dddc33890a6de31f051033557390bdf37f4 Mon Sep 17 00:00:00 2001 From: Shin Date: Sat, 28 Nov 2020 12:36:51 -0300 Subject: [PATCH 1/3] feat: created new multiparser --- api.ts | 43 +++-------- multiparser.ts | 189 +++++++++++++++++++++++++++++++++++++++++++++++++ std.ts | 1 + types.ts | 10 +-- 4 files changed, 207 insertions(+), 36 deletions(-) create mode 100644 multiparser.ts diff --git a/api.ts b/api.ts index c984934ab..6a2c06e84 100644 --- a/api.ts +++ b/api.ts @@ -1,7 +1,7 @@ import { compress as brotli } from 'https://deno.land/x/brotli@v0.1.4/mod.ts' -import { FormDataReader } from 'https://deno.land/x/oak@v6.3.2/multipart.ts' import { gzipEncode } from 'https://deno.land/x/wasm_gzip@v1.0.0/mod.ts' import log from './log.ts' +import { multiParser } from './multiparser.ts' import { ServerRequest } from './std.ts' import type { APIRequest, FormDataBody } from './types.ts' @@ -90,43 +90,22 @@ export class Request extends ServerRequest implements APIRequest { async decodeBody(type: "form-data"): Promise async decodeBody(type: string): Promise { if (type === "text") { - try { - const buff: Uint8Array = await Deno.readAll(this.body); - const encoded = new TextDecoder("utf-8").decode(buff); - return encoded; - } catch (err) { - console.error("Failed to parse the request body.", err); - } + const buff: Uint8Array = await Deno.readAll(this.body); + const encoded = new TextDecoder("utf-8").decode(buff); + return encoded; } if (type === "json") { - try { - const buff: Uint8Array = await Deno.readAll(this.body); - const encoded = new TextDecoder("utf-8").decode(buff); - const json = JSON.parse(encoded); - return json; - } catch (err) { - console.error("Failed to parse the request body.", err); - } + const buff: Uint8Array = await Deno.readAll(this.body); + const encoded = new TextDecoder("utf-8").decode(buff); + const json = JSON.parse(encoded); + return json; } if (type === "form-data") { - try { - const boundary = this.headers.get("content-type"); - - if 
(!boundary) throw new Error("Failed to get the content-type") - - const reader = new FormDataReader(boundary, this.body); - const { fields, files } = await reader.read({ maxSize: 1024 * 1024 * 10 }); - - return { - get: (key: string) => fields[key], - getFile: (key: string) => files?.find(i => i.name === key) - } - - } catch (err) { - console.error("Failed to parse the request form-data", err) - } + const contentType = this.headers.get("content-type") as string + const form = await multiParser(this.body, contentType); + return form; } } diff --git a/multiparser.ts b/multiparser.ts new file mode 100644 index 000000000..1e46dad20 --- /dev/null +++ b/multiparser.ts @@ -0,0 +1,189 @@ +import { bytes } from "./std.ts"; +import { FormDataBody, FormFile } from "./types.ts"; + +const encoder = new TextEncoder(); +const decoder = new TextDecoder(); + +const encode = { + contentType: encoder.encode("Content-Type"), + filename: encoder.encode("filename"), + name: encoder.encode("name"), + dashdash: encoder.encode("--"), + boundaryEqual: encoder.encode("boundary="), + returnNewline2: encoder.encode("\r\n\r\n"), + carriageReturn: encoder.encode("\r"), +}; + +export async function multiParser( + body: Deno.Reader, + contentType: string +): Promise { + let buf = await Deno.readAll(body); + let boundaryByte = getBoundary(contentType); + + if (!boundaryByte) { + throw new Error("No boundary data information"); + } + + // Generate an array of Uint8Array + const pieces = getFieldPieces(buf, boundaryByte!); + + // Set all the pieces into one single object + const form = getForm(pieces); + + return form; +} + +function createFormData(): FormDataBody { + return { + fields: {}, + files: [], + getFile(key: string) { + return this.files.find((i) => i.name === key); + }, + get(key: string) { + return this.fields[key]; + }, + }; +} + +function getForm(pieces: Uint8Array[]) { + let form: FormDataBody = createFormData(); + // let form: Form = { fields: {}, files: {} }; + + for (let piece of 
pieces) { + const { headerByte, contentByte } = splitPiece(piece); + const headers = getHeaders(headerByte); + + // it's a string field + if (typeof headers === "string") { + // empty content, discard it + if (contentByte.byteLength === 1 && contentByte[0] === 13) { + continue; + } else { + // headers = "field1" + form.fields[headers] = decoder.decode(contentByte); + } + } // it's a file field + else { + let file: FormFile = { + name: headers.name, + filename: headers.filename, + contentType: headers.contentType, + size: contentByte.byteLength, + content: contentByte, + }; + + form.files.push(file); + } + } + return form; +} + +function getHeaders(headerByte: Uint8Array) { + let contentTypeIndex = bytes.findIndex(headerByte, encode.contentType); + + // no contentType, it may be a string field, return name only + if (contentTypeIndex < 0) { + return getNameOnly(headerByte); + } // file field, return with name, filename and contentType + else { + return getHeaderNContentType(headerByte, contentTypeIndex); + } +} + +function getHeaderNContentType( + headerByte: Uint8Array, + contentTypeIndex: number, +) { + let headers: Record = {}; + + let contentDispositionByte = headerByte.slice(0, contentTypeIndex - 2); + headers = getHeaderOnly(contentDispositionByte); + + // jump over + let contentTypeByte = headerByte.slice( + contentTypeIndex + encode.contentType.byteLength + 2, + ); + + headers.contentType = decoder.decode(contentTypeByte); + return headers; +} + +function getHeaderOnly(headerLineByte: Uint8Array) { + let headers: Record = {}; + + let filenameIndex = bytes.findIndex(headerLineByte, encode.filename); + if (filenameIndex < 0) { + headers.name = getNameOnly(headerLineByte); + } else { + headers = getNameNFilename(headerLineByte, filenameIndex); + } + return headers; +} + +function getNameNFilename(headerLineByte: Uint8Array, filenameIndex: number) { + // fetch filename first + let nameByte = headerLineByte.slice(0, filenameIndex - 2); + let filenameByte = 
headerLineByte.slice( + filenameIndex + encode.filename.byteLength + 2, + headerLineByte.byteLength - 1, + ); + + let name = getNameOnly(nameByte); + let filename = decoder.decode(filenameByte); + return { name, filename }; +} + +function getNameOnly(headerLineByte: Uint8Array) { + let nameIndex = bytes.findIndex(headerLineByte, encode.name); + // jump and get string inside double quote => "string" + let nameByte = headerLineByte.slice( + nameIndex + encode.name.byteLength + 2, + headerLineByte.byteLength - 1, + ); + return decoder.decode(nameByte); +} + +function splitPiece(piece: Uint8Array) { + const contentIndex = bytes.findIndex(piece, encode.returnNewline2); + const headerByte = piece.slice(0, contentIndex); + const contentByte = piece.slice(contentIndex + 4); + + return { headerByte, contentByte }; +} + +function getFieldPieces( + buf: Uint8Array, + boundaryByte: Uint8Array, +): Uint8Array[] { + const startBoundaryByte = bytes.concat(encode.dashdash, boundaryByte); + const endBoundaryByte = bytes.concat(startBoundaryByte, encode.dashdash); + + const pieces = []; + + while (!bytes.hasPrefix(buf, endBoundaryByte)) { + // jump over boundary + '\r\n' + buf = buf.slice(startBoundaryByte.byteLength + 2); + let boundaryIndex = bytes.findIndex(buf, startBoundaryByte); + // get field content piece + pieces.push(buf.slice(0, boundaryIndex - 1)); + buf = buf.slice(boundaryIndex); + } + + return pieces; +} + +function getBoundary(contentType: string): Uint8Array | undefined { + let contentTypeByte = encoder.encode(contentType); + let boundaryIndex = bytes.findIndex(contentTypeByte, encode.boundaryEqual); + if (boundaryIndex >= 0) { + // jump over 'boundary=' to get the real boundary + let boundary = contentTypeByte.slice( + boundaryIndex + encode.boundaryEqual.byteLength, + ); + return boundary; + } else { + return undefined; + } +} diff --git a/std.ts b/std.ts index 3accfd968..e04595d19 100644 --- a/std.ts +++ b/std.ts @@ -1,3 +1,4 @@ +export * as bytes from 
'https://deno.land/std@0.61.0/bytes/mod.ts' export { Untar } from 'https://deno.land/std@0.78.0/archive/tar.ts' export * as colors from 'https://deno.land/std@0.78.0/fmt/colors.ts' export { ensureDir } from 'https://deno.land/std@0.78.0/fs/ensure_dir.ts' diff --git a/types.ts b/types.ts index 4a69c7d32..49bab0a42 100644 --- a/types.ts +++ b/types.ts @@ -114,8 +114,10 @@ export interface RouterURL { * The form data body */ export interface FormDataBody { - get(key: string): string - getFile(key: string): FormFile + fields: Record; + files: FormFile[]; + get(key: string): string | undefined; + getFile(key: string): FormFile | undefined; } /** @@ -126,5 +128,5 @@ export interface FormFile { content: Uint8Array contentType: string filename: string - originalName: string -} \ No newline at end of file + size: number +} From b1a0e6023d9aed3aa6069cc7dc9b15beee101e29 Mon Sep 17 00:00:00 2001 From: Shin Date: Sat, 28 Nov 2020 12:43:06 -0300 Subject: [PATCH 2/3] feat: cleaned the code --- multiparser.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/multiparser.ts b/multiparser.ts index 1e46dad20..91fc69d3e 100644 --- a/multiparser.ts +++ b/multiparser.ts @@ -49,7 +49,6 @@ function createFormData(): FormDataBody { function getForm(pieces: Uint8Array[]) { let form: FormDataBody = createFormData(); - // let form: Form = { fields: {}, files: {} }; for (let piece of pieces) { const { headerByte, contentByte } = splitPiece(piece); @@ -60,11 +59,15 @@ function getForm(pieces: Uint8Array[]) { // empty content, discard it if (contentByte.byteLength === 1 && contentByte[0] === 13) { continue; - } else { - // headers = "field1" + } + + // headers = "field1" + else { form.fields[headers] = decoder.decode(contentByte); } - } // it's a file field + } + + // it's a file field else { let file: FormFile = { name: headers.name, @@ -86,7 +89,9 @@ function getHeaders(headerByte: Uint8Array) { // no contentType, it may be a string field, return name only 
if (contentTypeIndex < 0) { return getNameOnly(headerByte); - } // file field, return with name, filename and contentType + } + + // file field, return with name, filename and contentType else { return getHeaderNContentType(headerByte, contentTypeIndex); } @@ -137,11 +142,13 @@ function getNameNFilename(headerLineByte: Uint8Array, filenameIndex: number) { function getNameOnly(headerLineByte: Uint8Array) { let nameIndex = bytes.findIndex(headerLineByte, encode.name); + // jump and get string inside double quote => "string" let nameByte = headerLineByte.slice( nameIndex + encode.name.byteLength + 2, headerLineByte.byteLength - 1, ); + return decoder.decode(nameByte); } @@ -166,6 +173,7 @@ function getFieldPieces( // jump over boundary + '\r\n' buf = buf.slice(startBoundaryByte.byteLength + 2); let boundaryIndex = bytes.findIndex(buf, startBoundaryByte); + // get field content piece pieces.push(buf.slice(0, boundaryIndex - 1)); buf = buf.slice(boundaryIndex); @@ -177,6 +185,7 @@ function getFieldPieces( function getBoundary(contentType: string): Uint8Array | undefined { let contentTypeByte = encoder.encode(contentType); let boundaryIndex = bytes.findIndex(contentTypeByte, encode.boundaryEqual); + if (boundaryIndex >= 0) { // jump over 'boundary=' to get the real boundary let boundary = contentTypeByte.slice( From 6f42b204c2bfb4f2b6768933970e6c06f0888cbe Mon Sep 17 00:00:00 2001 From: Shin Date: Sun, 29 Nov 2020 20:20:18 -0300 Subject: [PATCH 3/3] feat: added unit test for multiparser --- multiparser_test.ts | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 multiparser_test.ts diff --git a/multiparser_test.ts b/multiparser_test.ts new file mode 100644 index 000000000..07210d6b6 --- /dev/null +++ b/multiparser_test.ts @@ -0,0 +1,34 @@ +import { assertEquals } from "https://deno.land/std/testing/asserts.ts"; +import { multiParser } from "./multiparser.ts"; + +const encoder = new TextEncoder(); + +const contentType = 
"multipart/form-data; boundary=ALEPH-BOUNDARY"; +const simpleString = '--ALEPH-BOUNDARY\rContent-Disposition: form-data; name="string_1"\r\n\r\nsimple string here\r--ALEPH-BOUNDARY--'; +const complexString = 'some text to be ignored\r\r--ALEPH-BOUNDARY\rContent-Disposition: form-data; name="id"\r\n\r\n666\r--ALEPH-BOUNDARY\rContent-Disposition: form-data; name="title"\r\n\r\nHello World\r--ALEPH-BOUNDARY\rContent-Disposition: form-data; name="multiline"\r\n\r\nworld,\n hello\r--ALEPH-BOUNDARY\rContent-Disposition: form-data; name="file1"; filename="file_name.ext"\rContent-Type: video/mp2t\r\n\r\nsome random data\r--ALEPH-BOUNDARY--\rmore text to be ignored to be ignored\r'; + +Deno.test(`basic multiparser string`, async () => { + const buff = new Deno.Buffer(encoder.encode(simpleString)); + const multiForm = await multiParser(buff, contentType); + + assertEquals(multiForm.get("string_1"), "simple string here"); +}); + +Deno.test(`complex multiparser string`, async () => { + const buff = new Deno.Buffer(encoder.encode(complexString)); + const multiFrom = await multiParser(buff, contentType); + + // Asserting multiple string values + assertEquals(multiFrom.get("id"), "666"); + assertEquals(multiFrom.get("title"), "Hello World"); + assertEquals(multiFrom.get("multiline"), "world,\n hello"); + + // Asserting the file information + const file = multiFrom.getFile("file1"); + + if (!file) { return } + + assertEquals(file.name, "file1"); + assertEquals(file.contentType, "video/mp2t"); + assertEquals(file.size, 16); +});