Skip to content

Commit

Permalink
Add support for reading from a web stream (#635)
Browse files Browse the repository at this point in the history
Co-authored-by: Sindre Sorhus <[email protected]>
  • Loading branch information
Borewit and sindresorhus authored Jul 7, 2024
1 parent 00e051b commit b815b5e
Show file tree
Hide file tree
Showing 11 changed files with 204 additions and 189 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
fail-fast: false
matrix:
node-version:
- 22
- 20
- 18
steps:
Expand Down
29 changes: 0 additions & 29 deletions browser.d.ts

This file was deleted.

15 changes: 0 additions & 15 deletions browser.js

This file was deleted.

61 changes: 13 additions & 48 deletions core.d.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import type {Readable as ReadableStream} from 'node:stream';
/**
Typings for primary entry point, Node.js specific typings can be found in index.d.ts
*/

import type {ReadableStream as WebReadableStream} from 'node:stream/web';
import type {ITokenizer} from 'strtok3';

/**
Either the Node.js ReadableStream or the `lib.dom.d.ts` ReadableStream.
Related issue: https://github.com/DefinitelyTyped/DefinitelyTyped/pull/60377
*/
export type AnyWebReadableStream<G> = WebReadableStream<G> | ReadableStream<G>;

export type FileExtension =
| 'jpg'
| 'png'
Expand Down Expand Up @@ -318,10 +328,6 @@ export type FileTypeResult = {
readonly mime: MimeType;
};

export type ReadableStreamWithFileType = ReadableStream & {
readonly fileType?: FileTypeResult;
};

/**
Detect the file type of a `Uint8Array`, or `ArrayBuffer`.
Expand All @@ -339,10 +345,10 @@ Detect the file type of a Node.js [readable stream](https://nodejs.org/api/strea
The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
@param stream - A readable stream representing file data.
@param stream - A Node.js Readable stream or Web API Readable Stream representing file data. The Web Readable stream **must be a byte stream**.
@returns The detected file type, or `undefined` when there is no match.
*/
export function fileTypeFromStream(stream: ReadableStream): Promise<FileTypeResult | undefined>;
export function fileTypeFromStream(stream: AnyWebReadableStream<Uint8Array>): Promise<FileTypeResult | undefined>;

/**
Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source.
Expand Down Expand Up @@ -391,37 +397,6 @@ export type StreamOptions = {
readonly sampleSize?: number;
};

/**
Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
This method can be handy to put in between a stream, but it comes with a price.
Internally `stream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type.
The sample size impacts the file detection resolution.
A smaller sample size will result in lower probability of the best file type detection.
**Note:** This method is only available when using Node.js.
**Note:** Requires Node.js 14 or later.
@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@returns A `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
@example
```
import got from 'got';
import {fileTypeStream} from 'file-type';
const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
const stream1 = got.stream(url);
const stream2 = await fileTypeStream(stream1, {sampleSize: 1024});
if (stream2.fileType?.mime === 'image/jpeg') {
// stream2 can be used to stream the JPEG image (from the very beginning of the stream)
}
```
*/
export function fileTypeStream(readableStream: ReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;

/**
Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob) or [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File).
Expand Down Expand Up @@ -508,11 +483,6 @@ export declare class FileTypeParser {
*/
fromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeFromStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromStream(stream: ReadableStream): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeFromTokenizer}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
Expand All @@ -522,9 +492,4 @@ export declare class FileTypeParser {
Works the same way as {@link fileTypeFromBlob}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromBlob(blob: Blob): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(readableStream: ReadableStream, options?: StreamOptions): Promise<FileTypeResult | undefined>;
}
52 changes: 8 additions & 44 deletions core.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/**
Primary entry point, Node.js specific entry point is index.js
*/

import * as Token from 'token-types';
import * as strtok3 from 'strtok3/core';
import {includes, indexOf, getUintBE} from 'uint8array-extras';
Expand All @@ -8,7 +12,7 @@ import {
} from './util.js';
import {extensions, mimeTypes} from './supported.js';

const minimumBytes = 4100; // A fair amount of file-types are detectable within this range.
export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.

export async function fileTypeFromStream(stream) {
return new FileTypeParser().fromStream(stream);
Expand Down Expand Up @@ -88,54 +92,18 @@ export class FileTypeParser {
}

async fromBlob(blob) {
const buffer = await blob.arrayBuffer();
return this.fromBuffer(new Uint8Array(buffer));
return this.fromStream(blob.stream());
}

async fromStream(stream) {
const tokenizer = await strtok3.fromStream(stream);
const tokenizer = await strtok3.fromWebStream(stream);
try {
return await this.fromTokenizer(tokenizer);
} finally {
await tokenizer.close();
}
}

async toDetectionStream(readableStream, options = {}) {
const {default: stream} = await import('node:stream');
const {sampleSize = minimumBytes} = options;

return new Promise((resolve, reject) => {
readableStream.on('error', reject);

readableStream.once('readable', () => {
(async () => {
try {
// Set up output stream
const pass = new stream.PassThrough();
const outputStream = stream.pipeline ? stream.pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);

// Read the input stream and detect the filetype
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
try {
pass.fileType = await this.fromBuffer(chunk);
} catch (error) {
if (error instanceof strtok3.EndOfStreamError) {
pass.fileType = undefined;
} else {
reject(error);
}
}

resolve(outputStream);
} catch (error) {
reject(error);
}
})();
});
});
}

check(header, options) {
return _check(this.buffer, header, options);
}
Expand All @@ -145,7 +113,7 @@ export class FileTypeParser {
}

async parse(tokenizer) {
this.buffer = new Uint8Array(minimumBytes);
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);

// Keep reading until EOF if the file size is unknown.
if (tokenizer.fileInfo.size === undefined) {
Expand Down Expand Up @@ -1690,9 +1658,5 @@ export class FileTypeParser {
}
}

export async function fileTypeStream(readableStream, options = {}) {
return new FileTypeParser().toDetectionStream(readableStream, options);
}

export const supportedExtensions = new Set(extensions);
export const supportedMimeTypes = new Set(mimeTypes);
60 changes: 58 additions & 2 deletions index.d.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,69 @@
import type {FileTypeResult} from './core.js';
/**
Typings for Node.js specific entry point.
*/

import type {Readable as NodeReadableStream} from 'node:stream';
import type {FileTypeResult, StreamOptions, AnyWebReadableStream} from './core.js';
import {FileTypeParser} from './core.js';

export type ReadableStreamWithFileType = NodeReadableStream & {
readonly fileType?: FileTypeResult;
};

export declare class NodeFileTypeParser extends FileTypeParser {
/**
@param stream - Node.js `stream.Readable` or Web API `ReadableStream`.
*/
fromStream(stream: AnyWebReadableStream<Uint8Array> | NodeReadableStream): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
}

/**
Detect the file type of a file path.
The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
@param path - The file path to parse.
@param path
@returns The detected file type and MIME type or `undefined` when there is no match.
*/
export function fileTypeFromFile(path: string): Promise<FileTypeResult | undefined>;

export function fileTypeFromStream(stream: AnyWebReadableStream<Uint8Array> | NodeReadableStream): Promise<FileTypeResult | undefined>;

/**
Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
This method can be handy to put in between a stream, but it comes with a price.
Internally `stream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type.
The sample size impacts the file detection resolution.
A smaller sample size will result in lower probability of the best file type detection.
**Note:** This method is only available when using Node.js.
**Note:** Requires Node.js 14 or later.
@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@param options - Maybe used to override the default sample-size.
@returns A `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
@example
```
import got from 'got';
import {fileTypeStream} from 'file-type';
const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
const stream1 = got.stream(url);
const stream2 = await fileTypeStream(stream1, {sampleSize: 1024});
if (stream2.fileType?.mime === 'image/jpeg') {
// stream2 can be used to stream the JPEG image (from the very beginning of the stream)
}
```
*/
export function fileTypeStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;

export * from './core.js';
63 changes: 61 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,56 @@
/**
Node.js specific entry point.
*/

import {ReadableStream as WebReadableStream} from 'node:stream/web';
import * as strtok3 from 'strtok3';
import {FileTypeParser} from './core.js';
import {FileTypeParser, reasonableDetectionSizeInBytes} from './core.js';

export class NodeFileTypeParser extends FileTypeParser {
async fromStream(stream) {
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream) : strtok3.fromStream(stream));
try {
return super.fromTokenizer(tokenizer);
} finally {
await tokenizer.close();
}
}

async toDetectionStream(readableStream, options = {}) {
const {default: stream} = await import('node:stream');
const {sampleSize = reasonableDetectionSizeInBytes} = options;

return new Promise((resolve, reject) => {
readableStream.on('error', reject);

readableStream.once('readable', () => {
(async () => {
try {
// Set up output stream
const pass = new stream.PassThrough();
const outputStream = stream.pipeline ? stream.pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);

// Read the input stream and detect the filetype
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
try {
pass.fileType = await this.fromBuffer(chunk);
} catch (error) {
if (error instanceof strtok3.EndOfStreamError) {
pass.fileType = undefined;
} else {
reject(error);
}
}

resolve(outputStream);
} catch (error) {
reject(error);
}
})();
});
});
}
}

export async function fileTypeFromFile(path, fileTypeOptions) {
const tokenizer = await strtok3.fromFile(path);
Expand All @@ -11,4 +62,12 @@ export async function fileTypeFromFile(path, fileTypeOptions) {
}
}

export * from './core.js';
export async function fileTypeFromStream(stream, fileTypeOptions) {
return (new NodeFileTypeParser(fileTypeOptions)).fromStream(stream);
}

export async function fileTypeStream(readableStream, options = {}) {
return new NodeFileTypeParser().toDetectionStream(readableStream, options);
}

export {fileTypeFromBuffer, fileTypeFromBlob, FileTypeParser, supportedMimeTypes, supportedExtensions} from './core.js';
Loading

0 comments on commit b815b5e

Please sign in to comment.