diff --git a/History.md b/History.md
index 03f87eb5..251a00cc 100644
--- a/History.md
+++ b/History.md
@@ -1,3 +1,7 @@
+# v3.5.1
+
+* [ADDED] `maxRows` option to limit the number of rows parsed. [#275](https://github.com/C2FO/fast-csv/issues/275) [#277](https://github.com/C2FO/fast-csv/pull/277) - [@cbrittingham](https://github.com/cbrittingham)
+
# v3.5.0
* Upgraded dependencies
diff --git a/benchmark/.eslintrc.js b/benchmark/.eslintrc.js
index 19068549..377d30db 100644
--- a/benchmark/.eslintrc.js
+++ b/benchmark/.eslintrc.js
@@ -1,6 +1,8 @@
module.exports = {
+ parserOptions: {
+ project: null,
+ },
rules: {
"no-console": 0,
- "@typescript-eslint/no-var-requires": 0
},
};
diff --git a/benchmark/index.js b/benchmark/index.js
index b60a7ccf..f4a4ded5 100644
--- a/benchmark/index.js
+++ b/benchmark/index.js
@@ -2,7 +2,6 @@ const path = require('path');
const fs = require('fs');
const fastCsv = require('..');
-
function camelize(str) {
return str.replace(/_(.)/g, (a, b) => b.toUpperCase());
}
@@ -11,7 +10,7 @@ const promisfyStream = (stream, expectedRows) => {
let count = 0;
return new Promise((res, rej) => {
stream
- .on('data', (row) => {
+ .on('data', row => {
count += 1;
})
.on('end', () => {
@@ -25,13 +24,14 @@ const promisfyStream = (stream, expectedRows) => {
});
};
-const benchmarkFastCsv = type => (num) => {
+const benchmarkFastCsv = type => num => {
const file = path.resolve(__dirname, `./assets/${num}.${type}.csv`);
- const stream = fs.createReadStream(file)
- .pipe(fastCsv.parse({ headers: true }))
- .transform((data) => {
+ const stream = fs
+ .createReadStream(file)
+        .pipe(fastCsv.parse({ headers: true }))
+ .transform(data => {
const ret = {};
- [ 'first_name', 'last_name', 'email_address' ].forEach((prop) => {
+ ['first_name', 'last_name', 'email_address'].forEach(prop => {
ret[camelize(prop)] = data[prop];
});
ret.address = data.address;
@@ -47,7 +47,7 @@ async function benchmarkRun(title, num, m) {
for (let i = 0; i < howMany; i += 1) {
// eslint-disable-next-line no-await-in-loop
await m(num);
- console.log('%s: RUN(%d lines) 1 %dms', title, num, (new Date() - runStart));
+ console.log('%s: RUN(%d lines) 1 %dms', title, num, new Date() - runStart);
runStart = new Date();
}
console.log('%s: 3xAVG for %d lines %dms', title, num, (new Date() - start) / howMany);
@@ -55,7 +55,7 @@ async function benchmarkRun(title, num, m) {
function runBenchmarks(num, type) {
console.log(`\nRUNNING ${num}.${type}.csv benchmarks`, num);
- return benchmarkRun('fast-csv', num, benchmarkFastCsv(type))
+ return benchmarkRun('fast-csv', num, benchmarkFastCsv(type));
}
function benchmarks(type) {
@@ -67,7 +67,7 @@ function benchmarks(type) {
benchmarks('nonquoted')
.then(() => benchmarks('quoted'))
.then(() => process.exit())
- .catch((e) => {
+ .catch(e => {
console.error(e.stack);
return process.exit(1);
});
diff --git a/docs/parsing.md b/docs/parsing.md
index 5bfb55af..de6fd830 100644
--- a/docs/parsing.md
+++ b/docs/parsing.md
@@ -17,6 +17,7 @@
* [Ignoring Empty Rows](#csv-parse-ignoring-empty-rows)
* [Transforming Rows](#csv-parse-transforming)
* [Validating Rows](#csv-parse-validation)
+ * [Max Rows](#max-rows)
## Options
@@ -45,6 +46,7 @@
* `rtrim: {boolean} = false`: Set to `true` to right trim all fields.
* `ltrim: {boolean} = false`: Set to `true` to left trim all fields.
* `encoding: {string} = 'utf8'`: Passed to [StringDecoder](https://nodejs.org/api/string_decoder.html#string_decoder_new_stringdecoder_encoding) when decoding incoming buffers. Change if incoming content is not 'utf8' encoded.
+* `maxRows: {number}`: If number is `> 0` the specified number of rows will be parsed (e.g. `100` would return the first 100 rows of data).
## Events
@@ -585,3 +587,44 @@ Valid [row={"firstName":"timmy","lastName":"yukon"}]
Parsed 2 rows
```
+## Max Rows
+[`examples/parsing/max_rows.example.js`](../examples/parsing/max_rows.example.js)
+
+In the following example there are 10 rows, but only 5 will be parsed because of the `maxRows` option.
+
+```javascript
+const rows = [
+ 'header1,header2\n',
+ 'col1,col1\n',
+ 'col2,col2\n',
+ 'col3,col3\n',
+ 'col4,col4\n',
+ 'col5,col5\n',
+ 'col6,col6\n',
+ 'col7,col7\n',
+ 'col8,col8\n',
+ 'col9,col9\n',
+ 'col10,col10',
+];
+
+const stream = csv
+ .parse({ headers: true, maxRows: 5 })
+ .on('error', error => console.error(error))
+ .on('data', row => console.log(row))
+ .on('end', rowCount => console.log(`Parsed ${rowCount} rows`));
+
+rows.forEach(row => stream.write(row));
+stream.end();
+```
+
+Expected output
+
+```
+{ header1: 'col1', header2: 'col1' }
+{ header1: 'col2', header2: 'col2' }
+{ header1: 'col3', header2: 'col3' }
+{ header1: 'col4', header2: 'col4' }
+{ header1: 'col5', header2: 'col5' }
+Parsed 5 rows
+```
+
diff --git a/examples/parsing/max_rows.example.js b/examples/parsing/max_rows.example.js
new file mode 100644
index 00000000..261f92bf
--- /dev/null
+++ b/examples/parsing/max_rows.example.js
@@ -0,0 +1,24 @@
+const csv = require('../../');
+
+const rows = [
+ 'header1,header2\n',
+ 'col1,col1\n',
+ 'col2,col2\n',
+ 'col3,col3\n',
+ 'col4,col4\n',
+ 'col5,col5\n',
+ 'col6,col6\n',
+ 'col7,col7\n',
+ 'col8,col8\n',
+ 'col9,col9\n',
+ 'col10,col10',
+];
+
+const stream = csv
+ .parse({ headers: true, maxRows: 5 })
+ .on('error', error => console.error(error))
+ .on('data', row => console.log(row))
+ .on('end', rowCount => console.log(`Parsed ${rowCount} rows`));
+
+rows.forEach(row => stream.write(row));
+stream.end();
diff --git a/src/parser/CsvParserStream.ts b/src/parser/CsvParserStream.ts
index b740f317..c4460c1d 100644
--- a/src/parser/CsvParserStream.ts
+++ b/src/parser/CsvParserStream.ts
@@ -31,6 +31,10 @@ export default class CsvParserStream extends Transform {
this.rowTransformerValidator = new RowTransformerValidator();
}
+ private get hasHitRowLimit(): boolean {
+ return this.parserOptions.limitRows && this.rowCount >= this.parserOptions.maxRows;
+ }
+
public transform(transformFunction: RowTransformFunction): CsvParserStream {
this.rowTransformerValidator.rowTransform = transformFunction;
return this;
@@ -54,23 +58,31 @@ export default class CsvParserStream extends Transform {
}
public _transform(data: Buffer, encoding: string, done: TransformCallback): void {
+ // if we have hit our maxRows parsing limit then skip parsing
+ if (this.hasHitRowLimit) {
+ return done();
+ }
try {
const { lines } = this;
const newLine = lines + this.decoder.write(data);
const rows = this.parse(newLine, true);
- this.processRows(rows, done);
+ return this.processRows(rows, done);
} catch (e) {
- done(e);
+ return done(e);
}
}
public _flush(done: TransformCallback): void {
+ // if we have hit our maxRows parsing limit then skip parsing
+ if (this.hasHitRowLimit) {
+ return done();
+ }
try {
const newLine = this.lines + this.decoder.end();
const rows = this.parse(newLine, false);
- this.processRows(rows, done);
+ return this.processRows(rows, done);
} catch (e) {
- done(e);
+ return done(e);
}
}
@@ -86,7 +98,9 @@ export default class CsvParserStream extends Transform {
private processRows(rows: string[][], cb: TransformCallback): void {
const rowsLength = rows.length;
const iterate = (i: number): void => {
- if (i >= rowsLength) {
+ // if we have emitted all rows or we have hit the maxRows limit option
+ // then end
+ if (i >= rowsLength || this.hasHitRowLimit) {
return cb();
}
const row = rows[i];
diff --git a/src/parser/ParserOptions.ts b/src/parser/ParserOptions.ts
index a3b2c562..6db33b29 100644
--- a/src/parser/ParserOptions.ts
+++ b/src/parser/ParserOptions.ts
@@ -16,6 +16,7 @@ export interface ParserOptionsArgs {
ltrim?: boolean;
rtrim?: boolean;
encoding?: string;
+ maxRows?: number;
}
export class ParserOptions {
@@ -57,6 +58,10 @@ export class ParserOptions {
public readonly encoding: string = 'utf8';
+ public readonly limitRows: boolean = false;
+
+ public readonly maxRows: number = 0;
+
public constructor(opts?: ParserOptionsArgs) {
Object.assign(this, opts || {});
if (this.delimiter.length > 1) {
@@ -66,5 +71,9 @@ export class ParserOptions {
this.escapeChar = this.escape ?? this.quote;
this.supportsComments = !isNil(this.comment);
this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`);
+
+ if (this.maxRows > 0) {
+ this.limitRows = true;
+ }
}
}
diff --git a/test/parser/CsvParsingStream.test.ts b/test/parser/CsvParsingStream.test.ts
index b4018d14..4a5ba5dc 100644
--- a/test/parser/CsvParsingStream.test.ts
+++ b/test/parser/CsvParsingStream.test.ts
@@ -267,6 +267,24 @@ describe('CsvParserStream', () => {
});
});
+ describe('maxRows', () => {
+        it('should parse up to the specified number of maxRows', () => {
+            const maxRows = 3;
+            return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => {
+                assert.deepStrictEqual(rows, assets.withHeaders.parsed.slice(0, maxRows));
+                assert.strictEqual(count, maxRows);
+            });
+        });
+
+        it('should parse all rows if maxRows === 0', () => {
+            const maxRows = 0;
+            return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => {
+                assert.deepStrictEqual(rows, assets.withHeaders.parsed);
+                assert.strictEqual(count, rows.length);
+            });
+        });
+ });
+
it('should emit an error for malformed rows', next => {
assets.write(assets.malformed);
const stream = csv.parseFile(assets.malformed.path, { headers: true });
diff --git a/test/parser/ParserOptions.test.ts b/test/parser/ParserOptions.test.ts
index c3bea9b5..98e828b0 100644
--- a/test/parser/ParserOptions.test.ts
+++ b/test/parser/ParserOptions.test.ts
@@ -163,4 +163,24 @@ describe('ParserOptions', () => {
assert.strictEqual(createOptions({ renameHeaders: false }).renameHeaders, false);
});
});
+
+ describe('#maxRows', () => {
+ it('should default maxRows 0 and limitRows to false', () => {
+ const opts = createOptions();
+ assert.strictEqual(opts.maxRows, 0);
+ assert.strictEqual(opts.limitRows, false);
+ });
+
+ it('should set maxRows to the provided option and limitRows to true if maxRows > 0', () => {
+ const opts = createOptions({ maxRows: 1 });
+ assert.strictEqual(opts.maxRows, 1);
+ assert.strictEqual(opts.limitRows, true);
+ });
+
+        it('should set maxRows to the provided option and limitRows to false if maxRows === 0', () => {
+            const opts = createOptions({ maxRows: 0 });
+            assert.strictEqual(opts.maxRows, 0);
+            assert.strictEqual(opts.limitRows, false);
+        });
+ });
});