Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pr277 #294

Merged
merged 3 commits into from
Dec 15, 2019
Merged

Pr277 #294

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions History.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# v3.5.1

* [ADDED] `maxRows` option to limit the number of rows parsed. [#275](https://github.com/C2FO/fast-csv/issues/275) [#277](https://github.com/C2FO/fast-csv/pull/277) - [@cbrittingham](https://github.com/cbrittingham)

# v3.5.0

* Upgraded dependencies
Expand Down
4 changes: 3 additions & 1 deletion benchmark/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module.exports = {
parserOptions: {
project: null,
},
rules: {
"no-console": 0,
"@typescript-eslint/no-var-requires": 0
},
};
20 changes: 10 additions & 10 deletions benchmark/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ const path = require('path');
const fs = require('fs');
const fastCsv = require('..');


/**
 * Converts a snake_case identifier to camelCase.
 *
 * Every underscore followed by a character is replaced by that character
 * upper-cased; an underscore at the very end of the string is left as-is.
 *
 * @param {string} str - The snake_case string to convert.
 * @returns {string} The camelCased string.
 */
function camelize(str) {
    const upperCaseCaptured = (match, letter) => letter.toUpperCase();
    return str.replace(/_(.)/g, upperCaseCaptured);
}
Expand All @@ -11,7 +10,7 @@ const promisfyStream = (stream, expectedRows) => {
let count = 0;
return new Promise((res, rej) => {
stream
.on('data', (row) => {
.on('data', row => {
count += 1;
})
.on('end', () => {
Expand All @@ -25,13 +24,14 @@ const promisfyStream = (stream, expectedRows) => {
});
};

const benchmarkFastCsv = type => (num) => {
const benchmarkFastCsv = type => num => {
const file = path.resolve(__dirname, `./assets/${num}.${type}.csv`);
const stream = fs.createReadStream(file)
.pipe(fastCsv.parse({ headers: true }))
.transform((data) => {
const stream = fs
.createReadStream(file)
.pipe(fastCsv.parse({ headers: true, maxRows: 10 }))
.transform(data => {
const ret = {};
[ 'first_name', 'last_name', 'email_address' ].forEach((prop) => {
['first_name', 'last_name', 'email_address'].forEach(prop => {
ret[camelize(prop)] = data[prop];
});
ret.address = data.address;
Expand All @@ -47,15 +47,15 @@ async function benchmarkRun(title, num, m) {
for (let i = 0; i < howMany; i += 1) {
// eslint-disable-next-line no-await-in-loop
await m(num);
console.log('%s: RUN(%d lines) 1 %dms', title, num, (new Date() - runStart));
console.log('%s: RUN(%d lines) 1 %dms', title, num, new Date() - runStart);
runStart = new Date();
}
console.log('%s: 3xAVG for %d lines %dms', title, num, (new Date() - start) / howMany);
}

function runBenchmarks(num, type) {
console.log(`\nRUNNING ${num}.${type}.csv benchmarks`, num);
return benchmarkRun('fast-csv', num, benchmarkFastCsv(type))
return benchmarkRun('fast-csv', num, benchmarkFastCsv(type));
}

function benchmarks(type) {
Expand All @@ -67,7 +67,7 @@ function benchmarks(type) {
benchmarks('nonquoted')
.then(() => benchmarks('quoted'))
.then(() => process.exit())
.catch((e) => {
.catch(e => {
console.error(e.stack);
return process.exit(1);
});
43 changes: 43 additions & 0 deletions docs/parsing.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* [Ignoring Empty Rows](#csv-parse-ignoring-empty-rows)
* [Transforming Rows](#csv-parse-transforming)
* [Validating Rows](#csv-parse-validation)
* [Max Rows](#max-rows)

<a name="parsing-options"></a>
## Options
Expand Down Expand Up @@ -45,6 +46,7 @@
* `rtrim: {boolean} = false`: Set to `true` to right trim all fields.
* `ltrim: {boolean} = false`: Set to `true` to left trim all fields.
* `encoding: {string} = 'utf8'`: Passed to [StringDecoder](https://nodejs.org/api/string_decoder.html#string_decoder_new_stringdecoder_encoding) when decoding incoming buffers. Change if incoming content is not 'utf8' encoded.
* `maxRows: {number} = 0`: If number is `> 0` then only the specified number of rows will be parsed (e.g. `100` would return the first 100 rows of data).

<a name="parsing-events"></a>
## Events
Expand Down Expand Up @@ -585,3 +587,44 @@ Valid [row={"firstName":"timmy","lastName":"yukon"}]
Parsed 2 rows
```

<a name="max-rows"></a>
[`examples/parsing/max_rows.example.js`](../examples/parsing/max_rows.example.js)

In the following example there are 10 rows, but only 5 will be parsed because of the `maxRows` option.

```javascript
const rows = [
'header1,header2\n',
'col1,col1\n',
'col2,col2\n',
'col3,col3\n',
'col4,col4\n',
'col5,col5\n',
'col6,col6\n',
'col7,col7\n',
'col8,col8\n',
'col9,col9\n',
'col10,col10',
];

const stream = csv
.parse({ headers: true, maxRows: 5 })
.on('error', error => console.error(error))
.on('data', row => console.log(row))
.on('end', rowCount => console.log(`Parsed ${rowCount} rows`));

rows.forEach(row => stream.write(row));
stream.end();
```

Expected output

```
{ header1: 'col1', header2: 'col1' }
{ header1: 'col2', header2: 'col2' }
{ header1: 'col3', header2: 'col3' }
{ header1: 'col4', header2: 'col4' }
{ header1: 'col5', header2: 'col5' }
Parsed 5 rows
```

24 changes: 24 additions & 0 deletions examples/parsing/max_rows.example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const csv = require('../../');

// Sample CSV input, written to the parser one chunk at a time:
// a header line followed by 10 data rows (the last one has no trailing newline).
const rows = [
'header1,header2\n',
'col1,col1\n',
'col2,col2\n',
'col3,col3\n',
'col4,col4\n',
'col5,col5\n',
'col6,col6\n',
'col7,col7\n',
'col8,col8\n',
'col9,col9\n',
'col10,col10',
];

// `headers: true` turns each data row into an object keyed by the header line.
// `maxRows: 5` limits parsing to the first 5 data rows, so only 5 `data` events
// are logged and the `end` handler reports a row count of 5.
const stream = csv
.parse({ headers: true, maxRows: 5 })
.on('error', error => console.error(error))
.on('data', row => console.log(row))
.on('end', rowCount => console.log(`Parsed ${rowCount} rows`));

// Feed every chunk into the parser, then signal that the input is complete.
rows.forEach(row => stream.write(row));
stream.end();
24 changes: 19 additions & 5 deletions src/parser/CsvParserStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ export default class CsvParserStream extends Transform {
this.rowTransformerValidator = new RowTransformerValidator();
}

/**
 * Whether the configured row limit has been reached.
 *
 * True only when row limiting is enabled (`parserOptions.limitRows`) and the
 * number of rows counted so far is at least `parserOptions.maxRows`.
 */
private get hasHitRowLimit(): boolean {
    const { limitRows, maxRows } = this.parserOptions;
    return limitRows && this.rowCount >= maxRows;
}

public transform(transformFunction: RowTransformFunction): CsvParserStream {
this.rowTransformerValidator.rowTransform = transformFunction;
return this;
Expand All @@ -54,23 +58,31 @@ export default class CsvParserStream extends Transform {
}

public _transform(data: Buffer, encoding: string, done: TransformCallback): void {
// if we have hit our maxRows parsing limit then skip parsing
if (this.hasHitRowLimit) {
return done();
}
try {
const { lines } = this;
const newLine = lines + this.decoder.write(data);
const rows = this.parse(newLine, true);
this.processRows(rows, done);
return this.processRows(rows, done);
} catch (e) {
done(e);
return done(e);
}
}

public _flush(done: TransformCallback): void {
// if we have hit our maxRows parsing limit then skip parsing
if (this.hasHitRowLimit) {
return done();
}
try {
const newLine = this.lines + this.decoder.end();
const rows = this.parse(newLine, false);
this.processRows(rows, done);
return this.processRows(rows, done);
} catch (e) {
done(e);
return done(e);
}
}

Expand All @@ -86,7 +98,9 @@ export default class CsvParserStream extends Transform {
private processRows(rows: string[][], cb: TransformCallback): void {
const rowsLength = rows.length;
const iterate = (i: number): void => {
if (i >= rowsLength) {
// if we have emitted all rows or we have hit the maxRows limit option
// then end
if (i >= rowsLength || this.hasHitRowLimit) {
return cb();
}
const row = rows[i];
Expand Down
9 changes: 9 additions & 0 deletions src/parser/ParserOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export interface ParserOptionsArgs {
ltrim?: boolean;
rtrim?: boolean;
encoding?: string;
maxRows?: number;
}

export class ParserOptions {
Expand Down Expand Up @@ -57,6 +58,10 @@ export class ParserOptions {

public readonly encoding: string = 'utf8';

public readonly limitRows: boolean = false;

public readonly maxRows: number = 0;

public constructor(opts?: ParserOptionsArgs) {
Object.assign(this, opts || {});
if (this.delimiter.length > 1) {
Expand All @@ -66,5 +71,9 @@ export class ParserOptions {
this.escapeChar = this.escape ?? this.quote;
this.supportsComments = !isNil(this.comment);
this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`);

if (this.maxRows > 0) {
this.limitRows = true;
}
}
}
18 changes: 18 additions & 0 deletions test/parser/CsvParsingStream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,24 @@ describe('CsvParserStream', () => {
});
});

describe('maxRows', () => {
    // NOTE: each test returns the promise chain so the test runner waits for the
    // assertions inside `.then`. Without the `return`, the test finishes
    // synchronously and a failing assertion cannot fail the test.
    it('should parse up to the specified number of maxRows', () => {
        const maxRows = 3;
        return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => {
            // only the first `maxRows` parsed rows should have been emitted
            assert.deepStrictEqual(rows, assets.withHeaders.parsed.slice(0, maxRows));
            assert.strictEqual(count, maxRows);
        });
    });

    it('should parse all rows if maxRows === 0', () => {
        const maxRows = 0;
        return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => {
            // a maxRows of 0 disables the limit, so every row is expected
            assert.deepStrictEqual(rows, assets.withHeaders.parsed);
            assert.strictEqual(count, rows.length);
        });
    });
});

it('should emit an error for malformed rows', next => {
assets.write(assets.malformed);
const stream = csv.parseFile(assets.malformed.path, { headers: true });
Expand Down
20 changes: 20 additions & 0 deletions test/parser/ParserOptions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,24 @@ describe('ParserOptions', () => {
assert.strictEqual(createOptions({ renameHeaders: false }).renameHeaders, false);
});
});

// Verifies how the `maxRows` option maps onto the derived `limitRows` flag:
// limiting is enabled only when `maxRows` is greater than 0.
describe('#maxRows', () => {
    it('should default maxRows to 0 and limitRows to false', () => {
        const opts = createOptions();
        assert.strictEqual(opts.maxRows, 0);
        assert.strictEqual(opts.limitRows, false);
    });

    it('should set maxRows to the provided option and limitRows to true if maxRows > 0', () => {
        const opts = createOptions({ maxRows: 1 });
        assert.strictEqual(opts.maxRows, 1);
        assert.strictEqual(opts.limitRows, true);
    });

    // An explicit 0 behaves like the default: no row limit is applied.
    it('should set maxRows to the provided option and limitRows to false if maxRows === 0', () => {
        const opts = createOptions({ maxRows: 0 });
        assert.strictEqual(opts.maxRows, 0);
        assert.strictEqual(opts.limitRows, false);
    });
});
});