From b133cfb63421075671fb3033d0321b94a1a74bc4 Mon Sep 17 00:00:00 2001 From: Bhuvaneshwara Raja Date: Sun, 24 Sep 2023 20:55:35 +0530 Subject: [PATCH] Skip lines future (Feature Suggestion) #738 --- docs/docs.html | 11 ++++++++- papaparse.js | 20 +++++++++++++++-- player/player.html | 1 + player/player.js | 1 + tests/test-cases.js | 54 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/docs/docs.html b/docs/docs.html index 49095b17..b9292fad 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -450,7 +450,8 @@
Default Config With All Options
beforeFirstChunk: undefined, withCredentials: undefined, transform: undefined, - delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP] + delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP], + skipFirstNLines: 0 }
@@ -682,6 +683,14 @@
Config Options
An array of delimiters to guess from if the delimiter option is not set. + + + skipFirstNLines + + + The number of lines to skip at the start of the input when converting a CSV file to JSON. Defaults to 0 (no lines are skipped). + + diff --git a/papaparse.js b/papaparse.js index 3ce39621..ff110692 100755 --- a/papaparse.js +++ b/papaparse.js @@ -484,6 +484,10 @@ License: MIT } } + function customSplice(inputArray, startIndex, count) { + return [...inputArray.slice(0, startIndex), ...inputArray.slice(startIndex + count)]; + } + /** ChunkStreamer is the base prototype for various streamer implementations. */ function ChunkStreamer(config) { @@ -519,8 +523,21 @@ License: MIT // Rejoin the line we likely just split in two by chunking the file var aggregate = this._partialLine + chunk; + this._skipLines = parseInt(this._config.skipFirstNLines) || 0; + this._pendingSkip = parseInt(this._config.skipFirstNLines) || 0; + this._skipHeader = this._config.header ? 1 : 0; + if (this._pendingSkip > 0 && this._pendingSkip <= this._skipLines) { + var splitChunk = aggregate.split('\n'); + var currentChunkLength = splitChunk.length; + if (currentChunkLength <= this._pendingSkip) { + aggregate = this._partialLine; + } + else{ + aggregate = this._partialLine + customSplice(splitChunk,this._skipHeader,this._pendingSkip).join('\n'); + } + this._pendingSkip = this._skipLines - currentChunkLength; + } this._partialLine = ''; - var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); if (this._handle.paused() || this._handle.aborted()) { @@ -1929,7 +1946,6 @@ License: MIT { return function() { f.apply(self, arguments); }; } - function isFunction(func) { return typeof func === 'function'; diff --git a/player/player.html b/player/player.html index b90fe986..48015e35 100644 --- a/player/player.html +++ b/player/player.html @@ -24,6 +24,7 @@

Papa Parse Player

+ diff --git a/player/player.js b/player/player.js index 8150de64..f8b8e3ac 100644 --- a/player/player.js +++ b/player/player.js @@ -108,6 +108,7 @@ function buildConfig() skipEmptyLines: $('#skipEmptyLines').prop('checked'), chunk: $('#chunk').prop('checked') ? chunkFn : undefined, beforeFirstChunk: undefined, + skipFirstNLines: $('#skipFirstNLines').val() }; function getLineEnding() diff --git a/tests/test-cases.js b/tests/test-cases.js index da489e8c..66a6f090 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -1574,6 +1574,60 @@ var PARSE_TESTS = [ data: [['a', 'b', 'c\n'], ['d', 'e', 'f']], errors: [] } + }, + { + description: "Skip First N number of lines , with header and 2 rows", + input: 'a,b,c,d\n1,2,3,4', + config: { header: true, skipFirstNLines: 1 }, + expected: { + data: [], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header and 3 rows", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: true, skipFirstNLines: 1 }, + expected: { + data: [{a: '4', b: '5', c: '6', d: '7'}], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: false, skipFirstNLines: 1 }, + expected: { + data: [['1','2','3','4'],['4','5','6','7']], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false and skipFirstNLines as 0", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: false, skipFirstNLines: 0 }, + expected: { + data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false and skipFirstNLines as negative value", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: false, skipFirstNLines: -2 }, + expected: { + data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']], + errors: [] + } + }, + { + description: "Without Skip First N number of lines", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + 
config: { header: false}, + expected: { + data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']], + errors: [] + } } ];