Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor header renaming logic to address #1052, #1007, #1058

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 43 additions & 50 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -1486,61 +1486,14 @@ License: MIT
if (!input)
return returnable();

// Rename headers if there are duplicates
var firstLine;
if (config.header && !baseIndex)
{
firstLine = input.split(newline)[0];
var headers = firstLine.split(delim);
var separator = '_';
var headerMap = new Set();
var headerCount = {};
var duplicateHeaders = false;

// Using old-style 'for' loop to avoid prototype pollution that would be picked up with 'var j in headers'
for (var j = 0; j < headers.length; j++) {
var header = headers[j];
if (isFunction(config.transformHeader))
header = config.transformHeader(header, j);
var headerName = header;

var count = headerCount[header] || 0;
if (count > 0) {
duplicateHeaders = true;
headerName = header + separator + count;
// Initialise the variable if it hasn't been.
if (renamedHeaders === null) {
renamedHeaders = {};
}
}
headerCount[header] = count + 1;
// In case it already exists, we add more separators
while (headerMap.has(headerName)) {
headerName = headerName + separator + count;
}
headerMap.add(headerName);
if (count > 0) {
renamedHeaders[headerName] = header;
}
}
if (duplicateHeaders) {
var editedInput = input.split(newline);
editedInput[0] = Array.from(headerMap).join(delim);
input = editedInput.join(newline);
}
}
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
{
var rows = input.split(newline);
for (var i = 0; i < rows.length; i++)
{
row = rows[i];
// use firstline as row length may be changed due to duplicated headers
if (i === 0 && firstLine !== undefined) {
cursor += firstLine.length;
}else{
cursor += row.length;
}
cursor += row.length;

if (i !== rows.length - 1)
cursor += newline.length;
else if (ignoreLastRow)
Expand Down Expand Up @@ -1735,7 +1688,6 @@ License: MIT
break;
}


return finish();


Expand Down Expand Up @@ -1795,6 +1747,47 @@ License: MIT
/** Returns an object with the results, errors, and meta. */
function returnable(stopped)
{
if (config.header && !baseIndex && data.length)
{
const result = data[0];
const headerCount = {}; // To track the count of each base header
const usedHeaders = new Set(result); // To track used headers and avoid duplicates
let duplicateHeaders = false;

for (let i = 0; i < result.length; i++) {
let header = result[i];
if (isFunction(config.transformHeader))
header = config.transformHeader(header, i);

if (!headerCount[header]) {
headerCount[header] = 1;
result[i] = header;
} else {
let newHeader;
let suffixCount = headerCount[header];

// Find a unique new header
do {
newHeader = `${header}_${suffixCount}`;
suffixCount++;
} while (usedHeaders.has(newHeader));

usedHeaders.add(newHeader); // Mark this new Header as used
result[i] = newHeader;
headerCount[header]++;
duplicateHeaders = true;
if (renamedHeaders === null) {
renamedHeaders = {};
}
renamedHeaders[newHeader] = header;
}

usedHeaders.add(header); // Ensure the original header is marked as used
}
if (duplicateHeaders) {
console.warn('Duplicate headers found and renamed.');
}
}
return {
data: data,
errors: errors,
Expand Down
4 changes: 2 additions & 2 deletions tests/test-cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -618,10 +618,10 @@ var CORE_PARSER_TESTS = [
input: 'c,c,c,c_1\n1,2,3,4',
config: { header: true },
expected: {
data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
data: [['c', 'c_2', 'c_3', 'c_1'], ['1', '2', '3', '4']],
errors: [],
meta: {
renamedHeaders: {c_1: 'c', c_2: 'c'},
renamedHeaders: {c_2: 'c', c_3: 'c'},
cursor: 17
}
}
Expand Down