Skip to content

Commit

Permalink
Fixes parsing CSV files with BOM
Browse files Browse the repository at this point in the history
  • Loading branch information
mmenozzi committed Feb 15, 2019
1 parent 00d725f commit 825a276
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 2 deletions.
7 changes: 5 additions & 2 deletions src/Iterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@ private function attachEmitter()
$this->emitter->fail(
new \LogicException(
sprintf(
'Invalid number of columns at line %d of given CSV file.',
$this->csvParser->getRowsParsed()
'Invalid number of columns at line %d of given CSV file. Header has %d columns, ' .
'this line %d columns.',
$this->csvParser->getRowsParsed(),
\count($header),
\count($row)
)
)
);
Expand Down
18 changes: 18 additions & 0 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,17 @@ public function __construct(
public function parseRow(): Promise
{
return call(function () {
$isFirstRead = $this->fileHandle->tell() === 0;
if ($this->fileHandle->eof()) {
return null;
}
$buffer = '';
$newLinePos = null;
while ($chunk = yield $this->fileHandle->read()) {
if ($isFirstRead) {
$chunk = $this->removeBom($chunk);
}
$isFirstRead = false;
$buffer .= $chunk;
$newLinePos = strpos($buffer, PHP_EOL);
if ($newLinePos !== false) {
Expand Down Expand Up @@ -85,4 +90,17 @@ public function getRowsParsed(): int
{
return $this->rowsParsed;
}

/**
 * Strips a UTF-8 byte order mark (the bytes EF BB BF) from the start of a chunk.
 *
 * Only the first three bytes are inspected; the same byte sequence appearing
 * later in the chunk is left untouched.
 *
 * @param string $chunk Raw bytes to inspect.
 * @return string The chunk without its leading BOM, or the chunk unchanged when it has none.
 */
private function removeBom(string $chunk): string
{
    $bom = "\xEF\xBB\xBF";
    // Compare only the prefix: strpos() would walk the entire chunk when no BOM is present.
    if (strncmp($chunk, $bom, 3) !== 0) {
        return $chunk;
    }
    // substr() is typed string|false before PHP 8; the cast keeps the return strictly a string.
    return (string)substr($chunk, 3);
}
}
17 changes: 17 additions & 0 deletions tests/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,21 @@ public function testParseFileWithEscapedCharactersInValues()
$this->assertEquals('Algeria', $rows[2][0]);
$this->assertEquals($expectedAlgeriaDescription, $rows[2][1]);
}

/**
 * Parses the file-with-bom.csv fixture with ';' as the second Parser argument
 * and checks that three rows come back and that the first field of the first
 * row equals 'sku'.
 */
public function testParseFileWithBom()
{
    $parsedRows = [];
    Loop::run(function () use (&$parsedRows) {
        $handle = yield File\open(__DIR__ . '/file-with-bom.csv', 'rb');
        $csvParser = new Parser($handle, ';');
        // Keep reading until parseRow() resolves to a falsy value.
        $line = yield $csvParser->parseRow();
        while ($line) {
            $parsedRows[] = $line;
            $line = yield $csvParser->parseRow();
        }
    });
    $this->assertCount(3, $parsedRows);
    $failureMessage = 'It appears that there is a BOM because the first field of the first row doesn\'t match the expected value.';
    $this->assertEquals('sku', $parsedRows[0][0], $failureMessage);
}
}
3 changes: 3 additions & 0 deletions tests/file-with-bom.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sku;name
1;first
2;second

0 comments on commit 825a276

Please sign in to comment.