firefly-import-preprocessor/src/CsvReader.php
2026-05-04 00:23:02 +02:00

184 lines
5.1 KiB
PHP

<?php
namespace UbsCsvTransformer;
/**
 * Reads and parses delimiter-separated (CSV) files.
 *
 * The file is split into physical lines; lines before the configured header
 * line are treated as metadata, the header line supplies the column names,
 * and every non-blank line after it is parsed as a data row.
 *
 * Note: because the file is split on line breaks before parsing, quoted
 * fields that span several physical lines are not supported.
 */
class CsvReader
{
    private string $filePath;

    /** Field delimiter handed to str_getcsv(). */
    private string $delimiter;

    /** 1-based line number of the header row. */
    private int $headerLine;

    /** Whether a leading UTF-8 BOM should be stripped from the first line. */
    private bool $hasBom;

    /**
     * @param string $filePath Path to the CSV file
     * @param array{inputDelimiter?: string, headerLine?: int, hasBom?: bool} $csvStructure
     *        CSV structure from configuration. Missing keys default to
     *        ';' (delimiter), 1 (header line) and false (BOM handling).
     */
    public function __construct(string $filePath, array $csvStructure)
    {
        $this->filePath = $filePath;
        $this->delimiter = $csvStructure['inputDelimiter'] ?? ';';
        $this->headerLine = $csvStructure['headerLine'] ?? 1;
        $this->hasBom = $csvStructure['hasBom'] ?? false;
    }

    /**
     * Reads the lines of the file, without their line terminators.
     *
     * The whole file is always loaded; $maxLines only truncates the result.
     * A UTF-8 BOM is stripped from the first line only when the reader was
     * configured with hasBom = true.
     *
     * @param int $maxLines Maximum number of lines (0 = all)
     * @return list<string> Array of lines (without newlines)
     * @throws \RuntimeException if file cannot be read
     */
    public function readLines(int $maxLines = 0): array
    {
        if (!file_exists($this->filePath) || !is_readable($this->filePath)) {
            throw new \RuntimeException("Could not read file: {$this->filePath}");
        }

        $lines = file($this->filePath, FILE_IGNORE_NEW_LINES);
        if ($lines === false) {
            throw new \RuntimeException("Could not read file: {$this->filePath}");
        }

        // Remove BOM if present
        if ($this->hasBom && !empty($lines)) {
            $lines[0] = $this->removeBom($lines[0]);
        }

        if ($maxLines > 0 && count($lines) > $maxLines) {
            $lines = array_slice($lines, 0, $maxLines);
        }

        return $lines;
    }

    /**
     * Reads the metadata lines (everything before the header line).
     *
     * @return list<string> Raw metadata lines; empty when the header is on
     *                      line 1 (or the configured header line is below 1)
     * @throws \RuntimeException if file cannot be read
     */
    public function readMetadataLines(): array
    {
        $lines = $this->readLines();

        if ($this->headerLine <= 1) {
            return [];
        }

        return array_slice($lines, 0, $this->headerLine - 1);
    }

    /**
     * Reads CSV data rows keyed by the column names of the header line.
     *
     * Blank lines after the header are skipped and do not count towards
     * $maxDataRows. Cells missing from a short row are filled with ''.
     * Cells beyond the last header column are dropped, and when two columns
     * share a name the later column's value wins.
     *
     * @param int $maxDataRows Maximum number of data rows (0 = all)
     * @return list<array<string, string>> Array of associative arrays (with column names as keys)
     * @throws \RuntimeException if the file cannot be read or the header line
     *                           is below 1 or beyond the end of the file
     */
    public function readCsvData(int $maxDataRows = 0): array
    {
        $lines = $this->readLines();
        $totalLines = count($lines);

        // A header line below 1 would index $lines with a negative key and
        // silently yield a bogus empty header, so reject it up front.
        if ($this->headerLine < 1 || $this->headerLine > $totalLines) {
            throw new \RuntimeException("Header line {$this->headerLine} not found in file with " . $totalLines . " lines");
        }

        $headers = $this->parseLine($lines[$this->headerLine - 1]);

        $data = [];
        // headerLine is 1-based, so it is also the 0-based index of the first data line.
        foreach (array_slice($lines, $this->headerLine) as $lineContent) {
            if ($maxDataRows > 0 && count($data) >= $maxDataRows) {
                break;
            }

            // Skip empty lines
            if (trim($lineContent) === '') {
                continue;
            }

            $row = $this->parseLine($lineContent);

            // Combine row with header keys
            $rowData = [];
            foreach ($headers as $index => $header) {
                $rowData[$header] = $row[$index] ?? '';
            }
            $data[] = $rowData;
        }

        return $data;
    }

    /**
     * Returns the trimmed column names from the header line.
     *
     * @return list<string> Array of column names
     * @throws \RuntimeException if the file cannot be read or the header line
     *                           is below 1 or beyond the end of the file
     */
    public function getHeaders(): array
    {
        $lines = $this->readLines();

        // Same lower-bound guard as readCsvData(): never index with a negative key.
        if ($this->headerLine < 1 || $this->headerLine > count($lines)) {
            throw new \RuntimeException("Header line {$this->headerLine} not found");
        }

        return $this->parseLine($lines[$this->headerLine - 1]);
    }

    /**
     * Splits one physical line into trimmed fields.
     *
     * Uses the configured delimiter, '"' as enclosure and '\' as escape
     * character. str_getcsv() reports an empty line as a single null field,
     * which is normalised to ''.
     *
     * @param string $line Raw line without its line terminator
     * @return list<string> Trimmed field values
     */
    private function parseLine(string $line): array
    {
        $fields = str_getcsv($line, $this->delimiter, '"', '\\');

        return array_map(static fn(?string $v): string => trim($v ?? ''), $fields);
    }

    /**
     * Removes a leading UTF-8 BOM (Byte Order Mark) from a string.
     *
     * @param string $text String with potential BOM
     * @return string String without BOM; unchanged when no BOM is present
     */
    private function removeBom(string $text): string
    {
        if (str_starts_with($text, "\xEF\xBB\xBF")) {
            return substr($text, 3);
        }

        return $text;
    }

    /**
     * Returns the total number of lines in the file.
     *
     * @return int Number of lines (metadata, header, data and blank lines)
     * @throws \RuntimeException if file cannot be read
     */
    public function countLines(): int
    {
        return count($this->readLines());
    }

    /**
     * Returns the number of data rows (excluding header, metadata and blank lines).
     *
     * @return int Number of data rows
     * @throws \RuntimeException if the file cannot be read or the header line is not found
     */
    public function countDataRows(): int
    {
        return count($this->readCsvData());
    }
}