184 lines
5.1 KiB
PHP
184 lines
5.1 KiB
PHP
<?php
|
|
|
|
namespace UbsCsvTransformer;
|
|
|
|
/**
 * Reads and parses CSV files
 *
 * Reads CSV files with a configurable delimiter and separates
 * metadata lines from the actual data rows.
 *
 * The file is first split into physical lines and every line is parsed on
 * its own, so quoted fields that contain line breaks are not supported.
 * Every public read/count method re-reads the file; nothing is cached.
 */
class CsvReader
{
    private string $filePath;
    private string $delimiter;
    private int $headerLine;
    private bool $hasBom;

    /**
     * @param string $filePath Path to the CSV file
     * @param array $csvStructure CSV structure from configuration. Recognised keys:
     *                            'inputDelimiter' (default ';'),
     *                            'headerLine' (1-based line number, default 1),
     *                            'hasBom' (default false)
     */
    public function __construct(string $filePath, array $csvStructure)
    {
        $this->filePath = $filePath;
        $this->delimiter = $csvStructure['inputDelimiter'] ?? ';';
        $this->headerLine = $csvStructure['headerLine'] ?? 1;
        $this->hasBom = $csvStructure['hasBom'] ?? false;
    }

    /**
     * Reads all lines from the file
     *
     * When the reader is configured with 'hasBom', a leading UTF-8 BOM is
     * stripped from the first line.
     *
     * @param int $maxLines Maximum number of lines (0 = all)
     * @return array Array of lines (without newlines)
     * @throws \RuntimeException if file cannot be read
     */
    public function readLines(int $maxLines = 0): array
    {
        // is_readable() is false for missing paths, so no separate existence
        // check is needed. Directories pass is_readable() but are never a
        // valid CSV source, so reject them explicitly.
        if (!is_readable($this->filePath) || is_dir($this->filePath)) {
            throw new \RuntimeException("Could not read file: {$this->filePath}");
        }

        $lines = file($this->filePath, FILE_IGNORE_NEW_LINES);

        if ($lines === false) {
            throw new \RuntimeException("Could not read file: {$this->filePath}");
        }

        // Remove BOM if present
        if ($this->hasBom && $lines !== []) {
            $lines[0] = $this->removeBom($lines[0]);
        }

        if ($maxLines > 0 && count($lines) > $maxLines) {
            $lines = array_slice($lines, 0, $maxLines);
        }

        return $lines;
    }

    /**
     * Reads the metadata lines (before the header line)
     *
     * @return array Array of metadata lines; empty when the header is on line 1 (or lower)
     * @throws \RuntimeException if file cannot be read
     */
    public function readMetadataLines(): array
    {
        // Read first so an unreadable file is reported even when there is no metadata.
        $lines = $this->readLines();

        return $this->headerLine > 1
            ? array_slice($lines, 0, $this->headerLine - 1)
            : [];
    }

    /**
     * Reads CSV data with headers
     *
     * Blank lines after the header are skipped and do not count towards
     * $maxDataRows. Cells without a matching header are dropped; headers
     * without a matching cell get an empty string. All values are trimmed.
     *
     * @param int $maxDataRows Maximum number of data rows (0 = all)
     * @return array Array of associative arrays (with column names as keys)
     * @throws \RuntimeException if the file cannot be read or the header line is not found
     */
    public function readCsvData(int $maxDataRows = 0): array
    {
        $lines = $this->readLines();

        if (!$this->hasHeaderLine($lines)) {
            throw new \RuntimeException("Header line {$this->headerLine} not found in file with " . count($lines) . " lines");
        }

        $headers = $this->parseLine($lines[$this->headerLine - 1]);

        // The 1-based header line number equals the 0-based index of the first data line.
        $data = [];
        foreach (array_slice($lines, $this->headerLine) as $lineContent) {
            if ($maxDataRows > 0 && count($data) >= $maxDataRows) {
                break;
            }

            // Skip empty lines
            if (trim($lineContent) === '') {
                continue;
            }

            $cells = $this->parseLine($lineContent);

            // Combine row with header keys
            $rowData = [];
            foreach ($headers as $index => $header) {
                $rowData[$header] = $cells[$index] ?? '';
            }

            $data[] = $rowData;
        }

        return $data;
    }

    /**
     * Returns the column headers
     *
     * @return array Array of column names (trimmed)
     * @throws \RuntimeException if the file cannot be read or the header line is not found
     */
    public function getHeaders(): array
    {
        $lines = $this->readLines();

        if (!$this->hasHeaderLine($lines)) {
            throw new \RuntimeException("Header line {$this->headerLine} not found");
        }

        return $this->parseLine($lines[$this->headerLine - 1]);
    }

    /**
     * Returns the total number of lines in the file
     *
     * @return int Number of lines
     * @throws \RuntimeException if file cannot be read
     */
    public function countLines(): int
    {
        return count($this->readLines());
    }

    /**
     * Returns the number of data rows (excluding header and metadata)
     *
     * Blank lines are not counted, matching readCsvData().
     *
     * @return int Number of data rows
     * @throws \RuntimeException if the file cannot be read or the header line is not found
     */
    public function countDataRows(): int
    {
        return count($this->readCsvData());
    }

    /**
     * Tells whether the configured 1-based header line exists in the given lines
     *
     * A header line below 1 can never exist; without this guard it would
     * index before the start of the array.
     *
     * @param array $lines Lines as returned by readLines()
     * @return bool True if the header line number points at an existing line
     */
    private function hasHeaderLine(array $lines): bool
    {
        return $this->headerLine >= 1 && $this->headerLine <= count($lines);
    }

    /**
     * Splits one line into trimmed cells using the configured delimiter
     *
     * @param string $line Single line without trailing newline
     * @return array List of trimmed cell values (null cells become '')
     */
    private function parseLine(string $line): array
    {
        return array_map(
            static fn (?string $v): string => trim($v ?? ''),
            str_getcsv($line, $this->delimiter, '"', '\\'),
        );
    }

    /**
     * Removes UTF-8 BOM (Byte Order Mark) from string
     *
     * @param string $text String with potential BOM
     * @return string String without BOM
     */
    private function removeBom(string $text): string
    {
        if (str_starts_with($text, "\xEF\xBB\xBF")) {
            return substr($text, 3);
        }

        return $text;
    }
}
|