382 lines
13 KiB
PHP
382 lines
13 KiB
PHP
<?php
|
|
|
|
namespace UbsCsvTransformer;
|
|
|
|
use UbsCsvTransformer\CsvReader;
|
|
use UbsCsvTransformer\CsvWriter;
|
|
use UbsCsvTransformer\ConfigurationLoader;
|
|
use UbsCsvTransformer\MetadataExtractor;
|
|
use UbsCsvTransformer\ColumnTransformer;
|
|
use UbsCsvTransformer\FireflyImporter;
|
|
use UbsCsvTransformer\RowFilter;
|
|
|
|
/**
 * Orchestrates the complete CSV transformation pipeline.
 *
 * Coordinates all steps from reading the CSV through metadata extraction
 * and column transformation to output and optional import into Firefly III.
 *
 * Collaborators held by the engine:
 * - ConfigurationLoader: source of every configuration section used here
 * - MetadataExtractor:   extracts metadata from the lines before the header line
 * - ColumnTransformer:   transforms one input row into one output row
 * - CsvWriter:           writes the transformed rows to the configured output file
 */
class TransformerEngine
{
    /** Number of sample rows kept when no row limit is requested. */
    private const DEFAULT_SAMPLE_ROWS = 3;

    /** Metadata keys that validate() requires to be present and non-empty. */
    private const REQUIRED_METADATA = ['account_iban', 'currency_code'];

    private ConfigurationLoader $configLoader;
    private CsvWriter $csvWriter;
    private MetadataExtractor $metadataExtractor;
    private ColumnTransformer $columnTransformer;

    /** The 'csvStructure' configuration section (empty array when absent). */
    private array $csvStructure;

    /** Sample rows collected by the most recent transform() call. */
    private array $sampleData = [];

    /** Number of rows transformed by the most recent transform() call. */
    private int $rowsProcessed = 0;

    private bool $debugMode = false;

    /**
     * Initialises TransformerEngine with configuration.
     *
     * Reads the full configuration once and builds the MetadataExtractor,
     * a ColumnTransformer without metadata, and the CsvWriter.
     * CsvReader is instantiated later in transform() and validate() with the file path.
     *
     * The output file is "<directories.output>/<csvStructure.outputFilename>",
     * defaulting to "./output/transformed.csv".
     *
     * @param ConfigurationLoader $configLoader Loads configuration files
     * @param bool                $debugMode    true = enable debug mode
     *
     * @throws \RuntimeException Not thrown directly here; presumably raised by the
     *                           configuration loader or a component when required
     *                           configuration is missing (TODO confirm)
     */
    public function __construct(ConfigurationLoader $configLoader, bool $debugMode = false)
    {
        $this->configLoader = $configLoader;
        $this->debugMode = $debugMode;
        if ($debugMode) {
            DebugLogger::enable();
        }

        $config = $configLoader->getAll();

        $this->csvStructure = $config['csvStructure'] ?? [];

        $this->metadataExtractor = new MetadataExtractor(
            $config['metadata']['extractionRules'] ?? []
        );

        // No metadata is known yet; transform() replaces this instance with one
        // that carries the metadata extracted from the actual input file.
        $this->columnTransformer = new ColumnTransformer(
            $config['columnTransformations'] ?? [],
            [],
            $config['capitalizationExceptions'] ?? []
        );

        // Determine output file name from configuration
        $outputDir = $config['directories']['output'] ?? './output';
        $outputFileName = $this->csvStructure['outputFilename'] ?? 'transformed.csv';
        $outputFile = rtrim($outputDir, '/') . '/' . $outputFileName;

        $this->csvWriter = new CsvWriter($outputFile, $this->csvStructure);
    }

    /**
     * Enables or disables debug mode.
     *
     * Updates both this engine's flag and the shared DebugLogger.
     *
     * @param bool $enabled true = debug mode enabled
     * @return void
     */
    public function setDebugMode(bool $enabled): void
    {
        $this->debugMode = $enabled;
        if ($enabled) {
            DebugLogger::enable();
        } else {
            DebugLogger::disable();
        }
    }

    /**
     * Transforms a CSV file.
     *
     * Performs the following steps:
     * 1. Read CSV file with CsvReader
     * 2. Extract metadata from the lines before the header
     * 3. Skip rows matching the optional 'skipIf' filter, transform the others
     * 4. Remove the columns listed in csvStructure.excludeOutputColumns
     * 5. Write data to output CSV
     * 6. Collect sample data (3 rows when maxRows is 0, otherwise up to maxRows rows)
     *
     * The output file path is determined from the configuration and cannot be overridden.
     * Errors raised as \Exception are not propagated; they are reported in the result.
     *
     * @param string $inputFile Path to the input CSV file
     * @param int    $maxRows   Maximum number of data rows to transform (0 = all).
     *                          Negative values are treated as 0.
     *
     * @return array Transformation result with:
     *               - success: bool (true = successful, false = error)
     *               - inputFile: string (input file path, on success only)
     *               - outputFile: string (output file path, on success only)
     *               - rowsProcessed: int (actually transformed data rows; skipped rows are not counted)
     *               - sampleData: array (first transformed rows, see step 6)
     *               - metadata: array (extracted metadata, on success only)
     *               - outputColumns: value of ColumnTransformer::getOutputColumns()
     *                 on success, empty array on failure
     *               - error: string (error message, on failure only)
     *               - debug_logs: collected debug logs (only when debug mode is enabled)
     */
    public function transform(string $inputFile, int $maxRows = 0): array
    {
        $this->sampleData = [];
        $this->rowsProcessed = 0;
        DebugLogger::reset();

        // A negative limit is meaningless; treat it like the documented "all rows" value
        // so it neither reaches the reader nor produces a negative sample limit.
        $maxRows = max(0, $maxRows);

        try {
            if ($this->debugMode) {
                DebugLogger::log('transformer', 'Transformation started', [
                    'inputFile' => $inputFile,
                    'maxRows' => $maxRows
                ]);
            }

            // Validate input file (is_file also rejects directories, unlike file_exists)
            if (!is_file($inputFile)) {
                throw new \RuntimeException("Input file not found: {$inputFile}");
            }

            // Initialise CsvReader with file path and configuration
            $csvReader = new CsvReader($inputFile, $this->csvStructure);

            // Read the metadata lines (before the header line) and extract metadata from them
            $metadataLines = $csvReader->readMetadataLines();
            $metadata = $this->metadataExtractor->extract($metadataLines);

            // Initialise ColumnTransformer with extracted metadata
            $this->columnTransformer = new ColumnTransformer(
                $this->configLoader->get('columnTransformations', []),
                $metadata,
                $this->configLoader->get('capitalizationExceptions', [])
            );

            // Read CSV data with header keys as array keys
            $dataRows = $csvReader->readCsvData($maxRows);
            if (empty($dataRows)) {
                throw new \RuntimeException("No data rows in CSV file");
            }

            // Calculate limit for sample data
            $sampleLimit = $maxRows === 0 ? self::DEFAULT_SAMPLE_ROWS : $maxRows;

            // Transform rows and collect them
            $transformedData = [];

            /** @var array<string, mixed>|null $skipIfNode */
            $skipIfNode = $this->configLoader->get('skipIf', null);

            foreach ($dataRows as $row) {
                // Check if maxRows reached
                if ($maxRows > 0 && $this->rowsProcessed >= $maxRows) {
                    break;
                }

                // Skip row if filter condition matches
                if ($skipIfNode !== null && RowFilter::evaluate($skipIfNode, $row)) {
                    DebugLogger::log('transformer', 'Row skipped by skipIf filter', ['row' => $row]);
                    continue;
                }

                // Transform row
                $transformedRow = $this->columnTransformer->transformRow($row);
                $transformedData[] = $transformedRow;

                // Save sample data
                if (count($this->sampleData) < $sampleLimit) {
                    $this->sampleData[] = $transformedRow;
                }

                $this->rowsProcessed++;
            }

            // Remove columns to be excluded from the output
            $excludeColumns = $this->csvStructure['excludeOutputColumns'] ?? [];
            if (!empty($excludeColumns)) {
                $excludeMap = array_flip($excludeColumns);
                $transformedData = $this->removeColumns($transformedData, $excludeMap);
                $this->sampleData = $this->removeColumns($this->sampleData, $excludeMap);
            }

            // Write all transformed data to output CSV
            $this->csvWriter->write($transformedData);

            return $this->withDebugLogs([
                'success' => true,
                'inputFile' => $inputFile,
                'outputFile' => $this->csvWriter->getOutputFile(),
                'rowsProcessed' => $this->rowsProcessed,
                'sampleData' => $this->sampleData,
                'metadata' => $metadata,
                'outputColumns' => $this->columnTransformer->getOutputColumns(),
            ]);
        } catch (\Exception $e) {
            // Failures are reported to the caller instead of being rethrown.
            // The debug logs are attached here too: a failed run is when they matter most.
            return $this->withDebugLogs([
                'success' => false,
                'error' => $e->getMessage(),
                'rowsProcessed' => $this->rowsProcessed,
                'sampleData' => $this->sampleData,
                'outputColumns' => [],
            ]);
        }
    }

    /**
     * Transforms and imports CSV into Firefly III.
     *
     * Performs transformation and imports the output file
     * into Firefly III if 'fireflyImport.autoImport' is set in the configuration.
     * A failed transformation is returned as-is without attempting an import.
     *
     * Backwards-compatible with legacy signature.
     *
     * @param string $inputFile Path to the input CSV file
     * @param int    $maxRows   Maximum number of data rows to process (0 = all)
     *
     * @return array The result of transform(), extended with:
     *               - import: array (Firefly import result, only if autoImport is active;
     *                 ['success' => false, 'error' => string] when the import threw)
     */
    public function transformAndImport(string $inputFile, int $maxRows = 0): array
    {
        // Transform first
        $transformResult = $this->transform($inputFile, $maxRows);

        if (!$transformResult['success']) {
            return $transformResult;
        }

        // Check whether auto-import is enabled in configuration
        $fireflyConfig = $this->configLoader->get('fireflyImport', []);
        if (empty($fireflyConfig['autoImport'])) {
            return $transformResult;
        }

        // Perform Firefly import; an import failure does not invalidate the transformation
        try {
            $importer = new FireflyImporter($fireflyConfig);
            $transformResult['import'] = $importer->import($transformResult['outputFile']);
        } catch (\Exception $e) {
            $transformResult['import'] = [
                'success' => false,
                'error' => $e->getMessage(),
            ];
        }

        return $transformResult;
    }

    /**
     * Validates a CSV file against the configuration.
     *
     * Checks that the file exists and that the required metadata
     * (account_iban, currency_code) can be extracted from the metadata lines.
     *
     * @param string $inputFile Path to the CSV file to validate
     *
     * @return array Validation result with:
     *               - valid: bool (true = validation successful)
     *               - metadata: array (extracted metadata, when valid)
     *               - line_count: value of CsvReader::countLines() (when valid)
     *               - error: string (error message, when not valid)
     *               - metadata_found: array (extracted metadata, only when required keys are missing)
     */
    public function validate(string $inputFile): array
    {
        try {
            // is_file also rejects directories, unlike file_exists
            if (!is_file($inputFile)) {
                return [
                    'valid' => false,
                    'error' => "File not found: {$inputFile}",
                ];
            }

            // Initialise CsvReader with file path
            $csvReader = new CsvReader($inputFile, $this->csvStructure);

            // Extract metadata lines (before the header line)
            $metadataLines = $csvReader->readMetadataLines();
            $metadata = $this->metadataExtractor->extract($metadataLines);

            // Check for required metadata
            $missingMetadata = [];
            foreach (self::REQUIRED_METADATA as $key) {
                if (empty($metadata[$key])) {
                    $missingMetadata[] = $key;
                }
            }

            if (!empty($missingMetadata)) {
                return [
                    'valid' => false,
                    'error' => 'Missing metadata: ' . implode(', ', $missingMetadata),
                    'metadata_found' => $metadata,
                ];
            }

            return [
                'valid' => true,
                'metadata' => $metadata,
                'line_count' => $csvReader->countLines(),
            ];
        } catch (\Exception $e) {
            return [
                'valid' => false,
                'error' => 'Validation error: ' . $e->getMessage(),
            ];
        }
    }

    /**
     * Returns the sample data collected by the most recent transform() call.
     *
     * @return array Sample rows (at most 3, or at most maxRows when a limit was given)
     */
    public function getSampleData(): array
    {
        return $this->sampleData;
    }

    /**
     * Returns the number of data rows transformed by the most recent transform() call.
     *
     * @return int Number of transformed rows
     */
    public function getRowsProcessed(): int
    {
        return $this->rowsProcessed;
    }

    /**
     * Removes the given columns from every row.
     *
     * @param array $rows       Rows keyed by column name
     * @param array $excludeMap Column names to drop, given as array keys
     *
     * @return array Rows without the excluded columns
     */
    private function removeColumns(array $rows, array $excludeMap): array
    {
        return array_map(
            static fn(array $row): array => array_diff_key($row, $excludeMap),
            $rows
        );
    }

    /**
     * Adds the collected debug logs to a result array when debug mode is enabled.
     *
     * @param array $result Result array to extend
     *
     * @return array The result, with a 'debug_logs' entry in debug mode
     */
    private function withDebugLogs(array $result): array
    {
        if ($this->debugMode) {
            $result['debug_logs'] = DebugLogger::getLogs();
        }

        return $result;
    }
}
|