firefly-import-preprocessor/src/TransformerEngine.php
2026-05-06 23:17:54 +02:00

382 lines
13 KiB
PHP

<?php
namespace UbsCsvTransformer;
use UbsCsvTransformer\CsvReader;
use UbsCsvTransformer\CsvWriter;
use UbsCsvTransformer\ConfigurationLoader;
use UbsCsvTransformer\MetadataExtractor;
use UbsCsvTransformer\ColumnTransformer;
use UbsCsvTransformer\FireflyImporter;
use UbsCsvTransformer\RowFilter;
/**
 * Orchestrates the complete CSV transformation pipeline.
 *
 * Coordinates all steps from reading the CSV through metadata extraction
 * and column transformation to output and optional import into Firefly III.
 *
 * Collaborators (ConfigurationLoader, MetadataExtractor, ColumnTransformer,
 * CsvWriter) are held as private properties; a CsvReader is created per call
 * because it is bound to a concrete input file.
 */
class TransformerEngine
{
    /** Number of sample rows kept when no positive row limit is given. */
    private const DEFAULT_SAMPLE_LIMIT = 3;

    /** Metadata keys that validate() requires to be present and non-empty. */
    private const REQUIRED_METADATA = ['account_iban', 'currency_code'];

    private ConfigurationLoader $configLoader;
    private CsvWriter $csvWriter;
    private MetadataExtractor $metadataExtractor;
    private ColumnTransformer $columnTransformer;

    /** The 'csvStructure' section of the configuration (empty array if absent). */
    private array $csvStructure;

    /** Sample rows collected by the most recent transform() call. */
    private array $sampleData = [];

    /** Number of rows transformed by the most recent transform() call. */
    private int $rowsProcessed = 0;

    private bool $debugMode = false;

    /**
     * Initialises TransformerEngine with configuration.
     *
     * Reads the full configuration once and builds the MetadataExtractor,
     * a preliminary ColumnTransformer and the CsvWriter from it.
     * CsvReader is instantiated later in transform() and validate() with the file path.
     *
     * @param ConfigurationLoader $configLoader Loads configuration files
     * @param bool $debugMode true = enable debug mode
     *
     * @throws \RuntimeException presumably propagated from the collaborators when
     *                           required configuration is missing; nothing is thrown
     *                           directly here - verify against their constructors
     */
    public function __construct(ConfigurationLoader $configLoader, bool $debugMode = false)
    {
        $this->configLoader = $configLoader;
        $this->debugMode = $debugMode;
        // Only ever switches the logger on; a false flag leaves the logger state untouched.
        if ($debugMode) {
            DebugLogger::enable();
        }

        $config = $configLoader->getAll();
        $this->csvStructure = $config['csvStructure'] ?? [];

        $this->metadataExtractor = new MetadataExtractor(
            $config['metadata']['extractionRules'] ?? []
        );

        // No metadata is known yet; transform() replaces this instance with one
        // that carries the metadata extracted from the input file.
        $this->columnTransformer = new ColumnTransformer(
            $config['columnTransformations'] ?? [],
            [],
            $config['capitalizationExceptions'] ?? []
        );

        // The output path is fixed by configuration: <directories.output>/<csvStructure.outputFilename>.
        $outputDir = $config['directories']['output'] ?? './output';
        $outputFileName = $this->csvStructure['outputFilename'] ?? 'transformed.csv';
        $outputFile = rtrim($outputDir, '/') . '/' . $outputFileName;

        $this->csvWriter = new CsvWriter($outputFile, $this->csvStructure);
    }

    /**
     * Enables or disables debug mode.
     *
     * Updates the engine's own flag and switches the DebugLogger accordingly.
     *
     * @param bool $enabled true = debug mode enabled
     * @return void
     */
    public function setDebugMode(bool $enabled): void
    {
        $this->debugMode = $enabled;
        if ($enabled) {
            DebugLogger::enable();
        } else {
            DebugLogger::disable();
        }
    }

    /**
     * Transforms a CSV file.
     *
     * Performs the following steps:
     * 1. Read CSV file with CsvReader
     * 2. Extract metadata from the lines preceding the header
     * 3. Transform rows according to configuration (rows matching the optional
     *    'skipIf' filter are dropped, columns listed in
     *    csvStructure.excludeOutputColumns are removed)
     * 4. Write data to output CSV
     * 5. Collect sample data
     *
     * The output file path is determined from the configuration and cannot be overridden.
     * Failures raised as \Exception are caught and reported through the result array.
     *
     * @param string $inputFile Path to the input CSV file
     * @param int $maxRows Maximum number of data rows to transform; any value <= 0
     *                     inside this method means "no limit". Sample data is limited
     *                     to $maxRows rows when positive, otherwise to 3 rows.
     *
     * @return array Transformation result with:
     * - success: bool (true = successful, false = error)
     * - inputFile: string (input file path, on success only)
     * - outputFile: string (output file path, on success only)
     * - rowsProcessed: int (actually transformed data rows; skipped rows do not count)
     * - sampleData: array (first transformed rows, see $maxRows)
     * - metadata: array (extracted metadata, on success only)
     * - outputColumns: value returned by ColumnTransformer::getOutputColumns()
     *   on success, empty array on failure
     * - debug_logs: collected debug log entries (only on success in debug mode)
     * - error: string (error message, on failure only)
     */
    public function transform(string $inputFile, int $maxRows = 0): array
    {
        $this->sampleData = [];
        $this->rowsProcessed = 0;
        DebugLogger::reset();

        try {
            if ($this->debugMode) {
                DebugLogger::log('transformer', 'Transformation started', [
                    'inputFile' => $inputFile,
                    'maxRows' => $maxRows
                ]);
            }

            // is_file() also rejects directories, which file_exists() would let through.
            if (!is_file($inputFile)) {
                throw new \RuntimeException("Input file not found: {$inputFile}");
            }

            $csvReader = new CsvReader($inputFile, $this->csvStructure);

            // Metadata lives in the lines before the header line.
            $metadata = $this->metadataExtractor->extract($csvReader->readMetadataLines());

            // Rebuild the transformer so that it can use the extracted metadata.
            $this->columnTransformer = new ColumnTransformer(
                $this->configLoader->get('columnTransformations', []),
                $metadata,
                $this->configLoader->get('capitalizationExceptions', [])
            );

            $dataRows = $csvReader->readCsvData($maxRows);
            if (empty($dataRows)) {
                throw new \RuntimeException("No data rows in CSV file");
            }

            $hasRowLimit = $maxRows > 0;
            // A non-positive limit means "all rows"; it must not shrink the sample to nothing.
            $sampleLimit = $hasRowLimit ? $maxRows : self::DEFAULT_SAMPLE_LIMIT;

            /** @var array<string, mixed>|null $skipIfNode */
            $skipIfNode = $this->configLoader->get('skipIf', null);
            $excludeMap = $this->buildExcludeMap();

            $transformedData = [];
            foreach ($dataRows as $row) {
                if ($hasRowLimit && $this->rowsProcessed >= $maxRows) {
                    break;
                }

                if ($skipIfNode !== null && RowFilter::evaluate($skipIfNode, $row)) {
                    // NOTE(review): not guarded by $this->debugMode; presumably the
                    // logger ignores entries while disabled - confirm.
                    DebugLogger::log('transformer', 'Row skipped by skipIf filter', ['row' => $row]);
                    continue;
                }

                $transformedRow = $this->columnTransformer->transformRow($row);

                // Strip excluded columns per row so that sample data never exposes
                // them, even when a later row makes the transformation fail.
                if ($excludeMap !== []) {
                    $transformedRow = array_diff_key($transformedRow, $excludeMap);
                }

                $transformedData[] = $transformedRow;
                if (count($this->sampleData) < $sampleLimit) {
                    $this->sampleData[] = $transformedRow;
                }
                $this->rowsProcessed++;
            }

            $this->csvWriter->write($transformedData);

            $result = [
                'success' => true,
                'inputFile' => $inputFile,
                'outputFile' => $this->csvWriter->getOutputFile(),
                'rowsProcessed' => $this->rowsProcessed,
                'sampleData' => $this->sampleData,
                'metadata' => $metadata,
                'outputColumns' => $this->columnTransformer->getOutputColumns(),
            ];
            if ($this->debugMode) {
                $result['debug_logs'] = DebugLogger::getLogs();
            }

            return $result;
        } catch (\Exception $e) {
            return [
                'success' => false,
                'error' => $e->getMessage(),
                'rowsProcessed' => $this->rowsProcessed,
                'sampleData' => $this->sampleData,
                'outputColumns' => [],
            ];
        }
    }

    /**
     * Transforms and imports CSV into Firefly III.
     *
     * Runs transform() and, when 'fireflyImport.autoImport' is set to a non-empty
     * value in the configuration, hands the output file to FireflyImporter.
     * An import failure does not change the transformation's 'success' flag;
     * it is reported under the 'import' key instead.
     *
     * Backwards-compatible with legacy signature.
     *
     * @param string $inputFile Path to the input CSV file
     * @param int $maxRows Maximum number of data rows to process (<= 0 = all)
     *
     * @return array Result of transform(), extended with:
     * - import: array (Firefly import result, or ['success' => false, 'error' => string]
     *   when the import threw; present only if autoImport is active)
     */
    public function transformAndImport(string $inputFile, int $maxRows = 0): array
    {
        $transformResult = $this->transform($inputFile, $maxRows);
        if (!$transformResult['success']) {
            return $transformResult;
        }

        $fireflyConfig = $this->configLoader->get('fireflyImport', []);
        if (empty($fireflyConfig['autoImport'])) {
            return $transformResult;
        }

        try {
            $importer = new FireflyImporter($fireflyConfig);
            $transformResult['import'] = $importer->import($transformResult['outputFile']);
        } catch (\Exception $e) {
            $transformResult['import'] = [
                'success' => false,
                'error' => $e->getMessage(),
            ];
        }

        return $transformResult;
    }

    /**
     * Validates a CSV file against the configuration.
     *
     * Checks that the file exists and that the required metadata
     * (account_iban, currency_code) can be extracted from the lines
     * preceding the header.
     *
     * @param string $inputFile Path to the CSV file to validate
     *
     * @return array Validation result with:
     * - valid: bool (true = validation successful)
     * - metadata: array (extracted metadata, when valid)
     * - line_count: int (value of CsvReader::countLines(), when valid)
     * - error: string (error message, when not valid)
     * - metadata_found: array (extracted metadata when required keys are missing)
     */
    public function validate(string $inputFile): array
    {
        try {
            // is_file() also rejects directories, which file_exists() would let through.
            if (!is_file($inputFile)) {
                return [
                    'valid' => false,
                    'error' => "File not found: {$inputFile}",
                ];
            }

            $csvReader = new CsvReader($inputFile, $this->csvStructure);
            $metadata = $this->metadataExtractor->extract($csvReader->readMetadataLines());

            $missingMetadata = [];
            foreach (self::REQUIRED_METADATA as $key) {
                if (empty($metadata[$key])) {
                    $missingMetadata[] = $key;
                }
            }

            if ($missingMetadata !== []) {
                return [
                    'valid' => false,
                    'error' => 'Missing metadata: ' . implode(', ', $missingMetadata),
                    'metadata_found' => $metadata,
                ];
            }

            return [
                'valid' => true,
                'metadata' => $metadata,
                'line_count' => $csvReader->countLines(),
            ];
        } catch (\Exception $e) {
            return [
                'valid' => false,
                'error' => 'Validation error: ' . $e->getMessage(),
            ];
        }
    }

    /**
     * Returns the sample data collected by the most recent transform() call.
     *
     * @return array Sample rows (at most maxRows rows when a positive limit was
     *               given, otherwise at most 3)
     */
    public function getSampleData(): array
    {
        return $this->sampleData;
    }

    /**
     * Returns the number of data rows transformed by the most recent transform() call.
     *
     * @return int Number of transformed rows
     */
    public function getRowsProcessed(): int
    {
        return $this->rowsProcessed;
    }

    /**
     * Builds a lookup of column names that must not appear in the output.
     *
     * @return array Column names from csvStructure.excludeOutputColumns as keys,
     *               or an empty array when nothing is excluded
     */
    private function buildExcludeMap(): array
    {
        $excludeColumns = $this->csvStructure['excludeOutputColumns'] ?? [];

        return empty($excludeColumns) ? [] : array_flip($excludeColumns);
    }
}