diff --git a/.github/prompts/generate-config-tests.prompt.md b/.github/prompts/generate-config-tests.prompt.md new file mode 100644 index 0000000..dbb13cd --- /dev/null +++ b/.github/prompts/generate-config-tests.prompt.md @@ -0,0 +1,87 @@ +--- +description: Generate PHPUnit snapshot tests for a transformation config. Use when you want to add or update integration tests for a config file by providing 1–2 example input/output pairs per transformation rule. +argument-hint: "config file path (e.g. config/config-ubs-account.json)" +agent: agent +--- + +# Generate Config Integration Tests + +You help the user create PHPUnit snapshot (golden-file) tests for a specific transformation config. + +## Workflow + +### Step 1 — Identify the config + +If the user provided a config path as the argument, use it. Otherwise list all JSON files in `config/` (excluding `config.example.json`) and ask which one to test. + +### Step 2 — Read and summarise the config + +Read the config file with the read_file tool. Then present a compact table like: + +| # | sourceColumn | transformations | outputColumn | +|---|---|---|---| +| 1 | Buchungsdatum | dateformat (d.m.Y → Y-m-d) | date | +| 2 | Buchungstext | trim → replace → lowercase | description | +| … | … | … | … | + +Also note: the `metadata.extractionRules` (which pre-header lines are extracted and under what names), and `csvStructure.headerLine` / `csvStructure.delimiter`. + +### Step 3 — Collect examples from the user + +For each row in the table, ask the user to provide **1–2 example input cell values** and the **expected output value** after transformation. 
Present a form like: + +``` +Rule 1 — Buchungsdatum → date (dateformat d.m.Y → Y-m-d) + Example 1 input: ___ expected output: ___ + Example 2 input: ___ expected output: ___ (optional) + +Rule 2 — Buchungstext → description (trim → replace → lowercase) + Example 1 input: ___ expected output: ___ + … +``` + +For `_constant_` source columns (metadata injections), ask the user for the expected metadata value that should appear in the output (e.g. the IBAN string). + +For metadata extraction rules, ask for a representative pre-header line string and the expected extracted value. + +If the user skips a rule, use a simple passthrough value (copy source → output unchanged). + +### Step 4 — Synthesise fixture files + +Derive the config name from the filename (e.g. `config-ubs-account.json` → `config-ubs-account`). + +**`tests/fixtures/<config-name>/input.csv`** + +Build a minimal CSV that satisfies `csvStructure`: +- Pre-header lines (lines 1 … headerLine-1): one synthetic line per metadata extraction rule that matches its `regex` and returns the example value the user gave. Remaining pre-header lines (if any) can be empty placeholders. +- Header line (line `headerLine`): the delimiter-separated source column names needed by the config. +- Data rows: one row per example set. Where the user provided two examples for the same rule, use two data rows; align all other columns to the first example's values. + +**`tests/fixtures/<config-name>/expected.csv`** + +Header: the output column names in the order they appear after all transformations are applied (new `create` columns appended after existing ones). +Rows: the expected output values the user specified, aligned to the data rows above. + +### Step 5 — Generate or update `tests/ConfigIntegrationTest.php` + +If the file does not exist, create it. If it exists, add or replace only the parts that concern this config's fixture. 
The class must: + +- Use namespace `UbsCsvTransformer\Tests` +- Extend `PHPUnit\Framework\TestCase` +- Have a `public static function fixtureProvider(): array` that auto-discovers `tests/fixtures/*/` directories, maps each to `['configName' => ..., 'fixtureDir' => ...]` +- Have a `@dataProvider fixtureProvider` test method `testConfigProducesExpectedOutput(string $configName, string $fixtureDir)` that: + 1. Resolves `config/<configName>.json` + 2. Instantiates `ConfigurationLoader` with that path + 3. Instantiates `TransformerEngine` with the loader + 4. Calls `transform()` with `$fixtureDir/input.csv` into a temp file + 5. Reads the temp file and `$fixtureDir/expected.csv` and asserts they are identical line-by-line with `assertSame` + 6. Cleans up the temp file in tearDown + +Follow all existing project conventions: +- German docblocks +- PSR-12, max line length 150 +- No PHPStan suppressions + +### Step 6 — Verify + +Run `composer test` and confirm the new test passes. If it fails, show the diff and ask the user to correct their expected values, then update `expected.csv`. diff --git a/.gitignore b/.gitignore index 464e9ff..2886748 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ composer.lock # PHP .php_cs.cache .phpunit.cache +.phpunit.result.cache .phpstan.cache .psalm.cache @@ -25,6 +26,7 @@ coverage/ .coverage/ build/ dist/ +example-data/ # Logs *.log @@ -58,3 +60,4 @@ docker-compose.override.yml *.bak *.backup /tmp/ +/~archive/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..6c50c16 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,106 @@ +# Firefly Import Preprocessor — Agent Instructions + +PHP 8.1+ CLI ETL tool that transforms bank CSV exports (UBS E-Banking) into Firefly III-compatible format. See [README.md](README.md) for full documentation. 
+ +## Build & Test + +```bash +composer test # PHPUnit tests +composer lint # phpcs PSR-12 check (src/ bin/) +composer lint-fix # phpcbf auto-fix +composer analyze # phpstan level 8 +composer psalm # Psalm static analysis +``` + +### Test Suite Overview + +85 tests across 5 test classes: + +| File | Tests | Scope | +|------|-------|-------| +| `tests/ColumnTransformerTest.php` | 37 | All 13 transformation types, edge cases | +| `tests/ConfigurationLoaderTest.php` | 18 | JSON loading, dot-notation access, validation | +| `tests/CsvReaderTest.php` | 15 | CSV parsing, BOM handling, delimiter, encoding | +| `tests/MetadataExtractorTest.php` | 14 | Pre-header regex extraction, edge cases | +| `tests/ConfigIntegrationTest.php` | 1× per fixture | Golden-file integration tests (see below) | + +### Integration Tests (Golden-File Pattern) + +`ConfigIntegrationTest` auto-discovers every subdirectory in `tests/fixtures/` and runs a full transform pipeline against it. For each fixture directory `tests/fixtures/<config-name>/`: + +- `input.csv` — minimal representative CSV input +- `expected.csv` — exact expected output after transformation +- `config/<config-name>.json` must exist in the project root config dir + +**Currently active fixtures:** `config-ubs-account` + +**Adding a new fixture:** create the directory, add `input.csv` and `expected.csv`, ensure the matching `config/<config-name>.json` exists. No code changes required — the provider discovers it automatically. + +**Regenerating `expected.csv`** after a config change (replace `<config-name>` accordingly): + +```bash +php -r " +require 'vendor/autoload.php'; +use UbsCsvTransformer\ConfigurationLoader; +use UbsCsvTransformer\TransformerEngine; +\$tmpConfig = sys_get_temp_dir() . 
'/gen.json'; +\$cfg = json_decode(file_get_contents('config/<config-name>.json'), true); +\$cfg['directories']['output'] = 'tests/fixtures/<config-name>'; +\$cfg['csvStructure']['outputFilename'] = 'expected.csv'; +file_put_contents(\$tmpConfig, json_encode(\$cfg, JSON_UNESCAPED_UNICODE)); +\$loader = new ConfigurationLoader(\$tmpConfig); \$loader->load(); +\$engine = new TransformerEngine(\$loader); +\$result = \$engine->transform('tests/fixtures/<config-name>/input.csv'); +unlink(\$tmpConfig); +echo \$result['success'] ? 'OK' . PHP_EOL : 'ERROR: ' . \$result['error'] . PHP_EOL; +" +``` + +Run the tool: + +```bash +php bin/transformer.php test input.csv config/config.json --rows=5 +php bin/transformer.php transform input.csv config/config.json --output=output.csv +php bin/transformer.php validate config/config.json --strict +php bin/transformer.php auto-import config/config.json --watch +# Add --debug / -d for verbose output +``` + +## Architecture + +```bash +bin/transformer.php → TransformerEngine + ├── ConfigurationLoader (JSON config) + ├── CsvReader (reads + BOM handling) + ├── MetadataExtractor (regex on pre-header lines) + ├── ColumnTransformer (transformation pipeline) + ├── CsvWriter (output CSV) + └── FireflyImporter (optional, shells to Firefly CLI) +``` + +`DebugLogger` is a static helper used across all components; activated by the `--debug` flag. +`TransformerEngine` instantiates `CsvReader` per call (in `transform()`/`validate()`), not in the constructor. + +## Conventions + +- **PSR-12** enforced via phpcs using `phpcs.xml` (auto-discovered at root). Line length: soft 120, hard 150 chars. +- **PHPStan level 8** with `checkMissingCallableSignature: true`. `phpstan-baseline.neon` is empty — do not add suppressions without good reason. +- **All source comments and docblocks are written in German.** +- Namespace `UbsCsvTransformer\` (PSR-4 → `src/`); tests use `UbsCsvTransformer\Tests\` (→ `tests/`). +- No runtime package dependencies — only `ext-json` and `ext-mbstring`. 
+ +## Config Format + +See [config/config.example.json](config/config.example.json) for a full reference. Three top-level sections: + +- **`metadata.extractionRules`** — regex rules against 1-based pre-header line numbers +- **`csvStructure`** — `headerLine`, `delimiter`, `encoding`, `hasBom` +- **`columnTransformations`** — array of per-column transformation pipelines + +### Key patterns in config + +- `"sourceColumn": "_constant_"` — injects an extracted metadata value (e.g. IBAN) as a new output column without reading a CSV column +- `"outputAction": "create"` vs `"overwrite"` — controls whether the result is a new column or replaces an existing one +- `MetadataExtractor` uses 1-based `lineNumber` in config; it converts to 0-based array index internally + +Supported transformation types: `map`, `replace`, `regex`, `regexextract`, `dateformat`, `split`, `trim`, `uppercase`, `lowercase`, `ucwordsfirst`, `truncate`, `constantvalue`, `pipeline` diff --git a/README.md b/README.md new file mode 100644 index 0000000..2ffce8e --- /dev/null +++ b/README.md @@ -0,0 +1,1073 @@ +# Firefly Import Preprocessor - Dokumentation + +**Version:** 1.0.0 +**Datum:** 10. Dezember 2025 +**Status:** Production Ready + +--- + +## 📋 Inhaltsverzeichnis + +1. [Überblick](#überblick) +2. [Installation & Setup](#installation--setup) +3. [Schnellstart](#schnellstart) +4. [Konfiguration](#konfiguration) +5. [Transformationstypen](#transformationstypen) +6. [CLI-Referenz](#cli-referenz) +7. [Debug-Modus](#debug-modus) +8. [Firefly III Integration](#firefly-iii-integration) +9. [Architektur](#architektur) +10. [Fehlerbehandlung](#fehlerbehandlung) + +--- + +## Überblick + +Der **Firefly Import Preprocessor** ist ein produktionsreifer PHP-Preprocessor für Banken-CSV-Exportdateien. Er transformiert Bankdaten in ein standardisiertes Format und kann sie optional in Firefly III importieren. 
+ +### Kernfeatures + +✅ **Vollständige CSV-Transformation** mit komplexen Pipelines +✅ **Metadaten-Extraktion** mit Regex (IBAN, Währung, Kontoname) +✅ **13 Transformationstypen** für flexible Datenverarbeitung +✅ **Firefly III Integration** mit Docker-Support +✅ **Debug-Modus** für Transparenz bei Verarbeitung +✅ **Production Ready** mit vollständiger Fehlerbehandlung +✅ **Zero Dependencies** für Core-Funktionalität + +### Workflow + +``` +Input CSV + ↓ +Metadaten extrahieren (Regex) + ↓ +Datenzeilen transformieren (Pipeline) + ↓ +Output CSV schreiben + ↓ +[Optional] In Firefly III importieren +``` + +--- + +## Installation & Setup + +### Voraussetzungen + +- PHP 8.1+ +- Composer (empfohlen) +- [Optional] Docker für Firefly III Integration + +### Installation + +```bash +# 1. Repository clonen/kopieren +cd ff-imp-preprocessor + +# 2. Abhängigkeiten installieren (optional) +composer install + +# 3. Konfiguration erstellen +cp config/config.example.json config/config.json +# Bearbeite config/config.json mit deinen Einstellungen + +# 4. Directories erstellen +mkdir -p config/import/{source,output,archive,error} +chmod 755 config/import/{source,output,archive,error} + +# 5. Test durchführen +php bin/transformer.php validate config/config.json input.csv +``` + +--- + +## Schnellstart + +### 1. Konfiguration anpassen + +Bearbeite `config/config.json` und stelle sicher, dass die Extraction-Rules zu deinem CSV-Format passen: + +```json +{ + "metadata": { + "extractionRules": [ + { + "name": "account_iban", + "lineNumber": 2, + "regex": "IBAN:\\s*([A-Z0-9 ]+)", + "captureGroup": 1 + } + ] + }, + "csvStructure": { + "headerLine": 5, + "delimiter": ";", + "encoding": "UTF-8" + } +} +``` + +### 2. CSV validieren + +```bash +php bin/transformer.php validate config/config.json input.csv +``` + +Output: +``` +✓ CSV ist valid + IBAN: CH9300762011623852957 + Währung: CHF + Zeilen: 150 +``` + +### 3. 
Transformation durchführen + +```bash +php bin/transformer.php process config/config.json input.csv + +# Mit Debug-Modus für Fehlersuche +php bin/transformer.php process config/config.json input.csv --debug +``` + +### 4. Output prüfen + +```bash +# Transformierte Datei +cat config/import/output/transformed.csv + +# Oder mit Debug-Ausgabe +php bin/transformer.php test config/config.json input.csv --debug +# Zeigt max. 10 transformierte Zeilen und Debug-Logs +``` + +--- + +## Konfiguration + +### config.json Struktur + +#### `metadata` - Metadaten-Extraktion + +```json +{ + "metadata": { + "extractionRules": [ + { + "name": "account_iban", + "lineNumber": 2, + "regex": "IBAN:\\s*([A-Z0-9 ]+)", + "captureGroup": 1 + }, + { + "name": "currency_code", + "lineNumber": 3, + "regex": "Währung:\\s*([A-Z]{3})", + "captureGroup": 1 + } + ] + } +} +``` + +| Feld | Typ | Beschreibung | +|------|-----|-------------| +| `name` | string | Name der Metadaten-Variable (verwendet in constantvalue) | +| `lineNumber` | int | Zeilennummer in CSV (1-basiert, menschenlesbar) | +| `regex` | string | Regex-Pattern zur Extraktion (ohne Delimiter) | +| `captureGroup` | int | Nummer der Klammer-Gruppe (0=komplett, 1=erste Klammer, etc.) | + +**Beispiel Regex:** +- Pattern: `IBAN:\s*([A-Z0-9 ]+)` +- Input: `IBAN: CH93 0076 2011 6238 5295 7` +- Capture Group 1: `CH93 0076 2011 6238 5295 7` + +#### `csvStructure` - CSV-Format + +```json +{ + "csvStructure": { + "headerLine": 5, + "delimiter": ";", + "encoding": "UTF-8", + "hasBom": false + } +} +``` + +| Feld | Typ | Default | Beschreibung | +|------|-----|---------|-------------| +| `headerLine` | int | 5 | Zeilennummer der Header (1-basiert) | +| `delimiter` | string | `;` | CSV-Delimiter | +| `encoding` | string | `UTF-8` | Zeichenkodierung (UTF-8, ISO-8859-1, CP1252) | +| `hasBom` | bool | false | Hat die Datei BOM (Byte Order Mark)? 
| + +#### `columnTransformations` - Spalten-Transformationen + +```json +{ + "columnTransformations": [ + { + "sourceColumn": "Buchungsdatum", + "transformations": [ + { + "type": "dateformat", + "fromFormat": "d.m.Y", + "toFormat": "Y-m-d" + } + ], + "outputColumn": "date", + "outputAction": "overwrite" + } + ] +} +``` + +**outputAction:** +- `overwrite` - Überschreibe sourceColumn +- `create` - Erstelle neue Spalte (für Regex-Extract, Split, etc.) + +#### `directories` - Dateisystem + +```json +{ + "directories": { + "source": "/opt/ff-imp-preprocessor/import/source", + "output": "/opt/ff-imp-preprocessor/import/output", + "archive": "/opt/ff-imp-preprocessor/import/archive", + "error": "/opt/ff-imp-preprocessor/import/error" + } +} +``` + +| Feld | Beschreibung | +|------|-------------| +| `source` | Eingabe-Verzeichnis | +| `output` | Ausgabe-Verzeichnis | +| `archive` | Archiv für verarbeitete Dateien | +| `error` | Error-Verzeichnis für ungültige Dateien | + +#### `fireflyImport` - Firefly III Integration + +```json +{ + "fireflyImport": { + "jsonConfig": "/opt/firefly/import-config.json", + "importerCommand": "docker exec -it firefly-importer php artisan importer:import", + "autoImport": false, + "deleteAfterImport": false, + "timeout": 300, + "environment": { + "FIREFLY_III_URL": "https://your-firefly.com", + "FIREFLY_III_ACCESS_TOKEN": "your-token-here" + } + } +} +``` + +--- + +## Transformationstypen + +Es gibt **13 unterstützte Transformationstypen**, die als Pipeline kombiniert werden können: + +### 1. **trim** - Leerzeichen entfernen + +Entfernt Leerzeichen am Anfang und Ende. + +```json +{ + "type": "trim" +} +``` + +**Beispiel:** +- Input: ` Coop Pronto ` +- Output: `Coop Pronto` + +--- + +### 2. **lowercase** - Zu Kleinbuchstaben + +Wandelt in Kleinbuchstaben um (UTF-8 safe). + +```json +{ + "type": "lowercase" +} +``` + +**Beispiel:** +- Input: `COOP PRONTO CHUR` +- Output: `coop pronto chur` + +--- + +### 3. 
**uppercase** - Zu Grossbuchstaben + +Wandelt in Grossbuchstaben um (UTF-8 safe). + +```json +{ + "type": "uppercase" +} +``` + +**Beispiel:** +- Input: `Coop Pronto Chur` +- Output: `COOP PRONTO CHUR` + +--- + +### 4. **ucwordsfirst** - Grossschreibung nach Trennzeichen + +Grossschreibt ersten Buchstaben nach Worttrennzeichen. + +```json +{ + "type": "ucwordsfirst" +} +``` + +**Beispiele:** +- `COOP PRONTO CHUR` → `Coop Pronto Chur` +- `migros-rail city` → `Migros-Rail City` +- `O'NEILL STORE` → `O'Neill Store` +- `SAINT-JEAN-DE-MAURIENNE` → `Saint-Jean-De-Maurienne` + +Trennzeichen: Leerzeichen, Bindestrich, Apostroph, Slash, Punkt, Komma, Semikolon, Doppelpunkt, Klammern. + +--- + +### 5. **replace** - String-Replacement + +Ersetzt Substring durch anderen String (case-sensitive). + +```json +{ + "type": "replace", + "search": " ", + "replace": " " +} +``` + +**Beispiel:** +- Input: `Coop Pronto` (2 Leerzeichen) +- Output: `Coop Pronto` (1 Leerzeichen) + +--- + +### 6. **split** - Spalte teilen + +Teilt einen Wert bei Delimiter und behält einen definierten Teil. + +```json +{ + "type": "split", + "delimiter": ";", + "part": 0 +} +``` + +**Beispiel:** +- Input: `Coop Pronto Chur;7007 Chur` +- Config: `delimiter=";"`, `part=0` +- Output: `Coop Pronto Chur` + +--- + +### 7. **regex** - Regex-Ersetzung + +Ersetzt Teile des Strings per regulärem Ausdruck. Nutzt PHP `preg_replace`. 
+ +```json +{ + "type": "regex", + "pattern": "^(.*?);.*$", + "replace": "$1" +} +``` + +**Beispiel — mit Match:** +- Input: `Friis, Daniela Silvia; Zahlung UBS TWINT` +- Pattern: `^(.*?);.*$`, Replace: `$1` +- Output: `Friis, Daniela Silvia` + +**Beispiel — kein Match (pass-through):** +- Input: `Coop Pronto Chur` (kein Semikolon) +- Output: `Coop Pronto Chur` ← Wert bleibt **unverändert** + +**Beispiel — Pattern ohne Anchors (ersetzt nur den gematchten Teil):** +- Input: `Zahlung UBS TWINT; Referenz 12345` +- Pattern: `UBS TWINT`, Replace: `TWINT` +- Output: `Zahlung TWINT; Referenz 12345` + +**Hinweise:** +- **Kein Match → Originalwert wird unverändert weitergegeben** (pipeline-sicher) +- Capture Groups als `$1`, `$2`, … im `replace` referenzieren +- Pattern ohne `^`/`$`-Anchors ersetzt nur den gematchten Teilstring, nicht den ganzen Wert + +--- + +### 8. **regexextract** - Regex-Extraktion + +Extrahiert eine Capture Group per regulärem Ausdruck und gibt **nur diese** zurück. Nutzt PHP `preg_match`. + +```json +{ + "sourceColumn": "Mitteilungen", + "transformations": [ + { + "type": "regexextract", + "pattern": "(\\d{4,} [^;]+)" + } + ], + "outputColumn": "location", + "outputAction": "create" +} +``` + +**Beispiel — mit Match:** +- Input: `Coop Pronto Chur, 7007 Chur` +- Pattern: `(\d{4,} [^;]+)` +- Output: `7007 Chur` ← nur Capture Group 1 + +**Beispiel — kein Match:** +- Input: `Dauerauftrag Miete` +- Output: `` (leerer String) + +**Hinweise:** +- **Kein Match → leerer String** (vorherige Pipeline-Schritte gehen verloren) +- Nutzt Capture Group 1 wenn vorhanden, sonst komplettes Match +- **⚠ Nicht pipeline-sicher:** Nur als einzigen oder letzten Schritt verwenden. Wenn bei Kein-Match der bisherige Wert erhalten bleiben soll, `regex` verwenden. + +--- + +### 9. **dateformat** - Datum-Umformat + +Konvertiert zwischen Datum-Formaten. 
+ +```json +{ + "type": "dateformat", + "fromFormat": "d.m.Y", + "toFormat": "Y-m-d" +} +``` + +**Beispiel:** +- Input: `10.12.2025` +- Output: `2025-12-10` + +**Supported Formate:** Alle PHP DateTime-Formate (d, m, Y, H, i, s, etc.) + +--- + +### 10. **truncate** - String kürzen + +Kürzt String auf maximale Länge. + +```json +{ + "type": "truncate", + "maxLength": 100 +} +``` + +**Beispiel:** +- Input: `Dieser sehr lange Text...` (150 Zeichen) +- Output: `Dieser sehr lange Text...` (100 Zeichen) + +--- + +### 11. **constantvalue** - Konstanten-Wert aus Metadaten + +Nutzt extrahierte Metadaten als konstanten Wert. + +```json +{ + "sourceColumn": "_constant_", + "transformations": [ + { + "type": "constantvalue", + "metadataKey": "account_iban" + } + ], + "outputColumn": "account_iban", + "outputAction": "create" +} +``` + +**Beispiel:** +- metadataKey: `account_iban` (aus Extraktion) +- Wert: `CH9300762011623852957` +- Jede Zeile erhält diesen Wert in neuer Spalte + +--- + +### 12. **map** - Spalte kopieren (Standard) + +Kopiert oder benennt eine Spalte um. + +```json +{ + "sourceColumn": "Buchungstext", + "transformations": [ + { + "type": "map" + } + ], + "outputColumn": "description", + "outputAction": "overwrite" +} +``` + +**Beispiel:** +- Input Spalte: `Buchungstext` +- Output Spalte: `description` mit gleichem Inhalt + +--- + +### 13. **pipeline** - Verschachtelte Pipeline + +Führt eine Unter-Pipeline innerhalb eines Transformationsschritts aus. Ermöglicht das Gruppieren von Schritten als Einheit. + +```json +{ + "type": "pipeline", + "steps": [ + { "type": "trim" }, + { "type": "lowercase" }, + { "type": "ucwordsfirst" } + ] +} +``` + +**Hinweis:** In der Praxis werden mehrere Schritte direkt als `transformations`-Array aufgelistet. `pipeline` als Typ ist nützlich wenn innerhalb einer `transformations`-Liste eine Gruppe von Schritten bedingt oder als Einheit behandelt werden soll. 
+ +--- + +### Pipeline-Beispiel + +Mehrere Transformationen hintereinander: + +```json +{ + "sourceColumn": "Buchungstext", + "transformations": [ + { "type": "trim" }, + { "type": "replace", "search": " ", "replace": " " }, + { "type": "lowercase" }, + { "type": "ucwordsfirst" } + ], + "outputColumn": "description", + "outputAction": "overwrite" +} +``` + +**Verarbeitung:** +1. `" COOP PRONTO "` → trim → `"COOP PRONTO"` +2. `"COOP PRONTO"` → replace → `"COOP PRONTO"` +3. `"COOP PRONTO"` → lowercase → `"coop pronto"` +4. `"coop pronto"` → ucwordsfirst → `"Coop Pronto"` + +--- + +## CLI-Referenz + +Der Transformer wird über `bin/transformer.php` aufgerufen: + +```bash +php bin/transformer.php [options] +``` + +### Kommandos + +#### `process` - Führe Transformation aus + +```bash +php bin/transformer.php process config/config.json input.csv [--debug] +``` + +- Transformiert CSV-Datei komplett +- Schreibt in `directories.output` (aus config) +- Optional: `--debug` für Debug-Logs + +**Output:** +``` +✓ Transformation erfolgreich + Input: input.csv + Output: config/import/output/transformed.csv + Zeilen: 245 + Sample: + - [Datum: 2025-01-10, Beschreibung: ...] +``` + +--- + +#### `validate` - Validiere CSV + +```bash +php bin/transformer.php validate config/config.json input.csv +``` + +- Prüft CSV-Format +- Extrahiert Metadaten +- Validiert erforderliche Felder + +**Output:** +``` +✓ CSV ist valid + Zeilen: 245 + IBAN: CH9300762011623852957 + Währung: CHF +``` + +--- + +#### `test` - Test-Run mit Beispieldaten + +```bash +php bin/transformer.php test config/config.json input.csv [--debug] +``` + +- Transformiert maximal 10 Zeilen +- Zeigt Metadaten und Beispiel-Output +- Schnell zur Prüfung von Konfiguration + +**Output:** +``` +✓ Test erfolgreich + Max 10 Zeilen verarbeitet: 10 + Metadaten: + - IBAN: CH93... 
+ - Währung: CHF + Beispiel (Zeile 1): + - date: 2025-01-10 + - description: coop pronto chur + - amount: 25.50 +``` + +--- + +#### `help` - Zeige Hilfe + +```bash +php bin/transformer.php help +``` + +--- + +### Optionen + +| Option | Beschreibung | Beispiel | +|--------|-------------|---------| +| `--debug` | Aktiviere Debug-Modus | `process config.json input.csv --debug` | +| `--test` | Führe nur Test mit 10 Zeilen aus | `process config.json input.csv --test` | + +--- + +## Debug-Modus + +Der Debug-Modus bietet maximale Transparenz über die Verarbeitung. + +### Aktivierung + +```bash +php bin/transformer.php process config/config.json input.csv --debug +``` + +### Ausgegebene Informationen + +Der Debug-Modus protokolliert: + +**1. Transformer Level** +- Transformation gestartet (Input-Datei, maxRows) +- Transformation abgeschlossen +- Fehler + +**2. CSV Reader Level** +- Metadaten-Zeilen gelesen (Anzahl) +- Datenzeilen gelesen (Anzahl) + +**3. Metadata Extraction Level** +- Extraktion-Regel angewendet +- IBAN extrahiert +- Währung extrahiert +- Fehlerhafte Regex +- Fehlende Zeilen + +**4. Transformation Level** +- Split-Transformation angewendet +- RegexExtract-Transformation angewendet +- Fehlerhafte Transformationen + +**5. CSV Writer Level** +- Transformierte Daten geschrieben +- Output-Datei-Pfad + +### JSON-Output mit Debug-Logs + +Mit `--debug` wird das Ergebnis erweitert: + +```json +{ + "success": true, + "inputFile": "input.csv", + "outputFile": "output/transformed.csv", + "rowsProcessed": 245, + "metadata": { "account_iban": "CH93...", "currency_code": "CHF" }, + "debug_logs": [ + { + "timestamp": 1702200120.5432, + "category": "transformer", + "message": "Transformation started", + "data": { "inputFile": "input.csv", "maxRows": 0 } + }, + { + "timestamp": 1702200120.5445, + "category": "metadata", + "message": "Extraction rule applied", + "data": { "rule_name": "account_iban", "value": "CH93..." } + } + // ... 
weitere Logs + ] +} +``` + +### Debug-Log-Kategorien + +| Kategorie | Beispiel | Wann | +|-----------|---------|------| +| `transformer` | Transformation gestartet | Anfang/Ende Transformation | +| `csv_reader` | Zeilen gelesen | Beim CSV lesen | +| `metadata` | IBAN extrahiert | Bei Metadaten-Extraktion | +| `metadata_warning` | Regex matched nicht | Bei Problemen | +| `transformation` | Split angewendet | Bei jeder Transformation | +| `csv_writer` | Daten geschrieben | Beim CSV schreiben | + +--- + +## Firefly III Integration + +Der Transformer kann transformierte Daten automatisch in Firefly III importieren. + +### Setup + +1. **Firefly III laufen lassen** + +```bash +docker-compose up -d firefly-iii +docker-compose up -d firefly-importer +``` + +2. **Konfiguration anpassen** + +```json +{ + "fireflyImport": { + "autoImport": true, + "environment": { + "FIREFLY_III_URL": "https://your-firefly.local", + "FIREFLY_III_ACCESS_TOKEN": "your-personal-access-token" + } + } +} +``` + +3. **Import-Konfiguration erstellen** + +`/opt/firefly/import-config.json`: +```json +{ + "accounts": [ + { + "name": "UBS Checking", + "iban": "CH9300762011623852957", + "account_type": "asset" + } + ], + "column_mapping": { + "date": "date", + "amount": "amount", + "description": "description" + } +} +``` + +4. 
**Auto-Import aktivieren** + +```bash +php bin/transformer.php process config/config.json input.csv +# Wenn autoImport=true, wird die transformierte Datei automatisch importiert +``` + +### Manueller Import + +```bash +docker exec -it firefly-importer php artisan importer:import +``` + +--- + +## Architektur + +### Komponenten + +``` +bin/transformer.php (CLI Entry Point) + ↓ +TransformerEngine (Orchestrierung) + ├─ ConfigurationLoader (Config laden/validieren) + ├─ CsvReader (CSV einlesen) + ├─ MetadataExtractor (Metadaten mit Regex) + ├─ ColumnTransformer (Transformationen anwenden) + ├─ CsvWriter (CSV schreiben) + ├─ FireflyImporter (Firefly III Integration) + └─ DebugLogger (Debug-Protokolle) +``` + +### Datenfluss + +``` +Input CSV + ↓ +CsvReader::readMetadataLines() → Array von Zeilen + ↓ +MetadataExtractor::extract() → {iban: "...", currency: "..."} + ↓ +CsvReader::readCsvData() → Array von Zeilen + ↓ +ColumnTransformer::transformRow() → Transformierte Zeile (Pipeline) + ↓ +CsvWriter::write() → Output CSV +``` + +### Klassen + +| Klasse | Verantwortung | +|--------|---------------| +| `TransformerEngine` | Orchestriert gesamten Workflow | +| `ConfigurationLoader` | Lädt JSON/YAML Konfiguration | +| `CsvReader` | Liest CSV mit Metadaten | +| `MetadataExtractor` | Extrahiert mit Regex | +| `ColumnTransformer` | Transformiert Spalten (Pipeline) | +| `CsvWriter` | Schreibt CSV | +| `FireflyImporter` | Importiert in Firefly III | +| `DebugLogger` | Statischer Logger für Debug | + +--- + +## Fehlerbehandlung + +### Häufige Fehler + +#### 1. "Input-Datei nicht gefunden" + +```bash +Error: Input-Datei nicht gefunden: input.csv +``` + +**Ursache:** Dateipfad falsch oder Datei nicht vorhanden + +**Lösung:** +```bash +# Prüfe Dateipfad +ls -la input.csv + +# Nutze absoluten Pfad wenn relativ nicht funktioniert +php bin/transformer.php process config.json /absolute/path/input.csv +``` + +--- + +#### 2. 
"Fehlende Metadaten: account_iban" + +```bash +Error: Fehlende Metadaten: account_iban +``` + +**Ursache:** IBAN konnte nicht extrahiert werden (Regex oder Zeile falsch) + +**Lösung:** +```bash +# Debugging mit --debug +php bin/transformer.php validate config.json input.csv + +# Prüfe erste 5 Zeilen des CSV +head -5 input.csv + +# Überprüfe lineNumber und regex in config.json +# lineNumber sollte auf Zeile mit IBAN zeigen +``` + +--- + +#### 3. "Ungültiges JSON" + +```bash +Error: Ungültiges JSON: Syntax error, malformed JSON +``` + +**Ursache:** JSON-Syntax-Fehler in config.json + +**Lösung:** +```bash +# Validiere JSON +php -r "json_decode(file_get_contents('config/config.json'), true) or die('JSON invalid');" + +# Oder nutze online JSON validator +# https://jsonlint.com/ +``` + +--- + +#### 4. "CSV-Struktur-Fehler" + +```bash +Error: 'csvStructure.headerLine' erforderlich +``` + +**Ursache:** Erforderliche Konfigurationsfelder fehlen + +**Lösung:** +```bash +# Vergleiche mit config.example.json +diff config/config.json config/config.example.json + +# Stellen Sie sicher, dass folgende Felder vorhanden sind: +# - csvStructure.headerLine (Zeilennummer der Kopfzeile) +# - csvStructure.delimiter (CSV-Trennzeichen) +# - metadata.extractionRules (mindestens 1) +# - columnTransformations (mindestens 1) +``` + +--- + +### Exception-Handling + +Der Transformer nutzt Try-Catch für robuste Fehlerbehandlung: + +```php +try { + $result = $engine->transform($inputFile); + if (!$result['success']) { + echo "Fehler: " . $result['error']; + } +} catch (Exception $e) { + echo "Fatal Error: " . 
$e->getMessage(); +} +``` + +### Debug-Modus für Fehlersuche + +Der Debug-Modus zeigt detailliert wo der Fehler liegt: + +```bash +php bin/transformer.php process config/config.json input.csv --debug 2>&1 | tail -50 +``` + +--- + +## Besonderheiten & Tipps + +### UTF-8 Handling + +Der Transformer nutzt UTF-8 safe Funktionen: +- `mb_strtolower()` statt `strtolower()` +- `mb_strtoupper()` statt `strtoupper()` +- `mb_strlen()` für korrektes Character-Counting + +Unterstützte Encodings: UTF-8, ISO-8859-1, CP1252 + +### Regex-Tipps + +**Pattern ohne Delimiter:** +```json +"pattern": "IBAN:\\s*([A-Z0-9 ]+)" +// Wird zu: /IBAN:\s*([A-Z0-9 ]+)/u +``` + +**Mit Flags:** +```json +"pattern": "/IBAN:\\s*([A-Z0-9 ]+)/iu" +// Case-insensitive +``` + +**Spezielle Zeichen escapen:** +```json +"pattern": "\\(\\d{4}\\)" +// Matcht: (1234) +``` + +### Performance + +- **Max. Dateigröße:** Abhängig von RAM (typically 100MB+) +- **Optimiert für:** Bis zu 1 Million Zeilen +- **Typical:** 10-100k Zeilen pro Datei + +### Batch-Processing + +```bash +#!/bin/bash +for file in import/source/*.csv; do + php bin/transformer.php process config/config.json "$file" + if [ $? 
-eq 0 ]; then + mv "$file" import/archive/ + else + mv "$file" import/error/ + fi +done +``` + +--- + +## Support & Troubleshooting + +### Logging aktivieren + +Nutze den Built-in Debug-Logger: + +```bash +php bin/transformer.php test config/config.json input.csv --debug > debug.log 2>&1 +cat debug.log | grep -i error +``` + +### Nützliche Kommandos + +```bash +# Test CSV-Validität +php bin/transformer.php validate config.json input.csv + +# Test nur mit 10 Zeilen +php bin/transformer.php test config.json input.csv + +# Mit voller Debug-Ausgabe +php bin/transformer.php process config.json input.csv --debug + +# Prüfe PHP-Syntax aller Dateien +php -l bin/transformer.php +php -l src/*.php + +# Validiere Konfiguration +php -r "require 'src/ConfigurationLoader.php'; new UbsCsvTransformer\ConfigurationLoader('config/config.json');" +``` + +--- + +## Version & Änderungen + +**v1.0.0 (10. Dezember 2025)** +- ✅ Initial Release +- ✅ 11 Transformationstypen +- ✅ Metadaten-Extraktion mit Regex +- ✅ Debug-Modus +- ✅ Firefly III Integration +- ✅ Vollständige Dokumentation + +--- + +**Lizenz:** MIT +**Author:** PHP CSV Transformer Project +**Repository:** https://git.andare.ch/david.reindl/ff-imp-preprocessor \ No newline at end of file diff --git a/bin/transformer.php b/bin/transformer.php new file mode 100755 index 0000000..d62ebf3 --- /dev/null +++ b/bin/transformer.php @@ -0,0 +1,543 @@ +#!/usr/bin/env php + + handleTest($argc, $argv), + 'transform' => handleTransform($argc, $argv), + 'validate' => handleValidate($argc, $argv), + 'auto-import' => handleAutoImport($argc, $argv), + 'help', '-h', '--help' => showHelp(), + default => throw new Exception("Unbekanntes Kommando: $command"), + }; +} catch (Exception $e) { + fwrite(STDERR, "\n❌ ERROR: " . $e->getMessage() . 
"\n\n"); + exit(1); +} + +// ============================================================================ +// COMMAND HANDLERS +// ============================================================================ + +/** + * Zeige Hilfe und Verwendungsanleitung + */ +function showHelp(): void +{ + echo <<<'HELP' +╔════════════════════════════════════════════════════════════════════════════╗ +║ Firefly Import Preprocessor - Kommandozeilen-Tool ║ +║ ║ +║ Ein schlankes PHP 8 Tool zur Transformation von UBS E-Banking Exporten ║ +║ in ein Firefly III kompatibles Format. ║ +╚════════════════════════════════════════════════════════════════════════════╝ + +VERWENDUNG: + transformer [command] [options] + +KOMMANDOS: + + test [input] [config] [options] + Testet die Transformation mit limitierter Zeilenzahl + Optionen: + --rows=N Nur N Zeilen verarbeiten (Standard: 10) + --output=FILE, -o Ergebnis auch in Datei schreiben + Beispiel: + transformer test ubs-export.csv config.json --rows=5 + transformer test ubs-export.csv config.json -o test-output.csv + + transform [input] [config] [options] + Transformiert eine komplette CSV-Datei + Optionen: + --output=FILE, -o Output-Pfad (Standard: input-transformed.csv) + --no-import Nicht automatisch in Firefly III importieren + Beispiel: + transformer transform ubs-export.csv config.json + transformer transform ubs-export.csv config.json -o import.csv + + validate [config] [options] + Validiert die Konfigurationsdatei + Optionen: + --strict Strikte Validierung (empfohlen) + Beispiel: + transformer validate config.json + transformer validate config.json --strict + + auto-import [config] [options] + Überwacht Quellverzeichnis und verarbeitet neue Dateien + Optionen: + --watch Kontinuierliche Überwachung (Daemon-Modus) + --interval=SEC Prüfintervall in Sekunden (Standard: 60) + --dry-run Zeigt was gemacht würde (keine echte Verarbeitung) + Beispiel: + transformer auto-import config.json + transformer auto-import config.json --watch --interval=30 
+ + help, -h, --help + Zeige diese Hilfe + +GLOBALE OPTIONEN: + --debug, -d Aktiviere Debug-Modus (detaillierte Ausgaben) + +INSTALLATION: + + 1. PHP 8.1+ muss installiert sein + php --version + + 2. Autoloader-Setup (wähle eins): + Option A: Mit Composer (empfohlen) + composer install + Option B: Manuell - Dateien in Verzeichnisstruktur: + ff-imp-preprocessor/ + ├── bin/transformer.php + ├── src/*.php + └── config/config.json + + 3. Ausführbar machen: + chmod +x bin/transformer.php + + 4. Konfiguration anpassen: + cp config/config.example.json config/config.json + nano config/config.json + +BEISPIELE: + + # Transformation mit Test (erste 5 Zeilen) + ./bin/transformer test data/ubs-export.csv config/config.json --rows=5 + + # Komplette Transformation + ./bin/transformer transform data/ubs-export.csv config/config.json \ + --output=output/firefly-import.csv + + # Konfiguration validieren + ./bin/transformer validate config/config.json --strict + + # Auto-Import mit Überwachung starten + ./bin/transformer auto-import config/config.json --watch + + # Nur nächste Datei verarbeiten + ./bin/transformer auto-import config/config.json + +KONFIGURATION: + + Die config.json muss folgende Struktur haben: + { + "metadata": { "extractionRules": {...} }, + "csvStructure": { "delimiter": ";", ... }, + "columnTransformations": { ... }, + "fireflyImport": { "apiUrl": "...", "apiKey": "..." }, + "directories": { + "source": "./import/source", + "output": "./import/output", + "archive": "./import/archive", + "error": "./import/error" + } + } + +DOKUMENTATION: + + Siehe README.md und UBS_Transformer_Guide.md für vollständige Dokumentation + +LIZENZ: + + MIT License + +HELP; +} + +/** + * Expandiert ~ zu absolutem Home-Verzeichnis und löst relative Pfade auf + */ +function expandPath(string $path): string +{ + if (str_starts_with($path, '~/') || $path === '~') { + $home = getenv('HOME') ?: posix_getpwuid(posix_getuid())['dir']; + $path = $home . 
/**
 * Parse CLI options into an associative array.
 *
 * Recognised forms:
 *   --name         => ['name' => true]
 *   --name=value   => ['name' => 'value']
 *   -x             => ['x' => true]
 *   -x=value       => ['x' => 'value']
 *   -x value       => ['x' => 'value']   (only when the next argument does not start with '-')
 *
 * Arguments that do not start with '-' (and a lone '-') are ignored unless they
 * were consumed as the value of the preceding short option. Without that rule
 * the documented call `-o out.csv` would store `true` and drop the file name.
 *
 * @param array $argv       Raw argument vector.
 * @param int   $startIndex Index of the first argument to inspect.
 *
 * @return array Option name => string value, or true for value-less flags.
 */
function parseOptions(array $argv, int $startIndex = 0): array
{
    $options = [];
    $count = count($argv);

    for ($i = $startIndex; $i < $count; $i++) {
        $arg = (string) $argv[$i];

        // Long option: --name or --name=value
        if (str_starts_with($arg, '--')) {
            $parts = explode('=', substr($arg, 2), 2);
            $options[$parts[0]] = $parts[1] ?? true;
            continue;
        }

        // Anything that is not a short option (including a lone "-") is skipped.
        if (!str_starts_with($arg, '-') || strlen($arg) < 2) {
            continue;
        }

        // Short option: -x, -x=value or -x value
        $parts = explode('=', substr($arg, 1), 2);
        $name = $parts[0];

        if (isset($parts[1])) {
            $options[$name] = $parts[1];
            continue;
        }

        $next = $argv[$i + 1] ?? null;
        if (is_string($next) && $next !== '' && !str_starts_with($next, '-')) {
            // The following argument is this option's value; consume it.
            $options[$name] = $next;
            $i++;
        } else {
            $options[$name] = true;
        }
    }

    return $options;
}
/**
 * Transform a complete CSV file.
 *
 * Expects `$argv[2]` to be the input file and `$argv[3]` the configuration
 * file; everything from index 4 on is parsed as options. `--output=FILE` (or
 * `-o`) overrides the output directory and file name from the configuration.
 *
 * @param int   $argc Number of CLI arguments.
 * @param array $argv CLI argument vector.
 *
 * @throws Exception When arguments are missing, a file does not exist, or the
 *                   output option was given without a usable path.
 */
function handleTransform($argc, $argv): void
{
    if ($argc < 4) {
        throw new Exception("Usage: transformer transform [input-file] [config-file] [options]");
    }

    $inputFile = $argv[2];
    $configFile = $argv[3];
    $options = parseOptions($argv, 4);
    $debug = isset($options['debug']) || isset($options['d']);

    $outputFile = $options['output'] ?? $options['o'] ?? null;

    // A value-less "--output"/"-o" is parsed as boolean true and "--output=" as
    // an empty string. Neither is a path, so fail loudly instead of handing it
    // to expandPath().
    if ($outputFile !== null && (!is_string($outputFile) || $outputFile === '')) {
        throw new Exception("Option --output/-o benötigt einen Dateipfad (z.B. --output=import.csv)");
    }

    if (!file_exists($inputFile)) {
        throw new Exception("Input-Datei nicht gefunden: $inputFile");
    }
    if (!file_exists($configFile)) {
        throw new Exception("Konfigurationsdatei nicht gefunden: $configFile");
    }

    echo "\n🚀 TRANSFORMATION STARTEN\n";
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n";

    $configLoader = new ConfigurationLoader($configFile);
    $configLoader->load();

    // --output overrides the target directory and file name from the configuration.
    if ($outputFile !== null) {
        $outputFile = expandPath($outputFile);
        $configLoader->set('directories.output', dirname($outputFile));
        $configLoader->set('csvStructure.outputFilename', basename($outputFile));
    }

    $engine = new TransformerEngine($configLoader, $debug);
    $result = $engine->transform($inputFile);

    echo "✅ Transformation erfolgreich!\n";
    echo " Output-Datei: " . ($result['outputFile'] ?? 'N/A') . "\n";
    echo " Zeilen transformiert: " . ($result['rowsProcessed'] ?? 0) . "\n";

    echo "\n✅ Fertig!\n\n";
}
"\n"; + } + } + } + + if (empty($missing)) { + echo "\n✅ Konfiguration ist VALIDE!\n\n"; + } else { + if ($strict) { + throw new Exception("Strikte Validierung: " . count($missing) . " erforderliche Abschnitte fehlen"); + } + echo "\n⚠️ Konfiguration hat Warnungen aber ist funktional\n\n"; + } + } catch (Exception $e) { + throw new Exception("Validierungsfehler: " . $e->getMessage()); + } +} + +/** + * Auto-Import mit Verzeichnis-Überwachung + */ +function handleAutoImport($argc, $argv): void +{ + if ($argc < 3) { + throw new Exception("Usage: transformer auto-import [config-file] [options]"); + } + + $configFile = $argv[2]; + $options = parseOptions($argv, 3); + $debug = isset($options['debug']) || isset($options['d']); + + if (!file_exists($configFile)) { + throw new Exception("Konfigurationsdatei nicht gefunden: $configFile"); + } + + $configLoader = new ConfigurationLoader($configFile); + $config = $configLoader->load(); + + $sourceDir = $config['directories']['source'] ?? './import/source'; + $outputDir = $config['directories']['output'] ?? './import/output'; + $archiveDir = $config['directories']['archive'] ?? './import/archive'; + $errorDir = $config['directories']['error'] ?? './import/error'; + $dryRun = isset($options['dry-run']); + $watch = isset($options['watch']); + $interval = isset($options['interval']) ? 
/**
 * Process every `*.csv` file found directly inside the source directory.
 *
 * For each file the configuration is reloaded from `$configFile` and the file
 * is transformed, then moved to the archive directory. If the loaded
 * configuration has a non-empty `fireflyImport` section, the result is passed
 * to the Firefly importer. On any Exception the original is moved to the
 * error directory. In dry-run mode nothing is transformed or moved.
 *
 * @param string $sourceDir  Directory scanned for `*.csv` files.
 * @param string $outputDir  Used only for the fallback output path when the
 *                           engine result has no 'outputFile'.
 * @param string $archiveDir Target directory for successfully transformed originals.
 * @param string $errorDir   Target directory for originals whose processing failed.
 * @param array  $config     Kept for backward compatibility; it is replaced by the
 *                           freshly loaded configuration for every file.
 * @param string $configFile Path of the configuration file.
 * @param bool   $dryRun     When true, only report what would be processed.
 * @param bool   $debug      Passed through to the TransformerEngine.
 */
function processImportDirectory($sourceDir, $outputDir, $archiveDir, $errorDir, $config, $configFile, $dryRun = false, $debug = false): void
{
    if (!is_dir($sourceDir)) {
        return;
    }

    $files = glob($sourceDir . '/*.csv');

    if (empty($files)) {
        return;
    }

    foreach ($files as $file) {
        $basename = basename($file);

        // Where the original currently lives. It changes once the file has been
        // archived, so the error handler can still find it if a later step fails.
        $currentLocation = $file;

        try {
            echo "📄 Verarbeite: $basename ... ";

            $configLoader = new ConfigurationLoader($configFile);
            $config = $configLoader->load();

            $engine = new TransformerEngine($configLoader, $debug);

            // Replace only the extension; str_replace() would also rewrite a
            // ".csv" in the middle of the name.
            $outputFile = $outputDir . '/' . pathinfo($basename, PATHINFO_FILENAME) . '-transformed.csv';

            if (!$dryRun) {
                $result = $engine->transform($file);
                $outputFile = $result['outputFile'] ?? $outputFile;

                // Archive the original file
                $archiveFile = $archiveDir . '/' . $basename;
                if (!rename($file, $archiveFile)) {
                    throw new Exception("Konnte nicht archivieren");
                }
                $currentLocation = $archiveFile;

                // Firefly import
                if (!empty($config['fireflyImport'])) {
                    $importer = new FireflyImporter($config['fireflyImport']);
                    $importer->import($outputFile);
                }
            }

            echo "✅\n";
        } catch (Exception $e) {
            echo "❌ " . $e->getMessage() . "\n";

            if (!$dryRun) {
                // Move the original from wherever it is now into the error
                // directory, and report a failed move instead of hiding it.
                $errorFile = $errorDir . '/' . $basename;
                if (!is_file($currentLocation) || !rename($currentLocation, $errorFile)) {
                    echo "⚠️ Konnte $basename nicht nach $errorFile verschieben\n";
                }
            }
        }
    }
}
+ } + }, + "bin": [ + "bin/transformer.php" + ], + "scripts": { + "test": "phpunit", + "lint": "phpcs src/ bin/", + "lint-fix": "phpcbf src/ bin/", + "analyze": "phpstan analyse src/ --level=8", + "psalm": "psalm src/", + "validate-strict": "composer validate --strict" + }, + "minimum-stability": "stable", + "prefer-stable": true, + "config": { + "sort-packages": true, + "platform": { + "php": "8.1" + } + }, + "repositories": [ + { + "type": "vcs", + "url": "https://git.andare.ch/david.reindl/ff-imp-preprocessor" + } + ] +} \ No newline at end of file diff --git a/config/config.example.json b/config/config.example.json new file mode 100644 index 0000000..c3e0d50 --- /dev/null +++ b/config/config.example.json @@ -0,0 +1,224 @@ +{ + "metadata": { + "extractionRules": [ + { + "name": "account_iban", + "lineNumber": 2, + "regex": "IBAN:\\s*([A-Z0-9 ]+)", + "captureGroup": 1 + }, + { + "name": "account_name", + "lineNumber": 1, + "regex": "Konto:\\s*(.+)", + "captureGroup": 1 + }, + { + "name": "currency_code", + "lineNumber": 3, + "regex": "Währung:\\s*([A-Z]{3})", + "captureGroup": 1 + } + ] + }, + + "capitalizationExceptions": ["AG", "GmbH", "SA"], + + "csvStructure": { + "headerLine": 5, + "inputDelimiter": ";", + "outputDelimiter": ",", + "encoding": "UTF-8", + "hasBom": false + }, + + "columnTransformations": [ + { + "sourceColumn": "Buchungsdatum", + "transformations": [ + { + "type": "dateformat", + "fromFormat": "d.m.Y", + "toFormat": "Y-m-d" + } + ], + "outputColumn": "date", + "outputAction": "overwrite" + }, + { + "sourceColumn": "Buchungstext", + "transformations": [ + { + "type": "trim" + }, + { + "type": "replace", + "search": " ", + "replace": " " + }, + { + "type": "lowercase" + } + ], + "outputColumn": "description", + "outputAction": "overwrite" + }, + { + "sourceColumn": "Auftraggeber/Empfänger", + "transformations": [ + { + "type": "trim" + }, + { + "type": "ucwordsfirst" + }, + { + "type": "truncate", + "maxLength": 100 + } + ], + "outputColumn": 
"opposing_name", + "outputAction": "overwrite" + }, + { + "sourceColumn": "Mitteilungen", + "transformations": [ + { + "type": "trim" + }, + { + "type": "split", + "delimiter": ";", + "part": 0 + }, + { + "type": "lowercase" + }, + { + "type": "ucwordsfirst" + } + ], + "outputColumn": "merchant", + "outputAction": "create" + }, + { + "sourceColumn": "Mitteilungen", + "transformations": [ + { + "type": "regexextract", + "pattern": "(\\d{4,} .*)" + } + ], + "outputColumn": "location", + "outputAction": "create" + }, + { + "sourceColumn": "Belastung", + "transformations": [ + { + "type": "replace", + "search": "'", + "replace": "" + }, + { + "type": "replace", + "search": ",", + "replace": "." + } + ], + "outputColumn": "amount", + "outputAction": "create" + }, + { + "sourceColumn": "Gutschrift", + "transformations": [ + { + "type": "replace", + "search": "'", + "replace": "" + }, + { + "type": "replace", + "search": ",", + "replace": "." + } + ], + "outputColumn": "amount_credit", + "outputAction": "create" + }, + { + "sourceColumn": "Saldo", + "transformations": [ + { + "type": "replace", + "search": "'", + "replace": "" + }, + { + "type": "replace", + "search": ",", + "replace": "." 
+ } + ], + "outputColumn": "balance", + "outputAction": "create" + }, + { + "sourceColumn": "_constant_", + "transformations": [ + { + "type": "constantvalue", + "metadataKey": "account_iban" + } + ], + "outputColumn": "account_iban", + "outputAction": "create" + }, + { + "sourceColumn": "_constant_", + "transformations": [ + { + "type": "constantvalue", + "metadataKey": "currency_code" + } + ], + "outputColumn": "currency_code", + "outputAction": "create" + }, + { + "sourceColumn": "_constant_", + "transformations": [ + { + "type": "constantvalue", + "metadataKey": "account_name" + } + ], + "outputColumn": "account_name", + "outputAction": "create" + } + ], + + "fireflyImport": { + "jsonConfig": "/opt/firefly/import-config.json", + "importerCommand": "docker exec -it firefly-importer php artisan importer:import", + "autoImport": false, + "deleteAfterImport": false, + "timeout": 300, + "environment": { + "FIREFLY_III_URL": "https://your-firefly.com", + "FIREFLY_III_ACCESS_TOKEN": "your-token-here" + } + }, + + "directories": { + "source": "/opt/ubs-csv-transformer/import/source", + "output": "/opt/ubs-csv-transformer/import/output", + "archive": "/opt/ubs-csv-transformer/import/archive", + "error": "/opt/ubs-csv-transformer/import/error" + }, + + "test": { + "maxRows": 10, + "showOutput": true + } +} \ No newline at end of file diff --git a/phpcs.xml b/phpcs.xml new file mode 100644 index 0000000..bd1c160 --- /dev/null +++ b/phpcs.xml @@ -0,0 +1,34 @@ + + + PSR-12 Code Standard for Firefly Import Preprocessor Project + + + + + + */vendor/* + */tests/* + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/phpstan-baseline.neon @@ -0,0 +1 @@ + diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..0300ec0 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,13 @@ +parameters: + level: 8 + paths: + - src + 
tmpDir: .phpstan.cache + + checkMissingCallableSignature: true + + ignoreErrors: + - identifier: missingType.iterableValue + +includes: + - phpstan-baseline.neon diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..fd59af2 --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,20 @@ + + + + + tests + + + + + src + + + \ No newline at end of file diff --git a/src/ColumnTransformer.php b/src/ColumnTransformer.php new file mode 100644 index 0000000..22d827b --- /dev/null +++ b/src/ColumnTransformer.php @@ -0,0 +1,743 @@ +transformations = $transformations; + $this->metadata = $metadata; + $this->outputColumns = []; + $this->globalExceptions = $globalExceptions; + } + + /** + * Transformiert eine einzelne Datenzeile + * + * Wendet alle definierten Transformationen auf die Zeile an. + * Kann neue Spalten generieren (z.B. bei regex_extract). + * + * @param array $row Datenzeile mit Header-Keys als Array-Keys + * + * @return array Transformierte Datenzeile + */ + public function transformRow(array $row): array + { + $transformedRow = $row; + + foreach ($this->transformations as $config) { + // Multi-Output Detection (für split) + if (isset($config['outputs']) && is_array($config['outputs'])) { + // Multi-Output Transformation (z.B. split in mehrere Spalten) + $multiOutputResult = $this->handleMultiOutputTransformation($transformedRow, $config); + + // Merge Ergebnisse in transformedRow + foreach ($multiOutputResult as $columnName => $value) { + $transformedRow[$columnName] = $value; + + // Registriere neue Spalten + if (!in_array($columnName, $this->outputColumns)) { + $this->outputColumns[] = $columnName; + } + } + + // Fahre mit nächster Transformation fort + continue; + } + + $targetColumn = $config['outputColumn'] ?? null; + $sourceColumn = $config['sourceColumn'] ?? $targetColumn; + $outputAction = strtolower($config['outputAction'] ?? 
'overwrite'); + + if (empty($targetColumn)) { + throw new \RuntimeException( + "Transformation fehlt 'outputColumn' Feld: " . json_encode($config) + ); + } + + // Track output columns + if (!in_array($targetColumn, $this->outputColumns)) { + $this->outputColumns[] = $targetColumn; + } + + // Handle 'custom' type separately — it operates on the whole row + $singleType = $this->normalizeTransformType($config['type'] ?? ''); + if ($singleType === 'custom' && empty($config['transformations'])) { + $transformedRow = $this->transformCustom($transformedRow, $config); + continue; + } + + // Get source value ('_constant_' is a virtual source with no column data) + $sourceValue = ($sourceColumn === '_constant_') ? '' : ($transformedRow[$sourceColumn] ?? ''); + + // Apply transformation(s) + if (!empty($config['transformations']) && is_array($config['transformations'])) { + // Inline pipeline: array of transformation steps per column entry + $resultValue = $sourceValue; + foreach ($config['transformations'] as $step) { + $resultValue = $this->applySingleTransformation($resultValue, $step); + } + } else { + // Single transformation (flat canonical or legacy form) + $resultValue = $this->applySingleTransformation($sourceValue, $config); + } + + // Apply output action + switch ($outputAction) { + case 'append': + $transformedRow[$targetColumn] = ($transformedRow[$targetColumn] ?? '') . $resultValue; + break; + case 'append-line': + // Wert auf neuer Zeile anhängen; kein führender Zeilenumbruch wenn Ziel leer + if ($resultValue !== '') { + $existing = $transformedRow[$targetColumn] ?? ''; + $transformedRow[$targetColumn] = $existing !== '' ? $existing . "\n" . $resultValue : $resultValue; + } + break; + case 'overwrite-if-empty': + // Nur überschreiben wenn Ziel-Spalte leer ist + if (($transformedRow[$targetColumn] ?? 
'') === '') { + $transformedRow[$targetColumn] = $resultValue; + } + break; + case 'overwrite-if-not-empty': + // Nur überschreiben wenn das Transformations-Ergebnis nicht leer ist + if ($resultValue !== '') { + $transformedRow[$targetColumn] = $resultValue; + } + break; + case 'create': + case 'overwrite': + default: + $transformedRow[$targetColumn] = $resultValue; + break; + } + } + + return $transformedRow; + } + + /** + * Wendet eine einzelne Transformation auf einen Stringwert an + * + * Normalisiert den Typ-Namen (snake_case, PascalCase, no-separator alle akzeptiert) + * und delegiert an die jeweilige transformXxx()-Methode. + * + * @param string $value Eingabewert + * @param array $config Transformationskonfiguration + * @return string Transformierter Wert + */ + private function applySingleTransformation(string $value, array $config): string + { + $transformType = $this->normalizeTransformType($config['type'] ?? 'map'); + + switch ($transformType) { + case 'map': + return $value; + + case 'replace': + return $this->transformReplace($value, $config); + + case 'regex': + return $this->transformRegex($value, $config); + + case 'dateformat': + return $this->transformDate($value, $config); + + case 'split': + return $this->transformSplit($value, $config); + + case 'regexextract': + $extracted = $this->transformRegexExtract($value, $config); + return $extracted ?? ''; + + case 'trim': + return $this->transformTrim($value); + + case 'uppercase': + return $this->transformUppercase($value); + + case 'lowercase': + return $this->transformLowercase($value); + + case 'ucwordsfirst': + return $this->transformUcwordsFirst($value, $config); + + case 'pipeline': + return $this->transformPipeline($value, $config); + + case 'truncate': + $maxLength = (int)($config['maxLength'] ?? 255); + return mb_substr($value, 0, $maxLength, 'UTF-8'); + + case 'constantvalue': + $metaKey = $config['metadataKey'] ?? ''; + return (string)($this->metadata[$metaKey] ?? 
''); + + default: + return $value; + } + } + + /** + * Normalisiert Transformationstyp-Namen: lowercase, Trennzeichen entfernt. + * Erlaubt z.B. dass 'dateformat' und 'dateFormat' beide funktionieren. + */ + private function normalizeTransformType(string $type): string + { + return strtolower(str_replace(['_', '-', ' '], '', $type)); + } + + /** + * String-Replacement Transformation + * + * Konfiguration: + * ``` + * "type": "replace", + * "search": "Alt", + * "replace": "Neu" + * ``` + * + * @param string $value Ursprungswert + * @param array $config Transformationskonfiguration + * + * @return string Transformierter Wert + */ + private function transformReplace(string $value, array $config): string + { + $search = $config['search'] ?? ''; + $replace = $config['replace'] ?? ''; + + if (empty($search)) { + return $value; + } + + return str_replace($search, $replace, $value); + } + + /** + * Regex-Replace Transformation + * + * Wendet einen regulären Ausdruck auf den Wert an und ersetzt den Treffer. + * Backreferenz-Syntax: $1, $2 usw. im replace-String. + * + * Konfiguration: + * ``` + * "type": "regex", + * "pattern": "SumUp \\*+(.*)", + * "replace": "[$1]" + * ``` + * + * @param string $value Ursprungswert + * @param array $config Transformationskonfiguration + * + * @return string Transformierter Wert + */ + private function transformRegex(string $value, array $config): string + { + $pattern = $config['pattern'] ?? ''; + $replace = $config['replace'] ?? ''; + + if (empty($pattern)) { + return $value; + } + + $delimitedPattern = '#' . str_replace('#', '\#', $pattern) . '#u'; + $result = preg_replace($delimitedPattern, $replace, $value); + + return $result ?? 
$value; + } + + /** + * Datum-Format Transformation + * + * Konfiguration: + * ``` + * "type": "date_format", + * "fromFormat": "d.m.Y", + * "toFormat": "Y-m-d" + * ``` + * + * @param string $value Ursprungswert + * @param array $config Transformationskonfiguration + * + * @return string Transformierter Wert + */ + private function transformDate(string $value, array $config): string + { + if (empty($value)) { + return $value; + } + + $fromFormat = $config['fromFormat'] ?? 'd.m.Y'; + $toFormat = $config['toFormat'] ?? 'Y-m-d'; + + try { + $date = \DateTime::createFromFormat($fromFormat, $value); + if ($date === false) { + return $value; + } + return $date->format($toFormat); + } catch (\Exception $e) { + return $value; + } + } + + /** + * Split Transformation + * + * Teilt einen Wert bei einem Delimiter und behaelt einen definierten Teil + * + * Beispiel: + * Input: "Coop Pronto Chur;7007 Chur" + * Config: delimiter=";", part=0 + * Output: "Coop Pronto Chur" + * + * Konfiguration: + * ``` + * "type": "split", + * "delimiter": ";", + * "part": 0 + * ``` + * + * @param string $value Ursprungswert + * @param array $config Transformationskonfiguration + * + * @return string Transformierter Wert + */ + private function transformSplit(string $value, array $config): string + { + if (empty($value)) { + return $value; + } + + $delimiter = $config['delimiter'] ?? ';'; + $part = $config['part'] ?? 0; + + $parts = explode($delimiter, $value); + DebugLogger::log('transformation', 'Applied split transformation', [ + 'input' => $value, + 'delimiter' => $delimiter, + 'part' => $part, + 'parts_count' => count($parts), + 'output' => $parts[$part] ?? 
null, + ]); + + if (!isset($parts[$part])) { + return $value; + } + + return trim($parts[$part]); + } + + /** + * Regex Extract Transformation + * + * Extrahiert einen Teil mit Regex und erstellt neue Spalte + * + * Beispiel: + * Input: "Coop Pronto Chur;7007 Chur" + * Config: pattern="(\d{4,} .*)" + * Output: "7007 Chur" (in neuer Spalte "Location") + * + * Konfiguration: + * ``` + * "Location": { + * "type": "regex_extract", + * "sourceColumn": "Merchant/Description", + * "pattern": "(\\d{4,} .*)" + * } + * ``` + * + * @param string $value Ursprungswert + * @param array $config Transformationskonfiguration + * + * @return string|null Extrahierter Wert oder null + */ + private function transformRegexExtract(string $value, array $config): ?string + { + if (empty($value)) { + return null; + } + + $pattern = $config['pattern'] ?? ''; + + if (empty($pattern)) { + return null; + } + + $pattern = '#' . str_replace('#', '\#', $pattern) . '#'; + + if (!preg_match($pattern, $value, $matches)) { + return null; + } + + DebugLogger::log('transformation', 'Applied regexextract transformation', [ + 'input' => $value, + 'pattern' => $pattern, + 'output' => $matches[1] ?? $matches[0] ?? null, + ]); + + return $matches[1] ?? $matches[0] ?? 
null; + } + + /** + * Trim Transformation + * + * Entfernt Leerzeichen am Anfang und Ende eines Strings + * + * Konfiguration: + * ``` + * "type": "trim" + * ``` + * + * Beispiel: + * Input: " Coop Pronto " + * Output: "Coop Pronto" + * + * @param string $value Ursprungswert + * + * @return string Transformierter Wert + */ + private function transformTrim(string $value): string + { + return trim($value); + } + + /** + * Lowercase Transformation + * + * Wandelt einen String in Kleinbuchstaben um (UTF-8 safe) + * + * Konfiguration: + * ``` + * "type": "lowercase" + * ``` + * + * Beispiel: + * Input: "COOP PRONTO CHUR" + * Output: "coop pronto chur" + * + * @param string $value Ursprungswert + * + * @return string Transformierter Wert + */ + private function transformLowercase(string $value): string + { + return mb_strtolower($value, 'UTF-8'); + } + + /** + * Uppercase Transformation + * + * Wandelt einen String in Grossbuchstaben um (UTF-8 safe) + * + * Konfiguration: + * ``` + * "type": "uppercase" + * ``` + * + * Beispiel: + * Input: "Coop Pronto Chur" + * Output: "COOP PRONTO CHUR" + * + * @param string $value Ursprungswert + * + * @return string Transformierter Wert + */ + private function transformUppercase(string $value): string + { + return mb_strtoupper($value, 'UTF-8'); + } + + /** + * Ucwords First Transformation + * + * Grossschreibung nur des ersten Buchstabens nach Worttrennern. + * Alle anderen Buchstaben werden zu Kleinbuchstaben. + * Funktioniert auch, wenn Input komplett in Grossbuchstaben vorliegt. 
+ * + * Konfiguration: + * ``` + * "type": "ucwords_first" + * ``` + * + * Mit Ausnahmeliste (Wörter, die exakt erhalten bleiben): + * ``` + * "type": "ucwords_first", + * "exceptions": ["SBB", "UBS", "AG", "GmbH"] + * ``` + * + * Beispiele: + * "COOP PRONTO CHUR" → "Coop Pronto Chur" + * "migros-rail city zuerich" → "Migros-Rail City Zuerich" + * "O'NEILL STORE" → "O'Neill Store" + * "SAINT-JEAN-DE-MAURIENNE" → "Saint-Jean-De-Maurienne" + * + * Wortgrenzen definiert durch: Leerzeichen, Bindestrich, Apostroph, + * Slash, Punkt, Komma, Semikolon, Doppelpunkt, Klammern, Anführungszeichen + * + * @param string $value Ursprungswert + * + * @return string Transformierter Wert + */ + private function transformUcwordsFirst(string $value, array $config = []): string + { + // Schritt 1: Alles zu Kleinbuchstaben + $value = mb_strtolower($value, 'UTF-8'); + + // Schritt 2: Definiere Wortgrenzen (Trennzeichen) + // Diese Zeichen markieren Grenzen, nach denen grossgeschrieben wird + $delimiters = [ + ' ', // Leerzeichen + '-', // Bindestrich + '\'', // Apostroph + '/', // Slash + '.', // Punkt + ',', // Komma + ';', // Semikolon + ':', // Doppelpunkt + '(', // Oeffnende Klammer + ')', // Schliessende Klammer + '[', // Oeffnende eckige Klammer + ']', // Schliessende eckige Klammer + '{', // Oeffnende geschweifte Klammer + '}', // Schliessende geschweifte Klammer + '"', // Anführungszeichen + '&', // Ampersand + '+' // Plus + ]; + + // Schritt 3: Regex-Pattern fuer "Stringanfang ODER Delimiter, gefolgt von Buchstabe" + // Die u-Flag ermoeglicht Unicode-Unterstaetzung (\p{L}) + $escapedDelimiters = array_map(function ($char) { + return preg_quote($char, '/'); + }, $delimiters); + $delimiterPattern = implode('', $escapedDelimiters); + + $pattern = '/(^|[' . $delimiterPattern . 
'])(\p{L})/u'; + + // Schritt 4: Callback fuer preg_replace_callback + // Grossschreibe den gefangenen Buchstaben (Capture Group 2) + $callback = function (array $matches): string { + // $matches[1] = Stringanfang oder Trennzeichen + // $matches[2] = Buchstabe, der grossgeschrieben werden soll + return $matches[1] . mb_strtoupper($matches[2], 'UTF-8'); + }; + + // Schritt 5: Anwende Transformation + $result = preg_replace_callback($pattern, $callback, $value) ?? $value; + + // Schritt 6: Ausnahmeliste anwenden (Wörter die exakt erhalten bleiben sollen, z.B. SBB, UBS, GmbH) + $exceptions = $config['exceptions'] ?? $this->globalExceptions; + foreach ($exceptions as $exception) { + if (!is_string($exception) || $exception === '') { + continue; + } + $exceptionPattern = '/\b' . preg_quote($exception, '/') . '\b/iu'; + $result = preg_replace($exceptionPattern, $exception, $result) ?? $result; + } + + return $result; + } + + /** + * Pipeline Transformation + * + * Wendet mehrere Transformationen nacheinander auf einen Wert an. + * Jeder Schritt benutzt das Ergebnis des vorherigen Schrittes. + * + * Konfiguration: + * ``` + * "Merchant": { + * "type": "pipeline", + * "sourceColumn": "Merchant/Description", + * "steps": [ + * { "type": "trim" }, + * { "type": "lowercase" }, + * { "type": "ucwords_first" } + * ] + * } + * ``` + * + * Beispiel: + * Input: " COOP PRONTO CHUR " + * Step 1 (trim): "COOP PRONTO CHUR" + * Step 2 (lowercase): "coop pronto chur" + * Step 3 (ucwords_first): "Coop Pronto Chur" + * Output: "Coop Pronto Chur" + * + * @param string $value Ursprungswert + * @param array $config Transformationskonfiguration mit 'steps' Array + * + * @return string Transformierter Wert nach allen Schritten + */ + private function transformPipeline(string $value, array $config): string + { + $steps = $config['steps'] ?? 
[]; + + if (empty($steps) || !is_array($steps)) { + return $value; + } + + // Wende jeden Schritt nacheinander an + foreach ($steps as $step) { + if (!empty($step['type'] ?? $step['transform'] ?? null)) { + $value = $this->applySingleTransformation($value, $step); + } + } + + return $value; + } + + /** + * Custom Callback Transformation + * + * Ruft eine Custom-Funktion auf, die komplexe Logik implementiert + * + * Konfiguration: + * ``` + * "type": "custom", + * "callback": "myCustomFunction" + * ``` + * + * Die Callback-Funktion erhaelt die gesamte Zeile und gibt die + * modifizierte Zeile zurueck. + * + * @param array $row Gesamte Datenzeile + * @param array $config Transformationskonfiguration + * + * @return array Transformierte Datenzeile + */ + private function transformCustom(array $row, array $config): array + { + $callback = $config['callback'] ?? null; + + if (empty($callback) || !is_callable($callback)) { + return $row; + } + + try { + return call_user_func($callback, $row); + } catch (\Exception $e) { + return $row; + } + } + + /** + * Behandelt Multi-Output Transformationen + * Aktuell nur für 'split' implementiert. + * + * Config-Beispiel: + * { + * "outputs": ["FirstName", "LastName"], + * "sourceColumn": "FullName", + * "type": "split", + * "delimiter": " " + * } + * + * @param array $row Input-Zeile + * @param array $config Transformations-Konfiguration + * @return array Assoziatives Array: columnName => value + * @throws \RuntimeException wenn Transformation-Type nicht unterstützt + */ + private function handleMultiOutputTransformation(array $row, array $config): array + { + $outputs = $config['outputs']; + $sourceColumn = $config['sourceColumn'] ?? ''; + $transformType = $this->normalizeTransformType($config['type'] ?? 
''); + + if (empty($outputs) || empty($sourceColumn) || empty($transformType)) { + throw new \RuntimeException("Multi-Output Transformation benötigt 'outputs', 'sourceColumn' und 'type'"); + } + + $sourceValue = $row[$sourceColumn] ?? ''; + + if ($transformType !== 'split') { + throw new \RuntimeException("Multi-Output nur für 'split' unterstützt, gegeben: {$transformType}"); + } + + return $this->handleMultiOutputSplit($sourceValue, $outputs, $config); + } + + /** + * Split-Transformation mit Multi-Output + * Teilt einen String und verteilt die Teile auf mehrere Spalten + * + * @param string $value Zu teilender String + * @param array $outputs Liste der Ziel-Spaltennamen + * @param array $config Transformation-Config + * @return array Assoziatives Array: columnName => value + */ + + private function handleMultiOutputSplit(string $value, array $outputs, array $config): array + { + $delimiter = $config['delimiter'] ?? ';'; + + // Führe Split durch + $parts = explode($delimiter, $value); + + // Mappe Parts zu Output-Spalten + $result = []; + foreach ($outputs as $index => $columnName) { + // Wenn Teil existiert: verwenden (getrimmt) // Wenn nicht: leerer String + $result[$columnName] = isset($parts[$index]) ? trim($parts[$index]) : ''; + } + + // Debug-Logging + DebugLogger::log('transformation', 'Applied Multi-Output Split', ['input' => $value, 'delimiter' => $delimiter, 'parts_count' => count($parts), 'outputs' => $outputs, 'result' => $result]); + + return $result; + } + + /** + * Gibt die Anzahl der Output-Spalten zurueck + * + * Zaehlt Original-Spalten plus neu generierte Spalten (z.B. 
bei regex_extract) + * + * @return int Anzahl Output-Spalten + */ + public function getOutputColumns(): int + { + return count(array_unique($this->outputColumns)); + } +} diff --git a/src/ConfigurationLoader.php b/src/ConfigurationLoader.php new file mode 100644 index 0000000..8ea8cc5 --- /dev/null +++ b/src/ConfigurationLoader.php @@ -0,0 +1,201 @@ +configFile = $configFile; + } + + /** + * Lädt die Konfigurationsdatei + * + * @return array Die geladene und validierte Konfiguration + * @throws \RuntimeException wenn Datei nicht gefunden oder ungültig + */ + public function load(): array + { + if (!file_exists($this->configFile)) { + throw new \RuntimeException("Konfigurationsdatei nicht gefunden: {$this->configFile}"); + } + + if (pathinfo($this->configFile, PATHINFO_EXTENSION) !== 'json') { + throw new \RuntimeException("Konfigurationsdatei muss eine JSON-Datei sein: {$this->configFile}"); + } + + $this->config = $this->loadJson($this->configFile); + + $this->validate(); + return $this->config; + } + + /** + * Lädt eine JSON-Datei + * + * @param string $file Pfad zur JSON-Datei + * @return array Geparste Konfiguration + */ + private function loadJson(string $file): array + { + $json = file_get_contents($file); + if ($json === false) { + throw new \RuntimeException("Konnte JSON-Datei nicht lesen: {$file}"); + } + + $config = json_decode($json, true); + + if ($config === null && json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException("Ungültiges JSON: " . 
json_last_error_msg()); + } + + return $config; + } + + /** + * Validiert die geladene Konfiguration auf erforderliche Felder + * + * @throws \RuntimeException wenn erforderliche Felder fehlen + */ + private function validate(): void + { + // Metadata erforderlich + if (empty($this->config['metadata'])) { + throw new \RuntimeException("Konfiguration: 'metadata' Section erforderlich"); + } + + if (!isset($this->config['metadata']['extractionRules']) || !is_array($this->config['metadata']['extractionRules'])) { + throw new \RuntimeException("Konfiguration: 'metadata.extractionRules' erforderlich (kann leer sein: [])"); + } + + // CSV-Struktur erforderlich + if (empty($this->config['csvStructure'])) { + throw new \RuntimeException("Konfiguration: 'csvStructure' Section erforderlich"); + } + + if (!isset($this->config['csvStructure']['headerLine'])) { + throw new \RuntimeException("Konfiguration: 'csvStructure.headerLine' erforderlich"); + } + + // Column Transformations erforderlich + if (empty($this->config['columnTransformations'])) { + throw new \RuntimeException("Konfiguration: 'columnTransformations' erforderlich"); + } + + // Directories validieren (wenn auto-import genutzt wird) + if (!empty($this->config['directories'])) { + foreach (['source', 'output', 'archive', 'error'] as $dir) { + if (empty($this->config['directories'][$dir])) { + throw new \RuntimeException("Konfiguration: 'directories.{$dir}' erforderlich für Auto-Import"); + } + } + } + + // Validiere CSV-Struktur Werte + $headerLine = $this->config['csvStructure']['headerLine'] ?? 1; + if (!is_int($headerLine) || $headerLine < 1) { + throw new \Exception( + 'Konfiguration csvStructure.headerLine muss eine positive Ganzzahl sein' + ); + } + + $delimiter = $this->config['csvStructure']['inputDelimiter'] ?? 
''; + if (strlen($delimiter) === 0) { + throw new \Exception( + 'Konfiguration csvStructure.inputDelimiter darf nicht leer sein' + ); + } + + // Validiere Encoding + $encoding = $this->config['csvStructure']['encoding'] ?? 'UTF-8'; + if (!in_array($encoding, ['UTF-8', 'ISO-8859-1', 'CP1252'])) { + throw new \Exception( + 'Konfiguration csvStructure.encoding: ' . $encoding . ' nicht unterstützt' + ); + } + } + + /** + * Gibt eine einzelne Konfigurationsoption zurück + * + * @param string $key Dot-Notation Key (z.B. 'metadata.extractionRules') + * @param mixed $default Standardwert wenn Key nicht existiert + * @return mixed Der Konfigurationswert + */ + public function get(string $key, mixed $default = null): mixed + { + $keys = explode('.', $key); + $value = $this->config; + + foreach ($keys as $k) { + if (!isset($value[$k])) { + return $default; + } + $value = $value[$k]; + } + + return $value; + } + + /** + * Gibt die vollständige Konfiguration zurück + * + * @return array Die komplette Konfiguration + */ + public function getAll(): array + { + return $this->config; + } + + /** + * Setzt einen Konfigurationswert (überschreibt bestehenden Wert) + * + * @param string $key Dot-Notation Key (z.B. 
'directories.output') + * @param mixed $value Neuer Wert + * @return void + */ + public function set(string $key, mixed $value): void + { + $keys = explode('.', $key); + $ref = &$this->config; + + foreach ($keys as $i => $k) { + if ($i === count($keys) - 1) { + $ref[$k] = $value; + } else { + if (!isset($ref[$k]) || !is_array($ref[$k])) { + $ref[$k] = []; + } + $ref = &$ref[$k]; + } + } + } + + /** + * Prüft ob ein Konfigurationsschlüssel existiert + * + * @param string $key Dot-Notation Key + * @return bool + */ + public function has(string $key): bool + { + $keys = explode('.', $key); + $value = $this->config; + + foreach ($keys as $k) { + if (!isset($value[$k])) { + return false; + } + $value = $value[$k]; + } + + return true; + } +} diff --git a/src/CsvReader.php b/src/CsvReader.php new file mode 100644 index 0000000..989fff5 --- /dev/null +++ b/src/CsvReader.php @@ -0,0 +1,183 @@ +filePath = $filePath; + $this->delimiter = $csvStructure['inputDelimiter'] ?? ';'; + $this->headerLine = $csvStructure['headerLine'] ?? 1; + $this->hasBom = $csvStructure['hasBom'] ?? 
false; + } + + /** + * Liest alle Zeilen aus der Datei + * + * @param int $maxLines Maximale Anzahl Zeilen (0 = alle) + * @return array Array mit Zeilen (ohne Newlines) + * @throws \RuntimeException wenn Datei nicht gelesen werden kann + */ + public function readLines(int $maxLines = 0): array + { + if (!file_exists($this->filePath) || !is_readable($this->filePath)) { + throw new \RuntimeException("Konnte Datei nicht lesen: {$this->filePath}"); + } + + $lines = file($this->filePath, FILE_IGNORE_NEW_LINES); + + if ($lines === false) { + throw new \RuntimeException("Konnte Datei nicht lesen: {$this->filePath}"); + } + + // BOM entfernen falls vorhanden + if ($this->hasBom && !empty($lines)) { + $lines[0] = $this->removeBom($lines[0]); + } + + if ($maxLines > 0 && count($lines) > $maxLines) { + $lines = array_slice($lines, 0, $maxLines); + } + + return $lines; + } + + /** + * Liest die Metadaten-Zeilen (vor der Header-Zeile) + * + * @return array Array mit Metadaten-Zeilen + */ + public function readMetadataLines(): array + { + $lines = $this->readLines(); + + if ($this->headerLine <= 1) { + return []; + } + + return array_slice($lines, 0, $this->headerLine - 1); + } + + /** + * Liest die CSV-Daten mit Headers + * + * @param int $maxDataRows Maximale Anzahl Datenzeilen (0 = alle) + * @return array Array von assoziativen Arrays (mit Spalten-Namen als Keys) + * @throws \RuntimeException wenn Header-Zeile nicht gefunden + */ + public function readCsvData(int $maxDataRows = 0): array + { + $lines = $this->readLines(); + + if ($this->headerLine > count($lines)) { + throw new \RuntimeException("Header-Zeile {$this->headerLine} nicht gefunden in Datei mit " . count($lines) . " Zeilen"); + } + + // Header parsen + $headerLineContent = $lines[$this->headerLine - 1]; + $headers = str_getcsv($headerLineContent, $this->delimiter, '"', '\\'); + $headers = array_map(static fn(?string $v): string => trim($v ?? 
''), $headers); + + // Datenzeilen parsen + $data = []; + $dataStartLine = $this->headerLine; // 0-basiert + $lineCount = 0; + + for ($i = $dataStartLine; $i < count($lines); $i++) { + if ($maxDataRows > 0 && $lineCount >= $maxDataRows) { + break; + } + + $lineContent = $lines[$i]; + + // Leere Zeilen überspringen + if (trim($lineContent) === '') { + continue; + } + + $row = str_getcsv($lineContent, $this->delimiter, '"', '\\'); + $row = array_map(static fn(?string $v): string => trim($v ?? ''), $row); + + // Zeile mit Header-Keys kombinieren + $rowData = []; + foreach ($headers as $index => $header) { + $rowData[$header] = $row[$index] ?? ''; + } + + $data[] = $rowData; + $lineCount++; + } + + return $data; + } + + /** + * Gibt die Spalten-Header zurück + * + * @return array Array mit Spalten-Namen + * @throws \RuntimeException wenn Header-Zeile nicht gefunden + */ + public function getHeaders(): array + { + $lines = $this->readLines(); + + if ($this->headerLine > count($lines)) { + throw new \RuntimeException("Header-Zeile {$this->headerLine} nicht gefunden"); + } + + $headerLineContent = $lines[$this->headerLine - 1]; + $headers = str_getcsv($headerLineContent, $this->delimiter, '"', '\\'); + + return array_map(static fn(?string $v): string => trim($v ?? 
''), $headers); + } + + /** + * Entfernt UTF-8 BOM (Byte Order Mark) von String + * + * @param string $text String mit potenziellem BOM + * @return string String ohne BOM + */ + private function removeBom(string $text): string + { + if (str_starts_with($text, "\xEF\xBB\xBF")) { + return substr($text, 3); + } + return $text; + } + + /** + * Gibt die Gesamtzahl der Zeilen in der Datei zurück + * + * @return int Anzahl Zeilen + */ + public function countLines(): int + { + return count($this->readLines()); + } + + /** + * Gibt die Anzahl der Datenzeilen zurück (ohne Header und Metadaten) + * + * @return int Anzahl Datenzeilen + */ + public function countDataRows(): int + { + return count($this->readCsvData()); + } +} diff --git a/src/CsvWriter.php b/src/CsvWriter.php new file mode 100644 index 0000000..69b33f9 --- /dev/null +++ b/src/CsvWriter.php @@ -0,0 +1,121 @@ +outputFile = $outputFile; + $this->delimiter = $csvStructure['outputDelimiter'] ?? ','; + } + + /** + * Schreibt Daten in CSV-Datei + * + * @param array $data Array von assoziativen Arrays (Zeilen) + * @throws \RuntimeException wenn Datei nicht geschrieben werden kann + */ + public function write(array $data): void + { + if (empty($data)) { + throw new \RuntimeException("Keine Daten zum Schreiben"); + } + + // Output-Verzeichnis erstellen falls nicht vorhanden + $dir = dirname($this->outputFile); + if (!is_dir($dir)) { + if (!mkdir($dir, 0755, true)) { + throw new \RuntimeException("Konnte Output-Verzeichnis nicht erstellen: {$dir}"); + } + } + + $fp = fopen($this->outputFile, 'w'); + + if ($fp === false) { + throw new \RuntimeException("Konnte Output-Datei nicht erstellen: {$this->outputFile}"); + } + + try { + // Headers schreiben (Spalten-Namen aus erster Zeile) + $headers = array_keys($data[0]); + $this->writeCsvLine($fp, $headers); + + // Datenzeilen schreiben + foreach ($data as $row) { + // Sicherstellen dass alle Spalten vorhanden sind + $values = []; + foreach ($headers as $header) { + $values[] = 
$row[$header] ?? ''; + } + + $this->writeCsvLine($fp, $values); + } + } finally { + fclose($fp); + } + } + + /** + * Schreibt eine CSV-Zeile mit fputcsv + * + * @param resource $fp File-Handle + * @param array $values Array mit Werten + * @throws \RuntimeException wenn Schreiben fehlschlägt + */ + private function writeCsvLine($fp, array $values): void + { + $result = fputcsv($fp, $values, $this->delimiter, '"', '\\'); + + if ($result === false) { + throw new \RuntimeException("Fehler beim Schreiben der CSV-Zeile"); + } + } + + /** + * Gibt den Pfad zur Output-Datei zurück + * + * @return string Output-Dateipfad + */ + public function getOutputFile(): string + { + return $this->outputFile; + } + + /** + * Prüft ob Output-Datei erstellt wurde + * + * @return bool True wenn Datei existiert + */ + public function fileExists(): bool + { + return file_exists($this->outputFile); + } + + /** + * Gibt die Größe der Output-Datei zurück + * + * @return int|false Dateigröße in Bytes oder false bei Fehler + */ + public function getFileSize(): int|false + { + if (!$this->fileExists()) { + return false; + } + + return filesize($this->outputFile); + } +} diff --git a/src/DebugLogger.php b/src/DebugLogger.php new file mode 100644 index 0000000..d14501a --- /dev/null +++ b/src/DebugLogger.php @@ -0,0 +1,174 @@ + 'CH9300762011623852957']); + * $logs = DebugLogger::getLogs(); + * ``` + */ +class DebugLogger +{ + /** + * @var bool Ist Debug-Modus aktiviert? + */ + private static bool $enabled = false; + + /** + * @var array Gesammelte Logs mit Timestamp, Kategorie, Nachricht und Daten + */ + private static array $logs = []; + + /** + * Aktiviert den Debug-Modus + * + * Nach Aktivierung werden alle DebugLogger::log() Aufrufe protokolliert. + * + * @return void + */ + public static function enable(): void + { + self::$enabled = true; + } + + /** + * Deaktiviert den Debug-Modus + * + * Nach Deaktivierung werden DebugLogger::log() Aufrufe ignoriert. 
+ * + * @return void + */ + public static function disable(): void + { + self::$enabled = false; + } + + /** + * Protokolliert eine Debug-Nachricht + * + * Sammelt Informationen über jeden Verarbeitungsschritt mit Timestamp, + * Kategorie, Nachricht und optionalen Daten. Die Logs werden nur + * gesammelt, wenn der Debug-Modus aktiviert ist. + * + * @param string $category Kategorie der Log-Nachricht + * z.B. 'metadata', 'transformation', 'csv_reader', 'config' + * @param string $message Beschreibung der Aktion oder des Ereignisses + * @param mixed $data Zusätzliche Kontextdaten (Array oder beliebiger Wert) + * + * @return void + */ + public static function log(string $category, string $message, $data = null): void + { + if (!self::$enabled) { + return; + } + + self::$logs[] = [ + 'timestamp' => microtime(true), + 'category' => $category, + 'message' => $message, + 'data' => $data + ]; + } + + /** + * Gibt alle gesammelten Logs zurück + * + * Liefert ein Array aller protokollierten Ereignisse mit vollständigen + * Informationen für Analyse und Debugging. + * + * @return array Array von Log-Einträgen, jeder mit: + * - timestamp: Mikrosekunden-Zeitstempel + * - category: Log-Kategorie + * - message: Beschreibung + * - data: Zusätzliche Daten + */ + public static function getLogs(): array + { + return self::$logs; + } + + /** + * Setzt alle Logs zurück + * + * Löscht den gesamten Log-Buffer. Nützlich um zwischen mehreren + * Transformationen einen sauberen State zu haben. 
+ * + * @return void + */ + public static function reset(): void + { + self::$logs = []; + } + + /** + * Gibt die Anzahl der gesammelten Log-Einträge zurück + * + * @return int Anzahl protokollierter Ereignisse + */ + public static function count(): int + { + return count(self::$logs); + } + + /** + * Prüft ob Debug-Modus aktiviert ist + * + * @return bool true wenn aktiviert, false sonst + */ + public static function isEnabled(): bool + { + return self::$enabled; + } + + /** + * Gibt einen formattierten String aller Logs zurück + * + * Konvertiert den Log-Buffer in ein lesbares Format für Konsolen-Ausgabe. + * + * @param bool $includeData true = auch Daten ausgeben, false = nur Messages + * + * @return string Formatierte Log-Ausgabe + */ + public static function format(bool $includeData = true): string + { + if (empty(self::$logs)) { + return "Keine Debug-Logs vorhanden.\n"; + } + + $output = "\n=== DEBUG LOGS ===\n"; + foreach (self::$logs as $index => $log) { + $output .= sprintf( + "%d. [%s] %s: %s", + $index + 1, + $log['category'], + date('H:i:s', intval($log['timestamp'])), + $log['message'] + ); + + if ($includeData && $log['data'] !== null) { + $output .= "\n Data: " . 
/**
 * Hands a transformed CSV file to the Firefly III Data Importer CLI.
 *
 * Configuration keys read by this class (the 'fireflyImport' section):
 *   - jsonConfig        (required) path to the importer's JSON configuration; the file must exist
 *   - importerCommand   (required) command prefix, e.g.
 *                       'cd /opt/firefly-data-importer && php artisan importer:import'
 *   - deleteAfterImport (optional, default false) delete the CSV after a successful import
 *   - environment       (optional) extra environment variables for the importer process,
 *                       e.g. FIREFLY_III_URL and FIREFLY_III_ACCESS_TOKEN
 *
 * NOTE(review): the 'autoImport' and 'timeout' keys of the sample
 * configuration are not read by this class; no timeout is enforced here.
 *
 * Usage:
 *   $importer = new FireflyImporter($config['fireflyImport']);
 *   $result = $importer->import('/path/to/transformed.csv');
 */
class FireflyImporter
{
    private array $config;
    private string $jsonConfigPath;
    private string $importerCommand;
    private bool $deleteAfterImport;
    private array $environment;

    /**
     * @param array $config Firefly import configuration
     *
     * @throws \RuntimeException when 'jsonConfig' or 'importerCommand' is
     *                           missing, or the JSON configuration file does not exist
     */
    public function __construct(array $config)
    {
        $this->config = $config;

        // Validate the path to the importer's JSON configuration.
        $this->jsonConfigPath = $config['jsonConfig'] ?? '';
        if (empty($this->jsonConfigPath)) {
            throw new \RuntimeException("Firefly Import: 'jsonConfig' nicht konfiguriert");
        }

        if (!file_exists($this->jsonConfigPath)) {
            throw new \RuntimeException("Firefly JSON-Konfiguration nicht gefunden: {$this->jsonConfigPath}");
        }

        // Command prefix used to start the importer.
        $this->importerCommand = $config['importerCommand'] ?? '';
        if (empty($this->importerCommand)) {
            throw new \RuntimeException("Firefly Import: 'importerCommand' nicht konfiguriert");
        }

        // Optional settings.
        $this->deleteAfterImport = $config['deleteAfterImport'] ?? false;
        $this->environment = $config['environment'] ?? [];
    }

    /**
     * Imports a transformed CSV file into Firefly III.
     *
     * Runs "<importerCommand> <csvFile> <jsonConfig>" and waits for it to
     * finish. An exit code of 0 counts as success.
     *
     * @param string $csvFile Path to the transformed CSV file
     *
     * @return array Result with 'success', 'exit_code' and 'output'; on a
     *               completed run also 'duration', 'csv_file', 'config_file'
     *               and 'deleted'; on failure to run also 'error'
     */
    public function import(string $csvFile): array
    {
        if (!file_exists($csvFile)) {
            return [
                'success' => false,
                'error' => "CSV-Datei nicht gefunden: {$csvFile}",
                'output' => '',
                'exit_code' => -1
            ];
        }

        $command = $this->buildImportCommand($csvFile);
        $env = $this->buildEnvironment();

        $output = [];
        $exitCode = 0;

        $startTime = microtime(true);

        try {
            [$exitCode, $stdout, $stderr] = $this->runCommand($command, $env);

            $output = [
                'stdout' => $stdout,
                'stderr' => $stderr
            ];

            $duration = microtime(true) - $startTime;

            $success = ($exitCode === 0);

            // Deleting the CSV is best-effort; 'deleted' reports whether it actually happened.
            $deleted = $success && $this->deleteAfterImport && @unlink($csvFile);

            return [
                'success' => $success,
                'exit_code' => $exitCode,
                'output' => $output,
                'duration' => round($duration, 2),
                'csv_file' => $csvFile,
                'config_file' => $this->jsonConfigPath,
                'deleted' => $deleted
            ];
        } catch (\Exception $e) {
            return [
                'success' => false,
                'error' => $e->getMessage(),
                'output' => $output,
                'exit_code' => $exitCode
            ];
        }
    }

    /**
     * Runs a shell command and collects its exit code, stdout and stderr.
     *
     * stderr is redirected into a temporary file instead of a pipe. Reading
     * two pipes one after the other blocks forever as soon as the child fills
     * the pipe that is not being read yet; with a file only stdout needs draining.
     *
     * @param string     $command Complete command line
     * @param array|null $env     Environment for the child, or null to inherit
     *
     * @return array List of [int exit code, string stdout, string stderr]
     *
     * @throws \RuntimeException when the process cannot be started
     */
    private function runCommand(string $command, ?array $env): array
    {
        $stderrFile = tmpfile();
        if ($stderrFile === false) {
            throw new \RuntimeException("Konnte Import-Prozess nicht starten");
        }

        try {
            $descriptors = [
                0 => ['pipe', 'r'], // stdin
                1 => ['pipe', 'w'], // stdout
                2 => $stderrFile,   // stderr
            ];

            $process = proc_open($command, $descriptors, $pipes, null, $env);

            if (!is_resource($process)) {
                throw new \RuntimeException("Konnte Import-Prozess nicht starten");
            }

            // The importer gets no input.
            fclose($pipes[0]);

            $stdout = stream_get_contents($pipes[1]);
            fclose($pipes[1]);

            // Wait for the process to end.
            $exitCode = proc_close($process);

            rewind($stderrFile);
            $stderr = stream_get_contents($stderrFile);

            return [
                $exitCode,
                $stdout === false ? '' : $stdout,
                $stderr === false ? '' : $stderr,
            ];
        } finally {
            fclose($stderrFile);
        }
    }

    /**
     * Builds the import command line.
     *
     * @param string $csvFile Path to the CSV file
     *
     * @return string Command prefix followed by the shell-escaped CSV and JSON paths
     */
    private function buildImportCommand(string $csvFile): string
    {
        return sprintf(
            '%s %s %s',
            $this->importerCommand,
            escapeshellarg($csvFile),
            escapeshellarg($this->jsonConfigPath)
        );
    }

    /**
     * Builds the environment for the importer process.
     *
     * @return array|null Current environment extended by the configured
     *                    variables, or null (inherit) when none are configured
     */
    private function buildEnvironment(): ?array
    {
        if (empty($this->environment)) {
            return null;
        }

        // getenv() is the reliable source for the current environment: $_ENV
        // is empty unless variables_order contains "E", and a child started
        // with only the custom variables would lose PATH and friends.
        $env = [];
        foreach ([getenv(), $_ENV, $this->environment] as $source) {
            foreach ($source as $name => $value) {
                if (is_scalar($value)) {
                    $env[$name] = (string) $value;
                }
            }
        }

        return $env;
    }

    /**
     * Checks whether the importer command can be run.
     *
     * Replaces 'importer:import' in the configured command with '--version'
     * and executes the result.
     *
     * @return array 'available' (exit code was 0), 'output' and 'exit_code'
     */
    public function testConnection(): array
    {
        $testCommand = str_replace('importer:import', '--version', $this->importerCommand);

        exec($testCommand . ' 2>&1', $output, $exitCode);

        return [
            'available' => ($exitCode === 0),
            'output' => implode("\n", $output),
            'exit_code' => $exitCode
        ];
    }

    /**
     * Validates the importer's JSON configuration file.
     *
     * @return array 'valid' plus either 'config' (decoded file) or 'error'
     */
    public function validateConfig(): array
    {
        if (!file_exists($this->jsonConfigPath)) {
            return [
                'valid' => false,
                'error' => 'JSON-Konfiguration nicht gefunden'
            ];
        }

        $json = file_get_contents($this->jsonConfigPath);
        if ($json === false) {
            return [
                'valid' => false,
                'error' => 'Konfigurationsdatei nicht lesbar'
            ];
        }
        $config = json_decode($json, true);

        if ($config === null) {
            return [
                'valid' => false,
                'error' => 'Ungültiges JSON: ' . json_last_error_msg()
            ];
        }

        // Fields this check requires in the importer configuration.
        $requiredFields = ['file_type', 'import_account'];
        $missingFields = [];

        foreach ($requiredFields as $field) {
            if (!isset($config[$field])) {
                $missingFields[] = $field;
            }
        }

        if (!empty($missingFields)) {
            return [
                'valid' => false,
                'error' => 'Fehlende Felder: ' . implode(', ', $missingFields)
            ];
        }

        return [
            'valid' => true,
            'config' => $config
        ];
    }

    /**
     * Returns the configuration this importer was created with.
     *
     * @return array Firefly import configuration
     */
    public function getConfig(): array
    {
        return $this->config;
    }
}
'#u'; + $matchResult = @preg_match_all($pattern, $line, $matches); + if ($matchResult === false) { + DebugLogger::log('metadata_error', "Invalid regex pattern", [ + 'rule_name' => $ruleName, + 'pattern' => $regex, + ]); + continue; + } + if ($matchResult === 0) { + // Regex matched nicht auf dieser Zeile + DebugLogger::log('metadata_warning', "Regex did not match", [ + 'rule_name' => $ruleName, + 'lineNumber' => $lineNumber, + 'regex_pattern' => $regex, + 'line_content' => substr($line, 0, 100) + ]); + continue; + } + + // ✅ KORRIGIERT: captureGroup benutzen + // captureGroup definiert welche Klammer-Gruppe extrahiert wird + // 0 = komplette Match + // 1 = erste Klammer-Gruppe (...) + // 2 = zweite Klammer-Gruppe, etc. + $captureGroup = isset($rule['captureGroup']) ? intval($rule['captureGroup']) : 1; + + // Sicherstellen dass die Capture Group existiert + if (!isset($matches[$captureGroup]) || empty($matches[$captureGroup])) { + // Fallback: Nutze komplette Match wenn Gruppe nicht existiert + $metadata[$ruleName] = $matches[0][0] ?? ''; + // echo "DEBUG: extraction_rule '{$ruleName}' - captureGroup {$captureGroup} not found, falling back to complete match\n"; + } else { + // Nutze die spezifische Capture Group + $metadata[$ruleName] = $matches[$captureGroup][0] ?? ''; + } + + DebugLogger::log('metadata', "Extraction rule applied", [ + 'rule_name' => $ruleName, + 'value' => $metadata[$ruleName] ?? 
null, + ]); + } + + return $metadata; + } + + /** + * Gibt die Anzahl der definierten Extraction-Rules zurück + * + * @return int Anzahl Rules + */ + public function getRuleCount(): int + { + return count($this->rules); + } + + /** + * Gibt alle definierten Extraction-Rules zurück + * + * @return array Die Rules + */ + public function getRules(): array + { + return $this->rules; + } +} diff --git a/src/TransformerEngine.php b/src/TransformerEngine.php new file mode 100644 index 0000000..4b7276d --- /dev/null +++ b/src/TransformerEngine.php @@ -0,0 +1,368 @@ +configLoader = $configLoader; + $this->debugMode = $debugMode; + + $config = $configLoader->getAll(); + + $this->csvStructure = $config['csvStructure'] ?? []; + + $this->metadataExtractor = new MetadataExtractor( + $config['metadata']['extractionRules'] ?? [] + ); + + $this->columnTransformer = new ColumnTransformer( + $config['columnTransformations'] ?? [], + [], + $config['capitalizationExceptions'] ?? [] + ); + + // Bestimme Output-Dateiname aus Konfiguration + $outputDir = $config['directories']['output'] ?? './output'; + $outputFileName = $config['csvStructure']['outputFilename'] ?? 'transformed.csv'; + $outputFile = rtrim($outputDir, '/') . '/' . $outputFileName; + + $this->csvWriter = new CsvWriter( + $outputFile, + $config['csvStructure'] ?? [] + ); + } + + /** + * Aktiviert oder deaktiviert den Debug-Modus + * + * @param bool $enabled true = Debug-Modus aktiviert + * @return void + */ + public function setDebugMode(bool $enabled): void + { + $this->debugMode = $enabled; + if ($enabled) { + DebugLogger::enable(); + } else { + DebugLogger::disable(); + } + } + + /** + * Transformiert eine CSV-Datei + * + * Führt folgende Schritte durch: + * 1. CSV-Datei einlesen mit CsvReader + * 2. Metadaten aus Header extrahieren + * 3. Spalten gemäß Konfiguration transformieren + * 4. Daten in Output-CSV schreiben + * 5. 
/**
 * Transforms one CSV file and writes the result to the configured output file.
 *
 * Steps: read the pre-header lines, extract metadata from them, rebuild the
 * column transformer with that metadata, transform every data row, drop the
 * columns listed in csvStructure.excludeOutputColumns and hand the rows to
 * the CSV writer. The output path comes from the configuration.
 *
 * Sample rows: up to 3 when $maxRows is 0, otherwise up to $maxRows.
 * NOTE(review): the previous documentation promised min(3, maxRows); the
 * code has never capped at 3 for a non-zero $maxRows - confirm which one
 * callers rely on before changing the limit.
 *
 * @param string $inputFile Path of the input CSV file
 * @param int $maxRows Maximum number of data rows to transform (0 = all)
 *
 * @return array On success: success=true, inputFile, outputFile,
 *               rowsProcessed, sampleData, metadata, outputColumns.
 *               On failure: success=false, error, rowsProcessed,
 *               sampleData, outputColumns (empty array).
 *               In debug mode both variants also carry debug_logs.
 */
public function transform(string $inputFile, int $maxRows = 0): array
{
    $this->sampleData = [];
    $this->rowsProcessed = 0;
    DebugLogger::reset();

    try {
        if ($this->debugMode) {
            DebugLogger::log('transformer', 'Transformation started', [
                'inputFile' => $inputFile,
                'maxRows' => $maxRows
            ]);
        }

        if (!file_exists($inputFile)) {
            throw new \RuntimeException("Input-Datei nicht gefunden: {$inputFile}");
        }

        $csvReader = new CsvReader($inputFile, $this->csvStructure);

        // Metadata lives in the lines before the header line.
        $metadataLines = $csvReader->readMetadataLines();
        $metadata = $this->metadataExtractor->extract($metadataLines);

        // The transformer is rebuilt per file because it receives the
        // metadata extracted from this particular input.
        $this->columnTransformer = new ColumnTransformer(
            $this->configLoader->get('columnTransformations', []),
            $metadata,
            $this->configLoader->get('capitalizationExceptions', [])
        );

        $dataRows = $csvReader->readCsvData($maxRows);
        if (empty($dataRows)) {
            throw new \RuntimeException("Keine Datenzeilen in CSV-Datei");
        }

        $sampleLimit = $maxRows == 0 ? 3 : $maxRows;

        $transformedData = [];

        foreach ($dataRows as $row) {
            // Stop once the requested number of rows has been handled.
            if ($maxRows > 0 && $this->rowsProcessed >= $maxRows) {
                break;
            }

            $transformedRow = $this->columnTransformer->transformRow($row);
            $transformedData[] = $transformedRow;

            if (count($this->sampleData) < $sampleLimit) {
                $this->sampleData[] = $transformedRow;
            }

            $this->rowsProcessed++;
        }

        // Remove columns that must not appear in the output.
        $excludeColumns = $this->csvStructure['excludeOutputColumns'] ?? [];
        if (!empty($excludeColumns)) {
            $excludeMap = array_flip($excludeColumns);
            $transformedData = array_map(
                static fn(array $row): array => array_diff_key($row, $excludeMap),
                $transformedData
            );
            $this->sampleData = array_map(
                static fn(array $row): array => array_diff_key($row, $excludeMap),
                $this->sampleData
            );
        }

        $this->csvWriter->write($transformedData);

        $result = [
            'success' => true,
            'inputFile' => $inputFile,
            'outputFile' => $this->csvWriter->getOutputFile(),
            'rowsProcessed' => $this->rowsProcessed,
            'sampleData' => $this->sampleData,
            'metadata' => $metadata,
            'outputColumns' => $this->columnTransformer->getOutputColumns(),
        ];

        if ($this->debugMode) {
            $result['debug_logs'] = DebugLogger::getLogs();
        }

        return $result;
    } catch (\Exception $e) {
        $failure = [
            'success' => false,
            'error' => $e->getMessage(),
            'rowsProcessed' => $this->rowsProcessed,
            'sampleData' => $this->sampleData,
            'outputColumns' => [],
        ];

        // A failed run is exactly when the collected logs are needed, so
        // they are returned here as well instead of being dropped.
        if ($this->debugMode) {
            DebugLogger::log('transformer', 'Transformation failed', [
                'error' => $e->getMessage()
            ]);
            $failure['debug_logs'] = DebugLogger::getLogs();
        }

        return $failure;
    }
}
/**
 * Transforms a CSV file and, when fireflyImport.autoImport is set in the
 * configuration, passes the written output file to FireflyImporter.
 *
 * A failed transformation is returned unchanged. An exception raised by the
 * importer does not fail the call; it is reported under the "import" key.
 *
 * @param string $inputFile Path of the input CSV file
 * @param int $maxRows Maximum number of data rows to process (0 = all)
 *
 * @return array The transform() result, plus "import" when an import was
 *               attempted
 */
public function transformAndImport(string $inputFile, int $maxRows = 0): array
{
    $outcome = $this->transform($inputFile, $maxRows);

    if ($outcome['success']) {
        $importSettings = $this->configLoader->get('fireflyImport', []);

        if (!empty($importSettings['autoImport'])) {
            try {
                $outcome['import'] = (new FireflyImporter($importSettings))
                    ->import($outcome['outputFile']);
            } catch (\Exception $e) {
                $outcome['import'] = [
                    'success' => false,
                    'error' => $e->getMessage(),
                ];
            }
        }
    }

    return $outcome;
}
/**
 * Checks that a CSV file exists and yields the metadata keys
 * "account_iban" and "currency_code".
 *
 * @param string $inputFile Path of the CSV file to check
 *
 * @return array valid=true with metadata and line_count, or valid=false
 *               with error (and metadata_found when keys are missing)
 */
public function validate(string $inputFile): array
{
    try {
        if (!file_exists($inputFile)) {
            return [
                'valid' => false,
                'error' => "Datei nicht gefunden: {$inputFile}",
            ];
        }

        $reader = new CsvReader($inputFile, $this->csvStructure);

        // Metadata is taken from the lines in front of the header line.
        $found = $this->metadataExtractor->extract($reader->readMetadataLines());

        // Keep the required keys whose value is absent or empty.
        $absent = array_values(array_filter(
            ['account_iban', 'currency_code'],
            static fn(string $key): bool => empty($found[$key])
        ));

        if ($absent !== []) {
            return [
                'valid' => false,
                'error' => 'Fehlende Metadaten: ' . implode(', ', $absent),
                'metadata_found' => $found,
            ];
        }

        return [
            'valid' => true,
            'metadata' => $found,
            'line_count' => $reader->countLines(),
        ];
    } catch (\Exception $e) {
        return [
            'valid' => false,
            'error' => 'Validierungs-Fehler: ' . $e->getMessage(),
        ];
    }
}
/**
 * A split index beyond the available parts leaves the value untouched.
 */
public function testSplitPartOutOfBoundsReturnsOriginal(): void
{
    $rule = [
        'sourceColumn' => 'Col',
        'outputColumn' => 'Col',
        'type' => 'split',
        'delimiter' => ';',
        'part' => 5,
    ];
    $row = ['Col' => 'A;B'];

    $transformed = $this->applyOne($rule, $row);

    $this->assertSame('A;B', $transformed['Col']);
}
/**
 * regexextract writes an empty string when the pattern finds nothing.
 */
public function testRegexExtractNoMatchReturnsEmpty(): void
{
    $rule = [
        'sourceColumn' => 'Col',
        'outputColumn' => 'Zip',
        'type' => 'regexextract',
        'pattern' => '(\d{4})',
    ];
    $row = ['Col' => 'No digits here', 'Zip' => ''];

    $transformed = $this->applyOne($rule, $row);

    $this->assertSame('', $transformed['Zip']);
}
/**
 * ucwordsfirst capitalises the letter that follows an apostrophe.
 */
public function testUcwordsFirstApostrophe(): void
{
    $rule = ['sourceColumn' => 'Col', 'outputColumn' => 'Col', 'type' => 'ucwordsfirst'];
    $row = ['Col' => "O'NEILL STORE"];

    $transformed = $this->applyOne($rule, $row);

    $this->assertSame("O'Neill Store", $transformed['Col']);
}
/**
 * constantvalue yields an empty string when the metadata key is unknown.
 */
public function testConstantValueMissingKeyReturnsEmpty(): void
{
    $rule = [
        'sourceColumn' => '_constant_',
        'outputColumn' => 'Currency',
        'type' => 'constantvalue',
        'metadataKey' => 'nonexistent',
    ];

    // No metadata is supplied, so the lookup cannot succeed.
    $subject = new ColumnTransformer([$rule], []);
    $transformed = $subject->transformRow(['Currency' => '']);

    $this->assertSame('', $transformed['Currency']);
}
/**
 * An inline "transformations" list on a column entry is applied in order.
 */
public function testInlineTransformationsArray(): void
{
    $steps = [
        ['type' => 'trim'],
        ['type' => 'uppercase'],
    ];
    $rule = [
        'sourceColumn' => 'Col',
        'outputColumn' => 'Col',
        'type' => 'map',
        'transformations' => $steps,
    ];

    $transformed = $this->applyOne($rule, ['Col' => '  hello  ']);

    $this->assertSame('HELLO', $transformed['Col']);
}
/**
 * A multi-output split fills targets without a matching part with ''.
 */
public function testMultiOutputSplitFewerPartsYieldsEmptyString(): void
{
    $rule = [
        'outputs' => ['Col1', 'Col2', 'Col3'],
        'sourceColumn' => 'Source',
        'type' => 'split',
        'delimiter' => ';',
    ];

    $subject = new ColumnTransformer([$rule]);
    $transformed = $subject->transformRow(['Source' => 'A;B']);

    // Two parts are available for three output columns.
    $expectedByColumn = ['Col1' => 'A', 'Col2' => 'B', 'Col3' => ''];
    foreach ($expectedByColumn as $column => $expected) {
        $this->assertSame($expected, $transformed[$column]);
    }
}
/**
 * getOutputColumns() counts each output column once, even when two rules
 * write to the same column.
 */
public function testGetOutputColumnsCountsUniqueColumns(): void
{
    $rules = [
        ['sourceColumn' => 'A', 'outputColumn' => 'X', 'type' => 'map'],
        ['sourceColumn' => 'B', 'outputColumn' => 'Y', 'type' => 'map'],
        // Writes to X a second time.
        ['sourceColumn' => 'C', 'outputColumn' => 'X', 'type' => 'map'],
    ];

    $subject = new ColumnTransformer($rules);
    $subject->transformRow(['A' => '1', 'B' => '2', 'C' => '3']);

    $this->assertSame(2, $subject->getOutputColumns());
}
/**
 * Runs the real configuration against the fixture input and compares the
 * written CSV with expected.csv (trailing whitespace ignored).
 *
 * The configuration is copied to a temporary file whose directories all
 * point at the per-test temp directory, so nothing is written into the
 * project tree.
 *
 * @dataProvider fixtureProvider
 */
public function testConfigProducesExpectedOutput(string $configName, string $fixtureDir): void
{
    $realConfigPath = __DIR__ . '/../config/' . $configName . '.json';
    $inputFile = $fixtureDir . '/input.csv';
    $expectedFile = $fixtureDir . '/expected.csv';

    $this->assertFileExists($realConfigPath, "Config-Datei nicht gefunden: {$realConfigPath}");
    $this->assertFileExists($inputFile, "Fixture input.csv fehlt: {$inputFile}");
    $this->assertFileExists($expectedFile, "Fixture expected.csv fehlt: {$expectedFile}");

    $rawConfig = file_get_contents($realConfigPath);
    $this->assertNotFalse($rawConfig, "Konfigurationsdatei konnte nicht gelesen werden");

    /** @var array $configData */
    $configData = json_decode((string) $rawConfig, true);
    $this->assertIsArray($configData, "Ungültiges JSON in Konfigurationsdatei");

    // The loader rejects a "directories" section that lacks one of its
    // sub-keys, while a config may omit the section entirely. Injecting
    // only "output" would therefore make such a config fail to load, so the
    // other keys get a default and existing values are kept.
    $configuredDirs = is_array($configData['directories'] ?? null) ? $configData['directories'] : [];
    $configData['directories'] = array_merge(
        [
            'source' => $this->tempOutputDir,
            'archive' => $this->tempOutputDir,
            'error' => $this->tempOutputDir,
        ],
        $configuredDirs,
        ['output' => $this->tempOutputDir]
    );

    $this->tempConfigFile = sys_get_temp_dir() . '/ubscsv_config_' . uniqid('', true) . '.json';
    $written = file_put_contents(
        $this->tempConfigFile,
        json_encode($configData, JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR)
    );
    $this->assertNotFalse($written, "Temporäre Konfigurationsdatei konnte nicht geschrieben werden");

    $loader = new ConfigurationLoader($this->tempConfigFile);
    $loader->load();
    $engine = new TransformerEngine($loader);
    $result = $engine->transform($inputFile);

    $this->assertTrue(
        $result['success'],
        "Transformation fehlgeschlagen: " . ($result['error'] ?? 'unbekannter Fehler')
    );

    $outputFile = $result['outputFile'];
    $this->assertFileExists($outputFile, "Output-Datei wurde nicht erstellt: {$outputFile}");

    // Two unreadable files would both collapse to '' and compare equal,
    // so the reads are checked before the comparison.
    $actualRaw = file_get_contents($outputFile);
    $expectedRaw = file_get_contents($expectedFile);
    $this->assertNotFalse($actualRaw, "Output-Datei konnte nicht gelesen werden: {$outputFile}");
    $this->assertNotFalse($expectedRaw, "expected.csv konnte nicht gelesen werden: {$expectedFile}");

    $actual = rtrim((string) $actualRaw);
    $expected = rtrim((string) $expectedRaw);

    $this->assertSame(
        $expected,
        $actual,
        "CSV-Output stimmt nicht mit expected.csv überein.\n" .
        "Zum Aktualisieren: php bin/transformer.php transform {$inputFile} {$realConfigPath} " .
        "(Output nach {$expectedFile} kopieren)"
    );
}
/**
 * Serialises $data as JSON into the per-test temporary config file.
 *
 * Encoding errors raise a JsonException instead of silently writing an
 * empty file, which would otherwise surface as an unrelated loader error.
 *
 * @param array $data Configuration to write
 */
private function writeJson(array $data): void
{
    file_put_contents($this->tempFile, json_encode($data, JSON_THROW_ON_ERROR));
}
/**
 * Loading an existing file whose extension is not .json must throw.
 */
public function testNonJsonExtensionThrows(): void
{
    $nonJsonFile = sprintf('%s/test_config_%s.yaml', sys_get_temp_dir(), uniqid());
    file_put_contents($nonJsonFile, 'key: value');

    try {
        $subject = new ConfigurationLoader($nonJsonFile);

        // The expectation is armed after construction, so only load() may
        // satisfy it.
        $this->expectException(\RuntimeException::class);
        $subject->load();
    } finally {
        // Remove the scratch file whether or not load() threw.
        if (file_exists($nonJsonFile)) {
            unlink($nonJsonFile);
        }
    }
}
$this->expectException(\RuntimeException::class); + (new ConfigurationLoader($this->tempFile))->load(); + } + + // ------------------------------------------------------------------------- + // csvStructure value validation + // ------------------------------------------------------------------------- + + public function testInvalidEncodingThrows(): void + { + $config = $this->validConfig(); + $config['csvStructure']['encoding'] = 'LATIN1'; + $this->writeJson($config); + $this->expectException(\Exception::class); + (new ConfigurationLoader($this->tempFile))->load(); + } + + public function testSupportedEncodingsPass(): void + { + foreach (['UTF-8', 'ISO-8859-1', 'CP1252'] as $encoding) { + $config = $this->validConfig(); + $config['csvStructure']['encoding'] = $encoding; + $this->writeJson($config); + $loader = new ConfigurationLoader($this->tempFile); + $result = $loader->load(); + $this->assertSame($encoding, $result['csvStructure']['encoding']); + } + } + + // ------------------------------------------------------------------------- + // directories validation + // ------------------------------------------------------------------------- + + public function testDirectoriesAllKeysPass(): void + { + $config = $this->validConfig(); + $config['directories'] = [ + 'source' => '/tmp/source', + 'output' => '/tmp/output', + 'archive' => '/tmp/archive', + 'error' => '/tmp/error', + ]; + $this->writeJson($config); + $result = (new ConfigurationLoader($this->tempFile))->load(); + $this->assertArrayHasKey('directories', $result); + } + + public function testDirectoriesMissingSubkeyThrows(): void + { + $config = $this->validConfig(); + $config['directories'] = [ + 'source' => '/tmp/source', + 'output' => '/tmp/output', + // 'archive' and 'error' intentionally missing + ]; + $this->writeJson($config); + $this->expectException(\RuntimeException::class); + (new ConfigurationLoader($this->tempFile))->load(); + } + + // 
-------------------------------------------------------------------------
+    // Absent optional 'directories' key does not trigger validation
+    // -------------------------------------------------------------------------
+
+    /**
+     * First asserts that the array from validConfig() has no 'directories' key,
+     * then checks that load() succeeds and its result has no such key either.
+     */
+    public function testAbsentDirectoriesKeyDoesNotThrow(): void
+    {
+        $config = $this->validConfig();
+        $this->assertArrayNotHasKey('directories', $config);
+        $this->writeJson($config);
+        $result = (new ConfigurationLoader($this->tempFile))->load();
+        $this->assertArrayNotHasKey('directories', $result);
+    }
+}
diff --git a/tests/CsvReaderTest.php b/tests/CsvReaderTest.php
new file mode 100644
index 0000000..4c61e3d
--- /dev/null
+++ b/tests/CsvReaderTest.php
@@ -0,0 +1,199 @@
+    // NOTE(review): the text between the hunk header above and the assignment
+    // below appears to have been lost in extraction (presumably the file
+    // prologue, the class declaration and the start of a set-up method).
+    // The surviving fragment is kept verbatim; restore it from the repository.
+tempFile = tempnam(sys_get_temp_dir(), 'csvreader_test_');
+    }
+
+    /**
+     * Deletes the temp file used by the test if it still exists.
+     */
+    protected function tearDown(): void
+    {
+        if (file_exists($this->tempFile)) {
+            unlink($this->tempFile);
+        }
+    }
+
+    /**
+     * Writes $content to the temp file, replacing whatever it held before.
+     */
+    private function write(string $content): void
+    {
+        file_put_contents($this->tempFile, $content);
+    }
+
+    /**
+     * Builds a CsvReader over the temp file.
+     *
+     * @param int    $headerLine passed to the reader as 'headerLine'
+     * @param string $delimiter  passed to the reader as 'inputDelimiter'
+     * @param bool   $hasBom     passed to the reader as 'hasBom'
+     */
+    private function reader(int $headerLine = 1, string $delimiter = ';', bool $hasBom = false): CsvReader
+    {
+        return new CsvReader(
+            $this->tempFile,
+            ['inputDelimiter' => $delimiter, 'headerLine' => $headerLine, 'hasBom' => $hasBom]
+        );
+    }
+
+    // -------------------------------------------------------------------------
+    // readCsvData – basic parsing
+    // -------------------------------------------------------------------------
+
+    /**
+     * Two data rows under a one-line header must come back as two arrays keyed
+     * by column name, with the cell values as strings.
+     */
+    public function testReadCsvDataSimple(): void
+    {
+        $this->write("Name;Age\nAlice;30\nBob;25\n");
+        $data = $this->reader()->readCsvData();
+        $this->assertCount(2, $data);
+        $this->assertSame('Alice', $data[0]['Name']);
+        $this->assertSame('30', $data[0]['Age']);
+        $this->assertSame('Bob', $data[1]['Name']);
+    }
+
+    /**
+     * Whitespace around header names and around cell values must not appear in
+     * the returned keys or values.
+     */
+    public function testReadCsvDataTrimsWhitespace(): void
+    {
+        $this->write("Name ; Age\n Alice ; 30 \n");
+        $data = $this->reader()->readCsvData();
+        $this->assertSame('Alice', $data[0]['Name']);
+        $this->assertSame('30', $data[0]['Age']);
+    }
+
+    
// -------------------------------------------------------------------------
+    // headerLine offset
+    // -------------------------------------------------------------------------
+
+    /**
+     * With headerLine = 3 the column names come from the third line and only
+     * the line after it is returned as data.
+     */
+    public function testHeaderLineOffset(): void
+    {
+        // Lines 1+2 are metadata, header is line 3
+        $this->write("Meta1\nMeta2\nColA;ColB\nval1;val2\n");
+        $data = $this->reader(3)->readCsvData();
+        $this->assertCount(1, $data);
+        $this->assertSame('val1', $data[0]['ColA']);
+        $this->assertSame('val2', $data[0]['ColB']);
+    }
+
+    // -------------------------------------------------------------------------
+    // readMetadataLines
+    // -------------------------------------------------------------------------
+
+    /**
+     * With headerLine = 3, readMetadataLines() must return the two lines that
+     * precede the header, in file order.
+     */
+    public function testReadMetadataLines(): void
+    {
+        $this->write("Line1\nLine2\nHeader;Col\nData;Row\n");
+        $meta = $this->reader(3)->readMetadataLines();
+        $this->assertCount(2, $meta);
+        $this->assertSame('Line1', $meta[0]);
+        $this->assertSame('Line2', $meta[1]);
+    }
+
+    /**
+     * With the header on line 1, readMetadataLines() must return an empty array.
+     */
+    public function testReadMetadataLinesHeaderOnLine1IsEmpty(): void
+    {
+        $this->write("Name;Age\nAlice;30\n");
+        $meta = $this->reader(1)->readMetadataLines();
+        $this->assertSame([], $meta);
+    }
+
+    /**
+     * With the header on line 2, readMetadataLines() must return exactly the
+     * first line.
+     */
+    public function testReadMetadataLinesHeaderOnLine2ReturnsOneLine(): void
+    {
+        $this->write("MetaInfo\nName;Age\nAlice;30\n");
+        $meta = $this->reader(2)->readMetadataLines();
+        $this->assertCount(1, $meta);
+        $this->assertSame('MetaInfo', $meta[0]);
+    }
+
+    // -------------------------------------------------------------------------
+    // maxDataRows limit
+    // -------------------------------------------------------------------------
+
+    /**
+     * readCsvData(2) must return only the first two of the three data rows.
+     */
+    public function testMaxDataRowsLimit(): void
+    {
+        $this->write("Name;Age\nAlice;30\nBob;25\nCarol;20\n");
+        $data = $this->reader()->readCsvData(2);
+        $this->assertCount(2, $data);
+        $this->assertSame('Alice', $data[0]['Name']);
+        $this->assertSame('Bob', $data[1]['Name']);
+    }
+
+    /**
+     * readCsvData(0) must return all three data rows, i.e. 0 does not act as a
+     * limit of zero rows.
+     */
+    public function testMaxDataRowsZeroMeansAll(): void
+    {
+        $this->write("Name;Age\nAlice;30\nBob;25\nCarol;20\n");
+        $data = $this->reader()->readCsvData(0);
+        $this->assertCount(3, $data);
+    }
+
+    // -------------------------------------------------------------------------
+    // Empty line skipping
+    // -------------------------------------------------------------------------
+
+    /**
+     * Blank lines between and after data rows must not produce rows of their
+     * own: two data lines yield two rows.
+     */
+    public function testEmptyLinesAreSkipped(): void
+    {
+        $this->write("Name;Age\nAlice;30\n\nBob;25\n\n");
+        $data = $this->reader()->readCsvData();
+        $this->assertCount(2, $data);
+    }
+
+    // -------------------------------------------------------------------------
+    // BOM removal
+    // -------------------------------------------------------------------------
+
+    /**
+     * With hasBom = true, a UTF-8 byte-order mark in front of the header must
+     * not end up in the first column name.
+     */
+    public function testBomIsRemovedFromFirstColumnName(): void
+    {
+        // UTF-8 BOM followed immediately by the header
+        $this->write("\xEF\xBB\xBFName;Age\nAlice;30\n");
+        $data = $this->reader(1, ';', true)->readCsvData();
+        $this->assertCount(1, $data);
+        // The column must be 'Name', not the BOM-prefixed version
+        $this->assertArrayHasKey('Name', $data[0]);
+        $this->assertSame('Alice', $data[0]['Name']);
+    }
+
+    /**
+     * Checks that a file without a BOM parses normally when hasBom = false.
+     * Despite the method name, no BOM-prefixed input is exercised here.
+     */
+    public function testNoBomFlagLeavesHeaderIntact(): void
+    {
+        // If hasBom is false, BOM bytes stay and the key will be mangled — we only
+        // assert that the clean path (hasBom=true) works, tested above.
+        // Here we just verify normal CSV without BOM also works when hasBom=false.
+        $this->write("Name;Age\nAlice;30\n");
+        $data = $this->reader(1, ';', false)->readCsvData();
+        $this->assertArrayHasKey('Name', $data[0]);
+    }
+
+    // -------------------------------------------------------------------------
+    // getHeaders
+    // -------------------------------------------------------------------------
+
+    /**
+     * getHeaders() must return the header cells as a list, in file order.
+     */
+    public function testGetHeaders(): void
+    {
+        $this->write("Col1;Col2;Col3\nA;B;C\n");
+        $headers = $this->reader()->getHeaders();
+        $this->assertSame(['Col1', 'Col2', 'Col3'], $headers);
+    }
+
+    // -------------------------------------------------------------------------
+    // Row with fewer columns than headers
+    // -------------------------------------------------------------------------
+
+    /**
+     * A data row with fewer cells than the header must still be returned, with
+     * the missing column present and set to the empty string.
+     */
+    public function testShortRowPaddedWithEmpty(): void
+    {
+        $this->write("A;B;C\n1;2\n");
+        $data = $this->reader()->readCsvData();
+        $this->assertCount(1, $data);
+        $this->assertSame('1', $data[0]['A']);
+        $this->assertSame('2', $data[0]['B']);
+        $this->assertSame('', $data[0]['C']);
+    }
+
+    // -------------------------------------------------------------------------
+    // Error cases
+    // -------------------------------------------------------------------------
+
+    /**
+     * A reader built for '/nonexistent/path/file.csv' must raise
+     * \RuntimeException. The expectation is registered before construction, so
+     * either the constructor or readLines() may be the one that throws.
+     */
+    public function testFileNotFoundThrowsRuntimeException(): void
+    {
+        $this->expectException(\RuntimeException::class);
+        $reader = new CsvReader('/nonexistent/path/file.csv', ['inputDelimiter' => ';', 'headerLine' => 1]);
+        $reader->readLines();
+    }
+
+    /**
+     * A headerLine of 99 on a two-line file must make readCsvData() raise
+     * \RuntimeException.
+     */
+    public function testHeaderLineBeyondFileLengthThrows(): void
+    {
+        $this->write("Name;Age\nAlice;30\n");
+        $this->expectException(\RuntimeException::class);
+        $this->reader(99)->readCsvData();
+    }
+}
diff --git a/tests/MetadataExtractorTest.php b/tests/MetadataExtractorTest.php
new file mode 100644
index 0000000..c3514f7
--- /dev/null
+++ b/tests/MetadataExtractorTest.php
@@ -0,0 +1,204 @@
+    // NOTE(review): the text between the hunk header above and the array value
+    // below appears to have been lost in extraction (presumably the file
+    // prologue, the class declaration and the opening of the first test method).
+    // The surviving fragment is kept verbatim; restore it from the repository.
+ 'account_iban',
+            'lineNumber' => 1,
+            'regex' => '([A-Z]{2}\d{2}[\w]+)',
+            'captureGroup' => 1,
+        ]]);
+        $result = $extractor->extract(['Account: CH9300762011623852957']);
+        $this->assertSame('CH9300762011623852957', $result['account_iban']);
+    }
+
+    /**
+     * A rule with lineNumber = 2 must be matched against the second element of
+     * the lines array, so lineNumber is 1-based.
+     */
+    public function testExtractFromSecondLine(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'currency',
+            'lineNumber' => 2,
+            'regex' => '(CHF|EUR|USD)',
+            'captureGroup' => 1,
+        ]]);
+        $lines = ['Account info', 'Currency: CHF'];
+        $result = $extractor->extract($lines);
+        $this->assertSame('CHF', $result['currency']);
+    }
+
+    /**
+     * captureGroup = 0 must yield the whole matched text, here for a pattern
+     * that has no parenthesised group.
+     */
+    public function testExtractCaptureGroup0ReturnsFullMatch(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'label',
+            'lineNumber' => 1,
+            'regex' => 'CHF',
+            'captureGroup' => 0,
+        ]]);
+        $result = $extractor->extract(['Balance: CHF 1000']);
+        $this->assertSame('CHF', $result['label']);
+    }
+
+    /**
+     * Two rules targeting different lines must each contribute their own entry
+     * to the result.
+     */
+    public function testExtractMultipleRules(): void
+    {
+        $extractor = new MetadataExtractor([
+            [
+                'name' => 'iban',
+                'lineNumber' => 1,
+                'regex' => '([A-Z]{2}\d{2}\w+)',
+                'captureGroup' => 1,
+            ],
+            [
+                'name' => 'currency',
+                'lineNumber' => 2,
+                'regex' => '(CHF|EUR)',
+                'captureGroup' => 1,
+            ],
+        ]);
+        $result = $extractor->extract(['IBAN: CH9300762011623852957', 'Currency: CHF']);
+        $this->assertSame('CH9300762011623852957', $result['iban']);
+        $this->assertSame('CHF', $result['currency']);
+    }
+
+    // -------------------------------------------------------------------------
+    // Line not available
+    // -------------------------------------------------------------------------
+
+    /**
+     * A rule pointing at line 5 when only one line is supplied must leave its
+     * name out of the result.
+     */
+    public function testExtractMissingLineIsSkipped(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'account_iban',
+            'lineNumber' => 5,
+            'regex' => '(CH\d+)',
+            'captureGroup' => 1,
+        ]]);
+        $result = $extractor->extract(['Only one line']);
+        $this->assertArrayNotHasKey('account_iban', $result);
+    }
+
+    // -------------------------------------------------------------------------
+    // Invalid regex
+    // -------------------------------------------------------------------------
+
+    /**
+     * A malformed pattern must not make extract() throw; the rule's name is
+     * simply absent from the result.
+     */
+    public function testExtractInvalidRegexDoesNotCrash(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'bad_rule',
+            'lineNumber' => 1,
+            'regex' => '[invalid(regex',
+            'captureGroup' => 1,
+        ]]);
+        $result = $extractor->extract(['some line content']);
+        $this->assertArrayNotHasKey('bad_rule', $result);
+    }
+
+    // -------------------------------------------------------------------------
+    // No match
+    // -------------------------------------------------------------------------
+
+    /**
+     * A valid pattern that does not match its line must leave the rule's name
+     * out of the result.
+     */
+    public function testExtractNoMatchIsSkipped(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'account_iban',
+            'lineNumber' => 1,
+            'regex' => '(NOMATCH_\d+)',
+            'captureGroup' => 1,
+        ]]);
+        $result = $extractor->extract(['Account: CH9300762011623852957']);
+        $this->assertArrayNotHasKey('account_iban', $result);
+    }
+
+    // -------------------------------------------------------------------------
+    // Edge cases
+    // -------------------------------------------------------------------------
+
+    /**
+     * An extractor configured with no rules must return an empty array.
+     */
+    public function testExtractEmptyRulesReturnsEmptyArray(): void
+    {
+        $extractor = new MetadataExtractor([]);
+        $result = $extractor->extract(['some line']);
+        $this->assertSame([], $result);
+    }
+
+    /**
+     * extract() called with no lines must return an empty array even though a
+     * rule is configured.
+     */
+    public function testExtractEmptyLinesArrayReturnsEmpty(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'iban',
+            'lineNumber' => 1,
+            'regex' => '(CH\d+)',
+            'captureGroup' => 1,
+        ]]);
+        $result = $extractor->extract([]);
+        $this->assertSame([], $result);
+    }
+
+    /**
+     * A rule without a 'name' entry must contribute nothing to the result.
+     */
+    public function testRuleWithMissingNameIsSkipped(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'regex' => '(\d+)',
+            'lineNumber' => 1,
+        ]]);
+        $result = $extractor->extract(['123']);
+        $this->assertSame([], $result);
+    }
+
+    /**
+     * A rule without a 'regex' entry must contribute nothing to the result.
+     */
+    public function testRuleWithMissingRegexIsSkipped(): void
+    {
+        $extractor = new MetadataExtractor([[
+            'name' => 'test',
+            'lineNumber' => 1,
+        ]]);
+        $result = $extractor->extract(['some line']);
+        $this->assertSame([], $result);
+    }
+
+    // 
-------------------------------------------------------------------------
+    // getRuleCount
+    // -------------------------------------------------------------------------
+
+    /**
+     * getRuleCount() must report the number of rule arrays passed to the
+     * constructor (three here).
+     */
+    public function testGetRuleCount(): void
+    {
+        $extractor = new MetadataExtractor([
+            ['name' => 'a', 'regex' => 'x', 'lineNumber' => 1],
+            ['name' => 'b', 'regex' => 'y', 'lineNumber' => 2],
+            ['name' => 'c', 'regex' => 'z', 'lineNumber' => 3],
+        ]);
+        $this->assertSame(3, $extractor->getRuleCount());
+    }
+
+    /**
+     * getRuleCount() must be the integer 0 for an extractor built with no rules.
+     */
+    public function testGetRuleCountEmptyIsZero(): void
+    {
+        $this->assertSame(0, (new MetadataExtractor([]))->getRuleCount());
+    }
+
+    // -------------------------------------------------------------------------
+    // Regex containing '#' delimiter character
+    // -------------------------------------------------------------------------
+
+    /**
+     * A rule pattern containing a literal '#' must still match and capture the
+     * '#42' token from the line.
+     */
+    public function testRegexContainingHashIsHandled(): void
+    {
+        // Pattern contains '#' which is the internal delimiter — must be escaped
+        $extractor = new MetadataExtractor([[
+            'name' => 'tag',
+            'lineNumber' => 1,
+            'regex' => '(#\d+)',
+            'captureGroup' => 1,
+        ]]);
+        $result = $extractor->extract(['Issue #42 resolved']);
+        $this->assertSame('#42', $result['tag']);
+    }
+}
diff --git a/tests/fixtures/config-ubs-account/expected.csv b/tests/fixtures/config-ubs-account/expected.csv
new file mode 100644
index 0000000..f60b568
--- /dev/null
+++ b/tests/fixtures/config-ubs-account/expected.csv
@@ -0,0 +1,17 @@
+Belastung,Gutschrift,date,process_date,opposing_name,tags,description,opposing_account,notes,account_iban,account_currency +-600.00,,2022-12-30,2022-12-30,"David Peter Reindl",Dauerauftrag,"Steuerrueckstellung +David Peter Reindl;8906 Bonstetten","CH37 0026 7267 9314 35M2 P","9967864LK2659211 +8906 Bonstetten","CH18 0026 7267 9314 3540 D",CHF +-46.35,,2022-12-30,2022-12-31,"UBS AG",,"Periode: 2022-10-01 - 2022-12-30 +Zinsabschluss",,9900365AP6356307,"CH18 0026 7267 9314 3540 D",CHF +-39.90,,2022-12-30,2022-12-30,"Swisscom 
Grossunternehme",TWINT,"Swisscom Grossunternehme; Zahlung UBS TWINT",,"9967364GK5707142 +8004 Zuerich","CH18 0026 7267 9314 3540 D",CHF +-8.75,,2022-12-28,2022-12-27,"Coop Pronto Chur",Debitkarte,"18279748-0 08/24 +Coop Pronto Chur;7007 Chur",,"9930862BN7826808 +7007 Chur","CH18 0026 7267 9314 3540 D",CHF +-1800.00,,2022-12-27,2022-12-27,"Janine Geigele",e-banking,"Skiferien Dolomiten +Janine Geigele;Am Wasser 36; 8049 Zuerich; CH","CH63 0023 2232 5560 5988 0","9967361TI3188436 +8049 Zuerich","CH18 0026 7267 9314 3540 D",CHF +,9.00,2022-12-22,2022-12-22,"Friis, Daniela Silvia",TWINT,"Friis, Daniela Silvia",,9930356GK0440989,"CH18 0026 7267 9314 3540 D",CHF +,19764.80,2022-11-25,2022-11-25,SBB,Gutschrift,"SBB;Corporate Treasury",,9901820E67741531,"CH18 0026 7267 9314 3540 D",CHF +-14.00,,2022-08-22,2022-08-21,"Friis-Loop, Daniela",TWINT,"Friis-Loop, Daniela; Belastung UBS TWINT",,9967233GK1553933,"CH18 0026 7267 9314 3540 D",CHF diff --git a/tests/fixtures/config-ubs-account/input.csv b/tests/fixtures/config-ubs-account/input.csv new file mode 100644 index 0000000..25b7410 --- /dev/null +++ b/tests/fixtures/config-ubs-account/input.csv @@ -0,0 +1,18 @@ +Kontonummer:;0267 00931435.40; +IBAN:;CH18 0026 7267 9314 3540 D; +Von:;2022-01-03; +Bis:;2022-12-30; +Anfangssaldo:;3917.29; +Schlusssaldo:;-238.80; +Bewertet in:;CHF; +Anzahl Transaktionen in diesem Zeitraum:;742; + +Abschlussdatum;Abschlusszeit;Buchungsdatum;Valutadatum;Währung;Belastung;Gutschrift;Einzelbetrag;Saldo;Transaktions-Nr.;Beschreibung1;Beschreibung2;Beschreibung3;Fussnoten; +2022-12-30;;2022-12-30;2022-12-30;CHF;-600.00;;;-238.80;9967864LK2659211;"""David Peter Reindl;8906 Bonstetten""";"""STEUERRUECKSTELLUNG; Dauerauftrag""";"""Konto-Nr. IBAN: CH37 0026 7267 9314 35M2 P; Transaktions-Nr. 9967864LK2659211""";; +2022-12-30;;2022-12-30;2022-12-31;CHF;-46.35;;;361.20;9900365AP6356307;"Saldo Zinsabschluss";"Periode: 2022-10-01 - 2022-12-30";"Transaktions-Nr. 
9900365AP6356307";; +2022-12-30;;2022-12-30;2022-12-30;CHF;-39.90;;;407.55;9967364GK5707142;"""SWISSCOM GROSSUNTERNEHME; Zahlung UBS TWINT""";;"""Zahlungsgrund: Muellerstrasse 16 na, 8004 Zuerich TWINT-Acc.:+41796305690; Transaktions-Nr. 9967364GK5707142""";; +2022-12-27;19:57:53;2022-12-28;2022-12-27;CHF;-8.75;;;7592.45;9930862BN7826808;"""Coop Pronto Chur;7007 Chur""";"""18279748-0 08/24; Zahlung Debitkarte""";"Transaktions-Nr. 9930862BN7826808";; +2022-12-27;;2022-12-27;2022-12-27;CHF;-1800.00;;;7601.20;9967361TI3188436;"""Janine Geigele;Am Wasser 36; 8049 Zuerich; CH""";"""SKIFERIEN DOLOMITEN; e-banking-Vergütungsauftrag""";"""Zahlungsgrund: Wohnung Dolomiten, 2 Personen; Konto-Nr. IBAN: CH63 0023 2232 5560 5988 0; Kosten: E-Banking Inland; Transaktions-Nr. 9967361TI3188436""";; +2022-12-22;;2022-12-22;2022-12-22;CHF;;9.00;;10436.45;9930356GK0440989;"Friis, Daniela Silvia";"Gutschrift UBS TWINT";"""Zahlungsgrund: +41796741245; TWINT-Acc.:+41796305690; Transaktions-Nr. 9930356GK0440989""";; +2022-11-25;;2022-11-25;2022-11-25;CHF;;19764.80;;15748.12;9901820E67741531;"""SBB;Corporate Treasury""";"Gutschrift";"""Zahlungsgrund: Lohn/Gehalt 00229537/202211; Transaktions-Nr. 9901820E67741531""";; +2022-08-21;;2022-08-22;2022-08-21;CHF;-14.00;;;-1544.23;9967233GK1553933;"""FRIIS-LOOP, DANIELA; Belastung UBS TWINT""";;"""Zahlungsgrund: +41796741245; TWINT-Acc.:+41796305690; Transaktions-Nr. 9967233GK1553933""";;