From ec95ea26c3f168510924eb414b3a6be2d5778fe5 Mon Sep 17 00:00:00 2001 From: "Reindl David (IT-PTR-CEN2-SL10)" Date: Wed, 6 May 2026 23:17:54 +0200 Subject: [PATCH] additional, improved transformers / skipIf to eliminate records --- .gitignore | 1 + AGENTS.md | 8 +- README.de.md | 207 +++++- README.md | 213 +++++- bin/transformer.php | 174 ++++- config/config.example.json | 4 + config/ff-import-ubs-account.json | 79 ++ src/ColumnTransformer.php | 115 ++- src/FireflyImporter.php | 699 +++++++++++++++++- src/RowFilter.php | 161 ++++ src/TransformerEngine.php | 13 + tests/ColumnTransformerTest.php | 219 ++++++ tests/FireflyImporterChunkStateTest.php | 414 +++++++++++ tests/RowFilterTest.php | 255 +++++++ .../fixtures/config-ubs-account/expected.csv | 31 +- 15 files changed, 2463 insertions(+), 130 deletions(-) create mode 100644 config/ff-import-ubs-account.json create mode 100644 src/RowFilter.php create mode 100644 tests/FireflyImporterChunkStateTest.php create mode 100644 tests/RowFilterTest.php diff --git a/.gitignore b/.gitignore index 2886748..3489a99 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ docker-compose.override.yml *.backup /tmp/ /~archive/ +firefly-import-preprocessor.code-workspace diff --git a/AGENTS.md b/AGENTS.md index cc2b82f..c867772 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,15 +14,17 @@ composer psalm # Psalm static analysis ### Test Suite Overview -85 tests across 5 test classes: +129 tests across 7 test classes: | File | Tests | Scope | | ------ | -------: | ------- | -| `tests/ColumnTransformerTest.php` | 37 | All 13 transformation types, edge cases | +| `tests/ColumnTransformerTest.php` | 51 | All 14 transformation types, edge cases | | `tests/ConfigurationLoaderTest.php` | 18 | JSON loading, dot-notation access, validation | | `tests/CsvReaderTest.php` | 15 | CSV parsing, BOM handling, delimiter, encoding | | `tests/MetadataExtractorTest.php` | 14 | Pre-header regex extraction, edge cases | | 
`tests/ConfigIntegrationTest.php` | 1× per fixture | Golden-file integration tests (see below) | +| `tests/RowFilterTest.php` | 19 | skipIf conditions, all operators, nested AND/OR groups | +| `tests/FireflyImporterChunkStateTest.php` | 11 | Chunk state persistence, resume, reset | ### Integration Tests (Golden-File Pattern) @@ -106,4 +108,4 @@ See [config/config.example.json](config/config.example.json) for a full referenc - `"outputAction": "create"` vs `"overwrite"` — controls whether the result is a new column or replaces an existing one - `MetadataExtractor` uses 1-based `lineNumber` in config; it converts to 0-based array index internally -Supported transformation types: `map`, `replace`, `regex`, `regexextract`, `dateformat`, `split`, `trim`, `uppercase`, `lowercase`, `ucwordsfirst`, `truncate`, `constantvalue`, `pipeline` +Supported transformation types: `map`, `replace`, `regex`, `regexextract`, `dateformat`, `split`, `trim`, `uppercase`, `lowercase`, `ucwordsfirst`, `truncate`, `constantvalue`, `pipeline`, `timeperiod` diff --git a/README.de.md b/README.de.md index 8e43256..736e540 100644 --- a/README.de.md +++ b/README.de.md @@ -31,7 +31,7 @@ Der **Firefly Import Preprocessor** ist ein produktionsreifer PHP-Preprocessor f ✅ **Vollständige CSV-Transformation** mit komplexen Pipelines ✅ **Metadaten-Extraktion** mit Regex (IBAN, Währung, Kontoname) -✅ **13 Transformationstypen** für flexible Datenverarbeitung +✅ **14 Transformationstypen** für flexible Datenverarbeitung ✅ **Firefly III Integration** — CLI, Docker und HTTP-Upload ✅ **Debug-Modus** für Transparenz bei Verarbeitung ✅ **Production Ready** mit vollständiger Fehlerbehandlung @@ -217,8 +217,15 @@ php bin/transformer.php test input.csv config/config.json --debug **outputAction:** -- `overwrite` — Überschreibe sourceColumn -- `create` — Erstelle neue Spalte (für Regex-Extract, Split, etc.) 
+| Wert | Verhalten | +|---|---| +| `overwrite` | Ziel-Spalte mit dem Transformations-Ergebnis überschreiben (Standard) | +| `create` | Ergebnis in eine neue Ausgabe-Spalte schreiben | +| `append` | Ergebnis ans Ende des bestehenden Spalten-Werts anhängen. Mit `"appendDelimiter": " "` (beliebige Zeichenkette) wird ein Trennzeichen zwischen bestehendem und neuem Wert eingefügt — das Trennzeichen entfällt, wenn die Ziel-Spalte noch leer ist | +| `append-if-not-empty` | Wie `append` (inkl. `appendDelimiter`), aber überspringt die Operation vollständig, wenn das Transformations-Ergebnis leer ist — geeignet für optionale Werte wie Tags oder Notiz-Zeilen | +| `append-line` | Wie `append`, aber als Trennzeichen wird immer ein Zeilenumbruch `\n` verwendet; kein führender Zeilenumbruch, wenn die Ziel-Spalte leer ist | +| `overwrite-if-empty` | Ergebnis nur schreiben, wenn die Ziel-Spalte aktuell leer ist | +| `overwrite-if-not-empty` | Ergebnis nur schreiben, wenn das Transformations-Ergebnis nicht leer ist | #### `directories` - Dateisystem @@ -242,39 +249,15 @@ php bin/transformer.php test input.csv config/config.json --debug #### `fireflyImport` - Firefly III Integration -Der Betriebsmodus wird über das Feld `mode` gesteuert. Mögliche Werte: `cli`, `docker`, `http`. -Details und vollständige Konfigurationsbeispiele: [Firefly III Integration](#firefly-iii-integration). +Optional. Mit dem Flag `--do-import` beim `transform`-Kommando (oder via `auto-import`) wird der Firefly III Data Importer nach dem Schreiben der Output-CSV aufgerufen. 
-```json -{ - "fireflyImport": { - "mode": "docker", - "jsonConfig": "/import/configs/ubs-import.json", - "importerCommand": "docker exec firefly-importer php artisan importer:import", - "autoImport": false, - "deleteAfterImport": false, - "timeout": 300 - } -} -``` - -| Feld | Typ | Beschreibung | -| --- | --- | --- | -| `mode` | String | Betriebsmodus: `cli` \| `docker` \| `http` (Standard: `cli`) | -| `jsonConfig` | String | Pfad zur Firefly III Data Importer JSON-Konfigurationsdatei (Format v3) | -| `importerCommand` | String | Vollständiges CLI-Kommando *(Modi: cli, docker)* | -| `importerUrl` | String | URL des Data Importers *(Modus: http)* | -| `importerSecret` | String | `AUTO_IMPORT_SECRET` des Importers (mind. 16 Zeichen) *(Modus: http)* | -| `autoImport` | Boolean | Import direkt nach Transformation ausführen | -| `deleteAfterImport` | Boolean | Transformierte CSV nach erfolgreichem Import löschen | -| `timeout` | Integer | Timeout in Sekunden (Standard: 300) | -| `environment` | Object | Zusätzliche Umgebungsvariablen *(Modi: cli, docker)* | +Details und vollständige Beispiele: [Firefly III Integration](#firefly-iii-integration). --- ## Transformationstypen -Es gibt **13 unterstützte Transformationstypen**, die als Pipeline kombiniert werden können: +Es gibt **14 unterstützte Transformationstypen**, die als Pipeline kombiniert werden können: ### 1. **trim** - Leerzeichen entfernen @@ -318,6 +301,8 @@ Es gibt **13 unterstützte Transformationstypen**, die als Pipeline kombiniert w Trennzeichen: Leerzeichen, Bindestrich, Apostroph, Slash, Punkt, Komma, Semikolon, Doppelpunkt, Klammern. +> **Guard:** Wenn der Eingabe-String bereits sowohl Groß- als auch Kleinbuchstaben enthält (gemischte Groß-/Kleinschreibung), wird er unverändert zurückgegeben. So werden bereits korrekt formatierte Strings wie `"Coop pronto chur"` nicht verändert. Vollständig groß- oder kleingeschriebene Strings werden weiterhin verarbeitet. + --- ### 5. 
**replace** - String-Replacement @@ -414,6 +399,94 @@ Trennzeichen: Leerzeichen, Bindestrich, Apostroph, Slash, Punkt, Komma, Semikolo --- +### 14. **timeperiod** - Zeit einer Tagesperiode zuordnen + +Parst eine Zeitangabe und gibt das Label des passenden Perioden-Bereichs zurück. +Unterstützt mitternachtübergreifende Bereiche (z. B. 22:00–03:59). +Gibt `default` (standardmäßig leer) zurück, wenn keine Periode passt oder die Eingabe ungültig ist. + +```json +{ + "type": "timeperiod", + "timeFormat": "H:i:s", + "periods": [ + { "from": "04:00:00", "to": "08:59:59", "label": "Morgen" }, + { "from": "09:00:00", "to": "10:59:59", "label": "Vormittag" }, + { "from": "11:00:00", "to": "13:59:59", "label": "Mittag" }, + { "from": "14:00:00", "to": "17:59:59", "label": "Nachmittag" }, + { "from": "18:00:00", "to": "21:59:59", "label": "Abend" }, + { "from": "22:00:00", "to": "03:59:59", "label": "Nacht" } + ], + "default": "" +} +``` + +- `"09:30:00"` → `"Vormittag"` +- `"23:00:00"` → `"Nacht"` (mitternachtübergreifender Bereich) +- `"02:00:00"` → `"Nacht"` (mitternachtübergreifender Bereich) +- `""` oder nicht parsbare Eingabe → `""` + +`timeFormat` folgt der PHP-Syntax `DateTime::createFromFormat` (Standard: `H:i:s`). + +--- + +### Zeilen-Filterung — `skipIf` + +Zeilen können durch einen Top-Level-Schlüssel `skipIf` in der Konfiguration ausgeschlossen werden. +Der Wert ist ein Filter-Knoten — entweder eine einzelne Bedingung oder eine verschachtelte `and`/`or`-Gruppe. 
+ +**Einzelne Bedingung:** + +```json +"skipIf": { "column": "Buchungstext", "operator": "equals", "value": "Saldovortrag" } +``` + +**AND-Gruppe:** + +```json +"skipIf": { + "and": [ + { "column": "Beschreibung1", "operator": "empty" }, + { "column": "Beschreibung2", "operator": "empty" } + ] +} +``` + +**Verschachtelte AND/OR-Gruppen:** + +```json +"skipIf": { + "or": [ + { "column": "Amount", "operator": "gt", "value": "10000" }, + { + "and": [ + { "column": "Type", "operator": "equals", "value": "Saldo" }, + { "column": "Notes", "operator": "empty" } + ] + } + ] +} +``` + +**Unterstützte Operatoren:** + +| Operator | Passt wenn… | +|---|---| +| `empty` | Spaltenwert ist leer | +| `not-empty` | Spaltenwert ist nicht leer | +| `equals` | Spaltenwert gleich `"value"` | +| `not-equals` | Spaltenwert ungleich `"value"` | +| `contains` | Spaltenwert enthält `"value"` | +| `not-contains` | Spaltenwert enthält `"value"` nicht | +| `matches` | Spaltenwert entspricht Regex `"pattern"` | +| `not-matches` | Spaltenwert entspricht Regex `"pattern"` nicht | +| `gt` | `(float) Spalte > (float) value` | +| `gte` | `(float) Spalte >= (float) value` | +| `lt` | `(float) Spalte < (float) value` | +| `lte` | `(float) Spalte <= (float) value` | + +--- + ### Pipeline-Beispiel ```json @@ -462,6 +535,7 @@ php bin/transformer.php [input] [config] [options] | `--debug`, `-d` | Debug-Modus aktivieren | | `--rows=N` | Max. N Zeilen (test-Kommando) | | `--output=FILE`, `-o` | Output-Pfad | +| `--do-import` | Nach der Transformation in Firefly III importieren (`transform`) | | `--strict` | Strikte Validierung | | `--watch` | Kontinuierliche Überwachung | | `--interval=SEC` | Prüfintervall in Sekunden | @@ -492,6 +566,14 @@ Der Debug-Modus protokolliert Ereignisse in folgenden Kategorien: Drei Betriebsmodi decken alle typischen Deployment-Szenarien ab. +**`chunkSize`** (optional, Standard: 0 = deaktiviert): Die Output-CSV wird vor dem Import in Blöcke von maximal N Datenzeilen aufgeteilt. 
Jeder Block wird als separate Anfrage gesendet. Das verhindert serverseitige Timeouts bei grossen Dateien (Faustregel: ~3–4 s/Transaktion im HTTP-Modus). Der `timeout`-Wert gilt pro Block, nicht für den gesamten Lauf. + +**`chunkRetries`** (optional, Standard: 0 = kein Retry): Anzahl zusätzlicher Importversuche pro Block nach dem ersten. Bei einem Fehler wiederholt der Importer den Upload bis zu dieser Anzahl, bevor er abbricht. Nur wirksam wenn `chunkSize > 0`. + +**`chunkRetryDelay`** (optional, Standard: 0 = keine Pause): Pause in Sekunden vor jedem Block-Request ab dem zweiten Block sowie zwischen Wiederholungsversuchen desselben fehlgeschlagenen Blocks. Ein einziger Wert für Cooldown und Retry-Back-off. Nur wirksam wenn `chunkSize > 0`. + +**`connectionTimeout`** (optional, Standard: 10): Maximale Wartezeit in Sekunden für den Aufbau der TCP-Verbindung zum Importer-Server. Unabhängig von `timeout` (der die gesamte Übertragungsdauer begrenzt). Nur im Modus `http`. + ### Modus `cli` Transformer und Importer auf demselben Server. @@ -501,7 +583,9 @@ Transformer und Importer auf demselben Server. "mode": "cli", "jsonConfig": "/opt/firefly-data-importer/storage/configurations/ubs-import.json", "importerCommand": "php /opt/firefly-data-importer/artisan importer:import", - "autoImport": true, + "chunkSize": 50, + "chunkRetries": 3, + "chunkRetryDelay": 10, "timeout": 300, "environment": { "FIREFLY_III_URL": "https://localhost", @@ -519,7 +603,9 @@ Transformer lokal, Importer in Docker. 
Das Ausgabeverzeichnis muss als Volume ei "mode": "docker", "jsonConfig": "/import/configs/ubs-import.json", "importerCommand": "docker exec firefly-importer php artisan importer:import", - "autoImport": true, + "chunkSize": 50, + "chunkRetries": 3, + "chunkRetryDelay": 10, "timeout": 300 } ``` @@ -539,13 +625,62 @@ AUTO_IMPORT_SECRET= # mindestens 16 Zeichen "fireflyImport": { "mode": "http", "importerUrl": "https://importer.your-server.com", - "importerSecret": "your-auto-import-secret-min-16-chars", - "jsonConfig": "/local/path/to/ubs-import.json", - "autoImport": true, + "personalSecret": "your-auto-import-secret-min-16-chars", + "accessToken": "your-firefly-iii-personal-access-token", + "jsonConfig": "config/ubs-import.json", + "chunkSize": 50, + "chunkRetries": 3, + "chunkRetryDelay": 10, + "connectionTimeout": 10, "timeout": 300 } ``` +Die Anfrage geht an `POST {importerUrl}/autoupload?secret={personalSecret}` mit CSV und JSON-Config als Multipart-Felder. `accessToken` wird als `Authorization: Bearer` gesendet. Falls `FIREFLY_III_ACCESS_TOKEN` bereits in der Importer-Umgebung gesetzt ist, kann `accessToken` weggelassen werden. + +--- + +### Serverseitige Konfiguration + +Bei grossen Importen liegt der Engpass meist auf dem Firefly III Data Importer-Server, nicht im Transformer. Die folgenden Einstellungen gehören in die Umgebung des Importers (`.env` oder `docker-compose.yml`): + +| Einstellung | Empfohlener Wert | Hinweis | +|---|---|---| +| `PHP_MEMORY_LIMIT` | `512M` – `2048M` | Docker-Umgebungsvariable. Erhöhen, wenn PHP mit „Allowed memory size exhausted" abbricht. | +| `CONNECTION_TIMEOUT` | `60` | Sekunden für den TCP-Verbindungsaufbau zu Firefly III. Standard ~31 s (π × 10). | +| `IGNORE_DUPLICATE_ERRORS` | `true` | Doppelte Transaktionswarnungen bei Wiederholungsimporten unterdrücken. 
| + +**nginx Reverse Proxy** (falls vorhanden): +```nginx +proxy_read_timeout 600s; # muss länger sein als der längste Einzelblock-Import +client_max_body_size 64M; # muss die grösste Chunk-CSV abdecken +``` + +**Docker Compose** Beispiel: +```yaml +services: + firefly-importer: + environment: + - PHP_MEMORY_LIMIT=1024M + - CONNECTION_TIMEOUT=60 + - IGNORE_DUPLICATE_ERRORS=true +``` + +--- + +### Verwendung + +```bash +# Nur transformieren (kein Import) +php bin/transformer.php transform input.csv config/config.json + +# Transformieren und in Firefly III importieren +php bin/transformer.php transform input.csv config/config.json --do-import + +# Watch-Modus: automatisch transformieren und importieren bei neuer CSV +php bin/transformer.php auto-import config/config.json --watch +``` + --- ## Architektur @@ -619,7 +754,7 @@ diff config/config.json config/config.example.json **v1.0.0 (03. Mai 2026)** - ✅ Initial Release -- ✅ 13 Transformationstypen +- ✅ 14 Transformationstypen - ✅ Metadaten-Extraktion mit Regex - ✅ Debug-Modus - ✅ Firefly III Integration (cli / docker / http) diff --git a/README.md b/README.md index bea1221..8cad1a3 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ The **Firefly Import Preprocessor** is a production-ready PHP preprocessor for b ✅ **Full CSV transformation** with complex pipelines ✅ **Metadata extraction** via regex (IBAN, currency, account name) -✅ **13 transformation types** for flexible data processing +✅ **14 transformation types** for flexible data processing ✅ **Firefly III integration** — CLI, Docker, and HTTP upload ✅ **Debug mode** for full processing transparency ✅ **Production ready** with complete error handling @@ -217,8 +217,15 @@ php bin/transformer.php test input.csv config/config.json --debug **outputAction:** -- `overwrite` — overwrite the source column -- `create` — create a new column (for regex extract, split, etc.) 
+| Value | Behaviour | +|---|---| +| `overwrite` | Replace the target column with the transformation result (default) | +| `create` | Write the result into a new output column | +| `append` | Concatenate the result to the end of the existing column value. Add `"appendDelimiter": " "` (any string) to insert a separator between the existing and new value — the delimiter is omitted when the target column is still empty | +| `append-if-not-empty` | Same as `append` (including optional `appendDelimiter`) but skips entirely when the transformation result is empty — safe for optional values such as tags or notes lines | +| `append-line` | Same as `append` but the separator is always a newline `\n`; no leading newline when the target is empty | +| `overwrite-if-empty` | Only write the result if the target column is currently empty | +| `overwrite-if-not-empty` | Only write the result if the transformation result is not empty | #### `directories` — File system @@ -242,39 +249,15 @@ php bin/transformer.php test input.csv config/config.json --debug #### `fireflyImport` — Firefly III integration -The operating mode is controlled by the `mode` field. Possible values: `cli`, `docker`, `http`. -Full details and examples: [Firefly III Integration](#firefly-iii-integration). +Optional. When present, passing `--do-import` to the `transform` command (or using `auto-import`) will call the Firefly III Data Importer after the output CSV is written. 
-```json -{ - "fireflyImport": { - "mode": "docker", - "jsonConfig": "/import/configs/ubs-import.json", - "importerCommand": "docker exec firefly-importer php artisan importer:import", - "autoImport": false, - "deleteAfterImport": false, - "timeout": 300 - } -} -``` - -| Field | Type | Description | -| --- | --- | --- | -| `mode` | string | Operating mode: `cli` \| `docker` \| `http` (default: `cli`) | -| `jsonConfig` | string | Path to the Firefly III Data Importer JSON config file (format v3) | -| `importerCommand` | string | Full CLI command *(modes: cli, docker)* | -| `importerUrl` | string | URL of the Data Importer *(mode: http)* | -| `importerSecret` | string | `AUTO_IMPORT_SECRET` of the importer (min. 16 chars) *(mode: http)* | -| `autoImport` | boolean | Run import immediately after transformation | -| `deleteAfterImport` | boolean | Delete transformed CSV after successful import | -| `timeout` | integer | Timeout in seconds (default: 300) | -| `environment` | object | Additional environment variables *(modes: cli, docker)* | +See [Firefly III Integration](#firefly-iii-integration) for the full field reference and mode-specific examples. --- ## Transformation Types -There are **13 supported transformation types** that can be combined as a pipeline: +There are **14 supported transformation types** that can be combined as a pipeline: ### 1. **trim** — Remove whitespace @@ -327,6 +310,8 @@ Capitalises the first letter after each word separator. Separators: space, hyphen, apostrophe, slash, period, comma, semicolon, colon, parentheses. +> **Guard:** If the input already contains *both* uppercase and lowercase letters (mixed-case), it is returned unchanged. This prevents accidentally re-casing intentionally formatted strings such as `"Coop pronto chur"`. Fully uppercase or fully lowercase inputs are always processed. + --- ### 5. **replace** — String replacement @@ -455,6 +440,94 @@ Useful for grouping steps as a logical unit within a `transformations` array. 
--- +### 14. **timeperiod** — Map time to a period label + +Parses a time string and returns the label of the matching period range. +Supports midnight-spanning ranges (e.g. 22:00–03:59). +Returns `default` (empty string by default) when no range matches or the input is invalid. + +```json +{ + "type": "timeperiod", + "timeFormat": "H:i:s", + "periods": [ + { "from": "04:00:00", "to": "08:59:59", "label": "Morgen" }, + { "from": "09:00:00", "to": "10:59:59", "label": "Vormittag" }, + { "from": "11:00:00", "to": "13:59:59", "label": "Mittag" }, + { "from": "14:00:00", "to": "17:59:59", "label": "Nachmittag" }, + { "from": "18:00:00", "to": "21:59:59", "label": "Abend" }, + { "from": "22:00:00", "to": "03:59:59", "label": "Nacht" } + ], + "default": "" +} +``` + +- `"09:30:00"` → `"Vormittag"` +- `"23:00:00"` → `"Nacht"` (midnight-spanning range) +- `"02:00:00"` → `"Nacht"` (midnight-spanning range) +- `""` or unparseable input → `""` + +`timeFormat` follows PHP's `DateTime::createFromFormat` syntax (default `H:i:s`). + +--- + +### Row filtering — `skipIf` + +Rows can be excluded from the output by adding a top-level `skipIf` key to the configuration. +The value is a filter node — either a bare condition or a nested `and`/`or` group. 
+ +**Bare condition:** + +```json +"skipIf": { "column": "Buchungstext", "operator": "equals", "value": "Saldovortrag" } +``` + +**AND group:** + +```json +"skipIf": { + "and": [ + { "column": "Beschreibung1", "operator": "empty" }, + { "column": "Beschreibung2", "operator": "empty" } + ] +} +``` + +**Nested AND / OR:** + +```json +"skipIf": { + "or": [ + { "column": "Amount", "operator": "gt", "value": "10000" }, + { + "and": [ + { "column": "Type", "operator": "equals", "value": "Saldo" }, + { "column": "Notes", "operator": "empty" } + ] + } + ] +} +``` + +**Supported operators:** + +| Operator | Matches when… | +|---|---| +| `empty` | column value is empty string | +| `not-empty` | column value is not empty | +| `equals` | column value equals `"value"` | +| `not-equals` | column value does not equal `"value"` | +| `contains` | column value contains `"value"` | +| `not-contains` | column value does not contain `"value"` | +| `matches` | column value matches regex `"pattern"` | +| `not-matches` | column value does not match regex `"pattern"` | +| `gt` | `(float) column > (float) value` | +| `gte` | `(float) column >= (float) value` | +| `lt` | `(float) column < (float) value` | +| `lte` | `(float) column <= (float) value` | + +--- + ### Pipeline example Multiple transformations chained: @@ -505,6 +578,7 @@ php bin/transformer.php [input] [config] [options] | `--debug`, `-d` | Enable debug mode | | `--rows=N` | Max. 
N rows (`test` command) | | `--output=FILE`, `-o` | Output path | +| `--do-import` | Import into Firefly III after transformation (`transform` only) | | `--strict` | Strict validation | | `--watch` | Continuous monitoring | | `--interval=SEC` | Check interval in seconds (default: 60) | @@ -570,6 +644,26 @@ Recommended approach: upload a sample CSV once in the Firefly III Data Importer --- +### `fireflyImport` field reference + +| Field | Type | Description | +| --- | --- | --- | +| `mode` | string | Operating mode: `cli` \| `docker` \| `http` (default: `cli`) | +| `jsonConfig` | string | Path to the Firefly III Data Importer JSON config file (format v3). For `cli` and `http` modes the file must exist locally; relative paths are resolved from the **working directory** where `php bin/transformer.php` is invoked (typically the project root). For `docker` mode the path is **inside the container** — local existence is not checked. | +| `importerCommand` | string | Full CLI command *(modes: cli, docker)* | +| `importerUrl` | string | URL of the Data Importer *(mode: http)* | +| `personalSecret` | string | The `AUTO_IMPORT_SECRET` set on the importer server (min. 16 chars). Sent as `?secret=` URL query parameter. *(mode: http)* | +| `accessToken` | string | Firefly III Personal Access Token. Sent as `Authorization: Bearer` header. Required if not already set as `FIREFLY_III_ACCESS_TOKEN` in the importer environment. *(mode: http)* | +| `deleteAfterImport` | boolean | Delete transformed CSV after successful import | +| `chunkSize` | integer | Split the CSV into chunks of at most N data rows and import each chunk as a separate request. Prevents server-side timeouts on large files (rule of thumb: ~3–4 s/transaction for HTTP mode). `0` or absent = no chunking (default). Applies to all modes. | +| `chunkRetries` | integer | Number of additional import attempts per chunk after the first. On failure the importer retries up to this many times before aborting. 
`0` or absent = no retry (default). Only effective when `chunkSize > 0`. | +| `chunkRetryDelay` | integer | Pause in seconds before each chunk request after the first, and between retry attempts for the same failed chunk. Addresses both inter-chunk cooldown and retry back-off. `0` or absent = no pause (default). Only effective when `chunkSize > 0`. | +| `connectionTimeout` | integer | Maximum seconds to wait for the TCP connection to the importer to be established. Distinct from `timeout` (full transfer duration). Default: `10`. *(mode: http only)* | +| `timeout` | integer | Timeout in seconds per request (default: 300). For chunked imports this applies per chunk, not for the total run. | +| `environment` | object | Additional environment variables *(modes: cli, docker)* | + +--- + ### Mode `cli` — Transformer and Firefly on the same server Both the transformer and the Firefly III Data Importer run on the same server. The transformer calls the importer directly as a local command. @@ -579,8 +673,10 @@ Both the transformer and the Firefly III Data Importer run on the same server. 
T "mode": "cli", "jsonConfig": "/opt/firefly-data-importer/storage/configurations/ubs-import.json", "importerCommand": "php /opt/firefly-data-importer/artisan importer:import", - "autoImport": true, "deleteAfterImport": false, + "chunkSize": 50, + "chunkRetries": 3, + "chunkRetryDelay": 10, "timeout": 300, "environment": { "FIREFLY_III_URL": "https://localhost", @@ -616,8 +712,10 @@ services: "mode": "docker", "jsonConfig": "/import/configs/ubs-import.json", "importerCommand": "docker exec firefly-importer php artisan importer:import", - "autoImport": true, "deleteAfterImport": false, + "chunkSize": 50, + "chunkRetries": 3, + "chunkRetryDelay": 10, "timeout": 300 } ``` @@ -638,7 +736,7 @@ The transformer runs locally; the Firefly III Data Importer is reachable over HT ```text CAN_POST_FILES=true -AUTO_IMPORT_SECRET= # at least 16 characters +AUTO_IMPORT_SECRET= # at least 16 characters — set this as personalSecret in your config ``` **Local requirement:** PHP extension `ext-curl` @@ -647,25 +745,60 @@ AUTO_IMPORT_SECRET= # at least 16 characters "fireflyImport": { "mode": "http", "importerUrl": "https://importer.your-server.com", - "importerSecret": "your-auto-import-secret-min-16-chars", - "jsonConfig": "/local/path/to/ubs-import.json", - "autoImport": true, + "personalSecret": "your-auto-import-secret-min-16-chars", + "accessToken": "your-firefly-iii-personal-access-token", + "jsonConfig": "config/ubs-import.json", "deleteAfterImport": false, + "chunkSize": 50, + "chunkRetries": 3, + "chunkRetryDelay": 10, + "connectionTimeout": 10, "timeout": 300 } ``` -The transformer sends files to `POST {importerUrl}/autoupload`. The JSON config lives locally — the transformer uploads it together with the CSV. No volume mount or SSH access to the remote server is required. +The transformer sends a `POST` request to `{importerUrl}/autoupload?secret={personalSecret}` with the CSV and JSON config as multipart form fields. The `accessToken` is sent as `Authorization: Bearer`. 
If `FIREFLY_III_ACCESS_TOKEN` is already set in the importer's environment, `accessToken` can be omitted. + +--- + +### Server-side tuning + +For large imports the bottleneck is usually the Firefly III Data Importer server, not the transformer. The settings below belong in the importer's environment (`.env` or `docker-compose.yml`): + +| Setting | Recommended value | Notes | +|---|---|---| +| `PHP_MEMORY_LIMIT` | `512M` – `2048M` | Docker env var. Raise when PHP crashes with "Allowed memory size exhausted". | +| `CONNECTION_TIMEOUT` | `60` | Seconds to wait for TCP connect to Firefly III API. Default is ~31 s (π × 10). | +| `IGNORE_DUPLICATE_ERRORS` | `true` | Suppress duplicate-transaction warnings on repeated imports. | + +**nginx reverse proxy** (if applicable): +```nginx +proxy_read_timeout 600s; # must exceed the longest single-chunk import time +client_max_body_size 64M; # must accommodate your largest chunk CSV +``` + +**Docker Compose** example: +```yaml +services: + firefly-importer: + environment: + - PHP_MEMORY_LIMIT=1024M + - CONNECTION_TIMEOUT=60 + - IGNORE_DUPLICATE_ERRORS=true +``` --- ### Usage ```bash -# Transformation + automatic import (when autoImport=true) +# Transform only (no import) php bin/transformer.php transform input.csv config/config.json -# Watch mode: trigger on new CSV in source directory +# Transform and import into Firefly III +php bin/transformer.php transform input.csv config/config.json --do-import + +# Watch mode: transform and import automatically for each new CSV in source directory php bin/transformer.php auto-import config/config.json --watch ``` @@ -838,7 +971,7 @@ done **v1.0.0 (03 May 2026)** - ✅ Initial release -- ✅ 13 transformation types +- ✅ 14 transformation types - ✅ Metadata extraction via regex - ✅ Debug mode - ✅ Firefly III integration (cli / docker / http) diff --git a/bin/transformer.php b/bin/transformer.php index 1c3e5ef..3e512f9 100755 --- a/bin/transformer.php +++ b/bin/transformer.php @@ -16,6 +16,7 @@ 
require_once __DIR__ . '/../vendor/autoload.php'; use UbsCsvTransformer\TransformerEngine; use UbsCsvTransformer\ConfigurationLoader; use UbsCsvTransformer\FireflyImporter; +use UbsCsvTransformer\DebugLogger; // ============================================================================ // CLI argument processing @@ -99,10 +100,10 @@ KOMMANDOS: Transformiert eine komplette CSV-Datei Optionen: --output=FILE, -o Output-Pfad (Standard: input-transformed.csv) - --no-import Nicht automatisch in Firefly III importieren + --do-import Nach der Transformation in Firefly III importieren Beispiel: transformer transform ubs-export.csv config.json - transformer transform ubs-export.csv config.json -o import.csv + transformer transform ubs-export.csv config.json --do-import validate [config] [options] Validiert die Konfigurationsdatei @@ -221,10 +222,10 @@ COMMANDS: Transforms a complete CSV file Options: --output=FILE, -o Output path (default: input-transformed.csv) - --no-import Do not automatically import into Firefly III + --do-import Import into Firefly III after transformation Example: transformer transform ubs-export.csv config.json - transformer transform ubs-export.csv config.json -o import.csv + transformer transform ubs-export.csv config.json --do-import validate [config] [options] Validates the configuration file @@ -427,6 +428,10 @@ function handleTest(int $argc, array $argv): void echo "\n💾 Output-Datei: $outputFile\n"; } + if ($debug) { + echo DebugLogger::format(true); + } + echo "\n✅ Test erfolgreich!\n\n"; } @@ -445,6 +450,8 @@ function handleTransform(int $argc, array $argv): void $debug = isset($options['debug']) || isset($options['d']); $outputFile = $options['output'] ?? $options['o'] ?? 
null; + $doImport = isset($options['do-import']); + $resetImport = isset($options['reset-import']); if (!file_exists($inputFile)) { throw new Exception("Input file not found: $inputFile"); @@ -453,11 +460,11 @@ function handleTransform(int $argc, array $argv): void throw new Exception("Configuration file not found: $configFile"); } - echo "\n🚀 TRANSFORMATION STARTEN\n"; + echo "\n🚀 TRANSFORMATION\n"; echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"; $configLoader = new ConfigurationLoader($configFile); - $configLoader->load(); + $config = $configLoader->load(); // --output overrides target directory and filename from configuration if ($outputFile !== null) { @@ -469,11 +476,158 @@ function handleTransform(int $argc, array $argv): void $engine = new TransformerEngine($configLoader, $debug); $result = $engine->transform($inputFile); - echo "✅ Transformation erfolgreich!\n"; - echo " Output-Datei: " . ($result['outputFile'] ?? 'N/A') . "\n"; - echo " Zeilen transformiert: " . ($result['rowsProcessed'] ?? 0) . "\n"; + echo "✅ Transformation complete!\n"; + echo " Output file: " . ($result['outputFile'] ?? 'N/A') . "\n"; + echo " Rows transformed: " . ($result['rowsProcessed'] ?? 0) . "\n"; - echo "\n✅ Fertig!\n\n"; + if ($doImport) { + if (!empty($config['fireflyImport'])) { + echo "\n🚀 FIREFLY III IMPORT\n"; + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"; + + $fireflyConfig = $config['fireflyImport']; + $importer = new FireflyImporter($fireflyConfig); + + $outputCsv = $result['outputFile'] ?? ''; + + if ($resetImport) { + $importer->resetImportState($outputCsv); + echo " ℹ️ Import state cleared — starting fresh.\n"; + } elseif ($importer->hasResumeState($outputCsv)) { + $stateRaw = @file_get_contents($outputCsv . '.ffi-state.json'); + $stateData = is_string($stateRaw) ? json_decode($stateRaw, true) : null; + if (is_array($stateData)) { + $doneSoFar = count((array) ($stateData['completed_chunks'] ?? 
[])); + $totalSoFar = (int) ($stateData['total_chunks'] ?? 0); + echo " ℹ️ Resuming previous import: {$doneSoFar}/{$totalSoFar} chunks already completed.\n"; + echo " Add --reset-import to start over from scratch.\n"; + } + } + + $inChunkedMode = false; + + // Detect the system timezone: PHP CLI often defaults to UTC even when the OS + // is configured otherwise. Read /etc/localtime symlink to get the real zone. + $localTzName = date_default_timezone_get(); + if (is_link('/etc/localtime')) { + $link = (string) readlink('/etc/localtime'); + if (preg_match('#zoneinfo/(.+)$#', $link, $tzMatch) === 1) { + $localTzName = $tzMatch[1]; + } + } + $localTz = new \DateTimeZone($localTzName); + + $importer->setProgressCallback( + function (string $event, array $data) use (&$inChunkedMode, $localTz): void { + static $chunkHadRetry = false; + $ts = '[' . (new \DateTimeImmutable('now', $localTz))->format('H:i:s') . ']'; + if ($event === 'chunk_start') { + $inChunkedMode = true; + $chunkHadRetry = false; + echo " ⏳ {$ts} Chunk {$data['chunk']}/{$data['total']} ({$data['rows']} rows)..."; + flush(); + } elseif ($event === 'chunk_done') { + $d = round((float) ($data['result']['duration'] ?? 0), 1); + $status = $data['result']['success'] ? 'done' : 'failed'; + if ($chunkHadRetry) { + // After retries the line is already terminated — print a full new line + echo " ✅ {$ts} Chunk {$data['chunk']}/{$data['total']}: {$status} ({$d}s)\n"; + } else { + echo " {$status} ({$d}s)\n"; + } + flush(); + } elseif ($event === 'chunk_retry') { + $chunkHadRetry = true; + $err = (string) ($data['error'] ?? ''); + $msg = $err !== '' ? " — {$err}" : ''; + echo "\n 🔄 {$ts} Chunk {$data['chunk']}/{$data['total']}: attempt {$data['attempt']}/{$data['max_attempts']} failed{$msg}\n"; + flush(); + } elseif ($event === 'chunk_delay') { + $ctx = ($data['context'] ?? '') === 'retry' ? 
'retry' : 'next chunk'; + echo " ⏸ {$ts} Waiting {$data['seconds']}s before {$ctx}...\n"; + flush(); + } elseif ($event === 'chunk_skip') { + echo " ⏭ {$ts} Chunk {$data['chunk']}/{$data['total']} already completed — skipping\n"; + flush(); + } elseif ($event === 'request_start' && !$inChunkedMode) { + echo " ⏳ {$ts} Sending to importer...\n"; + flush(); + } + } + ); + + $outputDelimiter = (string) ($config['csvStructure']['outputDelimiter'] ?? ','); + $importResult = $importer->importChunked($outputCsv, $outputDelimiter); + + $duration = $importResult['duration'] ?? null; + $chunks = $importResult['chunks'] ?? null; + $summary = $importResult['summary'] ?? null; + + if ($importResult['success']) { + if (is_array($summary)) { + $created = $summary['created'] ?? 0; + $byType = $summary['by_type'] ?? []; + $completed = $summary['completed'] ?? false; + $duplicates = $summary['duplicates'] ?? 0; + $errors = $summary['errors'] ?? []; + + $status = $completed ? '✅ Import complete!' : '⚠️ Import finished (no "Done!" marker received)'; + echo $status . ($duration !== null ? " ({$duration}s)" : '') . "\n"; + echo " Transactions created: {$created}\n"; + + $typeLabels = ['deposit' => 'Deposits', 'withdrawal' => 'Withdrawals', 'transfer' => 'Transfers']; + foreach ($byType as $type => $count) { + $label = $typeLabels[$type] ?? ucfirst($type); + echo " {$label}: {$count}\n"; + } + + if ($duplicates > 0) { + echo " ⚠️ Duplicates skipped: {$duplicates}\n"; + } + + if (!empty($errors)) { + $errorCount = count($errors); + echo " ❌ Errors ({$errorCount}):\n"; + foreach ($errors as $err) { + echo " - {$err}\n"; + } + } + } else { + echo "✅ Import complete!" . ($duration !== null ? " ({$duration}s)" : '') . "\n"; + if (!empty($importResult['output']['stdout'])) { + echo $importResult['output']['stdout'] . "\n"; + } + } + } else { + $errorMsg = $importResult['error'] + ?? ('HTTP ' . ($importResult['exit_code'] ?? '?')); + $chunksData = $importResult['chunks'] ?? 
null; + if (is_array($chunksData) && $chunksData['total'] > 1) { + $failedChunk = $chunksData['done'] + 1; + echo "❌ Import failed at chunk {$failedChunk}/{$chunksData['total']}: {$errorMsg}\n"; + echo " Run the same command again to resume from where it stopped.\n"; + echo " Add --reset-import to start over from scratch.\n"; + } else { + echo "❌ Import failed: {$errorMsg}\n"; + } + // Only dump the raw response body in debug mode + if ($debug && !empty($importResult['output']['stdout'])) { + echo $importResult['output']['stdout'] . "\n"; + } + if (!empty($importResult['output']['stderr'])) { + echo $importResult['output']['stderr'] . "\n"; + } + } + } else { + echo "\n⚠️ --do-import specified but no fireflyImport section found in config.\n"; + } + } + + if ($debug) { + echo DebugLogger::format(true); + } + + echo "\n✅ Done!\n\n"; } /** diff --git a/config/config.example.json b/config/config.example.json index 3918c5c..f0339f8 100644 --- a/config/config.example.json +++ b/config/config.example.json @@ -207,6 +207,10 @@ "autoImport": false, "deleteAfterImport": false, + "chunkSize": 0, + "chunkRetries": 0, + "chunkRetryDelay": 0, + "connectionTimeout": 10, "timeout": 300, "environment": { diff --git a/config/ff-import-ubs-account.json b/config/ff-import-ubs-account.json new file mode 100644 index 0000000..e790abb --- /dev/null +++ b/config/ff-import-ubs-account.json @@ -0,0 +1,79 @@ +{ + "version": 3, + "source": "ff3-importer-2.1.1", + "created_at": "2026-05-04T22:22:39+02:00", + "date": "Y-m-d", + "default_account": 1, + "delimiter": "comma", + "headers": true, + "rules": true, + "webhooks": true, + "skip_form": false, + "add_import_tag": true, + "roles": [ + "amount_debit", + "amount_credit", + "date_transaction", + "date_process", + "opposing-name", + "tags-comma", + "description", + "opposing-iban", + "opposing-number", + "note", + "account-iban", + "currency-code" + ], + "do_mapping": [ + false, + false, + false, + false, + false, + false, + false, + false, + 
false, + false, + false, + false + ], + "mapping": {}, + "duplicate_detection_method": "classic", + "ignore_duplicate_lines": false, + "unique_column_index": 0, + "unique_column_type": "note", + "pseudo_identifier": [], + "flow": "file", + "content_type": "csv", + "camt_type": "", + "custom_tag": "test001", + "identifier": "0", + "connection": "0", + "ignore_spectre_categories": false, + "grouped_transaction_handling": "", + "use_entire_opposing_address": false, + "map_all_data": true, + "pending_transactions": false, + "access_token": "", + "accounts": {}, + "new_accounts": [], + "date_range": "", + "date_range_number": 30, + "date_range_unit": "d", + "date_range_not_after_unit": "", + "date_range_not_after_number": 0, + "date_not_before": "", + "date_not_after": "", + "nordigen_country": "", + "nordigen_bank": "", + "nordigen_requisitions": {}, + "nordigen_max_days": "90", + "lunch_flow_api_key": "", + "enable_banking_country": "", + "enable_banking_bank": "", + "enable_banking_auth_id": "", + "enable_banking_sessions": [], + "conversion": false, + "ignore_duplicate_transactions": true +} \ No newline at end of file diff --git a/src/ColumnTransformer.php b/src/ColumnTransformer.php index a2220b6..c2de67b 100644 --- a/src/ColumnTransformer.php +++ b/src/ColumnTransformer.php @@ -9,21 +9,27 @@ namespace UbsCsvTransformer; * - map: Copy/rename column (default) * - replace: String replacement (str_replace) * - regex: Regex replace via preg_replace (backreferences: $1, $2 …) - * - dateformat: Date formatting + * - dateformat: Date formatting (toFormat: 'l' yields English weekday name) * - split: Split column at delimiter * - regexextract: Extract using regex * - trim: Remove whitespace * - uppercase: Convert to uppercase * - lowercase: Convert to lowercase - * - ucwordsfirst: Capitalise first letter after word boundaries + * - ucwordsfirst: Capitalise first letter after word boundaries (only when input + * has no lowercase letters; strings already mixed-case are 
returned + * unchanged) * - truncate: Truncate string to maximum length * - constantvalue: Constant value from metadata * - pipeline: Chain multiple transformations (via steps[]) * - custom: Custom PHP callback + * - timeperiod: Map a time string to a period label (morning, evening, …) * * Supported outputAction values: * - create / overwrite: Set target column (default) - * - append: Append value + * - append: Append value directly; optional "appendDelimiter" inserts a separator + * between existing and new value (skipped when target is still empty) + * - append-if-not-empty: Like append, but skips entirely when the transformation result is + * empty (safe for optional values like tags and notes lines) * - append-line: Append value on new line (no leading newline if target is empty) * - overwrite-if-empty: Only set if target column is empty * - overwrite-if-not-empty: Only set if transformation result is not empty @@ -124,7 +130,22 @@ class ColumnTransformer // Apply output action switch ($outputAction) { case 'append': - $transformedRow[$targetColumn] = ($transformedRow[$targetColumn] ?? '') . $resultValue; + $existing = $transformedRow[$targetColumn] ?? ''; + if (isset($config['appendDelimiter']) && $existing !== '') { + $transformedRow[$targetColumn] = $existing . (string) $config['appendDelimiter'] . $resultValue; + } else { + $transformedRow[$targetColumn] = $existing . $resultValue; + } + break; + case 'append-if-not-empty': + if ($resultValue !== '') { + $existing = $transformedRow[$targetColumn] ?? ''; + if (isset($config['appendDelimiter']) && $existing !== '') { + $transformedRow[$targetColumn] = $existing . (string) $config['appendDelimiter'] . $resultValue; + } else { + $transformedRow[$targetColumn] = $existing . 
$resultValue; + } + } break; case 'append-line': // Append value on new line; no leading newline if target is empty @@ -205,6 +226,9 @@ class ColumnTransformer case 'pipeline': return $this->transformPipeline($value, $config); + case 'timeperiod': + return $this->transformTimePeriod($value, $config); + case 'truncate': $maxLength = (int)($config['maxLength'] ?? 255); return mb_substr($value, 0, $maxLength, 'UTF-8'); @@ -522,6 +546,14 @@ class ColumnTransformer */ private function transformUcwordsFirst(string $value, array $config = []): string { + // Guard: if the string already contains both uppercase and lowercase letters + // (i.e. mixed-case), it has already been intentionally formatted — leave it alone. + // Fully-uppercase or fully-lowercase strings are still processed so that patterns + // like "lowercase → ucwordsfirst" continue to work as expected. + if (preg_match('/\p{Lu}/u', $value) && preg_match('/\p{Ll}/u', $value)) { + return $value; + } + // Step 1: Convert everything to lowercase $value = mb_strtolower($value, 'UTF-8'); @@ -740,4 +772,79 @@ class ColumnTransformer { return count(array_unique($this->outputColumns)); } + + /** + * Time-period transformer + * + * Maps a time string to a period label via a configurable list of ranges. + * Supports midnight-spanning ranges (e.g. "22:00:00" to "03:59:59"). + * Returns the configured default (empty string by default) when no range matches + * or the input cannot be parsed. 
+ * + * Configuration: + * ```json + * { + * "type": "timeperiod", + * "timeFormat": "H:i:s", + * "periods": [ + * {"from": "04:00:00", "to": "08:59:59", "label": "Morgen"}, + * {"from": "22:00:00", "to": "03:59:59", "label": "Nacht"} + * ], + * "default": "" + * } + * ``` + * + * @param string $value Time string to evaluate + * @param array $config Transformation configuration + * @return string Period label or default + */ + private function transformTimePeriod(string $value, array $config): string + { + $default = (string) ($config['default'] ?? ''); + $timeFormat = (string) ($config['timeFormat'] ?? 'H:i:s'); + /** @var array> $periods */ + $periods = $config['periods'] ?? []; + + if ($value === '' || empty($periods)) { + return $default; + } + + $parsed = \DateTime::createFromFormat($timeFormat, $value); + if ($parsed === false) { + return $default; + } + + // Represent time as total minutes from midnight for easy comparison + $minutes = (int) $parsed->format('H') * 60 + (int) $parsed->format('i'); + + foreach ($periods as $period) { + $fromStr = (string) ($period['from'] ?? ''); + $toStr = (string) ($period['to'] ?? ''); + $label = (string) ($period['label'] ?? ''); + + $fromParsed = \DateTime::createFromFormat($timeFormat, $fromStr); + $toParsed = \DateTime::createFromFormat($timeFormat, $toStr); + + if ($fromParsed === false || $toParsed === false) { + continue; + } + + $fromMin = (int) $fromParsed->format('H') * 60 + (int) $fromParsed->format('i'); + $toMin = (int) $toParsed->format('H') * 60 + (int) $toParsed->format('i'); + + if ($fromMin <= $toMin) { + // Normal range (e.g. 04:00 – 08:59) + if ($minutes >= $fromMin && $minutes <= $toMin) { + return $label; + } + } else { + // Midnight-spanning range (e.g. 
22:00 – 03:59) + if ($minutes >= $fromMin || $minutes <= $toMin) { + return $label; + } + } + } + + return $default; + } } diff --git a/src/FireflyImporter.php b/src/FireflyImporter.php index 3ba007e..dc20e8e 100644 --- a/src/FireflyImporter.php +++ b/src/FireflyImporter.php @@ -20,6 +20,9 @@ namespace UbsCsvTransformer; * "importerCommand": "php /opt/firefly-data-importer/artisan importer:import", * "autoImport": true, * "deleteAfterImport": false, + * "chunkSize": 50, + * "chunkRetries": 3, + * "chunkRetryDelay": 10, * "timeout": 300, * "environment": { * "FIREFLY_III_URL": "https://localhost", @@ -48,6 +51,9 @@ namespace UbsCsvTransformer; * "importerCommand": "docker exec firefly-importer php artisan importer:import", * "autoImport": true, * "deleteAfterImport": false, + * "chunkSize": 50, + * "chunkRetries": 3, + * "chunkRetryDelay": 10, * "timeout": 300 * } * @@ -57,23 +63,53 @@ namespace UbsCsvTransformer; * The transformer uploads CSV and JSON configuration via HTTP multipart upload to the * Firefly III Data Importer. The importer must have these environment variables set: * CAN_POST_FILES=true (allows file upload via API) - * AUTO_IMPORT_SECRET= (at least 16 characters, must match "importerSecret") + * AUTO_IMPORT_SECRET= (at least 16 characters, must match "accessToken") * - * HTTP endpoint: POST {importerUrl}/autoupload - * Fields: secret (string), json (JSON config file), importable (CSV file) + * HTTP endpoint: POST {importerUrl}/autoupload?secret={accessToken} + * Headers: Accept: application/json + * Authorization: Bearer {personalSecret} (Firefly III Personal Access Token) + * Fields: json (JSON config file), importable (CSV file) * * Local requirement: PHP extension ext-curl must be available. 
* * "fireflyImport": { * "mode": "http", * "importerUrl": "https://importer.your-server.com", - * "importerSecret": "your-auto-import-secret-min-16-chars", + * "accessToken": "your-auto-import-secret-min-16-chars", + * "personalSecret": "your-firefly-iii-personal-access-token", * "jsonConfig": "/local/path/to/ubs-import.json", * "autoImport": true, * "deleteAfterImport": false, + * "chunkSize": 50, + * "chunkRetries": 3, + * "chunkRetryDelay": 10, + * "connectionTimeout": 10, * "timeout": 300 * } * + * "chunkSize" (optional, default: 0 = disabled): + * Splits the transformed CSV into chunks of at most N data rows before + * uploading. Each chunk is sent as a separate import request. This avoids + * server-side timeouts on large files (rule of thumb: ~3–4 s/transaction). + * Chunks share the same header row. Temporary files are cleaned up after + * every chunk regardless of outcome. Applies to all three modes. + * + * "chunkRetries" (optional, default: 0 = no retry): + * Number of additional import attempts per chunk after the first. On failure + * the importer retries up to this many times before aborting. Only effective + * when chunkSize > 0. + * + * "chunkRetryDelay" (optional, default: 0 = no pause): + * Pause in seconds applied before each chunk request after the first, and + * between retry attempts for the same failed chunk. One knob for both + * inter-chunk cooldown and inter-retry back-off. Only effective when + * chunkSize > 0. + * + * "connectionTimeout" (optional, default: 10): + * Maximum seconds to wait for the TCP connection to the importer server to + * be established. Distinct from "timeout" (which limits the full transfer + * duration). Mode "http" only. 
+ * * ═══════════════════════════════════════════════════════════════════════════════ * COMMON SETUP REQUIREMENTS (all modes) * ═══════════════════════════════════════════════════════════════════════════════ @@ -98,11 +134,18 @@ class FireflyImporter private string $jsonConfigPath; private string $importerCommand; private string $importerUrl; - private string $importerSecret; + private string $personalSecret; + private string $accessToken; private bool $deleteAfterImport; private int $timeout; + private int $chunkSize; + private int $chunkRetries; + private int $chunkRetryDelay; + private int $connectionTimeout; /** @var array */ private array $environment; + /** @var (callable(string, array): void)|null */ + private mixed $progressCallback = null; /** * @param array $config Firefly import configuration @@ -143,10 +186,13 @@ class FireflyImporter if (empty($this->importerUrl)) { throw new \RuntimeException("Firefly Import: 'importerUrl' not configured (mode: http)"); } - $this->importerSecret = (string) ($config['importerSecret'] ?? ''); - if (empty($this->importerSecret)) { - throw new \RuntimeException("Firefly Import: 'importerSecret' not configured (mode: http)"); + // accessToken = AUTO_IMPORT_SECRET on the importer → sent as ?secret= URL query parameter + $this->accessToken = (string) ($config['accessToken'] ?? ''); + if (empty($this->accessToken)) { + throw new \RuntimeException("Firefly Import: 'accessToken' not configured (mode: http)"); } + // personalSecret = Firefly III Personal Access Token → sent as Authorization: Bearer + $this->personalSecret = (string) ($config['personalSecret'] ?? ''); $this->importerCommand = ''; } else { $this->importerCommand = (string) ($config['importerCommand'] ?? ''); @@ -156,17 +202,45 @@ class FireflyImporter ); } $this->importerUrl = ''; - $this->importerSecret = ''; + $this->personalSecret = ''; + $this->accessToken = ''; } // Common optional fields $this->deleteAfterImport = (bool) ($config['deleteAfterImport'] ?? 
false); $this->timeout = (int) ($config['timeout'] ?? 300); + $this->chunkSize = max(0, (int) ($config['chunkSize'] ?? 0)); + $this->chunkRetries = max(0, (int) ($config['chunkRetries'] ?? 0)); + $this->chunkRetryDelay = max(0, (int) ($config['chunkRetryDelay'] ?? 0)); + $this->connectionTimeout = max(1, (int) ($config['connectionTimeout'] ?? 10)); /** @var array $env */ $env = $config['environment'] ?? []; $this->environment = $env; } + /** + * Sets an optional progress callback invoked at key points during import. + * + * Events and their $data keys: + * 'request_start' — fired just before each blocking I/O call (HTTP or CLI). No data keys. + * 'chunk_start' — fired before each chunk is imported. + * Keys: 'chunk' (int), 'total' (int), 'rows' (int) + * 'chunk_done' — fired after each chunk import returns (success or failure). + * Keys: 'chunk' (int), 'total' (int), 'result' (array) + * 'chunk_skip' — fired when a chunk is skipped (already completed in a previous run). + * Keys: 'chunk' (int), 'total' (int) + * 'chunk_delay' — fired just before an inter-chunk or inter-retry pause. + * Keys: 'chunk' (int), 'total' (int), 'seconds' (int), 'context' (string: 'between_chunks'|'retry') + * 'chunk_retry' — fired after a failed attempt and before a retry sleep. + * Keys: 'chunk' (int), 'total' (int), 'attempt' (int), 'max_attempts' (int), 'error' (string) + * + * @param (callable(string, array): void)|null $callback + */ + public function setProgressCallback(?callable $callback): void + { + $this->progressCallback = $callback; + } + /** * Imports a transformed CSV file into Firefly III * @@ -195,6 +269,444 @@ class FireflyImporter return $this->importViaCli($csvFile); } + /** + * Imports a CSV file in row-limited chunks, with resume support. + * + * Reads the output CSV, splits it into temporary files of at most + * $chunkSize data rows (header is repeated on every chunk), imports + * each chunk sequentially, then aggregates the results. 
+ * + * Falls back to a plain import() call when chunkSize is 0 or the file + * has fewer rows than the chunk size. + * + * If a previous run was interrupted, a state file (.ffi-state.json) + * records which chunks completed. The next call automatically skips those + * chunks and resumes from the first incomplete one. + * On full success the state file is deleted. Use resetImportState() to + * force a fresh start regardless of any existing state. + * + * @param string $csvFile Path to the transformed CSV file + * @param string $delimiter CSV delimiter used in the file + * @return array Aggregated import result + */ + public function importChunked(string $csvFile, string $delimiter = ','): array + { + if (!file_exists($csvFile)) { + return [ + 'success' => false, + 'error' => "CSV file not found: {$csvFile}", + 'output' => ['stdout' => '', 'stderr' => ''], + 'exit_code' => -1, + ]; + } + + if ($this->chunkSize <= 0) { + return $this->import($csvFile); + } + + // Read the whole file into memory (it is already in the output dir) + $fp = fopen($csvFile, 'r'); + if ($fp === false) { + return [ + 'success' => false, + 'error' => "Cannot open CSV file: {$csvFile}", + 'output' => ['stdout' => '', 'stderr' => ''], + 'exit_code' => -1, + ]; + } + + /** @var string[] $header */ + $header = fgetcsv($fp, 0, $delimiter, '"', '\\') ?: []; + /** @var string[][] $rows */ + $rows = []; + while (($row = fgetcsv($fp, 0, $delimiter, '"', '\\')) !== false) { + $rows[] = $row; + } + fclose($fp); + + $totalRows = count($rows); + if ($totalRows === 0 || $totalRows <= $this->chunkSize) { + return $this->import($csvFile); + } + + $chunks = array_chunk($rows, $this->chunkSize); + $totalChunks = count($chunks); + $tmpFiles = []; + $startAll = microtime(true); + + // Load existing state (resume support) or initialise fresh + $state = $this->readState($csvFile, $totalRows); + if ($state === null) { + $state = [ + 'csv_file' => realpath($csvFile) ?: $csvFile, + 'total_rows' => $totalRows, + 
'chunk_size' => $this->chunkSize, + 'total_chunks' => $totalChunks, + 'completed_chunks' => [], + 'chunk_results' => [], + 'created_at' => (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM), + 'updated_at' => (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM), + ]; + } else { + DebugLogger::log( + 'chunked_import', + 'Resuming from state file', + ['completed' => $state['completed_chunks'], 'total' => $totalChunks] + ); + } + + /** @var int[] $completedIndices */ + $completedIndices = $state['completed_chunks']; + + // Pre-populate results with stored summaries for already-completed chunks + /** @var array> $results */ + $results = []; + foreach ($completedIndices as $doneIndex) { + /** @var array $stored */ + $stored = $state['chunk_results'][(string) $doneIndex] ?? ['success' => true]; + $results[$doneIndex] = $stored; + } + + DebugLogger::log('chunked_import', "Splitting {$totalRows} rows into {$totalChunks} chunks of {$this->chunkSize}"); + + $lastImportedIndex = -1; + + try { + foreach ($chunks as $index => $chunkRows) { + // Skip chunks already successfully imported in a previous run + if (in_array($index, $completedIndices, true)) { + DebugLogger::log('chunked_import', "Chunk " . ($index + 1) . "/{$totalChunks} already completed, skipping"); + if ($this->progressCallback !== null) { + ($this->progressCallback)('chunk_skip', [ + 'chunk' => $index + 1, + 'total' => $totalChunks, + ]); + } + continue; + } + + // Delay between chunks: pause before every chunk after the first *actual* import + // (skipped/resumed chunks don't count — no need to delay before the first real request) + if ($lastImportedIndex >= 0 && $this->chunkRetryDelay > 0) { + DebugLogger::log( + 'chunked_import', + "Delay: sleeping {$this->chunkRetryDelay}s before chunk " . ($index + 1) . 
"/{$totalChunks}" + ); + if ($this->progressCallback !== null) { + ($this->progressCallback)('chunk_delay', [ + 'chunk' => $index + 1, + 'total' => $totalChunks, + 'seconds' => $this->chunkRetryDelay, + 'context' => 'between_chunks', + ]); + } + sleep($this->chunkRetryDelay); + } + + $chunkNum = $index + 1; + $tmpFile = tempnam(sys_get_temp_dir(), 'ffi_chunk_'); + if ($tmpFile === false) { + throw new \RuntimeException('Could not create temporary chunk file'); + } + $tmpFiles[] = $tmpFile; + + // Write chunk CSV (header + data rows) + $out = fopen($tmpFile, 'w'); + if ($out === false) { + throw new \RuntimeException("Could not write chunk file: {$tmpFile}"); + } + fputcsv($out, $header, $delimiter, '"', '\\'); + foreach ($chunkRows as $row) { + fputcsv($out, $row, $delimiter, '"', '\\'); + } + fclose($out); + + DebugLogger::log('chunked_import', "Importing chunk {$chunkNum}/{$totalChunks}", [ + 'rows' => count($chunkRows), + 'file' => $tmpFile, + ]); + + if ($this->progressCallback !== null) { + ($this->progressCallback)('chunk_start', [ + 'chunk' => $chunkNum, + 'total' => $totalChunks, + 'rows' => count($chunkRows), + ]); + } + + // Import with optional retry on failure — never retry on timeout (the server is already overloaded) + $maxAttempts = 1 + $this->chunkRetries; + $result = $this->import($tmpFile); + for ($attempt = 1; $attempt < $maxAttempts && !$result['success']; $attempt++) { + DebugLogger::log('chunked_import', "Chunk {$chunkNum}/{$totalChunks}: attempt {$attempt}/{$maxAttempts} failed, retrying", [ + 'error' => $result['error'] ?? '', + ]); + if ($this->progressCallback !== null) { + ($this->progressCallback)('chunk_retry', [ + 'chunk' => $chunkNum, + 'total' => $totalChunks, + 'attempt' => $attempt, + 'max_attempts' => $maxAttempts, + 'error' => (string) ($result['error'] ?? 
''), + ]); + } + if ($this->chunkRetryDelay > 0) { + if ($this->progressCallback !== null) { + ($this->progressCallback)('chunk_delay', [ + 'chunk' => $chunkNum, + 'total' => $totalChunks, + 'seconds' => $this->chunkRetryDelay, + 'context' => 'retry', + ]); + } + sleep($this->chunkRetryDelay); + } + $result = $this->import($tmpFile); + } + + $results[$index] = $result; + + if ($this->progressCallback !== null) { + ($this->progressCallback)('chunk_done', [ + 'chunk' => $chunkNum, + 'total' => $totalChunks, + 'result' => $result, + ]); + } + + if ($result['success']) { + $lastImportedIndex = $index; + // Persist progress so a subsequent run can resume + $completedIndices[] = $index; + $state['completed_chunks'] = $completedIndices; + $state['chunk_results'][(string) $index] = $result; + $state['updated_at'] = (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM); + $this->writeState($csvFile, $state); + } else { + // All retry attempts exhausted — persist current state and abort remaining chunks + $state['updated_at'] = (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM); + $this->writeState($csvFile, $state); + return $this->mergeChunkResults(array_values($results), microtime(true) - $startAll, $totalChunks); + } + } + } finally { + foreach ($tmpFiles as $f) { + if (file_exists($f)) { + @unlink($f); + } + } + } + + // All chunks succeeded — clean up state and optionally delete the source CSV + $this->clearState($csvFile); + + if ($this->deleteAfterImport) { + @unlink($csvFile); + } + + return $this->mergeChunkResults(array_values($results), microtime(true) - $startAll, $totalChunks); + } + + /** + * Deletes any existing import state file for the given CSV. + * + * Call this before importChunked() to force a fresh import regardless + * of a previously interrupted run. 
+ * + * @param string $csvFile Path to the transformed CSV file + */ + public function resetImportState(string $csvFile): void + { + $this->clearState($csvFile); + } + + /** + * Returns true when a resumable state file exists for the given CSV + * and its metadata (total rows, chunk size) still matches. + * + * @param string $csvFile Path to the transformed CSV file + */ + public function hasResumeState(string $csvFile): bool + { + if (!file_exists($csvFile)) { + return false; + } + // Count rows to validate the state + $fp = @fopen($csvFile, 'r'); + if ($fp === false) { + return false; + } + $rowCount = 0; + while (fgetcsv($fp, 0, ',', '"', '\\') !== false) { + $rowCount++; + } + fclose($fp); + $totalRows = max(0, $rowCount - 1); // subtract header + + return $this->readState($csvFile, $totalRows) !== null; + } + + // ─── State-file helpers (resume support) ──────────────────────────────── + + /** + * Returns the path of the state file for a given CSV file. + */ + private function stateFilePath(string $csvFile): string + { + return $csvFile . '.ffi-state.json'; + } + + /** + * Reads and validates the state file for the given CSV. 
+ * + * Returns null when: + * - the state file does not exist + * - the JSON is corrupt + * - csv_file, total_rows, or chunk_size do not match the current run + * + * @param string $csvFile Path to the transformed CSV file + * @param int $totalRows Number of data rows in the current CSV + * @return array|null Decoded state or null + */ + private function readState(string $csvFile, int $totalRows): ?array + { + $path = $this->stateFilePath($csvFile); + if (!file_exists($path)) { + return null; + } + + $raw = @file_get_contents($path); + if ($raw === false) { + return null; + } + + /** @var array|null $state */ + $state = json_decode($raw, true); + if (!is_array($state)) { + DebugLogger::log('chunked_import', 'State file is corrupt (invalid JSON), starting fresh', ['path' => $path]); + return null; + } + + $absPath = realpath($csvFile) ?: $csvFile; + if ( + ($state['csv_file'] ?? null) !== $absPath + || (int) ($state['total_rows'] ?? -1) !== $totalRows + || (int) ($state['chunk_size'] ?? -1) !== $this->chunkSize + ) { + DebugLogger::log('chunked_import', 'State file metadata mismatch, starting fresh', [ + 'state_csv' => $state['csv_file'] ?? null, + 'current_csv' => $absPath, + 'state_total_rows' => $state['total_rows'] ?? null, + 'current_total_rows' => $totalRows, + 'state_chunk_size' => $state['chunk_size'] ?? null, + 'current_chunk_size' => $this->chunkSize, + ]); + return null; + } + + return $state; + } + + /** + * Atomically writes the state to disk (tmp → rename) so a crash during + * the write cannot leave a corrupt state file. + * + * @param string $csvFile Path to the transformed CSV file + * @param array $state State data to persist + */ + private function writeState(string $csvFile, array $state): void + { + $path = $this->stateFilePath($csvFile); + $tmpPath = $path . 
'.tmp'; + $json = json_encode($state, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); + if ($json === false) { + DebugLogger::log('chunked_import', 'Could not encode state to JSON, skipping state write'); + return; + } + if (file_put_contents($tmpPath, $json) === false) { + DebugLogger::log('chunked_import', 'Could not write state tmp file', ['path' => $tmpPath]); + return; + } + if (!rename($tmpPath, $path)) { + @unlink($tmpPath); + DebugLogger::log('chunked_import', 'Could not rename state tmp file', ['tmp' => $tmpPath, 'target' => $path]); + } + } + + /** + * Deletes the state file for the given CSV if it exists. + * + * @param string $csvFile Path to the transformed CSV file + */ + private function clearState(string $csvFile): void + { + $path = $this->stateFilePath($csvFile); + if (file_exists($path)) { + @unlink($path); + DebugLogger::log('chunked_import', 'State file deleted', ['path' => $path]); + } + } + + // ─── Result aggregation ────────────────────────────────────────────────── + + /** + * Merges per-chunk import results into a single aggregate result. + * + * @param array> $results Per-chunk result arrays + * @param float $totalDuration Wall-clock seconds for all chunks + * @param int $totalChunks Total number of chunks attempted + * @return array + */ + private function mergeChunkResults(array $results, float $totalDuration, int $totalChunks): array + { + $successCount = 0; + $created = 0; + /** @var array $byType */ + $byType = []; + $duplicates = 0; + $errors = []; + $completed = true; + + foreach ($results as $r) { + if (!empty($r['success'])) { + $successCount++; + } + /** @var array{completed?: bool, created?: int, by_type?: array, duplicates?: int, errors?: string[]}|null $s */ + $s = $r['summary'] ?? null; + if (is_array($s)) { + $created += (int) ($s['created'] ?? 0); + $duplicates += (int) ($s['duplicates'] ?? 0); + if (!($s['completed'] ?? false)) { + $completed = false; + } + foreach (($s['by_type'] ?? 
[]) as $type => $count) { + $byType[$type] = ($byType[$type] ?? 0) + (int) $count; + } + foreach (($s['errors'] ?? []) as $err) { + $errors[] = $err; + } + } elseif (!empty($r['error'])) { + $errors[] = $r['error']; + $completed = false; + } + } + + $allSuccess = ($successCount === count($results)); + + return [ + 'success' => $allSuccess, + 'chunks' => ['done' => $successCount, 'total' => $totalChunks], + 'duration' => round($totalDuration, 2), + 'summary' => [ + 'completed' => $completed, + 'created' => $created, + 'by_type' => $byType, + 'duplicates' => $duplicates, + 'errors' => $errors, + ], + ]; + } + /** * Import via command line (modes: cli, docker) * @@ -222,6 +734,10 @@ class FireflyImporter 2 => ['pipe', 'w'], ]; + if ($this->progressCallback !== null) { + ($this->progressCallback)('request_start', []); + } + $process = proc_open($command, $descriptors, $pipes, null, $env); if (!is_resource($process)) { @@ -280,7 +796,9 @@ class FireflyImporter */ private function importViaHttp(string $csvFile): array { - $url = rtrim($this->importerUrl, '/') . '/autoupload'; + // Secret goes as a URL query parameter per the Firefly III Data Importer API spec + // accessToken = AUTO_IMPORT_SECRET value configured on the importer server + $url = rtrim($this->importerUrl, '/') . '/autoupload?secret=' . urlencode($this->accessToken); $ch = curl_init(); if ($ch === false) { @@ -292,34 +810,77 @@ class FireflyImporter ]; } + // POST fields: only the files (no secret field — it is in the URL) $postFields = [ - 'secret' => $this->importerSecret, 'json' => new \CURLFile($this->jsonConfigPath), 'importable' => new \CURLFile($csvFile), ]; + $headers = ['Accept: application/json']; + // personalSecret = Firefly III Personal Access Token → Authorization: Bearer + if ($this->personalSecret !== '') { + $headers[] = 'Authorization: Bearer ' . 
$this->personalSecret; + } + curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_POST, true); curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->connectionTimeout); curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout); + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + + // Log the request details when debug mode is on (redact secret from URL) + if (DebugLogger::isEnabled()) { + $redactedUrl = preg_replace('/secret=[^&]+/', 'secret=[REDACTED]', $url) ?? $url; + DebugLogger::log('http_import', 'cURL request', [ + 'url' => $redactedUrl, + 'headers' => array_map( + static fn (string $h): string => str_starts_with($h, 'Authorization:') + ? 'Authorization: Bearer [REDACTED]' + : $h, + $headers + ), + 'files' => [ + 'json' => $this->jsonConfigPath, + 'importable' => $csvFile, + ], + ]); + } + + if ($this->progressCallback !== null) { + ($this->progressCallback)('request_start', []); + } $startTime = microtime(true); $response = curl_exec($ch); $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE); $curlError = curl_error($ch); - curl_close($ch); + $curlErrno = curl_errno($ch); + // Note: curl_close() is a no-op since PHP 8.0 and deprecated in PHP 8.5 $duration = microtime(true) - $startTime; $responseBody = is_string($response) ? $response : ''; + if (DebugLogger::isEnabled()) { + DebugLogger::log('http_import', 'cURL response', [ + 'http_code' => $httpCode, + 'curl_error' => $curlError !== '' ? 
$curlError : null, + 'duration_s' => round($duration, 3), + 'body' => $this->extractErrorText($responseBody), + ]); + } + if ($curlError !== '') { + // errno 28 = CURLE_OPERATION_TIMEDOUT (covers both CURLOPT_CONNECTTIMEOUT and CURLOPT_TIMEOUT) return [ - 'success' => false, - 'error' => "cURL error: {$curlError}", - 'output' => ['stdout' => '', 'stderr' => $curlError], - 'exit_code' => -1, - 'duration' => round($duration, 2), + 'success' => false, + 'error' => "cURL error: {$curlError}", + 'timed_out' => ($curlErrno === 28), + 'output' => ['stdout' => '', 'stderr' => $curlError], + 'exit_code' => -1, + 'duration' => round($duration, 2), ]; } @@ -329,17 +890,115 @@ class FireflyImporter @unlink($csvFile); } + if (!$success) { + return [ + 'success' => false, + 'error' => "HTTP {$httpCode}: " . $this->extractErrorText($responseBody), + 'output' => ['stdout' => $responseBody, 'stderr' => ''], + 'exit_code' => $httpCode, + 'duration' => round($duration, 2), + 'csv_file' => $csvFile, + ]; + } + return [ - 'success' => $success, + 'success' => true, 'exit_code' => $httpCode, 'output' => ['stdout' => $responseBody, 'stderr' => ''], 'duration' => round($duration, 2), 'csv_file' => $csvFile, 'config_file' => $this->jsonConfigPath, - 'deleted' => ($success && $this->deleteAfterImport), + 'deleted' => $this->deleteAfterImport, + 'summary' => $this->parseImportResponse($responseBody), ]; } + /** + * Parses the plain-text response from the Firefly III Data Importer. 
+ * + * Handles two line formats: + * Created: Import index N: Created []#[] "" ( ) + * Issue: Import index N: [aNNN]: : + * + * [aNNN] codes classified by message content: + * "Duplicate of transaction" → counted as duplicate (skipped) + * anything else → counted as error + * + * @param string $body Raw response body (may contain HTML links) + * @return array{completed: bool, created: int, by_type: array, duplicates: int, errors: string[]} + */ + private function parseImportResponse(string $body): array + { + /** @var array $counts */ + $counts = []; + $errors = []; + $duplicates = 0; + $completed = false; + + // Strip HTML tags first — some responses wrap transaction IDs in links + $plain = strip_tags($body); + + // Split on timestamp prefixes + $lines = preg_split('/\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}: /', $plain, -1, PREG_SPLIT_NO_EMPTY); + if (!is_array($lines)) { + $lines = [trim($plain)]; + } + + foreach ($lines as $line) { + $line = trim($line); + + if (preg_match('/^Import index \d+: Created (\w+) #\d+/', $line, $m)) { + // Successfully created transaction + $type = strtolower($m[1]); + $counts[$type] = ($counts[$type] ?? 0) + 1; + } elseif (preg_match('/^Import index \d+:.*\[a(\d+)\]:(.*)/s', $line, $m)) { + // [aNNN] = importer issue code; classify by message content, not code number + if (stripos($m[2], 'Duplicate of transaction') !== false) { + $duplicates++; + } else { + $errors[] = trim("[a{$m[1]}]:" . $m[2]); + } + } elseif (str_starts_with($line, 'Done!')) { + $completed = true; + } elseif (preg_match('/\b(error|failed|skipped)\b/i', $line)) { + $errors[] = $line; + } + } + + return [ + 'completed' => $completed, + 'created' => array_sum($counts), + 'by_type' => $counts, + 'duplicates' => $duplicates, + 'errors' => $errors, + ]; + } + + /** + * Extracts human-readable text from an HTML error response. + * + * Strips HTML tags (preferring the content when present) + * and collapses whitespace so the result is a single readable line. 
+ * + * @param string $html Raw HTTP response body + * @return string Plain-text summary, max 500 chars + */ + private function extractErrorText(string $html): string + { + $html = trim($html); + if ($html === '') { + return '(empty response)'; + } + // If it looks like HTML, try to narrow down to content + if (stripos($html, ']*>(.*?)<\/body>/si', $html, $m)) { + $html = $m[1]; + } + } + $text = trim(preg_replace('/\s+/', ' ', strip_tags($html)) ?? ''); + return $text !== '' ? mb_substr($text, 0, 500) : '(no text content in response)'; + } + /** * Builds the CLI import command (modes: cli, docker) * @@ -442,7 +1101,7 @@ class FireflyImporter $response = curl_exec($ch); $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE); $curlError = curl_error($ch); - curl_close($ch); + // Note: curl_close() is a no-op since PHP 8.0 and deprecated in PHP 8.5 return [ 'available' => ($curlError === '' && $httpCode === 200), diff --git a/src/RowFilter.php b/src/RowFilter.php new file mode 100644 index 0000000..689384b --- /dev/null +++ b/src/RowFilter.php @@ -0,0 +1,161 @@ +, , ... ] } + * + * - An OR group: + * { "or": [ , , ... ] } + * + * Groups may be nested arbitrarily. 
+ * + * Supported operators for conditions: + * + * | Operator | Matches when … | + * |----------------|------------------------------------------------------| + * | empty | column value is empty string | + * | not-empty | column value is not empty | + * | equals | value === "value" (string compare) | + * | not-equals | value !== "value" | + * | contains | strpos(value, "value") !== false | + * | not-contains | strpos(value, "value") === false | + * | matches | preg_match("pattern", value) === 1 | + * | not-matches | preg_match("pattern", value) === 0 | + * | gt | (float) value > (float) "value" | + * | gte | (float) value >= (float) "value" | + * | lt | (float) value < (float) "value" | + * | lte | (float) value <= (float) "value" | + * + * Usage in config: + * ```json + * "skipIf": { + * "and": [ + * { "column": "Beschreibung1", "operator": "empty" }, + * { "column": "Beschreibung2", "operator": "empty" } + * ] + * } + * ``` + * + * ```json + * "skipIf": { + * "or": [ + * { "column": "Amount", "operator": "gt", "value": "10000" }, + * { "and": [ + * { "column": "Type", "operator": "equals", "value": "Saldo" }, + * { "column": "Notes", "operator": "empty" } + * ]} + * ] + * } + * ``` + */ +class RowFilter +{ + /** + * Evaluates a filter node against a data row. + * + * Returns true when the row should be skipped. 
+ * + * @param array $node Filter node (condition or group) + * @param array $row Data row with column values + * + * @throws \InvalidArgumentException on unknown operator + */ + public static function evaluate(array $node, array $row): bool + { + // AND group + if (isset($node['and'])) { + /** @var array> $children */ + $children = $node['and']; + foreach ($children as $child) { + if (!self::evaluate($child, $row)) { + return false; + } + } + return true; + } + + // OR group + if (isset($node['or'])) { + /** @var array> $children */ + $children = $node['or']; + foreach ($children as $child) { + if (self::evaluate($child, $row)) { + return true; + } + } + return false; + } + + // Bare condition + return self::evaluateCondition($node, $row); + } + + /** + * Evaluates a single leaf condition. + * + * @param array $condition + * @param array $row + * + * @throws \InvalidArgumentException on unknown operator + */ + private static function evaluateCondition(array $condition, array $row): bool + { + $column = (string) ($condition['column'] ?? ''); + $operator = strtolower((string) ($condition['operator'] ?? '')); + $colValue = (string) ($row[$column] ?? ''); + $cmpValue = (string) ($condition['value'] ?? ''); + $pattern = (string) ($condition['pattern'] ?? ''); + + switch ($operator) { + case 'empty': + return $colValue === ''; + + case 'not-empty': + return $colValue !== ''; + + case 'equals': + return $colValue === $cmpValue; + + case 'not-equals': + return $colValue !== $cmpValue; + + case 'contains': + return str_contains($colValue, $cmpValue); + + case 'not-contains': + return !str_contains($colValue, $cmpValue); + + case 'matches': + $delimited = '#' . str_replace('#', '\#', $pattern) . '#u'; + return preg_match($delimited, $colValue) === 1; + + case 'not-matches': + $delimited = '#' . str_replace('#', '\#', $pattern) . 
'#u'; + return preg_match($delimited, $colValue) !== 1; + + case 'gt': + return (float) $colValue > (float) $cmpValue; + + case 'gte': + return (float) $colValue >= (float) $cmpValue; + + case 'lt': + return (float) $colValue < (float) $cmpValue; + + case 'lte': + return (float) $colValue <= (float) $cmpValue; + + default: + throw new \InvalidArgumentException("Unknown RowFilter operator: '{$operator}'"); + } + } +} diff --git a/src/TransformerEngine.php b/src/TransformerEngine.php index 33a323e..579acd0 100644 --- a/src/TransformerEngine.php +++ b/src/TransformerEngine.php @@ -8,6 +8,7 @@ use UbsCsvTransformer\ConfigurationLoader; use UbsCsvTransformer\MetadataExtractor; use UbsCsvTransformer\ColumnTransformer; use UbsCsvTransformer\FireflyImporter; +use UbsCsvTransformer\RowFilter; /** * Orchestrates the complete CSV transformation pipeline @@ -48,6 +49,9 @@ class TransformerEngine { $this->configLoader = $configLoader; $this->debugMode = $debugMode; + if ($debugMode) { + DebugLogger::enable(); + } $config = $configLoader->getAll(); @@ -163,12 +167,21 @@ class TransformerEngine // Transform rows and collect them $transformedData = []; + /** @var array|null $skipIfNode */ + $skipIfNode = $this->configLoader->get('skipIf', null); + foreach ($dataRows as $row) { // Check if maxRows reached if ($maxRows > 0 && $this->rowsProcessed >= $maxRows) { break; } + // Skip row if filter condition matches + if ($skipIfNode !== null && RowFilter::evaluate($skipIfNode, $row)) { + DebugLogger::log('transformer', 'Row skipped by skipIf filter', ['row' => $row]); + continue; + } + // Transform row $transformedRow = $this->columnTransformer->transformRow($row); $transformedData[] = $transformedRow; diff --git a/tests/ColumnTransformerTest.php b/tests/ColumnTransformerTest.php index d8c3c73..589b8ef 100644 --- a/tests/ColumnTransformerTest.php +++ b/tests/ColumnTransformerTest.php @@ -435,6 +435,90 @@ class ColumnTransformerTest extends TestCase $this->assertSame('Hello World', 
$result['B']); } + public function testOutputActionAppendWithDelimiter(): void + { + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'append', + 'appendDelimiter' => ', ', + ], ['A' => 'World', 'B' => 'Hello']); + $this->assertSame('Hello, World', $result['B']); + } + + public function testOutputActionAppendWithDelimiterSkippedWhenTargetEmpty(): void + { + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'append', + 'appendDelimiter' => ', ', + ], ['A' => 'Hello', 'B' => '']); + $this->assertSame('Hello', $result['B']); + } + + public function testOutputActionAppendLine(): void + { + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'append-line', + ], ['A' => 'Line2', 'B' => 'Line1']); + $this->assertSame("Line1\nLine2", $result['B']); + } + + public function testOutputActionAppendLineNoLeadingNewlineWhenEmpty(): void + { + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'append-line', + ], ['A' => 'Line1', 'B' => '']); + $this->assertSame('Line1', $result['B']); + } + + public function testOutputActionOverwriteIfEmpty(): void + { + $resultEmpty = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'overwrite-if-empty', + ], ['A' => 'new', 'B' => '']); + $this->assertSame('new', $resultEmpty['B']); + + $resultFilled = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'overwrite-if-empty', + ], ['A' => 'new', 'B' => 'existing']); + $this->assertSame('existing', $resultFilled['B']); + } + + public function testOutputActionOverwriteIfNotEmpty(): void + { + $resultNotEmpty = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 
'overwrite-if-not-empty', + ], ['A' => 'new', 'B' => 'old']); + $this->assertSame('new', $resultNotEmpty['B']); + + $resultEmpty = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'overwrite-if-not-empty', + ], ['A' => '', 'B' => 'old']); + $this->assertSame('old', $resultEmpty['B']); + } + // ------------------------------------------------------------------------- // multi-output split // ------------------------------------------------------------------------- @@ -504,4 +588,139 @@ class ColumnTransformerTest extends TestCase $transformer->transformRow(['A' => '1', 'B' => '2', 'C' => '3']); $this->assertSame(2, $transformer->getOutputColumns()); } + + // ------------------------------------------------------------------------- + // timeperiod + // ------------------------------------------------------------------------- + + /** @var array> */ + private array $testPeriods = [ + ['from' => '04:00:00', 'to' => '08:59:59', 'label' => 'Morgen'], + ['from' => '09:00:00', 'to' => '10:59:59', 'label' => 'Vormittag'], + ['from' => '11:00:00', 'to' => '13:59:59', 'label' => 'Mittag'], + ['from' => '14:00:00', 'to' => '17:59:59', 'label' => 'Nachmittag'], + ['from' => '18:00:00', 'to' => '21:59:59', 'label' => 'Abend'], + ['from' => '22:00:00', 'to' => '03:59:59', 'label' => 'Nacht'], + ]; + + public function testTimePeriodBasicMapping(): void + { + $result = $this->applyOne([ + 'sourceColumn' => 'Time', + 'outputColumn' => 'Period', + 'type' => 'timeperiod', + 'timeFormat' => 'H:i:s', + 'periods' => $this->testPeriods, + 'default' => '', + ], ['Time' => '09:30:00', 'Period' => '']); + $this->assertSame('Vormittag', $result['Period']); + } + + public function testTimePeriodMidnightSpanning(): void + { + $result1 = $this->applyOne([ + 'sourceColumn' => 'Time', + 'outputColumn' => 'Period', + 'type' => 'timeperiod', + 'timeFormat' => 'H:i:s', + 'periods' => $this->testPeriods, + 'default' => '', + ], ['Time' => 
'23:00:00', 'Period' => '']); + $this->assertSame('Nacht', $result1['Period']); + + $result2 = $this->applyOne([ + 'sourceColumn' => 'Time', + 'outputColumn' => 'Period', + 'type' => 'timeperiod', + 'timeFormat' => 'H:i:s', + 'periods' => $this->testPeriods, + 'default' => '', + ], ['Time' => '02:00:00', 'Period' => '']); + $this->assertSame('Nacht', $result2['Period']); + } + + public function testTimePeriodNoMatch(): void + { + // 03:45 falls outside all labelled ranges except Nacht (00:00-03:59) + $result = $this->applyOne([ + 'sourceColumn' => 'Time', + 'outputColumn' => 'Period', + 'type' => 'timeperiod', + 'timeFormat' => 'H:i:s', + 'periods' => [ + ['from' => '09:00:00', 'to' => '17:59:59', 'label' => 'Day'], + ], + 'default' => 'Unknown', + ], ['Time' => '03:45:00', 'Period' => '']); + $this->assertSame('Unknown', $result['Period']); + } + + public function testTimePeriodInvalidInput(): void + { + $result = $this->applyOne([ + 'sourceColumn' => 'Time', + 'outputColumn' => 'Period', + 'type' => 'timeperiod', + 'timeFormat' => 'H:i:s', + 'periods' => $this->testPeriods, + 'default' => 'N/A', + ], ['Time' => '', 'Period' => '']); + $this->assertSame('N/A', $result['Period']); + } + + // ------------------------------------------------------------------------- + // ucwordsfirst guard + // ------------------------------------------------------------------------- + + public function testUcwordsFirstSkipsLowercase(): void + { + // Input already contains lowercase letters → must be returned unchanged + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'A', + 'type' => 'ucwordsfirst', + ], ['A' => 'Coop pronto chur']); + $this->assertSame('Coop pronto chur', $result['A']); + } + + public function testUcwordsFirstAppliesAllCaps(): void + { + // Fully uppercase input → capitalise first letter of each word + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'A', + 'type' => 'ucwordsfirst', + ], ['A' => 'COOP PRONTO']); + 
$this->assertSame('Coop Pronto', $result['A']); + } + + // ------------------------------------------------------------------------- + // append-if-not-empty + // ------------------------------------------------------------------------- + + public function testAppendIfNotEmptySkipsEmpty(): void + { + // Result is empty → target column must remain unchanged + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'append-if-not-empty', + 'appendDelimiter' => ' ', + ], ['A' => '', 'B' => 'existing']); + $this->assertSame('existing', $result['B']); + } + + public function testAppendIfNotEmptyAppendsNonEmpty(): void + { + // Non-empty result → appended with delimiter + $result = $this->applyOne([ + 'sourceColumn' => 'A', + 'outputColumn' => 'B', + 'type' => 'map', + 'outputAction' => 'append-if-not-empty', + 'appendDelimiter' => ' ', + ], ['A' => 'new', 'B' => 'existing']); + $this->assertSame('existing new', $result['B']); + } } diff --git a/tests/FireflyImporterChunkStateTest.php b/tests/FireflyImporterChunkStateTest.php new file mode 100644 index 0000000..aed93f9 --- /dev/null +++ b/tests/FireflyImporterChunkStateTest.php @@ -0,0 +1,414 @@ +tmpDir = sys_get_temp_dir() . '/ffi_state_test_' . uniqid('', true); + mkdir($this->tmpDir, 0700, true); + + // Minimal Firefly importer config (format v3) + $configData = [ + 'version' => 3, + 'flow' => 'csv', + 'roles' => ['amount'], + 'default_account' => 1, + ]; + $this->jsonConfig = $this->tmpDir . '/ff-config.json'; + file_put_contents($this->jsonConfig, json_encode($configData)); + } + + protected function tearDown(): void + { + // Remove all temp files + foreach (glob($this->tmpDir . '/*') ?: [] as $f) { + @unlink($f); + } + @rmdir($this->tmpDir); + } + + // ─── Helpers ───────────────────────────────────────────────────────────── + + /** + * Creates an importer stub whose import() calls return results from $queue + * in order. 
Each element of the queue is either true (success) or false (failure). + * + * @param array $importResultQueue + * @param int $chunkSize + */ + private function makeImporter(array $importResultQueue, int $chunkSize): FireflyImporter + { + $config = [ + 'mode' => 'http', + 'importerUrl' => 'https://example.com', + 'accessToken' => 'test-secret-1234567', + 'personalSecret' => 'test-pat', + 'jsonConfig' => $this->jsonConfig, + 'chunkSize' => $chunkSize, + ]; + + $queue = $importResultQueue; + + return new class ($config, $queue) extends FireflyImporter { + /** @var array */ + private array $queue; + + /** @param array $queue */ + public function __construct(array $config, array $queue) + { + parent::__construct($config); + $this->queue = $queue; + } + + public function import(string $csvFile): array + { + $success = array_shift($this->queue) ?? true; + if ($success) { + return [ + 'success' => true, + 'exit_code' => 200, + 'output' => ['stdout' => '', 'stderr' => ''], + 'duration' => 1.0, + 'csv_file' => $csvFile, + 'summary' => [ + 'completed' => true, + 'created' => 1, + 'by_type' => ['withdrawal' => 1], + 'duplicates' => 0, + 'errors' => [], + ], + ]; + } + return [ + 'success' => false, + 'error' => 'Simulated failure', + 'output' => ['stdout' => '', 'stderr' => ''], + 'exit_code' => 500, + ]; + } + }; + } + + /** + * Writes a CSV with $dataRows data rows (each row has two columns). + */ + private function writeCsv(string $path, int $dataRows): void + { + $fp = fopen($path, 'w'); + assert($fp !== false); + fputcsv($fp, ['col_a', 'col_b'], ',', '"', '\\'); + for ($i = 1; $i <= $dataRows; $i++) { + fputcsv($fp, ["val_a_{$i}", "val_b_{$i}"], ',', '"', '\\'); + } + fclose($fp); + } + + private function stateFile(string $csvPath): string + { + return $csvPath . '.ffi-state.json'; + } + + // ─── Tests ─────────────────────────────────────────────────────────────── + + /** + * When chunkSize is 0, import() is used directly — no state file should appear. 
+ */ + public function testNoStateFileWhenChunkingNotUsed(): void + { + $csv = $this->tmpDir . '/test.csv'; + $this->writeCsv($csv, 5); + + $importer = $this->makeImporter([true], 0); + $result = $importer->importChunked($csv); + + $this->assertTrue($result['success']); + $this->assertFileDoesNotExist($this->stateFile($csv)); + } + + /** + * When the file has fewer rows than chunkSize, no chunking occurs — no state file. + */ + public function testNoStateFileWhenRowsBelowChunkSize(): void + { + $csv = $this->tmpDir . '/test.csv'; + $this->writeCsv($csv, 3); + + $importer = $this->makeImporter([true], 10); + $importer->importChunked($csv); + + $this->assertFileDoesNotExist($this->stateFile($csv)); + } + + /** + * After chunk 1 of 3 fails, the state file must exist and record 0 completed chunks. + */ + public function testStateFileCreatedOnFirstChunkFailure(): void + { + $csv = $this->tmpDir . '/test.csv'; + $this->writeCsv($csv, 9); // 3 chunks of 3 + + // Chunk 1 fails immediately + $importer = $this->makeImporter([false], 3); + $result = $importer->importChunked($csv); + + $this->assertFalse($result['success']); + $this->assertFileExists($this->stateFile($csv)); + + /** @var array $state */ + $state = json_decode((string) file_get_contents($this->stateFile($csv)), true); + $this->assertSame([], $state['completed_chunks']); + } + + /** + * After chunks 1 and 2 succeed but chunk 3 fails, the state file records [0, 1]. + */ + public function testStateFileRecordsCompletedChunksOnPartialFailure(): void + { + $csv = $this->tmpDir . 
'/test.csv'; + $this->writeCsv($csv, 9); // 3 chunks of 3 + + // Chunks 0, 1 succeed; chunk 2 fails + $importer = $this->makeImporter([true, true, false], 3); + $result = $importer->importChunked($csv); + + $this->assertFalse($result['success']); + $this->assertFileExists($this->stateFile($csv)); + + /** @var array $state */ + $state = json_decode((string) file_get_contents($this->stateFile($csv)), true); + $this->assertSame([0, 1], $state['completed_chunks']); + $this->assertArrayHasKey('0', $state['chunk_results']); + $this->assertArrayHasKey('1', $state['chunk_results']); + } + + /** + * After full success the state file is deleted automatically. + */ + public function testStateFileDeletedAfterFullSuccess(): void + { + $csv = $this->tmpDir . '/test.csv'; + $this->writeCsv($csv, 6); // 2 chunks of 3 + + $importer = $this->makeImporter([true, true], 3); + $result = $importer->importChunked($csv); + + $this->assertTrue($result['success']); + $this->assertFileDoesNotExist($this->stateFile($csv)); + } + + /** + * On a second run with an existing state showing [0, 1] done, only chunk 2 + * (index 2) should call import() — i.e., exactly one call is made. + */ + public function testResumeSkipsAlreadyCompletedChunks(): void + { + $csv = $this->tmpDir . 
'/test.csv'; + $this->writeCsv($csv, 9); // 3 chunks of 3 + + // ── First run: chunks 0+1 succeed, chunk 2 fails ──────────────────── + $run1 = $this->makeImporter([true, true, false], 3); + $run1->importChunked($csv); + + $this->assertFileExists($this->stateFile($csv)); + + // ── Second run: only chunk 2 should be attempted ──────────────────── + // We record how many times import() is actually called via a counting wrapper + $counter = new \stdClass(); + $counter->value = 0; + + $config = [ + 'mode' => 'http', + 'importerUrl' => 'https://example.com', + 'accessToken' => 'test-secret-1234567', + 'personalSecret' => 'test-pat', + 'jsonConfig' => $this->jsonConfig, + 'chunkSize' => 3, + ]; + + $run2 = new class ($config, $counter) extends FireflyImporter { + private \stdClass $counter; + + public function __construct(array $config, \stdClass $counter) + { + parent::__construct($config); + $this->counter = $counter; + } + + public function import(string $csvFile): array + { + $this->counter->value++; + return [ + 'success' => true, + 'exit_code' => 200, + 'output' => ['stdout' => '', 'stderr' => ''], + 'duration' => 1.0, + 'csv_file' => $csvFile, + 'summary' => [ + 'completed' => true, + 'created' => 1, + 'by_type' => ['withdrawal' => 1], + 'duplicates' => 0, + 'errors' => [], + ], + ]; + } + }; + + $result2 = $run2->importChunked($csv); + + $this->assertTrue($result2['success']); + $this->assertSame(1, $counter->value, 'Only the 1 remaining chunk (index 2) should be imported'); + $this->assertFileDoesNotExist($this->stateFile($csv), 'State file must be deleted after full success'); + } + + /** + * A state file whose total_rows does not match the current CSV is silently + * discarded and a fresh import is started. + */ + public function testStaleMismatchedStateIsIgnored(): void + { + $csv = $this->tmpDir . 
'/test.csv'; + $this->writeCsv($csv, 9); // 3 chunks of 3 + + // Plant a stale state file with a wrong total_rows + $staleState = [ + 'csv_file' => realpath($csv) ?: $csv, + 'total_rows' => 99, // wrong + 'chunk_size' => 3, + 'total_chunks' => 3, + 'completed_chunks' => [0, 1], + 'chunk_results' => [], + 'created_at' => '2020-01-01T00:00:00+00:00', + 'updated_at' => '2020-01-01T00:00:00+00:00', + ]; + file_put_contents($this->stateFile($csv), json_encode($staleState)); + + // All 3 chunks should be called (fresh start despite stale state) + $counter = new \stdClass(); + $counter->value = 0; + + $config = [ + 'mode' => 'http', + 'importerUrl' => 'https://example.com', + 'accessToken' => 'test-secret-1234567', + 'personalSecret' => 'test-pat', + 'jsonConfig' => $this->jsonConfig, + 'chunkSize' => 3, + ]; + + $importer = new class ($config, $counter) extends FireflyImporter { + private \stdClass $counter; + + public function __construct(array $config, \stdClass $counter) + { + parent::__construct($config); + $this->counter = $counter; + } + + public function import(string $csvFile): array + { + $this->counter->value++; + return [ + 'success' => true, + 'exit_code' => 200, + 'output' => ['stdout' => '', 'stderr' => ''], + 'duration' => 1.0, + 'csv_file' => $csvFile, + 'summary' => [ + 'completed' => true, + 'created' => 1, + 'by_type' => ['withdrawal' => 1], + 'duplicates' => 0, + 'errors' => [], + ], + ]; + } + }; + + $result = $importer->importChunked($csv); + + $this->assertTrue($result['success']); + $this->assertSame(3, $counter->value, 'All 3 chunks must be imported when stale state is discarded'); + } + + /** + * A corrupt (non-JSON) state file is silently discarded; no exception is thrown. + */ + public function testCorruptStateFileIsIgnored(): void + { + $csv = $this->tmpDir . 
'/test.csv'; + $this->writeCsv($csv, 6); // 2 chunks of 3 + + file_put_contents($this->stateFile($csv), '{this is not valid json!!!}'); + + $importer = $this->makeImporter([true, true], 3); + $result = $importer->importChunked($csv); + + $this->assertTrue($result['success']); + } + + /** + * resetImportState() deletes an existing state file. + */ + public function testResetImportStateClearsStateFile(): void + { + $csv = $this->tmpDir . '/test.csv'; + $this->writeCsv($csv, 9); + + // Plant a state file + file_put_contents($this->stateFile($csv), '{}'); + $this->assertFileExists($this->stateFile($csv)); + + $importer = $this->makeImporter([], 3); + $importer->resetImportState($csv); + + $this->assertFileDoesNotExist($this->stateFile($csv)); + } + + /** + * hasResumeState() returns false when no state file is present. + */ + public function testHasResumeStateReturnsFalseWithoutStateFile(): void + { + $csv = $this->tmpDir . '/test.csv'; + $this->writeCsv($csv, 9); + + $importer = $this->makeImporter([], 3); + $this->assertFalse($importer->hasResumeState($csv)); + } + + /** + * hasResumeState() returns true after a partial failure creates a valid state file. + */ + public function testHasResumeStateReturnsTrueAfterPartialFailure(): void + { + $csv = $this->tmpDir . 
'/test.csv'; + $this->writeCsv($csv, 9); // 3 chunks of 3 + + $importer = $this->makeImporter([true, false], 3); // chunk 2 (index 1) fails + $importer->importChunked($csv); + + $importer2 = $this->makeImporter([], 3); + $this->assertTrue($importer2->hasResumeState($csv)); + } +} diff --git a/tests/RowFilterTest.php b/tests/RowFilterTest.php new file mode 100644 index 0000000..fd3f06c --- /dev/null +++ b/tests/RowFilterTest.php @@ -0,0 +1,255 @@ +assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'empty'], + ['A' => ''] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'empty'], + ['A' => 'something'] + )); + } + + public function testNotEmptyOperator(): void + { + $this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-empty'], + ['A' => 'value'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-empty'], + ['A' => ''] + )); + } + + public function testEqualsOperator(): void + { + $this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'equals', 'value' => 'hello'], + ['A' => 'hello'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'equals', 'value' => 'hello'], + ['A' => 'world'] + )); + } + + public function testNotEqualsOperator(): void + { + $this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-equals', 'value' => 'hello'], + ['A' => 'world'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-equals', 'value' => 'hello'], + ['A' => 'hello'] + )); + } + + public function testContainsOperator(): void + { + $this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'contains', 'value' => 'foo'], + ['A' => 'foobar'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'contains', 'value' => 'baz'], + ['A' => 'foobar'] + )); + } + + public function testNotContainsOperator(): void + { + 
$this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-contains', 'value' => 'baz'], + ['A' => 'foobar'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-contains', 'value' => 'foo'], + ['A' => 'foobar'] + )); + } + + public function testMatchesOperator(): void + { /* matches: the pattern is passed without regex delimiters; four digits (1234) are expected to match, abcd is not. NOTE(review): RowFilter presumably adds the delimiters itself - confirm there */ + $this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'matches', 'pattern' => '^\d{4}$'], + ['A' => '1234'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'matches', 'pattern' => '^\d{4}$'], + ['A' => 'abcd'] + )); + } + + public function testNotMatchesOperator(): void + { /* not-matches: mirror image of the matches test with the same four-digit pattern */ + $this->assertTrue(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-matches', 'pattern' => '^\d{4}$'], + ['A' => 'abcd'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'A', 'operator' => 'not-matches', 'pattern' => '^\d{4}$'], + ['A' => '1234'] + )); + } + + public function testGtOperator(): void + { /* gt: 150.50 is expected to be greater than 100 and 50 is not; 50 would sort after 100 as plain text, so this pins a numeric comparison */ + $this->assertTrue(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'gt', 'value' => '100'], + ['Amount' => '150.50'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'gt', 'value' => '100'], + ['Amount' => '50'] + )); + } + + public function testGteOperator(): void + { /* gte: equal amounts are expected to pass; 99.99 is expected to fail, which again only holds numerically */ + $this->assertTrue(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'gte', 'value' => '100'], + ['Amount' => '100'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'gte', 'value' => '100'], + ['Amount' => '99.99'] + )); + } + + public function testLtOperator(): void + { /* lt: 50 is expected to be below 100 (false under plain text ordering), 200 is not */ + $this->assertTrue(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'lt', 'value' => '100'], + ['Amount' => '50'] + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'lt', 'value' => '100'], + ['Amount' => '200'] + )); + } + + public function testLteOperator(): void + { /* lte: less-than-or-equal check of the Amount cell against the limit 100 */ + $this->assertTrue(RowFilter::evaluate( + 
['column' => 'Amount', 'operator' => 'lte', 'value' => '100'], + ['Amount' => '100'] /* equal to the limit: expected to pass */ + )); + $this->assertFalse(RowFilter::evaluate( + ['column' => 'Amount', 'operator' => 'lte', 'value' => '100'], + ['Amount' => '100.01'] /* just above the limit: expected to fail */ + )); + } + + // ------------------------------------------------------------------------- + // Groups (a node keyed by and / or that holds a list of child conditions) + // ------------------------------------------------------------------------- + + public function testAndGroupBothTrue(): void + { /* and group: expected true when every child condition holds (A and B are both empty) */ + $this->assertTrue(RowFilter::evaluate([ + 'and' => [ + ['column' => 'A', 'operator' => 'empty'], + ['column' => 'B', 'operator' => 'empty'], + ], + ], ['A' => '', 'B' => ''])); + } + + public function testAndGroupOneFalse(): void + { /* and group: expected false when one child fails; B holds the literal text not-empty, i.e. a non-empty cell */ + $this->assertFalse(RowFilter::evaluate([ + 'and' => [ + ['column' => 'A', 'operator' => 'empty'], + ['column' => 'B', 'operator' => 'empty'], + ], + ], ['A' => '', 'B' => 'not-empty'])); + } + + public function testOrGroupOneTrue(): void + { /* or group: expected true when at least one child holds (only B equals yes) */ + $this->assertTrue(RowFilter::evaluate([ + 'or' => [ + ['column' => 'A', 'operator' => 'equals', 'value' => 'yes'], + ['column' => 'B', 'operator' => 'equals', 'value' => 'yes'], + ], + ], ['A' => 'no', 'B' => 'yes'])); + } + + public function testOrGroupBothFalse(): void + { /* or group: expected false when no child holds (A and B are both no) */ + $this->assertFalse(RowFilter::evaluate([ + 'or' => [ + ['column' => 'A', 'operator' => 'equals', 'value' => 'yes'], + ['column' => 'B', 'operator' => 'equals', 'value' => 'yes'], + ], + ], ['A' => 'no', 'B' => 'no'])); + } + + // ------------------------------------------------------------------------- + // Nested groups + // ------------------------------------------------------------------------- + + public function testNestedAndOrGroup(): void + { + // (A is empty) AND (B equals "foo" OR C not-empty): the or group is itself a child of the and group + $node = [ + 'and' => [ + ['column' => 'A', 'operator' => 'empty'], + [ + 'or' => [ + ['column' => 'B', 'operator' => 'equals', 'value' => 'foo'], + ['column' => 'C', 'operator' => 'not-empty'], + ], + ], + ], + ]; + + // A empty, B matches → true + 
$this->assertTrue(RowFilter::evaluate($node, ['A' => '', 'B' => 'foo', 'C' => ''])); + // A empty, C not-empty → true + $this->assertTrue(RowFilter::evaluate($node, ['A' => '', 'B' => 'bar', 'C' => 'value'])); + // A empty, but neither B nor C match → false + $this->assertFalse(RowFilter::evaluate($node, ['A' => '', 'B' => 'bar', 'C' => ''])); + // A not empty → false (the and group fails even though B equals foo) + $this->assertFalse(RowFilter::evaluate($node, ['A' => 'x', 'B' => 'foo', 'C' => ''])); + } + + // ------------------------------------------------------------------------- + // Unknown operator (expected to be rejected with \InvalidArgumentException) + // ------------------------------------------------------------------------- + + public function testUnknownOperatorThrows(): void + { /* the expectation is registered before the call, so the exception thrown by evaluate() is what satisfies the test */ + $this->expectException(\InvalidArgumentException::class); + RowFilter::evaluate( + ['column' => 'A', 'operator' => 'nonexistent'], + ['A' => 'value'] + ); + } + + // ------------------------------------------------------------------------- + // Missing column (treated as an empty string) + // ------------------------------------------------------------------------- + + public function testMissingColumnTreatedAsEmpty(): void + { /* the row has no NonExistent key, yet the empty operator is expected to hold for it */ + $this->assertTrue(RowFilter::evaluate( + ['column' => 'NonExistent', 'operator' => 'empty'], + ['A' => 'something'] + )); + } +} diff --git a/tests/fixtures/config-ubs-account/expected.csv b/tests/fixtures/config-ubs-account/expected.csv index f60b568..9120448 100644 --- a/tests/fixtures/config-ubs-account/expected.csv +++ b/tests/fixtures/config-ubs-account/expected.csv @@ -1,17 +1,14 @@ -Belastung,Gutschrift,date,process_date,opposing_name,tags,description,opposing_account,notes,account_iban,account_currency --600.00,,2022-12-30,2022-12-30,"David Peter Reindl",Dauerauftrag,"Steuerrueckstellung -David Peter Reindl;8906 Bonstetten","CH37 0026 7267 9314 35M2 P","9967864LK2659211 -8906 Bonstetten","CH18 0026 7267 9314 3540 D",CHF --46.35,,2022-12-30,2022-12-31,"UBS AG",,"Periode: 2022-10-01 - 2022-12-30 -Zinsabschluss",,9900365AP6356307,"CH18 0026 7267 
9314 3540 D",CHF --39.90,,2022-12-30,2022-12-30,"Swisscom Grossunternehme",TWINT,"Swisscom Grossunternehme; Zahlung UBS TWINT",,"9967364GK5707142 -8004 Zuerich","CH18 0026 7267 9314 3540 D",CHF --8.75,,2022-12-28,2022-12-27,"Coop Pronto Chur",Debitkarte,"18279748-0 08/24 -Coop Pronto Chur;7007 Chur",,"9930862BN7826808 -7007 Chur","CH18 0026 7267 9314 3540 D",CHF --1800.00,,2022-12-27,2022-12-27,"Janine Geigele",e-banking,"Skiferien Dolomiten -Janine Geigele;Am Wasser 36; 8049 Zuerich; CH","CH63 0023 2232 5560 5988 0","9967361TI3188436 -8049 Zuerich","CH18 0026 7267 9314 3540 D",CHF -,9.00,2022-12-22,2022-12-22,"Friis, Daniela Silvia",TWINT,"Friis, Daniela Silvia",,9930356GK0440989,"CH18 0026 7267 9314 3540 D",CHF -,19764.80,2022-11-25,2022-11-25,SBB,Gutschrift,"SBB;Corporate Treasury",,9901820E67741531,"CH18 0026 7267 9314 3540 D",CHF --14.00,,2022-08-22,2022-08-21,"Friis-Loop, Daniela",TWINT,"Friis-Loop, Daniela; Belastung UBS TWINT",,9967233GK1553933,"CH18 0026 7267 9314 3540 D",CHF +Belastung,Gutschrift,date,process_date,tags,opposing_iban,opposing_account,opposing_name,notes,description,account_iban,account_currency +-600.00,,2022-12-30,2022-12-30,Dauerauftrag,"CH37 0026 7267 9314 35M2 P",,"David Peter Reindl","8906 Bonstetten +9967864LK2659211","David Peter Reindl;8906 Bonstetten; STEUERRUECKSTELLUNG; Dauerauftrag","CH18 0026 7267 9314 3540 D",CHF +-46.35,,2022-12-30,2022-12-31,,,,"UBS AG",9900365AP6356307,"Saldo Zinsabschluss; Periode: 2022-10-01 - 2022-12-30","CH18 0026 7267 9314 3540 D",CHF +-39.90,,2022-12-30,2022-12-30,TWINT,,,"Swisscom Grossunternehme","Muellerstrasse 16 8004 Zuerich TWINT-Acc.:+41796305690 +9967364GK5707142","SWISSCOM GROSSUNTERNEHME; Zahlung UBS TWINT; Muellerstrasse 16 na, 8004 Zuerich TWINT-Acc.:+41796305690","CH18 0026 7267 9314 3540 D",CHF +-8.75,,2022-12-28,2022-12-27,"Abend Debitkarte",,,"Coop Pronto Chur","18279748-0 08/24 +7007 Chur +9930862BN7826808","Coop Pronto Chur;7007 Chur; Zahlung Debitkarte","CH18 0026 7267 9314 3540 
D",CHF +-1800.00,,2022-12-27,2022-12-27,e-banking,"CH63 0023 2232 5560 5988 0",,"Janine Geigele","8049 Zuerich +9967361TI3188436","Janine Geigele;Am Wasser 36; 8049 Zuerich; CH; SKIFERIEN DOLOMITEN; e-banking-Vergütungsauftrag; Wohnung Dolomiten, 2 Personen","CH18 0026 7267 9314 3540 D",CHF +,9.00,2022-12-22,2022-12-22,TWINT,,,"Friis, Daniela Silvia",9930356GK0440989,"Friis, Daniela Silvia; Gutschrift UBS TWINT; +41796741245; TWINT-Acc.:+41796305690","CH18 0026 7267 9314 3540 D",CHF +,19764.80,2022-11-25,2022-11-25,Gutschrift,,,SBB,9901820E67741531,"SBB;Corporate Treasury; Gutschrift; Lohn/Gehalt 00229537/202211","CH18 0026 7267 9314 3540 D",CHF +-14.00,,2022-08-22,2022-08-21,TWINT,,,"Friis-Loop, Daniela",9967233GK1553933,"FRIIS-LOOP, DANIELA; Belastung UBS TWINT; +41796741245; TWINT-Acc.:+41796305690","CH18 0026 7267 9314 3540 D",CHF