firefly-import-preprocessor/tests/MetadataExtractorTest.php
Reindl David (IT-PTR-CEN2-SL10) 170b2d2016 release 1.0
2026-05-02 17:53:19 +02:00

205 lines
6.8 KiB
PHP

<?php
namespace UbsCsvTransformer\Tests;
use PHPUnit\Framework\TestCase;
use UbsCsvTransformer\DebugLogger;
use UbsCsvTransformer\MetadataExtractor;
/**
 * Unit tests for MetadataExtractor.
 *
 * Every rule handed to the extractor is an associative array using the keys
 * 'name', 'lineNumber', 'regex' and (optionally) 'captureGroup'. As exercised
 * below, 'lineNumber' 1 addresses the first element of the lines array, and the
 * extracted value is stored in the result under the rule's 'name'.
 */
class MetadataExtractorTest extends TestCase
{
    /**
     * Resets DebugLogger before each test.
     *
     * NOTE(review): presumably this keeps logger state from leaking between
     * tests; DebugLogger itself is not visible from this file.
     */
    protected function setUp(): void
    {
        DebugLogger::reset();
    }

    /**
     * Builds a MetadataExtractor from $rules and runs extract() over $lines.
     *
     * @param array $rules rule definitions passed to the MetadataExtractor constructor
     * @param array $lines input lines passed to extract()
     *
     * @return mixed the value returned by extract(), unmodified
     */
    private function runExtraction(array $rules, array $lines)
    {
        $extractor = new MetadataExtractor($rules);

        return $extractor->extract($lines);
    }

    // -------------------------------------------------------------------------
    // Happy path
    // -------------------------------------------------------------------------

    /**
     * Capture group 1 of a rule matching the first line ends up under the rule name.
     */
    public function testExtractBasicCapture(): void
    {
        $rules = [[
            'name' => 'account_iban',
            'lineNumber' => 1,
            'regex' => '([A-Z]{2}\d{2}[\w]+)',
            'captureGroup' => 1,
        ]];

        $extracted = $this->runExtraction($rules, ['Account: CH9300762011623852957']);

        self::assertSame('CH9300762011623852957', $extracted['account_iban']);
    }

    /**
     * A rule with lineNumber 2 is matched against the second input line.
     */
    public function testExtractFromSecondLine(): void
    {
        $rules = [[
            'name' => 'currency',
            'lineNumber' => 2,
            'regex' => '(CHF|EUR|USD)',
            'captureGroup' => 1,
        ]];
        $lines = ['Account info', 'Currency: CHF'];

        $extracted = $this->runExtraction($rules, $lines);

        self::assertSame('CHF', $extracted['currency']);
    }

    /**
     * Capture group 0 yields the full match of the pattern.
     */
    public function testExtractCaptureGroup0ReturnsFullMatch(): void
    {
        $rules = [[
            'name' => 'label',
            'lineNumber' => 1,
            'regex' => 'CHF',
            'captureGroup' => 0,
        ]];

        $extracted = $this->runExtraction($rules, ['Balance: CHF 1000']);

        self::assertSame('CHF', $extracted['label']);
    }

    /**
     * Several rules targeting different lines each contribute their own key.
     */
    public function testExtractMultipleRules(): void
    {
        $ibanRule = [
            'name' => 'iban',
            'lineNumber' => 1,
            'regex' => '([A-Z]{2}\d{2}\w+)',
            'captureGroup' => 1,
        ];
        $currencyRule = [
            'name' => 'currency',
            'lineNumber' => 2,
            'regex' => '(CHF|EUR)',
            'captureGroup' => 1,
        ];
        $lines = ['IBAN: CH9300762011623852957', 'Currency: CHF'];

        $extracted = $this->runExtraction([$ibanRule, $currencyRule], $lines);

        self::assertSame('CH9300762011623852957', $extracted['iban']);
        self::assertSame('CHF', $extracted['currency']);
    }

    // -------------------------------------------------------------------------
    // Line not available
    // -------------------------------------------------------------------------

    /**
     * A rule pointing at a line beyond the end of the input produces no entry.
     */
    public function testExtractMissingLineIsSkipped(): void
    {
        $rules = [[
            'name' => 'account_iban',
            'lineNumber' => 5,
            'regex' => '(CH\d+)',
            'captureGroup' => 1,
        ]];

        $extracted = $this->runExtraction($rules, ['Only one line']);

        self::assertArrayNotHasKey('account_iban', $extracted);
    }

    // -------------------------------------------------------------------------
    // Invalid regex
    // -------------------------------------------------------------------------

    /**
     * A syntactically broken pattern must not abort extraction; the rule simply
     * yields no entry.
     */
    public function testExtractInvalidRegexDoesNotCrash(): void
    {
        $rules = [[
            'name' => 'bad_rule',
            'lineNumber' => 1,
            'regex' => '[invalid(regex',
            'captureGroup' => 1,
        ]];

        $extracted = $this->runExtraction($rules, ['some line content']);

        self::assertArrayNotHasKey('bad_rule', $extracted);
    }

    // -------------------------------------------------------------------------
    // No match
    // -------------------------------------------------------------------------

    /**
     * A valid pattern that does not match its line produces no entry.
     */
    public function testExtractNoMatchIsSkipped(): void
    {
        $rules = [[
            'name' => 'account_iban',
            'lineNumber' => 1,
            'regex' => '(NOMATCH_\d+)',
            'captureGroup' => 1,
        ]];

        $extracted = $this->runExtraction($rules, ['Account: CH9300762011623852957']);

        self::assertArrayNotHasKey('account_iban', $extracted);
    }

    // -------------------------------------------------------------------------
    // Edge cases
    // -------------------------------------------------------------------------

    /**
     * With no rules configured, extraction returns an empty array.
     */
    public function testExtractEmptyRulesReturnsEmptyArray(): void
    {
        $extracted = $this->runExtraction([], ['some line']);

        self::assertSame([], $extracted);
    }

    /**
     * With no input lines, extraction returns an empty array even if rules exist.
     */
    public function testExtractEmptyLinesArrayReturnsEmpty(): void
    {
        $rules = [[
            'name' => 'iban',
            'lineNumber' => 1,
            'regex' => '(CH\d+)',
            'captureGroup' => 1,
        ]];

        $extracted = $this->runExtraction($rules, []);

        self::assertSame([], $extracted);
    }

    /**
     * A rule lacking the 'name' key is ignored.
     */
    public function testRuleWithMissingNameIsSkipped(): void
    {
        $namelessRule = [
            'regex' => '(\d+)',
            'lineNumber' => 1,
        ];

        $extracted = $this->runExtraction([$namelessRule], ['123']);

        self::assertSame([], $extracted);
    }

    /**
     * A rule lacking the 'regex' key is ignored.
     */
    public function testRuleWithMissingRegexIsSkipped(): void
    {
        $patternlessRule = [
            'name' => 'test',
            'lineNumber' => 1,
        ];

        $extracted = $this->runExtraction([$patternlessRule], ['some line']);

        self::assertSame([], $extracted);
    }

    // -------------------------------------------------------------------------
    // getRuleCount
    // -------------------------------------------------------------------------

    /**
     * getRuleCount() reports the number of rules passed to the constructor.
     */
    public function testGetRuleCount(): void
    {
        $rules = [
            ['name' => 'a', 'regex' => 'x', 'lineNumber' => 1],
            ['name' => 'b', 'regex' => 'y', 'lineNumber' => 2],
            ['name' => 'c', 'regex' => 'z', 'lineNumber' => 3],
        ];

        $extractor = new MetadataExtractor($rules);

        self::assertSame(3, $extractor->getRuleCount());
    }

    /**
     * getRuleCount() is zero for an extractor built without rules.
     */
    public function testGetRuleCountEmptyIsZero(): void
    {
        $extractor = new MetadataExtractor([]);
        $count = $extractor->getRuleCount();

        self::assertSame(0, $count);
    }

    // -------------------------------------------------------------------------
    // Regex containing '#' delimiter character
    // -------------------------------------------------------------------------

    /**
     * A pattern containing '#' still matches correctly.
     *
     * Per the original author's note, '#' is the delimiter MetadataExtractor
     * uses internally, so the extractor has to escape it in the rule's pattern.
     */
    public function testRegexContainingHashIsHandled(): void
    {
        $rules = [[
            'name' => 'tag',
            'lineNumber' => 1,
            'regex' => '(#\d+)',
            'captureGroup' => 1,
        ]];

        $extracted = $this->runExtraction($rules, ['Issue #42 resolved']);

        self::assertSame('#42', $extracted['tag']);
    }
}