'account_iban',
            'lineNumber' => 1,
            'regex' => '([A-Z]{2}\d{2}[\w]+)',
            'captureGroup' => 1,
        ]]);

        $result = $extractor->extract(['Account: CH9300762011623852957']);

        $this->assertSame('CH9300762011623852957', $result['account_iban']);
    }

    /**
     * A rule with lineNumber 2 is matched against the second entry of the lines array.
     */
    public function testExtractFromSecondLine(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'currency',
            'lineNumber' => 2,
            'regex' => '(CHF|EUR|USD)',
            'captureGroup' => 1,
        ]]);

        $lines = ['Account info', 'Currency: CHF'];
        $result = $extractor->extract($lines);

        $this->assertSame('CHF', $result['currency']);
    }

    /**
     * Capture group 0 yields the full regex match rather than a sub-group.
     */
    public function testExtractCaptureGroup0ReturnsFullMatch(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'label',
            'lineNumber' => 1,
            'regex' => 'CHF',
            'captureGroup' => 0,
        ]]);

        $result = $extractor->extract(['Balance: CHF 1000']);

        $this->assertSame('CHF', $result['label']);
    }

    /**
     * Several rules targeting different lines each contribute their own key to the result.
     */
    public function testExtractMultipleRules(): void
    {
        $extractor = new MetadataExtractor([
            [
                'name' => 'iban',
                'lineNumber' => 1,
                'regex' => '([A-Z]{2}\d{2}\w+)',
                'captureGroup' => 1,
            ],
            [
                'name' => 'currency',
                'lineNumber' => 2,
                'regex' => '(CHF|EUR)',
                'captureGroup' => 1,
            ],
        ]);

        $result = $extractor->extract(['IBAN: CH9300762011623852957', 'Currency: CHF']);

        $this->assertSame('CH9300762011623852957', $result['iban']);
        $this->assertSame('CHF', $result['currency']);
    }

    // -------------------------------------------------------------------------
    // Line not available
    // -------------------------------------------------------------------------

    /**
     * A rule whose lineNumber lies beyond the supplied lines produces no result key.
     */
    public function testExtractMissingLineIsSkipped(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'account_iban',
            'lineNumber' => 5,
            'regex' => '(CH\d+)',
            'captureGroup' => 1,
        ]]);

        $result = $extractor->extract(['Only one line']);

        $this->assertArrayNotHasKey('account_iban', $result);
    }

    // -------------------------------------------------------------------------
    // Invalid regex
    // -------------------------------------------------------------------------

    /**
     * A syntactically invalid pattern must not abort extraction; the rule simply yields no key.
     */
    public function testExtractInvalidRegexDoesNotCrash(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'bad_rule',
            'lineNumber' => 1,
            'regex' => '[invalid(regex',
            'captureGroup' => 1,
        ]]);

        $result = $extractor->extract(['some line content']);

        $this->assertArrayNotHasKey('bad_rule', $result);
    }

    // -------------------------------------------------------------------------
    // No match
    // -------------------------------------------------------------------------

    /**
     * A valid pattern that does not match the target line produces no result key.
     */
    public function testExtractNoMatchIsSkipped(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'account_iban',
            'lineNumber' => 1,
            'regex' => '(NOMATCH_\d+)',
            'captureGroup' => 1,
        ]]);

        $result = $extractor->extract(['Account: CH9300762011623852957']);

        $this->assertArrayNotHasKey('account_iban', $result);
    }

    // -------------------------------------------------------------------------
    // Edge cases
    // -------------------------------------------------------------------------

    /**
     * With no rules configured, extraction returns an empty array.
     */
    public function testExtractEmptyRulesReturnsEmptyArray(): void
    {
        $extractor = new MetadataExtractor([]);

        $result = $extractor->extract(['some line']);

        $this->assertSame([], $result);
    }

    /**
     * With no input lines, extraction returns an empty array even when rules exist.
     */
    public function testExtractEmptyLinesArrayReturnsEmpty(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'iban',
            'lineNumber' => 1,
            'regex' => '(CH\d+)',
            'captureGroup' => 1,
        ]]);

        $result = $extractor->extract([]);

        $this->assertSame([], $result);
    }

    /**
     * A rule definition without a 'name' entry contributes nothing to the result.
     */
    public function testRuleWithMissingNameIsSkipped(): void
    {
        $extractor = new MetadataExtractor([[
            'regex' => '(\d+)',
            'lineNumber' => 1,
        ]]);

        $result = $extractor->extract(['123']);

        $this->assertSame([], $result);
    }

    /**
     * A rule definition without a 'regex' entry contributes nothing to the result.
     */
    public function testRuleWithMissingRegexIsSkipped(): void
    {
        $extractor = new MetadataExtractor([[
            'name' => 'test',
            'lineNumber' => 1,
        ]]);

        $result = $extractor->extract(['some line']);

        $this->assertSame([], $result);
    }

    // -------------------------------------------------------------------------
    // getRuleCount
    // -------------------------------------------------------------------------

    /**
     * getRuleCount() reports 3 when three rules are passed to the constructor.
     */
    public function testGetRuleCount(): void
    {
        $extractor = new MetadataExtractor([
            ['name' => 'a', 'regex' => 'x', 'lineNumber' => 1],
            ['name' => 'b', 'regex' => 'y', 'lineNumber' => 2],
            ['name' => 'c', 'regex' => 'z', 'lineNumber' => 3],
        ]);

        $this->assertSame(3, $extractor->getRuleCount());
    }

    /**
     * getRuleCount() reports 0 for an extractor built from an empty rule list.
     */
    public function testGetRuleCountEmptyIsZero(): void
    {
        $this->assertSame(0, (new MetadataExtractor([]))->getRuleCount());
    }

    // -------------------------------------------------------------------------
    // Regex containing '#' delimiter character
    // -------------------------------------------------------------------------

    /**
     * A pattern that itself contains '#' still matches and captures correctly.
     */
    public function testRegexContainingHashIsHandled(): void
    {
        // Pattern contains '#' which is the internal delimiter — must be escaped
        $extractor = new MetadataExtractor([[
            'name' => 'tag',
            'lineNumber' => 1,
            'regex' => '(#\d+)',
            'captureGroup' => 1,
        ]]);

        $result = $extractor->extract(['Issue #42 resolved']);

        $this->assertSame('#42', $result['tag']);
    }
}