rules = $rules; }

    /**
     * Extracts metadata from the provided header lines.
     *
     * Each rule is applied to exactly one line (selected by its 1-based
     * `lineNumber`, default 1). The rule's `regex` is matched against that
     * line and the value of `captureGroup` (default 1) from the first match
     * is stored under the rule's `name`. Rules that are incomplete, point to
     * a missing line, have an invalid pattern, or do not match are skipped
     * (and logged through DebugLogger where applicable).
     *
     * @param array $lines Array of lines from the CSV header (0-based)
     * @return array Extracted values keyed by rule name
     */
    public function extract(array $lines): array
    {
        $metadata = [];

        foreach ($this->rules as $rule) {
            // A rule without a name or a regex cannot be applied.
            if (empty($rule['name']) || empty($rule['regex'])) {
                continue;
            }

            $ruleName = $rule['name'];
            $lineNumber = $rule['lineNumber'] ?? 1;
            $regex = $rule['regex'];

            // Rules use 1-based line numbers, $lines is 0-based.
            // A non-numeric lineNumber would raise a TypeError in the
            // subtraction on PHP 8; map it to index -1 instead so that it is
            // reported through the "line does not exist" path below.
            $arrayIndex = is_numeric($lineNumber) ? ((int) $lineNumber) - 1 : -1;

            if (!isset($lines[$arrayIndex])) {
                // Line does not exist - debug info for support
                DebugLogger::log('metadata_warning', "Extraction rule not found", [
                    'rule_name' => $ruleName,
                    'expected_lineNumber' => $lineNumber,
                    'array_index' => $arrayIndex,
                    'available_lines' => count($lines),
                ]);
                continue;
            }

            $line = $lines[$arrayIndex];

            // '#' is used as delimiter so that '/' may appear unescaped in
            // user patterns. Only '#' characters that are not already escaped
            // (i.e. preceded by an even number of backslashes) get a
            // backslash; blindly prefixing every '#' would turn an existing
            // '\#' into '\\#', which terminates the pattern prematurely.
            $escapedRegex = preg_replace('/(?<!\\\\)((?:\\\\\\\\)*)#/', '$1\\\\#', $regex);
            $pattern = '#' . $escapedRegex . '#u';

            // '@' keeps the compile warning of a bad user pattern out of the
            // output; the failure is detected via the false return value.
            $matchResult = @preg_match_all($pattern, $line, $matches);

            if ($matchResult === false) {
                DebugLogger::log('metadata_error', "Invalid regex pattern", [
                    'rule_name' => $ruleName,
                    'pattern' => $regex,
                ]);
                continue;
            }

            if ($matchResult === 0) {
                // Regex did not match on this line
                DebugLogger::log('metadata_warning', "Regex did not match", [
                    'rule_name' => $ruleName,
                    'lineNumber' => $lineNumber,
                    'regex_pattern' => $regex,
                    'line_content' => substr($line, 0, 100),
                ]);
                continue;
            }

            // captureGroup selects which group of the first match is extracted:
            //   0 = complete match
            //   1 = first capture group (...)
            //   2 = second capture group, etc.
            $captureGroup = isset($rule['captureGroup']) ? intval($rule['captureGroup']) : 1;

            if (empty($matches[$captureGroup])) {
                // Requested group does not exist in the pattern:
                // fall back to the complete match.
                $metadata[$ruleName] = $matches[0][0] ?? '';
            } else {
                $metadata[$ruleName] = $matches[$captureGroup][0] ?? '';
            }

            DebugLogger::log('metadata', "Extraction rule applied", [
                'rule_name' => $ruleName,
                'value' => $metadata[$ruleName] ?? null,
            ]);
        }

        return $metadata;
    }

    /**
     * Returns the number of defined extraction rules
     *
     * @return int Number of rules
     */
    public function getRuleCount(): int
    {
        return count($this->rules);
    }

    /**
     * Returns all defined extraction rules
     *
     * @return array The rules
     */
    public function getRules(): array
    {
        return $this->rules;
    }
}