#!/bin/zsh
#
# Attempt repair (zip -FF) and best-effort extraction (bsdtar) for every
# *.zip file under a directory, classify each archive by the entry-name
# markers found in its raw bytes, and write a Markdown + TSV report.
#
# Output layout under OUTPUT_ROOT:
#   repaired/<name>.repaired.zip              rebuilt archive
#   repaired/<name>.repaired.zip.repair.log   stderr of zip -FF
#   extracted/<family>/<name>/                extracted content
#   extracted/<family>/<name>.extract.log     stderr of bsdtar
#   salvage-report.md, salvage-report.tsv     one row per archive

emulate -L zsh
setopt extended_glob no_nomatch no_unset pipefail

typeset -gr SCRIPT_NAME=${0:t}
typeset -g INPUT_ROOT=""
typeset -g OUTPUT_ROOT=""
typeset -g DRY_RUN=0
typeset -g VERBOSE=0

# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
Usage: salvage-damaged-zips.zsh [options] DIRECTORY

Attempt repair and partial extraction for damaged ZIP files under DIRECTORY.

Options:
  -n, --dry-run      Print planned actions without writing repaired files.
  -v, --verbose      Print extra diagnostics while processing.
  -o, --output DIR   Write results into DIR. Defaults to DIRECTORY.salvaged.
  -h, --help         Show this help text.
EOF
}

# Print all arguments, joined by spaces, as one line on stdout.
log() {
  print -r -- "$*"
}

# Same as log, but only when VERBOSE is non-zero.
verbose() {
  if (( VERBOSE )); then
    print -r -- "$*"
  fi
}

# Print "error: <message>" on stderr and terminate with status 1.
die() {
  print -u2 -r -- "error: $*"
  exit 1
}

# Abort unless every external tool in the list is available on PATH.
ensure_tools() {
  local tool
  for tool in zip unzip bsdtar file strings perl mktemp find; do
    command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
  done
}

# Replace CR, LF and TAB with spaces, then strip leading and trailing
# whitespace. Arguments: $1 - text. Outputs: the trimmed text on stdout.
trim_value() {
  local value="$1"
  value="${value//$'\r'/ }"
  value="${value//$'\n'/ }"
  value="${value//$'\t'/ }"
  # With extended_glob, a trailing '##' means "one or more of the preceding".
  value=${value##[[:space:]]##}
  value=${value%%[[:space:]]##}
  print -r -- "$value"
}

# Turn arbitrary text into a single path component: NULs removed, '/', ':'
# and '\' replaced by '-', runs of spaces squeezed, leading dots stripped.
# Prints "Untitled" when nothing is left.
# Arguments: $1 - text. Outputs: the sanitized name on stdout.
sanitize_name() {
  local value="$1"
  value=$(trim_value "$value")
  value=${value//$'\0'/}
  value=${value//\//-}
  value=${value//:/-}
  value=${value//\\/-}
  value=$(print -r -- "$value" | tr -s ' ')
  # Leading dots would produce hidden names (or "..").
  value=${value##.##}
  value=${value%%[[:space:]]##}
  value=${value##[[:space:]]##}
  if [[ -z "$value" ]]; then
    value="Untitled"
  fi
  print -r -- "$value"
}

# Parse the command line into DRY_RUN, VERBOSE, INPUT_ROOT and OUTPUT_ROOT.
# Exits with the usage text when no directory is given, and with an error
# for unknown options, a missing --output argument, more than one directory,
# or a directory that does not exist. Both roots are made absolute.
parse_args() {
  local arg
  while (( $# )); do
    arg=$1
    case "$arg" in
      -n|--dry-run)
        DRY_RUN=1
        ;;
      -v|--verbose)
        VERBOSE=1
        ;;
      -o|--output)
        shift
        (( $# )) || die "missing argument for --output"
        OUTPUT_ROOT=$1
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      --)
        shift
        # Everything after "--" is an operand, even if it starts with '-'.
        # Previously these operands were silently discarded.
        for arg in "$@"; do
          if [[ -n "$INPUT_ROOT" ]]; then
            die "only one directory may be provided"
          fi
          INPUT_ROOT=$arg
        done
        break
        ;;
      -*)
        die "unknown option: $arg"
        ;;
      *)
        if [[ -n "$INPUT_ROOT" ]]; then
          die "only one directory may be provided"
        fi
        INPUT_ROOT=$arg
        ;;
    esac
    shift
  done

  [[ -n "$INPUT_ROOT" ]] || {
    usage
    exit 1
  }
  [[ -d "$INPUT_ROOT" ]] || die "directory does not exist: $INPUT_ROOT"

  INPUT_ROOT=${INPUT_ROOT:A}
  if [[ -z "$OUTPUT_ROOT" ]]; then
    OUTPUT_ROOT="${INPUT_ROOT}.salvaged"
  fi
  OUTPUT_ROOT=${OUTPUT_ROOT:A}
}

# List every regular file under INPUT_ROOT whose name ends in .zip
# (case-insensitive), one path per line, sorted.
collect_archives() {
  find "$INPUT_ROOT" -type f -iname '*.zip' -print | sort
}

# Print the printable strings found anywhere in file $1.
archive_markers() {
  strings -a "$1" 2>/dev/null
}

# Succeed when text $1 matches extended regular expression $2.
text_has_marker() {
  local text="$1"
  local pattern="$2"
  print -r -- "$text" | grep -E -q -- "$pattern"
}

# Succeed when the raw bytes of file $1 match extended regular expression $2.
archive_has_binary_marker() {
  local archive="$1"
  local pattern="$2"
  LC_ALL=C grep -aE -q -- "$pattern" "$archive"
}

# Print the family label for archive $1, based on which entry-name markers
# occur in its raw bytes. The first matching rule wins; "Damaged-Zip" is the
# fallback.
classify_marker_family() {
  local archive="$1"
  if archive_has_binary_marker "$archive" 'Index/Tables/'; then
    print -r -- "Damaged-Numbers"
    return 0
  fi
  if archive_has_binary_marker "$archive" 'Metadata/DocumentProperties\.plist|Pages/'; then
    print -r -- "Damaged-Pages"
    return 0
  fi
  if archive_has_binary_marker "$archive" 'Index/Document\.iwa' \
    && archive_has_binary_marker "$archive" 'Index/CalculationEngine'; then
    print -r -- "Damaged-Apple-iWork"
    return 0
  fi
  print -r -- "Damaged-Zip"
}

# Escape '|' in $1 so the value can sit inside a Markdown table cell.
escape_md_cell() {
  local value="$1"
  # The pipe is backslash-quoted so it is matched literally rather than
  # being read as a pattern operator.
  value=${value//\|/\\|}
  print -r -- "$value"
}

# Run "zip -FF" on $1, writing the rebuilt archive to $2 and zip's stderr to
# "$2.repair.log". A "y" is fed on stdin to answer zip's prompt.
# Returns zip's exit status.
repair_archive() {
  local source_archive="$1"
  local repaired_archive="$2"
  zip -FF "$source_archive" --out "$repaired_archive" <<'EOF' >/dev/null 2>"${repaired_archive}.repair.log"
y
EOF
}

# Extract archive $1 into directory $2 (created if needed), writing bsdtar's
# stderr to "$2.extract.log". Returns 1 if the directory cannot be created,
# otherwise bsdtar's exit status.
extract_repaired_archive() {
  local repaired_archive="$1"
  local extract_dir="$2"
  mkdir -p -- "$extract_dir" || return 1
  bsdtar -xf "$repaired_archive" -C "$extract_dir" 2>"${extract_dir}.extract.log"
}

# Create (truncate) both report files and write their column headers.
# The columns mirror the five fields emitted by append_report_row.
# NOTE(review): the original header text was lost (the statement had
# degenerated into reading the not-yet-existing TSV into the Markdown file);
# the wording below is a reconstruction - adjust if a specific title or
# column naming is expected.
# Arguments: $1 - Markdown report path, $2 - TSV report path.
# Returns:   non-zero if either file cannot be written.
write_report_header() {
  local markdown_report="$1"
  local tsv_report="$2"
  cat > "$markdown_report" <<'EOF' || return 1
| Archive | Family | Repaired entries | Visible assets | Notes |
| --- | --- | ---: | ---: | --- |
EOF
  printf '%s\t%s\t%s\t%s\t%s\n' \
    archive family repaired_entries visible_assets notes > "$tsv_report"
}

# Append one result row to both reports.
# Arguments: $1 - Markdown report path, $2 - TSV report path,
#            $3 - archive label, $4 - family, $5 - repaired entry count,
#            $6 - visible asset count, $7 - notes.
append_report_row() {
  local markdown_report="$1"
  local tsv_report="$2"
  local archive_label="$3"
  local family="$4"
  local repaired_entries="$5"
  local visible_assets="$6"
  local notes="$7"

  print -r -- "| $(escape_md_cell "$archive_label") | $(escape_md_cell "$family") | $repaired_entries | $visible_assets | $(escape_md_cell "$notes") |" >> "$markdown_report"

  # printf emits real tab separators ("print -r" wrote a literal backslash-t).
  # Tabs inside free-text fields are flattened so columns stay aligned.
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "${archive_label//$'\t'/ }" "$family" "$repaired_entries" \
    "$visible_assets" "${notes//$'\t'/ }" >> "$tsv_report"
}

# Entry point: parse arguments, find archives, then either print the planned
# classification (dry run) or repair, extract and report on each archive.
main() {
  local -a archives
  local -a used_names
  local -a listed_entries
  local archive=""
  local source_name=""
  local base_name=""
  local unique_name=""
  local name_key=""
  local collision=1
  local family=""
  local family_dir=""
  local repaired_archive=""
  local extract_dir=""
  local repaired_listing=""
  local repaired_entries=0
  local visible_assets=0
  local notes=""
  local markdown_report=""
  local tsv_report=""

  parse_args "$@"
  ensure_tools

  # (f) splits the newline-separated listing into one element per path.
  archives=(${(f)"$(collect_archives)"})
  if (( ${#archives} == 0 )); then
    log "No .zip files found under $INPUT_ROOT"
    return 0
  fi

  if (( DRY_RUN )); then
    for archive in $archives; do
      family=$(classify_marker_family "$archive")
      log "DRY-RUN $archive => $family"
    done
    return 0
  fi

  mkdir -p -- "$OUTPUT_ROOT/repaired" "$OUTPUT_ROOT/extracted" "$OUTPUT_ROOT/logs" \
    || die "failed to create output directories"

  markdown_report="$OUTPUT_ROOT/salvage-report.md"
  tsv_report="$OUTPUT_ROOT/salvage-report.tsv"
  write_report_header "$markdown_report" "$tsv_report" \
    || die "failed to write report headers"

  for archive in $archives; do
    source_name=${archive:t}
    base_name=$(sanitize_name "${source_name:r}")

    # Archives in different sub-directories can share a file name; give later
    # ones a numeric suffix so they do not overwrite earlier results. Keys
    # are lower-cased so names also stay distinct on case-insensitive
    # file systems.
    unique_name=$base_name
    name_key=${(L)unique_name}
    collision=1
    while (( ${used_names[(Ie)$name_key]} )); do
      (( collision += 1 ))
      unique_name="${base_name}-${collision}"
      name_key=${(L)unique_name}
    done
    used_names+=("$name_key")

    family=$(classify_marker_family "$archive")
    family_dir="$OUTPUT_ROOT/extracted/$family/$unique_name"
    repaired_archive="$OUTPUT_ROOT/repaired/${unique_name}.repaired.zip"
    extract_dir="$family_dir"
    notes=""
    repaired_entries=0
    visible_assets=0

    verbose "repairing $archive => $family"

    if ! repair_archive "$archive" "$repaired_archive"; then
      notes="zip -FF could not rebuild a readable archive"
      append_report_row "$markdown_report" "$tsv_report" "$source_name" \
        "$family" "$repaired_entries" "$visible_assets" "$notes"
      continue
    fi

    repaired_listing=$(unzip -Z1 "$repaired_archive" 2>/dev/null)
    if [[ -n "$repaired_listing" ]]; then
      # Unquoted (f) split drops empty lines, so this counts non-empty names.
      listed_entries=(${(f)repaired_listing})
      repaired_entries=${#listed_entries}
    fi

    # Extraction is best-effort: a partially extracted tree is still useful,
    # and bsdtar's diagnostics are kept in the .extract.log file.
    extract_repaired_archive "$repaired_archive" "$extract_dir" || true

    visible_assets=$(find "$extract_dir" -type f \( \
      -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' \
      -o -iname '*.tiff' -o -iname '*.tif' -o -iname '*.pdf' \
      -o -iname '*.heic' \) 2>/dev/null | wc -l | tr -d ' ')

    if (( visible_assets > 0 )); then
      notes="visible embedded assets recovered"
    elif (( repaired_entries > 0 )); then
      notes="internal iWork entries recovered"
    else
      notes="repair succeeded but no entries were listed"
    fi

    append_report_row "$markdown_report" "$tsv_report" "$source_name" \
      "$family" "$repaired_entries" "$visible_assets" "$notes"
  done

  log "Wrote salvage output to $OUTPUT_ROOT"
  log "Report: $markdown_report"
}

main "$@"