299 lines
7.2 KiB
Bash
Executable File
299 lines
7.2 KiB
Bash
Executable File
#!/bin/zsh

# Shell configuration: zsh emulation plus the options enabled for the whole
# script (extended globbing, no error on unmatched globs, error on unset
# parameters, pipelines fail when any stage fails).
emulate -L zsh
setopt extended_glob
setopt no_nomatch
setopt no_unset
setopt pipefail

# Read-only basename of the script as it was invoked.
typeset -gr SCRIPT_NAME="${0:t}"

# Run settings; parse_args fills these in from the command line.
typeset -g INPUT_ROOT="" OUTPUT_ROOT=""
typeset -g DRY_RUN=0 VERBOSE=0
|
|
|
|
# Print the command-line help text on stdout.
# Globals: SCRIPT_NAME (read) - name shown in the "Usage:" line. When it is
#          unset or empty the original literal name is used, so the help
#          stays correct if the script is renamed and still works standalone.
usage() {
  local script_name="${SCRIPT_NAME:-salvage-damaged-zips.zsh}"

  # Unquoted delimiter on purpose: only ${script_name} is expanded below.
  cat <<EOF
Usage: ${script_name} [options] DIRECTORY

Attempt repair and partial extraction for damaged ZIP files under DIRECTORY.

Options:
-n, --dry-run Print planned actions without writing repaired files.
-v, --verbose Print extra diagnostics while processing.
-o, --output DIR Write results into DIR. Defaults to DIRECTORY.salvaged.
-h, --help Show this help text.
EOF
}
|
|
|
|
# Write all arguments, joined into a single line, to stdout.
# Backslashes in the message are emitted verbatim.
log() {
  printf '%s\n' "$*"
}
|
|
|
|
# Like log, but only prints when the VERBOSE global is non-zero.
# Globals: VERBOSE (read)
verbose() {
  # Nothing to do (and a success status) when verbose mode is off.
  (( VERBOSE )) || return 0
  printf '%s\n' "$*"
}
|
|
|
|
# Report a fatal error on stderr (prefixed with "error: ") and terminate the
# shell with exit status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
# Verify that every external program in the required list can be resolved by
# the shell; abort through die on the first one that cannot.
ensure_tools() {
  local -a required
  local name

  required=(zip unzip bsdtar file strings perl mktemp find)
  for name in "${required[@]}"; do
    if ! command -v "$name" >/dev/null 2>&1; then
      die "required tool not found: $name"
    fi
  done
}
|
|
|
|
# Normalise a string for single-line use and print it on stdout.
# Arguments: $1 - text to clean
# Every carriage return, newline and tab becomes a space, then leading and
# trailing whitespace is removed. Interior runs of spaces are left alone.
trim_value() {
  local text="$1"
  local ch

  for ch in $'\r' $'\n' $'\t'; do
    text=${text//"$ch"/ }
  done

  # Drop the leading whitespace run: the inner expansion isolates it by
  # cutting everything from the first non-space character onwards.
  text="${text#"${text%%[![:space:]]*}"}"
  # Same idea, mirrored, for the trailing whitespace run.
  text="${text%"${text##*[![:space:]]}"}"

  printf '%s\n' "$text"
}
|
|
|
|
# Turn an arbitrary archive base name into a safe single path component and
# print it on stdout.
# Arguments: $1 - raw name
# Steps: trim via trim_value, drop NUL bytes, replace '/', ':' and '\' with
# '-', squeeze runs of spaces, strip leading dots/whitespace and trailing
# whitespace. An empty result becomes "Untitled".
sanitize_name() {
  local value="$1"

  value=$(trim_value "$value")
  value=${value//$'\0'/}
  value=${value//\//-}
  value=${value//:/-}
  value=${value//\\/-}
  value=$(printf '%s\n' "$value" | tr -s ' ')

  # Strip leading dots and whitespace together, in one pass. Stripping the
  # dots first and the whitespace afterwards let a name such as ". .." come
  # out as "..", which is then used as a directory component by the caller.
  value="${value#"${value%%[!.[:space:]]*}"}"
  # Trailing whitespace.
  value="${value%"${value##*[![:space:]]}"}"

  if [[ -z "$value" ]]; then
    value="Untitled"
  fi
  printf '%s\n' "$value"
}
|
|
|
|
# Parse the command line into the run-setting globals.
# Globals:   DRY_RUN, VERBOSE, INPUT_ROOT, OUTPUT_ROOT (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing help for -h/--help; 1 (via die, or after
#            printing usage on stderr) for invalid input.
# On return INPUT_ROOT is an existing directory and both roots have been
# passed through the :A modifier; OUTPUT_ROOT defaults to
# "<INPUT_ROOT>.salvaged".
parse_args() {
  local arg
  local options_done=0

  while (( $# )); do
    arg=$1
    if (( options_done )) || [[ "$arg" != -* ]]; then
      # Operand. Everything after "--" lands here too, so a directory whose
      # name starts with "-" can be given and "-- DIR" is no longer dropped.
      [[ -z "$INPUT_ROOT" ]] || die "only one directory may be provided"
      INPUT_ROOT=$arg
    else
      case "$arg" in
        -n|--dry-run)
          DRY_RUN=1
          ;;
        -v|--verbose)
          VERBOSE=1
          ;;
        -o|--output)
          shift
          (( $# )) || die "missing argument for --output"
          OUTPUT_ROOT=$1
          ;;
        -h|--help)
          usage
          exit 0
          ;;
        --)
          options_done=1
          ;;
        *)
          die "unknown option: $arg"
          ;;
      esac
    fi
    shift
  done

  if [[ -z "$INPUT_ROOT" ]]; then
    # This is an error path, so the help text goes to stderr.
    usage >&2
    exit 1
  fi

  [[ -d "$INPUT_ROOT" ]] || die "directory does not exist: $INPUT_ROOT"
  INPUT_ROOT=${INPUT_ROOT:A}

  if [[ -z "$OUTPUT_ROOT" ]]; then
    OUTPUT_ROOT="${INPUT_ROOT}.salvaged"
  fi
  OUTPUT_ROOT=${OUTPUT_ROOT:A}
}
|
|
|
|
# List every regular file under INPUT_ROOT whose name ends in ".zip"
# (case-insensitive), one path per line, sorted.
# Globals: INPUT_ROOT (read), OUTPUT_ROOT (read)
# Files below OUTPUT_ROOT/repaired and OUTPUT_ROOT/extracted are skipped.
# Those trees hold this script's own results, and when the output directory
# is placed inside the input directory a second run would otherwise try to
# salvage the archives it produced itself.
collect_archives() {
  local output_root="${OUTPUT_ROOT:-}"
  local archive_path

  find "$INPUT_ROOT" -type f -iname '*.zip' -print | sort |
    while IFS= read -r archive_path; do
      if [[ -n "$output_root" ]] &&
        [[ "$archive_path" == "$output_root"/repaired/* ||
          "$archive_path" == "$output_root"/extracted/* ]]; then
        continue
      fi
      printf '%s\n' "$archive_path"
    done
}
|
|
|
|
# Print the printable strings found anywhere in a file, as reported by
# `strings -a`. Error output from strings is discarded.
# Arguments: $1 - file to scan
archive_markers() {
  local archive_file="$1"

  strings -a "$archive_file" 2>/dev/null
}
|
|
|
|
# Succeed when the given text matches an extended regular expression.
# Arguments: $1 - text to search
#            $2 - ERE pattern
# Returns:   grep's status (0 on match, 1 on no match)
text_has_marker() {
  local text="$1"
  local pattern="$2"

  # Feed grep through a here-string instead of a pipe. With pipefail on,
  # `print ... | grep -q` can report failure even though a match was found:
  # grep exits at the first match and the writer is killed by SIGPIPE when
  # the text is larger than the pipe buffer.
  grep -E -q -- "$pattern" <<< "$text"
}
|
|
|
|
# Succeed when the raw bytes of a file match an extended regular expression.
# Arguments: $1 - file to search
#            $2 - ERE pattern
# grep runs in the C locale with -a, so the file is scanned as if it were
# text regardless of its content.
archive_has_binary_marker() {
  local archive_file="$1" marker_regex="$2"

  LC_ALL=C grep -a -E -q -e "$marker_regex" -- "$archive_file"
}
|
|
|
|
# Print a family label for an archive, chosen by which path markers occur in
# its raw bytes. The checks are ordered; the first one that matches wins:
#   'Index/Tables/'                                      -> Damaged-Numbers
#   'Metadata/DocumentProperties.plist' or 'Pages/'      -> Damaged-Pages
#   'Index/Document.iwa' and 'Index/CalculationEngine'   -> Damaged-Apple-iWork
#   anything else                                        -> Damaged-Zip
# Arguments: $1 - archive file
classify_marker_family() {
  local archive="$1"
  local label="Damaged-Zip"

  if archive_has_binary_marker "$archive" 'Index/Tables/'; then
    label="Damaged-Numbers"
  elif archive_has_binary_marker "$archive" 'Metadata/DocumentProperties\.plist|Pages/'; then
    label="Damaged-Pages"
  elif archive_has_binary_marker "$archive" 'Index/Document\.iwa' &&
    archive_has_binary_marker "$archive" 'Index/CalculationEngine'; then
    label="Damaged-Apple-iWork"
  fi

  printf '%s\n' "$label"
}
|
|
|
|
# Print a value with every "|" backslash-escaped so it can be placed inside
# a Markdown table cell without ending the cell.
# Arguments: $1 - cell text
escape_md_cell() {
  local cell="$1"

  cell=${cell//|/\\|}
  printf '%s\n' "$cell"
}
|
|
|
|
# Run `zip -FF` on a damaged archive, writing the rebuilt copy to a new file.
# Arguments: $1 - damaged source archive
#            $2 - path for the rebuilt archive
# Outputs:   zip's stdout is discarded; its stderr is saved to
#            "<rebuilt archive>.repair.log"
# Returns:   zip's exit status
repair_archive() {
  local damaged="$1"
  local rebuilt="$2"
  local repair_log="${rebuilt}.repair.log"

  # A single "y" line is supplied on stdin for zip to read.
  zip -FF "$damaged" --out "$rebuilt" >/dev/null 2>"$repair_log" <<< 'y'
}
|
|
|
|
# Unpack an archive with bsdtar into a directory, creating it if needed.
# Arguments: $1 - archive to extract
#            $2 - destination directory
# Outputs:   bsdtar's stderr is saved to "<destination>.extract.log"
# Returns:   1 if the destination cannot be created, otherwise bsdtar's status
extract_repaired_archive() {
  local zip_file="$1"
  local target_dir="$2"

  if ! mkdir -p -- "$target_dir"; then
    return 1
  fi

  bsdtar -xf "$zip_file" -C "$target_dir" 2>"${target_dir}.extract.log"
}
|
|
|
|
# Create (or truncate) both report files and write their headers.
# Globals:   INPUT_ROOT, OUTPUT_ROOT (read) - echoed into the Markdown header
# Arguments: $1 - Markdown report path
#            $2 - TSV report path
write_report_header() {
  local md_path="$1"
  local tsv_path="$2"

  {
    printf '%s\n' '# Damaged ZIP Salvage Report' ''
    printf 'Source: %s\n' "$INPUT_ROOT"
    printf 'Output: %s\n' "$OUTPUT_ROOT"
    printf '%s\n' '' \
      '| Archive | Likely family | Repaired entries | Extracted visible assets | Notes |' \
      '| --- | --- | ---: | ---: | --- |'
  } > "$md_path"

  # Tab-separated column names, matching the five Markdown columns.
  printf 'archive\tfamily\trepaired_entries\tvisible_assets\tnotes\n' > "$tsv_path"
}
|
|
|
|
# Append one result row to both report files.
# Arguments: $1 - Markdown report path
#            $2 - TSV report path
#            $3 - archive label
#            $4 - family label
#            $5 - number of repaired entries
#            $6 - number of visible assets
#            $7 - free-text notes
append_report_row() {
  local markdown_report="$1"
  local tsv_report="$2"
  local archive_label="$3"
  local family="$4"
  local repaired_entries="$5"
  local visible_assets="$6"
  local notes="$7"

  printf '| %s | %s | %s | %s | %s |\n' \
    "$(escape_md_cell "$archive_label")" \
    "$(escape_md_cell "$family")" \
    "$repaired_entries" \
    "$visible_assets" \
    "$(escape_md_cell "$notes")" >> "$markdown_report"

  # The separators must be real TAB characters. The previous
  # `print -r -- "...\t..."` wrote a literal backslash-t, so data rows did
  # not line up with the tab-separated header. Tabs inside the free-text
  # fields are flattened to spaces so a row always has five columns.
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "${archive_label//$'\t'/ }" \
    "${family//$'\t'/ }" \
    "$repaired_entries" \
    "$visible_assets" \
    "${notes//$'\t'/ }" >> "$tsv_report"
}
|
|
|
|
# Script driver: parse arguments, find the archives, then either list the
# planned work (dry run) or repair, extract and report on every archive.
# Globals:   INPUT_ROOT, OUTPUT_ROOT, DRY_RUN (read after parse_args sets them)
# Arguments: the script's command-line arguments
# Outputs:   progress lines on stdout; under OUTPUT_ROOT the repaired/ and
#            extracted/ trees plus salvage-report.md and salvage-report.tsv
main() {
  local -a archives used_names listed_entries
  local archive=""
  local source_name=""
  local archive_label=""
  local base_name=""
  local unique_name=""
  local name_key=""
  local suffix=1
  local family=""
  local repaired_archive=""
  local extract_dir=""
  local repaired_listing=""
  local repaired_entries=0
  local visible_assets=0
  local extraction_ok=1
  local notes=""
  local markdown_report=""
  local tsv_report=""

  parse_args "$@"
  ensure_tools

  # One array element per output line of collect_archives.
  archives=(${(f)"$(collect_archives)"})
  if (( ${#archives} == 0 )); then
    log "No .zip files found under $INPUT_ROOT"
    return 0
  fi

  if (( DRY_RUN )); then
    for archive in "${archives[@]}"; do
      family=$(classify_marker_family "$archive")
      log "DRY-RUN $archive => $family"
    done
    return 0
  fi

  # NOTE(review): logs/ is created here, but repair_archive and
  # extract_repaired_archive write their logs next to their outputs.
  mkdir -p -- "$OUTPUT_ROOT/repaired" "$OUTPUT_ROOT/extracted" "$OUTPUT_ROOT/logs" \
    || die "failed to create output directories"
  markdown_report="$OUTPUT_ROOT/salvage-report.md"
  tsv_report="$OUTPUT_ROOT/salvage-report.tsv"
  write_report_header "$markdown_report" "$tsv_report"

  used_names=()
  for archive in "${archives[@]}"; do
    source_name=${archive:t}
    # Report the path relative to INPUT_ROOT so archives that share a file
    # name in different subdirectories can be told apart. For files directly
    # in INPUT_ROOT this is just the file name.
    archive_label=${archive#"$INPUT_ROOT"/}
    base_name=$(sanitize_name "${source_name:r}")

    # The search is recursive, so two archives can sanitize to the same base
    # name. Give later ones a numeric suffix instead of letting them
    # overwrite the earlier repaired archive and share its extraction
    # directory. Names are compared lower-cased so they also stay distinct
    # on case-insensitive file systems.
    unique_name=$base_name
    name_key=${(L)unique_name}
    suffix=1
    while (( ${used_names[(Ie)$name_key]} )); do
      (( suffix += 1 ))
      unique_name="${base_name}-${suffix}"
      name_key=${(L)unique_name}
    done
    used_names+=("$name_key")

    family=$(classify_marker_family "$archive")
    extract_dir="$OUTPUT_ROOT/extracted/$family/$unique_name"
    repaired_archive="$OUTPUT_ROOT/repaired/${unique_name}.repaired.zip"
    notes=""
    repaired_entries=0
    visible_assets=0
    extraction_ok=1

    verbose "repairing $archive => $family"
    if ! repair_archive "$archive" "$repaired_archive"; then
      notes="zip -FF could not rebuild a readable archive"
      append_report_row "$markdown_report" "$tsv_report" "$archive_label" "$family" "$repaired_entries" "$visible_assets" "$notes"
      continue
    fi

    # Count the non-empty lines of the entry listing; an unreadable archive
    # yields an empty listing and therefore a count of 0.
    repaired_listing=$(unzip -Z1 "$repaired_archive" 2>/dev/null)
    listed_entries=(${(f)repaired_listing})
    repaired_entries=${#listed_entries}

    # Extraction is best-effort: whatever was unpacked is still counted, but
    # a failure is recorded in the notes rather than silently dropped.
    extract_repaired_archive "$repaired_archive" "$extract_dir" || extraction_ok=0
    visible_assets=$(find "$extract_dir" -type f \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.tiff' -o -iname '*.tif' -o -iname '*.pdf' -o -iname '*.heic' \) 2>/dev/null | wc -l | tr -d ' ')

    if (( visible_assets > 0 )); then
      notes="visible embedded assets recovered"
    elif (( repaired_entries > 0 )); then
      notes="internal iWork entries recovered"
    else
      notes="repair succeeded but no entries were listed"
    fi
    if (( ! extraction_ok )); then
      notes+="; extraction reported errors"
    fi

    append_report_row "$markdown_report" "$tsv_report" "$archive_label" "$family" "$repaired_entries" "$visible_assets" "$notes"
  done

  log "Wrote salvage output to $OUTPUT_ROOT"
  log "Report: $markdown_report"
}
|
|
|
|
# Script entry point: hand every command-line argument to main. The stray
# trailing "|" is removed because it left an unterminated pipeline.
main "$@"