- Updated MarketplaceDrawer to include security notes and manual installation hints. - Refactored SkillDetailDrawer to display default icons for skills. - Simplified SkillListItem to use default icons for better readability. - Integrated gateway status checks and warnings in SkillsPage for improved user awareness. - Enhanced error handling for skill installation and fetching, providing clearer feedback to users. - Added new translations for error messages and gateway warnings to improve localization support.
133 lines
4.0 KiB
Python
Executable File
133 lines
4.0 KiB
Python
Executable File
"""Unpack Office files (DOCX, PPTX, XLSX) for editing.
|
|
|
|
Extracts the ZIP archive, pretty-prints XML files, and optionally:
|
|
- Merges adjacent runs with identical formatting (DOCX only)
|
|
- Simplifies adjacent tracked changes from same author (DOCX only)
|
|
|
|
Usage:
|
|
python unpack.py <office_file> <output_dir> [options]
|
|
|
|
Examples:
|
|
python unpack.py document.docx unpacked/
|
|
python unpack.py presentation.pptx unpacked/
|
|
python unpack.py document.docx unpacked/ --merge-runs false
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import defusedxml.minidom
|
|
|
|
from helpers.merge_runs import merge_runs as do_merge_runs
|
|
from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines
|
|
|
|
# Typographic quotes mapped to their XML numeric character references.
# Each value must be the escaped entity, not the literal character: mapping a
# quote to itself turns the replacement pass in _escape_smart_quotes into a
# no-op and leaves the raw characters in the unpacked XML.
SMART_QUOTE_REPLACEMENTS = {
    "\u201c": "&#x201C;",  # left double quotation mark
    "\u201d": "&#x201D;",  # right double quotation mark
    "\u2018": "&#x2018;",  # left single quotation mark
    "\u2019": "&#x2019;",  # right single quotation mark
}
|
|
|
|
|
|
def unpack(
    input_file: str,
    output_directory: str,
    merge_runs: bool = True,
    simplify_redlines: bool = True,
) -> tuple[None, str]:
    """Extract an Office archive into a directory and tidy its XML parts.

    Args:
        input_file: Path to the .docx, .pptx, or .xlsx file to unpack.
        output_directory: Directory to extract into; created if missing.
        merge_runs: For .docx input, run the run-merging helper afterwards.
        simplify_redlines: For .docx input, run the tracked-change
            simplification helper afterwards.

    Returns:
        A ``(None, message)`` tuple. The message summarises the work done on
        success and starts with "Error" on failure; failures are reported
        through the message rather than raised.
    """
    source = Path(input_file)
    destination = Path(output_directory)
    extension = source.suffix.lower()

    # Guard clauses: reject bad input before touching the filesystem.
    if not source.exists():
        return None, f"Error: {input_file} does not exist"
    if extension not in {".docx", ".pptx", ".xlsx"}:
        return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"

    try:
        destination.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(source, "r") as archive:
            archive.extractall(destination)

        # Both *.xml and *.rels parts are reformatted and counted.
        targets = [*destination.rglob("*.xml"), *destination.rglob("*.rels")]
        for target in targets:
            _pretty_print_xml(target)

        summary = [f"Unpacked {input_file} ({len(targets)} XML files)"]

        # The DOCX-only passes run after pretty-printing, redlines first.
        if extension == ".docx":
            if simplify_redlines:
                simplified, _ = do_simplify_redlines(str(destination))
                summary.append(f", simplified {simplified} tracked changes")
            if merge_runs:
                merged, _ = do_merge_runs(str(destination))
                summary.append(f", merged {merged} runs")

        # Smart quotes are escaped last, over the same file list.
        for target in targets:
            _escape_smart_quotes(target)

        return None, "".join(summary)
    except zipfile.BadZipFile:
        return None, f"Error: {input_file} is not a valid Office file"
    except Exception as e:
        return None, f"Error unpacking: {e}"
|
|
|
|
|
|
def _pretty_print_xml(xml_file: Path) -> None:
    """Rewrite *xml_file* in place as pretty-printed, UTF-8 encoded XML.

    Best-effort: if the file cannot be read, parsed, or written, the error is
    swallowed and the function simply returns.
    """
    try:
        raw_text = xml_file.read_text(encoding="utf-8")
        document = defusedxml.minidom.parseString(raw_text)
        formatted = document.toprettyxml(indent=" ", encoding="utf-8")
        xml_file.write_bytes(formatted)
    except Exception:
        # Deliberately ignored so one bad part does not abort the unpack.
        return
|
|
|
|
|
|
def _escape_smart_quotes(xml_file: Path) -> None:
    """Apply SMART_QUOTE_REPLACEMENTS to *xml_file* and write it back in place.

    Best-effort: any error while reading, translating, or writing the file is
    swallowed and the function simply returns.
    """
    try:
        original = xml_file.read_text(encoding="utf-8")
        # Every key is a single character, so one translate pass is
        # equivalent to replacing the characters one at a time.
        escaped = original.translate(str.maketrans(SMART_QUOTE_REPLACEMENTS))
        xml_file.write_text(escaped, encoding="utf-8")
    except Exception:
        # Deliberately ignored so one bad part does not abort the unpack.
        return
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run unpack(), print the
    # resulting message, and exit with status 1 when unpacking failed.

    def _bool_flag(value: str) -> bool:
        """Return True only for "true" (case-insensitive); anything else is False."""
        return value.lower() == "true"

    parser = argparse.ArgumentParser(
        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
    )
    parser.add_argument("input_file", help="Office file to unpack")
    parser.add_argument("output_directory", help="Output directory")
    parser.add_argument(
        "--merge-runs",
        type=_bool_flag,
        default=True,
        metavar="true|false",
        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
    )
    parser.add_argument(
        "--simplify-redlines",
        type=_bool_flag,
        default=True,
        metavar="true|false",
        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
    )
    args = parser.parse_args()

    _, message = unpack(
        args.input_file,
        args.output_directory,
        merge_runs=args.merge_runs,
        simplify_redlines=args.simplify_redlines,
    )
    print(message)

    # Every failure message from unpack() starts with "Error". A substring
    # test would also fire on a successful run whose input path merely
    # contains "Error", because the success message echoes the filename.
    if message.startswith("Error"):
        sys.exit(1)
|