#!/usr/bin/env python3
"""Extract CHM files to text/HTML format.

Two back ends are tried in order: the Windows ``hh.exe`` decompiler and,
if that produces nothing and the library is importable, ``pychm``.

Usage::

    script.py [CHM_FILE [OUTPUT_DIR]]

Both arguments are optional and default to the paths below.
"""

import os
import subprocess
import sys
from pathlib import Path

try:
    from chm import chm
    has_pychm = True
except ImportError:
    has_pychm = False

# Defaults used when no command-line arguments are supplied.
DEFAULT_CHM_FILE = r"C:\GEVISOFT\Documentation\GeViSoft .NET SDK API Documentation.chm"
DEFAULT_OUTPUT_DIR = r"C:\DEV\COPILOT\SOURCES\GeViSoft_API_Documentation_extracted"

# File-name suffixes (compared case-insensitively) that pychm extraction keeps.
HTML_SUFFIXES = ('.html', '.htm')


def extract_with_hh(chm_file, output_dir):
    """Extract a CHM archive with the Windows ``hh.exe`` decompiler.

    Args:
        chm_file: Path of the ``.chm`` file to decompile.
        output_dir: Directory to extract into; created if missing.

    Returns:
        True if ``output_dir`` contains at least one entry after ``hh.exe``
        has run, False if ``hh.exe`` could not be started or the directory
        is still empty.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Use Windows HTML Help decompiler. An argument list (no shell) avoids
    # quoting problems with paths that contain spaces or shell metacharacters.
    cmd = ["hh.exe", "-decompile", str(output_dir), str(chm_file)]
    print(f'Running: hh.exe -decompile "{output_dir}" "{chm_file}"')

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as exc:
        # hh.exe is missing or not executable (e.g. not running on Windows).
        print(f"Failed to extract: {exc}")
        return False

    # The directory itself always exists at this point (it was created
    # above), so its mere existence proves nothing. Success means it now
    # holds extracted content. The return code is deliberately not required
    # to be zero, matching the original's leniency.
    # NOTE(review): content left over from an earlier run also counts here.
    if any(output_dir.iterdir()):
        print(f"Successfully extracted to {output_dir}")
        return True

    print(f"Failed to extract: {result.stderr}")
    return False


def extract_with_pychm(chm_file, output_dir):
    """Extract the HTML pages of a CHM archive using the pychm library.

    Only entries whose path ends in ``.html`` or ``.htm`` (any letter case)
    are written; the archive's internal directory layout is reproduced
    below ``output_dir``.

    Args:
        chm_file: Path of the ``.chm`` file to read.
        output_dir: Directory to extract into; created if missing.

    Returns:
        False if pychm is not importable, otherwise True once enumeration
        has finished.
    """
    if not has_pychm:
        print("pychm is not installed")
        return False

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): LoadCHM's result is not checked, and EnumerateFiles /
    # CHM_ENUMERATE_CONTINUE are used exactly as in the original code --
    # confirm these names against the installed pychm version.
    chmfile = chm.CHMFile()
    chmfile.LoadCHM(chm_file)

    def extract_callback(ui, param, output):
        """Write one archive entry to disk if it is an HTML page."""
        if ui.path.lower().endswith(HTML_SUFFIXES):
            # Entry paths are archive-absolute; strip the leading slash so
            # the join stays below output_dir.
            file_path = output_dir / ui.path.lstrip('/')
            file_path.parent.mkdir(parents=True, exist_ok=True)

            data = chmfile.RetrieveObject(ui)[1]
            if data:
                with open(file_path, 'wb') as f:
                    f.write(data)
                print(f"Extracted: {ui.path}")
        return chm.CHM_ENUMERATE_CONTINUE

    chmfile.EnumerateFiles(extract_callback, output_dir)
    print(f"Successfully extracted to {output_dir}")
    return True


def main(argv=None):
    """Run the extraction and return a process exit code.

    Args:
        argv: Optional argument list ``[chm_file[, output_dir]]``. When
            None, ``sys.argv[1:]`` is used. Missing entries fall back to
            ``DEFAULT_CHM_FILE`` / ``DEFAULT_OUTPUT_DIR``.

    Returns:
        0 if either back end reported success, 1 otherwise.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    chm_file = args[0] if len(args) > 0 else DEFAULT_CHM_FILE
    output_dir = args[1] if len(args) > 1 else DEFAULT_OUTPUT_DIR

    print(f"Extracting: {chm_file}")
    print(f"Output: {output_dir}")

    # Try Windows hh.exe first (most reliable on Windows)
    success = extract_with_hh(chm_file, output_dir)

    if not success and has_pychm:
        print("\nTrying pychm...")
        success = extract_with_pychm(chm_file, output_dir)

    if success:
        print("\n✓ Extraction complete!")
        print(f"Files extracted to: {output_dir}")
        return 0

    print("\n✗ Extraction failed")
    return 1


if __name__ == "__main__":
    sys.exit(main())