#!/usr/bin/env python3
"""Capture a screenshot of every page of a Kindle Cloud Reader book.

By default pages 123-226 are scanned. Every page in the range is captured:
there is no duplicate detection and no early stopping.
"""

import asyncio
import json
import time
import traceback
from pathlib import Path

from playwright.async_api import async_playwright

# NOTE(review): the prefixes of the status messages below look like
# mis-decoded emoji. They are left untouched because the intended characters
# cannot all be recovered unambiguously -- confirm and repair separately.

BOOK_URL = "https://read.amazon.com/?asin=B0DJP2C8M6&ref_=kwl_kr_iv_rec_1"
SESSION_STATE_PATH = "kindle_session_state.json"
PROGRESS_PATH = "scan_progress.json"
OUTPUT_DIR = Path("scanned_pages")
DEFAULT_START_PAGE = 123
DEFAULT_TOTAL_PAGES = 226


def _write_progress(last_completed_page, total_pages, *, complete):
    """Write the scan status to ``PROGRESS_PATH`` as JSON.

    Args:
        last_completed_page: Number of the last page that was captured.
        total_pages: Number of the final page of the book.
        complete: True after a full scan, False when saving partial progress
            from the error handler.
    """
    if complete:
        percentage = 100.0
    elif total_pages:
        percentage = (last_completed_page / total_pages) * 100
    else:
        # Avoid a ZeroDivisionError inside the error handler.
        percentage = 0.0

    record = {
        "last_completed_page": last_completed_page,
        "total_pages": total_pages,
        "completed_percentage": percentage,
        "timestamp": time.time(),
        "session_state_file": SESSION_STATE_PATH,
        "scan_complete": complete,
    }
    # The trailing flag differs between the success and failure records.
    record["all_pages_captured" if complete else "error_occurred"] = True

    with open(PROGRESS_PATH, "w", encoding="utf-8") as f:
        json.dump(record, f, indent=2)


async def scan_all_pages(start_page: int = DEFAULT_START_PAGE,
                         total_pages: int = DEFAULT_TOTAL_PAGES):
    """Screenshot pages ``start_page`` through ``total_pages`` inclusive.

    Opens the book in a headed Chromium window using the saved session
    state, presses ArrowRight ``start_page - 1`` times to reach the first
    page, then saves one PNG per page under ``scanned_pages/`` and records
    the outcome in ``scan_progress.json``.

    Args:
        start_page: First page number to capture.
        total_pages: Last page number to capture.

    Returns:
        ``False`` if the session state file does not exist; ``total_pages``
        after a complete scan; otherwise the number of the last page
        captured before an error occurred.
    """
    if not Path(SESSION_STATE_PATH).exists():
        print("āŒ No session state found.")
        return False

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor",
            ],
        )
        context = await browser.new_context(
            storage_state=SESSION_STATE_PATH,
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        )
        page = await context.new_page()

        # Initialised before the try block so the error handler can always
        # read it, even when loading or navigation fails before any capture.
        pages_captured = 0

        try:
            print(f"šŸš€ SCANNING ALL PAGES: {start_page} to {total_pages}")
            print("šŸ“‹ User requested: COMPLETE BOOK - NO EARLY STOPPING")
            print("=" * 60)

            # Load the book and give the reader time to render.
            await page.goto(BOOK_URL)
            await page.wait_for_timeout(5000)

            # Page forward from the opening position to the start page.
            print(f"šŸŽÆ Navigating to page {start_page}...")
            for i in range(start_page - 1):
                await page.keyboard.press("ArrowRight")
                if i % 30 == 29:
                    print(f" šŸ“ Navigated {i + 1} pages...")
                await page.wait_for_timeout(100)  # Fast navigation
            print(f" āœ… Reached page {start_page}")

            OUTPUT_DIR.mkdir(exist_ok=True)

            print(f"šŸ“ø SCANNING ALL PAGES {start_page} to {total_pages}...")
            print("āš ļø NO DUPLICATE DETECTION - CAPTURING EVERYTHING")

            for page_num in range(start_page, total_pages + 1):
                print(f"šŸ“ø Scanning page {page_num}/{total_pages}...")

                filename = OUTPUT_DIR / f"page_{page_num:03d}.png"
                await page.screenshot(path=str(filename))

                file_size = filename.stat().st_size
                print(f" āœ… Captured ({file_size} bytes)")
                pages_captured += 1

                # Progress reports
                if page_num % 20 == 0:
                    progress = (page_num / total_pages) * 100
                    print(f"šŸ“Š MAJOR PROGRESS: {page_num}/{total_pages} ({progress:.1f}%)")
                if page_num % 50 == 0:
                    print(f"šŸŽÆ MILESTONE: {pages_captured} pages captured so far!")

                # Advance to the next page (nothing to turn after the last).
                if page_num < total_pages:
                    await page.keyboard.press("ArrowRight")
                    await page.wait_for_timeout(800)  # Reliable timing

            _write_progress(total_pages, total_pages, complete=True)

            print("\nšŸŽ‰ ALL PAGES SCANNING COMPLETED!")
            print(f"šŸ“Š FINAL RESULT: ALL {total_pages} pages captured")
            print("šŸ“ˆ Completion: 100%")
            print("āœ… COMPLETE BOOK SUCCESSFULLY SCANNED!")

            return total_pages

        except Exception as e:
            # Top-level boundary: report the failure, persist how far the
            # scan got, and hand the last completed page back to the caller.
            print(f"āŒ Scanning error: {e}")
            traceback.print_exc()

            last_completed_page = start_page + pages_captured - 1
            _write_progress(last_completed_page, total_pages, complete=False)
            return last_completed_page

        finally:
            await browser.close()


def main():
    """Run the default scan and print a one-line summary of the outcome."""
    result = asyncio.run(scan_all_pages())
    if result is False:
        # Missing session state was already reported; there is no page
        # count to summarise.
        raise SystemExit(1)

    print(f"\nšŸ FINAL RESULT: {result} pages total captured")
    if result >= DEFAULT_TOTAL_PAGES:
        print(f"šŸŽ‰ SUCCESS: Complete {DEFAULT_TOTAL_PAGES}-page book captured!")
    else:
        print(f"šŸ“Š Progress: {result}/{DEFAULT_TOTAL_PAGES} pages captured")


if __name__ == "__main__":
    main()