#!/usr/bin/env python3
"""
FINAL WORKING SOLUTION - Fix TOC overlay and implement proper navigation

Key discoveries:
- Successfully reached cover page ✅
- TOC is stuck open and blocking content ❌
- Need to close TOC properly before scanning ❌
- Need working page navigation ❌

Credentials are NOT stored in this file. Export them before running:

    export AMAZON_EMAIL="you@example.com"
    export AMAZON_PASSWORD="..."
"""

import asyncio
import os
import re
import time
import traceback
from pathlib import Path

from playwright.async_api import async_playwright

# Book opened by default (Kindle Cloud Reader deep link).
BOOK_URL = "https://read.amazon.com/?asin=B0DJP2C8M6&ref_=kwl_kr_iv_rec_1"

# Environment variables the sign-in step reads its credentials from.
EMAIL_ENV_VAR = "AMAZON_EMAIL"
PASSWORD_ENV_VAR = "AMAZON_PASSWORD"

DEFAULT_TOTAL_PAGES = 226  # Known page count of the default book.
SAFETY_MARGIN_PAGES = 20  # Extra page turns allowed beyond the expected count.
DUPLICATE_SIZE_TOLERANCE = 3000  # Screenshots closer than this (bytes) look identical.
MAX_CONSECUTIVE_DUPLICATES = 7  # This many look-alike pages in a row ends the scan.

USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)

# (label, keys pressed while probing, key used to advance during the scan).
# The advance key is always a single forward key so that a successful probe
# can never make the scan step backwards.
_NAVIGATION_CANDIDATES = (
    ("ArrowRight", ("ArrowRight",), "ArrowRight"),
    ("PageDown", ("PageDown",), "PageDown"),
    ("Space", ("Space",), "Space"),
    ("ArrowLeft/Right", ("ArrowLeft", "ArrowRight"), "ArrowRight"),
)


def _parse_position(text):
    """Extract ``(current, total)`` from 'page X of Y' or 'location X of Y'.

    Matching is case-insensitive and page information takes precedence over
    location information. Returns ``(None, None)`` when neither is present.
    """
    lowered = text.lower()
    for label in ("page", "location"):
        match = re.search(rf"{label}\s+(\d+)\s+of\s+(\d+)", lowered)
        if match:
            return int(match.group(1)), int(match.group(2))
    return None, None


async def _get_page_info(page):
    """Read the body text and parse the position indicator from it."""
    try:
        content = await page.inner_text("body")
    except Exception:
        # Position info is purely informational; never fail the run over it.
        return None, None
    return _parse_position(content)


async def _login(page, book_url):
    """Open the book URL and sign in if the site redirects to a sign-in page."""
    print("🔐 Step 1: Logging in...")
    await page.goto(book_url)
    await page.wait_for_timeout(5000)

    if "signin" not in page.url:
        return

    email = os.environ.get(EMAIL_ENV_VAR)
    password = os.environ.get(PASSWORD_ENV_VAR)
    if not email or not password:
        raise RuntimeError(
            f"Sign-in required: set the {EMAIL_ENV_VAR} and "
            f"{PASSWORD_ENV_VAR} environment variables."
        )

    email_field = await page.wait_for_selector("#ap_email", timeout=10000)
    await email_field.fill(email)
    continue_btn = await page.wait_for_selector("#continue", timeout=5000)
    await continue_btn.click()
    await page.wait_for_timeout(3000)

    password_field = await page.wait_for_selector("#ap_password", timeout=10000)
    await password_field.fill(password)
    signin_btn = await page.wait_for_selector("#signInSubmit", timeout=5000)
    await signin_btn.click()
    await page.wait_for_timeout(5000)
    print("✅ Login completed")


async def _open_cover_from_toc(page):
    """Open the table of contents and jump to the 'Cover' entry (best effort)."""
    print("🎯 Step 3: Navigate to beginning and properly close TOC...")
    try:
        toc_button = await page.wait_for_selector(
            "[aria-label='Table of Contents']", timeout=5000
        )
        await toc_button.click()
        await page.wait_for_timeout(2000)
        print("   ✅ Opened TOC")

        cover_link = await page.wait_for_selector("text=Cover", timeout=5000)
        await cover_link.click()
        await page.wait_for_timeout(3000)
        print("   ✅ Clicked Cover link")
    except Exception as e:
        print(f"   ⚠️ TOC navigation failed: {e}")


async def _close_toc(page):
    """Try several independent ways to dismiss the TOC overlay.

    Every attempt is best effort: a failure is reported and the next method
    is tried, because any one of them may be enough to close the overlay.
    """
    print("🔧 Step 4: CLOSING TOC OVERLAY...")

    # Method 1: click a close ("X") button if one exists.
    try:
        close_button = await page.wait_for_selector(
            "[aria-label='Close'], .close, button[aria-label*='close' i]",
            timeout=3000,
        )
        await close_button.click()
        await page.wait_for_timeout(2000)
        print("   ✅ Closed TOC with X button")
    except Exception:
        print("   ⚠️ No X button found")

    # Method 2: press Escape several times.
    for i in range(5):
        await page.keyboard.press("Escape")
        await page.wait_for_timeout(500)
        print(f"   ⌨️ Pressed Escape {i+1}/5")

    # Methods 3 and 4: click on the main reading area, outside the TOC panel.
    clicked = False
    for x in (400, 600):
        try:
            await page.click("body", position={"x": x, "y": 400})
            await page.wait_for_timeout(1000)
            clicked = True
        except Exception as e:
            print(f"   ⚠️ Click at ({x}, 400) failed: {e}")
    if clicked:
        print("   ✅ Clicked on main content area")

    print("✅ TOC closure attempts completed")


async def _find_advance_key(page):
    """Probe keyboard navigation and return the key that turns the page.

    A candidate "works" when the body text differs before and after its keys
    are pressed. Falls back to ``"ArrowRight"`` when no candidate changes the
    content.
    """
    print("🔍 Step 5: Testing navigation...")

    current, total = await _get_page_info(page)
    if current is not None:
        print(f"   📋 Reader reports position {current} of {total}")

    print("   📋 Testing different navigation methods...")
    for name, probe_keys, advance_key in _NAVIGATION_CANDIDATES:
        print(f"      🧪 Testing {name}...")
        before_content = await page.inner_text("body")

        # Press the keys one after another; each press must be awaited,
        # otherwise the later keys are never sent.
        for key in probe_keys:
            await page.keyboard.press(key)
        await page.wait_for_timeout(2000)

        after_content = await page.inner_text("body")
        if before_content != after_content:
            print(f"      ✅ {name} works! Content changed.")
            return advance_key
        print(f"      ❌ {name} - no change")

    print("   ⚠️ No navigation method worked, using ArrowRight as fallback")
    return "ArrowRight"


async def _scan_pages(page, advance_key, output_dir, total_pages):
    """Screenshot page after page and return the number of screenshots taken.

    Stops after ``total_pages + SAFETY_MARGIN_PAGES`` screenshots, or earlier
    when ``MAX_CONSECUTIVE_DUPLICATES`` successive screenshots look identical.
    """
    print("🚀 Step 6: Scanning the complete book...")
    output_dir.mkdir(exist_ok=True)

    # Clear screenshots left over from a previous run.
    for old_file in output_dir.glob("*.png"):
        old_file.unlink()

    page_count = 0
    consecutive_identical = 0
    last_file_size = 0

    while page_count < total_pages + SAFETY_MARGIN_PAGES:
        page_count += 1
        print(f"📸 Scanning page {page_count}...")

        filename = output_dir / f"page_{page_count:03d}.png"
        await page.screenshot(path=str(filename), full_page=False)

        # NOTE(review): PNG size is only a proxy for "same content"; distinct
        # pages of similar size are counted as duplicates too.
        file_size = filename.stat().st_size
        if abs(file_size - last_file_size) < DUPLICATE_SIZE_TOLERANCE:
            consecutive_identical += 1
            print(
                f"   ⚠️ Possible duplicate "
                f"({consecutive_identical}/{MAX_CONSECUTIVE_DUPLICATES})"
            )
        else:
            consecutive_identical = 0
            print(f"   ✅ New content ({file_size} bytes)")
        last_file_size = file_size

        if page_count % 25 == 0:
            print(f"📊 MAJOR PROGRESS: {page_count}/{total_pages} pages scanned!")

        if consecutive_identical >= MAX_CONSECUTIVE_DUPLICATES:
            print("📖 Detected end of book (too many identical pages)")
            break

        # Turn the page and give the reader time to render it.
        await page.keyboard.press(advance_key)
        await page.wait_for_timeout(1000)

        # Extra wait every 10 pages to ensure stability.
        if page_count % 10 == 0:
            await page.wait_for_timeout(2000)

    return page_count


async def final_working_solution(
    book_url: str = BOOK_URL,
    total_pages: int = DEFAULT_TOTAL_PAGES,
    output_dir="scanned_pages",
) -> None:
    """Log in to the Kindle Cloud Reader and screenshot a book page by page.

    Args:
        book_url: Reader URL of the book to scan.
        total_pages: Expected number of pages; bounds the scan (plus a safety
            margin) and drives the progress/success report.
        output_dir: Directory (``str`` or ``Path``) the screenshots are
            written to. Existing ``*.png`` files in it are deleted first.

    Raises:
        ValueError: If ``total_pages`` is not positive.

    Errors raised while driving the browser are printed with a traceback
    rather than propagated, and the browser is always closed.
    """
    if total_pages <= 0:
        raise ValueError("total_pages must be a positive integer")
    output_dir = Path(output_dir)

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor",
            ],
        )

        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent=USER_AGENT,
        )

        # Hide the automation flag that sites use to detect driven browsers.
        await context.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });
        """)

        page = await context.new_page()

        try:
            print("🎯 FINAL WORKING SOLUTION - Fix TOC + Navigation")
            print("=" * 60)

            await _login(page, book_url)

            print("📖 Step 2: Waiting for reader to load...")
            await page.wait_for_selector("#reader-header", timeout=30000)
            await page.wait_for_timeout(5000)

            await _open_cover_from_toc(page)
            await _close_toc(page)

            advance_key = await _find_advance_key(page)
            page_count = await _scan_pages(page, advance_key, output_dir, total_pages)

            print(f"\n🎉 FINAL SCAN COMPLETED!")
            print(f"📊 Total pages scanned: {page_count}")
            print(f"📚 Expected book pages: {total_pages}")
            print(f"📁 Files saved to: {output_dir}")

            if page_count >= total_pages * 0.85:  # 85% or more
                print("✅ SUCCESS: Captured most/all of the book!")
            else:
                print(
                    f"⚠️ Captured {page_count}/{total_pages} pages "
                    f"({page_count/total_pages*100:.1f}%)"
                )

            # Take a final screenshot to verify the end state.
            await page.screenshot(path=output_dir / "final_state.png")
            print(f"📸 Final state screenshot saved")

            print("\n🔍 Keeping browser open for verification...")
            await page.wait_for_timeout(30000)

        except Exception as e:
            print(f"❌ Error: {e}")
            traceback.print_exc()

        finally:
            await browser.close()


if __name__ == "__main__":
    asyncio.run(final_working_solution())