# kindle_OCR/kindle_scanner.py

#!/usr/bin/env python3
"""
FINAL WORKING SOLUTION - Fix TOC overlay and implement proper navigation
Key discoveries:
- Successfully reached cover page ✅
- TOC is stuck open and blocking content ❌
- Need to close TOC properly before scanning ❌
- Need working page navigation ❌
"""

import asyncio
import os
import re
import time
from pathlib import Path
from typing import Optional

from playwright.async_api import async_playwright

# Book opened when the caller does not supply an explicit reader URL.
_DEFAULT_BOOK_URL = "https://read.amazon.com/?asin=B0DJP2C8M6&ref_=kwl_kr_iv_rec_1"

# Environment variables consulted when credentials are not passed explicitly.
# Credentials must never be committed to source control.
_EMAIL_ENV_VAR = "KINDLE_EMAIL"
_PASSWORD_ENV_VAR = "KINDLE_PASSWORD"

# Two consecutive screenshots whose PNG sizes differ by less than this many
# bytes are counted as a possible duplicate page.
_DUPLICATE_SIZE_TOLERANCE = 3000

# Number of consecutive possible duplicates after which the scan stops.
_MAX_CONSECUTIVE_DUPLICATES = 7

# Extra screenshots allowed beyond the expected page count.
_PAGE_SAFETY_MARGIN = 20


def _resolve_credentials(email: Optional[str], password: Optional[str]):
    """Return ``(email, password)``, falling back to environment variables.

    Raises:
        RuntimeError: if either value is missing from both the arguments and
            the environment.
    """
    email = email or os.environ.get(_EMAIL_ENV_VAR)
    password = password or os.environ.get(_PASSWORD_ENV_VAR)
    if not email or not password:
        raise RuntimeError(
            "Amazon credentials are required: pass email/password or set "
            f"{_EMAIL_ENV_VAR} and {_PASSWORD_ENV_VAR}"
        )
    return email, password


async def _login_if_required(page, email: Optional[str], password: Optional[str]):
    """Fill in the Amazon sign-in form when the reader redirected to it."""
    if "signin" not in page.url:
        return

    # Credentials are only resolved once a sign-in page is actually shown, so
    # an already authenticated session does not need them.
    email, password = _resolve_credentials(email, password)

    email_field = await page.wait_for_selector("#ap_email", timeout=10000)
    await email_field.fill(email)
    continue_btn = await page.wait_for_selector("#continue", timeout=5000)
    await continue_btn.click()
    await page.wait_for_timeout(3000)

    password_field = await page.wait_for_selector("#ap_password", timeout=10000)
    await password_field.fill(password)
    signin_btn = await page.wait_for_selector("#signInSubmit", timeout=5000)
    await signin_btn.click()
    await page.wait_for_timeout(5000)


async def _open_cover_via_toc(page):
    """Open the table of contents and click its "Cover" entry (best effort)."""
    try:
        toc_button = await page.wait_for_selector("[aria-label='Table of Contents']", timeout=5000)
        await toc_button.click()
        await page.wait_for_timeout(2000)
        print("   ✅ Opened TOC")

        cover_link = await page.wait_for_selector("text=Cover", timeout=5000)
        await cover_link.click()
        await page.wait_for_timeout(3000)
        print("   ✅ Clicked Cover link")
    except Exception as e:
        # Failing to reach the cover is not fatal; scanning starts from
        # wherever the reader currently is.
        print(f"   ⚠️ TOC navigation failed: {e}")


async def _close_toc_overlay(page):
    """Try several independent ways of dismissing the TOC overlay."""
    # Method 1: click a close ("X") button if one can be found.
    try:
        close_button = await page.wait_for_selector("[aria-label='Close'], .close, button[aria-label*='close' i]", timeout=3000)
        await close_button.click()
        await page.wait_for_timeout(2000)
        print("   ✅ Closed TOC with X button")
    except Exception:
        # `except Exception` (not a bare except) so task cancellation and
        # Ctrl-C still propagate.
        print("   ⚠️ No X button found")

    # Method 2: press Escape several times.
    for i in range(5):
        await page.keyboard.press("Escape")
        await page.wait_for_timeout(500)
        print(f"   ⌨️ Pressed Escape {i+1}/5")

    # Method 3: click outside the TOC area, on the main content.
    await page.click("body", position={"x": 400, "y": 400})
    await page.wait_for_timeout(1000)

    # Method 4: click where the book content should be.
    try:
        await page.click("body", position={"x": 600, "y": 400})
        await page.wait_for_timeout(1000)
        print("   ✅ Clicked on main content area")
    except Exception as e:
        # Still best-effort, but no longer silent.
        print(f"   ⚠️ Could not click main content area: {e}")


async def _read_position(page):
    """Return ``(current, total)`` parsed from the page text, else ``(None, None)``.

    Looks for "page X of Y" first and "location X of Y" second in the
    lower-cased text of ``<body>``.
    """
    try:
        content = (await page.inner_text("body")).lower()
        for unit in ("page", "location"):
            match = re.search(rf"{unit}\s+(\d+)\s+of\s+(\d+)", content)
            if match:
                return int(match.group(1)), int(match.group(2))
    except Exception:
        pass
    return None, None


async def _detect_navigation_key(page) -> str:
    """Probe candidate keys and return the one to use for "next page".

    A probe counts as working when the text of ``<body>`` differs before and
    after the key presses. Falls back to ``"ArrowRight"`` when none works.
    """
    # (label, keys pressed by the probe, key used to advance while scanning)
    candidates = [
        ("ArrowRight", ("ArrowRight",), "ArrowRight"),
        ("PageDown", ("PageDown",), "PageDown"),
        ("Space", ("Space",), "Space"),
        # The original expressed this as `press(left) or press(right)`. A
        # coroutine object is always truthy, so only ArrowLeft was ever sent.
        # Both keys are now awaited in order. Presumably the leading ArrowLeft
        # is a focus nudge (TODO confirm); scanning then advances with
        # ArrowRight alone so the reader does not step back on every page.
        ("ArrowLeft/Right", ("ArrowLeft", "ArrowRight"), "ArrowRight"),
    ]

    for label, probe_keys, advance_key in candidates:
        print(f"   🧪 Testing {label}...")

        before_content = await page.inner_text("body")
        for key in probe_keys:
            await page.keyboard.press(key)
        await page.wait_for_timeout(2000)
        after_content = await page.inner_text("body")

        if before_content != after_content:
            print(f"   ✅ {label} works! Content changed.")
            # NOTE(review): a successful probe leaves the reader one page
            # forward, so the first screenshot may not be the cover - confirm
            # whether a step back is wanted here.
            return advance_key
        print(f"   ❌ {label} - no change")

    print("   ⚠️ No navigation method worked, using ArrowRight as fallback")
    return "ArrowRight"


async def _scan_pages(page, advance_key: str, total_pages: int, output_dir: Path) -> int:
    """Screenshot pages into ``output_dir`` and return how many were taken.

    Stops after ``total_pages`` plus a safety margin, or earlier when too many
    consecutive screenshots have nearly the same file size.
    """
    page_count = 0
    consecutive_identical = 0
    last_file_size = 0

    while page_count < total_pages + _PAGE_SAFETY_MARGIN:
        page_count += 1
        print(f"📸 Scanning page {page_count}...")

        filename = output_dir / f"page_{page_count:03d}.png"
        await page.screenshot(path=str(filename), full_page=False)

        # File size is used as a cheap proxy for "same page rendered again".
        file_size = filename.stat().st_size
        if abs(file_size - last_file_size) < _DUPLICATE_SIZE_TOLERANCE:
            consecutive_identical += 1
            print(f"   ⚠️ Possible duplicate ({consecutive_identical}/{_MAX_CONSECUTIVE_DUPLICATES})")
        else:
            consecutive_identical = 0
            print(f"   ✅ New content ({file_size} bytes)")
        last_file_size = file_size

        if page_count % 25 == 0:
            print(f"📊 MAJOR PROGRESS: {page_count}/{total_pages} pages scanned!")

        if consecutive_identical >= _MAX_CONSECUTIVE_DUPLICATES:
            print("📖 Detected end of book (too many identical pages)")
            break

        await page.keyboard.press(advance_key)
        await page.wait_for_timeout(1000)  # Wait for the next page to render

        # Extra pause every 10 pages for stability.
        if page_count % 10 == 0:
            await page.wait_for_timeout(2000)

    return page_count


async def final_working_solution(
    email: Optional[str] = None,
    password: Optional[str] = None,
    book_url: str = _DEFAULT_BOOK_URL,
    total_pages: int = 226,
):
    """Log into the Kindle Cloud Reader and screenshot a book page by page.

    Steps: open the reader (signing in if redirected), jump to the cover via
    the table of contents, dismiss the TOC overlay, detect a working
    "next page" key, then save one PNG per page under ``scanned_pages/``.
    Existing ``*.png`` files in that directory are deleted first.

    Args:
        email: Amazon account e-mail. Defaults to the ``KINDLE_EMAIL``
            environment variable.
        password: Amazon account password. Defaults to the
            ``KINDLE_PASSWORD`` environment variable.
        book_url: Reader URL of the book to scan.
        total_pages: Expected number of pages; bounds the scan loop and is
            used for progress reporting.

    Raises:
        ValueError: if ``total_pages`` is not positive.

    Errors raised while driving the page are printed with a traceback rather
    than propagated, and the browser is always closed.
    """
    # Validated up front: total_pages is used as a divisor in the summary.
    if total_pages <= 0:
        raise ValueError("total_pages must be a positive integer")

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor"
            ]
        )
        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )

        # Make navigator.webdriver read as undefined on every page.
        await context.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });
        """)

        page = await context.new_page()

        try:
            print("🎯 FINAL WORKING SOLUTION - Fix TOC + Navigation")
            print("=" * 60)

            # STEP 1: LOGIN
            print("🔐 Step 1: Logging in...")
            await page.goto(book_url)
            await page.wait_for_timeout(5000)
            await _login_if_required(page, email, password)
            print("✅ Login completed")

            # STEP 2: WAIT FOR READER TO LOAD
            print("📖 Step 2: Waiting for reader to load...")
            await page.wait_for_selector("#reader-header", timeout=30000)
            await page.wait_for_timeout(5000)

            # STEP 3: NAVIGATE TO BEGINNING
            print("🎯 Step 3: Navigate to beginning and properly close TOC...")
            await _open_cover_via_toc(page)

            # STEP 4: CLOSE THE TOC OVERLAY
            print("🔧 Step 4: CLOSING TOC OVERLAY...")
            await _close_toc_overlay(page)
            print("✅ TOC closure attempts completed")

            # STEP 5: TEST NAVIGATION
            print("🔍 Step 5: Testing navigation...")
            print("   📋 Testing different navigation methods...")
            advance_key = await _detect_navigation_key(page)

            current, reported_total = await _read_position(page)
            if current is not None:
                print(f"   📍 Reader reports position {current} of {reported_total}")

            # STEP 6: SCAN THE BOOK
            print("🚀 Step 6: Scanning the complete book...")

            output_dir = Path("scanned_pages")
            output_dir.mkdir(exist_ok=True)

            # Clear screenshots left over from a previous run.
            for old_file in output_dir.glob("*.png"):
                old_file.unlink()

            page_count = await _scan_pages(page, advance_key, total_pages, output_dir)

            print("\n🎉 FINAL SCAN COMPLETED!")
            print(f"📊 Total pages scanned: {page_count}")
            print(f"📚 Expected book pages: {total_pages}")
            print(f"📁 Files saved to: {output_dir}")

            if page_count >= total_pages * 0.85:  # 85% or more
                print("✅ SUCCESS: Captured most/all of the book!")
            else:
                print(f"⚠️ Captured {page_count}/{total_pages} pages ({page_count/total_pages*100:.1f}%)")

            # Final screenshot to verify the end state.
            await page.screenshot(path=output_dir / "final_state.png")
            print("📸 Final state screenshot saved")

            print("\n🔍 Keeping browser open for verification...")
            await page.wait_for_timeout(30000)

        except Exception as e:
            print(f"❌ Error: {e}")
            import traceback
            traceback.print_exc()
        finally:
            await browser.close()

# Script entry point: only when executed directly (not when imported), run
# the scanning coroutine to completion with asyncio.run().
if __name__ == "__main__":
    asyncio.run(final_working_solution())