# kindle_OCR/debug_navigation.py

#!/usr/bin/env python3
"""
DEBUG NAVIGATION - Investigate why pages show identical content after page 65
Run in headed mode to observe behavior
"""

import asyncio
import os
import re
import traceback
from pathlib import Path

from playwright.async_api import async_playwright

# Book opened by default when no URL is passed to debug_navigation().
_DEFAULT_BOOK_URL = "https://read.amazon.com/?asin=B0DJP2C8M6&ref_=kwl_kr_iv_rec_1"

# Credentials are read from the environment so they never live in source control.
# NOTE(review): any credentials that were previously committed in this file
# should be treated as compromised and rotated.
_EMAIL_ENV_VAR = "AMAZON_EMAIL"
_PASSWORD_ENV_VAR = "AMAZON_PASSWORD"

# Keyboard keys tried, in order, to turn a page.
_NAVIGATION_KEYS = ("ArrowRight", "PageDown", "Space")

# Patterns are applied to lower-cased body text, hence the lowercase literals.
_PAGE_RE = re.compile(r'page\s+(\d+)')
_LOCATION_RE = re.compile(r'location\s+(\d+)')


def _extract_page_indicators(text):
    """Return "page N" numbers followed by "location N" numbers (as ``locN``) found in *text*."""
    lowered = text.lower()
    indicators = _PAGE_RE.findall(lowered)
    indicators.extend(f"loc{m}" for m in _LOCATION_RE.findall(lowered))
    return indicators


async def _login(page, book_url):
    """Open *book_url* and, if redirected to the sign-in form, submit credentials from the environment.

    Raises:
        RuntimeError: if sign-in is required but the credential environment
            variables are missing or empty.
    """
    print("🔐 Logging in...")
    await page.goto(book_url)
    await page.wait_for_timeout(5000)

    if "signin" in page.url:
        email = os.environ.get(_EMAIL_ENV_VAR)
        password = os.environ.get(_PASSWORD_ENV_VAR)
        if not email or not password:
            raise RuntimeError(
                f"Sign-in required: set the {_EMAIL_ENV_VAR} and "
                f"{_PASSWORD_ENV_VAR} environment variables"
            )

        email_field = await page.wait_for_selector("#ap_email", timeout=10000)
        await email_field.fill(email)
        continue_btn = await page.wait_for_selector("#continue", timeout=5000)
        await continue_btn.click()
        await page.wait_for_timeout(3000)
        password_field = await page.wait_for_selector("#ap_password", timeout=10000)
        await password_field.fill(password)
        signin_btn = await page.wait_for_selector("#signInSubmit", timeout=5000)
        await signin_btn.click()
        await page.wait_for_timeout(5000)

    print("✅ Login completed")


async def _go_to_beginning(page):
    """Best-effort jump to the "Cover" entry of the table of contents; failures are only reported."""
    print("\n🎯 STEP 1: Navigate to beginning using TOC...")
    try:
        toc_button = await page.wait_for_selector("[aria-label='Table of Contents']", timeout=5000)
        await toc_button.click()
        await page.wait_for_timeout(2000)

        cover_link = await page.wait_for_selector("text=Cover", timeout=5000)
        await cover_link.click()
        await page.wait_for_timeout(3000)

        # Close TOC: press Escape repeatedly, then click the page body.
        for _ in range(5):
            await page.keyboard.press("Escape")
            await page.wait_for_timeout(500)
        await page.click("body", position={"x": 600, "y": 400})
        await page.wait_for_timeout(2000)

        print("   ✅ Navigated to beginning")
    except Exception as e:
        # Deliberately non-fatal: the session continues from wherever the reader is.
        print(f"   ⚠️ TOC navigation failed: {e}")


async def _report_page_state(page, page_num, output_dir):
    """Screenshot the current view into *output_dir* and print its URL, page indicators and leading text."""
    print(f"\n📸 Debug page {page_num}:")

    filename = output_dir / f"debug_page_{page_num:03d}.png"
    await page.screenshot(path=str(filename))
    file_size = filename.stat().st_size
    print(f"   📁 Screenshot: {filename.name} ({file_size} bytes)")

    print(f"   🌐 URL: {page.url}")

    try:
        page_content = await page.inner_text("body")
    except Exception as e:
        print(f"   ❌ Content check failed: {e}")
        return

    page_indicators = _extract_page_indicators(page_content)
    if page_indicators:
        print(f"   📊 Page indicators: {page_indicators}")
    else:
        print("   📊 No page indicators found")

    # Leading text lets a human verify that the content actually advanced.
    content_snippet = page_content[:100].replace('\n', ' ').strip()
    print(f"   📝 Content start: \"{content_snippet}...\"")


async def _advance_page(page):
    """Try each navigation key until the body text or URL changes; return True if one worked."""
    for key in _NAVIGATION_KEYS:
        print(f"      🧪 Trying {key}...")

        before_content = await page.inner_text("body")
        before_url = page.url

        await page.keyboard.press(key)
        await page.wait_for_timeout(2000)  # Wait for change

        after_content = await page.inner_text("body")
        after_url = page.url

        content_changed = before_content != after_content
        url_changed = before_url != after_url

        print(f"         Content changed: {content_changed}")
        print(f"         URL changed: {url_changed}")

        if content_changed or url_changed:
            print(f"         ✅ {key} works!")
            return True
        print(f"         ❌ {key} no effect")

    print("      ⚠️ No navigation method worked!")
    return False


async def debug_navigation(book_url=_DEFAULT_BOOK_URL, page_count=10):
    """Run a headed browser session that probes page navigation in the Kindle web reader.

    The session signs in (credentials come from the ``AMAZON_EMAIL`` and
    ``AMAZON_PASSWORD`` environment variables), tries to jump to the cover via
    the table of contents, then for each of *page_count* pages saves a
    screenshot to ``debug_pages/`` and tests which keyboard key advances the
    reader. Finally the browser is kept open for five minutes for manual
    inspection.

    Args:
        book_url: Reader URL to open. Defaults to the book this script was
            originally written against.
        page_count: Number of pages to capture and step through.

    Any exception raised during the session is printed with a traceback rather
    than propagated; the browser is always closed.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,  # HEADED MODE for observation
            args=[
                "--disable-blink-features=AutomationControlled",
                # NOTE(review): this switches off the same-origin policy for the
                # whole session; confirm it is really needed for the reader.
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor"
            ]
        )
        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )

        # Hide the automation flag that pages can read from navigator.webdriver.
        await context.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });
        """)

        page = await context.new_page()

        try:
            print("🔍 DEBUGGING NAVIGATION ISSUE")
            print("=" * 50)

            await _login(page, book_url)

            # WAIT FOR READER
            await page.wait_for_timeout(8000)
            print(f"📍 Current URL: {page.url}")

            # STEP 1: Check if we can get to the beginning using TOC
            await _go_to_beginning(page)

            # STEP 2: Test navigation and observe behavior
            print("\n🔍 STEP 2: Testing navigation behavior...")

            output_dir = Path("debug_pages")
            output_dir.mkdir(exist_ok=True)

            # Clear old debug files so stale screenshots are not mistaken for new ones.
            for old_file in output_dir.glob("*.png"):
                old_file.unlink()

            for page_num in range(1, page_count + 1):
                await _report_page_state(page, page_num, output_dir)

                # No need to navigate past the last page under test.
                if page_num < page_count:
                    print("   ▶️  Navigating to next page...")
                    await _advance_page(page)

                # Pause for observation
                print("   ⏳ Pausing 3 seconds for observation...")
                await page.wait_for_timeout(3000)

            print("\n🔍 STEP 3: Manual inspection time...")
            print("👀 Please observe the browser and check:")
            print("   - Are pages actually changing visually?")
            print("   - Do you see page numbers or progress indicators?")
            print("   - Can you manually click next/previous and see changes?")
            print("   - Check browser Developer Tools (F12) for:")
            print("     * Network requests when navigating")
            print("     * Local Storage / Session Storage for page state")
            print("     * Any errors in Console")
            print("\n⏳ Keeping browser open for 5 minutes for inspection...")
            await page.wait_for_timeout(300000)  # 5 minutes

        except Exception as e:
            # Top-level boundary of the debug session: report and fall through to cleanup.
            print(f"❌ Debug error: {e}")
            traceback.print_exc()
        finally:
            print("🔚 Debug session complete")
            await browser.close()

# Script entry point: run the debug session only when executed directly, not on import.
if __name__ == "__main__":
    asyncio.run(debug_navigation())