# kindle_OCR/auth_handler.py

#!/usr/bin/env python3
"""
Amazon Authentication Handler - Deals with CAPTCHAs and verification
"""

import asyncio
import os
from urllib.parse import urlparse

from playwright.async_api import async_playwright

# Selectors whose presence is treated as proof that the Kindle reader loaded.
_READER_SELECTORS = (
    "#reader-header",
    "ion-header",
    "[class*='reader']",
    "canvas",
    ".kindle",
)

# Substrings of the page HTML that hint at an extra verification step.
_VERIFICATION_HINTS = ("verify", "security", "challenge", "suspicious activity")

# Words in the visible body text used as a last-resort reader heuristic.
_READER_TEXT_MARKERS = ("page", "chapter", "table of contents")


def _needs_signin(url):
    """Return True when *url* looks like an Amazon sign-in page."""
    return "signin" in url or "ap/" in url


def _on_reader_host(url):
    """Return True when *url* is served by read.amazon.com itself.

    The hostname is compared instead of searching the whole URL because
    sign-in URLs can carry the reader address inside their query string.
    """
    return urlparse(url).hostname == "read.amazon.com"


async def _find_reader_selector(page):
    """Return the first reader selector present on *page*, or None."""
    for selector in _READER_SELECTORS:
        try:
            if await page.query_selector(selector):
                return selector
        except Exception:
            # Best effort: a selector that cannot be queried is skipped.
            continue
    return None


async def _submit_credentials(page, email, password):
    """Fill in and submit the e-mail and password forms (best effort)."""
    # `except Exception` (not a bare except) so that task cancellation and
    # KeyboardInterrupt still propagate while form mismatches are tolerated.
    try:
        email_field = await page.wait_for_selector("#ap_email", timeout=10000)
        await email_field.fill(email)
        continue_btn = await page.wait_for_selector("#continue", timeout=5000)
        await continue_btn.click()
        await page.wait_for_timeout(3000)
    except Exception:
        print("   ⚠️ Email step already completed or different flow")

    try:
        password_field = await page.wait_for_selector("#ap_password", timeout=10000)
        await password_field.fill(password)
        signin_btn = await page.wait_for_selector("#signInSubmit", timeout=5000)
        await signin_btn.click()
        await page.wait_for_timeout(5000)
    except Exception:
        print("   ⚠️ Password step failed or different flow")


async def _wait_for_manual_captcha(page):
    """Poll for up to 120 seconds until the CAPTCHA puzzle is gone.

    Returns True when the puzzle disappeared or the reader host was reached,
    False when the time budget ran out.
    """
    for attempt in range(24):  # 24 * 5 seconds = 120 seconds
        await page.wait_for_timeout(5000)

        puzzle_still_there = await page.query_selector("text=Solve this puzzle")
        if not puzzle_still_there or _on_reader_host(page.url):
            print("   ✅ CAPTCHA appears to be solved!")
            return True

        if attempt % 4 == 0:  # Every 20 seconds
            print(f"   ⏳ Still waiting... ({(attempt + 1) * 5}s elapsed)")
    return False


async def handle_amazon_auth(page, email=None, password=None):
    """
    Sign in to the Kindle web reader and wait out CAPTCHA/verification steps.

    Args:
        page: Playwright page (async API) used to drive the browser.
        email: Amazon account e-mail. When omitted, the AMAZON_EMAIL
            environment variable is used.
        password: Amazon account password. When omitted, the AMAZON_PASSWORD
            environment variable is used.

    Returns:
        True if the reader was detected after authentication. False if
        credentials are required but missing, the CAPTCHA was not solved in
        time, the reader could not be detected, or any error occurred.
    """
    try:
        print("🔐 Starting Amazon authentication...")

        # Navigate to Kindle reader
        await page.goto("https://read.amazon.com/?asin=B0DJP2C8M6&ref_=kwl_kr_iv_rec_1")
        await page.wait_for_timeout(5000)

        # Check if we need to sign in
        if _needs_signin(page.url):
            print("   📧 Login required...")

            # Credentials are never stored in source; they come from the
            # caller or from the environment.
            email = email or os.environ.get("AMAZON_EMAIL")
            password = password or os.environ.get("AMAZON_PASSWORD")
            if not email or not password:
                print("   ❌ Missing credentials - set AMAZON_EMAIL and AMAZON_PASSWORD")
                return False

            await _submit_credentials(page, email, password)

        # Check for CAPTCHA or verification challenges
        await page.wait_for_timeout(3000)

        if await page.query_selector("text=Solve this puzzle"):
            print("   🧩 CAPTCHA detected - requires manual solving")
            print("   👆 Please solve the puzzle manually in the browser")
            print("   ⏳ Waiting up to 120 seconds for manual completion...")

            if not await _wait_for_manual_captcha(page):
                print("   ❌ CAPTCHA timeout - manual intervention needed")
                return False

        # The verification hints are generic words that can also occur in the
        # reader's own HTML, so only scan for them while the reader is absent.
        if await _find_reader_selector(page) is None:
            page_html = (await page.content()).lower()
            hint = next((h for h in _VERIFICATION_HINTS if h in page_html), None)
            if hint is not None:
                print(f"   🔒 Additional verification detected: {hint}")
                print("   👆 Please complete verification manually")
                print("   ⏳ Waiting 60 seconds for completion...")
                await page.wait_for_timeout(60000)

        # Final check - are we in the reader?
        await page.wait_for_timeout(5000)

        reader_found = False
        selector = await _find_reader_selector(page)
        if selector is not None:
            print(f"   ✅ Reader element found: {selector}")
            reader_found = True
        elif not _needs_signin(page.url):
            # Text fallback; skipped on sign-in URLs, where words such as
            # "page" would produce a false success.
            page_text = (await page.inner_text("body")).lower()
            if any(marker in page_text for marker in _READER_TEXT_MARKERS):
                print("   ✅ Reader content detected by text analysis")
                reader_found = True

        if reader_found:
            print("✅ Authentication successful - reader accessed")
            return True

        print("❌ Authentication failed - reader not accessible")
        print(f"   Current URL: {page.url}")

        # Take screenshot for debugging
        await page.screenshot(path="auth_failure_debug.png")
        print("   📸 Debug screenshot saved: auth_failure_debug.png")
        return False

    except Exception as e:
        # Top-level boundary: report the failure through the return value.
        print(f"❌ Authentication error: {e}")
        return False

async def test_auth():
    """Run handle_amazon_auth once in a visible Chromium window.

    The window stays open for 10 seconds after a successful run and for
    30 seconds after a failed one, so the result can be inspected by hand
    before the browser is closed.
    """
    launch_flags = [
        "--disable-blink-features=AutomationControlled",
        "--disable-web-security",
    ]
    screen_size = {"width": 1920, "height": 1080}
    desktop_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch(headless=False, args=launch_flags)
        session = await chromium.new_context(viewport=screen_size, user_agent=desktop_ua)
        tab = await session.new_page()

        try:
            if await handle_amazon_auth(tab):
                print("\n🎉 Authentication test PASSED")
                print("📖 Reader is accessible - ready for scanning")
                linger_ms = 10000  # Keep open for verification
            else:
                print("\n❌ Authentication test FAILED")
                linger_ms = 30000  # Keep open for manual inspection
            await tab.wait_for_timeout(linger_ms)
        finally:
            # Always shut the browser down, even if the check raised.
            await chromium.close()

# Script entry point: run the manual authentication check when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    asyncio.run(test_auth())