#!/usr/bin/env python3
"""
SCAN BEGINNING - Pages 1-64 to complete the book
"""

import asyncio
import json
import time
import traceback
from pathlib import Path

from playwright.async_api import async_playwright


async def scan_beginning_pages(start_page=1, end_page=64):
    """
    Screenshot the beginning pages of the book in the Kindle web reader.

    Loads the browser storage state from ``kindle_session_state.json``,
    opens the book, rewinds to the beginning (``Home`` followed by ten
    ``ArrowLeft`` presses), then takes one viewport screenshot per page
    number, pressing ``ArrowRight`` between captures. Screenshots are
    written to ``scanned_pages/page_NNN.png``.

    Args:
        start_page: Number given to the first screenshot. The reader is
            always rewound to the beginning before capturing, so this only
            controls file numbering; it does not skip ahead in the book.
        end_page: Number of the last screenshot (inclusive).

    Returns:
        ``False`` if the session-state file does not exist. Otherwise the
        number of pages captured, which is smaller than the requested range
        when an error interrupted the scan (the error is printed with its
        traceback rather than raised).
    """
    storage_state_path = "kindle_session_state.json"

    if not Path(storage_state_path).exists():
        print("❌ No session state found.")
        return False

    # Initialised before any browser work so the error handler below can
    # always report a count, even when the failure happens before the first
    # screenshot (e.g. while loading the book).
    pages_captured = 0
    total_pages = end_page - start_page + 1

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=[
                "--disable-blink-features=AutomationControlled",
                # NOTE(review): this turns off Chromium's same-origin checks
                # for the whole session; confirm it is actually required.
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor",
            ],
        )

        # Outer try/finally: the browser is closed even if creating the
        # context or page fails; those errors still propagate to the caller.
        try:
            context = await browser.new_context(
                storage_state=storage_state_path,
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            )
            page = await context.new_page()

            try:
                print(f"🚀 SCANNING BEGINNING: Pages {start_page} to {end_page}")
                print("=" * 50)

                # Load book
                await page.goto("https://read.amazon.com/?asin=B0DJP2C8M6&ref_=kwl_kr_iv_rec_1")
                await page.wait_for_timeout(5000)

                # Navigate to actual first page (page 1)
                print("🎯 Navigating to first page...")
                await page.keyboard.press("Home")  # Go to beginning
                await page.wait_for_timeout(2000)

                # Extra back-presses in case Home did not land on the very
                # first page.
                for _ in range(10):
                    await page.keyboard.press("ArrowLeft")
                    await page.wait_for_timeout(100)

                print("✅ At beginning of book")

                output_dir = Path("scanned_pages")
                output_dir.mkdir(exist_ok=True)

                print(f"📸 SCANNING PAGES {start_page} to {end_page}...")

                for page_num in range(start_page, end_page + 1):
                    print(f"📸 Scanning page {page_num}/{end_page}...")

                    filename = output_dir / f"page_{page_num:03d}.png"
                    await page.screenshot(path=str(filename))

                    file_size = filename.stat().st_size
                    print(f" ✅ Captured ({file_size} bytes)")

                    pages_captured += 1

                    # Progress report every tenth page number; the percentage
                    # is relative to the requested range, not to end_page.
                    if page_num % 10 == 0:
                        progress = (pages_captured / total_pages) * 100
                        print(f"📊 PROGRESS: {page_num}/{end_page} ({progress:.1f}%)")

                    # Navigate to next page (except after the last capture)
                    if page_num < end_page:
                        await page.keyboard.press("ArrowRight")
                        await page.wait_for_timeout(800)

                print("\n🎉 BEGINNING PAGES COMPLETED!")
                print(f"📊 RESULT: Pages {start_page}-{end_page} captured")
                print(f"✅ {pages_captured} pages successfully scanned!")

                return pages_captured

            except Exception as e:
                # Best-effort scan: report the failure and return whatever
                # was captured so far instead of raising.
                print(f"❌ Scanning error: {e}")
                traceback.print_exc()
                return pages_captured
        finally:
            await browser.close()


# Script entry point: run the scan with its default page range and report
# the value it returned.
if __name__ == "__main__":
    result = asyncio.run(scan_beginning_pages())
    print(f"\n🏁 RESULT: {result} beginning pages captured")