Initial definition of the automate browser class

This commit is contained in:
Kovid Goyal 2026-03-26 04:45:45 +05:30
parent 2070699dfb
commit 9fc0ea9121
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 58 additions and 1 deletions

View File

@ -178,7 +178,7 @@ class TestImports(unittest.TestCase):
def test_import_of_all_python_modules(self):
from calibre.constants import isbsd, islinux, ismacos, iswindows
exclude_packages = {'calibre.devices.mtp.unix.upstream'}
exclude_modules = set()
exclude_modules = {'calibre.web.automate.browser'}
if not iswindows:
exclude_modules |= {'calibre.utils.iphlpapi', 'calibre.utils.open_with.windows', 'calibre.devices.winusb'}
exclude_packages |= {'calibre.utils.winreg', 'calibre.utils.windows'}

View File

@ -0,0 +1,57 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2026, Kovid Goyal <kovid at kovidgoyal.net>
import random
from collections.abc import Iterable
from urllib.parse import urlparse
from camoufox.async_api import AsyncCamoufox # type: ignore
from calibre.constants import ismacos, iswindows
# Pools of path fragments used by Warmup to assemble candidate warm-up URLs.
# Each entry is appended verbatim to the site prefix noted above its tuple.

# https://en.wikipedia.org/wiki/<topic>
wikipedia_topics = (
    'Lists_of_deaths_by_year',
    'United_States',
    'India',
    'China',
    'United_Kingdom',
    'President',
    'Red',
    'Green',
    'Cyan',
    'Magenta',
)

# https://www.reddit.com/r/<subreddit>
subreddits = (
    'funny',
    'AskReddit',
    'gaming',
    'pics',
    'worldnews',
    'todayilearned',
    'Music',
    'movies',
    'science',
    'memes',
    'aww',
)

# https://www.bbc.com/<topic>
bbc_topics = (
    'news/world',
    'news',
    'sport',
    'business',
    'health',
)

# https://www.foxnews.com/<topic>
fox_news_topics = (
    'us',
    'politics',
    'world',
    'opinion',
)
class Warmup:
    '''
    Choose a small random set of well known pages to visit as a browser warm-up.

    After construction ``self.urls`` holds a random sample of the built-in
    candidate pool followed by every URL passed in ``forced_urls``.
    '''

    def __init__(self, *forced_urls: str, min_num: int = 2, max_num: int = 3, excluded_domains: Iterable[str] = ()):
        '''
        :param forced_urls: URLs always appended, in order, after the random
            sample. They are not subject to ``excluded_domains``.
        :param min_num: Lower bound (inclusive) on the number of randomly chosen URLs.
        :param max_num: Upper bound (inclusive) on the number of randomly chosen URLs.
        :param excluded_domains: Domains whose pages must not be chosen. A domain
            also excludes all of its sub-domains. A single string is accepted too.
        :raises ValueError: If ``min_num`` is greater than ``max_num``
            (raised by ``random.randint``).
        '''
        disallow = self._normalize_domains(excluded_domains)

        def pick(prefix: str, topics: tuple[str, ...]) -> tuple[str, ...]:
            # sample() draws without replacement, so the same page is never
            # queued twice, unlike choices() which may repeat a topic.
            return tuple(f'{prefix}{x}' for x in random.sample(topics, k=min(2, len(topics))))

        candidates = (
            'https://www.amazon.com/gp/css/order-history?ref_=nav_orders_first',
            'https://x.com',
            'https://www.youtube.com',
        )
        candidates += pick('https://www.foxnews.com/', fox_news_topics)
        candidates += pick('https://www.bbc.com/', bbc_topics)
        candidates += pick('https://en.wikipedia.org/wiki/', wikipedia_topics)
        candidates += pick('https://www.reddit.com/r/', subreddits)
        if disallow:
            candidates = tuple(u for u in candidates if not self._is_excluded(u, disallow))
        # Exclusions can shrink the pool below the requested count and
        # random.sample() raises ValueError when asked for more items than
        # exist (or for a negative count), so clamp to what is available.
        count = max(0, min(random.randint(min_num, max_num), len(candidates)))
        self.urls = tuple(random.sample(candidates, k=count)) + forced_urls

    @staticmethod
    def _normalize_domains(excluded_domains: Iterable[str]) -> frozenset[str]:
        ' Return the excluded domains lower-cased, without leading dots or empty entries. '
        if isinstance(excluded_domains, str):
            # A bare string would otherwise be iterated character by character
            excluded_domains = (excluded_domains,)
        return frozenset(d.lower().lstrip('.') for d in excluded_domains) - {''}

    @staticmethod
    def _is_excluded(url: str, disallow: Iterable[str]) -> bool:
        ' Return True if the host of url is one of the (normalized) domains in disallow or a sub-domain of one. '
        # hostname is None for URLs without a network location
        host = urlparse(url).hostname or ''
        # Match only on label boundaries so that x.com does not exclude netflix.com
        return any(host == d or host.endswith('.' + d) for d in disallow)

    # The annotation is quoted because Browser is defined later in this module
    def __call__(self, br: 'Browser') -> None:
        ' Placeholder: currently does nothing with the browser. '
        pass
class Browser(AsyncCamoufox):
    '''
    An AsyncCamoufox that is launched with the ``os`` option matching the
    platform calibre is running on and with ``humanize`` turned on.
    '''

    def __init__(self, headless: bool = True, warmup: Warmup | None = None):
        '''
        :param headless: Passed through to AsyncCamoufox as its ``headless`` option.
        :param warmup: Optional Warmup instance. It is stored as ``self.warmup``;
            nothing in this class invokes it yet.
        '''
        # Named host_os rather than os to avoid confusion with the stdlib module
        if iswindows:
            host_os = 'windows'
        elif ismacos:
            host_os = 'macos'
        else:
            host_os = 'linux'
        super().__init__(headless=headless, os=host_os, humanize=True)
        # Keep the warmup instead of silently discarding the argument
        self.warmup = warmup