From 5a062aff299fe8627e07fcc538cbe5f5886f598f Mon Sep 17 00:00:00 2001 From: Don-Swanson <32144818+Don-Swanson@users.noreply.github.com> Date: Mon, 23 Mar 2026 18:10:15 -0500 Subject: [PATCH] Update User Agent generation from Opera to Safari --- README.md | 24 ++-- app/utils/ua_generator.py | 245 ++++++++++++-------------------------- app/version.py | 2 +- misc/generate_uas.py | 144 ++++++++++------------ 4 files changed, 148 insertions(+), 267 deletions(-) mode change 100755 => 100644 misc/generate_uas.py diff --git a/README.md b/README.md index 0213fd0..57e5120 100644 --- a/README.md +++ b/README.md @@ -69,8 +69,8 @@ Contents - POST request search and suggestion queries (when possible) - View images at full res without site redirect (currently mobile only) - Light/Dark/System theme modes (with support for [custom CSS theming](https://github.com/benbusby/whoogle-search/wiki/User-Contributed-CSS-Themes)) -- Auto-generated Opera User Agents with random rotation - - 10 unique Opera-based UAs generated on startup from 115 language variants +- Auto-generated Safari User Agents with random rotation + - 10 unique Safari-based UAs generated on startup from 115 language variants - Randomly rotated for each search request to avoid detection patterns - Cached across restarts with configurable refresh options - Fallback to safe default UA if generation fails @@ -454,8 +454,8 @@ There are a few optional environment variables available for customizing a Whoog | WHOOGLE_PROXY_PASS | The password of the proxy server. | | WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". | | WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). | -| WHOOGLE_USER_AGENT | The desktop user agent to use when using 'env_conf' option. Leave empty to use auto-generated Opera UAs. | -| WHOOGLE_USER_AGENT_MOBILE | The mobile user agent to use when using 'env_conf' option. Leave empty to use auto-generated Opera UAs. 
| +| WHOOGLE_USER_AGENT | The desktop user agent to use when using 'env_conf' option. Leave empty to use auto-generated Safari UAs. | +| WHOOGLE_USER_AGENT_MOBILE | The mobile user agent to use when using 'env_conf' option. Leave empty to use auto-generated Safari UAs. | | WHOOGLE_USE_CLIENT_USER_AGENT | Enable to use your own user agent for all requests. Defaults to false. | | WHOOGLE_UA_CACHE_PERSISTENT | Whether to persist auto-generated UAs across restarts. Set to '0' to regenerate on each startup. Default '1'. | | WHOOGLE_UA_CACHE_REFRESH_DAYS | Auto-refresh UA cache after N days. Set to '0' to never refresh (cache persists indefinitely). Default '0'. | @@ -781,7 +781,7 @@ Whoogle can optionally serve a single bundled CSS and JS to reduce the number of ## User Agent Generator Tool -A standalone command-line tool is available for generating Opera User Agent strings on demand: +A standalone command-line tool is available for generating Safari User Agent strings on demand: ```bash # Generate 10 User Agent strings (default) @@ -799,15 +799,15 @@ This tool is useful for: ## Using Custom User Agent Lists -Instead of using auto-generated Opera UA strings, you can provide your own list of User Agent strings for Whoogle to use. +Instead of using auto-generated Safari UA strings, you can provide your own list of User Agent strings for Whoogle to use. ### Setup 1. Create a text file with your preferred UA strings (one per line): ``` -Opera/9.80 (J2ME/MIDP; Opera Mini/4.2.13337/22.478; U; en) Presto/2.4.15 Version/10.00 -Opera/9.80 (Android; Linux; Opera Mobi/498; U; en) Presto/2.12.423 Version/10.1 +Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us) AppleWebKit/533.16 (KHTML, like Gecko) Version/5.0 Safari/533.16 +Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0 Safari/533.16 ``` 2. 
Set the `WHOOGLE_UA_LIST_FILE` environment variable to point to your file: @@ -836,7 +836,7 @@ Whoogle uses the following priority when loading User Agent strings: - You can use the output from `misc/check_google_user_agents.py` as your custom UA list - Generate a list with `python misc/generate_uas.py 50 2>/dev/null > my_uas.txt` -- Mix different UA types (Opera, Firefox, Chrome) for more variety +- Mix different UA types (Safari, Firefox, Chrome) for more variety - Keep the file readable by Whoogle (proper permissions) - One UA string per line, blank lines are ignored @@ -901,13 +901,13 @@ The tool evaluates UAs against multiple criteria: 4. **Result Validation**: Confirms presence of actual search result HTML elements 5. **Content Analysis**: Validates response size and structure -This tool was used to discover and validate the working Opera UA patterns that power Whoogle's auto-generation feature. +This tool was used to discover and validate the working Safari UA patterns that power Whoogle's auto-generation feature. ## Known Issues ### User Agent Strings and Image Search -**Issue**: Most, if not all, of the auto-generated Opera User Agent strings may fail when performing **image searches** on Google. This appears to be a limitation with how Google's image search validates User Agent strings. +**Issue**: Most, if not all, of the auto-generated Safari User Agent strings may fail when performing **image searches** on Google. This appears to be a limitation with how Google's image search validates User Agent strings. 
**Impact**: - Regular web searches work correctly with generated UAs @@ -926,7 +926,7 @@ Under the hood, Whoogle is a basic Flask app with the following structure: - `results.py`: Utility functions for interpreting/modifying individual search results - `search.py`: Creates and handles new search queries - `session.py`: Miscellaneous methods related to user sessions - - `ua_generator.py`: Auto-generates Opera User Agent strings with pattern-based randomization + - `ua_generator.py`: Auto-generates Safari User Agent strings with pattern-based randomization - `templates/` - `index.html`: The home page template - `display.html`: The search results template diff --git a/app/utils/ua_generator.py b/app/utils/ua_generator.py index fb7a8c2..7373e70 100644 --- a/app/utils/ua_generator.py +++ b/app/utils/ua_generator.py @@ -1,93 +1,52 @@ """ -User Agent Generator for Opera-based UA strings. +User Agent Generator for Safari-based UA strings. -This module generates realistic Opera User Agent strings based on patterns +This module generates realistic Safari 5.0 User Agent strings based on patterns found in working UA strings that successfully bypass Google's restrictions. 
""" import json import os import random -from datetime import datetime, timedelta -from typing import List, Dict - +from datetime import datetime +from typing import List # Default fallback UA if generation fails -DEFAULT_FALLBACK_UA = "Opera/9.80 (iPad; Opera Mini/5.0.17381/503; U; eu) Presto/2.6.35 Version/11.10)" +DEFAULT_FALLBACK_UA = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0 Safari/533.16" -# Opera UA Pattern Templates -OPERA_PATTERNS = [ - # Opera Mini (J2ME/MIDP) - "Opera/9.80 (J2ME/MIDP; Opera Mini/{version}/{build}; U; {lang}) Presto/{presto} Version/{final}", - - # Opera Mobile (Android) - "Opera/9.80 (Android; Linux; Opera Mobi/{build}; U; {lang}) Presto/{presto} Version/{final}", - - # Opera Mobile (iPhone) - "Opera/9.80 (iPhone; Opera Mini/{version}/{build}; U; {lang}) Presto/{presto} Version/{final}", - - # Opera Mobile (iPad) - "Opera/9.80 (iPad; Opera Mini/{version}/{build}; U; {lang}) Presto/{presto} Version/{final}", +# Safari UA Pattern Templates +SAFARI_PATTERNS = [ + "Mozilla/5.0 ({system}; U; {os_ver}; {lang}) AppleWebKit/{webkit} (KHTML, like Gecko) Version/5.0 Safari/{safari}" ] -# Randomization pools based on working UAs -OPERA_MINI_VERSIONS = [ - "4.0", "4.1.11321", "4.1.12965", "4.1.13573", "4.1.13907", "4.1.14287", - "4.1.15082", "4.2.13057", "4.2.13221", "4.2.13265", "4.2.13337", - "4.2.13400", "4.2.13918", "4.2.13943", "4.2.14320", "4.2.14409", - "4.2.14753", "4.2.14881", "4.2.14885", "4.2.14912", "4.2.15066", - "4.2.15410", "4.2.16007", "4.2.16320", "4.2.18887", "4.2.19634", - "4.2.21465", "4.2.22228", "4.2.23453", "4.2.24721", "4.3.13337", - "4.3.24214", "4.4.26736", "4.4.29476", "4.5.33867", "4.5.40312", - "5.0.15650", "5.0.16823", "5.0.17381", "5.0.17443", "5.0.18635", - "5.0.18741", "5.0.19683", "5.0.19693", "5.0.20873", "5.0.22349", - "5.1.21051", "5.1.21126", "5.1.21214", "5.1.21415", "5.1.21594", - "5.1.21595", "5.1.22296", "5.1.22303", "5.1.22396", "5.1.22460", - 
"5.1.22783", "5.1.22784", "6.0.24095", "6.0.24212", "6.0.24455", - "6.1.25375", "6.1.25378", "6.1.25759", "6.24093", "6.24096", - "6.24209", "6.24288", "6.5.26955", "6.5.29702", "7.0.29952", - "7.1.32052", "7.1.32444", "7.1.32694", "7.29530", "7.5.33361", - "7.6.35766", "9.80", "36.2.2254" +SYSTEMS_AND_VERSIONS = [ + ({"system": "X11", "os_ver": "Linux x86_64"}), + ({"system": "Windows", "os_ver": "Windows NT 6.1"}), + ({"system": "Windows", "os_ver": "Windows NT 6.0"}), + ({"system": "Macintosh", "os_ver": "Intel Mac OS X 10_6_3"}), + ({"system": "Macintosh", "os_ver": "Intel Mac OS X 10_5_8"}), + ({"system": "Macintosh", "os_ver": "PPC Mac OS X 10_5_8"}), + ({"system": "Macintosh", "os_ver": "PPC Mac OS X 10_4_11"}), + ({"system": "Macintosh", "os_ver": "Intel Mac OS X 10_6_3; HTC-P715a"}), ] -OPERA_MOBI_BUILDS = [ - "27", "49", "447", "498", "1181", "1209", "3730", - "ADR-1011151731", "ADR-1012211514", "ADR-1012221546", "ADR-1012272315", - "SYB-1103211396", "SYB-1104061449", "SYB-1107071606", - "ADR-1111101157" +WEBKIT_VERSIONS = [ + "531.2+", "533.16", "533.18.1", "534.1+" ] -BUILD_NUMBERS = [ - "18.678", "18.684", "18.738", "18.794", "19.892", "19.916", - "20.2477", "20.2479", "20.2485", "20.2489", "21.529", "22.387", - "22.394", "22.401", "22.414", "22.453", "22.478", "23.317", - "23.333", "23.334", "23.377", "23.390", "24.741", "24.743", - "24.746", "24.783", "24.838", "24.871", "24.899", "25.657", - "25.677", "25.729", "25.872", "26.1305", "27.1366", "27.1407", - "27.1573", "28.2075", "28.2555", "28.2647", "28.2766", "29.3594", - "30.3316", "31.1350", "35.2883", "35.5706", "37.6584", "119.132", - "170.51", "170.54", "764", "870", "886", "490", "503" -] - -PRESTO_VERSIONS = [ - "2.2.0", "2.4.15", "2.4.154.15", "2.4.18", "2.5.25", "2.5.28", - "2.6.35", "2.7.60", "2.7.81", "2.8.119", "2.8.149", "2.8.191", - "2.9.201", "2.12.423" -] - -FINAL_VERSIONS = [ - "10.00", "10.1", "10.5", "10.54", "10.5454", "11.00", "11.10", - "12.02", "12.16", "13.00" 
+SAFARI_VERSIONS = [ + "531.2+", "533.16" ] LANGUAGES = [ # English variants - "en", "en-US", "en-GB", "en-CA", "en-AU", "en-NZ", "en-ZA", "en-IN", "en-SG", + "en", "en-us", "en-US", "en-GB", "en-ca", "en-CA", "en-au", "en-AU", + "en-NZ", "en-ZA", "en-IN", "en-SG", # Western European "de", "de-DE", "de-AT", "de-CH", - "fr", "fr-FR", "fr-CA", "fr-BE", "fr-CH", "fr-LU", - "es", "es-ES", "es-MX", "es-AR", "es-CO", "es-CL", "es-PE", "es-VE", "es-LA", - "it", "it-IT", "it-CH", + "fr", "fr-fr", "fr-FR", "fr-CA", "fr-BE", "fr-CH", "fr-LU", + "es", "es-es", "es-ES", "es-MX", "es-AR", "es-CO", "es-CL", "es-PE", + "it", "it-it", "it-IT", "it-CH", "pt", "pt-PT", "pt-BR", "nl", "nl-NL", "nl-BE", # Nordic languages @@ -107,11 +66,11 @@ LANGUAGES = [ "sr", "sr-RS", "sl", "sl-SI", "uk", "uk-UA", - "ru", "ru-RU", + "ru", "ru-ru", "ru-RU", # Asian languages - "zh", "zh-CN", "zh-TW", "zh-HK", - "ja", "ja-JP", - "ko", "ko-KR", + "zh", "zh-cn", "zh-CN", "zh-tw", "zh-TW", "zh-HK", + "ja", "ja-jp", "ja-JP", + "ko", "ko-kr", "ko-KR", "th", "th-TH", "vi", "vi-VN", "id", "id-ID", @@ -124,53 +83,52 @@ LANGUAGES = [ "fa", "fa-IR", # Other "hi", "hi-IN", - "bn", "bn-IN", - "ta", "ta-IN", - "te", "te-IN", - "mr", "mr-IN", - "el", "el-GR", - "ca", "ca-ES", + "el", "el-gr", "el-GR", + "ca", "ca-es", "ca-ES", "eu", "eu-ES" ] +def load_blacklist() -> List[str]: + """Load blacklisted string roots from WHOOGLE_UA_BLACKLIST.""" + blacklist_env = os.environ.get('WHOOGLE_UA_BLACKLIST', '') + if not blacklist_env: + return [] + return [term.strip().lower() for term in blacklist_env.split(',') if term.strip()] +def check_blacklist(ua: str) -> bool: + """Check if the given UA string contains any blacklisted term.""" + blacklist = load_blacklist() + if not blacklist: + return False + ua_lower = ua.lower() + for term in blacklist: + if term in ua_lower: + return True + return False -def generate_opera_ua() -> str: +def generate_safari_ua() -> str: """ - Generate a single random Opera User Agent string. 
+ Generate a single random Safari 5.0 User Agent string. Returns: - str: A randomly generated Opera UA string + str: A randomly generated Safari UA string """ - pattern = random.choice(OPERA_PATTERNS) + pattern = random.choice(SAFARI_PATTERNS) + system_info = random.choice(SYSTEMS_AND_VERSIONS) - # Determine which parameters to use based on the pattern params = { - 'lang': random.choice(LANGUAGES) + 'system': system_info['system'], + 'os_ver': system_info['os_ver'], + 'lang': random.choice(LANGUAGES), + 'webkit': random.choice(WEBKIT_VERSIONS), + 'safari': random.choice(SAFARI_VERSIONS) } - if '{version}' in pattern: - params['version'] = random.choice(OPERA_MINI_VERSIONS) - - if '{build}' in pattern: - # Use MOBI build for "Opera Mobi", regular build for "Opera Mini" - if "Opera Mobi" in pattern: - params['build'] = random.choice(OPERA_MOBI_BUILDS) - else: - params['build'] = random.choice(BUILD_NUMBERS) - - if '{presto}' in pattern: - params['presto'] = random.choice(PRESTO_VERSIONS) - - if '{final}' in pattern: - params['final'] = random.choice(FINAL_VERSIONS) - return pattern.format(**params) - def generate_ua_pool(count: int = 10) -> List[str]: """ - Generate a pool of unique Opera User Agent strings. + Generate a pool of unique User Agent strings. 
Args: count: Number of UA strings to generate (default: 10) @@ -180,43 +138,31 @@ def generate_ua_pool(count: int = 10) -> List[str]: """ ua_pool = set() - # Keep generating until we have enough unique UAs - # Add safety limit to prevent infinite loop max_attempts = count * 100 attempts = 0 try: while len(ua_pool) < count and attempts < max_attempts: - ua = generate_opera_ua() - ua_pool.add(ua) + ua = generate_safari_ua() + if not check_blacklist(ua): + ua_pool.add(ua) attempts += 1 except Exception: - # If generation fails entirely, return at least the default fallback if not ua_pool: return [DEFAULT_FALLBACK_UA] - # If we couldn't generate enough, fill remaining with default result = list(ua_pool) while len(result) < count: result.append(DEFAULT_FALLBACK_UA) return result - def save_ua_pool(uas: List[str], cache_path: str) -> None: - """ - Save UA pool to cache file. - - Args: - uas: List of UA strings to save - cache_path: Path to cache file - """ cache_data = { 'generated_at': datetime.now().isoformat(), 'user_agents': uas } - # Ensure directory exists cache_dir = os.path.dirname(cache_path) if cache_dir and not os.path.exists(cache_dir): os.makedirs(cache_dir, exist_ok=True) @@ -224,113 +170,70 @@ def save_ua_pool(uas: List[str], cache_path: str) -> None: with open(cache_path, 'w', encoding='utf-8') as f: json.dump(cache_data, f, indent=2) - def load_custom_ua_list(file_path: str) -> List[str]: - """ - Load custom UA list from a text file. 
- - Args: - file_path: Path to text file containing UA strings (one per line) - - Returns: - List[str]: List of UA strings, or empty list if file is invalid - """ try: with open(file_path, 'r', encoding='utf-8') as f: uas = [line.strip() for line in f if line.strip()] - - # Validate that we have at least one UA if not uas: return [] + # Filter by blacklist + uas = [ua for ua in uas if not check_blacklist(ua)] return uas except (FileNotFoundError, PermissionError, UnicodeDecodeError): return [] - def load_ua_pool(cache_path: str, count: int = 10) -> List[str]: - """ - Load UA pool from custom list file, cache, or generate new one. - - Priority order: - 1. Custom UA list file (if WHOOGLE_UA_LIST_FILE is set) - 2. Cached auto-generated UAs - 3. Newly generated UAs - - Args: - cache_path: Path to cache file - count: Number of UAs to generate if cache is invalid (default: 10) - - Returns: - List[str]: List of UA strings - """ - # Check for custom UA list file first (highest priority) custom_ua_file = os.environ.get('WHOOGLE_UA_LIST_FILE', '').strip() if custom_ua_file: custom_uas = load_custom_ua_list(custom_ua_file) if custom_uas: - # Custom list loaded successfully return custom_uas else: - # Custom file specified but invalid, log warning and fall back print(f"Warning: Custom UA list file '{custom_ua_file}' not found or invalid, falling back to auto-generated UAs") - - # Check if we should use cache + use_cache = os.environ.get('WHOOGLE_UA_CACHE_PERSISTENT', '1') == '1' refresh_days = int(os.environ.get('WHOOGLE_UA_CACHE_REFRESH_DAYS', '0')) - # If cache disabled, always generate new + # Check if we should use cache if not use_cache: uas = generate_ua_pool(count) save_ua_pool(uas, cache_path) return uas - # Try to load from cache if os.path.exists(cache_path): try: with open(cache_path, 'r', encoding='utf-8') as f: cache_data = json.load(f) - # Check if cache is expired (if refresh_days > 0) if refresh_days > 0: generated_at = 
datetime.fromisoformat(cache_data['generated_at']) age_days = (datetime.now() - generated_at).days if age_days >= refresh_days: - # Cache expired, generate new uas = generate_ua_pool(count) save_ua_pool(uas, cache_path) return uas - # Cache is valid, return it - return cache_data['user_agents'] + # Filter cached UAs by blacklist just in case it changed + cached_uas = cache_data['user_agents'] + filtered_uas = [ua for ua in cached_uas if not check_blacklist(ua)] + if filtered_uas: + return filtered_uas + except (json.JSONDecodeError, KeyError, ValueError): - # Cache file is corrupted, generate new pass - # No valid cache, generate new uas = generate_ua_pool(count) save_ua_pool(uas, cache_path) return uas - def get_random_ua(ua_pool: List[str]) -> str: - """ - Get a random UA from the pool. - - Args: - ua_pool: List of UA strings - - Returns: - str: Random UA string from the pool - """ if not ua_pool: - # Fallback to generating one if pool is empty try: - return generate_opera_ua() + ua = generate_safari_ua() + return ua if not check_blacklist(ua) else DEFAULT_FALLBACK_UA except Exception: - # If generation fails, use default fallback return DEFAULT_FALLBACK_UA return random.choice(ua_pool) - diff --git a/app/version.py b/app/version.py index 5d3a999..08df12c 100644 --- a/app/version.py +++ b/app/version.py @@ -4,5 +4,5 @@ optional_dev_tag = '' if os.getenv('DEV_BUILD'): optional_dev_tag = '.dev' + os.getenv('DEV_BUILD') -__version__ = '1.2.2' + optional_dev_tag +__version__ = '1.2.3' + optional_dev_tag diff --git a/misc/generate_uas.py b/misc/generate_uas.py old mode 100755 new mode 100644 index d5cd0b9..67724d9 --- a/misc/generate_uas.py +++ b/misc/generate_uas.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """ -Standalone Opera User Agent String Generator +Standalone Safari User Agent String Generator -This tool generates Opera-based User Agent strings that can be used with Whoogle. +This tool generates Safari 5.0 User Agent strings that can be used with Whoogle. 
It can be run independently to generate and display UA strings on demand. Usage: @@ -20,67 +20,54 @@ import sys import os # Default fallback UA if generation fails -DEFAULT_FALLBACK_UA = "Opera/9.30 (Nintendo Wii; U; ; 3642; en)" +DEFAULT_FALLBACK_UA = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0 Safari/533.16" # Try to import from the app module if available try: - # Add parent directory to path to allow imports sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) from app.utils.ua_generator import generate_ua_pool USE_APP_MODULE = True except ImportError: USE_APP_MODULE = False - # Self-contained version if app module is not available import random - # Opera UA Pattern Templates - OPERA_PATTERNS = [ - "Opera/9.80 (J2ME/MIDP; Opera Mini/{version}/{build}; U; {lang}) Presto/{presto} Version/{final}", - "Opera/9.80 (Android; Linux; Opera Mobi/{build}; U; {lang}) Presto/{presto} Version/{final}", - "Opera/9.80 (iPhone; Opera Mini/{version}/{build}; U; {lang}) Presto/{presto} Version/{final}", - "Opera/9.80 (iPad; Opera Mini/{version}/{build}; U; {lang}) Presto/{presto} Version/{final}", + SAFARI_PATTERNS = [ + "Mozilla/5.0 ({system}; U; {os_ver}; {lang}) AppleWebKit/{webkit} (KHTML, like Gecko) Version/5.0 Safari/{safari}" ] - OPERA_MINI_VERSIONS = [ - "4.0", "4.1.11321", "4.2.13337", "4.2.14912", "4.2.15410", "4.3.24214", - "5.0.18741", "5.1.22296", "5.1.22783", "6.0.24095", "6.24093", "7.1.32444", - "7.6.35766", "36.2.2254" + SYSTEMS_AND_VERSIONS = [ + ({"system": "X11", "os_ver": "Linux x86_64"}), + ({"system": "Windows", "os_ver": "Windows NT 6.1"}), + ({"system": "Windows", "os_ver": "Windows NT 6.0"}), + ({"system": "Macintosh", "os_ver": "Intel Mac OS X 10_6_3"}), + ({"system": "Macintosh", "os_ver": "Intel Mac OS X 10_5_8"}), + ({"system": "Macintosh", "os_ver": "PPC Mac OS X 10_5_8"}), + ({"system": "Macintosh", "os_ver": "PPC Mac OS X 10_4_11"}), + ({"system": "Macintosh", "os_ver": 
"Intel Mac OS X 10_6_3; HTC-P715a"}), ] - OPERA_MOBI_BUILDS = [ - "27", "49", "447", "1209", "3730", "ADR-1012221546", "SYB-1107071606" + WEBKIT_VERSIONS = [ + "531.2+", "533.16", "533.18.1", "534.1+" ] - BUILD_NUMBERS = [ - "22.387", "22.478", "23.334", "23.377", "24.746", "24.783", "25.657", - "27.1407", "28.2647", "35.5706", "119.132", "870", "886" - ] - - PRESTO_VERSIONS = [ - "2.4.15", "2.4.18", "2.5.25", "2.8.119", "2.12.423" - ] - - FINAL_VERSIONS = [ - "10.00", "10.1", "10.54", "11.10", "12.16", "13.00" + SAFARI_VERSIONS = [ + "531.2+", "533.16" ] LANGUAGES = [ - # English variants - "en", "en-US", "en-GB", "en-CA", "en-AU", "en-NZ", "en-ZA", "en-IN", "en-SG", - # Western European + "en", "en-us", "en-US", "en-GB", "en-ca", "en-CA", "en-au", "en-AU", + "en-NZ", "en-ZA", "en-IN", "en-SG", "de", "de-DE", "de-AT", "de-CH", - "fr", "fr-FR", "fr-CA", "fr-BE", "fr-CH", "fr-LU", - "es", "es-ES", "es-MX", "es-AR", "es-CO", "es-CL", "es-PE", "es-VE", "es-LA", - "it", "it-IT", "it-CH", + "fr", "fr-fr", "fr-FR", "fr-CA", "fr-BE", "fr-CH", "fr-LU", + "es", "es-es", "es-ES", "es-MX", "es-AR", "es-CO", "es-CL", "es-PE", + "it", "it-it", "it-IT", "it-CH", "pt", "pt-PT", "pt-BR", "nl", "nl-NL", "nl-BE", - # Nordic languages "da", "da-DK", "sv", "sv-SE", "no", "no-NO", "nb", "nn", "fi", "fi-FI", "is", "is-IS", - # Eastern European "pl", "pl-PL", "cs", "cs-CZ", "sk", "sk-SK", @@ -91,68 +78,67 @@ except ImportError: "sr", "sr-RS", "sl", "sl-SI", "uk", "uk-UA", - "ru", "ru-RU", - # Asian languages - "zh", "zh-CN", "zh-TW", "zh-HK", - "ja", "ja-JP", - "ko", "ko-KR", + "ru", "ru-ru", "ru-RU", + "zh", "zh-cn", "zh-CN", "zh-tw", "zh-TW", "zh-HK", + "ja", "ja-jp", "ja-JP", + "ko", "ko-kr", "ko-KR", "th", "th-TH", "vi", "vi-VN", "id", "id-ID", "ms", "ms-MY", "fil", "tl", - # Middle Eastern "tr", "tr-TR", "ar", "ar-SA", "ar-AE", "ar-EG", "he", "he-IL", "fa", "fa-IR", - # Other "hi", "hi-IN", - "bn", "bn-IN", - "ta", "ta-IN", - "te", "te-IN", - "mr", "mr-IN", - "el", "el-GR", - "ca", 
"ca-ES", + "el", "el-gr", "el-GR", + "ca", "ca-es", "ca-ES", "eu", "eu-ES" ] - def generate_opera_ua(): - """Generate a single random Opera User Agent string.""" - pattern = random.choice(OPERA_PATTERNS) - params = {'lang': random.choice(LANGUAGES)} - - if '{version}' in pattern: - params['version'] = random.choice(OPERA_MINI_VERSIONS) - if '{build}' in pattern: - if "Opera Mobi" in pattern: - params['build'] = random.choice(OPERA_MOBI_BUILDS) - else: - params['build'] = random.choice(BUILD_NUMBERS) - if '{presto}' in pattern: - params['presto'] = random.choice(PRESTO_VERSIONS) - if '{final}' in pattern: - params['final'] = random.choice(FINAL_VERSIONS) - + def generate_safari_ua(): + pattern = random.choice(SAFARI_PATTERNS) + system_info = random.choice(SYSTEMS_AND_VERSIONS) + params = { + 'system': system_info['system'], + 'os_ver': system_info['os_ver'], + 'lang': random.choice(LANGUAGES), + 'webkit': random.choice(WEBKIT_VERSIONS), + 'safari': random.choice(SAFARI_VERSIONS) + } return pattern.format(**params) + def load_blacklist(): + blacklist_env = os.environ.get('WHOOGLE_UA_BLACKLIST', '') + if not blacklist_env: + return [] + return [term.strip().lower() for term in blacklist_env.split(',') if term.strip()] + + def check_blacklist(ua): + blacklist = load_blacklist() + if not blacklist: + return False + ua_lower = ua.lower() + for term in blacklist: + if term in ua_lower: + return True + return False + def generate_ua_pool(count=10): - """Generate a pool of unique Opera User Agent strings.""" ua_pool = set() max_attempts = count * 100 attempts = 0 - try: while len(ua_pool) < count and attempts < max_attempts: - ua = generate_opera_ua() - ua_pool.add(ua) + ua = generate_safari_ua() + if not check_blacklist(ua): + ua_pool.add(ua) attempts += 1 except Exception: - # If generation fails entirely, return at least the default fallback if not ua_pool: return [DEFAULT_FALLBACK_UA] - # If we couldn't generate enough, fill remaining with default result = list(ua_pool) 
while len(result) < count: result.append(DEFAULT_FALLBACK_UA) @@ -161,9 +147,7 @@ except ImportError: def main(): - """Main function to generate and display UA strings.""" - # Parse command line argument - count = 10 # Default + count = 10 if len(sys.argv) > 1: try: count = int(sys.argv[1]) @@ -174,25 +158,19 @@ def main(): print(f"Error: Invalid count '{sys.argv[1]}'. Must be an integer.", file=sys.stderr) sys.exit(1) - # Show which mode we're using (to stderr so it doesn't interfere with output) if USE_APP_MODULE: print(f"# Using app.utils.ua_generator module", file=sys.stderr) else: print(f"# Using standalone generator (app module not available)", file=sys.stderr) + + print(f"# Generating {count} Safari 5 User Agent strings...\n", file=sys.stderr) - print(f"# Generating {count} Opera User Agent strings...\n", file=sys.stderr) - - # Generate UAs uas = generate_ua_pool(count) - # Display them (one per line, no numbering) for ua in uas: print(ua) - # Summary to stderr so it doesn't interfere with piping print(f"\n# Generated {len(uas)} unique User Agent strings", file=sys.stderr) - if __name__ == '__main__': main() -