mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get common user agents from the calibre website logs
This commit is contained in:
parent
ffdf794246
commit
18a3d945c6
@ -3,19 +3,24 @@
|
||||
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
|
||||
import bz2
|
||||
import os
|
||||
import json
|
||||
import gzip
|
||||
import io
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
from setup import download_securely
|
||||
from urllib.request import urlopen
|
||||
|
||||
from polyglot.builtins import filter
|
||||
from setup import download_securely
|
||||
|
||||
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||
|
||||
|
||||
def download_from_calibre_server(url):
    """Fetch *url* and return the response body.

    The TLS connection is verified against the certificate bundle
    ``calibre-ebook-root-CA.crt`` found in ``sys.resources_location``.
    """
    import ssl

    ca = os.path.join(sys.resources_location, 'calibre-ebook-root-CA.crt')
    # urlopen()'s ``cafile`` argument was deprecated in Python 3.6 and removed
    # in 3.12; building the SSL context explicitly works on every version.
    ctx = ssl.create_default_context(cafile=ca)
    with urlopen(url, context=ctx) as f:
        return f.read()
|
||||
|
||||
|
||||
def filter_ans(ans):
    """Return a list of the stripped items of *ans*, dropping empty ones."""
    stripped = (item.strip() for item in ans)
    return [item for item in stripped if item]
|
||||
|
||||
@ -39,18 +44,15 @@ def common_user_agents():
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
||||
]
|
||||
print('Getting recent UAs...')
|
||||
raw = download_securely(
|
||||
'https://raw.githubusercontent.com/intoli/user-agents/master/src/user-agents.json.gz')
|
||||
data = json.loads(gzip.GzipFile(fileobj=io.BytesIO(raw)).read())
|
||||
uas = []
|
||||
for item in data:
|
||||
ua = item['userAgent']
|
||||
if not ua.startswith('Opera'):
|
||||
uas.append(ua)
|
||||
ans = filter_ans(uas)[:256]
|
||||
if not ans:
|
||||
raise ValueError('Failed to download list of common UAs')
|
||||
return ans
|
||||
raw = download_from_calibre_server('https://code.calibre-ebook.com/ua-popularity')
|
||||
ans = {}
|
||||
for line in bz2.decompress(raw).decode('utf-8').splitlines():
|
||||
count, ua = line.partition(':')[::2]
|
||||
count = int(count.strip())
|
||||
ua = ua.strip()
|
||||
if len(ua) > 20:
|
||||
ans[ua] = count
|
||||
return ans, list(sorted(ans, reverse=True, key=ans.__getitem__))
|
||||
|
||||
|
||||
def firefox_versions():
|
||||
@ -103,7 +105,7 @@ def chrome_versions():
|
||||
def all_desktop_platforms(user_agents):
|
||||
ans = set()
|
||||
for ua in user_agents:
|
||||
if 'Mobile/' not in ua and ('Firefox/' in ua or 'Chrome/' in ua):
|
||||
if ' Mobile ' not in ua and 'Mobile/' not in ua and ('Firefox/' in ua or 'Chrome/' in ua):
|
||||
plat = ua.partition('(')[2].partition(')')[0]
|
||||
parts = plat.split(';')
|
||||
if 'Firefox/' in ua:
|
||||
@ -113,10 +115,13 @@ def all_desktop_platforms(user_agents):
|
||||
|
||||
|
||||
def get_data():
    """Collect the user-agent data set and return it as a dict.

    The result holds the Chrome and Firefox version lists, the common user
    agents together with their popularity map, a UTC timestamp and the
    desktop platforms derived from the common user agents.
    """
    # Call common_user_agents() exactly once: it returns both the popularity
    # map and the list of user agents, so a second call would only repeat
    # the download.
    ua_freq_map, common = common_user_agents()
    ans = {
        'chrome_versions': chrome_versions(),
        'firefox_versions': firefox_versions(),
        'common_user_agents': common,
        'user_agents_popularity': ua_freq_map,
        'timestamp': datetime.utcnow().isoformat() + '+00:00',
    }
    ans['desktop_platforms'] = list(all_desktop_platforms(ans['common_user_agents']))
    return ans
|
||||
|
@ -23,62 +23,14 @@ def all_firefox_versions(limit=10):
|
||||
return user_agent_data()['firefox_versions'][:limit]
|
||||
|
||||
|
||||
def random_firefox_version():
    """Pick one entry at random from all_firefox_versions()."""
    candidates = all_firefox_versions()
    return random.choice(candidates)
|
||||
|
||||
|
||||
def random_desktop_platform():
    """Pick one entry at random from the 'desktop_platforms' user agent data."""
    platforms = user_agent_data()['desktop_platforms']
    return random.choice(platforms)
|
||||
|
||||
|
||||
def render_firefox_ua(platform, version):
    """Build a Firefox User-Agent string from a platform token and a version.

    *version* is substituted into both the ``rv:`` and ``Firefox/`` tokens.
    """
    # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent/Firefox
    template = 'Mozilla/5.0 ({p}; rv:{ver}) Gecko/20100101 Firefox/{ver}'
    return template.format(p=platform, ver=version)
|
||||
|
||||
|
||||
def random_firefox_ua():
    """Return a Firefox User-Agent string for a random platform and version."""
    # The rendered string must be returned; without the ``return`` this
    # function always yielded None, unlike random_chrome_ua().
    return render_firefox_ua(random_desktop_platform(), random_firefox_version())
|
||||
|
||||
|
||||
def all_chrome_versions(limit=10):
    """Return the first *limit* entries of the 'chrome_versions' user agent data."""
    versions = user_agent_data()['chrome_versions']
    return versions[:limit]
|
||||
|
||||
|
||||
def random_chrome_version():
    """Pick one entry at random from all_chrome_versions()."""
    candidates = all_chrome_versions()
    return random.choice(candidates)
|
||||
|
||||
|
||||
def render_chrome_ua(platform, version):
    """Build a Chrome User-Agent string from a platform token and a version.

    *version* is indexed by the keys 'webkit_version' (used for both the
    AppleWebKit/ and Safari/ tokens) and 'chrome_version'.
    """
    template = 'Mozilla/5.0 ({p}) AppleWebKit/{wv} (KHTML, like Gecko) Chrome/{cv} Safari/{wv}'
    webkit = version['webkit_version']
    chrome = version['chrome_version']
    return template.format(p=platform, wv=webkit, cv=chrome)
|
||||
|
||||
|
||||
def random_chrome_ua():
    """Return a Chrome User-Agent string for a random platform and version."""
    platform = random_desktop_platform()
    version = random_chrome_version()
    return render_chrome_ua(platform, version)
|
||||
|
||||
|
||||
def all_user_agents():
    """Return a shuffled tuple of every rendered Chrome and Firefox user agent.

    The tuple is built on the first call, combining every version of each
    browser with every desktop platform, and is cached as the ``ans``
    attribute of this function for later calls.
    """
    cached = getattr(all_user_agents, 'ans', None)
    if cached is not None:
        return cached

    rendered = []
    namespace = globals()
    platforms = user_agent_data()['desktop_platforms']
    for browser in ('chrome', 'firefox'):
        # Look up the per-browser helpers by name in this module.
        versions = namespace['all_%s_versions' % browser]()
        render = namespace['render_%s_ua' % browser]
        rendered.extend(
            render(plat, ver) for ver in versions for plat in platforms)
    random.shuffle(rendered)
    result = tuple(rendered)
    all_user_agents.ans = result
    return result
|
||||
|
||||
|
||||
def random_user_agent():
    """Pick one entry at random from all_user_agents()."""
    pool = all_user_agents()
    return random.choice(pool)
|
||||
|
||||
|
||||
def accept_header_for_ua(ua):
|
||||
if 'Firefox/' in ua:
|
||||
return 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
||||
|
Loading…
x
Reference in New Issue
Block a user