Change the source for recent user agent data

The old source no longer works
This commit is contained in:
Kovid Goyal 2019-04-01 18:08:29 +05:30
parent b78e24417a
commit ccb0391844
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 12 additions and 6 deletions

View File

@@ -5,7 +5,9 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import re
import json
import gzip
import io
from datetime import datetime
from setup import download_securely
@@ -39,10 +41,14 @@ def common_user_agents():
]
print('Getting recent UAs...')
raw = download_securely(
'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8')
lines = re.search(
r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw).group(1).splitlines()
ans = filter_ans(lines)
'https://raw.githubusercontent.com/intoli/user-agents/master/src/user-agents.json.gz')
data = json.loads(gzip.GzipFile(fileobj=io.BytesIO(raw)).read())
uas = []
for item in data:
ua = item['userAgent']
if not ua.startswith('Opera'):
uas.append(ua)
ans = filter_ans(uas)[:256]
if not ans:
raise ValueError('Failed to download list of common UAs')
return ans

View File

@@ -270,7 +270,7 @@ class RecentUAs(Command): # {{{
from setup.browser_data import get_data
data = get_data()
with open(self.UA_PATH, 'wb') as f:
f.write(json.dumps(data, indent=2, ensure_ascii=False).encode('utf-8'))
f.write(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True).encode('utf-8'))
# }}}