Get list of common user-agents during bootstrap

This commit is contained in:
Kovid Goyal 2016-07-20 00:06:49 +05:30
parent e73972ee09
commit 1c389964cc
4 changed files with 46 additions and 7 deletions

1
.gitignore vendored
View File

@ -25,6 +25,7 @@ resources/content-server/locales.zip
resources/content-server/mathjax.zip.xz
resources/content-server/mathjax.version
resources/mozilla-ca-certs.pem
resources/common-user-agents.txt
icons/icns/*.iconset
setup/installer/windows/calibre/build.log
tags

View File

@ -11,7 +11,7 @@ __all__ = [
'build', 'mathjax',
'gui',
'develop', 'install',
'kakasi', 'coffee', 'rapydscript', 'cacerts', 'resources',
'kakasi', 'coffee', 'rapydscript', 'cacerts', 'recent_uas', 'resources',
'check', 'test',
'sdist', 'bootstrap',
'manual', 'tag_release',
@ -53,11 +53,12 @@ check = Check()
from setup.test import Test
test = Test()
from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript
from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript, RecentUAs
resources = Resources()
kakasi = Kakasi()
coffee = Coffee()
cacerts = CACerts()
recent_uas = RecentUAs()
rapydscript = RapydScript()
from setup.publish import Manual, TagRelease, Stage1, Stage2, \

View File

@ -327,7 +327,7 @@ class Bootstrap(Command):
description = 'Bootstrap a fresh checkout of calibre from git to a state where it can be installed. Requires various development tools/libraries/headers'
TRANSLATIONS_REPO = 'https://github.com/kovidgoyal/calibre-translations.git'
sub_commands = 'build iso639 iso3166 translations gui resources cacerts mathjax'.split()
sub_commands = 'build iso639 iso3166 translations gui resources cacerts recent_uas mathjax'.split()
def add_options(self, parser):
parser.add_option('--ephemeral', default=False, action='store_true',

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import os, cPickle, re, shutil, marshal, zipfile, glob, time, sys, hashlib, json, errno, subprocess
from zlib import compress
from itertools import chain
is_ci = os.environ.get('CI', '').lower() == 'true'
from setup import Command, basenames, __appname__
@ -26,6 +27,12 @@ def get_opts_from_parser(parser):
for x in do_opt(o):
yield x
def download_securely(url):
    '''
    Download url by running the external curl program and return its output.

    curl is used here, rather than python, as on some OSes (OS X) when
    bootstrapping calibre, python will be unable to validate certificates
    until after cacerts is installed.
    '''
    # -f: fail on HTTP errors, -sS: no progress output but still report
    # errors, -L: follow redirects
    curl_cmd = ['curl', '-fsSL', url]
    return subprocess.check_output(curl_cmd)
class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript'
@ -235,10 +242,7 @@ class CACerts(Command): # {{{
if err.errno != errno.ENOENT:
raise
raw = b''
# We use curl here as on some OSes (OS X) when bootstrapping calibre,
# python will be unable to validate certificates until after cacerts is
# installed
nraw = subprocess.check_output(['curl', '-L', 'https://curl.haxx.se/ca/cacert.pem'])
nraw = download_securely('https://curl.haxx.se/ca/cacert.pem')
if not nraw:
raise RuntimeError('Failed to download CA cert bundle')
if nraw != raw:
@ -252,6 +256,39 @@ class CACerts(Command): # {{{
get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH))
# }}}
class RecentUAs(Command):
    '''
    Command that writes a short list of browser user agent strings to
    UA_PATH, one per line.
    '''

    description = 'Get updated list of recent browser user agents'
    UA_PATH = os.path.join(Command.RESOURCES, 'common-user-agents.txt')

    def get_list(self):
        '''
        Return a list of browser user agent strings.

        When is_ci is set, a fixed list of IE 11 user agents is returned
        without contacting the server. Otherwise the list is extracted from
        the "get-the-list" textarea of the downloaded web page.

        Raises RuntimeError if the textarea cannot be found in the page.
        '''
        if is_ci:
            # Don't hammer the server from CI
            return [
                # IE 11 - windows 10
                'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 8.1
                'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 8
                'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 7
                'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 10
                'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 8.1
                'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 7
                'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
            ]
        raw = download_securely('https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8')
        match = re.search(r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
        if match is None:
            # The page layout has changed (or something other than the
            # expected page was served); fail with a clear message rather
            # than an AttributeError on None
            raise RuntimeError('Failed to find the list of user agents in the downloaded page')
        stripped = (x.strip() for x in match.group(1).splitlines())
        return [x for x in stripped if x]

    def run(self, opts):
        '''
        Write the first ten user agents from get_list() to UA_PATH, separated
        by newlines and encoded as ASCII.

        Raises RuntimeError if no user agents were obtained, so that an empty
        resource file is never written.
        '''
        lines = self.get_list()[:10]
        if not lines:
            raise RuntimeError('Failed to download list of user agents')
        with open(self.UA_PATH, 'wb') as f:
            f.write('\n'.join(lines).encode('ascii'))
class RapydScript(Command): # {{{
description = 'Compile RapydScript to JavaScript'