Get list of common user-agents during bootstrap

This commit is contained in:
Kovid Goyal 2016-07-20 00:06:49 +05:30
parent e73972ee09
commit 1c389964cc
4 changed files with 46 additions and 7 deletions

1
.gitignore vendored
View File

@ -25,6 +25,7 @@ resources/content-server/locales.zip
resources/content-server/mathjax.zip.xz resources/content-server/mathjax.zip.xz
resources/content-server/mathjax.version resources/content-server/mathjax.version
resources/mozilla-ca-certs.pem resources/mozilla-ca-certs.pem
resources/common-user-agents.txt
icons/icns/*.iconset icons/icns/*.iconset
setup/installer/windows/calibre/build.log setup/installer/windows/calibre/build.log
tags tags

View File

@ -11,7 +11,7 @@ __all__ = [
'build', 'mathjax', 'build', 'mathjax',
'gui', 'gui',
'develop', 'install', 'develop', 'install',
'kakasi', 'coffee', 'rapydscript', 'cacerts', 'resources', 'kakasi', 'coffee', 'rapydscript', 'cacerts', 'recent_uas', 'resources',
'check', 'test', 'check', 'test',
'sdist', 'bootstrap', 'sdist', 'bootstrap',
'manual', 'tag_release', 'manual', 'tag_release',
@ -53,11 +53,12 @@ check = Check()
from setup.test import Test from setup.test import Test
test = Test() test = Test()
from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript, RecentUAs
resources = Resources() resources = Resources()
kakasi = Kakasi() kakasi = Kakasi()
coffee = Coffee() coffee = Coffee()
cacerts = CACerts() cacerts = CACerts()
recent_uas = RecentUAs()
rapydscript = RapydScript() rapydscript = RapydScript()
from setup.publish import Manual, TagRelease, Stage1, Stage2, \ from setup.publish import Manual, TagRelease, Stage1, Stage2, \

View File

@ -327,7 +327,7 @@ class Bootstrap(Command):
description = 'Bootstrap a fresh checkout of calibre from git to a state where it can be installed. Requires various development tools/libraries/headers' description = 'Bootstrap a fresh checkout of calibre from git to a state where it can be installed. Requires various development tools/libraries/headers'
TRANSLATIONS_REPO = 'https://github.com/kovidgoyal/calibre-translations.git' TRANSLATIONS_REPO = 'https://github.com/kovidgoyal/calibre-translations.git'
sub_commands = 'build iso639 iso3166 translations gui resources cacerts mathjax'.split() sub_commands = 'build iso639 iso3166 translations gui resources cacerts recent_uas mathjax'.split()
def add_options(self, parser): def add_options(self, parser):
parser.add_option('--ephemeral', default=False, action='store_true', parser.add_option('--ephemeral', default=False, action='store_true',

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import os, cPickle, re, shutil, marshal, zipfile, glob, time, sys, hashlib, json, errno, subprocess import os, cPickle, re, shutil, marshal, zipfile, glob, time, sys, hashlib, json, errno, subprocess
from zlib import compress from zlib import compress
from itertools import chain from itertools import chain
is_ci = os.environ.get('CI', '').lower() == 'true'
from setup import Command, basenames, __appname__ from setup import Command, basenames, __appname__
@ -26,6 +27,12 @@ def get_opts_from_parser(parser):
for x in do_opt(o): for x in do_opt(o):
yield x yield x
def download_securely(url):
    """Download ``url`` using the external ``curl`` program and return the
    raw bytes it outputs.

    curl is used here because on some OSes (OS X), when bootstrapping
    calibre, python will be unable to validate certificates until after
    cacerts is installed.

    curl is run with ``-f`` (fail on HTTP errors), ``-sS`` (no progress
    output, but still report errors) and ``-L`` (follow redirects). As
    subprocess.check_output() is used, a non-zero exit code from curl results
    in a subprocess.CalledProcessError.
    """
    curl_cmd = ['curl', '-fsSL', url]
    return subprocess.check_output(curl_cmd)
class Coffee(Command): # {{{ class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript' description = 'Compile coffeescript files into javascript'
@ -235,10 +242,7 @@ class CACerts(Command): # {{{
if err.errno != errno.ENOENT: if err.errno != errno.ENOENT:
raise raise
raw = b'' raw = b''
# We use curl here as on some OSes (OS X) when bootstrapping calibre, nraw = download_securely('https://curl.haxx.se/ca/cacert.pem')
# python will be unable to validate certificates until after cacerts is
# installed
nraw = subprocess.check_output(['curl', '-L', 'https://curl.haxx.se/ca/cacert.pem'])
if not nraw: if not nraw:
raise RuntimeError('Failed to download CA cert bundle') raise RuntimeError('Failed to download CA cert bundle')
if nraw != raw: if nraw != raw:
@ -252,6 +256,39 @@ class CACerts(Command): # {{{
get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH)) get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH))
# }}} # }}}
class RecentUAs(Command):
    """Command that fetches a list of common browser user agent strings and
    stores the first ten of them, one per line, in the file UA_PATH."""

    description = 'Get updated list of recent browser user agents'

    # File (inside the resources directory) that the list is written to
    UA_PATH = os.path.join(Command.RESOURCES, 'common-user-agents.txt')

    def get_list(self):
        """Return a list of user agent strings.

        When running in a CI environment (module level ``is_ci`` flag) a
        fixed list of IE 11 user agents is returned and no network access is
        performed. Otherwise the list is scraped from the contents of the
        "get-the-list" textarea in the downloaded web page, in the order the
        entries appear there, with blank lines dropped.

        :raises RuntimeError: If the textarea containing the list cannot be
            found in the downloaded page.
        """
        if is_ci:
            # Don't hammer the server from CI
            return [
                # IE 11 - windows 10
                'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 8.1
                'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 8
                'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 7
                'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 10
                'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 8.1
                'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 7
                'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
            ]
        url = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
        raw = download_securely(url).decode('utf-8')
        match = re.search(r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
        if match is None:
            # The page layout has changed (or we got some other page), fail
            # with a useful message rather than an AttributeError on None
            raise RuntimeError('Failed to find the list of user agents in the page at: ' + url)
        lines = (x.strip() for x in match.group(1).splitlines())
        return [x for x in lines if x]

    def run(self, opts):
        """Write the first ten entries from get_list() to UA_PATH, separated
        by newlines and encoded as ASCII.

        :raises RuntimeError: If no user agents were obtained. This check is
            done before the file is opened so that an existing list is not
            replaced by an empty file.
        """
        lines = self.get_list()[:10]
        if not lines:
            raise RuntimeError('Failed to download list of common user agents')
        with open(self.UA_PATH, 'wb') as f:
            f.write('\n'.join(lines).encode('ascii'))
class RapydScript(Command): # {{{ class RapydScript(Command): # {{{
description = 'Compile RapydScript to JavaScript' description = 'Compile RapydScript to JavaScript'