From 1c389964ccc6703d1dfbbaf092cad69d87803312 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 20 Jul 2016 00:06:49 +0530 Subject: [PATCH] Get list of common user-agents during bootstrap ---
# Review notes. This region (after the `---` separator, before the diffstat)
# is free-form commentary in a format-patch mail and is not part of the commit.
#
# NOTE(review): this file is a `git format-patch` mail, not a Python module.
# Its line structure looks collapsed (headers, hunks and context lines share
# the same physical lines), so it presumably will not apply as-is -- confirm
# against commit 1c389964 before using it.
#
# What the hunks below do:
#   .gitignore         - ignores resources/common-user-agents.txt.
#   setup/commands.py  - adds 'recent_uas' to __all__, imports RecentUAs from
#                        setup.resources and binds recent_uas = RecentUAs().
#   setup/install.py   - adds recent_uas to Bootstrap.sub_commands, between
#                        cacerts and mathjax.
#   setup/resources.py - adds the module flag is_ci, true when the CI
#                        environment variable equals 'true' (case-insensitive).
#                      - adds download_securely(url), which returns the output
#                        of `curl -fsSL <url>` via subprocess.check_output.
#                      - CACerts now calls download_securely(), so its curl
#                        flags change from `-L` to `-fsSL`.
#                      - adds the RecentUAs command. get_list() returns seven
#                        hard-coded IE 11 user-agent strings when is_ci is
#                        true; otherwise it downloads the willshouse.com page
#                        and returns the stripped, non-empty lines of the
#                        regex capture. run() writes the first 10 entries,
#                        newline-joined and ASCII-encoded, to UA_PATH
#                        (Command.RESOURCES/common-user-agents.txt).
#
# Points to verify before relying on this patch:
#   - In get_list(), the pattern r'([^<]+)' has no anchor: re.search() will
#     capture the first run of characters that contains no '<' anywhere in
#     the downloaded text. Presumably the markup that originally surrounded
#     the capture group was lost from this copy -- compare with the commit.
#   - re.search(...).group(1) raises AttributeError when nothing matches, and
#     .encode('ascii') in run() raises on a non-ASCII entry; neither case is
#     handled, so the command aborts with a raw traceback.
#   - RecentUAs carries no `# {{{` / `# }}}` fold markers, unlike the Coffee,
#     CACerts and RapydScript classes visible in the context lines.
#   - Typo in the added comment: "Dont" should read "Don't".
.gitignore | 1 + setup/commands.py | 5 +++-- setup/install.py | 2 +- setup/resources.py | 45 +++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 46 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 5f1ae0e02e..69b7077498 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ resources/content-server/locales.zip resources/content-server/mathjax.zip.xz resources/content-server/mathjax.version resources/mozilla-ca-certs.pem +resources/common-user-agents.txt icons/icns/*.iconset setup/installer/windows/calibre/build.log tags diff --git a/setup/commands.py b/setup/commands.py index 737781752c..3e7dfedcf9 100644 --- a/setup/commands.py +++ b/setup/commands.py @@ -11,7 +11,7 @@ __all__ = [ 'build', 'mathjax', 'gui', 'develop', 'install', - 'kakasi', 'coffee', 'rapydscript', 'cacerts', 'resources', + 'kakasi', 'coffee', 'rapydscript', 'cacerts', 'recent_uas', 'resources', 'check', 'test', 'sdist', 'bootstrap', 'manual', 'tag_release', @@ -53,11 +53,12 @@ check = Check() from setup.test import Test test = Test() -from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript +from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript, RecentUAs resources = Resources() kakasi = Kakasi() coffee = Coffee() cacerts = CACerts() +recent_uas = RecentUAs() rapydscript = RapydScript() from setup.publish import Manual, TagRelease, Stage1, Stage2, \ diff --git a/setup/install.py b/setup/install.py index e3edae07bd..75a66bb849 100644 --- a/setup/install.py +++ b/setup/install.py @@ -327,7 +327,7 @@ class Bootstrap(Command): description = 'Bootstrap a fresh checkout of calibre from git to a state where it can be installed. 
Requires various development tools/libraries/headers' TRANSLATIONS_REPO = 'https://github.com/kovidgoyal/calibre-translations.git' - sub_commands = 'build iso639 iso3166 translations gui resources cacerts mathjax'.split() + sub_commands = 'build iso639 iso3166 translations gui resources cacerts recent_uas mathjax'.split() def add_options(self, parser): parser.add_option('--ephemeral', default=False, action='store_true', diff --git a/setup/resources.py b/setup/resources.py index df11ac24f0..058e24310b 100644 --- a/setup/resources.py +++ b/setup/resources.py @@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en' import os, cPickle, re, shutil, marshal, zipfile, glob, time, sys, hashlib, json, errno, subprocess from zlib import compress from itertools import chain +is_ci = os.environ.get('CI', '').lower() == 'true' from setup import Command, basenames, __appname__ @@ -26,6 +27,12 @@ def get_opts_from_parser(parser): for x in do_opt(o): yield x +def download_securely(url): + # We use curl here as on some OSes (OS X) when bootstrapping calibre, + # python will be unable to validate certificates until after cacerts is + # installed + return subprocess.check_output(['curl', '-fsSL', url]) + class Coffee(Command): # {{{ description = 'Compile coffeescript files into javascript' @@ -235,10 +242,7 @@ class CACerts(Command): # {{{ if err.errno != errno.ENOENT: raise raw = b'' - # We use curl here as on some OSes (OS X) when bootstrapping calibre, - # python will be unable to validate certificates until after cacerts is - # installed - nraw = subprocess.check_output(['curl', '-L', 'https://curl.haxx.se/ca/cacert.pem']) + nraw = download_securely('https://curl.haxx.se/ca/cacert.pem') if not nraw: raise RuntimeError('Failed to download CA cert bundle') if nraw != raw: @@ -252,6 +256,39 @@ class CACerts(Command): # {{{ get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH)) # }}} +class RecentUAs(Command): + + description = 'Get updated list of 
recent browser user agents' + UA_PATH = os.path.join(Command.RESOURCES, 'common-user-agents.txt') + + def get_list(self): + if is_ci: + # Dont hammer the server from CI + return [ + # IE 11 - windows 10 + 'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko', + # IE 11 - windows 8.1 + 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', + # IE 11 - windows 8 + 'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko', + # IE 11 - windows 7 + 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko', + # 32bit IE 11 on 64 bit win 10 + 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko', + # 32bit IE 11 on 64 bit win 8.1 + 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko', + # 32bit IE 11 on 64 bit win 7 + 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko', + ] + raw = download_securely('https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8') + lines = re.search(r'([^<]+)', raw).group(1).splitlines() + return [x.strip() for x in lines if x.strip()] + + def run(self, opts): + lines = self.get_list()[:10] + with open(self.UA_PATH, 'wb') as f: + f.write('\n'.join(lines).encode('ascii')) + class RapydScript(Command): # {{{ description = 'Compile RapydScript to JavaScript'