mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get list of common user-agents during bootstrap
This commit is contained in:
parent
e73972ee09
commit
1c389964cc
1
.gitignore
vendored
1
.gitignore
vendored
@ -25,6 +25,7 @@ resources/content-server/locales.zip
|
|||||||
resources/content-server/mathjax.zip.xz
|
resources/content-server/mathjax.zip.xz
|
||||||
resources/content-server/mathjax.version
|
resources/content-server/mathjax.version
|
||||||
resources/mozilla-ca-certs.pem
|
resources/mozilla-ca-certs.pem
|
||||||
|
resources/common-user-agents.txt
|
||||||
icons/icns/*.iconset
|
icons/icns/*.iconset
|
||||||
setup/installer/windows/calibre/build.log
|
setup/installer/windows/calibre/build.log
|
||||||
tags
|
tags
|
||||||
|
@ -11,7 +11,7 @@ __all__ = [
|
|||||||
'build', 'mathjax',
|
'build', 'mathjax',
|
||||||
'gui',
|
'gui',
|
||||||
'develop', 'install',
|
'develop', 'install',
|
||||||
'kakasi', 'coffee', 'rapydscript', 'cacerts', 'resources',
|
'kakasi', 'coffee', 'rapydscript', 'cacerts', 'recent_uas', 'resources',
|
||||||
'check', 'test',
|
'check', 'test',
|
||||||
'sdist', 'bootstrap',
|
'sdist', 'bootstrap',
|
||||||
'manual', 'tag_release',
|
'manual', 'tag_release',
|
||||||
@ -53,11 +53,12 @@ check = Check()
|
|||||||
from setup.test import Test
|
from setup.test import Test
|
||||||
test = Test()
|
test = Test()
|
||||||
|
|
||||||
from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript
|
from setup.resources import Resources, Kakasi, Coffee, CACerts, RapydScript, RecentUAs
|
||||||
resources = Resources()
|
resources = Resources()
|
||||||
kakasi = Kakasi()
|
kakasi = Kakasi()
|
||||||
coffee = Coffee()
|
coffee = Coffee()
|
||||||
cacerts = CACerts()
|
cacerts = CACerts()
|
||||||
|
recent_uas = RecentUAs()
|
||||||
rapydscript = RapydScript()
|
rapydscript = RapydScript()
|
||||||
|
|
||||||
from setup.publish import Manual, TagRelease, Stage1, Stage2, \
|
from setup.publish import Manual, TagRelease, Stage1, Stage2, \
|
||||||
|
@ -327,7 +327,7 @@ class Bootstrap(Command):
|
|||||||
|
|
||||||
description = 'Bootstrap a fresh checkout of calibre from git to a state where it can be installed. Requires various development tools/libraries/headers'
|
description = 'Bootstrap a fresh checkout of calibre from git to a state where it can be installed. Requires various development tools/libraries/headers'
|
||||||
TRANSLATIONS_REPO = 'https://github.com/kovidgoyal/calibre-translations.git'
|
TRANSLATIONS_REPO = 'https://github.com/kovidgoyal/calibre-translations.git'
|
||||||
sub_commands = 'build iso639 iso3166 translations gui resources cacerts mathjax'.split()
|
sub_commands = 'build iso639 iso3166 translations gui resources cacerts recent_uas mathjax'.split()
|
||||||
|
|
||||||
def add_options(self, parser):
|
def add_options(self, parser):
|
||||||
parser.add_option('--ephemeral', default=False, action='store_true',
|
parser.add_option('--ephemeral', default=False, action='store_true',
|
||||||
|
@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os, cPickle, re, shutil, marshal, zipfile, glob, time, sys, hashlib, json, errno, subprocess
|
import os, cPickle, re, shutil, marshal, zipfile, glob, time, sys, hashlib, json, errno, subprocess
|
||||||
from zlib import compress
|
from zlib import compress
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
# True when the CI environment variable equals 'true' (compared
# case-insensitively). RecentUAs.get_list() checks this flag to avoid
# downloading anything from the network.
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||||
|
|
||||||
from setup import Command, basenames, __appname__
|
from setup import Command, basenames, __appname__
|
||||||
|
|
||||||
@ -26,6 +27,12 @@ def get_opts_from_parser(parser):
|
|||||||
for x in do_opt(o):
|
for x in do_opt(o):
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
def download_securely(url):
    '''
    Fetch ``url`` by running the external ``curl`` program and return the
    raw bytes it wrote to stdout.

    subprocess.check_output() raises CalledProcessError if curl exits with a
    non-zero status.
    '''
    # We use curl here as on some OSes (OS X) when bootstrapping calibre,
    # python will be unable to validate certificates until after cacerts is
    # installed
    curl_cmd = ['curl', '-fsSL', url]
    return subprocess.check_output(curl_cmd)
|
||||||
|
|
||||||
class Coffee(Command): # {{{
|
class Coffee(Command): # {{{
|
||||||
|
|
||||||
description = 'Compile coffeescript files into javascript'
|
description = 'Compile coffeescript files into javascript'
|
||||||
@ -235,10 +242,7 @@ class CACerts(Command): # {{{
|
|||||||
if err.errno != errno.ENOENT:
|
if err.errno != errno.ENOENT:
|
||||||
raise
|
raise
|
||||||
raw = b''
|
raw = b''
|
||||||
# We use curl here as on some OSes (OS X) when bootstrapping calibre,
|
nraw = download_securely('https://curl.haxx.se/ca/cacert.pem')
|
||||||
# python will be unable to validate certificates until after cacerts is
|
|
||||||
# installed
|
|
||||||
nraw = subprocess.check_output(['curl', '-L', 'https://curl.haxx.se/ca/cacert.pem'])
|
|
||||||
if not nraw:
|
if not nraw:
|
||||||
raise RuntimeError('Failed to download CA cert bundle')
|
raise RuntimeError('Failed to download CA cert bundle')
|
||||||
if nraw != raw:
|
if nraw != raw:
|
||||||
@ -252,6 +256,39 @@ class CACerts(Command): # {{{
|
|||||||
get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH))
|
get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH))
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
class RecentUAs(Command):  # {{{
    '''
    Build command that writes a short list of browser user agent strings to
    the resources directory (UA_PATH).
    '''

    description = 'Get updated list of recent browser user agents'
    UA_PATH = os.path.join(Command.RESOURCES, 'common-user-agents.txt')

    def get_list(self):
        '''
        Return a list of user agent strings.

        When is_ci is set, a fixed built-in list is returned and nothing is
        downloaded. Otherwise the list is extracted from the <textarea>
        whose tag contains "get-the-list" in the downloaded page, one user
        agent per non-blank line.

        Raises RuntimeError if that textarea cannot be found in the page.
        '''
        if is_ci:
            # Don't hammer the server from CI
            return [
                # IE 11 - windows 10
                'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 8.1
                'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 8
                'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
                # IE 11 - windows 7
                'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 10
                'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 8.1
                'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
                # 32bit IE 11 on 64 bit win 7
                'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
            ]
        raw = download_securely('https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8')
        match = re.search(r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
        if match is None:
            # Without this check a change in the page layout would surface
            # as an opaque AttributeError on None
            raise RuntimeError('Failed to find the list of user agents in the downloaded page')
        stripped = (x.strip() for x in match.group(1).splitlines())
        return [x for x in stripped if x]

    def run(self, opts):
        '''
        Write the first ten entries from get_list() to UA_PATH, one per
        line, encoded as ASCII.

        Raises RuntimeError if no user agents were obtained, so that the
        resource file is never replaced by an empty one.
        '''
        lines = self.get_list()[:10]
        if not lines:
            raise RuntimeError('Failed to download list of common user agents')
        with open(self.UA_PATH, 'wb') as f:
            f.write('\n'.join(lines).encode('ascii'))
# }}}
|
||||||
|
|
||||||
class RapydScript(Command): # {{{
|
class RapydScript(Command): # {{{
|
||||||
|
|
||||||
description = 'Compile RapydScript to JavaScript'
|
description = 'Compile RapydScript to JavaScript'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user