Make plugins_mirror.py more polyglot

commit 449672bab1
parent 0d90159795
Author: Kovid Goyal
Date:   2020-02-10 20:41:23 +05:30

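The changes below replace Python 2-only imports (HTMLParser, urllib2, urlparse) with try/except shims that prefer the Python 3 module locations, and materialize map()/filter() results so the script behaves identically under both interpreters. A minimal sketch of the shim pattern, shown here with urlopen only:

    # Try the Python 3 location first, then fall back to the Python 2 name.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2
    # Either way, the rest of the file calls urlopen() unqualified.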

@@ -10,7 +10,6 @@ import bz2
 import errno
 import glob
 import gzip
-import HTMLParser
 import io
 import json
 import os
@@ -22,8 +21,6 @@ import subprocess
 import sys
 import tempfile
 import time
-import urllib2
-import urlparse
 import zipfile
 import zlib
 from collections import namedtuple
@@ -33,6 +30,24 @@ from email.utils import parsedate
 from functools import partial
 from multiprocessing.pool import ThreadPool
 from xml.sax.saxutils import escape, quoteattr
+
+try:
+    from html import unescape as u
+except ImportError:
+    from HTMLParser import HTMLParser
+    u = HTMLParser().unescape
+
+try:
+    from urllib.parse import parse_qs, urlparse
+except ImportError:
+    from urlparse import parse_qs, urlparse
+
+try:
+    from urllib.error import URLError
+    from urllib.request import urlopen, Request, build_opener
+except Exception:
+    from urllib2 import urlopen, Request, build_opener, URLError
+
 # }}}
 
 USER_AGENT = 'calibre mirror'
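Note on the first hunk: on Python 3, html.unescape() is the documented replacement for the old HTMLParser().unescape bound method, so the shim binds whichever exists to u. A quick check of the Python 3 branch:

    from html import unescape
    print(unescape('&amp; &lt;'))  # prints: & <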
@@ -44,15 +59,13 @@ INDEX = MR_URL + 'showpost.php?p=1362767&postcount=1'
 # INDEX = 'file:///t/raw.html'
 
 IndexEntry = namedtuple('IndexEntry', 'name url donate history uninstall deprecated thread_id')
-u = HTMLParser.HTMLParser().unescape
-
 socket.setdefaulttimeout(30)
 
 
 def read(url, get_info=False):  # {{{
     if url.startswith("file://"):
-        return urllib2.urlopen(url).read()
-    opener = urllib2.build_opener()
+        return urlopen(url).read()
+    opener = build_opener()
     opener.addheaders = [
         ('User-Agent', USER_AGENT),
         ('Accept-Encoding', 'gzip,deflate'),
@@ -62,7 +75,7 @@ def read(url, get_info=False):  # {{{
         try:
             res = opener.open(url)
             break
-        except urllib2.URLError as e:
+        except URLError as e:
             if not isinstance(e.reason, socket.timeout) or i == 9:
                 raise
             time.sleep(random.randint(10, 45))
@@ -82,7 +95,7 @@ def read(url, get_info=False):  # {{{
 
 
 def url_to_plugin_id(url, deprecated):
-    query = urlparse.parse_qs(urlparse.urlparse(url).query)
+    query = parse_qs(urlparse(url).query)
     ans = (query['t'] if 't' in query else query['p'])[0]
     if deprecated:
         ans += '-deprecated'
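parse_qs() maps every query key to a list of values, which is why the code indexes [0] after choosing the 't' or 'p' key. A small illustration with a hypothetical URL:

    from urllib.parse import parse_qs, urlparse

    query = parse_qs(urlparse('https://example.com/showthread.php?t=123').query)
    print(query)           # {'t': ['123']}
    print(query['t'][0])   # '123'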
@@ -149,11 +162,13 @@ def convert_node(fields, x, names={}, import_data=None):
         return x.s.decode('utf-8') if isinstance(x.s, bytes) else x.s
     elif name == 'Num':
         return x.n
+    elif name == 'Constant':
+        return x.value
     elif name in {'Set', 'List', 'Tuple'}:
         func = {'Set':set, 'List':list, 'Tuple':tuple}[name]
-        return func(map(conv, x.elts))
+        return func(list(map(conv, x.elts)))
     elif name == 'Dict':
-        keys, values = map(conv, x.keys), map(conv, x.values)
+        keys, values = list(map(conv, x.keys)), list(map(conv, x.values))
         return dict(zip(keys, values))
     elif name == 'Call':
         if len(x.args) != 1 and len(x.keywords) != 0:
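The new 'Constant' branch is needed because ast.parse() on Python 3.8+ represents literals as ast.Constant nodes, keeping ast.Num and ast.Str only as deprecated aliases. A quick demonstration:

    import ast

    node = ast.parse('x = 42').body[0].value
    print(type(node).__name__)  # 'Constant' on Python 3.8+, 'Num' on older interpreters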
@@ -182,7 +197,7 @@ def get_import_data(name, mod, zf, names):
     if mod in names:
         raw = zf.open(names[mod]).read()
         module = ast.parse(raw, filename='__init__.py')
-        top_level_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(module))
+        top_level_assigments = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'Assign']
         for node in top_level_assigments:
             targets = {getattr(t, 'id', None) for t in node.targets}
             targets.discard(None)
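The filter()-to-list-comprehension rewrites here and below guard against Python 3's lazy iterators: filter() and map() return single-use iterators there, so code that traverses the result more than once would silently see nothing on the second pass. For example:

    matches = filter(str.isdigit, '1a2')
    print(list(matches))  # ['1', '2']
    print(list(matches))  # [] (the iterator is already exhausted)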
@@ -196,9 +211,9 @@ def get_import_data(name, mod, zf, names):
 
 def parse_metadata(raw, namelist, zf):
     module = ast.parse(raw, filename='__init__.py')
-    top_level_imports = filter(lambda x:x.__class__.__name__ == 'ImportFrom', ast.iter_child_nodes(module))
-    top_level_classes = tuple(filter(lambda x:x.__class__.__name__ == 'ClassDef', ast.iter_child_nodes(module)))
-    top_level_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(module))
+    top_level_imports = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'ImportFrom']
+    top_level_classes = tuple(x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'ClassDef')
+    top_level_assigments = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'Assign']
     defaults = {
         'name':'', 'description':'',
         'supported_platforms':['windows', 'osx', 'linux'],
@@ -226,7 +241,7 @@ def parse_metadata(raw, namelist, zf):
                 plugin_import_found |= inames
             else:
                 all_imports.append((mod, [n.name for n in names]))
-                imported_names[n.asname or n.name] = mod
+                imported_names[names[-1].asname or names[-1].name] = mod
 
     if not plugin_import_found:
         return all_imports
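The switch to names[-1] compensates for a scoping change: in Python 2 the n from the preceding list comprehension leaked into the enclosing scope and held the last element, whereas Python 3 comprehensions have their own scope, so the old line would raise NameError there. Indexing names[-1] states the Python 2 behaviour explicitly:

    names = ['a', 'b', 'c']
    items = [n for n in names]
    # Python 2: n is 'c' here; Python 3: n is undefined (NameError)
    last = names[-1]  # identical result on both interpreters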
@@ -245,7 +260,7 @@ def parse_metadata(raw, namelist, zf):
             names[x] = val
 
     def parse_class(node):
-        class_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(node))
+        class_assigments = [x for x in ast.iter_child_nodes(node) if x.__class__.__name__ == 'Assign']
         found = {}
         for node in class_assigments:
             targets = {getattr(t, 'id', None) for t in node.targets}
@@ -337,7 +352,7 @@ def update_plugin_from_entry(plugin, entry):
 
 def fetch_plugin(old_index, entry):
     lm_map = {plugin['thread_id']:plugin for plugin in old_index.values()}
-    raw = read(entry.url)
+    raw = read(entry.url).decode('utf-8', 'replace')
     url, name = parse_plugin_zip_url(raw)
     if url is None:
         raise ValueError('Failed to find zip file URL for entry: %s' % repr(entry))
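read() returns whatever urlopen().read() produced, which is bytes on Python 3, so the thread HTML is decoded before the string-based parse_plugin_zip_url() sees it. The 'replace' error handler keeps a stray invalid byte from aborting the whole mirror run:

    data = b'caf\xe9'  # not valid UTF-8
    print(data.decode('utf-8', 'replace'))  # 'caf' plus U+FFFD, the replacement character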
@@ -346,9 +361,9 @@ def fetch_plugin(old_index, entry):
     if plugin is not None:
         # Previously downloaded plugin
         lm = datetime(*tuple(map(int, re.split(r'\D', plugin['last_modified'])))[:6])
-        request = urllib2.Request(url)
+        request = Request(url)
         request.get_method = lambda : 'HEAD'
-        with closing(urllib2.urlopen(request)) as response:
+        with closing(urlopen(request)) as response:
             info = response.info()
         slm = datetime(*parsedate(info.get('Last-Modified'))[:6])
         if lm >= slm:
@@ -413,7 +428,7 @@ def fetch_plugins(old_index):
             src = plugin['file']
             plugin['file'] = src.partition('_')[-1]
             os.rename(src, plugin['file'])
-    raw = bz2.compress(json.dumps(ans, sort_keys=True, indent=4, separators=(',', ': ')))
+    raw = bz2.compress(json.dumps(ans, sort_keys=True, indent=4, separators=(',', ': ')).encode('utf-8'))
     atomic_write(raw, PLUGINS)
     # Cleanup any extra .zip files
     all_plugin_files = {p['file'] for p in ans.values()}
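In the same vein, bz2.compress() accepts only bytes-like input on Python 3, while json.dumps() returns str, hence the explicit encode:

    import bz2
    import json

    payload = json.dumps({'a': 1})               # str
    raw = bz2.compress(payload.encode('utf-8'))  # bytes in, bytes out
    assert bz2.decompress(raw).decode('utf-8') == payload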
@@ -503,7 +518,7 @@ h1 { text-align: center }
         name, count = x
         return '<tr><td>%s</td><td>%s</td></tr>\n' % (escape(name), count)
 
-    pstats = map(plugin_stats, sorted(stats.items(), reverse=True, key=lambda x:x[1]))
+    pstats = list(map(plugin_stats, sorted(stats.items(), reverse=True, key=lambda x:x[1])))
     stats = '''\
 <!DOCTYPE html>
 <html>