py3: Port urllib in metadata sources

This commit is contained in:
Kovid Goyal 2019-04-01 15:30:44 +05:30
parent 96f4c4c3a2
commit e0205790b9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
9 changed files with 43 additions and 14 deletions

View File

@@ -12,7 +12,10 @@ try:
 except ImportError:
     from Queue import Empty, Queue
 from threading import Thread
-from urlparse import urlparse
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
 from calibre import as_unicode, browser, random_user_agent
 from calibre.ebooks.metadata import check_isbn
@@ -579,7 +582,10 @@ class Worker(Thread): # Get details {{{
         return sanitize_comments_html(desc)
     def parse_comments(self, root, raw):
-        from urllib import unquote
+        try:
+            from urllib.parse import unquote
+        except ImportError:
+            from urllib import unquote
         ans = ''
         ns = tuple(self.selector('#bookDescription_feature_div noscript'))
         if ns:
@@ -1048,7 +1054,10 @@ class Amazon(Source):
     def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
                      domain=None, for_amazon=True):
-        from urllib import urlencode
+        try:
+            from urllib.parse import urlencode
+        except ImportError:
+            from urllib import urlencode
         if domain is None:
             domain = self.domain

View File

@@ -11,7 +11,10 @@ from calibre.ebooks.metadata.sources.base import Source, Option
 def get_urls(br, tokens):
-    from urllib import quote_plus
+    try:
+        from urllib.parse import quote_plus
+    except ImportError:
+        from urllib import quote_plus
     from mechanize import Request
     from lxml import html
     escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]

View File

@@ -178,7 +178,10 @@ class Douban(Source):
     # }}}
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
-        from urllib import urlencode
+        try:
+            from urllib.parse import urlencode
+        except ImportError:
+            from urllib import urlencode
         SEARCH_URL = 'https://api.douban.com/book/subjects?'
         ISBN_URL = 'https://api.douban.com/book/subject/isbn/'
         SUBJECT_URL = 'https://api.douban.com/book/subject/'

View File

@@ -234,7 +234,10 @@ class Edelweiss(Source):
     # }}}
     def create_query(self, log, title=None, authors=None, identifiers={}):
-        from urllib import urlencode
+        try:
+            from urllib.parse import urlencode
+        except ImportError:
+            from urllib import urlencode
         import time
         BASE_URL = ('https://www.edelweiss.plus/GetTreelineControl.aspx?'
         'controlName=/uc/listviews/controls/ListView_data.ascx&itemID=0&resultType=32&dashboardType=8&itemType=1&dataType=products&keywordSearch&')

View File

@@ -199,7 +199,10 @@ class GoogleBooks(Source):
     # }}}
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
-        from urllib import urlencode
+        try:
+            from urllib.parse import urlencode
+        except ImportError:
+            from urllib import urlencode
         BASE_URL = 'https://books.google.com/books/feeds/volumes?'
         isbn = check_isbn(identifiers.get('isbn', None))
         q = ''

View File

@@ -65,7 +65,10 @@ class GoogleImages(Source):
     def get_image_urls(self, title, author, log, abort, timeout):
         from calibre.utils.cleantext import clean_ascii_chars
-        from urllib import urlencode
+        try:
+            from urllib.parse import urlencode
+        except ImportError:
+            from urllib import urlencode
         import json
         from collections import OrderedDict
         ans = OrderedDict()

View File

@@ -12,8 +12,7 @@ from datetime import datetime
 from threading import Thread
 from io import BytesIO
 from operator import attrgetter
-from urlparse import urlparse
-from urllib import quote
+from polyglot.urllib import urlparse, quote
 from calibre.customize.ui import metadata_plugins, all_metadata_plugins
 from calibre.ebooks.metadata import check_issn, authors_to_sort_string

View File

@@ -57,12 +57,15 @@ class Ozon(Source):
     )
     def get_book_url(self, identifiers): # {{{
-        import urllib2
+        try:
+            from urllib.parse import quote
+        except ImportError:
+            from urllib import quote
         ozon_id = identifiers.get('ozon', None)
         res = None
         if ozon_id:
             # no affiliateId is used in search/detail
-            url = '{}/context/detail/id/{}'.format(self.ozon_url, urllib2.quote(ozon_id), _get_affiliateId())
+            url = '{}/context/detail/id/{}'.format(self.ozon_url, quote(ozon_id), _get_affiliateId())
             res = ('ozon', ozon_id, url)
         return res

View File

@@ -8,8 +8,11 @@ import json
 import re
 import time
 from collections import defaultdict, namedtuple
-from urllib import quote_plus, urlencode
-from urlparse import parse_qs
+try:
+    from urllib.parse import parse_qs, quote_plus, urlencode
+except ImportError:
+    from urlparse import parse_qs
+    from urllib import quote_plus, urlencode
 from lxml import etree