More delay-load optimizations. The time taken to import all builtin plugins is now 60% lower than before I started.

This commit is contained in:
Kovid Goyal 2012-02-06 14:01:40 +05:30
parent 12d81e629e
commit 9fb9e89e91
23 changed files with 459 additions and 379 deletions

View File

@ -1645,3 +1645,33 @@ plugins += [
# }}} # }}}
if __name__ == '__main__':
    # Load-speed test: import every builtin plugin in a fresh interpreter,
    # report how long that took, and exit non-zero if any of the modules
    # that are supposed to be loaded lazily were imported as a side effect.
    import subprocess, textwrap

    # The child script is kept in a string so that it runs in a clean
    # process whose sys.modules is not polluted by this one.
    script = textwrap.dedent(
        '''
        from __future__ import print_function
        import time, sys, init_calibre
        st = time.time()
        import calibre.customize.builtins
        t = time.time() - st
        ret = 0
        for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid',
                'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image',
                'sqlite3', 'mechanize', 'httplib', 'xml'):
            if x in sys.modules:
                ret = 1
                print (x, 'has been loaded by a plugin')
        if ret:
            print ('\\nA good way to trackdown what is loading something is to run'
            ' python -c "import init_calibre; import calibre.customize.builtins"')
            print()
        print ('Time taken to import all plugins: %.2f'%t)
        sys.exit(ret)
        ''')
    command = ['python', '-c', script]

    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError:
        # The child already printed its diagnostics; just signal failure.
        raise SystemExit(1)

View File

@ -5,7 +5,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from itertools import izip from itertools import izip
from xml.sax.saxutils import escape
from calibre.customize import Plugin as _Plugin from calibre.customize import Plugin as _Plugin
@ -268,6 +267,7 @@ class OutputProfile(Plugin):
@classmethod
def tags_to_string(cls, tags):
    '''
    Join ``tags`` with ', ' and return the XML-escaped result.
    '''
    # Imported at call time so that loading this module does not import xml
    from xml.sax.saxutils import escape as xml_escape
    joined = ', '.join(tags)
    return xml_escape(joined)
class iPadOutput(OutputProfile): class iPadOutput(OutputProfile):

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
import os import os
from contextlib import closing from contextlib import closing
import sqlite3 as sqlite
class Bookmark(): # {{{ class Bookmark(): # {{{
''' '''
@ -32,7 +31,7 @@ class Bookmark(): # {{{
def get_bookmark_data(self): def get_bookmark_data(self):
''' Return the timestamp and last_read_location ''' ''' Return the timestamp and last_read_location '''
import sqlite3 as sqlite
user_notes = {} user_notes = {}
self.timestamp = os.path.getmtime(self.path) self.timestamp = os.path.getmtime(self.path)
with closing(sqlite.connect(self.db_path)) as connection: with closing(sqlite.connect(self.db_path)) as connection:

View File

@ -6,7 +6,6 @@ __copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, time, calendar import os, time, calendar
import sqlite3 as sqlite
from contextlib import closing from contextlib import closing
from calibre.devices.usbms.books import BookList from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book from calibre.devices.kobo.books import Book
@ -16,7 +15,6 @@ from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS, debug_print from calibre.devices.usbms.driver import USBMS, debug_print
from calibre import prints from calibre import prints
from calibre.devices.usbms.books import CollectionsBookList from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.magick.draw import save_cover_data_to
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
class KOBO(USBMS): class KOBO(USBMS):
@ -230,6 +228,7 @@ class KOBO(USBMS):
traceback.print_exc() traceback.print_exc()
return changed return changed
import sqlite3 as sqlite
with closing(sqlite.connect( with closing(sqlite.connect(
self.normalize_path(self._main_prefix + self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection: '.kobo/KoboReader.sqlite'))) as connection:
@ -344,6 +343,7 @@ class KOBO(USBMS):
# 2) volume_shorcover # 2) volume_shorcover
# 2) content # 2) content
import sqlite3 as sqlite
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType) debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
with closing(sqlite.connect(self.normalize_path(self._main_prefix + with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection: '.kobo/KoboReader.sqlite'))) as connection:
@ -739,6 +739,8 @@ class KOBO(USBMS):
# Needs to be outside books collection as in the case of removing # Needs to be outside books collection as in the case of removing
# the last book from the collection the list of books is empty # the last book from the collection the list of books is empty
# and the removal of the last book would not occur # and the removal of the last book would not occur
import sqlite3 as sqlite
with closing(sqlite.connect(self.normalize_path(self._main_prefix + with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection: '.kobo/KoboReader.sqlite'))) as connection:
@ -850,6 +852,7 @@ class KOBO(USBMS):
debug_print('FAILED to upload cover', filepath) debug_print('FAILED to upload cover', filepath)
def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale): def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
from calibre.utils.magick.draw import save_cover_data_to
if metadata.cover: if metadata.cover:
cover = self.normalize_path(metadata.cover.replace('/', os.sep)) cover = self.normalize_path(metadata.cover.replace('/', os.sep))
@ -859,6 +862,7 @@ class KOBO(USBMS):
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath) ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
ContentID = self.contentid_from_path(filepath, ContentType) ContentID = self.contentid_from_path(filepath, ContentType)
import sqlite3 as sqlite
with closing(sqlite.connect(self.normalize_path(self._main_prefix + with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection: '.kobo/KoboReader.sqlite'))) as connection:

View File

@ -12,8 +12,6 @@ Device driver for the SONY T1 devices
''' '''
import os, time, re import os, time, re
import sqlite3 as sqlite
from sqlite3 import DatabaseError
from contextlib import closing from contextlib import closing
from datetime import date from datetime import date
@ -146,6 +144,8 @@ class PRST1(USBMS):
return True return True
def books(self, oncard=None, end_session=True): def books(self, oncard=None, end_session=True):
import sqlite3 as sqlite
dummy_bl = BookList(None, None, None) dummy_bl = BookList(None, None, None)
if ( if (
@ -246,6 +246,8 @@ class PRST1(USBMS):
debug_print('PRST1: finished sync_booklists') debug_print('PRST1: finished sync_booklists')
def update_device_database(self, booklist, collections_attributes, oncard): def update_device_database(self, booklist, collections_attributes, oncard):
import sqlite3 as sqlite
debug_print('PRST1: starting update_device_database') debug_print('PRST1: starting update_device_database')
plugboard = None plugboard = None
@ -274,6 +276,8 @@ class PRST1(USBMS):
def update_device_books(self, connection, booklist, source_id, plugboard, def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath): dbpath):
from sqlite3 import DatabaseError
opts = self.settings() opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS] upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS] refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@ -489,6 +493,8 @@ class PRST1(USBMS):
debug_print('PRS-T1: finished rebuild_collections') debug_print('PRS-T1: finished rebuild_collections')
def upload_cover(self, path, filename, metadata, filepath): def upload_cover(self, path, filename, metadata, filepath):
import sqlite3 as sqlite
debug_print('PRS-T1: uploading cover') debug_print('PRS-T1: uploading cover')
if filepath.startswith(self._main_prefix): if filepath.startswith(self._main_prefix):

View File

@ -8,7 +8,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, codecs import re, codecs
from chardet import detect
ENCODING_PATS = [ ENCODING_PATS = [
re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
@ -34,8 +33,13 @@ def substitute_entites(raw):
_CHARSET_ALIASES = { "macintosh" : "mac-roman", _CHARSET_ALIASES = { "macintosh" : "mac-roman",
"x-sjis" : "shift-jis" } "x-sjis" : "shift-jis" }
def detect(*args, **kwargs):
    '''
    Call chardet's ``detect()`` with the given arguments and return its result.

    chardet is imported here, at call time, rather than at module import
    time, so that merely importing this module does not load it.
    '''
    from chardet import detect as chardet_detect
    result = chardet_detect(*args, **kwargs)
    return result
def force_encoding(raw, verbose, assume_utf8=False): def force_encoding(raw, verbose, assume_utf8=False):
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
try: try:
chardet = detect(raw[:1024*50]) chardet = detect(raw[:1024*50])
except: except:

View File

@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.constants import plugins
pdfreflow, pdfreflow_err = plugins['pdfreflow']
class PDFInput(InputFormatPlugin): class PDFInput(InputFormatPlugin):
@ -29,6 +27,9 @@ class PDFInput(InputFormatPlugin):
]) ])
def convert_new(self, stream, accelerators): def convert_new(self, stream, accelerators):
from calibre.constants import plugins
pdfreflow, pdfreflow_err = plugins['pdfreflow']
from calibre.ebooks.pdf.reflow import PDFDocument from calibre.ebooks.pdf.reflow import PDFDocument
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars
if pdfreflow_err: if pdfreflow_err:

View File

@ -6,7 +6,6 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime
class Epubcheck(ePubFixer): class Epubcheck(ePubFixer):
@ -35,6 +34,8 @@ class Epubcheck(ePubFixer):
return 'epubcheck' return 'epubcheck'
def fix_pubdates(self): def fix_pubdates(self):
from calibre.utils.date import parse_date, strptime
dirtied = False dirtied = False
opf = self.container.opf opf = self.container.opf
for dcdate in opf.xpath('//dc:date', for dcdate in opf.xpath('//dc:date',

View File

@ -12,7 +12,6 @@ from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
from calibre.constants import filesystem_encoding, preferred_encoding from calibre.constants import filesystem_encoding, preferred_encoding
from calibre.library.field_metadata import FieldMetadata from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz
from calibre.utils.magick import Image
from calibre import isbytestring from calibre import isbytestring
# Translate datetimes to and from strings. The string form is the datetime in # Translate datetimes to and from strings. The string form is the datetime in
@ -37,6 +36,8 @@ def encode_thumbnail(thumbnail):
''' '''
Encode the image part of a thumbnail, then return the 3 part tuple Encode the image part of a thumbnail, then return the 3 part tuple
''' '''
from calibre.utils.magick import Image
if thumbnail is None: if thumbnail is None:
return None return None
if not isinstance(thumbnail, (tuple, list)): if not isinstance(thumbnail, (tuple, list)):

View File

@ -12,19 +12,14 @@ from urllib import urlencode
from threading import Thread from threading import Thread
from Queue import Queue, Empty from Queue import Queue, Empty
from lxml.html import tostring
from calibre import as_unicode from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase, from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
fixauthors) fixauthors)
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
from calibre.utils.soupparser import fromstring
class Worker(Thread): # Get details {{{ class Worker(Thread): # Get details {{{
@ -43,6 +38,8 @@ class Worker(Thread): # Get details {{{
self.browser = browser.clone_browser() self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None self.cover_url = self.amazon_id = self.isbn = None
self.domain = domain self.domain = domain
from lxml.html import tostring
self.tostring = tostring
months = { months = {
'de': { 'de': {
@ -176,6 +173,10 @@ class Worker(Thread): # Get details {{{
self.log.exception('get_details failed for url: %r'%self.url) self.log.exception('get_details failed for url: %r'%self.url)
def get_details(self): def get_details(self):
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.soupparser import fromstring
from calibre.ebooks.chardet import xml_to_unicode
try: try:
raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip() raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
except Exception as e: except Exception as e:
@ -210,7 +211,7 @@ class Worker(Thread): # Get details {{{
errmsg = root.xpath('//*[@id="errorMessage"]') errmsg = root.xpath('//*[@id="errorMessage"]')
if errmsg: if errmsg:
msg = 'Failed to parse amazon details page: %r'%self.url msg = 'Failed to parse amazon details page: %r'%self.url
msg += tostring(errmsg, method='text', encoding=unicode).strip() msg += self.tostring(errmsg, method='text', encoding=unicode).strip()
self.log.error(msg) self.log.error(msg)
return return
@ -322,10 +323,10 @@ class Worker(Thread): # Get details {{{
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0] tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]') actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
if actual_title: if actual_title:
title = tostring(actual_title[0], encoding=unicode, title = self.tostring(actual_title[0], encoding=unicode,
method='text').strip() method='text').strip()
else: else:
title = tostring(tdiv, encoding=unicode, method='text').strip() title = self.tostring(tdiv, encoding=unicode, method='text').strip()
return re.sub(r'[(\[].*[)\]]', '', title).strip() return re.sub(r'[(\[].*[)\]]', '', title).strip()
def parse_authors(self, root): def parse_authors(self, root):
@ -337,7 +338,7 @@ class Worker(Thread): # Get details {{{
''') ''')
for x in aname: for x in aname:
x.tail = '' x.tail = ''
authors = [tostring(x, encoding=unicode, method='text').strip() for x authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
in aname] in aname]
authors = [a for a in authors if a] authors = [a for a in authors if a]
return authors return authors
@ -356,6 +357,8 @@ class Worker(Thread): # Get details {{{
return float(m.group(1))/float(m.group(3)) * 5 return float(m.group(1))/float(m.group(3)) * 5
def parse_comments(self, root): def parse_comments(self, root):
from calibre.library.comments import sanitize_comments_html
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]') desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
if desc: if desc:
desc = desc[0] desc = desc[0]
@ -365,7 +368,7 @@ class Worker(Thread): # Get details {{{
for a in desc.xpath('descendant::a[@href]'): for a in desc.xpath('descendant::a[@href]'):
del a.attrib['href'] del a.attrib['href']
a.tag = 'span' a.tag = 'span'
desc = tostring(desc, method='html', encoding=unicode).strip() desc = self.tostring(desc, method='html', encoding=unicode).strip()
# Encoding bug in Amazon data U+fffd (replacement char) # Encoding bug in Amazon data U+fffd (replacement char)
# in some examples it is present in place of ' # in some examples it is present in place of '
@ -602,6 +605,11 @@ class Amazon(Source):
Note this method will retry without identifiers automatically if no Note this method will retry without identifiers automatically if no
match is found with identifiers. match is found with identifiers.
''' '''
from lxml.html import tostring
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.soupparser import fromstring
from calibre.ebooks.chardet import xml_to_unicode
query, domain = self.create_query(log, title=title, authors=authors, query, domain = self.create_query(log, title=title, authors=authors,
identifiers=identifiers) identifiers=identifiers)
if query is None: if query is None:

View File

@ -12,7 +12,6 @@ from future_builtins import map
from calibre import browser, random_user_agent from calibre import browser, random_user_agent
from calibre.customize import Plugin from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream
from calibre.utils.config import JSONConfig from calibre.utils.config import JSONConfig
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
from calibre.utils.icu import capitalize, lower, upper from calibre.utils.icu import capitalize, lower, upper
@ -34,6 +33,7 @@ msprefs.defaults['fewer_tags'] = True
msprefs.defaults['cover_priorities'] = {'Google':2} msprefs.defaults['cover_priorities'] = {'Google':2}
def create_log(ostream=None):
    '''
    Build a ThreadSafeLog at DEBUG level whose only output is a FileStream
    wrapping ``ostream``, and return it.
    '''
    # Imported at call time to keep calibre.utils.logging out of module load
    from calibre.utils.logging import FileStream, ThreadSafeLog
    logger = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
    stream = FileStream(ostream)
    logger.outputs = [stream]
    return logger

View File

@ -12,14 +12,10 @@ from urllib import urlencode
from functools import partial from functools import partial
from Queue import Queue, Empty from Queue import Queue, Empty
from lxml import etree
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre import as_unicode from calibre import as_unicode
NAMESPACES = { NAMESPACES = {
@ -28,22 +24,6 @@ NAMESPACES = {
'db': 'http://www.douban.com/xmlns/', 'db': 'http://www.douban.com/xmlns/',
'gd': 'http://schemas.google.com/g/2005' 'gd': 'http://schemas.google.com/g/2005'
} }
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
booktag = XPath("descendant::db:tag/attribute::name")
rating = XPath("descendant::gd:rating/attribute::average")
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
def get_details(browser, url, timeout): # {{{ def get_details(browser, url, timeout): # {{{
try: try:
if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '': if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '':
@ -61,6 +41,25 @@ def get_details(browser, url, timeout): # {{{
# }}} # }}}
def to_metadata(browser, log, entry_, timeout): # {{{ def to_metadata(browser, log, entry_, timeout): # {{{
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
booktag = XPath("descendant::db:tag/attribute::name")
rating = XPath("descendant::gd:rating/attribute::average")
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
def get_text(extra, x): def get_text(extra, x):
try: try:
ans = x(extra) ans = x(extra)
@ -275,6 +274,7 @@ class Douban(Source):
def get_all_details(self, br, log, entries, abort, # {{{ def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout): result_queue, timeout):
from lxml import etree
for relevance, i in enumerate(entries): for relevance, i in enumerate(entries):
try: try:
ans = to_metadata(br, log, i, timeout) ans = to_metadata(br, log, i, timeout)
@ -298,6 +298,13 @@ class Douban(Source):
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30): identifiers={}, timeout=30):
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
query = self.create_query(log, title=title, authors=authors, query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers) identifiers=identifiers)
if not query: if not query:

View File

@ -12,8 +12,6 @@ from urllib import urlencode
from functools import partial from functools import partial
from Queue import Queue, Empty from Queue import Queue, Empty
from lxml import etree
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
@ -29,23 +27,6 @@ NAMESPACES = {
'dc' : 'http://purl.org/dc/terms', 'dc' : 'http://purl.org/dc/terms',
'gd' : 'http://schemas.google.com/g/2005' 'gd' : 'http://schemas.google.com/g/2005'
} }
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_details(browser, url, timeout): # {{{ def get_details(browser, url, timeout): # {{{
try: try:
raw = browser.open_novisit(url, timeout=timeout).read() raw = browser.open_novisit(url, timeout=timeout).read()
@ -61,6 +42,24 @@ def get_details(browser, url, timeout): # {{{
# }}} # }}}
def to_metadata(browser, log, entry_, timeout): # {{{ def to_metadata(browser, log, entry_, timeout): # {{{
from lxml import etree
XPath = partial(etree.XPath, namespaces=NAMESPACES)
# total_results = XPath('//openSearch:totalResults')
# start_index = XPath('//openSearch:startIndex')
# items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_text(extra, x): def get_text(extra, x):
try: try:
@ -266,6 +265,7 @@ class GoogleBooks(Source):
def get_all_details(self, br, log, entries, abort, # {{{ def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout): result_queue, timeout):
from lxml import etree
for relevance, i in enumerate(entries): for relevance, i in enumerate(entries):
try: try:
ans = to_metadata(br, log, i, timeout) ans = to_metadata(br, log, i, timeout)
@ -289,6 +289,10 @@ class GoogleBooks(Source):
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30): identifiers={}, timeout=30):
from lxml import etree
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
query = self.create_query(log, title=title, authors=authors, query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers) identifiers=identifiers)
if not query: if not query:

View File

@ -9,12 +9,9 @@ __docformat__ = 'restructuredtext en'
from urllib import quote from urllib import quote
from lxml import etree
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower from calibre.utils.icu import lower
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
@ -122,6 +119,7 @@ class ISBNDB(Source):
result_queue.put(result) result_queue.put(result)
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers): def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
from lxml import etree
def tostring(x): def tostring(x):
if x is None: if x is None:
@ -198,6 +196,10 @@ class ISBNDB(Source):
def make_query(self, q, abort, title=None, authors=None, identifiers={}, def make_query(self, q, abort, title=None, authors=None, identifiers={},
max_pages=10, timeout=30): max_pages=10, timeout=30):
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
page_num = 1 page_num = 1
parser = etree.XMLParser(recover=True, no_network=True) parser = etree.XMLParser(recover=True, no_network=True)
br = self.browser br = self.browser

View File

@ -9,18 +9,14 @@ __docformat__ = 'restructuredtext en'
''' '''
Fetch metadata using Overdrive Content Reserve Fetch metadata using Overdrive Content Reserve
''' '''
import re, random, mechanize, copy, json import re, random, copy, json
from threading import RLock from threading import RLock
from Queue import Queue, Empty from Queue import Queue, Empty
from lxml import html
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
from calibre.utils.soupparser import fromstring
ovrdrv_data_cache = {} ovrdrv_data_cache = {}
cache_lock = RLock() cache_lock = RLock()
@ -80,6 +76,7 @@ class OverDrive(Source):
def download_cover(self, log, result_queue, abort, # {{{ def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30): title=None, authors=None, identifiers={}, timeout=30):
import mechanize
cached_url = self.get_cached_cover_url(identifiers) cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None: if cached_url is None:
log.info('No cached cover found, running identify') log.info('No cached cover found, running identify')
@ -170,6 +167,7 @@ class OverDrive(Source):
this page attempts to set a cookie that Mechanize doesn't like this page attempts to set a cookie that Mechanize doesn't like
copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
''' '''
import mechanize
goodcookies = br._ua_handlers['_cookies'].cookiejar goodcookies = br._ua_handlers['_cookies'].cookiejar
clean_cj = mechanize.CookieJar() clean_cj = mechanize.CookieJar()
cookies_to_copy = [] cookies_to_copy = []
@ -187,6 +185,7 @@ class OverDrive(Source):
br.set_cookiejar(clean_cj) br.set_cookiejar(clean_cj)
def overdrive_search(self, br, log, q, title, author): def overdrive_search(self, br, log, q, title, author):
import mechanize
# re-initialize the cookiejar to so that it's clean # re-initialize the cookiejar to so that it's clean
clean_cj = mechanize.CookieJar() clean_cj = mechanize.CookieJar()
br.set_cookiejar(clean_cj) br.set_cookiejar(clean_cj)
@ -303,6 +302,7 @@ class OverDrive(Source):
return '' return ''
def overdrive_get_record(self, br, log, q, ovrdrv_id): def overdrive_get_record(self, br, log, q, ovrdrv_id):
import mechanize
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}' search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc' results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
@ -393,6 +393,11 @@ class OverDrive(Source):
def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log): def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
from lxml import html
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.soupparser import fromstring
from calibre.library.comments import sanitize_comments_html
try: try:
raw = br.open_novisit(metadata_url).read() raw = br.open_novisit(metadata_url).read()
except Exception, e: except Exception, e:

View File

@ -6,15 +6,11 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import re
import urllib2
import datetime import datetime
from urllib import quote_plus from urllib import quote_plus
from Queue import Queue, Empty from Queue import Queue, Empty
from lxml import etree, html
from calibre import as_unicode from calibre import as_unicode
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
@ -43,6 +39,7 @@ class Ozon(Source):
isbnRegex = re.compile(isbnPattern) isbnRegex = re.compile(isbnPattern)
def get_book_url(self, identifiers): # {{{ def get_book_url(self, identifiers): # {{{
import urllib2
ozon_id = identifiers.get('ozon', None) ozon_id = identifiers.get('ozon', None)
res = None res = None
if ozon_id: if ozon_id:
@ -81,6 +78,9 @@ class Ozon(Source):
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=30): # {{{ identifiers={}, timeout=30): # {{{
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
if not self.is_configured(): if not self.is_configured():
return return
query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
@ -283,6 +283,9 @@ class Ozon(Source):
# }}} # }}}
def get_book_details(self, log, metadata, timeout): # {{{ def get_book_details(self, log, metadata, timeout): # {{{
from lxml import html, etree
from calibre.ebooks.chardet import xml_to_unicode
url = self.get_book_url(metadata.get_identifiers())[2] url = self.get_book_url(metadata.get_identifiers())[2]
raw = self.browser.open_novisit(url, timeout=timeout).read() raw = self.browser.open_novisit(url, timeout=timeout).read()

View File

@ -12,7 +12,6 @@ from types import StringType, UnicodeType
from calibre import (strftime) from calibre import (strftime)
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.library.catalogs import FIELDS, TEMPLATE_ALLOWED_FIELDS from calibre.library.catalogs import FIELDS, TEMPLATE_ALLOWED_FIELDS
from calibre.utils.logging import default_log as log
from calibre.customize.conversion import DummyReporter from calibre.customize.conversion import DummyReporter
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
@ -113,6 +112,7 @@ class BIBTEX(CatalogPlugin):
from calibre.utils.bibtex import BibTeX from calibre.utils.bibtex import BibTeX
from calibre.library.save_to_disk import preprocess_template from calibre.library.save_to_disk import preprocess_template
from calibre.utils.date import now as nowf from calibre.utils.date import now as nowf
from calibre.utils.logging import default_log as log
def create_bibtex_entry(entry, fields, mode, template_citation, def create_bibtex_entry(entry, fields, mode, template_citation,
bibtexdict, db, citation_bibtex=True, calibre_files=True): bibtexdict, db, citation_bibtex=True, calibre_files=True):

View File

@ -10,7 +10,6 @@ from collections import namedtuple
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.library.catalogs import FIELDS from calibre.library.catalogs import FIELDS
from calibre.utils.logging import default_log as log
from calibre.customize.conversion import DummyReporter from calibre.customize.conversion import DummyReporter
class CSV_XML(CatalogPlugin): class CSV_XML(CatalogPlugin):
@ -52,6 +51,7 @@ class CSV_XML(CatalogPlugin):
from calibre.utils.date import isoformat from calibre.utils.date import isoformat
from calibre.utils.html2text import html2text from calibre.utils.html2text import html2text
from lxml import etree from lxml import etree
from calibre.utils.logging import default_log as log
self.fmt = path_to_output.rpartition('.')[2] self.fmt = path_to_output.rpartition('.')[2]
self.notification = notification self.notification = notification

View File

@ -14,7 +14,6 @@ from calibre import strftime
from calibre.constants import DEBUG from calibre.constants import DEBUG
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.utils.logging import default_log as log
Option = namedtuple('Option', 'option, default, dest, action, help') Option = namedtuple('Option', 'option, default, dest, action, help')
@ -161,6 +160,8 @@ class EPUB_MOBI(CatalogPlugin):
def run(self, path_to_output, opts, db, notification=DummyReporter()): def run(self, path_to_output, opts, db, notification=DummyReporter()):
from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder
from calibre.utils.logging import default_log as log
opts.log = log opts.log = log
opts.fmt = self.fmt = path_to_output.rpartition('.')[2] opts.fmt = self.fmt = path_to_output.rpartition('.')[2]

View File

@ -10,7 +10,6 @@ import re
from datetime import datetime, time from datetime import datetime, time
from functools import partial from functools import partial
from dateutil.parser import parse
from dateutil.tz import tzlocal, tzutc from dateutil.tz import tzlocal, tzutc
from calibre import strftime from calibre import strftime
@ -71,6 +70,7 @@ def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
:param default: Missing fields are filled in from default. If None, the :param default: Missing fields are filled in from default. If None, the
current date is used. current date is used.
''' '''
from dateutil.parser import parse
if not date_string: if not date_string:
return UNDEFINED_DATE return UNDEFINED_DATE
if default is None: if default is None: