From 50ea39227b25ee58bd700b8d1d17e399777a7770 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 19 Jan 2009 22:57:18 -0500 Subject: [PATCH 01/71] Use lxml to handle HTML entities and -specified encodings --- src/calibre/ebooks/oeb/base.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index ac6fb1e5dd..85d87cf425 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -15,10 +15,10 @@ from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote import logging import re -import htmlentitydefs import uuid import copy from lxml import etree +from lxml import html from calibre import LoggingInterface XML_PARSER = etree.XMLParser(recover=True) @@ -67,14 +67,6 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME]) MS_COVER_TYPE = 'other.ms-coverimage-standard' -recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace') -ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items()) -del ENTITYDEFS['lt'] -del ENTITYDEFS['gt'] -del ENTITYDEFS['quot'] -del ENTITYDEFS['amp'] -del recode - def element(parent, *args, **kwargs): if parent is not None: @@ -298,7 +290,6 @@ class Metadata(object): class Manifest(object): class Item(object): - ENTITY_RE = re.compile(r'&([a-zA-Z_:][a-zA-Z0-9.-_:]+);') NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)') def __init__(self, id, href, media_type, @@ -317,9 +308,12 @@ class Manifest(object): % (self.id, self.href, self.media_type) def _force_xhtml(self, data): - repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0)) - data = self.ENTITY_RE.sub(repl, data) - data = etree.fromstring(data, parser=XML_PARSER) + try: + data = etree.fromstring(data, parser=XML_PARSER) + except etree.XMLSyntaxError: + data = html.fromstring(data, parser=XML_PARSER) + data = etree.tostring(data, encoding=unicode) + data = etree.fromstring(data, parser=XML_PARSER) if namespace(data.tag) != XHTML_NS: data.attrib['xmlns'] = XHTML_NS data = etree.tostring(data) From 2857ff00fb0dd9da2dfdd4eaab72e15320e4b31e Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 19 Jan 2009 22:58:27 -0500 Subject: [PATCH 02/71] Fix validity bug in generated OEBBook-generated NCX --- src/calibre/ebooks/oeb/base.py | 35 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 85d87cf425..c42d363291 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -675,22 +675,22 @@ class TOC(object): node.to_opf1(tour) return tour - def to_ncx(self, parent, playorder=None, depth=1): - if not playorder: playorder = [0] + def to_ncx(self, parent, order=None, depth=1): + if not order: order = [0] for node in self.nodes: - playorder[0] += 1 + order[0] += 1 + playOrder = str(order[0]) + id = self.id or 'np' + playOrder point = etree.SubElement(parent, - NCX('navPoint'), attrib={'playOrder': str(playorder[0])}) + NCX('navPoint'), id=id, playOrder=playOrder) if self.klass: point.attrib['class'] = node.klass - if self.id: - point.attrib['id'] = node.id label = etree.SubElement(point, NCX('navLabel')) etree.SubElement(label, NCX('text')).text = node.title href = node.href if depth > 1 else urldefrag(node.href)[0] child = etree.SubElement(point, NCX('content'), attrib={'src': href}) - node.to_ncx(point, playorder, depth+1) + node.to_ncx(point, order, depth+1) return parent @@ -986,22 +986,11 @@ class OEBBook(object): guide = self.guide.to_opf1(package) return {OPF_MIME: ('content.opf', package)} - def _generate_ncx_item(self): - id = 'ncx' - index = 0 - while id in self.manifest: - id = 'ncx' + str(index) - index = index + 1 - href = 'toc' - index = 0 - while (href + '.ncx') in self.manifest.hrefs: - href = 'toc' + str(index) - href += '.ncx' - return (id, href) - def _to_ncx(self): - ncx = etree.Element(NCX('ncx'), attrib={'version': '2005-1'}, - nsmap={None: NCX_NS}) + lang = unicode(self.metadata.language[0]) + ncx = etree.Element(NCX('ncx'), + attrib={'version': '2005-1', XML('lang'): lang}, + nsmap={None: NCX_NS}) head = etree.SubElement(ncx, NCX('head')) etree.SubElement(head, NCX('meta'), attrib={'name': 'dtb:uid', 'content': unicode(self.uid)}) @@ -1024,7 +1013,7 @@ class OEBBook(object): nsmap={None: OPF2_NS}) metadata = self.metadata.to_opf2(package) manifest = self.manifest.to_opf2(package) - id, href = self._generate_ncx_item() + id, href = self.manifest.generate('ncx', 'toc.ncx') etree.SubElement(manifest, OPF('item'), attrib={'id': id, 'href': href, 'media-type': NCX_MIME}) spine = self.spine.to_opf2(package) From f34034884ac442762c481aff1e52c4a0394ab542 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 20 Jan 2009 00:32:49 -0500 Subject: [PATCH 03/71] Dynamic translations!: - Dynamic access to any of the extant translation sets - Integration into OEBBook to provide book-language translations - Integration with HTML TOC to provide language-appropriate title - oeb2mobi/any2mobi option to provide HTML TOC from the command-line --- src/calibre/ebooks/mobi/writer.py | 5 +++- src/calibre/ebooks/oeb/base.py | 6 +++++ src/calibre/ebooks/oeb/transforms/htmltoc.py | 10 +++++--- src/calibre/translations/dynamic.py | 27 ++++++++++++++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 src/calibre/translations/dynamic.py diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 306f097c90..3be283fa0a 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -515,6 +515,9 @@ def add_mobi_options(parser): group.add_option( '-r', '--rescale-images', default=False, action='store_true', help=_('Modify images to meet Palm device size limitations.')) + group.add_option( + '--toc-title', default=None, action='store', + help=_('Title for any generated in-line table of contents.')) parser.add_option_group(group) group = OptionGroup(parser, _('Profiles'), _('Device renderer profiles. ' 'Affects conversion of default font sizes and rasterization ' @@ -558,7 +561,7 @@ def oeb2mobi(opts, inpath): imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None context = Context(source, dest) oeb = OEBBook(inpath, logger=logger) - tocadder = HTMLTOCAdder() + tocadder = HTMLTOCAdder(title=opts.toc_title) tocadder.transform(oeb, context) mangler = CaseMangler() mangler.transform(oeb, context) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index c42d363291..c167151a5f 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -20,6 +20,7 @@ import copy from lxml import etree from lxml import html from calibre import LoggingInterface +from calibre.translations.dynamic import translate XML_PARSER = etree.XMLParser(recover=True) XML_NS = 'http://www.w3.org/XML/1998/namespace' @@ -973,6 +974,11 @@ class OEBBook(object): self._toc_from_opf(opf) self._ensure_cover_image() + def translate(self, text): + lang = str(self.metadata.language[0]) + lang = lang.split('-', 1)[0].lower() + return translate(lang, text) + def to_opf1(self): package = etree.Element('package', attrib={'unique-identifier': self.uid.id}) diff --git a/src/calibre/ebooks/oeb/transforms/htmltoc.py b/src/calibre/ebooks/oeb/transforms/htmltoc.py index 9eaa04d41d..7da7df17e9 100644 --- a/src/calibre/ebooks/oeb/transforms/htmltoc.py +++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py @@ -44,13 +44,15 @@ body > .calibre_toc_block { } class HTMLTOCAdder(object): - def __init__(self, style='nested'): + def __init__(self, title=None, style='nested'): + self.title = title self.style = style def transform(self, oeb, context): if 'toc' in oeb.guide: return oeb.logger.info('Generating in-line TOC...') + title = self.title or oeb.translate('Table of Contents') style = self.style if style not in STYLE_CSS: oeb.logger.error('Unknown TOC style %r' % style) @@ -61,15 +63,15 @@ class HTMLTOCAdder(object): contents = element(None, XHTML('html'), nsmap={None: XHTML_NS}, attrib={XML('lang'): language}) head = element(contents, XHTML('head')) - title = element(head, XHTML('title')) - title.text = 'Table of Contents' + htitle = element(head, XHTML('title')) + htitle.text = title element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME, href=css_href) body = element(contents, XHTML('body'), attrib={'class': 'calibre_toc'}) h1 = element(body, XHTML('h1'), attrib={'class': 'calibre_toc_header'}) - h1.text = 'Table of Contents' + h1.text = title self.add_toc_level(body, oeb.toc) id, href = oeb.manifest.generate('contents', 'contents.xhtml') item = oeb.manifest.add(id, href, XHTML_MIME, data=contents) diff --git a/src/calibre/translations/dynamic.py b/src/calibre/translations/dynamic.py new file mode 100644 index 0000000000..1c9f53e960 --- /dev/null +++ b/src/calibre/translations/dynamic.py @@ -0,0 +1,27 @@ +''' +Dynamic language lookup of translations for user-visible strings. +''' + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +import sys +from cStringIO import StringIO +from gettext import GNUTranslations, NullTranslations +from calibre.translations.compiled import translations + +__all__ = ['translate'] + +_CACHE = {} + +def translate(lang, text): + trans = None + if lang in _CACHE: + trans = _CACHE[lang] + elif lang in translations: + buf = StringIO(translations[lang]) + trans = GNUTranslations(buf) + _CACHE[lang] = trans + if trans is None: + return _(text) + return trans.ugettext(text) From 6a4933c45330a307c9863a1ebb134946c0e20cd8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 19 Jan 2009 22:16:34 -0800 Subject: [PATCH 04/71] Fix #1626 (bad font size on "The New Yorker" recipe) --- src/calibre/web/feeds/news.py | 10 +++-- .../web/feeds/recipes/recipe_new_yorker.py | 8 ++++ src/calibre/web/feeds/templates.py | 38 +++++++++++++------ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 85ed39a16d..6da6383210 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -532,7 +532,9 @@ class BasicNewsRecipe(object, LoggingInterface): if body is not None: templ = self.navbar.generate(False, f, a, feed_len, not self.has_single_feed, - url, __appname__, center=self.center_navbar) + url, __appname__, + center=self.center_navbar, + extra_css=self.extra_css) elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') body.insert(0, elem) if self.remove_javascript: @@ -575,7 +577,8 @@ class BasicNewsRecipe(object, LoggingInterface): def feeds2index(self, feeds): templ = templates.IndexTemplate() - return templ.generate(self.title, self.timefmt, feeds).render(doctype='xhtml') + return templ.generate(self.title, self.timefmt, feeds, + extra_css=self.extra_css).render(doctype='xhtml') @classmethod def description_limiter(cls, src): @@ -626,7 +629,8 @@ class BasicNewsRecipe(object, LoggingInterface): templ = templates.FeedTemplate() - return templ.generate(feed, self.description_limiter).render(doctype='xhtml') + return templ.generate(feed, self.description_limiter, + extra_css=self.extra_css).render(doctype='xhtml') def create_logger(self, feed_number, article_number): diff --git a/src/calibre/web/feeds/recipes/recipe_new_yorker.py b/src/calibre/web/feeds/recipes/recipe_new_yorker.py index f5ce85b351..f8ef5bc8cc 100644 --- a/src/calibre/web/feeds/recipes/recipe_new_yorker.py +++ b/src/calibre/web/feeds/recipes/recipe_new_yorker.py @@ -16,6 +16,14 @@ class NewYorker(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = False use_embedded_content = False + extra_css = ''' + .calibre_feed_list {font-size:xx-small} + .calibre_article_list {font-size:xx-small} + .calibre_feed_title {font-size:normal} + .calibre_recipe_title {font-size:normal} + .calibre_feed_description {font-size:xx-small} + ''' + keep_only_tags = [ dict(name='div' , attrs={'id':'printbody' }) diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index b2b96bd9a4..1a6a574129 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -32,6 +32,11 @@ class NavBarTemplate(Template): xmlns:py="http://genshi.edgewall.org/" > + + +