News download: Convert all downloaded images to JPG and retry downloads on DNS errors. Also translate "Table of Contents" in the generated MOBI TOC.

This commit is contained in:
Kovid Goyal 2009-01-20 00:25:52 -08:00
commit 41d4461c65
5 changed files with 59 additions and 13 deletions

View File

@ -515,6 +515,9 @@ def add_mobi_options(parser):
group.add_option( group.add_option(
'-r', '--rescale-images', default=False, action='store_true', '-r', '--rescale-images', default=False, action='store_true',
help=_('Modify images to meet Palm device size limitations.')) help=_('Modify images to meet Palm device size limitations.'))
group.add_option(
'--toc-title', default=None, action='store',
help=_('Title for any generated in-line table of contents.'))
parser.add_option_group(group) parser.add_option_group(group)
group = OptionGroup(parser, _('Profiles'), _('Device renderer profiles. ' group = OptionGroup(parser, _('Profiles'), _('Device renderer profiles. '
'Affects conversion of default font sizes and rasterization ' 'Affects conversion of default font sizes and rasterization '
@ -558,7 +561,7 @@ def oeb2mobi(opts, inpath):
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
context = Context(source, dest) context = Context(source, dest)
oeb = OEBBook(inpath, logger=logger) oeb = OEBBook(inpath, logger=logger)
tocadder = HTMLTOCAdder() tocadder = HTMLTOCAdder(title=opts.toc_title)
tocadder.transform(oeb, context) tocadder.transform(oeb, context)
mangler = CaseMangler() mangler = CaseMangler()
mangler.transform(oeb, context) mangler.transform(oeb, context)

View File

@ -20,6 +20,7 @@ import copy
from lxml import etree from lxml import etree
from lxml import html from lxml import html
from calibre import LoggingInterface from calibre import LoggingInterface
from calibre.translations.dynamic import translate
XML_PARSER = etree.XMLParser(recover=True) XML_PARSER = etree.XMLParser(recover=True)
XML_NS = 'http://www.w3.org/XML/1998/namespace' XML_NS = 'http://www.w3.org/XML/1998/namespace'
@ -973,6 +974,11 @@ class OEBBook(object):
self._toc_from_opf(opf) self._toc_from_opf(opf)
self._ensure_cover_image() self._ensure_cover_image()
def translate(self, text):
    '''
    Translate `text` into the language given by the first entry of this
    book's language metadata.

    Only the part of the language code before the first '-' is used,
    lower-cased, and the lookup is delegated to the module-level
    `translate` function.
    '''
    primary = str(self.metadata.language[0]).partition('-')[0]
    return translate(primary.lower(), text)
def to_opf1(self): def to_opf1(self):
package = etree.Element('package', package = etree.Element('package',
attrib={'unique-identifier': self.uid.id}) attrib={'unique-identifier': self.uid.id})

View File

@ -44,13 +44,15 @@ body > .calibre_toc_block {
} }
class HTMLTOCAdder(object): class HTMLTOCAdder(object):
def __init__(self, style='nested'): def __init__(self, title=None, style='nested'):
self.title = title
self.style = style self.style = style
def transform(self, oeb, context): def transform(self, oeb, context):
if 'toc' in oeb.guide: if 'toc' in oeb.guide:
return return
oeb.logger.info('Generating in-line TOC...') oeb.logger.info('Generating in-line TOC...')
title = self.title or oeb.translate('Table of Contents')
style = self.style style = self.style
if style not in STYLE_CSS: if style not in STYLE_CSS:
oeb.logger.error('Unknown TOC style %r' % style) oeb.logger.error('Unknown TOC style %r' % style)
@ -61,15 +63,15 @@ class HTMLTOCAdder(object):
contents = element(None, XHTML('html'), nsmap={None: XHTML_NS}, contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
attrib={XML('lang'): language}) attrib={XML('lang'): language})
head = element(contents, XHTML('head')) head = element(contents, XHTML('head'))
title = element(head, XHTML('title')) htitle = element(head, XHTML('title'))
title.text = 'Table of Contents' htitle.text = title
element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME, element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
href=css_href) href=css_href)
body = element(contents, XHTML('body'), body = element(contents, XHTML('body'),
attrib={'class': 'calibre_toc'}) attrib={'class': 'calibre_toc'})
h1 = element(body, XHTML('h1'), h1 = element(body, XHTML('h1'),
attrib={'class': 'calibre_toc_header'}) attrib={'class': 'calibre_toc_header'})
h1.text = 'Table of Contents' h1.text = title
self.add_toc_level(body, oeb.toc) self.add_toc_level(body, oeb.toc)
id, href = oeb.manifest.generate('contents', 'contents.xhtml') id, href = oeb.manifest.generate('contents', 'contents.xhtml')
item = oeb.manifest.add(id, href, XHTML_MIME, data=contents) item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)

View File

@ -0,0 +1,27 @@
'''
Dynamic language lookup of translations for user-visible strings.
'''
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
from cStringIO import StringIO
from gettext import GNUTranslations, NullTranslations
from calibre.translations.compiled import translations
# Only `translate` is exported from this module.
__all__ = ['translate']
# Maps a language code to the GNUTranslations object that translate() built
# for it, so each catalogue is parsed at most once.
_CACHE = {}
def translate(lang, text):
    '''
    Return `text` translated into the language `lang`.

    The catalogue for `lang` is taken from `_CACHE` when present; otherwise
    it is built from the compiled data in `translations` and cached. When
    `translations` has no entry for `lang`, the text is passed through `_`
    instead (NOTE(review): `_` is not defined in this module; presumably the
    globally installed gettext function -- confirm).
    '''
    catalog = _CACHE.get(lang)
    if catalog is None and lang in translations:
        # First request for this language: parse the compiled catalogue
        # once and remember it for later calls.
        catalog = GNUTranslations(StringIO(translations[lang]))
        _CACHE[lang] = catalog
    if catalog is not None:
        return catalog.ugettext(text)
    return _(text)

View File

@ -11,6 +11,8 @@ import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, t
from urllib import url2pathname from urllib import url2pathname
from threading import RLock from threading import RLock
from httplib import responses from httplib import responses
from PIL import Image
from cStringIO import StringIO
from calibre import setup_cli_handlers, browser, sanitize_file_name, \ from calibre import setup_cli_handlers, browser, sanitize_file_name, \
relpath, LoggingInterface relpath, LoggingInterface
@ -183,8 +185,9 @@ class RecursiveFetcher(object, LoggingInterface):
except urllib2.URLError, err: except urllib2.URLError, err:
if hasattr(err, 'code') and responses.has_key(err.code): if hasattr(err, 'code') and responses.has_key(err.code):
raise FetchError, responses[err.code] raise FetchError, responses[err.code]
if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer if getattr(err, 'reason', [0])[0] == 104 or \
self.log_debug('Connection reset by peer retrying in 1 second.') getattr(err, 'errno', None) == -2: # Connection reset by peer or Name or service not know
self.log_debug('Temporary error, retyring in 1 second')
time.sleep(1) time.sleep(1)
with closing(self.browser.open(url)) as f: with closing(self.browser.open(url)) as f:
data = response(f.read()+f.read()) data = response(f.read()+f.read())
@ -304,12 +307,17 @@ class RecursiveFetcher(object, LoggingInterface):
fname = sanitize_file_name('img'+str(c)+ext) fname = sanitize_file_name('img'+str(c)+ext)
if isinstance(fname, unicode): if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace') fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname) imgpath = os.path.join(diskpath, fname+'.jpg')
try:
im = Image.open(StringIO(data)).convert('RGBA')
with self.imagemap_lock: with self.imagemap_lock:
self.imagemap[iurl] = imgpath self.imagemap[iurl] = imgpath
with open(imgpath, 'wb') as x: with open(imgpath, 'wb') as x:
x.write(data) im.save(x, 'JPEG')
tag['src'] = imgpath tag['src'] = imgpath
except:
traceback.print_exc()
continue
def absurl(self, baseurl, tag, key, filter=True): def absurl(self, baseurl, tag, key, filter=True):
iurl = tag[key] iurl = tag[key]