Sync to trunk

This commit is contained in:
John Schember 2009-01-23 07:17:39 -05:00
commit df153d14ba
11 changed files with 137 additions and 20 deletions

View File

@ -10,6 +10,7 @@ import os, fnmatch, shutil
from itertools import cycle from itertools import cycle
from calibre.ebooks.metadata.meta import metadata_from_formats, path_to_ext from calibre.ebooks.metadata.meta import metadata_from_formats, path_to_ext
from calibre.ebooks.metadata import authors_to_string
from calibre.devices.usbms.device import Device from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book from calibre.devices.usbms.books import BookList, Book
from calibre.devices.errors import FreeSpaceError, PathError from calibre.devices.errors import FreeSpaceError, PathError
@ -221,12 +222,7 @@ class USBMS(Device):
mi = metadata_from_formats([path]) mi = metadata_from_formats([path])
mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown' mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown'
authors = 'Unknown' authors = authors_to_string(mi.authors)
for author in mi.authors:
if authors == 'Unknown':
authors = author
else:
authors += ', %s' % author
return Book(path, mi.title, authors, mime) return Book(path, mi.title, authors, mime)

View File

@ -153,11 +153,27 @@ class HTMLProcessor(Processor, Rationalizer):
Perform various markup transforms to get the output to render correctly Perform various markup transforms to get the output to render correctly
in the quirky ADE. in the quirky ADE.
''' '''
# Replace <br> that are children of <body> with <p>&nbsp;</p> # Replace <br> that are children of <body> as ADE doesn't handle them
if hasattr(self.body, 'xpath'): if hasattr(self.body, 'xpath'):
for br in self.body.xpath('./br'): for br in self.body.xpath('./br'):
if br.getparent() is None:
continue
try:
sibling = br.itersiblings().next()
except:
sibling = None
br.tag = 'p' br.tag = 'p'
br.text = u'\u00a0' br.text = u'\u00a0'
if (br.tail and br.tail.strip()) or sibling is None or \
getattr(sibling, 'tag', '') != 'br':
br.set('style', br.get('style', '')+'; margin: 0pt; border:0pt; height:0pt')
else:
sibling.getparent().remove(sibling)
if sibling.tail:
if not br.tail:
br.tail = ''
br.tail += sibling.tail
if self.opts.profile.remove_object_tags: if self.opts.profile.remove_object_tags:
for tag in self.root.xpath('//embed'): for tag in self.root.xpath('//embed'):
@ -166,6 +182,14 @@ class HTMLProcessor(Processor, Rationalizer):
if tag.get('type', '').lower().strip() in ('image/svg+xml',): if tag.get('type', '').lower().strip() in ('image/svg+xml',):
continue continue
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in self.root.xpath('//title|//style'):
if not tag.text:
tag.getparent().remove(tag)
for tag in self.root.xpath('//script'):
if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag)
def save(self): def save(self):
for meta in list(self.root.xpath('//meta')): for meta in list(self.root.xpath('//meta')):

View File

@ -1720,7 +1720,7 @@ class HTMLConverter(object, LoggingInterface):
self.previous_text = '\n' self.previous_text = '\n'
elif tagname in ['hr', 'tr']: # tr needed for nested tables elif tagname in ['hr', 'tr']: # tr needed for nested tables
self.end_current_block() self.end_current_block()
if tagname == 'hr': if tagname == 'hr' and not tag_css.get('width', '').strip().startswith('0'):
self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth'])) self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth']))
self.previous_text = '\n' self.previous_text = '\n'
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
'''
import sys, os
from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, \
ProgressBar, terminal_controller
from calibre.ebooks.mobi.from_any import config, any2mobi
from calibre.ptempfile import PersistentTemporaryFile
def convert(path_to_file, opts, notification=lambda m, p: p):
    '''
    Convert the comic at `path_to_file` to MOBI by way of an intermediate
    EPUB.

    The comic is first passed to `do_convert` with ``output_format='epub'``,
    writing into a temporary file whose name ends in ``_comic2mobi.epub``.
    That file is then handed to `any2mobi` together with a freshly parsed
    set of options from ``config('').parse()``.

    :param path_to_file: Path of the comic to convert.
    :param opts: Comic conversion options. Note that ``opts.output`` is
        overwritten on the caller's object with the temporary EPUB path.
    :param notification: Progress callback forwarded to `do_convert`.

    If ``opts.output`` was ``None`` on entry, the result is written next to
    the input, with the input's extension replaced by ``.mobi``.
    The temporary EPUB is not removed by this function.
    '''
    requested_output = opts.output

    # Only the file name is needed; close the handle straight away so the
    # EPUB stage can write to that path.
    epub_tmp = PersistentTemporaryFile('_comic2mobi.epub')
    epub_tmp.close()
    epub_path = epub_tmp.name

    # Stage 1: comic -> EPUB, redirected into the temporary file.
    opts.output = epub_path
    do_convert(path_to_file, opts, notification=notification,
               output_format='epub')

    # Stage 2: EPUB -> MOBI, using default options rather than the comic
    # options received by this function.
    mobi_opts = config('').parse()
    if requested_output is None:
        mobi_opts.output = os.path.splitext(path_to_file)[0]+'.mobi'
    else:
        mobi_opts.output = requested_output
    any2mobi(mobi_opts, epub_path)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print '\nYou must specify a file to convert'
return 1
pb = ProgressBar(terminal_controller, _('Rendering comic pages...'),
no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
notification = pb.update
source = os.path.abspath(args[1])
convert(source, opts, notification=notification)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -452,6 +452,13 @@ class MobiWriter(object):
code = EXTH_CODES[term] code = EXTH_CODES[term]
for item in oeb.metadata[term]: for item in oeb.metadata[term]:
data = self.COLLAPSE_RE.sub(' ', unicode(item)) data = self.COLLAPSE_RE.sub(' ', unicode(item))
if term == 'identifier':
if data.lower().startswith('urn:isbn:'):
data = data[9:]
elif item.get('scheme', '').lower() == 'isbn':
pass
else:
continue
data = data.encode('utf-8') data = data.encode('utf-8')
exth.write(pack('>II', code, len(data) + 8)) exth.write(pack('>II', code, len(data) + 8))
exth.write(data) exth.write(data)
@ -468,7 +475,7 @@ class MobiWriter(object):
nrecs += 3 nrecs += 3
exth = exth.getvalue() exth = exth.getvalue()
trail = len(exth) % 4 trail = len(exth) % 4
pad = '' if not trail else '\0' * (4 - trail) pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad]
return ''.join(exth) return ''.join(exth)

View File

@ -21,6 +21,7 @@ from lxml import etree
from lxml import html from lxml import html
from calibre import LoggingInterface from calibre import LoggingInterface
from calibre.translations.dynamic import translate from calibre.translations.dynamic import translate
from calibre.startup import get_lang
XML_PARSER = etree.XMLParser(recover=True) XML_PARSER = etree.XMLParser(recover=True)
XML_NS = 'http://www.w3.org/XML/1998/namespace' XML_NS = 'http://www.w3.org/XML/1998/namespace'
@ -30,6 +31,7 @@ OPF2_NS = 'http://www.idpf.org/2007/opf'
DC09_NS = 'http://purl.org/metadata/dublin_core' DC09_NS = 'http://purl.org/metadata/dublin_core'
DC10_NS = 'http://purl.org/dc/elements/1.0/' DC10_NS = 'http://purl.org/dc/elements/1.0/'
DC11_NS = 'http://purl.org/dc/elements/1.1/' DC11_NS = 'http://purl.org/dc/elements/1.1/'
DC_NSES = set([DC09_NS, DC10_NS, DC11_NS])
XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance' XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
DCTERMS_NS = 'http://purl.org/dc/terms/' DCTERMS_NS = 'http://purl.org/dc/terms/'
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/' NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
@ -194,15 +196,19 @@ class Metadata(object):
if term == OPF('meta') and not value: if term == OPF('meta') and not value:
term = self.fq_attrib.pop('name') term = self.fq_attrib.pop('name')
value = self.fq_attrib.pop('content') value = self.fq_attrib.pop('content')
elif term in Metadata.TERMS and not namespace(term): elif barename(term).lower() in Metadata.TERMS and \
term = DC(term) (not namespace(term) or namespace(term) in DC_NSES):
# Anything looking like Dublin Core is coerced
term = DC(barename(term).lower())
elif namespace(term) == OPF2_NS:
term = barename(term)
self.term = term self.term = term
self.value = value self.value = value
self.attrib = attrib = {} self.attrib = attrib = {}
for fq_attr in fq_attrib: for fq_attr in fq_attrib:
if fq_attr in Metadata.ATTRS: if fq_attr in Metadata.ATTRS:
attr = fq_attr attr = fq_attr
fq_attr = OPF2(fq_attr) fq_attr = OPF(fq_attr)
fq_attrib[fq_attr] = fq_attrib.pop(attr) fq_attrib[fq_attr] = fq_attrib.pop(attr)
else: else:
attr = barename(fq_attr) attr = barename(fq_attr)
@ -216,7 +222,16 @@ class Metadata(object):
raise AttributeError( raise AttributeError(
'%r object has no attribute %r' \ '%r object has no attribute %r' \
% (self.__class__.__name__, name)) % (self.__class__.__name__, name))
def __getitem__(self, key):
    # Subscript access (``item[key]``) looks the key up in this item's
    # ``attrib`` mapping; a missing key propagates the mapping's own
    # lookup error instead of returning a default.
    return self.attrib[key]
def __contains__(self, key):
    # Membership test (``key in item``) is answered by the ``attrib``
    # mapping, i.e. it checks for an attribute name, not the item's value.
    return key in self.attrib
def get(self, key, default=None):
    # Non-raising attribute lookup: returns the entry for `key` from the
    # ``attrib`` mapping, or `default` when the key is absent.
    return self.attrib.get(key, default)
def __repr__(self): def __repr__(self):
return 'Item(term=%r, value=%r, attrib=%r)' \ return 'Item(term=%r, value=%r, attrib=%r)' \
% (barename(self.term), self.value, self.attrib) % (barename(self.term), self.value, self.attrib)
@ -814,13 +829,13 @@ class OEBBook(object):
break break
if not metadata.language: if not metadata.language:
self.logger.warn(u'Language not specified.') self.logger.warn(u'Language not specified.')
metadata.add('language', 'en') metadata.add('language', get_lang())
if not metadata.creator: if not metadata.creator:
self.logger.warn(u'Creator not specified.') self.logger.warn(u'Creator not specified.')
metadata.add('creator', 'Unknown') metadata.add('creator', _('Unknown'))
if not metadata.title: if not metadata.title:
self.logger.warn(u'Title not specified.') self.logger.warn(u'Title not specified.')
metadata.add('title', 'Unknown') metadata.add('title', _('Unknown'))
def _manifest_from_opf(self, opf): def _manifest_from_opf(self, opf):
self.manifest = manifest = Manifest(self) self.manifest = manifest = Manifest(self)
@ -857,6 +872,8 @@ class OEBBook(object):
extras.sort() extras.sort()
for item in extras: for item in extras:
spine.add(item, False) spine.add(item, False)
if len(spine) == 0:
raise OEBError("Spine is empty")
def _guide_from_opf(self, opf): def _guide_from_opf(self, opf):
self.guide = guide = Guide(self) self.guide = guide = Guide(self)
@ -886,8 +903,11 @@ class OEBBook(object):
if len(result) != 1: if len(result) != 1:
return False return False
id = result[0] id = result[0]
ncx = self.manifest[id].data if id not in self.manifest.ids:
self.manifest.remove(id) return False
item = self.manifest.ids[id]
ncx = item.data
self.manifest.remove(item)
title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0] title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0]
self.toc = toc = TOC(title) self.toc = toc = TOC(title)
navmaps = xpath(ncx, 'ncx:navMap') navmaps = xpath(ncx, 'ncx:navMap')

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

View File

@ -119,7 +119,11 @@
</widget> </widget>
</item> </item>
<item> <item>
<widget class="QComboBox" name="output_format" /> <widget class="QComboBox" name="output_format" >
<property name="toolTip" >
<string>Set the output format that is used when converting ebooks and downloading news</string>
</property>
</widget>
</item> </item>
</layout> </layout>
</item> </item>

View File

@ -63,6 +63,7 @@ entry_points = {
'oeb2lit = calibre.ebooks.lit.writer:main', 'oeb2lit = calibre.ebooks.lit.writer:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main', 'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main', 'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main', 'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main', 'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main', 'calibredb = calibre.library.cli:main',
@ -239,6 +240,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit'])) f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles)) f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))

View File

@ -23,7 +23,7 @@ recipe_modules = ['recipe_' + r for r in (
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet', 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de', 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 'the_age', 'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 'the_age',
'laprensa', 'amspec', 'laprensa', 'amspec', 'freakonomics',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -0,0 +1,20 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class Freakonomics(BasicNewsRecipe):
    '''
    Recipe for the Freakonomics blog, fetched from a single Atom feed.
    '''

    title = 'Freakonomics Blog'
    description = 'The Hidden side of everything'
    __author__ = 'Kovid Goyal'
    feeds = [('Blog', 'http://freakonomics.blogs.nytimes.com/feed/atom/')]

    def get_article_url(self, article):
        '''
        Return the ``feedburner_origlink`` entry of `article`, or ``None``
        when the feed entry does not carry one.
        '''
        original_link = article.get('feedburner_origlink', None)
        return original_link

    def print_version(self, url):
        '''
        Return `url` with the ``?pagemode=print`` query string appended.
        '''
        print_query = '?pagemode=print'
        return url + print_query