diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index 604cd9002b..cadc61e584 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -10,6 +10,7 @@ import os, fnmatch, shutil from itertools import cycle from calibre.ebooks.metadata.meta import metadata_from_formats, path_to_ext +from calibre.ebooks.metadata import authors_to_string from calibre.devices.usbms.device import Device from calibre.devices.usbms.books import BookList, Book from calibre.devices.errors import FreeSpaceError, PathError @@ -221,12 +222,7 @@ class USBMS(Device): mi = metadata_from_formats([path]) mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown' - authors = 'Unknown' - for author in mi.authors: - if authors == 'Unknown': - authors = author - else: - authors += ', %s' % author + authors = authors_to_string(mi.authors) return Book(path, mi.title, authors, mime) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index d61dc0051a..722b6cd4e3 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -153,11 +153,27 @@ class HTMLProcessor(Processor, Rationalizer): Perform various markup transforms to get the output to render correctly in the quirky ADE. ''' - # Replace
<br> that are children of <body> with <p>&nbsp;</p>
+ # Replace <br>
that are children of as ADE doesn't handle them if hasattr(self.body, 'xpath'): for br in self.body.xpath('./br'): + if br.getparent() is None: + continue + try: + sibling = br.itersiblings().next() + except: + sibling = None br.tag = 'p' br.text = u'\u00a0' + if (br.tail and br.tail.strip()) or sibling is None or \ + getattr(sibling, 'tag', '') != 'br': + br.set('style', br.get('style', '')+'; margin: 0pt; border:0pt; height:0pt') + else: + sibling.getparent().remove(sibling) + if sibling.tail: + if not br.tail: + br.tail = '' + br.tail += sibling.tail + if self.opts.profile.remove_object_tags: for tag in self.root.xpath('//embed'): @@ -166,6 +182,14 @@ class HTMLProcessor(Processor, Rationalizer): if tag.get('type', '').lower().strip() in ('image/svg+xml',): continue tag.getparent().remove(tag) + + + for tag in self.root.xpath('//title|//style'): + if not tag.text: + tag.getparent().remove(tag) + for tag in self.root.xpath('//script'): + if not tag.text and not tag.get('src', False): + tag.getparent().remove(tag) def save(self): for meta in list(self.root.xpath('//meta')): diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index e884ea7213..673c92ebb9 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -1720,7 +1720,7 @@ class HTMLConverter(object, LoggingInterface): self.previous_text = '\n' elif tagname in ['hr', 'tr']: # tr needed for nested tables self.end_current_block() - if tagname == 'hr': + if tagname == 'hr' and not tag_css.get('width', '').strip().startswith('0'): self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth'])) self.previous_text = '\n' self.process_children(tag, tag_css, tag_pseudo_css) diff --git a/src/calibre/ebooks/mobi/from_comic.py b/src/calibre/ebooks/mobi/from_comic.py new file mode 100644 index 0000000000..87d63ea15f --- /dev/null +++ b/src/calibre/ebooks/mobi/from_comic.py @@ -0,0 +1,44 @@ 
+#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +''' +''' +import sys, os +from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, \ + ProgressBar, terminal_controller +from calibre.ebooks.mobi.from_any import config, any2mobi +from calibre.ptempfile import PersistentTemporaryFile + + +def convert(path_to_file, opts, notification=lambda m, p: p): + pt = PersistentTemporaryFile('_comic2mobi.epub') + pt.close() + orig_output = opts.output + opts.output = pt.name + do_convert(path_to_file, opts, notification=notification, output_format='epub') + opts = config('').parse() + if orig_output is None: + orig_output = os.path.splitext(path_to_file)[0]+'.mobi' + opts.output = orig_output + any2mobi(opts, pt.name) + +def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) + if len(args) < 2: + parser.print_help() + print '\nYou must specify a file to convert' + return 1 + + pb = ProgressBar(terminal_controller, _('Rendering comic pages...'), + no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False)) + notification = pb.update + + source = os.path.abspath(args[1]) + convert(source, opts, notification=notification) + return 0 + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index fdafd2e08b..39c77eace5 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -452,6 +452,13 @@ class MobiWriter(object): code = EXTH_CODES[term] for item in oeb.metadata[term]: data = self.COLLAPSE_RE.sub(' ', unicode(item)) + if term == 'identifier': + if data.lower().startswith('urn:isbn:'): + data = data[9:] + elif item.get('scheme', '').lower() == 'isbn': + pass + else: + continue data = data.encode('utf-8') exth.write(pack('>II', code, len(data) + 8)) exth.write(data) @@ -468,7 
+475,7 @@ class MobiWriter(object): nrecs += 3 exth = exth.getvalue() trail = len(exth) % 4 - pad = '' if not trail else '\0' * (4 - trail) + pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] return ''.join(exth) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index c2d30eb2c3..3336391a38 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -21,6 +21,7 @@ from lxml import etree from lxml import html from calibre import LoggingInterface from calibre.translations.dynamic import translate +from calibre.startup import get_lang XML_PARSER = etree.XMLParser(recover=True) XML_NS = 'http://www.w3.org/XML/1998/namespace' @@ -30,6 +31,7 @@ OPF2_NS = 'http://www.idpf.org/2007/opf' DC09_NS = 'http://purl.org/metadata/dublin_core' DC10_NS = 'http://purl.org/dc/elements/1.0/' DC11_NS = 'http://purl.org/dc/elements/1.1/' +DC_NSES = set([DC09_NS, DC10_NS, DC11_NS]) XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance' DCTERMS_NS = 'http://purl.org/dc/terms/' NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/' @@ -194,15 +196,19 @@ class Metadata(object): if term == OPF('meta') and not value: term = self.fq_attrib.pop('name') value = self.fq_attrib.pop('content') - elif term in Metadata.TERMS and not namespace(term): - term = DC(term) + elif barename(term).lower() in Metadata.TERMS and \ + (not namespace(term) or namespace(term) in DC_NSES): + # Anything looking like Dublin Core is coerced + term = DC(barename(term).lower()) + elif namespace(term) == OPF2_NS: + term = barename(term) self.term = term self.value = value self.attrib = attrib = {} for fq_attr in fq_attrib: if fq_attr in Metadata.ATTRS: attr = fq_attr - fq_attr = OPF2(fq_attr) + fq_attr = OPF(fq_attr) fq_attrib[fq_attr] = fq_attrib.pop(attr) else: attr = barename(fq_attr) @@ -216,7 +222,16 @@ class Metadata(object): raise AttributeError( '%r object has no attribute %r' \ % 
(self.__class__.__name__, name)) - + + def __getitem__(self, key): + return self.attrib[key] + + def __contains__(self, key): + return key in self.attrib + + def get(self, key, default=None): + return self.attrib.get(key, default) + def __repr__(self): return 'Item(term=%r, value=%r, attrib=%r)' \ % (barename(self.term), self.value, self.attrib) @@ -814,13 +829,13 @@ class OEBBook(object): break if not metadata.language: self.logger.warn(u'Language not specified.') - metadata.add('language', 'en') + metadata.add('language', get_lang()) if not metadata.creator: self.logger.warn(u'Creator not specified.') - metadata.add('creator', 'Unknown') + metadata.add('creator', _('Unknown')) if not metadata.title: self.logger.warn(u'Title not specified.') - metadata.add('title', 'Unknown') + metadata.add('title', _('Unknown')) def _manifest_from_opf(self, opf): self.manifest = manifest = Manifest(self) @@ -857,6 +872,8 @@ class OEBBook(object): extras.sort() for item in extras: spine.add(item, False) + if len(spine) == 0: + raise OEBError("Spine is empty") def _guide_from_opf(self, opf): self.guide = guide = Guide(self) @@ -886,8 +903,11 @@ class OEBBook(object): if len(result) != 1: return False id = result[0] - ncx = self.manifest[id].data - self.manifest.remove(id) + if id not in self.manifest.ids: + return False + item = self.manifest.ids[id] + ncx = item.data + self.manifest.remove(item) title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0] self.toc = toc = TOC(title) navmaps = xpath(ncx, 'ncx:navMap') diff --git a/src/calibre/gui2/images/news/freakonomics.png b/src/calibre/gui2/images/news/freakonomics.png new file mode 100644 index 0000000000..7cb9e79916 Binary files /dev/null and b/src/calibre/gui2/images/news/freakonomics.png differ diff --git a/src/calibre/gui2/main.ui b/src/calibre/gui2/main.ui index 2733a61be3..2b243ba2b9 100644 --- a/src/calibre/gui2/main.ui +++ b/src/calibre/gui2/main.ui @@ -119,7 +119,11 @@ - + + + Set the output format that is used when 
converting ebooks and downloading news + + diff --git a/src/calibre/linux.py b/src/calibre/linux.py index a05a7ea7a8..93571cce4f 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -63,6 +63,7 @@ entry_points = { 'oeb2lit = calibre.ebooks.lit.writer:main', 'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main', 'comic2epub = calibre.ebooks.epub.from_comic:main', + 'comic2mobi = calibre.ebooks.mobi.from_comic:main', 'comic2pdf = calibre.ebooks.pdf.from_comic:main', 'calibre-debug = calibre.debug:main', 'calibredb = calibre.library.cli:main', @@ -239,6 +240,7 @@ def setup_completion(fatal_errors): f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit'])) f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr'])) + f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles)) f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 71c4b71483..f2ed6d2d24 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -23,7 +23,7 @@ recipe_modules = ['recipe_' + r for r in ( 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet', 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de', 'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 'the_age', - 'laprensa', 'amspec', + 'laprensa', 'amspec', 'freakonomics', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_freakonomics.py b/src/calibre/web/feeds/recipes/recipe_freakonomics.py new file mode 100644 index 0000000000..704f7f727d --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_freakonomics.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2009, 
Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Freakonomics(BasicNewsRecipe): + + title = 'Freakonomics Blog' + description = 'The Hidden side of everything' + __author__ = 'Kovid Goyal' + + feeds = [('Blog', 'http://freakonomics.blogs.nytimes.com/feed/atom/')] + + def get_article_url(self, article): + return article.get('feedburner_origlink', None) + + def print_version(self, url): + return url + '?pagemode=print' \ No newline at end of file