Sync to trunk

2025-07-09 03:04:10 -04:00 · 2009-01-23 07:17:39 -05:00 · 2009-01-23 07:17:39 -05:00 · df153d14ba
commit df153d14ba
parent 3d96314688 1f18210a57
11 changed files with 137 additions and 20 deletions
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -10,6 +10,7 @@ import os, fnmatch, shutil
 from itertools import cycle

 from calibre.ebooks.metadata.meta import metadata_from_formats, path_to_ext
+from calibre.ebooks.metadata import authors_to_string
 from calibre.devices.usbms.device import Device
 from calibre.devices.usbms.books import BookList, Book
 from calibre.devices.errors import FreeSpaceError, PathError
@ -221,12 +222,7 @@ class USBMS(Device):
        mi = metadata_from_formats([path])
        mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown'
        
-        authors = 'Unknown'
-        for author in mi.authors:
-            if authors == 'Unknown':
-                authors = author
-            else:
-                authors += ', %s' % author
+        authors = authors_to_string(mi.authors)
        
        return Book(path, mi.title, authors, mime)

--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -153,11 +153,27 @@ class HTMLProcessor(Processor, Rationalizer):
        Perform various markup transforms to get the output to render correctly 
        in the quirky ADE.
        '''
-        # Replace <br> that are children of <body> with <p>&nbsp;</p>
+        # Replace <br> that are children of <body> as ADE doesn't handle them
        if hasattr(self.body, 'xpath'):
            for br in self.body.xpath('./br'):
+                if br.getparent() is None:
+                    continue
+                try:
+                    sibling = br.itersiblings().next()
+                except:
+                    sibling = None
                br.tag = 'p'
                br.text = u'\u00a0'
+                if (br.tail and br.tail.strip()) or sibling is None or \
+                    getattr(sibling, 'tag', '') != 'br':
+                    br.set('style', br.get('style', '')+'; margin: 0pt; border:0pt; height:0pt')
+                else:
+                    sibling.getparent().remove(sibling)
+                    if sibling.tail:
+                        if not br.tail:
+                            br.tail = ''
+                        br.tail += sibling.tail
+                
                
        if self.opts.profile.remove_object_tags:
            for tag in self.root.xpath('//embed'):
@ -167,6 +183,14 @@ class HTMLProcessor(Processor, Rationalizer):
                    continue
                tag.getparent().remove(tag)
                
+        
+        for tag in self.root.xpath('//title|//style'):
+            if not tag.text:
+                tag.getparent().remove(tag)
+        for tag in self.root.xpath('//script'):
+            if not tag.text and not tag.get('src', False):
+                tag.getparent().remove(tag)
+    
    def save(self):
        for meta in list(self.root.xpath('//meta')):
            meta.getparent().remove(meta)
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -1720,7 +1720,7 @@ class HTMLConverter(object, LoggingInterface):
                self.previous_text = '\n'
            elif tagname in ['hr', 'tr']: # tr needed for nested tables
                self.end_current_block()
-                if tagname == 'hr':
+                if tagname == 'hr' and not tag_css.get('width', '').strip().startswith('0'):
                    self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth']))
                self.previous_text = '\n'
                self.process_children(tag, tag_css, tag_pseudo_css)
--- a/src/calibre/ebooks/mobi/from_comic.py
+++ b/src/calibre/ebooks/mobi/from_comic.py
@ -0,0 +1,44 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+'''
+import sys, os
+from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, \
+                        ProgressBar, terminal_controller
+from calibre.ebooks.mobi.from_any import config, any2mobi
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+def convert(path_to_file, opts, notification=lambda m, p: p):  # comic -> MOBI in two stages, via a temporary EPUB
+    pt = PersistentTemporaryFile('_comic2mobi.epub')  # temporary file used for the intermediate EPUB
+    pt.close()  # closed right away; only pt.name is used from here on
+    orig_output = opts.output  # remember the requested output path (may be None)
+    opts.output = pt.name  # NOTE: mutates the caller's opts so stage one targets the temp file
+    do_convert(path_to_file, opts, notification=notification, output_format='epub')
+    opts = config('').parse()  # rebinds opts to options from mobi.from_any; presumably defaults -- TODO confirm
+    if orig_output is None:
+        orig_output = os.path.splitext(path_to_file)[0]+'.mobi'  # default: input path with its extension replaced by .mobi
+    opts.output = orig_output
+    any2mobi(opts, pt.name)  # stage two: feed the temp EPUB path to any2mobi with the final output path
+
+def main(args=sys.argv):  # command line entry point; returns 1 on a usage error, 0 otherwise
+    parser = option_parser()  # option parser imported from calibre.ebooks.lrf.comic.convert_from
+    opts, args = parser.parse_args(args)
+    if len(args) < 2:  # args[1] is the input file; presumably args[0] is the program name -- confirm
+        parser.print_help()
+        print '\nYou must specify a file to convert'
+        return 1
+    
+    pb = ProgressBar(terminal_controller, _('Rendering comic pages...'), 
+                     no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))  # no_process may be missing from opts, hence getattr
+    notification = pb.update  # progress notifications are routed to the progress bar
+    
+    source = os.path.abspath(args[1])
+    convert(source, opts, notification=notification)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -452,6 +452,13 @@ class MobiWriter(object):
            code = EXTH_CODES[term]
            for item in oeb.metadata[term]:
                data = self.COLLAPSE_RE.sub(' ', unicode(item))
+                if term == 'identifier':
+                    if data.lower().startswith('urn:isbn:'):
+                        data = data[9:]
+                    elif item.get('scheme', '').lower() == 'isbn':
+                        pass
+                    else:
+                        continue
                data = data.encode('utf-8')
                exth.write(pack('>II', code, len(data) + 8))
                exth.write(data)
@ -468,7 +475,7 @@ class MobiWriter(object):
            nrecs += 3
        exth = exth.getvalue()
        trail = len(exth) % 4
-        pad = '' if not trail else '\0' * (4 - trail)
+        pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
        exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad]
        return ''.join(exth)

--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -21,6 +21,7 @@ from lxml import etree
 from lxml import html
 from calibre import LoggingInterface
 from calibre.translations.dynamic import translate
+from calibre.startup import get_lang

 XML_PARSER = etree.XMLParser(recover=True)
 XML_NS = 'http://www.w3.org/XML/1998/namespace'
@ -30,6 +31,7 @@ OPF2_NS = 'http://www.idpf.org/2007/opf'
 DC09_NS = 'http://purl.org/metadata/dublin_core'
 DC10_NS = 'http://purl.org/dc/elements/1.0/'
 DC11_NS = 'http://purl.org/dc/elements/1.1/'
+DC_NSES = set([DC09_NS, DC10_NS, DC11_NS])
 XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
 DCTERMS_NS = 'http://purl.org/dc/terms/'
 NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
@ -194,15 +196,19 @@ class Metadata(object):
            if term == OPF('meta') and not value:
                term = self.fq_attrib.pop('name')
                value = self.fq_attrib.pop('content')
-            elif term in Metadata.TERMS and not namespace(term):
-                term = DC(term)
+            elif barename(term).lower() in Metadata.TERMS and \
+                 (not namespace(term) or namespace(term) in DC_NSES):
+                # Anything looking like Dublin Core is coerced
+                term = DC(barename(term).lower())
+            elif namespace(term) == OPF2_NS:
+                term = barename(term)
            self.term = term
            self.value = value
            self.attrib = attrib = {}
            for fq_attr in fq_attrib:
                if fq_attr in Metadata.ATTRS:
                    attr = fq_attr
-                    fq_attr = OPF2(fq_attr)
+                    fq_attr = OPF(fq_attr)
                    fq_attrib[fq_attr] = fq_attrib.pop(attr)
                else:
                    attr = barename(fq_attr)
@ -217,6 +223,15 @@ class Metadata(object):
                    '%r object has no attribute %r' \
                        % (self.__class__.__name__, name))
        
+        def __getitem__(self, key):  # item[key] reads from the self.attrib dict
+            return self.attrib[key]  # KeyError propagates when key is absent
+        
+        def __contains__(self, key):  # `key in item` tests membership in self.attrib
+            return key in self.attrib
+        
+        def get(self, key, default=None):  # dict-style lookup in self.attrib
+            return self.attrib.get(key, default)  # default is returned when key is absent
+        
        def __repr__(self):
            return 'Item(term=%r, value=%r, attrib=%r)' \
                % (barename(self.term), self.value, self.attrib)
@ -814,13 +829,13 @@ class OEBBook(object):
                    break
        if not metadata.language:
            self.logger.warn(u'Language not specified.')
-            metadata.add('language', 'en')
+            metadata.add('language', get_lang())
        if not metadata.creator:
            self.logger.warn(u'Creator not specified.')
-            metadata.add('creator', 'Unknown')
+            metadata.add('creator', _('Unknown'))
        if not metadata.title:
            self.logger.warn(u'Title not specified.')
-            metadata.add('title', 'Unknown')
+            metadata.add('title', _('Unknown'))
    
    def _manifest_from_opf(self, opf):
        self.manifest = manifest = Manifest(self)
@ -857,6 +872,8 @@ class OEBBook(object):
        extras.sort()
        for item in extras:
            spine.add(item, False)
+        if len(spine) == 0:
+            raise OEBError("Spine is empty")

    def _guide_from_opf(self, opf):
        self.guide = guide = Guide(self)
@ -886,8 +903,11 @@ class OEBBook(object):
            if len(result) != 1:
                return False
        id = result[0]
-        ncx = self.manifest[id].data
-        self.manifest.remove(id)
+        if id not in self.manifest.ids:
+            return False
+        item = self.manifest.ids[id]
+        ncx = item.data
+        self.manifest.remove(item)
        title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0]
        self.toc = toc = TOC(title)
        navmaps = xpath(ncx, 'ncx:navMap')
--- a/src/calibre/gui2/images/news/freakonomics.png
+++ b/src/calibre/gui2/images/news/freakonomics.png
--- a/src/calibre/gui2/main.ui
+++ b/src/calibre/gui2/main.ui
@ -119,7 +119,11 @@
           </widget>
          </item>
          <item>
-           <widget class="QComboBox" name="output_format" />
+           <widget class="QComboBox" name="output_format" >
+            <property name="toolTip" >
+             <string>Set the output format that is used when converting ebooks and downloading news</string>
+            </property>
+           </widget>
          </item>
         </layout>
        </item>
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -63,6 +63,7 @@ entry_points = {
                             'oeb2lit   = calibre.ebooks.lit.writer:main',
                             'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
                             'comic2epub = calibre.ebooks.epub.from_comic:main',
+                             'comic2mobi = calibre.ebooks.mobi.from_comic:main',
 			     'comic2pdf  = calibre.ebooks.pdf.from_comic:main',
                             'calibre-debug      = calibre.debug:main',
                             'calibredb          = calibre.library.cli:main',
@ -239,6 +240,7 @@ def setup_completion(fatal_errors):
        f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
        f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
        f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
+        f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
        f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
        f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
        f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -23,7 +23,7 @@ recipe_modules = ['recipe_' + r for r in (
           'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
           'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
           'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 'the_age',
-           'laprensa', 'amspec',
+           'laprensa', 'amspec', 'freakonomics',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_freakonomics.py
+++ b/src/calibre/web/feeds/recipes/recipe_freakonomics.py
@ -0,0 +1,20 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Freakonomics(BasicNewsRecipe):  # news recipe for the Freakonomics blog Atom feed
+    
+    title = 'Freakonomics Blog'
+    description = 'The Hidden side of everything'
+    __author__ = 'Kovid Goyal'
+    
+    feeds = [('Blog', 'http://freakonomics.blogs.nytimes.com/feed/atom/')]  # presumably (feed title, feed URL) pairs -- confirm against BasicNewsRecipe
+    
+    def get_article_url(self, article):  # article URL is read from the 'feedburner_origlink' key
+        return article.get('feedburner_origlink', None)  # None when the key is absent
+        
+    def print_version(self, url):  # builds the print-view URL for an article
+        return url + '?pagemode=print'  # NOTE(review): assumes url carries no query string already -- confirm