merge from trunk

2026-01-03 18:50:27 -05:00 · 2011-04-04 11:36:06 +08:00 · 2011-04-04 11:36:06 +08:00 · 1be45db9d7
commit 1be45db9d7
parent 4245c2fefb 492d16e5c9
29 changed files with 1430 additions and 558 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,77 @@
 #  new recipes:
 #    - title: 

+- version: 0.7.53
+  date: 2011-04-01
+
+  new features:
+    - title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specify the title/authors/series/whatever in the template."
+      tickets: [743535]
+
+    - title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
+
+    - title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
+
+    - title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
+
+    - title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
+      tickets: [744020]
+
+    - title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
+      tickets: [742686]
+
+    - title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
+      tickets: [743486]
+
+    - title: "When clicking Next or Previous in the edit metadata dialog, the active book in the main book list is also changed"
+      tickets: [743533]
+
+    - title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
+      tickets: [743645]
+
+    - title: "FB2 Output: Option to set the FB2 genre explicitly."
+      tickets: [743178]
+
+    - title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
+  
+  bug fixes:
+    - title: "Fix text color in the search bar set to black instead of the system font color"
+      tickets: [746846]
+
+    - title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting Chinese docs to HTML instead of gbk"
+      tickets: [745428]
+
+    - title: "Make sorting on the device view faster and more robust."
+      tickets: [742626]
+
+    - title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
+      tickets: [745001]
+
+    - title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
+      tickets: [744365]
+
+    - title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
+      tickets: [744122]
+
+    - title: "RTF Input: Handle RTF files with too many levels of list nesting."
+      tickets: [743243]
+
+  improved recipes:
+    - Irish Times
+    - LifeHacker
+    - Estadao
+    - Folha de Sao Paulo
+
+  new recipes:
+    - title: Financieele Dagblad
+      author: marvin_2
+
+    - title: "Prost Amerika, WV Hooligan and SB Nation"
+      author: rylsfan
+
+    - title: "Cracked.com"
+      author: Nudgenudge
+
 - version: 0.7.52
  date: 2011-03-25

--- a/recipes/developpez.recipe
+++ b/recipes/developpez.recipe
@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1301849956(BasicNewsRecipe):
+    # News recipe for Developpez.com (a single RSS feed, French language).
+
+    # Identification
+    title = u'Developpez.com'
+    __author__ = 'louhike'
+    publisher = u'Developpez.com'
+    description = u'Toutes les news du site Developpez.com'
+    language = 'fr'
+
+    # Download settings
+    encoding = 'ISO-8859-1'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    timefmt = ' [%a, %d %b, %Y]'
+
+    # Clean-up of the downloaded pages: only <div class="content"> is kept.
+    no_stylesheets = True
+    remove_javascript = True
+    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'content'}}]
+
+    feeds = [
+        (u'Tous les articles', u'http://www.developpez.com/index/rss'),
+    ]
+
+    def get_cover_url(self):
+        # The cover is always the same fixed logo image.
+        cover = 'http://javascript.developpez.com/template/images/logo.gif'
+        return cover
+
--- a/recipes/globe_and_mail.recipe
+++ b/recipes/globe_and_mail.recipe
@ -35,8 +35,8 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
      (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
      (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
      (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
-      (u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
-      (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
+      (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
+      (u'Drive', u'http://www.theglobeandmail.com/auto/?service=rss')
    ]

    preprocess_regexps = [
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -36,6 +36,7 @@ class Guardian(BasicNewsRecipe):
    remove_tags = [
                        dict(name='div', attrs={'class':["video-content","videos-third-column"]}),
                        dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
+                        dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
                        dict(name='ul', attrs={'class':["pagination"]}),
                        dict(name='ul', attrs={'id':["content-actions"]}),
                        #dict(name='img'),
--- a/recipes/hawaii.recipe
+++ b/recipes/hawaii.recipe
@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'West Hawaii Today'
-    __author__ = 'Tony Stegall'
+    __author__ = 'Tony Stegall, fixed by HK'
    language = 'en'
    description = 'Westhawaiitoday.com'
    publisher = 'West Hawaii '
@ -15,7 +15,14 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):

    masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'

-
-    feeds          = [ 'http://www.westhawaiitoday.com/rss.xml']
-
+    feeds          = [
+                       ('http://www.westhawaiitoday.com/taxonomy/term/2/feed'),  #Local News
+                       ('http://www.westhawaiitoday.com/taxonomy/term/15/feed'), #Local Sports
+                       ('http://www.westhawaiitoday.com/taxonomy/term/4/feed'),   #Local Features
+                       ('http://www.westhawaiitoday.com/taxonomy/term/12/feed'), #Obituaries
+                       ('http://www.westhawaiitoday.com/taxonomy/term/18/feed'), #Letters
+                       ('http://www.westhawaiitoday.com/taxonomy/term/19/feed'), #Editorial
+                       ('http://www.westhawaiitoday.com/taxonomy/term/20/feed'), #columns
+                       ('http://www.westhawaiitoday.com/taxonomy/term/13/feed') #Volcano Update (Sundays)
+                    ]

--- a/recipes/idnes.recipe
+++ b/recipes/idnes.recipe
@ -34,7 +34,7 @@ class iHeuteRecipe(BasicNewsRecipe):
                   dict(name='table', attrs={'class':['video-16ku9']})]
    remove_tags_after  = [dict(name='div',attrs={'id':['related','related2']})]

-    keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day']})
+    keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day','art-full']})
                      ,dict(name='table',attrs={'class':['kemel-box']})]

    def print_version(self, url):
--- a/recipes/iht.recipe
+++ b/recipes/iht.recipe
@ -15,10 +15,10 @@ class InternationalHeraldTribune(BasicNewsRecipe):
    language = 'en'

    oldest_article = 1
-    max_articles_per_feed = 10
+    max_articles_per_feed = 30
    no_stylesheets = True

-    remove_tags    = [dict(name='div', attrs={'class':'footer'}),
+    remove_tags    = [dict(name='div', attrs={'class':['footer','header']}),
                      dict(name=['form'])]
    preprocess_regexps = [
            (re.compile(r'<!-- webtrends.*', re.DOTALL),
@ -26,6 +26,8 @@ class InternationalHeraldTribune(BasicNewsRecipe):
                          ]
    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'

+    remove_empty_feeds = True
+    
    feeds          = [
                      (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
                      (u'Business', u'http://www.iht.com/rss/business.xml'),
@ -46,13 +48,15 @@ class InternationalHeraldTribune(BasicNewsRecipe):
                    ]
    temp_files = []
    articles_are_obfuscated = True
-
-    def get_obfuscated_article(self, url, logger):
+    
+    masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
+    
+    def get_obfuscated_article(self, url):
        br = self.get_browser()
        br.open(url)
-        br.select_form(name='printFriendly')
-        res = br.submit()
-        html = res.read()
+        response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
+        html = response1.read()
+        
        self.temp_files.append(PersistentTemporaryFile('_iht.html'))
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
--- a/recipes/smith.recipe
+++ b/recipes/smith.recipe
@ -7,6 +7,7 @@ class SmithsonianMagazine(BasicNewsRecipe):
    __author__     = 'Krittika Goyal'
    oldest_article = 31#days
    max_articles_per_feed = 50
+    use_embedded_content = False
    #encoding = 'latin1'
    recursions = 1
    match_regexps = ['&page=[2-9]$']
--- a/setup/publish.py
+++ b/setup/publish.py
@ -45,7 +45,6 @@ class Stage3(Command):
   sub_commands = ['upload_user_manual', 'upload_demo', 'sdist',
            'upload_to_sourceforge', 'upload_to_google_code',
            'tag_release', 'upload_to_server',
-            'upload_to_mobileread',
   ]

 class Stage4(Command):
--- a/setup/upload.py
+++ b/setup/upload.py
@ -356,7 +356,7 @@ class UploadUserManual(Command): # {{{
                                zf.write(os.path.join(x, y))
            bname = self.b(path) + '_plugin.zip'
            dest = '%s/%s'%(DOWNLOADS, bname)
-            subprocess.check_call(['scp', f.name, dest])
+            subprocess.check_call(['scp', f.name, 'divok:'+dest])

    def run(self, opts):
        path = self.j(self.SRC, 'calibre', 'manual', 'plugin_examples')
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.52'
+__version__   = '0.7.53'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re, importlib
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -231,6 +231,17 @@ class HTMLMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.html import get_metadata
        return get_metadata(stream)

+class HTMLZMetadataReader(MetadataReaderPlugin):
+    # Metadata reader plugin registered for the 'htmlz' file type.
+
+    name = 'Read HTMLZ metadata'
+    author = 'John Schember'
+    description = _('Read metadata from %s files') % 'HTMLZ'
+    file_types = set(['htmlz'])
+
+    def get_metadata(self, stream, ftype):
+        # The extz module is only imported when metadata is actually
+        # requested. ftype is accepted but not used.
+        from calibre.ebooks.metadata.extz import get_metadata as read_extz
+        return read_extz(stream)
+
 class IMPMetadataReader(MetadataReaderPlugin):

    name        = 'Read IMP metadata'
@ -407,7 +418,7 @@ class TXTZMetadataReader(MetadataReaderPlugin):
    author      = 'John Schember'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.txtz import get_metadata
+        from calibre.ebooks.metadata.extz import get_metadata
        return get_metadata(stream)

 class ZipMetadataReader(MetadataReaderPlugin):
@ -433,6 +444,17 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.epub import set_metadata
        set_metadata(stream, mi, apply_null=self.apply_null)

+class HTMLZMetadataWriter(MetadataWriterPlugin):
+    # Metadata writer plugin registered for the 'htmlz' file type.
+
+    name        = 'Set HTMLZ metadata'
+    file_types  = set(['htmlz'])
+    # A writer stores metadata *in* the file. The previous wording
+    # ("from ... files") described a reader.
+    description = _('Set metadata in %s files') % 'HTMLZ'
+    author      = 'John Schember'
+
+    def set_metadata(self, stream, mi, type):
+        # `type` shadows the builtin; it is kept so the method signature is
+        # unchanged. It is not used when writing the metadata.
+        from calibre.ebooks.metadata.extz import set_metadata
+        set_metadata(stream, mi)
+
 class LRFMetadataWriter(MetadataWriterPlugin):

    name = 'Set LRF metadata'
@ -505,7 +527,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
    author      = 'John Schember'

    def set_metadata(self, stream, mi, type):
-        from calibre.ebooks.metadata.txtz import set_metadata
+        from calibre.ebooks.metadata.extz import set_metadata
        set_metadata(stream, mi)

 # }}}
@ -514,6 +536,7 @@ from calibre.ebooks.comic.input import ComicInput
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.fb2.input import FB2Input
 from calibre.ebooks.html.input import HTMLInput
+from calibre.ebooks.htmlz.input import HTMLZInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.odt.input import ODTInput
@ -544,6 +567,7 @@ from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.txt.output import TXTZOutput
 from calibre.ebooks.html.output import HTMLOutput
+from calibre.ebooks.htmlz.output import HTMLZOutput
 from calibre.ebooks.snb.output import SNBOutput

 from calibre.customize.profiles import input_profiles, output_profiles
@ -599,6 +623,7 @@ plugins += [
    EPUBInput,
    FB2Input,
    HTMLInput,
+    HTMLZInput,
    LITInput,
    MOBIInput,
    ODTInput,
@ -630,6 +655,7 @@ plugins += [
    TXTOutput,
    TXTZOutput,
    HTMLOutput,
+    HTMLZOutput,
    SNBOutput,
 ]
 # Order here matters. The first matched device is the one used.
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -470,8 +470,8 @@ class KoboReaderOutput(OutputProfile):

    description = _('This profile is intended for the Kobo Reader.')

-    screen_size               = (540, 718)
-    comic_screen_size         = (540, 718)
+    screen_size               = (536, 710)
+    comic_screen_size         = (536, 710)
    dpi                       = 168.451
    fbase                     = 12
    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
        try:
            if encoding.lower().strip() == 'macintosh':
                encoding = 'mac-roman'
+            if encoding.lower().replace('_', '-').strip() in (
+                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
+                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
+                # Microsoft Word exports to HTML with encoding incorrectly set to
+                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
+                encoding = 'gbk'
            raw = raw.decode(encoding, 'replace')
        except LookupError:
            encoding = 'utf-8'
@ -110,11 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
    if resolve_entities:
        raw = substitute_entites(raw)

-    if encoding and encoding.lower().replace('_', '-').strip() in (
-            'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
-            'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
-        # Microsoft Word exports to HTML with encoding incorrectly set to
-        # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
-        encoding = 'gbk'
+

    return raw, encoding
--- a/src/calibre/ebooks/htmlz/init.py
+++ b/src/calibre/ebooks/htmlz/init.py
--- a/src/calibre/ebooks/htmlz/input.py
+++ b/src/calibre/ebooks/htmlz/input.py
@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre import walk
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.utils.zipfile import ZipFile
+
+class HTMLZInput(InputFormatPlugin):
+    # Input plugin for 'htmlz' files. The input is opened as a zip archive,
+    # extracted into the current directory and the first HTML file found is
+    # converted with the HTML input plugin.
+
+    # NOTE(review): 'HTLZ' looks like a typo for 'HTMLZ'. It is left unchanged
+    # because the plugin name may be used as an identifier elsewhere; confirm
+    # before renaming.
+    name        = 'HTLZ Input'
+    author      = 'John Schember'
+    description = 'Convert HTMLZ files to HTML'
+    file_types  = set(['htmlz'])
+
+    def convert(self, stream, options, file_ext, log,
+                accelerators):
+        '''
+        Convert the HTMLZ archive in stream to an OEB book.
+
+        :param stream: File like object containing the zip archive.
+        :param options: Conversion options. The recommended values of the
+            HTML input plugin options are applied to this object and
+            input_encoding is set to utf-8.
+        :param file_ext: Extension of the input file, used to read metadata.
+        :param log: Logger, passed on to the HTML input plugin.
+        :param accelerators: Not used.
+        :return: The OEB book produced by the HTML input plugin.
+        '''
+        self.log = log
+        html = u''
+
+        # Extract content from zip archive.
+        zf = ZipFile(stream)
+        zf.extractall('.')
+
+        # Use the first HTML file found in the extracted tree.
+        for x in walk('.'):
+            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
+                with open(x, 'rb') as tf:
+                    html = tf.read()
+                    break
+
+        # The file was read as bytes. Decode it explicitly: calling
+        # .encode('utf-8') on a byte string makes Python 2 decode it as ASCII
+        # first, which fails for any document with non-ASCII characters.
+        if isinstance(html, bytes):
+            ienc = getattr(options, 'input_encoding', None)
+            if not ienc:
+                from calibre.ebooks.chardet import xml_to_unicode
+                ienc = xml_to_unicode(html[:4096])[-1] or 'utf-8'
+            try:
+                html = html.decode(ienc, 'replace')
+            except LookupError:
+                # Unknown encoding name, fall back to utf-8.
+                html = html.decode('utf-8', 'replace')
+
+        # Run the HTML through the html processing plugin.
+        from calibre.customize.ui import plugin_for_input_format
+        html_input = plugin_for_input_format('html')
+        for opt in html_input.options:
+            setattr(options, opt.option.name, opt.recommended_value)
+        # The temporary file written below is always utf-8.
+        options.input_encoding = 'utf-8'
+        base = os.getcwdu()
+        fname = os.path.join(base, 'index.html')
+        c = 0
+        # Pick a name that does not clash with a file from the archive.
+        while os.path.exists(fname):
+            c += 1
+            fname = os.path.join(base, 'index%d.html' % c)
+        with open(fname, 'wb') as htmlfile:
+            htmlfile.write(html.encode('utf-8'))
+        odi = options.debug_pipeline
+        options.debug_pipeline = None
+        try:
+            # Generate oeb from html conversion.
+            with open(fname, 'rb') as hf:
+                oeb = html_input.convert(hf, options, 'html', log, {})
+        finally:
+            # Restore the caller's setting and remove the temporary file even
+            # when the conversion fails.
+            options.debug_pipeline = odi
+            os.remove(fname)
+
+        # Set metadata from file.
+        from calibre.customize.ui import get_file_type_metadata
+        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
+        mi = get_file_type_metadata(stream, file_ext)
+        meta_info_to_oeb_metadata(mi, oeb.metadata, log)
+
+        return oeb
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@ -0,0 +1,372 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into a single (more or less) HTML file.
+'''
+
+import os
+
+from urlparse import urlparse
+
+from calibre import prepare_string_for_xml
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.utils.logging import default_log
+
+class OEB2HTML(object):
+    '''
+    Base class. All subclasses should implement dump_text to actually transform
+    content. Also, callers should use oeb2html to get the transformed html.
+    links and images can be retrieved after calling oeb2html to get the mapping
+    of OEB links and images to the new names used in the html returned by oeb2html.
+    Images will always be referenced as if they are in an images directory.
+
+    Use get_css to get the CSS classes for the OEB document as a string.
+    '''
+
+    def __init__(self, log=None):
+        '''
+        @log: Logger to use. default_log is used when it is None.
+        '''
+        self.log = default_log if log is None else log
+        # Maps 'href#fragment' keys to the anchor names used in the output.
+        self.links = {}
+        # Maps image hrefs (as returned by abshref) to output file names.
+        self.images = {}
+
+    def oeb2html(self, oeb_book, opts):
+        '''
+        Convert oeb_book and return the HTML as a single string. The links
+        and images mappings are reset on every call.
+        '''
+        self.log.info('Converting OEB book to HTML...')
+        self.opts = opts
+        self.links = {}
+        self.images = {}
+
+        return self.mlize_spine(oeb_book)
+
+    def mlize_spine(self, oeb_book):
+        '''
+        Run dump_text over the body of every spine item and join the results
+        into one HTML document.
+        '''
+        # The head has to come before the body, not inside it.
+        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head><body>']
+        for item in oeb_book.spine:
+            self.log.debug('Converting %s to HTML...' % item.href)
+            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+            output.append('\n\n')
+        output.append('</body></html>')
+        return ''.join(output)
+
+    def dump_text(self, elem, stylizer, page):
+        '''
+        Return a list of strings with the HTML for elem. Must be implemented
+        by subclasses.
+        '''
+        raise NotImplementedError
+
+    def get_link_id(self, href, aid):
+        '''
+        Return the output anchor name for the id aid in the file href,
+        creating a new name the first time the pair is seen.
+        '''
+        aid = '%s#%s' % (href, aid)
+        if aid not in self.links:
+            self.links[aid] = 'calibre_link-%s' % len(self.links)
+        return self.links[aid]
+
+    def rewrite_links(self, tag, attribs, page):
+        '''
+        Rewrite the id and, for a tags, the href in attribs so that they use
+        the anchor names of the single output file. attribs is changed in
+        place and returned.
+        '''
+        # Rewrite ids.
+        if 'id' in attribs:
+            attribs['id'] = self.get_link_id(page.href, attribs['id'])
+        # Rewrite links. An a tag without href (a named anchor) is left alone
+        # instead of raising KeyError.
+        if tag == 'a' and 'href' in attribs:
+            href = page.abshref(attribs['href'])
+            if self.url_is_relative(href):
+                # Links to a whole file use the same 'href#' key that
+                # get_link_id(href, '') produces.
+                if '#' not in href:
+                    href += '#'
+                if href not in self.links:
+                    self.links[href] = 'calibre_link-%s' % len(self.links)
+                href = '#%s' % self.links[href]
+            attribs['href'] = href
+        return attribs
+
+    def rewrite_images(self, tag, attribs, page):
+        '''
+        Point the src of img tags at images/<name>, where <name> is a
+        zero padded counter plus the original extension. The mapping is
+        recorded in self.images. attribs is changed in place and returned.
+        '''
+        if tag == 'img':
+            src = attribs.get('src', None)
+            if src:
+                src = page.abshref(src)
+                if src not in self.images:
+                    ext = os.path.splitext(src)[1]
+                    fname = '%s%s' % (len(self.images), ext)
+                    fname = fname.zfill(10)
+                    self.images[src] = fname
+                attribs['src'] = 'images/%s' % self.images[src]
+        return attribs
+
+    def url_is_relative(self, url):
+        '''
+        Return True if url has no scheme.
+        '''
+        o = urlparse(url)
+        return not o.scheme
+
+    def get_css(self, oeb_book):
+        '''
+        Return the text of the first text/css item in the manifest, or an
+        empty string if there is none.
+        '''
+        css = u''
+        for item in oeb_book.manifest:
+            if item.media_type == 'text/css':
+                css = item.data.cssText
+                break
+        return css
+
+
+class OEB2HTMLNoCSSizer(OEB2HTML):
+    '''
+    This will remap a small number of CSS styles to equivalent HTML tags.
+    '''
+
+    def dump_text(self, elem, stylizer, page):
+        '''
+        Return a list of strings with the HTML for elem, its children and
+        its tail. class and style attributes are removed; bold, italic,
+        underline and line-through styles become b, i, u and s tags.
+
+        @elem: The element in the etree that we are working on.
+        @stylizer: The style information attached to the element.
+        @page: The spine item containing elem, used to rewrite links and images.
+        '''
+
+        # We can only process tags. If there isn't a tag return any text.
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            p = elem.getparent()
+            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
+                    and elem.tail:
+                return [prepare_string_for_xml(elem.tail)]
+            return ['']
+
+        # Setup our variables.
+        text = ['']
+        style = stylizer.style(elem)
+        tags = []
+        tag = barename(elem.tag)
+        attribs = elem.attrib
+        if tag == 'body':
+            # Every body becomes a div that can be linked to.
+            tag = 'div'
+            attribs['id'] = self.get_link_id(page.href, '')
+        tags.append(tag)
+
+        # Ignore anything that is set to not be displayed.
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            # The tail is the text that follows the element. It belongs to
+            # the parent, so it is kept even though the element is dropped.
+            if getattr(elem, 'tail', None):
+                return [prepare_string_for_xml(elem.tail)]
+            return ['']
+
+        # Remove attributes we won't want.
+        if 'class' in attribs:
+            del attribs['class']
+        if 'style' in attribs:
+            del attribs['style']
+
+        attribs = self.rewrite_links(tag, attribs, page)
+        attribs = self.rewrite_images(tag, attribs, page)
+
+        # Turn the rest of the attributes into a string we can write with the tag.
+        at = ''
+        for k, v in attribs.items():
+            at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+
+        # Write the tag.
+        text.append('<%s%s>' % (tag, at))
+
+        # Turn styles into tags.
+        if style['font-weight'] in ('bold', 'bolder'):
+            text.append('<b>')
+            tags.append('b')
+        if style['font-style'] == 'italic':
+            text.append('<i>')
+            tags.append('i')
+        if style['text-decoration'] == 'underline':
+            text.append('<u>')
+            tags.append('u')
+        if style['text-decoration'] == 'line-through':
+            text.append('<s>')
+            tags.append('s')
+
+        # Process tags that contain text. The text is escaped so that
+        # characters such as & and < do not end up as markup.
+        if hasattr(elem, 'text') and elem.text:
+            text.append(prepare_string_for_xml(elem.text))
+
+        # Recurse down into tags within the tag we are in.
+        for item in elem:
+            text += self.dump_text(item, stylizer, page)
+
+        # Close all open tags, innermost first.
+        tags.reverse()
+        for t in tags:
+            text.append('</%s>' % t)
+
+        # Add the text that is outside of the tag.
+        if hasattr(elem, 'tail') and elem.tail:
+            text.append(prepare_string_for_xml(elem.tail))
+
+        return text
+
+
+class OEB2HTMLInlineCSSizer(OEB2HTML):
+    '''
+    Turns external CSS classes into inline style attributes.
+    '''
+
+    def dump_text(self, elem, stylizer, page):
+        '''
+        Return a list of strings with the HTML for elem, its children and
+        its tail. The class attribute is removed and the style of the
+        element is written as a style attribute.
+
+        @elem: The element in the etree that we are working on.
+        @stylizer: The style information attached to the element.
+        @page: The spine item containing elem, used to rewrite links and images.
+        '''
+
+        # We can only process tags. If there isn't a tag return any text.
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            p = elem.getparent()
+            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
+                    and elem.tail:
+                return [prepare_string_for_xml(elem.tail)]
+            return ['']
+
+        # Setup our variables.
+        text = ['']
+        style = stylizer.style(elem)
+        tags = []
+        tag = barename(elem.tag)
+        attribs = elem.attrib
+
+        style_a = '%s' % style
+        if tag == 'body':
+            # Every body becomes a div that can be linked to.
+            tag = 'div'
+            attribs['id'] = self.get_link_id(page.href, '')
+            if not style['page-break-before'] == 'always':
+                # The parentheses are required: without them the conditional
+                # expression swallows the concatenation and either the
+                # existing style or the page break is lost.
+                style_a = 'page-break-before: always;' + (' ' if style_a else '') + style_a
+        tags.append(tag)
+
+        # Remove attributes we won't want.
+        if 'class' in attribs:
+            del attribs['class']
+        if 'style' in attribs:
+            del attribs['style']
+
+        attribs = self.rewrite_links(tag, attribs, page)
+        attribs = self.rewrite_images(tag, attribs, page)
+
+        # Turn the rest of the attributes into a string we can write with the tag.
+        at = ''
+        for k, v in attribs.items():
+            at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+
+        # Turn style into strings for putting in the tag. It is escaped like
+        # any other attribute value, as CSS can contain quotes.
+        style_t = ''
+        if style_a:
+            style_t = ' style="%s"' % prepare_string_for_xml(style_a, attribute=True)
+
+        # Write the tag.
+        text.append('<%s%s%s>' % (tag, at, style_t))
+
+        # Process tags that contain text. The text is escaped so that
+        # characters such as & and < do not end up as markup.
+        if hasattr(elem, 'text') and elem.text:
+            text.append(prepare_string_for_xml(elem.text))
+
+        # Recurse down into tags within the tag we are in.
+        for item in elem:
+            text += self.dump_text(item, stylizer, page)
+
+        # Close all open tags.
+        tags.reverse()
+        for t in tags:
+            text.append('</%s>' % t)
+
+        # Add the text that is outside of the tag.
+        if hasattr(elem, 'tail') and elem.tail:
+            text.append(prepare_string_for_xml(elem.tail))
+
+        return text
+
+
+class OEB2HTMLClassCSSizer(OEB2HTML):
+    '''
+    Use CSS classes. css_style option can specify whether to use
+    inline classes (style tag in the head) or reference an external
+    CSS file called style.css.
+    '''
+
+    def mlize_spine(self, oeb_book):
+        '''
+        Run dump_text over the body of every spine item and wrap the result
+        in an HTML document. The head links to style.css when the
+        htmlz_class_style option is 'external', otherwise the CSS returned by
+        get_css is embedded in a style tag.
+        '''
+        output = []
+        for item in oeb_book.spine:
+            self.log.debug('Converting %s to HTML...' % item.href)
+            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+            output.append('\n\n')
+        if self.opts.htmlz_class_style == 'external':
+            css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
+        else:
+            css =  u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
+        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + [css] + [u'</head><body>'] + output + [u'</body></html>']
+        return ''.join(output)
+
+    def dump_text(self, elem, stylizer, page):
+        '''
+        Return a list of strings with the HTML for elem, its children and
+        its tail. The class attribute is kept, the style attribute is removed.
+
+        @elem: The element in the etree that we are working on.
+        @stylizer: The style information attached to the element. Not used
+                   by this class.
+        @page: The spine item containing elem, used to rewrite links and images.
+        '''
+
+        # We can only process tags. If there isn't a tag return any text.
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            p = elem.getparent()
+            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
+                    and elem.tail:
+                return [prepare_string_for_xml(elem.tail)]
+            return ['']
+
+        # Setup our variables.
+        text = ['']
+        tags = []
+        tag = barename(elem.tag)
+        attribs = elem.attrib
+
+        if tag == 'body':
+            # Every body becomes a div that can be linked to.
+            tag = 'div'
+            attribs['id'] = self.get_link_id(page.href, '')
+        tags.append(tag)
+
+        # Remove attributes we won't want.
+        if 'style' in attribs:
+            del attribs['style']
+
+        attribs = self.rewrite_links(tag, attribs, page)
+        attribs = self.rewrite_images(tag, attribs, page)
+
+        # Turn the rest of the attributes into a string we can write with the tag.
+        at = ''
+        for k, v in attribs.items():
+            at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+
+        # Write the tag.
+        text.append('<%s%s>' % (tag, at))
+
+        # Process tags that contain text. The text is escaped so that
+        # characters such as & and < do not end up as markup.
+        if hasattr(elem, 'text') and elem.text:
+            text.append(prepare_string_for_xml(elem.text))
+
+        # Recurse down into tags within the tag we are in.
+        for item in elem:
+            text += self.dump_text(item, stylizer, page)
+
+        # Close all open tags.
+        tags.reverse()
+        for t in tags:
+            text.append('</%s>' % t)
+
+        # Add the text that is outside of the tag.
+        if hasattr(elem, 'tail') and elem.tail:
+            text.append(prepare_string_for_xml(elem.tail))
+
+        return text
+
+
+def oeb2html_no_css(oeb_book, log, opts):
+    '''
+    Convert oeb_book with OEB2HTMLNoCSSizer.
+
+    Returns a (html, images) tuple: the HTML string and the images mapping
+    of the converter.
+    '''
+    converter = OEB2HTMLNoCSSizer(log)
+    markup = converter.oeb2html(oeb_book, opts)
+    # The mapping is read after converting because oeb2html replaces it.
+    return (markup, converter.images)
+
+def oeb2html_inline_css(oeb_book, log, opts):
+    '''
+    Convert oeb_book with OEB2HTMLInlineCSSizer.
+
+    Returns a (html, images) tuple: the HTML string and the images mapping
+    of the converter.
+    '''
+    converter = OEB2HTMLInlineCSSizer(log)
+    markup = converter.oeb2html(oeb_book, opts)
+    # The mapping is read after converting because oeb2html replaces it.
+    return (markup, converter.images)
+
+def oeb2html_class_css(oeb_book, log, opts):
+    '''
+    Convert oeb_book with OEB2HTMLClassCSSizer, with the CSS placed in a
+    style tag in the head of the document.
+
+    opts is modified: htmlz_class_style is set to 'inline'.
+
+    Returns a (html, images) tuple: the HTML string and the images mapping
+    of the converter.
+    '''
+    izer = OEB2HTMLClassCSSizer(log)
+    # OEB2HTMLClassCSSizer.mlize_spine reads opts.htmlz_class_style, so this
+    # is the attribute that has to be set for the choice to take effect.
+    setattr(opts, 'htmlz_class_style', 'inline')
+    # Attribute name set by earlier code, kept in case something relies on it.
+    setattr(opts, 'class_style', 'inline')
+    html = izer.oeb2html(oeb_book, opts)
+    images = izer.images
+    return (html, images)
--- a/src/calibre/ebooks/htmlz/output.py
+++ b/src/calibre/ebooks/htmlz/output.py
@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from lxml import etree
+
+from calibre.customize.conversion import OutputFormatPlugin, \
+    OptionRecommendation
+from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+
+class HTMLZOutput(OutputFormatPlugin):
+    '''
+    Output plugin that writes an OEB book as an HTMLZ archive.
+
+    The archive is built from a temporary directory holding index.html,
+    an optional style.css, an images directory for the images referenced
+    by the generated HTML, and metadata.opf.
+    '''
+
+    name = 'HTMLZ Output'
+    author = 'John Schember'
+    file_type = 'htmlz'
+
+    options = set([
+        OptionRecommendation(name='htmlz_css_type', recommended_value='class',
+            level=OptionRecommendation.LOW,
+            choices=['class', 'inline', 'tag'],
+            help=_('Specify the handling of CSS. Default is class.\n'
+                   'class: Use CSS classes and have elements reference them.\n'
+                   'inline: Write the CSS as an inline style attribute.\n'
+                   'tag: Turn as many CSS styles as possible into HTML tags.'
+            )),
+        OptionRecommendation(name='htmlz_class_style', recommended_value='external',
+            level=OptionRecommendation.LOW,
+            choices=['external', 'inline'],
+            help=_('How to handle the CSS when using css-type = \'class\'.\n'
+                   'Default is external.\n'
+                   'external: Use an external CSS file that is linked in the document.\n'
+                   'inline: Place the CSS in the head section of the document.'
+            )),
+    ])
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Convert oeb_book to HTML and zip the result to output_path.
+
+        :param oeb_book: The book to convert; its manifest and metadata are read.
+        :param output_path: Path of the zip archive to create.
+        :param input_plugin: Not used by this method.
+        :param opts: Conversion options; htmlz_css_type and
+            htmlz_class_style are consulted here.
+        :param log: Logger handed to the HTML converter.
+        '''
+        # HTML: pick the converter class matching the requested CSS handling.
+        # Anything other than 'inline' or 'tag' falls back to class based CSS.
+        if opts.htmlz_css_type == 'inline':
+            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
+            OEB2HTMLizer = OEB2HTMLInlineCSSizer
+        elif opts.htmlz_css_type == 'tag':
+            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
+            OEB2HTMLizer = OEB2HTMLNoCSSizer
+        else:
+            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
+
+        with TemporaryDirectory('_htmlz_output') as tdir:
+            htmlizer = OEB2HTMLizer(log)
+            html = htmlizer.oeb2html(oeb_book, opts)
+
+            with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
+                tf.write(html)
+
+            # CSS: only written as a separate file for external class styles.
+            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
+                with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
+                    tf.write(htmlizer.get_css(oeb_book))
+
+            # Images: htmlizer.images maps a manifest href to the file name
+            # to use inside the images directory.
+            images = htmlizer.images
+            if images:
+                images_dir = os.path.join(tdir, 'images')
+                if not os.path.exists(images_dir):
+                    os.makedirs(images_dir)
+                for item in oeb_book.manifest:
+                    if item.media_type in OEB_IMAGES and item.href in images:
+                        fname = os.path.join(images_dir, images[item.href])
+                        with open(fname, 'wb') as img:
+                            img.write(item.data)
+
+            # Metadata
+            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
+                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
+
+            # Close the archive explicitly instead of relying on garbage
+            # collection to finalize it, and do so even if adding fails.
+            htmlz = ZipFile(output_path, 'w')
+            try:
+                htmlz.add_dir(tdir)
+            finally:
+                htmlz.close()
--- a/src/calibre/ebooks/metadata/extz.py
+++ b/src/calibre/ebooks/metadata/extz.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'

 '''
-Read meta information from TXT files
+Read meta information from extZ (TXTZ, HTMLZ...) files.
 '''

 import os
--- a/src/calibre/ebooks/oeb/transforms/page_margin.py
+++ b/src/calibre/ebooks/oeb/transforms/page_margin.py
@ -20,7 +20,8 @@ class RemoveAdobeMargins(object):
        self.oeb, self.opts, self.log = oeb, opts, log

        for item in self.oeb.manifest:
-            if item.media_type == 'application/vnd.adobe-page-template+xml':
+            if item.media_type in ('application/vnd.adobe-page-template+xml',
+                    'application/vnd.adobe.page-template+xml'):
                self.log('Removing page margins specified in the'
                        ' Adobe page template')
                for elem in item.data.xpath(
@ -35,7 +36,7 @@ class RemoveFakeMargins(object):

    '''
    Remove left and right margins from paragraph/divs if the same margin is specified
-    on almost all the elements of at that level.
+    on almost all the elements at that level.

    Must be called only after CSS flattening
    '''
--- a/src/calibre/ebooks/pdf/reflow.cpp
+++ b/src/calibre/ebooks/pdf/reflow.cpp
@ -721,7 +721,7 @@ Reflow::render(int first_page, int last_page) {
    globalParams->setTextEncoding(encoding);

    int doc_pages = doc->getNumPages();
-    if (last_page < 1 or last_page > doc_pages) last_page = doc_pages;
+    if (last_page < 1 || last_page > doc_pages) last_page = doc_pages;
    if (first_page < 1) first_page = 1;
    if (first_page > last_page) first_page = last_page;

--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -270,6 +270,8 @@ class BookInfo(QWebView):
            <style type="text/css">
                body, td {background-color: transparent; font-size: %dpx; color: %s }
                a { text-decoration: none; color: blue }
+                div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
+                table { margin-bottom: 0; padding-bottom: 0; }
            </style>
            </head>
            <body>
@ -278,9 +280,10 @@ class BookInfo(QWebView):
        <html>
        '''%(f, c)
        if self.vertical:
+            extra = ''
            if comments:
-                rows += u'<tr><td colspan="2">%s</td></tr>'%comments
-            self.setHtml(templ%(u'<table>%s</table>'%rows))
+                extra = u'<div class="description">%s</div>'%comments
+            self.setHtml(templ%(u'<table>%s</table>%s'%(rows, extra)))
        else:
            left_pane = u'<table>%s</table>'%rows
            right_pane = u'<div>%s</div>'%comments
--- a/src/calibre/gui2/convert/htmlz_output.py
+++ b/src/calibre/gui2/convert/htmlz_output.py
@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.gui2.convert.htmlz_output_ui import Ui_Form
+from calibre.gui2.convert import Widget
+
+format_model = None
+
+class PluginWidget(Widget, Ui_Form):
+    '''Conversion dialog page exposing the HTMLZ output options.'''
+
+    TITLE = _('HTMLZ Output')
+    HELP = _('Options specific to')+' HTMLZ '+_('output')
+    COMMIT_NAME = 'htmlz_output'
+    ICON = I('mimetypes/html.png')
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        '''Fill each option combo box with its choices, then load the options.'''
+        option_names = ('htmlz_css_type', 'htmlz_class_style')
+        Widget.__init__(self, parent, list(option_names))
+        self.db = db
+        self.book_id = book_id
+        # Each option has a combo box named opt_<option name> on the form.
+        for option_name in option_names:
+            choices = get_option(option_name).option.choices
+            combo = getattr(self, 'opt_' + option_name)
+            for choice in choices:
+                combo.addItem(choice)
+        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/htmlz_output.ui
+++ b/src/calibre/gui2/convert/htmlz_output.ui
@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>438</width>
+    <height>300</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="2" column="0">
+    <spacer name="verticalSpacer">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+     <property name="sizeHint" stdset="0">
+      <size>
+       <width>20</width>
+       <height>246</height>
+      </size>
+     </property>
+    </spacer>
+   </item>
+   <item row="0" column="0">
+    <widget class="QLabel" name="label">
+     <property name="text">
+      <string>How to handle CSS</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_htmlz_css_type</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="1">
+    <widget class="QComboBox" name="opt_htmlz_css_type">
+     <property name="minimumContentsLength">
+      <number>20</number>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>How to handle class based CSS</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="1">
+    <widget class="QComboBox" name="opt_htmlz_class_style"/>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -136,17 +136,17 @@ class PostInstall:
        self.icon_resources = []
        self.menu_resources = []
        self.mime_resources = []
-        if islinux:
+        if islinux or isfreebsd:
            self.setup_completion()
        self.install_man_pages()
-        if islinux:
+        if islinux or isfreebsd:
            self.setup_desktop_integration()
        self.create_uninstaller()

        from calibre.utils.config import config_dir
        if os.path.exists(config_dir):
            os.chdir(config_dir)
-            if islinux:
+            if islinux or isfreebsd:
                for f in os.listdir('.'):
                    if os.stat(f).st_uid == 0:
                        os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
--- a/src/calibre/manual/index.rst
+++ b/src/calibre/manual/index.rst
@ -40,3 +40,84 @@ Sections
   glossary


+The main |app| user interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   gui
+
+Adding your favorite news website to |app|
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   news
+
+The |app| e-book viewer
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   viewer
+
+Customizing |app|'s e-book conversion
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   conversion
+
+Editing e-book metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   metadata
+
+Frequently Asked Questions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   faq
+
+Tutorials
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   tutorials
+
+Customizing |app|
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   customize
+
+The Command Line Interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   cli/cli-index
+
+Setting up a |app| development environment
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. toctree::
+   :maxdepth: 2
+
+   develop
+
+
--- a/src/calibre/manual/news.rst
+++ b/src/calibre/manual/news.rst
@ -263,20 +263,18 @@ Tips for developing new recipes

 The best way to develop new recipes is to use the command line interface. Create the recipe using your favorite python editor and save it to a file say :file:`myrecipe.recipe`. The `.recipe` extension is required. You can download content using this recipe with the command::

-    ebook-convert myrecipe.recipe output_dir --test -vv
+    ebook-convert myrecipe.recipe .epub --test -vv --debug-pipeline debug

-The :command:`ebook-convert` will download all the webpages and save them to the directory :file:`output_dir`, creating it if necessary. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds. 
+The command :command:`ebook-convert` will download all the webpages and save them to the EPUB file :file:`myrecipe.epub`. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds. In addition, ebook-convert will put the downloaded HTML into the ``debug/input`` directory, where ``debug`` is the directory you specified in the :option:`--debug-pipeline` option. 

-Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::
+Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`debug/input/index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::

    ebook-convert myrecipe.recipe myrecipe.epub
    ebook-convert myrecipe.recipe myrecipe.mobi
    ...


-If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, add a comment to the ticket http://bugs.calibre-ebook.com/ticket/405
-
-Alternatively, you could just post your recipe in the calibre forum at http://www.mobileread.com/forums/forumdisplay.php?f=166 to share it with other calibre users.
+If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, post your recipe in the `calibre recipes forum <http://www.mobileread.com/forums/forumdisplay.php?f=228>`_ to share it with other calibre users.


 .. seealso::
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@ -1386,12 +1386,19 @@ ol, ul { padding-left: 2em; }
        self.purgedata()

    def s_text_s(self, tag, attrs):
-        """ Generate a number of spaces. ODF has an element; HTML uses &nbsp;
-            We use &#160; so we can send the output through an XML parser if we desire to
+        # Changed by Kovid to fix non breaking spaces being prepended to
+        # element instead of being part of the text flow.
+        # We don't use an entity for the nbsp as the contents of self.data will
+        # be escaped on writeout.
+        """ Generate a number of spaces. We use the non breaking space for
+        the text:s ODF element.
        """
-        c = attrs.get( (TEXTNS,'c'),"1")
-        for x in xrange(int(c)):
-            self.writeout('&#160;')
+        try:
+            c = int(attrs.get((TEXTNS, 'c'), 1))
+        except:
+            c = 0
+        if c > 0:
+            self.data.append(u'\u00a0'*c)

    def s_text_span(self, tag, attrs):
        """ The <text:span> element matches the <span> element in HTML. It is