Merge from trunk

2025-08-11 09:13:57 -04:00 · 2013-05-10 12:41:07 +02:00 · 2013-05-10 12:41:07 +02:00 · 145858572b
commit 145858572b
parent 83ae3fe959 8d5ffbbe0a
100 changed files with 41313 additions and 35333 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -20,6 +20,60 @@
 #   new recipes:
 #     - title: 

+- version: 0.9.30
+  date: 2013-05-10
+
+  new features:
+    - title: "Kobo driver: Add support for showing 'Archived' books on the device. Also up the supported firmware version to 2.5.3."
+      tickets: [1177677]
+
+    - title: "Driver for Blackberry 9790"
+      tickets: [1176607]
+
+    - title: "Add a tweak to turn off the highlighting of the book count when using a virtual library (Preferences->Tweaks)"
+
+    - title: "Add a button to clear the viewer search history in the viewer Preferences, under Miscellaneous"
+
+    - title: "Add keyboard shortcuts to clear the virtual Library and the additional restriction (Ctrl+Esc and Alt+Esc). Also use Shift+Esc to bring keyboard focus back to the book list. Can be changed under Preferences->Keyboard"
+
+    - title: "Docx metadata: Read the language of the file, if present"
+ 
+  bug fixes:
+    - title: "Kobo driver: Fix unable to read SD card on OS X/Linux"
+      tickets: [1174815]
+
+    - title: "Content server: Fix unable to download ORIGINAL_* formats"
+      tickets: [1177158]
+
+    - title: "Fix regression that broke searching for terms containing a quote mark"
+      tickets: [1177114]
+
+    - title: "Fix regression that broke conversion of txt files when no input encoding is specified"
+      tickets: [1176622]
+
+    - title: "When changing to a virtual library, refresh the Book Details panel."
+      tickets: [1176296]
+
+    - title: "Fix regression that caused searching for user categories to break."
+      tickets: [1176187]
+
+    - title: "Fix error when downloading only covers and reviewing downloaded metadata."
+      tickets: [1176253]
+
+    - title: "MOBI metadata: Strip XML unsafe unicode codepoints when reading metadata from MOBI files."
+      tickets: [1175965]
+
+    - title: "Txt Input: Use the gbk encoding for txt files with detected encoding of gb2312."
+      tickets: [1175974]
+
+    - title: "When pressing Ctrl+Home/End preserve the horizontal scroll position in the book list"
+
+  improved recipes:
+    - NSFW
+    - Go Comics
+    - Various Polish news sources
+    - The Sun
+
 - version: 0.9.29
  date: 2013-05-03

--- a/recipes/nsfw_corp.recipe
+++ b/recipes/nsfw_corp.recipe
@ -1,11 +1,9 @@
-
 __license__   = 'GPL v3'
-__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2012-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.nsfwcorp.com
 '''

-import urllib
 from calibre.web.feeds.news import BasicNewsRecipe

 class NotSafeForWork(BasicNewsRecipe):
@ -20,8 +18,8 @@ class NotSafeForWork(BasicNewsRecipe):
    needs_subscription     = True
    auto_cleanup           = False
    INDEX                  = 'https://www.nsfwcorp.com'
-    LOGIN                  = INDEX + '/login/target/'
-    SETTINGS               = INDEX + '/settings/'
+    LOGIN                  = INDEX + '/account/login/?next=%2F'
+    SETTINGS               = INDEX + '/account/settings/'
    use_embedded_content   = True
    language               = 'en'
    publication_type       = 'magazine'
@ -48,19 +46,20 @@ class NotSafeForWork(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
-        br.open(self.LOGIN)
+        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({ 'email':self.username
-                                     ,'password':self.password
-                                   })
-            br.open(self.LOGIN, data)
+            br.open(self.LOGIN)
+            br.select_form(nr=0)
+            br['email'   ] = self.username
+            br['password'] = self.password
+            br.submit()
        return br

    def get_feeds(self):
        self.feeds = []
        soup = self.index_to_soup(self.SETTINGS)
        for item in soup.findAll('input', attrs={'type':'text'}):
-            if item.has_key('value') and item['value'].startswith('http://www.nsfwcorp.com/feed/'):
+            if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'):
               self.feeds.append(item['value'])
               return self.feeds
        return self.feeds
--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@ -116,7 +116,9 @@ tarball. Edit setup.py and set zip_safe=False. Then run::

 Run the following command to install python dependencies::

-    easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto cssselect
+    easy_install --always-unzip -U mechanize python-dateutil dnspython cssutils clientform pycrypto cssselect
+
+Install pyreadline from https://pypi.python.org/pypi/pyreadline/2.0

 Install pywin32 and edit win32com\__init__.py setting _frozen = True and
 __gen_path__ to a temp dir (otherwise it tries to set it to a dir in the
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
@ -12,14 +12,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-04-21 08:00+0000\n"
+"PO-Revision-Date: 2013-05-06 09:36+0000\n"
 "Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-04-22 05:23+0000\n"
-"X-Generator: Launchpad (build 16567)\n"
+"X-Launchpad-Export-Date: 2013-05-07 05:28+0000\n"
+"X-Generator: Launchpad (build 16598)\n"
 "Language: ca\n"

 #. name for aaa
--- a/setup/iso_639/de.po
+++ b/setup/iso_639/de.po
@ -18,14 +18,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-04-11 13:29+0000\n"
+"PO-Revision-Date: 2013-05-06 09:41+0000\n"
 "Last-Translator: Simon Schütte <simonschuette@arcor.de>\n"
 "Language-Team: Ubuntu German Translators\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-04-12 05:20+0000\n"
-"X-Generator: Launchpad (build 16564)\n"
+"X-Launchpad-Export-Date: 2013-05-07 05:29+0000\n"
+"X-Generator: Launchpad (build 16598)\n"
 "Language: de\n"

 #. name for aaa
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 9, 29)
+numeric_version = (0, 9, 30)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@ -0,0 +1,267 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from collections import OrderedDict
+from calibre.ebooks.docx.names import XPath, get
+
+class Inherit:
+    pass
+inherit = Inherit()
+
+def binary_property(parent, name):
+    vals = XPath('./w:%s' % name)(parent)
+    if not vals:
+        return inherit
+    val = get(vals[0], 'w:val', 'on')
+    return True if val in {'on', '1', 'true'} else False
+
+def simple_color(col, auto='black'):
+    if not col or col == 'auto' or len(col) != 6:
+        return auto
+    return '#'+col
+
+def simple_float(val, mult=1.0):
+    try:
+        return float(val) * mult
+    except (ValueError, TypeError, AttributeError, KeyError):
+        return None
+
+
+LINE_STYLES = {  # {{{
+    'basicBlackDashes': 'dashed',
+    'basicBlackDots': 'dotted',
+    'basicBlackSquares': 'dashed',
+    'basicThinLines': 'solid',
+    'dashDotStroked': 'groove',
+    'dashed': 'dashed',
+    'dashSmallGap': 'dashed',
+    'dotDash': 'dashed',
+    'dotDotDash': 'dashed',
+    'dotted': 'dotted',
+    'double': 'double',
+    'inset': 'inset',
+    'nil': 'none',
+    'none': 'none',
+    'outset': 'outset',
+    'single': 'solid',
+    'thick': 'solid',
+    'thickThinLargeGap': 'double',
+    'thickThinMediumGap': 'double',
+    'thickThinSmallGap' : 'double',
+    'thinThickLargeGap': 'double',
+    'thinThickMediumGap': 'double',
+    'thinThickSmallGap': 'double',
+    'thinThickThinLargeGap': 'double',
+    'thinThickThinMediumGap': 'double',
+    'thinThickThinSmallGap': 'double',
+    'threeDEmboss': 'ridge',
+    'threeDEngrave': 'groove',
+    'triple': 'double',
+}  # }}}
+
+# Read from XML {{{
+def read_border(parent, dest):
+    tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
+            'border_%s_style':inherit, 'border_%s_color':inherit}
+    vals = {}
+    for edge in ('left', 'top', 'right', 'bottom'):
+        vals.update({k % edge:v for k, v in tvals.iteritems()})
+
+    for border in XPath('./w:pBdr')(parent):
+        for edge in ('left', 'top', 'right', 'bottom'):
+            for elem in XPath('./w:%s' % edge):
+                color = get(elem, 'w:color')
+                if color is not None:
+                    vals['border_%s_color' % edge] = simple_color(color)
+                style = get(elem, 'w:val')
+                if style is not None:
+                    vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
+                space = get(elem, 'w:space')
+                if space is not None:
+                    try:
+                        vals['padding_%s' % edge] = float(space)
+                    except (ValueError, TypeError):
+                        pass
+                sz = get(elem, 'w:sz')
+                if sz is not None:
+                    # we dont care about art borders (they are only used for page borders)
+                    try:
+                        vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
+                    except (ValueError, TypeError):
+                        pass
+
+    for key, val in vals.iteritems():
+        setattr(dest, key, val)
+
+def read_indent(parent, dest):
+    padding_left = padding_right = text_indent = inherit
+    for indent in XPath('./w:ind')(parent):
+        l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
+        pl = simple_float(lc, 0.01) if lc is not None else simple_float(l, 0.05) if l is not None else None
+        if pl is not None:
+            padding_left = '%.3g%s' % (pl, 'em' if lc is not None else 'pt')
+
+        r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
+        pr = simple_float(rc, 0.01) if rc is not None else simple_float(r, 0.05) if r is not None else None
+        if pr is not None:
+            padding_right = '%.3g%s' % (pr, 'em' if rc is not None else 'pt')
+
+        h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
+        fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
+        h = h if h is None else '-'+h
+        hc = hc if hc is None else '-'+hc
+        ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
+              simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
+        if ti is not None:
+            text_indent = '%.3g%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
+
+    setattr(dest, 'margin_left', padding_left)
+    setattr(dest, 'margin_right', padding_right)
+    setattr(dest, 'text_indent', text_indent)
+
+def read_justification(parent, dest):
+    ans = inherit
+    for jc in XPath('./w:jc[@w:val]')(parent):
+        val = get(jc, 'w:val')
+        if not val:
+            continue
+        if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
+            ans = 'justify'
+        if val in {'left', 'center', 'right',}:
+            ans = val
+    setattr(dest, 'text_align', ans)
+
+def read_spacing(parent, dest):
+    padding_top = padding_bottom = line_height = inherit
+    for s in XPath('./w:spacing')(parent):
+        a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
+        pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
+        if pb is not None:
+            padding_bottom = '%.3g%s' % (pb, 'ex' if al is not None else 'pt')
+
+        b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
+        pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
+        if pt is not None:
+            padding_top = '%.3g%s' % (pt, 'ex' if bl is not None else 'pt')
+
+        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
+        if l is not None:
+            lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
+            line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
+
+    setattr(dest, 'margin_top', padding_top)
+    setattr(dest, 'margin_bottom', padding_bottom)
+    setattr(dest, 'line_height', line_height)
+
+def read_direction(parent, dest):
+    ans = inherit
+    for jc in XPath('./w:textFlow[@w:val]')(parent):
+        val = get(jc, 'w:val')
+        if not val:
+            continue
+        if 'rl' in val.lower():
+            ans = 'rtl'
+    setattr(dest, 'direction', ans)
+
+def read_shd(parent, dest):
+    ans = inherit
+    for shd in XPath('./w:shd[@w:fill]')(parent):
+        val = get(shd, 'w:fill')
+        if val:
+            ans = simple_color(val, auto='transparent')
+    setattr(dest, 'background_color', ans)
+# }}}
+
+class ParagraphStyle(object):
+
+    all_properties = (
+        'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
+        'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
+        'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
+        'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
+
+        # Border margins padding
+        'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
+        'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
+        'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
+        'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
+        'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
+
+        # Misc.
+        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
+    )
+
+    def __init__(self, pPr=None):
+        self.linked_style = None
+        if pPr is None:
+            for p in self.all_properties:
+                setattr(self, p, inherit)
+        else:
+            for p in (
+                'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
+                'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
+                'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
+                'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
+            ):
+                setattr(self, p, binary_property(pPr, p))
+
+            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
+                f = globals()['read_%s' % x]
+                f(pPr, self)
+
+            for s in XPath('./w:pStyle[@w:val]')(pPr):
+                self.linked_style = get(s, 'w:val')
+
+        self._css = None
+
+    def update(self, other):
+        for prop in self.all_properties:
+            nval = getattr(other, prop)
+            if nval is not inherit:
+                setattr(self, prop, nval)
+        if other.linked_style is not None:
+            self.linked_style = other.linked_style
+
+    def resolve_based_on(self, parent):
+        for p in self.all_properties:
+            val = getattr(self, p)
+            if val is inherit:
+                setattr(self, p, getattr(parent, p))
+
+    @property
+    def css(self):
+        if self._css is None:
+            self._css = c = OrderedDict()
+            if self.keepLines is True:
+                c['page-break-inside'] = 'avoid'
+            if self.pageBreakBefore is True:
+                c['page-break-before'] = 'always'
+            for edge in ('left', 'top', 'right', 'bottom'):
+                val = getattr(self, 'border_%s_width' % edge)
+                if val is not inherit:
+                    c['border-left-width'] = '%.3gpt' % val
+                for x in ('style', 'color'):
+                    val = getattr(self, 'border_%s_%s' % (edge, x))
+                    if val is not inherit:
+                        c['border-%s-%s' % (edge, x)] = val
+                val = getattr(self, 'padding_%s' % edge)
+                if val is not inherit:
+                    c['padding-%s' % edge] = '%.3gpt' % val
+                val = getattr(self, 'margin_%s' % edge)
+                if val is not inherit:
+                    c['margin-%s' % edge] = val
+
+            for x in ('text_indent', 'text_align', 'line_height', 'background_color'):
+                val = getattr(self, x)
+                if val is not inherit:
+                    c[x.replace('_', '-')] = val
+        return self._css
+
+        # TODO: keepNext must be done at markup level
+
+
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@ -0,0 +1,230 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from collections import OrderedDict
+from calibre.ebooks.docx.block_styles import (  # noqa
+    inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd)
+from calibre.ebooks.docx.names import XPath, get
+
+# Read from XML {{{
+def read_text_border(parent, dest):
+    border_color = border_style = border_width = padding = inherit
+    elems = XPath('./w:bdr')(parent)
+    if elems:
+        border_color = simple_color('auto')
+        border_style = 'solid'
+        border_width = 1
+    for elem in elems:
+        color = get(elem, 'w:color')
+        if color is not None:
+            border_color = simple_color(color)
+        style = get(elem, 'w:val')
+        if style is not None:
+            border_style = LINE_STYLES.get(style, 'solid')
+        space = get(elem, 'w:space')
+        if space is not None:
+            try:
+                padding = float(space)
+            except (ValueError, TypeError):
+                pass
+        sz = get(elem, 'w:sz')
+        if sz is not None:
+            # we dont care about art borders (they are only used for page borders)
+            try:
+                border_width = min(96, max(2, float(sz))) / 8
+            except (ValueError, TypeError):
+                pass
+
+    setattr(dest, 'border_color', border_color)
+    setattr(dest, 'border_style', border_style)
+    setattr(dest, 'border_width', border_width)
+    setattr(dest, 'padding', padding)
+
+def read_color(parent, dest):
+    ans = inherit
+    for col in XPath('./w:color[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if not val:
+            continue
+        ans = simple_color(val)
+    setattr(dest, 'color', ans)
+
+def read_highlight(parent, dest):
+    ans = inherit
+    for col in XPath('./w:highlight[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if not val:
+            continue
+        if not val or val == 'none':
+            val = 'transparent'
+        ans = val
+    setattr(dest, 'highlight', ans)
+
+def read_lang(parent, dest):
+    ans = inherit
+    for col in XPath('./w:lang[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if not val:
+            continue
+        try:
+            code = int(val, 16)
+        except (ValueError, TypeError):
+            ans = val
+        else:
+            from calibre.ebooks.docx.lcid import lcid
+            val = lcid.get(code, None)
+            if val:
+                ans = val
+    setattr(dest, 'lang', ans)
+
+def read_letter_spacing(parent, dest):
+    ans = inherit
+    for col in XPath('./w:spacing[@w:val]')(parent):
+        val = simple_float(get(col, 'w:val'), 0.05)
+        if val is not None:
+            ans = val
+    setattr(dest, 'letter_spacing', ans)
+
+def read_sz(parent, dest):
+    ans = inherit
+    for col in XPath('./w:sz[@w:val]')(parent):
+        val = simple_float(get(col, 'w:val'), 0.5)
+        if val is not None:
+            ans = val
+    setattr(dest, 'font_size', ans)
+
+def read_underline(parent, dest):
+    ans = inherit
+    for col in XPath('./w:u[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if val:
+            ans = 'underline'
+    setattr(dest, 'text_decoration', ans)
+
+def read_vert_align(parent, dest):
+    ans = inherit
+    for col in XPath('./w:vertAlign[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if val and val in {'baseline', 'subscript', 'superscript'}:
+            ans = val
+    setattr(dest, 'vert_align', ans)
+# }}}
+
+class RunStyle(object):
+
+    all_properties = {
+        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
+        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
+
+        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
+        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
+    }
+
+    toggle_properties = {
+        'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'dstrike', 'vanish',
+    }
+
+    def __init__(self, rPr=None):
+        self.linked_style = None
+        if rPr is None:
+            for p in self.all_properties:
+                setattr(self, p, inherit)
+        else:
+            for p in (
+                'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
+                'smallCaps', 'strike', 'vanish',
+            ):
+                setattr(self, p, binary_property(rPr, p))
+
+            for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
+                f = globals()['read_%s' % x]
+                f(rPr, self)
+
+            for s in XPath('./w:rStyle[@w:val]')(rPr):
+                self.linked_style = get(s, 'w:val')
+
+        self._css = None
+
+    def update(self, other):
+        for prop in self.all_properties:
+            nval = getattr(other, prop)
+            if nval is not inherit:
+                setattr(self, prop, nval)
+        if other.linked_style is not None:
+            self.linked_style = other.linked_style
+
+    def resolve_based_on(self, parent):
+        for p in self.all_properties:
+            val = getattr(self, p)
+            if val is inherit:
+                setattr(self, p, getattr(parent, p))
+
+    @property
+    def css(self):
+        if self._css is None:
+            c = self._css = OrderedDict()
+            td = set()
+            if self.text_decoration is not inherit:
+                td.add(self.text_decoration)
+            if self.strike:
+                td.add('line-through')
+            if self.dstrike:
+                td.add('line-through')
+            if td:
+                c['text-decoration'] = ' '.join(td)
+            if self.caps is True:
+                c['text-transform'] = 'uppercase'
+            if self.i is True:
+                c['font-style'] = 'italic'
+            if self.shadow:
+                c['text-shadow'] = '2px 2px'
+            if self.smallCaps is True:
+                c['font-variant'] = 'small-caps'
+            if self.vanish is True:
+                c['display'] = 'none'
+
+            for x in ('color', 'style', 'width'):
+                val = getattr(self, 'border_'+x)
+                if x == 'width' and val is not inherit:
+                    val = '%.3gpt' % val
+                if val is not inherit:
+                    c['border-%s' % x] = val
+            if self.padding is not inherit:
+                c['padding'] = '%.3gpt' % self.padding
+
+            for x in ('color', 'background_color'):
+                val = getattr(self, x)
+                if val is not inherit:
+                    c[x.replace('_', '-')] = val
+
+            for x in ('letter_spacing', 'font_size'):
+                val = getattr(self, x)
+                if val is not inherit:
+                    c[x.replace('_', '-')] = '%.3gpt' % val
+
+            if self.highlight is not inherit and self.highlight != 'transparent':
+                c['background-color'] = self.highlight
+
+            if self.b:
+                c['font-weight'] = 'bold'
+        return self._css
+
+    def same_border(self, other):
+        for x in (self, other):
+            has_border = False
+            for y in ('color', 'style', 'width'):
+                if ('border-%s' % y) in x.css:
+                    has_border = True
+                    break
+            if not has_border:
+                return False
+
+        s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
+        o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
+        return s == o
+
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@ -6,356 +6,24 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

-from collections import OrderedDict
+import textwrap
+from collections import OrderedDict, Counter

+from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
+from calibre.ebooks.docx.char_styles import RunStyle
 from calibre.ebooks.docx.names import XPath, get

-class Inherit:
-    pass
-inherit = Inherit()
-
-def binary_property(parent, name):
-    vals = XPath('./w:%s' % name)(parent)
-    if not vals:
-        return inherit
-    val = get(vals[0], 'w:val', 'on')
-    return True if val in {'on', '1', 'true'} else False
-
-def simple_color(col, auto='black'):
-    if not col or col == 'auto' or len(col) != 6:
-        return auto
-    return '#'+col
-
-def simple_float(val, mult=1.0):
-    try:
-        return float(val) * mult
-    except (ValueError, TypeError, AttributeError, KeyError):
-        return None
-
-# Block styles {{{
-
-LINE_STYLES = {  # {{{
-    'basicBlackDashes': 'dashed',
-    'basicBlackDots': 'dotted',
-    'basicBlackSquares': 'dashed',
-    'basicThinLines': 'solid',
-    'dashDotStroked': 'groove',
-    'dashed': 'dashed',
-    'dashSmallGap': 'dashed',
-    'dotDash': 'dashed',
-    'dotDotDash': 'dashed',
-    'dotted': 'dotted',
-    'double': 'double',
-    'inset': 'inset',
-    'nil': 'none',
-    'none': 'none',
-    'outset': 'outset',
-    'single': 'solid',
-    'thick': 'solid',
-    'thickThinLargeGap': 'double',
-    'thickThinMediumGap': 'double',
-    'thickThinSmallGap' : 'double',
-    'thinThickLargeGap': 'double',
-    'thinThickMediumGap': 'double',
-    'thinThickSmallGap': 'double',
-    'thinThickThinLargeGap': 'double',
-    'thinThickThinMediumGap': 'double',
-    'thinThickThinSmallGap': 'double',
-    'threeDEmboss': 'ridge',
-    'threeDEngrave': 'groove',
-    'triple': 'double',
-}  # }}}
-
-def read_border(parent, dest):
-    tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
-            'border_%s_style':inherit, 'border_%s_color':inherit}
-    vals = {}
-    for edge in ('left', 'top', 'right', 'bottom'):
-        vals.update({k % edge:v for k, v in tvals.iteritems()})
-
-    for border in XPath('./w:pBdr')(parent):
-        for edge in ('left', 'top', 'right', 'bottom'):
-            for elem in XPath('./w:%s' % edge):
-                color = get(elem, 'w:color')
-                if color is not None:
-                    vals['border_%s_color' % edge] = simple_color(color)
-                style = get(elem, 'w:val')
-                if style is not None:
-                    vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
-                space = get(elem, 'w:space')
-                if space is not None:
-                    try:
-                        vals['padding_%s' % edge] = float(space)
-                    except (ValueError, TypeError):
-                        pass
-                sz = get(elem, 'w:sz')
-                if sz is not None:
-                    # we dont care about art borders (they are only used for page borders)
-                    try:
-                        vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
-                    except (ValueError, TypeError):
-                        pass
-
-    for key, val in vals.iteritems():
-        setattr(dest, key, val)
-
-def read_indent(parent, dest):
-    padding_left = padding_right = text_indent = inherit
-    for indent in XPath('./w:ind')(parent):
-        l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
-        pl = simple_float(lc, 0.01) if lc is not None else simple_float(l, 0.05) if l is not None else None
-        if pl is not None:
-            padding_left = '%.3f%s' % (pl, 'em' if lc is not None else 'pt')
-
-        r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
-        pr = simple_float(rc, 0.01) if rc is not None else simple_float(r, 0.05) if r is not None else None
-        if pr is not None:
-            padding_right = '%.3f%s' % (pr, 'em' if rc is not None else 'pt')
-
-        h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
-        fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
-        ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
-              simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
-        if ti is not None:
-            text_indent = '%.3f%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
-
-    setattr(dest, 'margin_left', padding_left)
-    setattr(dest, 'margin_right', padding_right)
-    setattr(dest, 'text_indent', text_indent)
-
-def read_justification(parent, dest):
-    ans = inherit
-    for jc in XPath('./w:jc[@w:val]')(parent):
-        val = get(jc, 'w:val')
-        if not val:
-            continue
-        if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
-            ans = 'justify'
-        if val in {'left', 'center', 'right',}:
-            ans = val
-    setattr(dest, 'text_align', ans)
-
-def read_spacing(parent, dest):
-    padding_top = padding_bottom = line_height = inherit
-    for s in XPath('./w:spacing')(parent):
-        a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
-        pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
-        if pb is not None:
-            padding_bottom = '%.3f%s' % (pb, 'ex' if al is not None else 'pt')
-
-        b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
-        pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
-        if pt is not None:
-            padding_top = '%.3f%s' % (pt, 'ex' if bl is not None else 'pt')
-
-        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
-        if l is not None:
-            lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
-            line_height = '%.3f%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
-
-    setattr(dest, 'margin_top', padding_top)
-    setattr(dest, 'margin_bottom', padding_bottom)
-    setattr(dest, 'line_height', line_height)
-
-def read_direction(parent, dest):
-    ans = inherit
-    for jc in XPath('./w:textFlow[@w:val]')(parent):
-        val = get(jc, 'w:val')
-        if not val:
-            continue
-        if 'rl' in val.lower():
-            ans = 'rtl'
-    setattr(dest, 'direction', ans)
-
-def read_shd(parent, dest):
-    ans = inherit
-    for shd in XPath('./w:shd[@w:fill]')(parent):
-        val = get(shd, 'w:fill')
-        if val:
-            ans = simple_color(val, auto='transparent')
-    setattr(dest, 'background_color', ans)
-
-class ParagraphStyle(object):
-
-    all_properties = (
-        'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
-        'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
-        'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
-        'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
-
-        # Border margins padding
-        'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
-        'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
-        'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
-        'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
-        'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
-
-        # Misc.
-        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
-    )
-
-    def __init__(self, pPr):
-        for p in (
-            'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
-            'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
-            'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
-            'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
-        ):
-            setattr(self, p, binary_property(pPr, p))
-
-        for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
-            f = globals()['read_%s' % x]
-            f(pPr, self)
-
-        # TODO: numPr and outlineLvl
-
-    def update(self, other):
-        for prop in self.all_properties:
-            nval = getattr(other, prop)
-            if nval is not inherit:
-                setattr(self, prop, nval)
-
-# }}}
-
-# Character styles {{{
-def read_text_border(parent, dest):
-    border_color = border_style = border_width = padding = inherit
-    elems = XPath('./w:bdr')(parent)
-    if elems:
-        border_color = simple_color('auto')
-        border_style = 'solid'
-        border_width = 1
-    for elem in elems:
-        color = get(elem, 'w:color')
-        if color is not None:
-            border_color = simple_color(color)
-        style = get(elem, 'w:val')
-        if style is not None:
-            border_style = LINE_STYLES.get(style, 'solid')
-        space = get(elem, 'w:space')
-        if space is not None:
-            try:
-                padding = float(space)
-            except (ValueError, TypeError):
-                pass
-        sz = get(elem, 'w:sz')
-        if sz is not None:
-            # we dont care about art borders (they are only used for page borders)
-            try:
-                border_width = min(96, max(2, float(sz))) / 8
-            except (ValueError, TypeError):
-                pass
-
-    setattr(dest, 'border_color', border_color)
-    setattr(dest, 'border_style', border_style)
-    setattr(dest, 'border_width', border_width)
-    setattr(dest, 'padding', padding)
-
-def read_color(parent, dest):
-    ans = inherit
-    for col in XPath('./w:color[@w:val]')(parent):
-        val = get(col, 'w:val')
-        if not val:
-            continue
-        ans = simple_color(val)
-    setattr(dest, 'color', ans)
-
-def read_highlight(parent, dest):
-    ans = inherit
-    for col in XPath('./w:highlight[@w:val]')(parent):
-        val = get(col, 'w:val')
-        if not val:
-            continue
-        if not val or val == 'none':
-            val = 'transparent'
-        ans = val
-    setattr(dest, 'highlight', ans)
-
-def read_lang(parent, dest):
-    ans = inherit
-    for col in XPath('./w:lang[@w:val]')(parent):
-        val = get(col, 'w:val')
-        if not val:
-            continue
-        try:
-            code = int(val, 16)
-        except (ValueError, TypeError):
-            ans = val
-        else:
-            from calibre.ebooks.docx.lcid import lcid
-            val = lcid.get(code, None)
-            if val:
-                ans = val
-    setattr(dest, 'lang', ans)
-
-def read_letter_spacing(parent, dest):
-    ans = inherit
-    for col in XPath('./w:spacing[@w:val]')(parent):
-        val = simple_float(get(col, 'w:val'), 0.05)
-        if val:
-            ans = val
-    setattr(dest, 'letter_spacing', ans)
-
-def read_sz(parent, dest):
-    ans = inherit
-    for col in XPath('./w:sz[@w:val]')(parent):
-        val = simple_float(get(col, 'w:val'), 0.5)
-        if val:
-            ans = val
-    setattr(dest, 'font_size', ans)
-
-def read_underline(parent, dest):
-    ans = inherit
-    for col in XPath('./w:u[@w:val]')(parent):
-        val = get(col, 'w:val')
-        if val:
-            ans = 'underline'
-    setattr(dest, 'text_decoration', ans)
-
-def read_vert_align(parent, dest):
-    ans = inherit
-    for col in XPath('./w:vertAlign[@w:val]')(parent):
-        val = get(col, 'w:val')
-        if val and val in {'baseline', 'subscript', 'superscript'}:
-            ans = val
-    setattr(dest, 'vert_align', ans)
-
-
-class RunStyle(object):
-
-    all_properties = (
-        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
-        'smallCaps', 'strike', 'vanish',
-
-        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background-color',
-        'letter_spacing', 'font_size', 'text_decoration', 'vert_align',
-    )
-
-    def __init__(self, rPr):
-        for p in (
-            'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
-            'smallCaps', 'strike', 'vanish',
-        ):
-            setattr(self, p, binary_property(rPr, p))
-
-        for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align'):
-            f = globals()['read_%s' % x]
-            f(rPr, self)
-
-    def update(self, other):
-        for prop in self.all_properties:
-            nval = getattr(other, prop)
-            if nval is not inherit:
-                setattr(self, prop, nval)
-# }}}

 class Style(object):
+    '''
+    Class representing a <w:style> element. Can contain block, character, etc. styles.
+    '''

    name_path = XPath('./w:name[@w:val]')
    based_on_path = XPath('./w:basedOn[@w:val]')
-    link_path = XPath('./w:link[@w:val]')

    def __init__(self, elem):
+        self.resolved = False
        self.style_id = get(elem, 'w:styleId')
        self.style_type = get(elem, 'w:type')
        names = self.name_path(elem)
@ -364,10 +32,7 @@ class Style(object):
        self.based_on = get(based_on[0], 'w:val') if based_on else None
        if self.style_type == 'numbering':
            self.based_on = None
-        link = self.link_path(elem)
-        self.link = get(link[0], 'w:val') if link else None
-        if self.style_type not in {'paragraph', 'character'}:
-            self.link = None
+        self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}

        self.paragraph_style = self.character_style = None

@ -387,11 +52,31 @@ class Style(object):
                else:
                    self.character_style.update(rs)

+    def resolve_based_on(self, parent):
+        '''
+        Merge the formatting of ``parent`` (the style this style is based on)
+        into this style. For every kind of formatting that the parent
+        defines, an empty style object is created here if this style has
+        none, and the merging itself is delegated to that object's own
+        resolve_based_on().
+        '''
+        for attr, factory in (('paragraph_style', ParagraphStyle), ('character_style', RunStyle)):
+            inherited = getattr(parent, attr)
+            if inherited is None:
+                continue
+            own = getattr(self, attr)
+            if own is None:
+                own = factory()
+                setattr(self, attr, own)
+            own.resolve_based_on(inherited)
+

 class Styles(object):

+    '''
+    Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
+    '''
+
    def __init__(self):
+        # style id -> Style, kept in the order the styles were added
        self.id_map = OrderedDict()
+        # paragraph element -> resolved ParagraphStyle (filled by resolve_paragraph)
+        self.para_cache = {}
+        # paragraph element -> character style coming from the paragraph's
+        # linked or default style (filled by resolve_paragraph, read by resolve_run)
+        self.para_char_cache = {}
+        # run element -> resolved RunStyle (filled by resolve_run)
+        self.run_cache = {}
+        # hash of the css items -> (class name, css) (filled by register)
+        self.classes = {}
+        # class name prefix -> number of class names generated with that prefix
+        self.counter = Counter()
+        # style type -> Style flagged with w:default
+        self.default_styles = {}

    def __iter__(self):
        for s in self.id_map.itervalues():
@ -411,20 +96,178 @@ class Styles(object):
            s = Style(s)
            if s.style_id:
                self.id_map[s.style_id] = s
+            if s.is_default:
+                self.default_styles[s.style_type] = s
+
+        self.default_paragraph_style = self.default_character_style = None
+
+        for dd in XPath('./w:docDefaults')(root):
+            for pd in XPath('./w:pPrDefault')(dd):
+                for pPr in XPath('./w:pPr')(pd):
+                    ps = ParagraphStyle(pPr)
+                    if self.default_paragraph_style is None:
+                        self.default_paragraph_style = ps
+                    else:
+                        self.default_paragraph_style.update(ps)
+            for pd in XPath('./w:rPrDefault')(dd):
+                for pPr in XPath('./w:rPr')(pd):
+                    ps = RunStyle(pPr)
+                    if self.default_character_style is None:
+                        self.default_character_style = ps
+                    else:
+                        self.default_character_style.update(ps)
+
+        def resolve(s, p):
+            if p is not None:
+                if not p.resolved:
+                    resolve(p, self.get(p.based_on))
+                s.resolve_based_on(p)
+            s.resolved = True

-        # Nuke based_on, link attributes that refer to missing/incompatible
-        # styles
        for s in self:
-            bo = s.based_on
-            if bo is not None:
-                p = self.get(bo)
-                if p is None or p.style_type != s.style_type:
-                    s.based_on = None
-            link = s.link
-            if link is not None:
-                p = self.get(link)
-                if p is None or (s.style_type, p.style_type) not in {('paragraph', 'character'), ('character', 'paragraph')}:
-                    s.link = None
+            if not s.resolved:
+                resolve(s, self.get(s.based_on))

-        # TODO: Document defaults (docDefaults)
+    def para_val(self, parent_styles, direct_formatting, attr):
+        '''
+        Return the value of the paragraph property ``attr``. The value from
+        ``direct_formatting`` is used when it is set, otherwise
+        ``parent_styles`` is searched from last to first for a style that sets
+        it. If nobody sets it, the ``inherit`` marker is returned.
+        '''
+        direct = getattr(direct_formatting, attr)
+        if direct is not inherit:
+            return direct
+        for ps in reversed(parent_styles):
+            candidate = getattr(ps, attr)
+            if candidate is not inherit:
+                return candidate
+        return direct
+
+    def run_val(self, parent_styles, direct_formatting, attr):
+        '''
+        Return the value of the run property ``attr``. The value from
+        ``direct_formatting`` is used when it is set. Otherwise toggle
+        properties are combined over all of ``parent_styles``, while every
+        other property is taken from the last parent style that sets it (the
+        ``inherit`` marker is returned if none does).
+        '''
+        direct = getattr(direct_formatting, attr)
+        if direct is not inherit:
+            return direct
+        if attr in direct_formatting.toggle_properties:
+            # A toggle property flips once for every parent style that turns
+            # it on, so it ends up on only for an odd number of such styles
+            enabled = sum(1 for rs in parent_styles if getattr(rs, attr) is True)
+            return enabled % 2 == 1
+        for rs in reversed(parent_styles):
+            inherited = getattr(rs, attr)
+            if inherited is not inherit:
+                return inherited
+        return direct
+
+    def resolve_paragraph(self, p):
+        '''
+        Return the final ParagraphStyle for the paragraph element ``p``,
+        computing and caching it on first use.
+
+        Each property is taken from the direct formatting (the w:pPr children
+        of ``p``) when it is set there, otherwise from the parent styles
+        collected below. As a side effect, the character style of the linked
+        (or default) paragraph style is stored in self.para_char_cache for
+        ``p``, where resolve_run() looks it up. The name of the linked style,
+        if any, is made available as ``style_name`` on the returned object.
+        '''
+        ans = self.para_cache.get(p, None)
+        if ans is None:
+            ans = self.para_cache[p] = ParagraphStyle()
+            ans.style_name = None
+            direct_formatting = None
+            # Merge all w:pPr children into a single direct formatting object
+            for pPr in XPath('./w:pPr')(p):
+                ps = ParagraphStyle(pPr)
+                if direct_formatting is None:
+                    direct_formatting = ps
+                else:
+                    direct_formatting.update(ps)
+
+            if direct_formatting is None:
+                direct_formatting = ParagraphStyle()
+            # Ordered from lowest to highest priority, para_val() searches
+            # this list in reverse
+            parent_styles = []
+            if self.default_paragraph_style is not None:
+                parent_styles.append(self.default_paragraph_style)
+
+            default_para = self.default_styles.get('paragraph', None)
+            if direct_formatting.linked_style is not None:
+                ls = self.get(direct_formatting.linked_style)
+                if ls is not None:
+                    ans.style_name = ls.name
+                    ps = ls.paragraph_style
+                    if ps is not None:
+                        parent_styles.append(ps)
+                    if ls.character_style is not None:
+                        self.para_char_cache[p] = ls.character_style
+            elif default_para is not None:
+                # The style flagged as default for paragraphs is used only
+                # when the direct formatting does not name a style
+                if default_para.paragraph_style is not None:
+                    parent_styles.append(default_para.paragraph_style)
+                if default_para.character_style is not None:
+                    self.para_char_cache[p] = default_para.character_style
+
+            for attr in ans.all_properties:
+                setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
+        return ans
+
+    def resolve_run(self, r):
+        '''
+        Return the final RunStyle for the run element ``r``, computing and
+        caching it on first use.
+
+        Each property is taken from the direct formatting (the w:rPr children
+        of ``r``) when it is set there, otherwise it is resolved by run_val()
+        from these parent styles, in order: the document wide default
+        character style, the character style recorded for the parent element
+        by resolve_paragraph(), and finally the character style named by the
+        run itself (or, if the run names none, the style flagged as the
+        default for characters).
+        '''
+        ans = self.run_cache.get(r, None)
+        if ans is None:
+            # NOTE(review): only the immediate parent is looked up in
+            # para_char_cache, a run that is not a direct child of its
+            # paragraph will not find the paragraph's character style --
+            # confirm this is intended
+            p = r.getparent()
+            ans = self.run_cache[r] = RunStyle()
+            direct_formatting = None
+            # Merge all w:rPr children into a single direct formatting object
+            for rPr in XPath('./w:rPr')(r):
+                rs = RunStyle(rPr)
+                if direct_formatting is None:
+                    direct_formatting = rs
+                else:
+                    direct_formatting.update(rs)
+
+            if direct_formatting is None:
+                direct_formatting = RunStyle()
+
+            parent_styles = []
+            default_char = self.default_styles.get('character', None)
+            if self.default_character_style is not None:
+                parent_styles.append(self.default_character_style)
+            pstyle = self.para_char_cache.get(p, None)
+            if pstyle is not None:
+                parent_styles.append(pstyle)
+            if direct_formatting.linked_style is not None:
+                # The run can name a style id that is not defined in the
+                # document, in which case self.get() returns None and there
+                # is no character style to inherit from
+                linked = self.get(direct_formatting.linked_style)
+                ls = None if linked is None else linked.character_style
+                if ls is not None:
+                    parent_styles.append(ls)
+            elif default_char is not None and default_char.character_style is not None:
+                parent_styles.append(default_char.character_style)
+
+            for attr in ans.all_properties:
+                setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
+
+        return ans
+
+    def resolve(self, obj):
+        '''
+        Return the resolved style for ``obj``, dispatching on its tag: tags
+        ending in }p are resolved as paragraphs and tags ending in }r as
+        runs. None is returned for any other element.
+        '''
+        for suffix, resolver in (('}p', self.resolve_paragraph), ('}r', self.resolve_run)):
+            if obj.tag.endswith(suffix):
+                return resolver(obj)
+        return None
+
+    def register(self, css, prefix):
+        '''
+        Return the class name for ``css``, generating a new one of the form
+        prefix_N the first time this css is seen. The lookup key is the hash
+        of the css items, so css that is already registered keeps its
+        existing class name whatever ``prefix`` is passed.
+        '''
+        key = hash(tuple(css.iteritems()))
+        known = self.classes.get(key, (None, None))[0]
+        if known is not None:
+            return known
+        self.counter[prefix] += 1
+        name = '%s_%d' % (prefix, self.counter[prefix])
+        self.classes[key] = (name, css)
+        return name
+
+    def generate_classes(self):
+        '''
+        Register a class for the non-empty css of every resolved paragraph
+        style (prefix block) and then of every resolved run style (prefix
+        text).
+        '''
+        for cache, prefix in ((self.para_cache, 'block'), (self.run_cache, 'text')):
+            for resolved in cache.itervalues():
+                css = resolved.css
+                if css:
+                    self.register(css, prefix)
+
+    def class_name(self, css):
+        '''
+        Return the class name registered for ``css`` or None if this css was
+        never registered.
+        '''
+        key = hash(tuple(css.iteritems()))
+        name, _ = self.classes.get(key, (None, None))
+        return name
+
+    def generate_css(self):
+        '''
+        Return the text of the stylesheet: a fixed rule for p elements
+        followed by one rule for every registered class, sorted by class
+        name.
+        '''
+        prefix = textwrap.dedent(
+            '''\
+            p { margin: 0; padding: 0; text-indent: 1.5em }
+            ''')
+
+        ans = []
+        for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
+            b = ('\t%s: %s;' % (k, v) for k, v in css.iteritems())
+            b = '\n'.join(b)
+            # The last declaration is written without its trailing semicolon
+            ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';')))
+        return prefix + '\n' + '\n'.join(ans)

--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

-import sys, os
+import sys, os, re

 from lxml import html
 from lxml.html.builder import (
@ -14,7 +14,7 @@ from lxml.html.builder import (

 from calibre.ebooks.docx.container import DOCX, fromstring
 from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES
-from calibre.ebooks.docx.styles import Styles
+from calibre.ebooks.docx.styles import Styles, inherit
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1

 class Text:
@ -35,6 +35,7 @@ class Convert(object):
        self.mi = self.docx.metadata
        self.body = BODY()
        self.styles = Styles()
+        self.object_map = {}
        self.html = HTML(
            HEAD(
                META(charset='utf-8'),
@ -75,6 +76,16 @@ class Convert(object):
            for child in self.body:
                child.tail = '\n\t'
            self.body[-1].tail = '\n'
+
+        self.styles.generate_classes()
+        for html_obj, obj in self.object_map.iteritems():
+            style = self.styles.resolve(obj)
+            if style is not None:
+                css = style.css
+                if css:
+                    cls = self.styles.class_name(css)
+                    if cls:
+                        html_obj.set('class', cls)
        self.write()

    def read_styles(self, relationships_by_type):
@ -96,17 +107,69 @@ class Convert(object):
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
+        css = self.styles.generate_css()
+        if css:
+            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
+                f.write(css.encode('utf-8'))

    def convert_p(self, p):
+        '''
+        Convert the paragraph element ``p`` into an HTML element containing
+        one converted element per descendant run. Paragraphs whose style is
+        named "heading N" become hN elements, right to left paragraphs get
+        dir="rtl", and groups of adjacent runs that share the same border are
+        wrapped in a single span carrying that border. The returned element
+        is recorded in self.object_map.
+        '''
        dest = P()
+        style = self.styles.resolve_paragraph(p)
        for run in XPath('descendant::w:r')(p):
            span = self.convert_run(run)
            dest.append(span)

+        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
+        if m is not None:
+            # Clamp the heading level to the h1-h6 range that HTML provides
+            n = min(6, max(1, int(m.group(1))))
+            dest.tag = 'h%d' % n
+
+        if style.direction == 'rtl':
+            dest.set('dir', 'rtl')
+
+        # NOTE(review): a group that is still open when the loop ends is never
+        # added to common_borders, and the span that ends a group is not used
+        # to start the next one -- confirm this is intended
+        border_runs = []
+        common_borders = []
+        for span in dest:
+            run = self.object_map[span]
+            style = self.styles.resolve_run(run)
+            if not border_runs or border_runs[-1][1].same_border(style):
+                border_runs.append((span, style))
+            elif border_runs:
+                if len(border_runs) > 1:
+                    common_borders.append(border_runs)
+                border_runs = []
+
+        for border_run in common_borders:
+            spans = []
+            bs = {}
+            for span, style in border_run:
+                c = style.css
+                spans.append(span)
+                # Move the border from the individual runs to the wrapper
+                for x in ('width', 'color', 'style'):
+                    val = c.pop('border-%s' % x, None)
+                    if val is not None:
+                        bs['border-%s' % x] = val
+            if bs:
+                cls = self.styles.register(bs, 'text_border')
+                # Keep our own reference to the wrapper rather than relying
+                # on the return value of wrap_elems()
+                wrapper = SPAN()
+                self.wrap_elems(spans, wrapper)
+                wrapper.set('class', cls)
+
+        self.object_map[dest] = p
        return dest

+    def wrap_elems(self, elems, wrapper):
+        '''
+        Move the elements ``elems``, which must all be children of the same
+        parent, into ``wrapper`` and insert ``wrapper`` into the parent at
+        the position of the first of them. The tail text of the last element
+        is transferred to ``wrapper`` so that it stays after the wrapped
+        group.
+
+        :return: ``wrapper``
+        '''
+        p = elems[0].getparent()
+        idx = p.index(elems[0])
+        p.insert(idx, wrapper)
+        wrapper.tail = elems[-1].tail
+        elems[-1].tail = None
+        for elem in elems:
+            p.remove(elem)
+            wrapper.append(elem)
+        return wrapper
+
    def convert_run(self, run):
        ans = SPAN()
+        ans.run = run
        text = Text(ans, 'text', [])

        for child in run:
@ -121,6 +184,7 @@ class Convert(object):
                    text.buf.append(child.text)
            elif is_tag(child, 'w:cr'):
                text.add_elem(BR())
+                ans.append(text.elem)
            elif is_tag(child, 'w:br'):
                typ = child.get('type', None)
                if typ in {'column', 'page'}:
@ -132,8 +196,16 @@ class Convert(object):
                    else:
                        br = BR()
                text.add_elem(br)
+                ans.append(text.elem)
        if text.buf:
            setattr(text.elem, text.attr, ''.join(text.buf))
+
+        style = self.styles.resolve_run(run)
+        if style.vert_align in {'superscript', 'subscript'}:
+            ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
+        if style.lang is not inherit:
+            ans.lang = style.lang
+        self.object_map[ans] = run
        return ans

 if __name__ == '__main__':
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -136,7 +136,7 @@ class FB2MLizer(object):
            metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
-            metadata['author'] = u'<author><first-name></first-name><last-name><last-name></author>'
+            metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'

        metadata['keywords'] = u''
        tags = list(map(unicode, self.oeb_book.metadata.subject))
--- a/src/calibre/gui2/store/stores/koobe_plugin.py
+++ b/src/calibre/gui2/store/stores/koobe_plugin.py
@ -8,7 +8,6 @@ __copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import urllib
-from base64 import b64encode
 from contextlib import closing

 from lxml import html
--- a/src/calibre/gui2/store/stores/woblink_plugin.py
+++ b/src/calibre/gui2/store/stores/woblink_plugin.py
@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'

 import re
 import urllib
-from base64 import b64encode
 from contextlib import closing

 from lxml import html
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@@ -113,7 +113,7 @@ class KindleDX(Kindle):
    id = 'kindledx'

 class KindleFire(KindleDX):
-    name = 'Kindle Fire'
+    name = 'Kindle Fire and Fire HD'
    id = 'kindle_fire'
    output_profile = 'kindle_fire'
    supports_color = True
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/ber.po
+++ b/src/calibre/translations/ber.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/bn.po
+++ b/src/calibre/translations/bn.po
--- a/src/calibre/translations/br.po
+++ b/src/calibre/translations/br.po
--- a/src/calibre/translations/bs.po
+++ b/src/calibre/translations/bs.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/cy.po
+++ b/src/calibre/translations/cy.po
--- a/src/calibre/translations/da.po
+++ b/src/calibre/translations/da.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/en_AU.po
+++ b/src/calibre/translations/en_AU.po
--- a/src/calibre/translations/en_CA.po
+++ b/src/calibre/translations/en_CA.po
--- a/src/calibre/translations/en_GB.po
+++ b/src/calibre/translations/en_GB.po
--- a/src/calibre/translations/eo.po
+++ b/src/calibre/translations/eo.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/et.po
+++ b/src/calibre/translations/et.po
--- a/src/calibre/translations/eu.po
+++ b/src/calibre/translations/eu.po
--- a/src/calibre/translations/fa.po
+++ b/src/calibre/translations/fa.po
--- a/src/calibre/translations/fi.po
+++ b/src/calibre/translations/fi.po
--- a/src/calibre/translations/fo.po
+++ b/src/calibre/translations/fo.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/fr_CA.po
+++ b/src/calibre/translations/fr_CA.po
--- a/src/calibre/translations/fur.po
+++ b/src/calibre/translations/fur.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/gu.po
+++ b/src/calibre/translations/gu.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/hi.po
+++ b/src/calibre/translations/hi.po
--- a/src/calibre/translations/him.po
+++ b/src/calibre/translations/him.po
--- a/src/calibre/translations/hr.po
+++ b/src/calibre/translations/hr.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/id.po
+++ b/src/calibre/translations/id.po
--- a/src/calibre/translations/is.po
+++ b/src/calibre/translations/is.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/ja.po
+++ b/src/calibre/translations/ja.po
--- a/src/calibre/translations/jv.po
+++ b/src/calibre/translations/jv.po
--- a/src/calibre/translations/ka.po
+++ b/src/calibre/translations/ka.po
--- a/src/calibre/translations/kn.po
+++ b/src/calibre/translations/kn.po
--- a/src/calibre/translations/ko.po
+++ b/src/calibre/translations/ko.po
--- a/src/calibre/translations/ku.po
+++ b/src/calibre/translations/ku.po
--- a/src/calibre/translations/lt.po
+++ b/src/calibre/translations/lt.po
--- a/src/calibre/translations/ltg.po
+++ b/src/calibre/translations/ltg.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/mk.po
+++ b/src/calibre/translations/mk.po
--- a/src/calibre/translations/ml.po
+++ b/src/calibre/translations/ml.po
--- a/src/calibre/translations/mr.po
+++ b/src/calibre/translations/mr.po
--- a/src/calibre/translations/ms.po
+++ b/src/calibre/translations/ms.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/nn.po
+++ b/src/calibre/translations/nn.po
--- a/src/calibre/translations/oc.po
+++ b/src/calibre/translations/oc.po
--- a/src/calibre/translations/pa.po
+++ b/src/calibre/translations/pa.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/pt_BR.po
+++ b/src/calibre/translations/pt_BR.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sc.po
+++ b/src/calibre/translations/sc.po
--- a/src/calibre/translations/si.po
+++ b/src/calibre/translations/si.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sq.po
+++ b/src/calibre/translations/sq.po
--- a/src/calibre/translations/sr.po
+++ b/src/calibre/translations/sr.po
--- a/src/calibre/translations/sr@latin.po
+++ b/src/calibre/translations/sr@latin.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/ta.po
+++ b/src/calibre/translations/ta.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/translations/th.po
+++ b/src/calibre/translations/th.po
--- a/src/calibre/translations/tr.po
+++ b/src/calibre/translations/tr.po
--- a/src/calibre/translations/uk.po
+++ b/src/calibre/translations/uk.po
--- a/src/calibre/translations/ur.po
+++ b/src/calibre/translations/ur.po
--- a/src/calibre/translations/vi.po
+++ b/src/calibre/translations/vi.po
--- a/src/calibre/translations/wa.po
+++ b/src/calibre/translations/wa.po
--- a/src/calibre/translations/yi.po
+++ b/src/calibre/translations/yi.po
--- a/src/calibre/translations/zh_CN.po
+++ b/src/calibre/translations/zh_CN.po
--- a/src/calibre/translations/zh_HK.po
+++ b/src/calibre/translations/zh_HK.po
--- a/src/calibre/translations/zh_TW.po
+++ b/src/calibre/translations/zh_TW.po
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -1090,7 +1090,7 @@ class BuiltinListRe(BuiltinFormatterFunction):
            if re.search(search_re, item, flags=re.I) is not None:
                if opt_replace:
                    item = re.sub(search_re, opt_replace, item)
-                for i in [l.strip() for l in item.split(',') if l.strip()]:
+                for i in [t.strip() for t in item.split(',') if t.strip()]:
                    if i not in res:
                        res.append(i)
        if separator == ',':
--- a/src/calibre/utils/ipython.py
+++ b/src/calibre/utils/ipython.py
@@ -8,18 +8,48 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os
-from calibre.constants import iswindows, config_dir, get_version
+from calibre.constants import iswindows, cache_dir, get_version

-ipydir = os.path.join(config_dir, ('_' if iswindows else '.')+'ipython')
+ipydir = os.path.join(cache_dir(), 'ipython')

 BANNER = ('Welcome to the interactive calibre shell!\n')

-def simple_repl(user_ns={}):
+def setup_pyreadline():
    try:
-        import readline
-        readline
+        import pyreadline.rlmain
+        #pyreadline.rlmain.config_path=r"c:\xxx\pyreadlineconfig.ini"
+        import readline, atexit
+        import pyreadline.unicode_helper  # noqa
+        #Normally the codepage for pyreadline is set to be sys.stdout.encoding
+        #if you need to change this uncomment the following line
+        #pyreadline.unicode_helper.pyreadline_codepage="utf8"
    except ImportError:
-        pass
+        print("Module readline not available.")
+    else:
+        #import tab completion functionality
+        import rlcompleter
+
+        #Override completer from rlcompleter to disable automatic ( on callable
+        completer_obj = rlcompleter.Completer()
+        def nop(val, word):
+            return word
+        completer_obj._callable_postfix = nop
+        readline.set_completer(completer_obj.complete)
+
+        #activate tab completion
+        readline.parse_and_bind("tab: complete")
+        readline.read_history_file()
+        atexit.register(readline.write_history_file)
+        del readline, rlcompleter, atexit
+
+def simple_repl(user_ns={}):
+    if iswindows:
+        setup_pyreadline()
+    else:
+        try:
+            import readline  # noqa
+        except ImportError:
+            pass

    import code
    code.interact(BANNER, raw_input, user_ns)
--- a/src/calibre/utils/search_query_parser.py
+++ b/src/calibre/utils/search_query_parser.py
@@ -33,7 +33,7 @@ class SavedSearchQueries(object):
    opt_name = ''

    def __init__(self, db, _opt_name):
-        self.opt_name = _opt_name;
+        self.opt_name = _opt_name
        if db is not None:
            self.queries = db.prefs.get(self.opt_name, {})
        else:
@@ -171,12 +171,12 @@ class Parser(object):

            # Strip out escaped backslashes and escaped quotes so that the
            # lex scanner doesn't get confused. We put them back later.
-            expr = expr.replace(u'\\\\', u'\001').replace(u'\\"', u'\002')
+            expr = expr.replace(u'\\\\', u'\x01').replace(u'\\"', u'\x02')
            self.tokens = self.lex_scanner.scan(expr)[0]
            for (i,tok) in enumerate(self.tokens):
                tt, tv = tok
                if tt == self.WORD or tt == self.QUOTED_WORD:
-                    self.tokens[i] = (tt, tv.replace('\001', '\\').replace('\002', '"'))
+                    self.tokens[i] = (tt, tv.replace(u'\x01', u'\\').replace(u'\x02', u'"'))

            self.current_token = 0
            prog = self.or_expression()
@@ -217,7 +217,7 @@ class Parser(object):
                if self.token(advance=True) != ')':
                    raise ParseException(_('missing )'))
                return res
-            if self.token_type() not in [ self.WORD, self.QUOTED_WORD ]:
+            if self.token_type() not in (self.WORD, self.QUOTED_WORD):
                raise ParseException(_('Invalid syntax. Expected a lookup name or a word'))

            return self.base_token()
@@ -279,7 +279,6 @@ class SearchQueryParser(object):
      * `(author:Asimov or author:Hardy) and not tag:read` [search for unread books by Asimov or Hardy]
    '''

-
    @staticmethod
    def run_tests(parser, result, tests):
        failed = []
@@ -371,7 +370,7 @@ class SearchQueryParser(object):
                return self._parse(saved_searches().lookup(query), candidates)
            except ParseException as e:
                raise e
-            except: # convert all exceptions (e.g., missing key) to a parse error
+            except:  # convert all exceptions (e.g., missing key) to a parse error
                import traceback
                traceback.print_exc()
                raise ParseException(_('Unknown error in saved search: {0}').format(query))
@@ -667,7 +666,7 @@ class Tester(SearchQueryParser):
       u'John Scalzi',
       u'Tor Science Fiction',
       u'html,lrf'],
- 343: [u'The Last Colony', u'John Scalzi', u'Tor Books', u'html,lrf'],
+ 343: [u'The Last Colony', u'John S"calzi', u'Tor Books', u'html,lrf'],
 344: [u'Gossip Girl', u'Cecily von Ziegesar', u'Warner Books', u'lrf,rtf'],
 347: [u'Little Brother', u'Cory Doctorow', u'Tor Teen', u'lrf'],
 348: [u'The Reality Dysfunction',
@@ -685,7 +684,7 @@ class Tester(SearchQueryParser):
       u'lit,lrf'],
 356: [u'The Naked God', u'Peter F. Hamilton', u'Aspect', u'lit,lrf'],
 421: [u'A Shadow in Summer', u'Daniel Abraham', u'Tor Fantasy', u'lrf,rar'],
- 427: [u'Lonesome Dove', u'Larry McMurtry', None, u'lit,lrf'],
+ 427: [u'Lonesome Dove', u'Larry M\\cMurtry', None, u'lit,lrf'],
 440: [u'Ghost', u'John Ringo', u'Baen', u'lit,lrf'],
 441: [u'Kildar', u'John Ringo', u'Baen', u'lit,lrf'],
 443: [u'Hidden Empire ', u'Kevin J. Anderson', u'Aspect', u'lrf,rar'],
@@ -711,6 +710,10 @@ class Tester(SearchQueryParser):
             'publisher:london:thames': set([13]),
             '"(1977)"': set([13]),
             'jack weatherford orc': set([30]),
+             'S\\"calzi': {343},
+             'author:S\\"calzi': {343},
+             '"S\\"calzi"': {343},
+             'M\\\\cMurtry': {427},
             }
    fields = {'title':0, 'author':1, 'publisher':2, 'tag':3}

@@ -732,15 +735,13 @@ class Tester(SearchQueryParser):
            return set([])
        query = query.lower()
        if candidates:
-            return set(key for key, val in self.texts.items() \
+            return set(key for key, val in self.texts.items()
                if key in candidates and query and query
                        in getattr(getter(val), 'lower', lambda : '')())
        else:
-            return set(key for key, val in self.texts.items() \
+            return set(key for key, val in self.texts.items()
                if query and query in getattr(getter(val), 'lower', lambda : '')())

-
-
    def run_tests(self):
        failed = []
        for query in self.tests.keys():