From 17206c3a951324bf3f3d5586cc59c9a9a8545370 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 00:30:31 +0530
Subject: [PATCH 01/26] Allow setting focus rect width to 2px

---
 src/calibre/gui2/__init__.py  |  2 +-
 src/qtcurve/style/qtcurve.cpp | 11 ++++++-----
 src/qtcurve/style/qtcurve.h   |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 369746cec7..ceac21dd30 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -873,7 +873,7 @@ class Application(QApplication):
             v = pcache[v]
             icon_map[type('')(getattr(style, 'SP_'+k))] = v
         style.setProperty(u'calibre_icon_map', icon_map)
-        style.setProperty(u'calibre_item_view_focus', True)
+        style.setProperty(u'calibre_item_view_focus', 1)
         self.__icon_map_memory_ = icon_map
 
     def setup_styles(self, force_calibre_style):
diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp
index 46a9b91a87..e5ac94a82e 100644
--- a/src/qtcurve/style/qtcurve.cpp
+++ b/src/qtcurve/style/qtcurve.cpp
@@ -3698,7 +3698,7 @@ bool Style::event(QEvent *event) {
             }
             return true;
         } else if (e->propertyName() == QString("calibre_item_view_focus")) {
-            calibre_item_view_focus = property("calibre_item_view_focus").toBool();
+            calibre_item_view_focus = property("calibre_item_view_focus").toInt();
             return true;
         }
     }
@@ -4803,10 +4803,11 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option,
                     painter->setBrush(QBrush(patternCol, Qt::Dense4Pattern));
                     painter->setBrushOrigin(r.topLeft());
                     painter->setPen(Qt::NoPen);
-                    painter->drawRect(r.left(), r.top(), r.width(), 1);    // Top
-                    painter->drawRect(r.left(), r.bottom(), r.width(), 1); // Bottom
-                    painter->drawRect(r.left(), r.top(), 1, r.height());   // Left
-                    painter->drawRect(r.right(), r.top(), 1, r.height());  // Right
+                    int fwidth = (calibre_item_view_focus > 1) ? 2 : 1;
+                    painter->drawRect(r.left(), r.top(), r.width(), fwidth);    // Top
+                    painter->drawRect(r.left(), r.bottom(), r.width(), fwidth); // Bottom
+                    painter->drawRect(r.left(), r.top(), fwidth, r.height());   // Left
+                    painter->drawRect(r.right(), r.top(), fwidth, r.height());  // Right
                     painter->restore();
                 }
                 else
diff --git a/src/qtcurve/style/qtcurve.h b/src/qtcurve/style/qtcurve.h
index 84dfbdd145..63500ad340 100644
--- a/src/qtcurve/style/qtcurve.h
+++ b/src/qtcurve/style/qtcurve.h
@@ -355,7 +355,7 @@ class Style : public QCommonStyle
     mutable QList<int>                 itsMdiButtons[2]; // 0=left, 1=right
     mutable int                        itsTitlebarHeight;
     QHash<int,QString>                 calibre_icon_map;
-    bool                               calibre_item_view_focus;
+    int                                calibre_item_view_focus;
     bool                               is_kde_session;
 
     // Required for Q3Header hover...

From d99eccc51e8f274bbaae5a4729c44eda489e427f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 01:01:28 +0530
Subject: [PATCH 02/26] Book list: Make the current cell have a darker
 background

---
 src/calibre/gui2/library/views.py | 1 +
 src/qtcurve/style/qtcurve.cpp     | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index e6a816621f..1d6cd33e9d 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -139,6 +139,7 @@ class BooksView(QTableView):  # {{{
 
     def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True):
         QTableView.__init__(self, parent)
+        self.setProperty('highlight_current_item', True)
         self.row_sizing_done = False
 
         if not tweaks['horizontal_scrolling_per_column']:
diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp
index e5ac94a82e..58a32c3479 100644
--- a/src/qtcurve/style/qtcurve.cpp
+++ b/src/qtcurve/style/qtcurve.cpp
@@ -5250,6 +5250,9 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option,
                 QColor color(hasCustomBackground && hasSolidBackground
                                 ? v4Opt->backgroundBrush.color()
                                 : palette.color(cg, QPalette::Highlight));
+                if (state & State_HasFocus && widget->property("highlight_current_item").toBool()) {
+                    color = color.darker(130);
+                }
                 bool   square((opts.square&SQUARE_LISTVIEW_SELECTION) &&
                               (/*(!widget && r.height()<=40 && r.width()>=48) || */
                                (widget && !widget->inherits("KFilePlacesView") &&

From ee0e4e4d1a6230bcfd10f50ea63a71d7ef0fd996 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 01:06:57 +0530
Subject: [PATCH 03/26] Null-check widget before highlighting current cell; stop setting calibre_item_view_focus

---
 src/calibre/gui2/__init__.py  | 1 -
 src/qtcurve/style/qtcurve.cpp | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index ceac21dd30..5fcde65ff5 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -873,7 +873,6 @@ class Application(QApplication):
             v = pcache[v]
             icon_map[type('')(getattr(style, 'SP_'+k))] = v
         style.setProperty(u'calibre_icon_map', icon_map)
-        style.setProperty(u'calibre_item_view_focus', 1)
         self.__icon_map_memory_ = icon_map
 
     def setup_styles(self, force_calibre_style):
diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp
index 58a32c3479..e3bb17d244 100644
--- a/src/qtcurve/style/qtcurve.cpp
+++ b/src/qtcurve/style/qtcurve.cpp
@@ -5250,9 +5250,9 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option,
                 QColor color(hasCustomBackground && hasSolidBackground
                                 ? v4Opt->backgroundBrush.color()
                                 : palette.color(cg, QPalette::Highlight));
-                if (state & State_HasFocus && widget->property("highlight_current_item").toBool()) {
-                    color = color.darker(130);
-                }
+                if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool())
+                    color = color.darker(130); // Added by Kovid to highlight the current cell in the book list
+                
                 bool   square((opts.square&SQUARE_LISTVIEW_SELECTION) &&
                               (/*(!widget && r.height()<=40 && r.width()>=48) || */
                                (widget && !widget->inherits("KFilePlacesView") &&

From f8d6970fd571b3f9d0f63627a8eea098da05152f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 08:12:26 +0530
Subject: [PATCH 04/26] Update .net magazine

---
 recipes/dot_net.recipe | 59 +++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/recipes/dot_net.recipe b/recipes/dot_net.recipe
index 50db71e9be..d3a96ad0c3 100644
--- a/recipes/dot_net.recipe
+++ b/recipes/dot_net.recipe
@@ -1,32 +1,37 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 
-class NetMagazineRecipe (BasicNewsRecipe):
-   __author__ = u'Marc Busqué <marc@lamarciana.com>'
-   __url__ = 'http://www.lamarciana.com'
-   __version__ = '1.0'
-   __license__   = 'GPL v3'
-   __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
-   title = u'.net magazine'
-   description = u'net is the world’s best-selling magazine for web designers and developers, featuring tutorials from leading agencies, interviews with the web’s biggest names, and agenda-setting features on the hottest issues affecting the internet today.'
-   language = 'en'
-   tags = 'web development, software'
-   oldest_article = 7
-   remove_empty_feeds = True
-   no_stylesheets = True
-   cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'
-   keep_only_tags = [
-         dict(name='article', attrs={'class': re.compile('^node.*$', re.IGNORECASE)})
-         ]
-   remove_tags = [
-         dict(name='span', attrs={'class': 'comment-count'}),
-         dict(name='div', attrs={'class': 'item-list share-links'}),
-         dict(name='footer'),
-         ]
-   remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height', 'style']
-   extra_css = 'img {max-width: 100%; display: block; margin: auto;} .captioned-image div {text-align: center; font-style: italic;}'
+class dotnetMagazine (BasicNewsRecipe):
+    __author__ = u'Bonni Salles'
+    __version__ = '1.0'
+    __license__   = 'GPL v3'
+    __copyright__ = u'2013, Bonni Salles'
+    title                 = '.net magazine'
+    oldest_article        = 7
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
+    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'
+
+    remove_tags_after = dict(name='footer', id=lambda x:not x)
+    remove_tags_before = dict(name='header', id=lambda x:not x)
+
+    remove_tags = [
+         dict(name='div', attrs={'class': 'item-list'}),
+         dict(name='h4', attrs={'class': 'std-hdr'}),
+         dict(name='div', attrs={'class': 'item-list share-links'}), #removes share links
+         dict(name=['script', 'noscript']),
+         dict(name='div', attrs={'id': 'comments-form'}), #comment these out if you want the comments to show
+         dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
+         dict(name='div', attrs={'id': 'right-col'}),
+         dict(name='div', attrs={'id': 'comments'}), #comment these out if you want the comments to show
+         dict(name='div', attrs={'class': 'item-list related-content'}),
 
-   feeds = [
-         (u'.net', u'http://feeds.feedburner.com/net/topstories'),
          ]
+
+    feeds = [
+               (u'net', u'http://feeds.feedburner.com/net/topstories')
+            ]

From 1e06698942beaa26b26d6b926f5c2717ae060dc1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 08:20:53 +0530
Subject: [PATCH 05/26] Make the darkness of the current cell highlight
 settable from python

---
 src/calibre/gui2/library/views.py | 2 +-
 src/qtcurve/style/qtcurve.cpp     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index 1d6cd33e9d..cf028d3f12 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -139,7 +139,7 @@ class BooksView(QTableView):  # {{{
 
     def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True):
         QTableView.__init__(self, parent)
-        self.setProperty('highlight_current_item', True)
+        self.setProperty('highlight_current_item', 140)
         self.row_sizing_done = False
 
         if not tweaks['horizontal_scrolling_per_column']:
diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp
index e3bb17d244..ca88a4c054 100644
--- a/src/qtcurve/style/qtcurve.cpp
+++ b/src/qtcurve/style/qtcurve.cpp
@@ -5251,7 +5251,7 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option,
                                 ? v4Opt->backgroundBrush.color()
                                 : palette.color(cg, QPalette::Highlight));
                 if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool())
-                    color = color.darker(130); // Added by Kovid to highlight the current cell in the book list
+                    color = color.darker(widget->property("highlight_current_item").toInt()); // Added by Kovid to highlight the current cell in the book list
                 
                 bool   square((opts.square&SQUARE_LISTVIEW_SELECTION) &&
                               (/*(!widget && r.height()<=40 && r.width()>=48) || */

From b5ddd3a4e5249e7a59bd9b49ac6de795f60e9462 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 09:35:48 +0530
Subject: [PATCH 06/26] Handle dark colorschemes when highlighting current cell

---
 src/calibre/gui2/library/views.py | 2 +-
 src/qtcurve/style/qtcurve.cpp     | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index cf028d3f12..7552257919 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -139,7 +139,7 @@ class BooksView(QTableView):  # {{{
 
     def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True):
         QTableView.__init__(self, parent)
-        self.setProperty('highlight_current_item', 140)
+        self.setProperty('highlight_current_item', 150)
         self.row_sizing_done = False
 
         if not tweaks['horizontal_scrolling_per_column']:
diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp
index ca88a4c054..276e339e62 100644
--- a/src/qtcurve/style/qtcurve.cpp
+++ b/src/qtcurve/style/qtcurve.cpp
@@ -5250,8 +5250,13 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option,
                 QColor color(hasCustomBackground && hasSolidBackground
                                 ? v4Opt->backgroundBrush.color()
                                 : palette.color(cg, QPalette::Highlight));
-                if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool())
-                    color = color.darker(widget->property("highlight_current_item").toInt()); // Added by Kovid to highlight the current cell in the book list
+                if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool()) {
+                    // Added by Kovid to highlight the current cell in the book list
+                    if (color.lightness() > 128)
+                        color = color.darker(widget->property("highlight_current_item").toInt());
+                    else
+                        color = color.lighter();
+                }
                 
                 bool   square((opts.square&SQUARE_LISTVIEW_SELECTION) &&
                               (/*(!widget && r.height()<=40 && r.width()>=48) || */

From 70a6852ab6d6a1d3a77bf3025ea17af7eaf38d62 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 10:04:53 +0530
Subject: [PATCH 07/26] pep8

---
 src/calibre/ebooks/oeb/iterator/book.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/oeb/iterator/book.py b/src/calibre/ebooks/oeb/iterator/book.py
index 77b478924e..28dd37a88e 100644
--- a/src/calibre/ebooks/oeb/iterator/book.py
+++ b/src/calibre/ebooks/oeb/iterator/book.py
@@ -25,7 +25,7 @@ from calibre.ebooks.oeb.transforms.cover import CoverManager
 from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
 from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
 
-TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
+TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(
         '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
         ).replace('__width__', '600').replace('__height__', '800')
 

From abad7da850420e01eb5709d4b0e8740005e67214 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 10:18:35 +0530
Subject: [PATCH 08/26] pep8

---
 src/calibre/ebooks/conversion/preprocess.py | 23 ++++++++++-----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 7e5873edd2..91f91c8b3d 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -14,7 +14,7 @@ SVG_NS       = 'http://www.w3.org/2000/svg'
 XLINK_NS     = 'http://www.w3.org/1999/xlink'
 
 convert_entities = functools.partial(entity_to_unicode,
-        result_exceptions = {
+        result_exceptions={
             u'<' : '&lt;',
             u'>' : '&gt;',
             u"'" : '&apos;',
@@ -144,9 +144,9 @@ class DocAnalysis(object):
         percent is the percentage of lines that should be in a single bucket to return true
         The majority of the lines will exist in 1-2 buckets in typical docs with hard line breaks
         '''
-        minLineLength=20 # Ignore lines under 20 chars (typical of spaces)
-        maxLineLength=1900 # Discard larger than this to stay in range
-        buckets=20 # Each line is divided into a bucket based on length
+        minLineLength=20  # Ignore lines under 20 chars (typical of spaces)
+        maxLineLength=1900  # Discard larger than this to stay in range
+        buckets=20  # Each line is divided into a bucket based on length
 
         #print "there are "+str(len(lines))+" lines"
         #max = 0
@@ -156,7 +156,7 @@ class DocAnalysis(object):
         #        max = l
         #print "max line found is "+str(max)
         # Build the line length histogram
-        hRaw = [ 0 for i in range(0,buckets) ]
+        hRaw = [0 for i in range(0,buckets)]
         for line in self.lines:
             l = len(line)
             if l > minLineLength and l < maxLineLength:
@@ -167,7 +167,7 @@ class DocAnalysis(object):
         # Normalize the histogram into percents
         totalLines = len(self.lines)
         if totalLines > 0:
-            h = [ float(count)/totalLines for count in hRaw ]
+            h = [float(count)/totalLines for count in hRaw]
         else:
             h = []
         #print "\nhRaw histogram lengths are: "+str(hRaw)
@@ -200,7 +200,7 @@ class Dehyphenator(object):
         # Add common suffixes to the regex below to increase the likelihood of a match -
         # don't add suffixes which are also complete words, such as 'able' or 'sex'
         # only remove if it's not already the point of hyphenation
-        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
+        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"  # noqa
         self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
         self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
         # remove prefixes if the prefix was not already the point of hyphenation
@@ -265,19 +265,18 @@ class Dehyphenator(object):
         self.html = html
         self.format = format
         if format == 'html':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)  # noqa
         elif format == 'pdf':
             intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
         elif format == 'txt':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)  # noqa
         elif format == 'individual_words':
             intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)')
         elif format == 'html_cleanup':
-            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
+            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')  # noqa
         elif format == 'txt_cleanup':
             intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)(?P<wraptags>\s+)(?P<secondpart>[\w\d]+)')
 
-
         html = intextmatch.sub(self.dehyphenate, html)
         return html
 
@@ -581,7 +580,7 @@ class HTMLPreProcessor(object):
                 end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                 end_rules.append(
                     # Un wrap using punctuation
-                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
                 )
 
         for rule in self.PREPROCESS + start_rules:

From 32de3c16ea40f234d3b132d77032c31e2c0b4f64 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 12:16:37 +0530
Subject: [PATCH 09/26] Search and replace wizard: Fix generated html being
 slightly different from the actual html in the conversion pipeline for some
 input formats (mainly HTML, CHM, LIT).

---
 src/calibre/ebooks/conversion/plumber.py    | 42 +++++++++++++--------
 src/calibre/ebooks/conversion/preprocess.py |  7 +++-
 src/calibre/ebooks/oeb/base.py              |  5 ++-
 src/calibre/ebooks/oeb/iterator/__init__.py | 27 ++++++-------
 4 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 6ce1b42356..1f459229c8 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -77,7 +77,7 @@ class Plumber(object):
 
     def __init__(self, input, output, log, report_progress=DummyReporter(),
             dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
-            override_input_metadata=False):
+            override_input_metadata=False, for_regex_wizard=False):
         '''
         :param input: Path to input file.
         :param output: Path to output file/directory
@@ -87,6 +87,7 @@ class Plumber(object):
         if isbytestring(output):
             output = output.decode(filesystem_encoding)
         self.original_input_arg = input
+        self.for_regex_wizard = for_regex_wizard
         self.input = os.path.abspath(input)
         self.output = os.path.abspath(output)
         self.log = log
@@ -123,7 +124,7 @@ OptionRecommendation(name='input_profile',
                    'conversion system information on how to interpret '
                    'various information in the input document. For '
                    'example resolution dependent lengths (i.e. lengths in '
-                   'pixels). Choices are:')+\
+                   'pixels). Choices are:')+
                         ', '.join([x.short_name for x in input_profiles()])
         ),
 
@@ -135,7 +136,7 @@ OptionRecommendation(name='output_profile',
                    'created document for the specified device. In some cases, '
                    'an output profile is required to produce documents that '
                    'will work on a device. For example EPUB on the SONY reader. '
-                   'Choices are:') + \
+                   'Choices are:') +
                            ', '.join([x.short_name for x in output_profiles()])
         ),
 
@@ -490,7 +491,7 @@ OptionRecommendation(name='asciiize',
             'cases where there are multiple representations of a character '
             '(characters shared by Chinese and Japanese for instance) the '
             'representation based on the current calibre interface language will be '
-            'used.')%\
+            'used.')%
             u'\u041c\u0438\u0445\u0430\u0438\u043b '
             u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
 )
@@ -711,7 +712,6 @@ OptionRecommendation(name='search_replace',
         self.input_fmt = input_fmt
         self.output_fmt = output_fmt
 
-
         self.all_format_options = set()
         self.input_options = set()
         self.output_options = set()
@@ -775,7 +775,7 @@ OptionRecommendation(name='search_replace',
         if not html_files:
             raise ValueError(_('Could not find an ebook inside the archive'))
         html_files = [(f, os.stat(f).st_size) for f in html_files]
-        html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
+        html_files.sort(cmp=lambda x, y: cmp(x[1], y[1]))
         html_files = [f[0] for f in html_files]
         for q in ('toc', 'index'):
             for f in html_files:
@@ -783,8 +783,6 @@ OptionRecommendation(name='search_replace',
                     return f, os.path.splitext(f)[1].lower()[1:]
         return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
 
-
-
     def get_option_by_name(self, name):
         for group in (self.input_options, self.pipeline_options,
                       self.output_options, self.all_format_options):
@@ -956,7 +954,6 @@ OptionRecommendation(name='search_replace',
 
         self.log.info('Input debug saved to:', out_dir)
 
-
     def run(self):
         '''
         Run the conversion pipeline
@@ -965,10 +962,12 @@ OptionRecommendation(name='search_replace',
         self.setup_options()
         if self.opts.verbose:
             self.log.filter_level = self.log.DEBUG
+        if self.for_regex_wizard and hasattr(self.opts, 'no_process'):
+            self.opts.no_process = True
         self.flush()
         import cssutils, logging
         cssutils.log.setLevel(logging.WARN)
-        get_types_map() # Ensure the mimetypes module is intialized
+        get_types_map()  # Ensure the mimetypes module is intialized
 
         if self.opts.debug_pipeline is not None:
             self.opts.verbose = max(self.opts.verbose, 4)
@@ -1003,6 +1002,8 @@ OptionRecommendation(name='search_replace',
         self.ui_reporter(0.01, _('Converting input to HTML...'))
         ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
         self.input_plugin.report_progress = ir
+        if self.for_regex_wizard:
+            self.input_plugin.for_viewer = True
         with self.input_plugin:
             self.oeb = self.input_plugin(stream, self.opts,
                                         self.input_fmt, self.log,
@@ -1014,8 +1015,12 @@ OptionRecommendation(name='search_replace',
             if self.input_fmt in ('recipe', 'downloaded_recipe'):
                 self.opts_to_mi(self.user_metadata)
             if not hasattr(self.oeb, 'manifest'):
-                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
-                        encoding=self.input_plugin.output_encoding)
+                self.oeb = create_oebbook(
+                    self.log, self.oeb, self.opts,
+                    encoding=self.input_plugin.output_encoding,
+                    for_regex_wizard=self.for_regex_wizard)
+            if self.for_regex_wizard:
+                return
             self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
             self.opts.is_image_collection = self.input_plugin.is_image_collection
             pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
@@ -1081,7 +1086,6 @@ OptionRecommendation(name='search_replace',
             self.dump_oeb(self.oeb, out_dir)
             self.log('Structured HTML written to:', out_dir)
 
-
         if self.opts.extra_css and os.path.exists(self.opts.extra_css):
             self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
 
@@ -1161,13 +1165,20 @@ OptionRecommendation(name='search_replace',
         self.log(self.output_fmt.upper(), 'output written to', self.output)
         self.flush()
 
+# This has to be global as create_oebbook can be called from other locations
+# (for example in the html input plugin)
+regex_wizard_callback = None
+def set_regex_wizard_callback(f):
+    global regex_wizard_callback
+    regex_wizard_callback = f
+
 def create_oebbook(log, path_or_stream, opts, reader=None,
-        encoding='utf-8', populate=True):
+        encoding='utf-8', populate=True, for_regex_wizard=False):
     '''
     Create an OEBBook.
     '''
     from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor(log, opts)
+    html_preprocessor = HTMLPreProcessor(log, opts, regex_wizard_callback=regex_wizard_callback)
     if not encoding:
         encoding = None
     oeb = OEBBook(log, html_preprocessor,
@@ -1182,3 +1193,4 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
 
     reader()(oeb, path_or_stream)
     return oeb
+
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 91f91c8b3d..126709200a 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -497,9 +497,11 @@ class HTMLPreProcessor(object):
                      (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                       lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                      ]
-    def __init__(self, log=None, extra_opts=None):
+    def __init__(self, log=None, extra_opts=None, regex_wizard_callback=None):
         self.log = log
         self.extra_opts = extra_opts
+        self.regex_wizard_callback = regex_wizard_callback
+        self.current_href = None
 
     def is_baen(self, src):
         return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@@ -586,6 +588,9 @@ class HTMLPreProcessor(object):
         for rule in self.PREPROCESS + start_rules:
             html = rule[0].sub(rule[1], html)
 
+        if self.regex_wizard_callback is not None:
+            self.regex_wizard_callback(self.current_href, html)
+
         if get_preprocess_html:
             return html
 
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index eb5b0042e7..671caf49fc 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -871,6 +871,7 @@ class Manifest(object):
             orig_data = data
             fname = urlunquote(self.href)
             self.oeb.log.debug('Parsing', fname, '...')
+            self.oeb.html_preprocessor.current_href = self.href
             try:
                 data = parse_html(data, log=self.oeb.log,
                         decoder=self.oeb.decode,
@@ -1312,9 +1313,9 @@ class Guide(object):
                          ('notes', __('Notes')),
                          ('preface', __('Preface')),
                          ('text', __('Main Text'))]
-        TYPES = set(t for t, _ in _TYPES_TITLES)
+        TYPES = set(t for t, _ in _TYPES_TITLES)  # noqa
         TITLES = dict(_TYPES_TITLES)
-        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))
+        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))  # noqa
 
         def __init__(self, oeb, type, title, href):
             self.oeb = oeb
diff --git a/src/calibre/ebooks/oeb/iterator/__init__.py b/src/calibre/ebooks/oeb/iterator/__init__.py
index 29487cbb84..3e2dfc5df2 100644
--- a/src/calibre/ebooks/oeb/iterator/__init__.py
+++ b/src/calibre/ebooks/oeb/iterator/__init__.py
@@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, re
+import sys, os, re
 
 from calibre.customize.ui import available_input_formats
 
@@ -26,17 +26,18 @@ def EbookIterator(*args, **kwargs):
     from calibre.ebooks.oeb.iterator.book import EbookIterator
     return EbookIterator(*args, **kwargs)
 
-def get_preprocess_html(path_to_ebook, output):
-    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
-    iterator = EbookIterator(path_to_ebook)
-    iterator.__enter__(only_input_plugin=True, run_char_count=False,
-            read_anchor_map=False)
-    preprocessor = HTMLPreProcessor(None, False)
-    with open(output, 'wb') as out:
-        for path in iterator.spine:
-            with open(path, 'rb') as f:
-                html = f.read().decode('utf-8', 'replace')
-            html = preprocessor(html, get_preprocess_html=True)
+def get_preprocess_html(path_to_ebook, output=None):
+    from calibre.ebooks.conversion.plumber import set_regex_wizard_callback, Plumber
+    from calibre.utils.logging import DevNull
+    from calibre.ptempfile import TemporaryDirectory
+    raw = {}
+    set_regex_wizard_callback(raw.__setitem__)
+    with TemporaryDirectory('_regex_wiz') as tdir:
+        pl = Plumber(path_to_ebook, os.path.join(tdir, 'a.epub'), DevNull(), for_regex_wizard=True)
+        pl.run()
+        items = [raw[item.href] for item in pl.oeb.spine if item.href in raw]
+
+    with (sys.stdout if output is None else open(output, 'wb')) as out:
+        for html in items:
             out.write(html.encode('utf-8'))
             out.write(b'\n\n' + b'-'*80 + b'\n\n')
-

From ddbff0f23d9727c66f550276cb123dce0b033b61 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 14:38:21 +0530
Subject: [PATCH 10/26] pep8

---
 src/calibre/ebooks/oeb/parse_utils.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py
index f053b5f515..8bf9c23d98 100644
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@@ -44,8 +44,10 @@ META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
 
 def merge_multiple_html_heads_and_bodies(root, log=None):
     heads, bodies = xpath(root, '//h:head'), xpath(root, '//h:body')
-    if not (len(heads) > 1 or len(bodies) > 1): return root
-    for child in root: root.remove(child)
+    if not (len(heads) > 1 or len(bodies) > 1):
+        return root
+    for child in root:
+        root.remove(child)
     head = root.makeelement(XHTML('head'))
     body = root.makeelement(XHTML('body'))
     for h in heads:
@@ -88,7 +90,7 @@ def html5_parse(data, max_nesting_depth=100):
     # Check that the asinine HTML 5 algorithm did not result in a tree with
     # insane nesting depths
     for x in data.iterdescendants():
-        if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node
+        if isinstance(x.tag, basestring) and len(x) is 0:  # Leaf node
             depth = node_depth(x)
             if depth > max_nesting_depth:
                 raise ValueError('html5lib resulted in a tree with nesting'
@@ -228,7 +230,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
     if idx > -1:
         pre = data[:idx]
         data = data[idx:]
-        if '<!DOCTYPE' in pre: # Handle user defined entities
+        if '<!DOCTYPE' in pre:  # Handle user defined entities
             user_entities = {}
             for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                 val = match.group(2)
@@ -368,8 +370,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
         meta.getparent().remove(meta)
     meta = etree.SubElement(head, XHTML('meta'),
         attrib={'http-equiv': 'Content-Type'})
-    meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
-                                                    # attribute
+    meta.set('content', 'text/html; charset=utf-8')  # Ensure content is second attribute
 
     # Ensure has a <body/>
     if not xpath(data, '/h:html/h:body'):

From c2fde795af6ca1e9cbe8b117e61d3214530ca91b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 14:45:25 +0530
Subject: [PATCH 11/26] pep8

---
 src/calibre/ebooks/mobi/reader/markup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/markup.py b/src/calibre/ebooks/mobi/reader/markup.py
index 3330c65a0a..d558ce611a 100644
--- a/src/calibre/ebooks/mobi/reader/markup.py
+++ b/src/calibre/ebooks/mobi/reader/markup.py
@@ -112,7 +112,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
     url_css_index_pattern = re.compile(r'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE)
 
     for flow in mr.flows:
-        if flow is None: # 0th flow is None
+        if flow is None:  # 0th flow is None
             flows.append(flow)
             continue
 
@@ -330,7 +330,7 @@ def expand_mobi8_markup(mobi8_reader, resource_map, log):
     mobi8_reader.flows = flows
 
     # write out the parts and file flows
-    os.mkdir('text') # directory containing all parts
+    os.mkdir('text')  # directory containing all parts
     spine = []
     for i, part in enumerate(parts):
         pi = mobi8_reader.partinfo[i]

From c1d49333a0ea7e64adef606d73bc28079e4f7b86 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 15:10:40 +0530
Subject: [PATCH 12/26] pep8

---
 src/calibre/ebooks/mobi/debug/mobi8.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index e1c8ffba44..a180b11ad0 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -163,7 +163,8 @@ class MOBIFile(object):
             ext = 'dat'
             prefix = 'binary'
             suffix = ''
-            if sig in {b'HUFF', b'CDIC', b'INDX'}: continue
+            if sig in {b'HUFF', b'CDIC', b'INDX'}:
+                continue
             # TODO: Ignore CNCX records as well
             if sig == b'FONT':
                 font = read_font_record(rec.raw)
@@ -196,7 +197,6 @@ class MOBIFile(object):
             vals = list(index)[:-1] + [None, None, None, None]
             entry_map.append(Entry(*(vals[:12])))
 
-
         indexing_data = collect_indexing_data(entry_map, list(map(len,
             self.text_records)))
         self.indexing_data = [DOC + '\n' +textwrap.dedent('''\

From 689808861a304835a09ad47ae3e30e76cede8973 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 15:38:23 +0530
Subject: [PATCH 13/26] MOBI Input: Add support for MOBI/KF8 files generated
 with the to be released kindlegen 2.9. Fixes #1179144 (error during
 conversion azw3 to other formats)

---
 src/calibre/ebooks/mobi/reader/headers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/headers.py b/src/calibre/ebooks/mobi/reader/headers.py
index b5b55b2ba0..31646a8d7b 100644
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@@ -181,9 +181,9 @@ class BookHeader(object):
                 self.codec = 'cp1252' if not user_encoding else user_encoding
                 log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
                     self.codec))
-            # Some KF8 files have header length == 256 (generated by kindlegen
-            # 2.7?). See https://bugs.launchpad.net/bugs/1067310
-            max_header_length = 0x100
+            # Some KF8 files have header length == 264 (generated by kindlegen
+            # 2.9?). See https://bugs.launchpad.net/bugs/1179144
+            max_header_length = 0x108
 
             if (ident == 'TEXTREAD' or self.length < 0xE4 or
                     self.length > max_header_length or

From 1c225cac666e28563e6a99f720f35b1cd31c18f7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 12 May 2013 21:51:12 +0530
Subject: [PATCH 14/26] MOBI Output: Fix space erroneously being removed when
 the input document contains a tag with leading space and sub-tags. Fixes
 #1179216 (Space lost between span tags converting to mobi)

---
 src/calibre/ebooks/mobi/mobiml.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 9610b7c0bd..f6cd55dafe 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -16,7 +16,8 @@ from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
 from calibre.utils.magick.draw import identify_data
 
 MBP_NS = 'http://mobipocket.com/ns/mbp'
-def MBP(name): return '{%s}%s' % (MBP_NS, name)
+def MBP(name):
+    return '{%s}%s' % (MBP_NS, name)
 
 MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}
 
@@ -413,7 +414,7 @@ class MobiMLizer(object):
                         # img sizes in units other than px
                         # See #7520 for test case
                         try:
-                            pixs = int(round(float(value) / \
+                            pixs = int(round(float(value) /
                                 (72./self.profile.dpi)))
                         except:
                             continue
@@ -488,8 +489,6 @@ class MobiMLizer(object):
         if elem.text:
             if istate.preserve:
                 text = elem.text
-            elif len(elem) > 0 and isspace(elem.text):
-                text = None
             else:
                 text = COLLAPSE.sub(' ', elem.text)
         valign = style['vertical-align']

From cc223574d07cc87e0a810b16107fad70cbdbb410 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 13 May 2013 08:56:31 +0530
Subject: [PATCH 15/26] PDF Output: Ignore invalid links instead of erroring
 out on them. Fixes #1179314 (conversion from CHM to PDF fails)

---
 src/calibre/ebooks/pdf/render/links.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py
index 2d0b91bbfe..500bbbf6c1 100644
--- a/src/calibre/ebooks/pdf/render/links.py
+++ b/src/calibre/ebooks/pdf/render/links.py
@@ -45,11 +45,15 @@ class Links(object):
             href, page, rect = link
             p, frag = href.partition('#')[0::2]
             try:
-                link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect))
+                pref = self.pdf.get_pageref(page).obj
             except IndexError:
-                self.log.warn('Unable to find page for link: %r, ignoring it' % link)
-                continue
-            self.links.append(link)
+                try:
+                    pref = self.pdf.get_pageref(page-1).obj
+                except IndexError:
+                    self.pdf.debug('Unable to find page for link: %r, ignoring it' % link)
+                    continue
+                self.pdf.debug('The link %s points to non-existent page, moving it one page back' % href)
+            self.links.append(((path, p, frag or None), pref, Array(rect)))
 
     def add_links(self):
         for link in self.links:

From ed422c7b0fb17ee6b4c7b45106d6293538e9a14f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 13 May 2013 18:39:33 +0530
Subject: [PATCH 16/26] DOCX Input: Lists work

---
 src/calibre/ebooks/docx/block_styles.py |  17 ++-
 src/calibre/ebooks/docx/numbering.py    | 144 +++++++++++++++++++++++-
 src/calibre/ebooks/docx/styles.py       |  33 +++++-
 src/calibre/ebooks/docx/to_html.py      |  23 +++-
 4 files changed, 200 insertions(+), 17 deletions(-)

diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py
index 1770569b61..b501580042 100644
--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@@ -175,6 +175,20 @@ def read_shd(parent, dest):
         if val:
             ans = simple_color(val, auto='transparent')
     setattr(dest, 'background_color', ans)
+
+def read_numbering(parent, dest):
+    lvl = num_id = None
+    for np in XPath('./w:numPr')(parent):
+        for ilvl in XPath('./w:ilvl[@w:val]')(np):
+            try:
+                lvl = int(get(ilvl, 'w:val'))
+            except (ValueError, TypeError):
+                pass
+        for num in XPath('./w:numId[@w:val]')(np):
+            num_id = get(num, 'w:val')
+    val = (num_id, lvl) if num_id is not None or lvl is not None else inherit
+    setattr(dest, 'numbering', val)
+
 # }}}
 
 class ParagraphStyle(object):
@@ -194,6 +208,7 @@ class ParagraphStyle(object):
 
         # Misc.
         'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
+        'numbering',
     )
 
     def __init__(self, pPr=None):
@@ -210,7 +225,7 @@ class ParagraphStyle(object):
             ):
                 setattr(self, p, binary_property(pPr, p))
 
-            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
+            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
                 f = globals()['read_%s' % x]
                 f(pPr, self)
 
diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py
index fc1e65db6a..8693e2a9a1 100644
--- a/src/calibre/ebooks/docx/numbering.py
+++ b/src/calibre/ebooks/docx/numbering.py
@@ -6,6 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 
+import re
+from collections import Counter
+
+from lxml.html.builder import OL, UL, SPAN
+
 from calibre.ebooks.docx.block_styles import ParagraphStyle
 from calibre.ebooks.docx.char_styles import RunStyle
 from calibre.ebooks.docx.names import XPath, get
@@ -33,10 +38,26 @@ class Level(object):
         self.fmt = 'decimal'
         self.para_link = None
         self.paragraph_style = self.character_style = None
+        self.is_numbered = False
+        self.num_template = None
 
         if lvl is not None:
             self.read_from_xml(lvl)
 
+    def copy(self):
+        ans = Level()
+        for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'):
+            setattr(ans, x, getattr(self, x))
+        return ans
+
+    def format_template(self, counter, ilvl):
+        def sub(m):
+            x = int(m.group(1)) - 1
+            if x > ilvl or x not in counter:
+                return ''
+            return '%d' % (counter[x] - (0 if x == ilvl else 1))
+        return re.sub(r'%(\d+)', sub, self.num_template).rstrip() + '\xa0'
+
     def read_from_xml(self, lvl, override=False):
         for lr in XPath('./w:lvlRestart[@w:val]')(lvl):
             try:
@@ -57,9 +78,13 @@ class Level(object):
         for lr in XPath('./w:numFmt[@w:val]')(lvl):
             val = get(lr, 'w:val')
             if val == 'bullet':
+                self.is_numbered = False
                 self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
             else:
+                self.is_numbered = True
                 self.fmt = STYLE_MAP.get(val, 'decimal')
+                if lt and re.match(r'%\d+\.$', lt) is None:
+                    self.num_template = lt
 
         for lr in XPath('./w:pStyle[@w:val]')(lvl):
             self.para_link = get(lr, 'w:val')
@@ -78,12 +103,6 @@ class Level(object):
             else:
                 self.character_style.update(ps)
 
-    def copy(self):
-        ans = Level()
-        for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style'):
-            setattr(ans, x, getattr(self, x))
-        return ans
-
 class NumberingDefinition(object):
 
     def __init__(self, parent=None):
@@ -107,6 +126,7 @@ class Numbering(object):
     def __init__(self):
         self.definitions = {}
         self.instances = {}
+        self.counters = {}
 
     def __call__(self, root, styles):
         ' Read all numbering style definitions '
@@ -131,6 +151,7 @@ class Numbering(object):
                     if alvl is None:
                         alvl = Level()
                     alvl.read_from_xml(lvl, override=True)
+            return nd
 
         next_pass = {}
         for n in XPath('./w:num[@w:numId]')(root):
@@ -154,3 +175,114 @@ class Numbering(object):
             if d is not None:
                 self.instances[num_id] = create_instance(n, d)
 
+        for num_id, d in self.instances.iteritems():
+            self.counters[num_id] = Counter({lvl:d.levels[lvl].start for lvl in d.levels})
+
+    def get_pstyle(self, num_id, style_id):
+        d = self.instances.get(num_id, None)
+        if d is not None:
+            for ilvl, lvl in d.levels.iteritems():
+                if lvl.para_link == style_id:
+                    return ilvl
+
+    def get_para_style(self, num_id, lvl):
+        d = self.instances.get(num_id, None)
+        if d is not None:
+            lvl = d.levels.get(lvl, None)
+            return getattr(lvl, 'paragraph_style', None)
+
+    def update_counter(self, counter, levelnum, levels):
+        counter[levelnum] += 1
+        for ilvl, lvl in levels.iteritems():
+            restart = lvl.restart
+            if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1:
+                counter[ilvl] = lvl.start
+
+    def apply_markup(self, items, body, styles, object_map):
+        for p, num_id, ilvl in items:
+            d = self.instances.get(num_id, None)
+            if d is not None:
+                lvl = d.levels.get(ilvl, None)
+                if lvl is not None:
+                    counter = self.counters[num_id]
+                    p.tag = 'li'
+                    p.set('value', '%s' % counter[ilvl])
+                    p.set('list-lvl', str(ilvl))
+                    p.set('list-id', num_id)
+                    if lvl.num_template is not None:
+                        val = lvl.format_template(counter, ilvl)
+                        p.set('list-template', val)
+                    self.update_counter(counter, ilvl, d.levels)
+
+        def commit(current_run):
+            if not current_run:
+                return
+            start = current_run[0]
+            parent = start.getparent()
+            idx = parent.index(start)
+
+            d = self.instances[start.get('list-id')]
+            ilvl = int(start.get('list-lvl'))
+            lvl = d.levels[ilvl]
+            lvlid = start.get('list-id') + start.get('list-lvl')
+            wrap = (OL if lvl.is_numbered else UL)('\n\t')
+            has_template = 'list-template' in start.attrib
+            if has_template:
+                wrap.set('lvlid', lvlid)
+            else:
+                wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list'))
+            parent.insert(idx, wrap)
+            last_val = None
+            for child in current_run:
+                wrap.append(child)
+                child.tail = '\n\t'
+                if has_template:
+                    span = SPAN()
+                    span.text = child.text
+                    child.text = None
+                    for gc in child:
+                        span.append(gc)
+                    child.append(span)
+                    span = SPAN(child.get('list-template'))
+                    child.insert(0, span)
+                for attr in ('list-lvl', 'list-id', 'list-template'):
+                    child.attrib.pop(attr, None)
+                val = int(child.get('value'))
+                if last_val == val - 1 or wrap.tag == 'ul':
+                    child.attrib.pop('value')
+                last_val = val
+            current_run[-1].tail = '\n'
+            del current_run[:]
+
+        parents = set()
+        for child in body.iterdescendants('li'):
+            parents.add(child.getparent())
+
+        for parent in parents:
+            current_run = []
+            for child in parent:
+                if child.tag == 'li':
+                    if current_run:
+                        last = current_run[-1]
+                        if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
+                            commit(current_run)
+                    current_run.append(child)
+                else:
+                    commit(current_run)
+            commit(current_run)
+
+        for wrap in body.xpath('//ol[@lvlid]'):
+            wrap.attrib.pop('lvlid')
+            wrap.tag = 'div'
+            for i, li in enumerate(wrap.iterchildren('li')):
+                li.tag = 'div'
+                li.attrib.pop('value', None)
+                li.set('style', 'display:table-row')
+                obj = object_map[li]
+                bs = styles.para_cache[obj]
+                if i == 0:
+                    wrap.set('style', 'display:table; margin-left: %s' % (bs.css.get('margin-left', 0)))
+                bs.css.pop('margin-left', None)
+                for child in li:
+                    child.set('style', 'display:table-cell')
+
diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index a17295aa61..44ae2cea89 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -198,8 +198,19 @@ class Styles(object):
                 if default_para.character_style is not None:
                     self.para_char_cache[p] = default_para.character_style
 
+            is_numbering = direct_formatting.numbering is not inherit
+            if is_numbering:
+                num_id, lvl = direct_formatting.numbering
+                if num_id is not None:
+                    p.set('calibre_num_id', '%s:%s' % (lvl, num_id))
+                if num_id is not None and lvl is not None:
+                    ps = self.numbering.get_para_style(num_id, lvl)
+                    if ps is not None:
+                        parent_styles.append(ps)
+
             for attr in ans.all_properties:
-                setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
+                if not (is_numbering and attr == 'text_indent'):  # skip text-indent for lists
+                    setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
         return ans
 
     def resolve_run(self, r):
@@ -244,10 +255,20 @@ class Styles(object):
             return self.resolve_run(obj)
 
     def resolve_numbering(self, numbering):
-        pass  # TODO: Implement this
+        # When a numPr element appears inside a paragraph style, the lvl info
+        # must be discarded and pStyle used instead.
+        self.numbering = numbering
+        for style in self:
+            ps = style.paragraph_style
+            if ps is not None and ps.numbering is not inherit:
+                lvl = numbering.get_pstyle(ps.numbering[0], style.style_id)
+                if lvl is None:
+                    ps.numbering = inherit
+                else:
+                    ps.numbering = (ps.numbering[0], lvl)
 
     def register(self, css, prefix):
-        h = hash(tuple(css.iteritems()))
+        h = hash(frozenset(css.iteritems()))
         ans, _ = self.classes.get(h, (None, None))
         if ans is None:
             self.counter[prefix] += 1
@@ -266,13 +287,15 @@ class Styles(object):
                 self.register(css, 'text')
 
     def class_name(self, css):
-        h = hash(tuple(css.iteritems()))
+        h = hash(frozenset(css.iteritems()))
         return self.classes.get(h, (None, None))[0]
 
     def generate_css(self):
         prefix = textwrap.dedent(
             '''\
-            p { margin: 0; padding: 0; text-indent: 1.5em }
+            p { text-indent: 1.5em }
+
+            ul, ol, p { margin: 0; padding: 0 }
             ''')
 
         ans = []
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 7aa0383da6..8cd79074e3 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -7,6 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 
 import sys, os, re
+from collections import OrderedDict
 
 from lxml import html
 from lxml.html.builder import (
@@ -36,7 +37,7 @@ class Convert(object):
         self.mi = self.docx.metadata
         self.body = BODY()
         self.styles = Styles()
-        self.object_map = {}
+        self.object_map = OrderedDict()
         self.html = HTML(
             HEAD(
                 META(charset='utf-8'),
@@ -72,6 +73,19 @@ class Convert(object):
                 pass  # TODO: Last section properties
             else:
                 self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
+
+        numbered = []
+        for html_obj, obj in self.object_map.iteritems():
+            raw = obj.get('calibre_num_id', None)
+            if raw is not None:
+                lvl, num_id = raw.partition(':')[0::2]
+                try:
+                    lvl = int(lvl)
+                except (TypeError, ValueError):
+                    lvl = 0
+                numbered.append((html_obj, num_id, lvl))
+        self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
+
         if len(self.body) > 0:
             self.body.text = '\n\t'
             for child in self.body:
@@ -102,7 +116,7 @@ class Convert(object):
 
         nname = get_name(NUMBERING, 'numbering.xml')
         sname = get_name(STYLES, 'styles.xml')
-        numbering = Numbering()
+        numbering = self.numbering = Numbering()
 
         if sname is not None:
             try:
@@ -133,6 +147,7 @@ class Convert(object):
 
     def convert_p(self, p):
         dest = P()
+        self.object_map[dest] = p
         style = self.styles.resolve_paragraph(p)
         for run in XPath('descendant::w:r')(p):
             span = self.convert_run(run)
@@ -173,7 +188,6 @@ class Convert(object):
                 wrapper = self.wrap_elems(spans, SPAN())
                 wrapper.set('class', cls)
 
-        self.object_map[dest] = p
         return dest
 
     def wrap_elems(self, elems, wrapper):
@@ -188,7 +202,7 @@ class Convert(object):
 
     def convert_run(self, run):
         ans = SPAN()
-        ans.run = run
+        self.object_map[ans] = run
         text = Text(ans, 'text', [])
 
         for child in run:
@@ -224,7 +238,6 @@ class Convert(object):
             ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
         if style.lang is not inherit:
             ans.lang = style.lang
-        self.object_map[ans] = run
         return ans
 
 if __name__ == '__main__':

From e637b32485386b2e05820d2861a5ea50e97fb687 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 13 May 2013 21:46:48 +0530
Subject: [PATCH 17/26] Add mechanism for device drivers to popup a message to
 the user after a callback

---
 src/calibre/devices/interface.py | 26 +++++++++++++++-----------
 src/calibre/gui2/device.py       | 20 +++++++++++++++-----
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/src/calibre/devices/interface.py b/src/calibre/devices/interface.py
index 2b3bbd4fd6..9b173b091e 100644
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@@ -107,6 +107,12 @@ class DevicePlugin(Plugin):
     #: :meth:`set_user_blacklisted_devices`
     ASK_TO_ALLOW_CONNECT = False
 
+    #: Set this to a dictionary of the form {'title':title, 'msg':msg, 'det_msg':detailed_msg} to have calibre popup
+    #: a message to the user after some callbacks are run (currently only upload_books).
+    #: Be careful to not spam the user with too many messages. This variable is checked after *every* callback,
+    #: so only set it when you really need to.
+    user_feedback_after_callback = None
+
     @classmethod
     def get_gui_name(cls):
         if hasattr(cls, 'gui_name'):
@@ -157,16 +163,15 @@ class DevicePlugin(Plugin):
                 if (vid in device_id or vidd in device_id) and \
                    (pid in device_id or pidd in device_id) and \
                    self.test_bcd_windows(device_id, bcd):
-                       if debug:
-                           self.print_usb_device_info(device_id)
-                       if only_presence or self.can_handle_windows(device_id, debug=debug):
-                           try:
-                               bcd = int(device_id.rpartition(
-                                   'rev_')[-1].replace(':', 'a'), 16)
-                           except:
-                               bcd = None
-                           return True, (vendor_id, product_id, bcd, None,
-                                   None, None)
+                        if debug:
+                            self.print_usb_device_info(device_id)
+                        if only_presence or self.can_handle_windows(device_id, debug=debug):
+                            try:
+                                bcd = int(device_id.rpartition(
+                                            'rev_')[-1].replace(':', 'a'), 16)
+                            except:
+                                bcd = None
+                            return True, (vendor_id, product_id, bcd, None, None, None)
         return False, None
 
     def test_bcd(self, bcdDevice, bcd):
@@ -638,7 +643,6 @@ class DevicePlugin(Plugin):
         '''
         device_prefs.set_overrides()
 
-
     # Dynamic control interface.
     # The following methods are probably called on the GUI thread. Any driver
     # that implements these methods must take pains to be thread safe, because
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 45778ec309..15dc1f0c0a 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -122,7 +122,8 @@ def device_name_for_plugboards(device_class):
 class DeviceManager(Thread): # {{{
 
     def __init__(self, connected_slot, job_manager, open_feedback_slot,
-            open_feedback_msg, allow_connect_slot, sleep_time=2):
+                 open_feedback_msg, allow_connect_slot,
+                 after_callback_feedback_slot, sleep_time=2):
         '''
         :sleep_time: Time to sleep between device probes in secs
         '''
@@ -150,6 +151,7 @@ class DeviceManager(Thread): # {{{
         self.ejected_devices  = set([])
         self.mount_connection_requests = Queue.Queue(0)
         self.open_feedback_slot = open_feedback_slot
+        self.after_callback_feedback_slot = after_callback_feedback_slot
         self.open_feedback_msg = open_feedback_msg
         self._device_information = None
         self.current_library_uuid = None
@@ -392,6 +394,10 @@ class DeviceManager(Thread): # {{{
                         self.device.set_progress_reporter(job.report_progress)
                     self.current_job.run()
                     self.current_job = None
+                    feedback = getattr(self.device, 'user_feedback_after_callback', None)
+                    if feedback is not None:
+                        self.device.user_feedback_after_callback = None
+                        self.after_callback_feedback_slot(feedback)
                 else:
                     break
             if do_sleep:
@@ -850,7 +856,7 @@ class DeviceMixin(object): # {{{
         self.device_manager = DeviceManager(FunctionDispatcher(self.device_detected),
                 self.job_manager, Dispatcher(self.status_bar.show_message),
                 Dispatcher(self.show_open_feedback),
-                FunctionDispatcher(self.allow_connect))
+                FunctionDispatcher(self.allow_connect), Dispatcher(self.after_callback_feedback))
         self.device_manager.start()
         self.device_manager.devices_initialized.wait()
         if tweaks['auto_connect_to_folder']:
@@ -862,6 +868,10 @@ class DeviceMixin(object): # {{{
                 name, show_copy_button=False,
                 override_icon=QIcon(icon))
 
+    def after_callback_feedback(self, feedback):
+        # feedback is indexed by the keys 'title', 'msg' and 'det_msg'
+        info_dialog(self, feedback['title'], feedback['msg'], det_msg=feedback['det_msg']).show()
+
     def debug_detection(self, done):
         self.debug_detection_callback = weakref.ref(done)
         self.device_manager.debug_detection(FunctionDispatcher(self.debug_detection_done))
@@ -1116,7 +1126,7 @@ class DeviceMixin(object): # {{{
             return
 
         dm = self.iactions['Remove Books'].delete_memory
-        if dm.has_key(job):
+        if job in dm:
             paths, model = dm.pop(job)
             self.device_manager.remove_books_from_metadata(paths,
                     self.booklists())
@@ -1141,7 +1151,7 @@ class DeviceMixin(object): # {{{
     def dispatch_sync_event(self, dest, delete, specific):
         rows = self.library_view.selectionModel().selectedRows()
         if not rows or len(rows) == 0:
-            error_dialog(self, _('No books'), _('No books')+' '+\
+            error_dialog(self, _('No books'), _('No books')+' '+
                     _('selected to send')).exec_()
             return
 
@@ -1160,7 +1170,7 @@ class DeviceMixin(object): # {{{
                 if fmts:
                     for f in fmts.split(','):
                         f = f.lower()
-                        if format_count.has_key(f):
+                        if f in format_count:
                             format_count[f] += 1
                         else:
                             format_count[f] = 1

From ec863926661d2bc9366d2fe1b74bd091138e0495 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 08:29:58 +0530
Subject: [PATCH 18/26] Fix #1179697 (write a device driver for my device)

---
 src/calibre/devices/android/driver.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 9d5ce152d3..2855de16ae 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -240,7 +240,8 @@ class ANDROID(USBMS):
             'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
             'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
             'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS',
-            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894']
+            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894', '_USB',
+    ]
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@@ -251,7 +252,9 @@ class ANDROID(USBMS):
             'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
             'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
             'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
-            'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894']
+            'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894',
+            '_USB',
+    ]
 
     OSX_MAIN_MEM = 'Android Device Main Memory'
 

From e33ac985b4b5e6485a5ce53970093ce6e04a78ad Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 09:30:36 +0530
Subject: [PATCH 19/26] On linux when searching the system for fonts, search
 all directories returned by fontconfig, if available, instead of a default
 list of directories

---
 src/calibre/utils/fonts/scanner.py | 84 ++++++++++++++++++++++++++----
 1 file changed, 75 insertions(+), 9 deletions(-)

diff --git a/src/calibre/utils/fonts/scanner.py b/src/calibre/utils/fonts/scanner.py
index 827e5536d5..b5628989c2 100644
--- a/src/calibre/utils/fonts/scanner.py
+++ b/src/calibre/utils/fonts/scanner.py
@@ -13,13 +13,82 @@ from threading import Thread
 
 from calibre import walk, prints, as_unicode
 from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
-        isworker)
+        isworker, filesystem_encoding)
 from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
 from calibre.utils.icu import sort_key
 
 class NoFonts(ValueError):
     pass
 
+
+def default_font_dirs():
+    ' Font directories used when fontconfig cannot supply a list '
+    system_dirs = ['/opt/share/fonts', '/usr/share/fonts',
+                   '/usr/local/share/fonts']
+    user_dirs = [os.path.expanduser('~/' + x)
+                 for x in ('.local/share/fonts', '.fonts')]
+    # Absolute system locations first, then the ones under the home directory
+    return system_dirs + user_dirs
+
+
+def fc_list():
+    '''
+    Return the list of font directories reported by fontconfig.
+
+    Falls back to default_font_dirs() when the fontconfig library or one
+    of its functions cannot be loaded, when a path cannot be decoded, or
+    when fewer than three directories are found. Directories located
+    inside an already listed directory are dropped.
+    '''
+    import ctypes
+    from ctypes.util import find_library
+
+    lib = find_library('fontconfig')
+    if lib is None:
+        return default_font_dirs()
+    try:
+        lib = ctypes.CDLL(lib)
+    except Exception:
+        return default_font_dirs()
+
+    # Bind the three fontconfig functions used to walk the directory list
+    void_p, char_p = ctypes.c_void_p, ctypes.c_char_p
+    try:
+        get_font_dirs = ctypes.CFUNCTYPE(void_p, void_p)(('FcConfigGetFontDirs', lib))
+        next_dir = ctypes.CFUNCTYPE(char_p, void_p)(('FcStrListNext', lib))
+        end = ctypes.CFUNCTYPE(None, void_p)(('FcStrListDone', lib))
+    except AttributeError:
+        return default_font_dirs()
+
+    # Called with a NULL FcConfig pointer
+    str_list = get_font_dirs(ctypes.c_void_p())
+    if not str_list:
+        return default_font_dirs()
+
+    ans = []
+    try:
+        while True:
+            d = next_dir(str_list)
+            if not d:
+                break
+            try:
+                ans.append(d.decode(filesystem_encoding))
+            except ValueError:
+                # An undecodable path: fall back to the default directories
+                return default_font_dirs()
+    finally:
+        # Release the fontconfig string list on every exit path
+        end(str_list)
+    if len(ans) < 3:
+        return default_font_dirs()
+    parents = []
+    for f in ans:
+        # Compare on a path separator so /fonts does not swallow /fonts2
+        if not any(f == p or f.startswith(p.rstrip('/') + '/') for p in parents):
+            parents.append(f)
+    return parents
+
+
 def font_dirs():
     if iswindows:
         winutil, err = plugins['winutil']
@@ -35,12 +104,7 @@ def font_dirs():
                 os.path.expanduser('~/.fonts'),
                 os.path.expanduser('~/Library/Fonts'),
                 ]
-    return [
-            '/opt/share/fonts',
-            '/usr/share/fonts',
-            '/usr/local/share/fonts',
-            os.path.expanduser('~/.fonts')
-            ]
+    return fc_list()
 
 class Scanner(Thread):
 
@@ -133,7 +197,8 @@ class Scanner(Thread):
 
         for family in self.find_font_families():
             faces = filter(filter_faces, self.fonts_for_family(family))
-            if not faces: continue
+            if not faces:
+                continue
             generic_family = panose_to_css_generic_family(faces[0]['panose'])
             if generic_family in allowed_families or generic_family == preferred_families[0]:
                 return (family, faces)
@@ -233,7 +298,8 @@ class Scanner(Thread):
     def build_families(self):
         families = defaultdict(list)
         for f in self.cached_fonts.itervalues():
-            if not f: continue
+            if not f:
+                continue
             lf = icu_lower(f['font-family'] or '')
             if lf:
                 families[lf].append(f)

From 802e4c52fb841f7bf3ef92476b29796f04774595 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 09:53:42 +0530
Subject: [PATCH 20/26] Change the filesystem encoding used by python to utf-8
 if it is ascii

---
 src/calibre/constants.py |  6 ++----
 src/calibre/utils/icu.c  | 15 +++++++++++++++
 src/calibre/utils/icu.py | 15 +++++++++++++--
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 6526c2e289..4c17a90122 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -66,10 +66,8 @@ else:
             filesystem_encoding = 'utf-8'
             # On linux, unicode arguments to os file functions are coerced to an ascii
             # bytestring if sys.getfilesystemencoding() == 'ascii', which is
-            # just plain dumb. So issue a warning.
-            print ('WARNING: You do not have the LANG environment variable set correctly. '
-                    'This will cause problems with non-ascii filenames. '
-                    'Set it to something like en_US.UTF-8.\n')
+            # just plain dumb. This is fixed by the icu.py module which, when
+            # imported, changes ascii to utf-8
     except:
         filesystem_encoding = 'utf-8'
 
diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index ccb1cfb5b9..aee47448fd 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -661,6 +661,17 @@ icu_set_default_encoding(PyObject *self, PyObject *args) {
 }
 // }}}
 
+// set_filesystem_encoding {{{
+// Point Py_FileSystemDefaultEncoding at a private copy of the given encoding name
+static PyObject *
+icu_set_filesystem_encoding(PyObject *self, PyObject *args) {
+    char *encoding, *copy;
+    if (!PyArg_ParseTuple(args, "s:setfilesystemencoding", &encoding)) return NULL;
+    if ((copy = strdup(encoding)) == NULL) return PyErr_NoMemory();  // strdup() failed
+    Py_FileSystemDefaultEncoding = copy;  // the copy is not freed in this function
+    Py_RETURN_NONE;
+}
+// }}}
 // set_default_encoding {{{
 static PyObject *
 icu_get_available_transliterators(PyObject *self, PyObject *args) {
@@ -707,6 +718,10 @@ static PyMethodDef icu_methods[] = {
         "set_default_encoding(encoding) -> Set the default encoding for the python unicode implementation."
     },
 
+    {"set_filesystem_encoding", icu_set_filesystem_encoding, METH_VARARGS,
+        "set_filesystem_encoding(encoding) -> Set the filesystem encoding for python."
+    },
+
     {"get_available_transliterators", icu_get_available_transliterators, METH_VARARGS,
         "get_available_transliterators() -> Return list of available transliterators. This list is rather limited on OS X."
     },
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index e1e6c1a1c6..1f54a04646 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -163,11 +163,22 @@ load_collator()
 _icu_not_ok = _icu is None or _collator is None
 
 try:
-    if sys.getdefaultencoding().lower() == 'ascii':
+    senc = sys.getdefaultencoding()
+    if not senc or senc.lower() == 'ascii':
         _icu.set_default_encoding('utf-8')
+    del senc
 except:
     pass
 
+try:
+    fenc = sys.getfilesystemencoding()
+    if not fenc or fenc.lower() == 'ascii':
+        _icu.set_filesystem_encoding('utf-8')
+    del fenc
+except:
+    pass
+
+
 # }}}
 
 ################# The string functions ########################################
@@ -247,7 +258,7 @@ def collation_order(a):
 
 ################################################################################
 
-def test(): # {{{
+def test():  # {{{
     from calibre import prints
     # Data {{{
     german = '''

From ffdc9d377c7540f7bab6ac68303c744676828597 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 11:42:31 +0530
Subject: [PATCH 21/26] ...

---
 src/calibre/ebooks/docx/dump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/docx/dump.py b/src/calibre/ebooks/docx/dump.py
index f6432125c5..6ebc2e8871 100644
--- a/src/calibre/ebooks/docx/dump.py
+++ b/src/calibre/ebooks/docx/dump.py
@@ -22,7 +22,7 @@ def dump(path):
         zf.extractall(dest)
 
     for f in walk(dest):
-        if f.endswith('.xml'):
+        if f.endswith('.xml') or f.endswith('.rels'):
             with open(f, 'r+b') as stream:
                 raw = stream.read()
                 root = etree.fromstring(raw)

From d8a896616a34432bc7c4ae00ce8018619881ae7a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 16:09:12 +0530
Subject: [PATCH 22/26] DOCX Input: Fonts

---
 src/calibre/ebooks/docx/char_styles.py |  15 ++-
 src/calibre/ebooks/docx/container.py   |   4 +-
 src/calibre/ebooks/docx/fonts.py       | 132 +++++++++++++++++++++++++
 src/calibre/ebooks/docx/names.py       |   1 +
 src/calibre/ebooks/docx/styles.py      |  11 ++-
 src/calibre/ebooks/docx/to_html.py     |  18 +++-
 6 files changed, 173 insertions(+), 8 deletions(-)
 create mode 100644 src/calibre/ebooks/docx/fonts.py

diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py
index a9d2a43cdb..b65766e494 100644
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@@ -113,6 +113,14 @@ def read_vert_align(parent, dest):
         if val and val in {'baseline', 'subscript', 'superscript'}:
             ans = val
     setattr(dest, 'vert_align', ans)
+
+def read_font_family(parent, dest):
+    # Store on dest.font_family the w:ascii value of the last w:rFonts child
+    # of parent that has a non-empty one, or inherit when there is none
+    family = inherit
+    for rfonts in XPath('./w:rFonts[@w:ascii]')(parent):
+        family = get(rfonts, 'w:ascii') or family
+    setattr(dest, 'font_family', family)
 # }}}
 
 class RunStyle(object):
@@ -122,7 +130,7 @@ class RunStyle(object):
         'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
 
         'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
-        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
+        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
     }
 
     toggle_properties = {
@@ -141,7 +149,7 @@ class RunStyle(object):
             ):
                 setattr(self, p, binary_property(rPr, p))
 
-            for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
+            for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'):
                 f = globals()['read_%s' % x]
                 f(rPr, self)
 
@@ -212,6 +220,9 @@ class RunStyle(object):
 
             if self.b:
                 c['font-weight'] = 'bold'
+
+            if self.font_family is not inherit:
+                c['font-family'] = self.font_family
         return self._css
 
     def same_border(self, other):
diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py
index ec0decacef..bcca336474 100644
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@@ -167,7 +167,9 @@ class DOCX(object):
 
     @property
     def document_relationships(self):
-        name = self.document_name
+        return self.get_relationships(self.document_name)
+
+    def get_relationships(self, name):
         base = '/'.join(name.split('/')[:-1])
         by_id, by_type = {}, {}
         parts = name.split('/')
diff --git a/src/calibre/ebooks/docx/fonts.py b/src/calibre/ebooks/docx/fonts.py
new file mode 100644
index 0000000000..4ed602c71d
--- /dev/null
+++ b/src/calibre/ebooks/docx/fonts.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import os, re
+from collections import namedtuple
+
+from calibre.ebooks.docx.block_styles import binary_property, inherit
+from calibre.ebooks.docx.names import XPath, get
+from calibre.utils.filenames import ascii_filename
+from calibre.utils.fonts.scanner import font_scanner, NoFonts
+from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
+
+Embed = namedtuple('Embed', 'name key subsetted')
+
+def has_system_fonts(name):
+    try:  # The scanner raising NoFonts is reported as False
+        return True if font_scanner.fonts_for_family(name) else False
+    except NoFonts:
+        return False
+
+def get_variant(bold=False, italic=False):
+    names = {(False, False):'Regular', (False, True):'Italic', (True, False):'Bold', (True, True):'BoldItalic'}
+    return names[(bold, italic)]  # KeyError unless both flags compare equal to True or False
+
+class Family(object):
+    ' A w:font entry: its names, embedded font files and CSS generic family '
+
+    def __init__(self, elem, embed_relationships):
+        # embed_relationships is looked up by the r:id of each w:embed* child
+        self.name = self.family_name = get(elem, 'w:name')
+        self.alt_names = tuple(get(alt, 'w:val') for alt in XPath('./w:altName')(elem))
+        if self.alt_names and not has_system_fonts(self.name):
+            # Switch to the first alternate name that has system fonts, if any
+            self.family_name = next(
+                (alt for alt in self.alt_names if has_system_fonts(alt)), self.name)
+
+        self.embedded = {}
+        for variant in ('Regular', 'Bold', 'Italic', 'BoldItalic'):
+            for node in XPath('./w:embed%s[@r:id]' % variant)(elem):
+                rid = get(node, 'r:id')
+                if rid not in embed_relationships:
+                    continue
+                is_subset = get(node, 'w:subsetted') in {'1', 'true', 'on'}
+                self.embedded[variant] = Embed(
+                    embed_relationships[rid], get(node, 'w:fontKey'), is_subset)
+
+        self.generic_family = 'auto'
+        for node in XPath('./w:family[@w:val]')(elem):
+            self.generic_family = get(node, 'w:val', 'auto')
+
+        not_truetype = binary_property(elem, 'notTrueType')
+        self.is_ttf = not_truetype is inherit or not not_truetype
+
+        self.panose1 = self.panose_name = None
+        for node in XPath('./w:panose1[@w:val]')(elem):
+            try:
+                raw = get(node, 'w:val')
+                panose = tuple(int(raw[i:i+2], 16) for i in xrange(0, len(raw), 2))
+            except (TypeError, ValueError, IndexError):
+                continue  # Value could not be parsed as hex digit pairs
+            self.panose1 = panose
+            self.panose_name = panose_to_css_generic_family(panose)
+
+        css_names = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace',
+                     'decorative':'fantasy', 'script':'cursive'}
+        self.css_generic_family = css_names.get(self.generic_family) or self.panose_name or 'serif'
+
+
+class Fonts(object):
+    ' Family objects for the w:font elements, by w:name, and the variants in use '
+
+    def __init__(self):
+        self.fonts = {}
+        self.used = set()
+
+    def __call__(self, root, embed_relationships, docx, dest_dir):
+        for elem in XPath('//w:font[@w:name]')(root):
+            self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships)
+
+    def family_for(self, name, bold=False, italic=False):
+        f = self.fonts.get(name, None)
+        if f is None:
+            return 'serif'
+        variant = get_variant(bold, italic)
+        self.used.add((name, variant))  # consumed later by embed_fonts()
+        name = f.name if variant in f.embedded else f.family_name
+        return '"%s", %s' % (name.replace('"', ''), f.css_generic_family)
+
+    def embed_fonts(self, dest_dir, docx):
+        ' Write the used embedded fonts to dest_dir/fonts, return @font-face rules '
+        defs = []
+        dest_dir = os.path.join(dest_dir, 'fonts')
+        for name, variant in self.used:
+            if variant in self.fonts[name].embedded:
+                if not os.path.exists(dest_dir):
+                    os.mkdir(dest_dir)
+                fname = self.write(name, dest_dir, docx, variant)
+                if fname is not None:
+                    d = {'font-family':'"%s"' % name.replace('"', ''), 'src': 'url("fonts/%s")' % fname}
+                    if 'Bold' in variant:
+                        d['font-weight'] = 'bold'
+                    if 'Italic' in variant:
+                        d['font-style'] = 'italic'
+                    d = ';\n\t'.join('%s: %s' % (k, v) for k, v in d.iteritems())
+                    defs.append('@font-face {\n\t%s\n}\n' % d)
+        return '\n'.join(defs)
+
+    def write(self, name, dest_dir, docx, variant):
+        ' Save one embedded font in dest_dir, return its file name or None '
+        ef = self.fonts[name].embedded[variant]
+        raw = docx.read(ef.name)
+        prefix = raw[:32]
+        # A fontKey without hex digits must not reach the modulo by len(key) below
+        key = re.sub(r'[^A-Fa-f0-9]', '', ef.key or '')
+        if key:
+            # XOR the 32 byte prefix with the reversed key bytes, cycling over the key
+            key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in xrange(0, len(key), 2))))
+            prefix = bytes(bytearray(b ^ key[i % len(key)] for i, b in enumerate(bytearray(prefix))))
+        if not is_truetype_font(prefix):
+            return None
+        ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf'
+        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
+        with open(os.path.join(dest_dir, fname), 'wb') as dest:
+            dest.write(prefix)
+            dest.write(raw[32:])
+        return fname
+
diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py
index 91b051d691..da643dcc2c 100644
--- a/src/calibre/ebooks/docx/names.py
+++ b/src/calibre/ebooks/docx/names.py
@@ -13,6 +13,7 @@ DOCPROPS  = 'http://schemas.openxmlformats.org/package/2006/relationships/metada
 APPPROPS  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
 STYLES    = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
 NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
+FONTS     = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
 
 namespaces = {
     'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index 44ae2cea89..13b9ebe58f 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -97,7 +97,8 @@ class Styles(object):
     def get(self, key, default=None):
         return self.id_map.get(key, default)
 
-    def __call__(self, root):
+    def __call__(self, root, fonts):
+        self.fonts = fonts
         for s in XPath('//w:style')(root):
             s = Style(s)
             if s.style_id:
@@ -246,6 +247,9 @@ class Styles(object):
             for attr in ans.all_properties:
                 setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
 
+            if ans.font_family is not inherit:
+                ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
+
         return ans
 
     def resolve(self, obj):
@@ -290,13 +294,16 @@ class Styles(object):
         h = hash(frozenset(css.iteritems()))
         return self.classes.get(h, (None, None))[0]
 
-    def generate_css(self):
+    def generate_css(self, dest_dir, docx):
+        ef = self.fonts.embed_fonts(dest_dir, docx)
         prefix = textwrap.dedent(
             '''\
             p { text-indent: 1.5em }
 
             ul, ol, p { margin: 0; padding: 0 }
             ''')
+        if ef:
+            prefix += '\n' + ef
 
         ans = []
         for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 8cd79074e3..dbd6dce043 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -14,9 +14,10 @@ from lxml.html.builder import (
     HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
 
 from calibre.ebooks.docx.container import DOCX, fromstring
-from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
+from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS
 from calibre.ebooks.docx.styles import Styles, inherit
 from calibre.ebooks.docx.numbering import Numbering
+from calibre.ebooks.docx.fonts import Fonts
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 
 class Text:
@@ -116,7 +117,18 @@ class Convert(object):
 
         nname = get_name(NUMBERING, 'numbering.xml')
         sname = get_name(STYLES, 'styles.xml')
+        fname = get_name(FONTS, 'fontTable.xml')
         numbering = self.numbering = Numbering()
+        fonts = self.fonts = Fonts()
+
+        if fname is not None:
+            embed_relationships = self.docx.get_relationships(fname)[0]
+            try:
+                raw = self.docx.read(fname)
+            except KeyError:
+                self.log.warn('Fonts table %s does not exist' % fname)
+            else:
+                fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
 
         if sname is not None:
             try:
@@ -124,7 +136,7 @@ class Convert(object):
             except KeyError:
                 self.log.warn('Styles %s do not exist' % sname)
             else:
-                self.styles(fromstring(raw))
+                self.styles(fromstring(raw), fonts)
 
         if nname is not None:
             try:
@@ -140,7 +152,7 @@ class Convert(object):
         raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
         with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
             f.write(raw)
-        css = self.styles.generate_css()
+        css = self.styles.generate_css(self.dest_dir, self.docx)
         if css:
             with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                 f.write(css.encode('utf-8'))

From aa2aa3d2ef8bb89acf7a6e943be9a91391d9cdd0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 16:36:09 +0530
Subject: [PATCH 23/26] Ignore line height of 1

---
 src/calibre/ebooks/docx/block_styles.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py
index b501580042..eef68a184f 100644
--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@@ -271,7 +271,10 @@ class ParagraphStyle(object):
                 if val is not inherit:
                     c['margin-%s' % edge] = val
 
-            for x in ('text_indent', 'text_align', 'line_height', 'background_color'):
+            if self.line_height not in {inherit, '1'}:
+                c['line-height'] = self.line_height
+
+            for x in ('text_indent', 'text_align', 'background_color'):
                 val = getattr(self, x)
                 if val is not inherit:
                     c[x.replace('_', '-')] = val

From 5ec61a6b299ab2114e0b7b7ae5848b733d512371 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 16:45:03 +0530
Subject: [PATCH 24/26] Don't ignore the content in tables, just extract the
 content as linear blocks for now

---
 src/calibre/ebooks/docx/to_html.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index dbd6dce043..b4e5b0e5f7 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -14,7 +14,7 @@ from lxml.html.builder import (
     HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
 
 from calibre.ebooks.docx.container import DOCX, fromstring
-from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS
+from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
 from calibre.ebooks.docx.styles import Styles, inherit
 from calibre.ebooks.docx.numbering import Numbering
 from calibre.ebooks.docx.fonts import Fonts
@@ -64,16 +64,11 @@ class Convert(object):
         doc = self.docx.document
         relationships_by_id, relationships_by_type = self.docx.document_relationships
         self.read_styles(relationships_by_type)
-        for top_level in XPath('/w:document/w:body/*')(doc):
-            if is_tag(top_level, 'w:p'):
-                p = self.convert_p(top_level)
-                self.body.append(p)
-            elif is_tag(top_level, 'w:tbl'):
-                pass  # TODO: tables
-            elif is_tag(top_level, 'w:sectPr'):
-                pass  # TODO: Last section properties
-            else:
-                self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
+        for wp in XPath('//w:p')(doc):
+            p = self.convert_p(wp)
+            self.body.append(p)
+        # TODO: tables <w:tbl> child of <w:body> (nested tables?)
+        # TODO: Last section properties <w:sectPr> child of <w:body>
 
         numbered = []
         for html_obj, obj in self.object_map.iteritems():

From 33793ff0d1135729832cd4d2c10f1c2a2a37516f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 18:01:55 +0530
Subject: [PATCH 25/26] Driver for SONY PRS-T2N

---
 src/calibre/devices/prst1/driver.py | 34 ++++++++++++++---------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py
index 72533860d4..0431ca7bfd 100644
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@@ -39,8 +39,8 @@ class PRST1(USBMS):
     path_sep = '/'
     booklist_class = CollectionsBookList
 
-    FORMATS      = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are
-                                                         # used in japan
+    FORMATS      = ['epub', 'pdf', 'txt', 'book', 'zbf']  # The last two are
+                                                          # used in japan
     CAN_SET_METADATA = ['collections']
     CAN_DO_DEVICE_DB_PLUGBOARD = True
 
@@ -50,10 +50,10 @@ class PRST1(USBMS):
 
     VENDOR_NAME        = 'SONY'
     WINDOWS_MAIN_MEM   = re.compile(
-            r'(PRS-T(1|2)&)'
+            r'(PRS-T(1|2|2N)&)'
             )
     WINDOWS_CARD_A_MEM = re.compile(
-            r'(PRS-T(1|2)__SD&)'
+            r'(PRS-T(1|2|2N)__SD&)'
             )
     MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
     STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'
@@ -66,7 +66,7 @@ class PRST1(USBMS):
 
     EXTRA_CUSTOMIZATION_MESSAGE = [
         _('Comma separated list of metadata fields '
-            'to turn into collections on the device. Possibilities include: ')+\
+            'to turn into collections on the device. Possibilities include: ')+
                     'series, tags, authors',
         _('Upload separate cover thumbnails for books') +
              ':::'+_('Normally, the SONY readers get the cover image from the'
@@ -194,17 +194,17 @@ class PRST1(USBMS):
                 time_offsets = {}
                 for i, row in enumerate(cursor):
                     try:
-                        comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
+                        comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000)
                     except (OSError, IOError, TypeError):
                         # In case the db has incorrect path info
                         continue
-                    device_date = int(row[1]);
+                    device_date = int(row[1])
                     offset = device_date - comp_date
                     time_offsets.setdefault(offset, 0)
                     time_offsets[offset] = time_offsets[offset] + 1
 
                 try:
-                    device_offset = max(time_offsets,key = lambda a: time_offsets.get(a))
+                    device_offset = max(time_offsets, key=lambda a: time_offsets.get(a))
                     debug_print("Device Offset: %d ms"%device_offset)
                     self.device_offset = device_offset
                 except ValueError:
@@ -213,7 +213,7 @@ class PRST1(USBMS):
             for idx, book in enumerate(bl):
                 query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?'
                 t = (book.lpath,)
-                cursor.execute (query, t)
+                cursor.execute(query, t)
 
                 for i, row in enumerate(cursor):
                     book.device_collections = bl_collections.get(row[0], None)
@@ -318,14 +318,14 @@ class PRST1(USBMS):
                     ' any notes/highlights, etc.')%dbpath)+' Underlying error:'
                     '\n'+tb)
 
-	def get_lastrowid(self, cursor):
-		# SQLite3 + Python has a fun issue on 32-bit systems with integer overflows.
-		# Issue a SQL query instead, getting the value as a string, and then converting to a long python int manually.
-		query = 'SELECT last_insert_rowid()'
-		cursor.execute(query)
-		row = cursor.fetchone()
+    def get_lastrowid(self, cursor):
+        # SQLite3 + Python has a fun issue on 32-bit systems with integer overflows.
+        # Issue a SQL query instead, getting the value as a string, and then converting it to a long Python int manually.
+        query = 'SELECT last_insert_rowid()'
+        cursor.execute(query)
+        row = cursor.fetchone()
 
-		return long(row[0])
+        return long(row[0])
 
     def get_database_min_id(self, source_id):
         sequence_min = 0L
@@ -345,7 +345,7 @@ class PRST1(USBMS):
         # Insert the sequence Id if it doesn't
         query = ('INSERT INTO sqlite_sequence (name, seq) '
                 'SELECT ?, ? '
-                'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)');
+                'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)')
         cursor.execute(query, (table, sequence_id, table,))
 
         cursor.close()

From a597fe76bb40aa170af740b269e5cc48f8e5e633 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 14 May 2013 18:39:58 +0530
Subject: [PATCH 26/26] DOCX Input: Cascade the font css

---
 src/calibre/ebooks/docx/block_styles.py |  9 +++-
 src/calibre/ebooks/docx/char_styles.py  | 20 ++++++---
 src/calibre/ebooks/docx/styles.py       | 55 ++++++++++++++++++++++++-
 src/calibre/ebooks/docx/to_html.py      | 12 +++---
 4 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py
index eef68a184f..10dc416eec 100644
--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@@ -208,7 +208,7 @@ class ParagraphStyle(object):
 
         # Misc.
         'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
-        'numbering',
+        'numbering', 'font_family', 'font_size',
     )
 
     def __init__(self, pPr=None):
@@ -232,6 +232,8 @@ class ParagraphStyle(object):
             for s in XPath('./w:pStyle[@w:val]')(pPr):
                 self.linked_style = get(s, 'w:val')
 
+            self.font_family = self.font_size = inherit
+
         self._css = None
 
     def update(self, other):
@@ -274,10 +276,13 @@ class ParagraphStyle(object):
             if self.line_height not in {inherit, '1'}:
                 c['line-height'] = self.line_height
 
-            for x in ('text_indent', 'text_align', 'background_color'):
+            for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'):
                 val = getattr(self, x)
                 if val is not inherit:
+                    if x == 'font_size':
+                        val = '%.3gpt' % val
                     c[x.replace('_', '-')] = val
+
         return self._css
 
         # TODO: keepNext must be done at markup level
diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py
index b65766e494..ca023e23af 100644
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@@ -172,6 +172,18 @@ class RunStyle(object):
             if val is inherit:
                 setattr(self, p, getattr(parent, p))
 
+    def get_border_css(self, ans):
+        for x in ('color', 'style', 'width'):
+            val = getattr(self, 'border_'+x)
+            if x == 'width' and val is not inherit:
+                val = '%.3gpt' % val
+            if val is not inherit:
+                ans['border-%s' % x] = val
+
+    def clear_border_css(self):
+        for x in ('color', 'style', 'width'):
+            setattr(self, 'border_'+x, inherit)
+
     @property
     def css(self):
         if self._css is None:
@@ -196,12 +208,7 @@ class RunStyle(object):
             if self.vanish is True:
                 c['display'] = 'none'
 
-            for x in ('color', 'style', 'width'):
-                val = getattr(self, 'border_'+x)
-                if x == 'width' and val is not inherit:
-                    val = '%.3gpt' % val
-                if val is not inherit:
-                    c['border-%s' % x] = val
+            self.get_border_css(c)
             if self.padding is not inherit:
                 c['padding'] = '%.3gpt' % self.padding
 
@@ -223,6 +230,7 @@ class RunStyle(object):
 
             if self.font_family is not inherit:
                 c['font-family'] = self.font_family
+
         return self._css
 
     def same_border(self, other):
diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index 13b9ebe58f..c17418d0dd 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -258,6 +258,55 @@ class Styles(object):
         if obj.tag.endswith('}r'):
             return self.resolve_run(obj)
 
+    def cascade(self, layers):
+        self.body_font_family = 'serif'
+        self.body_font_size = '10pt'
+
+        for p, runs in layers.iteritems():
+            char_styles = [self.resolve_run(r) for r in runs]
+            block_style = self.resolve_paragraph(p)
+            c = Counter()
+            for s in char_styles:
+                if s.font_family is not inherit:
+                    c[s.font_family] += 1
+            if c:
+                family = c.most_common(1)[0][0]
+                block_style.font_family = family
+                for s in char_styles:
+                    if s.font_family == family:
+                        s.font_family = inherit
+
+            sizes = [s.font_size for s in char_styles if s.font_size is not inherit]
+            if sizes:
+                sz = block_style.font_size = sizes[0]
+                for s in char_styles:
+                    if s.font_size == sz:
+                        s.font_size = inherit
+
+        block_styles = [self.resolve_paragraph(p) for p in layers]
+        c = Counter()
+        for s in block_styles:
+            if s.font_family is not inherit:
+                c[s.font_family] += 1
+
+        if c:
+            self.body_font_family = family = c.most_common(1)[0][0]
+            for s in block_styles:
+                if s.font_family == family:
+                    s.font_family = inherit
+
+        c = Counter()
+        for s in block_styles:
+            if s.font_size is not inherit:
+                c[s.font_size] += 1
+
+        if c:
+            sz = c.most_common(1)[0][0]
+            for s in block_styles:
+                if s.font_size == sz:
+                    s.font_size = inherit
+            self.body_font_size = '%.3gpt' % sz
+
     def resolve_numbering(self, numbering):
         # When a numPr element appears inside a paragraph style, the lvl info
         # must be discarder and pStyle used instead.
@@ -298,12 +347,14 @@ class Styles(object):
         ef = self.fonts.embed_fonts(dest_dir, docx)
         prefix = textwrap.dedent(
             '''\
+            body { font-family: %s; font-size: %s }
+
             p { text-indent: 1.5em }
 
             ul, ol, p { margin: 0; padding: 0 }
-            ''')
+            ''') % (self.body_font_family, self.body_font_size)
         if ef:
-            prefix += '\n' + ef
+            prefix = ef + '\n' + prefix
 
         ans = []
         for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index b4e5b0e5f7..902952ca4a 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -64,12 +64,15 @@ class Convert(object):
         doc = self.docx.document
         relationships_by_id, relationships_by_type = self.docx.document_relationships
         self.read_styles(relationships_by_type)
+        self.layers = OrderedDict()
         for wp in XPath('//w:p')(doc):
             p = self.convert_p(wp)
             self.body.append(p)
         # TODO: tables <w:tbl> child of <w:body> (nested tables?)
         # TODO: Last section properties <w:sectPr> child of <w:body>
 
+        self.styles.cascade(self.layers)
+
         numbered = []
         for html_obj, obj in self.object_map.iteritems():
             raw = obj.get('calibre_num_id', None)
@@ -156,9 +159,11 @@ class Convert(object):
         dest = P()
         self.object_map[dest] = p
         style = self.styles.resolve_paragraph(p)
+        self.layers[p] = []
         for run in XPath('descendant::w:r')(p):
             span = self.convert_run(run)
             dest.append(span)
+            self.layers[p].append(run)
 
         m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
         if m is not None:
@@ -184,12 +189,9 @@ class Convert(object):
             spans = []
             bs = {}
             for span, style in border_run:
-                c = style.css
+                style.get_border_css(bs)
+                style.clear_border_css()
                 spans.append(span)
-                for x in ('width', 'color', 'style'):
-                    val = c.pop('border-%s' % x, None)
-                    if val is not None:
-                        bs['border-%s' % x] = val
             if bs:
                 cls = self.styles.register(bs, 'text_border')
                 wrapper = self.wrap_elems(spans, SPAN())