From 17206c3a951324bf3f3d5586cc59c9a9a8545370 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 00:30:31 +0530 Subject: [PATCH 01/26] Allow setting focus rect width to 2px --- src/calibre/gui2/__init__.py | 2 +- src/qtcurve/style/qtcurve.cpp | 11 ++++++----- src/qtcurve/style/qtcurve.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 369746cec7..ceac21dd30 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -873,7 +873,7 @@ class Application(QApplication): v = pcache[v] icon_map[type('')(getattr(style, 'SP_'+k))] = v style.setProperty(u'calibre_icon_map', icon_map) - style.setProperty(u'calibre_item_view_focus', True) + style.setProperty(u'calibre_item_view_focus', 1) self.__icon_map_memory_ = icon_map def setup_styles(self, force_calibre_style): diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp index 46a9b91a87..e5ac94a82e 100644 --- a/src/qtcurve/style/qtcurve.cpp +++ b/src/qtcurve/style/qtcurve.cpp @@ -3698,7 +3698,7 @@ bool Style::event(QEvent *event) { } return true; } else if (e->propertyName() == QString("calibre_item_view_focus")) { - calibre_item_view_focus = property("calibre_item_view_focus").toBool(); + calibre_item_view_focus = property("calibre_item_view_focus").toInt(); return true; } } @@ -4803,10 +4803,11 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option, painter->setBrush(QBrush(patternCol, Qt::Dense4Pattern)); painter->setBrushOrigin(r.topLeft()); painter->setPen(Qt::NoPen); - painter->drawRect(r.left(), r.top(), r.width(), 1); // Top - painter->drawRect(r.left(), r.bottom(), r.width(), 1); // Bottom - painter->drawRect(r.left(), r.top(), 1, r.height()); // Left - painter->drawRect(r.right(), r.top(), 1, r.height()); // Right + int fwidth = (calibre_item_view_focus > 1) ? 
2 : 1; + painter->drawRect(r.left(), r.top(), r.width(), fwidth); // Top + painter->drawRect(r.left(), r.bottom(), r.width(), fwidth); // Bottom + painter->drawRect(r.left(), r.top(), fwidth, r.height()); // Left + painter->drawRect(r.right(), r.top(), fwidth, r.height()); // Right painter->restore(); } else diff --git a/src/qtcurve/style/qtcurve.h b/src/qtcurve/style/qtcurve.h index 84dfbdd145..63500ad340 100644 --- a/src/qtcurve/style/qtcurve.h +++ b/src/qtcurve/style/qtcurve.h @@ -355,7 +355,7 @@ class Style : public QCommonStyle mutable QList itsMdiButtons[2]; // 0=left, 1=right mutable int itsTitlebarHeight; QHash calibre_icon_map; - bool calibre_item_view_focus; + int calibre_item_view_focus; bool is_kde_session; // Required for Q3Header hover... From d99eccc51e8f274bbaae5a4729c44eda489e427f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 01:01:28 +0530 Subject: [PATCH 02/26] Book list: Make the current cell have a darker background --- src/calibre/gui2/library/views.py | 1 + src/qtcurve/style/qtcurve.cpp | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index e6a816621f..1d6cd33e9d 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -139,6 +139,7 @@ class BooksView(QTableView): # {{{ def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True): QTableView.__init__(self, parent) + self.setProperty('highlight_current_item', True) self.row_sizing_done = False if not tweaks['horizontal_scrolling_per_column']: diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp index e5ac94a82e..58a32c3479 100644 --- a/src/qtcurve/style/qtcurve.cpp +++ b/src/qtcurve/style/qtcurve.cpp @@ -5250,6 +5250,9 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option, QColor color(hasCustomBackground && hasSolidBackground ? 
v4Opt->backgroundBrush.color() : palette.color(cg, QPalette::Highlight)); + if (state & State_HasFocus && widget->property("highlight_current_item").toBool()) { + color = color.darker(130); + } bool square((opts.square&SQUARE_LISTVIEW_SELECTION) && (/*(!widget && r.height()<=40 && r.width()>=48) || */ (widget && !widget->inherits("KFilePlacesView") && From ee0e4e4d1a6230bcfd10f50ea63a71d7ef0fd996 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 01:06:57 +0530 Subject: [PATCH 03/26] ... --- src/calibre/gui2/__init__.py | 1 - src/qtcurve/style/qtcurve.cpp | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index ceac21dd30..5fcde65ff5 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -873,7 +873,6 @@ class Application(QApplication): v = pcache[v] icon_map[type('')(getattr(style, 'SP_'+k))] = v style.setProperty(u'calibre_icon_map', icon_map) - style.setProperty(u'calibre_item_view_focus', 1) self.__icon_map_memory_ = icon_map def setup_styles(self, force_calibre_style): diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp index 58a32c3479..e3bb17d244 100644 --- a/src/qtcurve/style/qtcurve.cpp +++ b/src/qtcurve/style/qtcurve.cpp @@ -5250,9 +5250,9 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option, QColor color(hasCustomBackground && hasSolidBackground ? 
v4Opt->backgroundBrush.color() : palette.color(cg, QPalette::Highlight)); - if (state & State_HasFocus && widget->property("highlight_current_item").toBool()) { - color = color.darker(130); - } + if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool()) + color = color.darker(130); // Added by Kovid to highlight the current cell in the book list + bool square((opts.square&SQUARE_LISTVIEW_SELECTION) && (/*(!widget && r.height()<=40 && r.width()>=48) || */ (widget && !widget->inherits("KFilePlacesView") && From f8d6970fd571b3f9d0f63627a8eea098da05152f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 08:12:26 +0530 Subject: [PATCH 04/26] Update .net magazine --- recipes/dot_net.recipe | 59 +++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/recipes/dot_net.recipe b/recipes/dot_net.recipe index 50db71e9be..d3a96ad0c3 100644 --- a/recipes/dot_net.recipe +++ b/recipes/dot_net.recipe @@ -1,32 +1,37 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from calibre.web.feeds.news import BasicNewsRecipe import re -class NetMagazineRecipe (BasicNewsRecipe): - __author__ = u'Marc Busqué ' - __url__ = 'http://www.lamarciana.com' - __version__ = '1.0' - __license__ = 'GPL v3' - __copyright__ = u'2012, Marc Busqué ' - title = u'.net magazine' - description = u'net is the world’s best-selling magazine for web designers and developers, featuring tutorials from leading agencies, interviews with the web’s biggest names, and agenda-setting features on the hottest issues affecting the internet today.' 
- language = 'en' - tags = 'web development, software' - oldest_article = 7 - remove_empty_feeds = True - no_stylesheets = True - cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png' - keep_only_tags = [ - dict(name='article', attrs={'class': re.compile('^node.*$', re.IGNORECASE)}) - ] - remove_tags = [ - dict(name='span', attrs={'class': 'comment-count'}), - dict(name='div', attrs={'class': 'item-list share-links'}), - dict(name='footer'), - ] - remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height', 'style'] - extra_css = 'img {max-width: 100%; display: block; margin: auto;} .captioned-image div {text-align: center; font-style: italic;}' +class dotnetMagazine (BasicNewsRecipe): + __author__ = u'Bonni Salles' + __version__ = '1.0' + __license__ = 'GPL v3' + __copyright__ = u'2013, Bonni Salles' + title = '.net magazine' + oldest_article = 7 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} ' + cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png' + + remove_tags_after = dict(name='footer', id=lambda x:not x) + remove_tags_before = dict(name='header', id=lambda x:not x) + + remove_tags = [ + dict(name='div', attrs={'class': 'item-list'}), + dict(name='h4', attrs={'class': 'std-hdr'}), + dict(name='div', attrs={'class': 'item-list share-links'}), #removes share links + dict(name=['script', 'noscript']), + dict(name='div', attrs={'id': 'comments-form'}), #comment these out if you want the comments to show + dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}), + dict(name='div', attrs={'id': 'right-col'}), + dict(name='div', attrs={'id': 'comments'}), #comment these out if you want the comments to show + dict(name='div', attrs={'class': 'item-list 
related-content'}), - feeds = [ - (u'.net', u'http://feeds.feedburner.com/net/topstories'), ] + + feeds = [ + (u'net', u'http://feeds.feedburner.com/net/topstories') + ] From 1e06698942beaa26b26d6b926f5c2717ae060dc1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 08:20:53 +0530 Subject: [PATCH 05/26] Make the darkness of the current cell highlight settable from python --- src/calibre/gui2/library/views.py | 2 +- src/qtcurve/style/qtcurve.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index 1d6cd33e9d..cf028d3f12 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -139,7 +139,7 @@ class BooksView(QTableView): # {{{ def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True): QTableView.__init__(self, parent) - self.setProperty('highlight_current_item', True) + self.setProperty('highlight_current_item', 140) self.row_sizing_done = False if not tweaks['horizontal_scrolling_per_column']: diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp index e3bb17d244..ca88a4c054 100644 --- a/src/qtcurve/style/qtcurve.cpp +++ b/src/qtcurve/style/qtcurve.cpp @@ -5251,7 +5251,7 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option, ? 
v4Opt->backgroundBrush.color() : palette.color(cg, QPalette::Highlight)); if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool()) - color = color.darker(130); // Added by Kovid to highlight the current cell in the book list + color = color.darker(widget->property("highlight_current_item").toInt()); // Added by Kovid to highlight the current cell in the book list bool square((opts.square&SQUARE_LISTVIEW_SELECTION) && (/*(!widget && r.height()<=40 && r.width()>=48) || */ From b5ddd3a4e5249e7a59bd9b49ac6de795f60e9462 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 09:35:48 +0530 Subject: [PATCH 06/26] Handle dark colorschemes when highlighting current cell --- src/calibre/gui2/library/views.py | 2 +- src/qtcurve/style/qtcurve.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index cf028d3f12..7552257919 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -139,7 +139,7 @@ class BooksView(QTableView): # {{{ def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True): QTableView.__init__(self, parent) - self.setProperty('highlight_current_item', 140) + self.setProperty('highlight_current_item', 150) self.row_sizing_done = False if not tweaks['horizontal_scrolling_per_column']: diff --git a/src/qtcurve/style/qtcurve.cpp b/src/qtcurve/style/qtcurve.cpp index ca88a4c054..276e339e62 100644 --- a/src/qtcurve/style/qtcurve.cpp +++ b/src/qtcurve/style/qtcurve.cpp @@ -5250,8 +5250,13 @@ void Style::drawPrimitive(PrimitiveElement element, const QStyleOption *option, QColor color(hasCustomBackground && hasSolidBackground ? 
v4Opt->backgroundBrush.color() : palette.color(cg, QPalette::Highlight)); - if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool()) - color = color.darker(widget->property("highlight_current_item").toInt()); // Added by Kovid to highlight the current cell in the book list + if (state & State_HasFocus && widget && widget->property("highlight_current_item").toBool()) { + // Added by Kovid to highlight the current cell in the book list + if (color.lightness() > 128) + color = color.darker(widget->property("highlight_current_item").toInt()); + else + color = color.lighter(); + } bool square((opts.square&SQUARE_LISTVIEW_SELECTION) && (/*(!widget && r.height()<=40 && r.width()>=48) || */ From 70a6852ab6d6a1d3a77bf3025ea17af7eaf38d62 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 10:04:53 +0530 Subject: [PATCH 07/26] pep8 --- src/calibre/ebooks/oeb/iterator/book.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/iterator/book.py b/src/calibre/ebooks/oeb/iterator/book.py index 77b478924e..28dd37a88e 100644 --- a/src/calibre/ebooks/oeb/iterator/book.py +++ b/src/calibre/ebooks/oeb/iterator/book.py @@ -25,7 +25,7 @@ from calibre.ebooks.oeb.transforms.cover import CoverManager from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data) from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin -TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\ +TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace( '__ar__', 'none').replace('__viewbox__', '0 0 600 800' ).replace('__width__', '600').replace('__height__', '800') From abad7da850420e01eb5709d4b0e8740005e67214 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 10:18:35 +0530 Subject: [PATCH 08/26] pep8 --- src/calibre/ebooks/conversion/preprocess.py | 23 ++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git 
a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 7e5873edd2..91f91c8b3d 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -14,7 +14,7 @@ SVG_NS = 'http://www.w3.org/2000/svg' XLINK_NS = 'http://www.w3.org/1999/xlink' convert_entities = functools.partial(entity_to_unicode, - result_exceptions = { + result_exceptions={ u'<' : '<', u'>' : '>', u"'" : ''', @@ -144,9 +144,9 @@ class DocAnalysis(object): percent is the percentage of lines that should be in a single bucket to return true The majority of the lines will exist in 1-2 buckets in typical docs with hard line breaks ''' - minLineLength=20 # Ignore lines under 20 chars (typical of spaces) - maxLineLength=1900 # Discard larger than this to stay in range - buckets=20 # Each line is divided into a bucket based on length + minLineLength=20 # Ignore lines under 20 chars (typical of spaces) + maxLineLength=1900 # Discard larger than this to stay in range + buckets=20 # Each line is divided into a bucket based on length #print "there are "+str(len(lines))+" lines" #max = 0 @@ -156,7 +156,7 @@ class DocAnalysis(object): # max = l #print "max line found is "+str(max) # Build the line length histogram - hRaw = [ 0 for i in range(0,buckets) ] + hRaw = [0 for i in range(0,buckets)] for line in self.lines: l = len(line) if l > minLineLength and l < maxLineLength: @@ -167,7 +167,7 @@ class DocAnalysis(object): # Normalize the histogram into percents totalLines = len(self.lines) if totalLines > 0: - h = [ float(count)/totalLines for count in hRaw ] + h = [float(count)/totalLines for count in hRaw] else: h = [] #print "\nhRaw histogram lengths are: "+str(hRaw) @@ -200,7 +200,7 @@ class Dehyphenator(object): # Add common suffixes to the regex below to increase the likelihood of a match - # don't add suffixes which are also complete words, such as 'able' or 'sex' # only remove if it's not already the point of hyphenation - 
self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$" + self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$" # noqa self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE) self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE) # remove prefixes if the prefix was not already the point of hyphenation @@ -265,19 +265,18 @@ class Dehyphenator(object): self.html = html self.format = format if format == 'html': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?=<)(?P()?\s*(\s*){1,2}(?P<(p|div)[^>]*>\s*(]*>\s*

\s*)?\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(]*>)?)\s*(?P[\w\d]+)' % length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?=<)(?P()?\s*(\s*){1,2}(?P<(p|div)[^>]*>\s*(]*>\s*

\s*)?\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(]*>)?)\s*(?P[\w\d]+)' % length) # noqa elif format == 'pdf': intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?P

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) elif format == 'txt': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length) # noqa elif format == 'individual_words': intextmatch = re.compile(u'(?!<)(?P[^\W\-]+)(-|‐)\s*(?P\w+)(?![^<]*?>)') elif format == 'html_cleanup': - intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') + intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') # noqa elif format == 'txt_cleanup': intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)(?P\s+)(?P[\w\d]+)') - html = intextmatch.sub(self.dehyphenate, html) return html @@ -581,7 +580,7 @@ class HTMLPreProcessor(object): end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*

\s*(?=[[a-z\d])' % length), lambda match: '')) end_rules.append( # Un wrap using punctuation - (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?)?\s*(

\s*

\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?)?\s*(

\s*

\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), # noqa ) for rule in self.PREPROCESS + start_rules: From 32de3c16ea40f234d3b132d77032c31e2c0b4f64 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 12:16:37 +0530 Subject: [PATCH 09/26] Search and replace wizard: Fix generated html being slightly different from the actual html in the conversion pipeline for some input formats (mainly HTML, CHM, LIT). --- src/calibre/ebooks/conversion/plumber.py | 42 +++++++++++++-------- src/calibre/ebooks/conversion/preprocess.py | 7 +++- src/calibre/ebooks/oeb/base.py | 5 ++- src/calibre/ebooks/oeb/iterator/__init__.py | 27 ++++++------- 4 files changed, 50 insertions(+), 31 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 6ce1b42356..1f459229c8 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -77,7 +77,7 @@ class Plumber(object): def __init__(self, input, output, log, report_progress=DummyReporter(), dummy=False, merge_plugin_recs=True, abort_after_input_dump=False, - override_input_metadata=False): + override_input_metadata=False, for_regex_wizard=False): ''' :param input: Path to input file. :param output: Path to output file/directory @@ -87,6 +87,7 @@ class Plumber(object): if isbytestring(output): output = output.decode(filesystem_encoding) self.original_input_arg = input + self.for_regex_wizard = for_regex_wizard self.input = os.path.abspath(input) self.output = os.path.abspath(output) self.log = log @@ -123,7 +124,7 @@ OptionRecommendation(name='input_profile', 'conversion system information on how to interpret ' 'various information in the input document. For ' 'example resolution dependent lengths (i.e. lengths in ' - 'pixels). Choices are:')+\ + 'pixels). 
Choices are:')+ ', '.join([x.short_name for x in input_profiles()]) ), @@ -135,7 +136,7 @@ OptionRecommendation(name='output_profile', 'created document for the specified device. In some cases, ' 'an output profile is required to produce documents that ' 'will work on a device. For example EPUB on the SONY reader. ' - 'Choices are:') + \ + 'Choices are:') + ', '.join([x.short_name for x in output_profiles()]) ), @@ -490,7 +491,7 @@ OptionRecommendation(name='asciiize', 'cases where there are multiple representations of a character ' '(characters shared by Chinese and Japanese for instance) the ' 'representation based on the current calibre interface language will be ' - 'used.')%\ + 'used.')% u'\u041c\u0438\u0445\u0430\u0438\u043b ' u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432' ) @@ -711,7 +712,6 @@ OptionRecommendation(name='search_replace', self.input_fmt = input_fmt self.output_fmt = output_fmt - self.all_format_options = set() self.input_options = set() self.output_options = set() @@ -775,7 +775,7 @@ OptionRecommendation(name='search_replace', if not html_files: raise ValueError(_('Could not find an ebook inside the archive')) html_files = [(f, os.stat(f).st_size) for f in html_files] - html_files.sort(cmp = lambda x, y: cmp(x[1], y[1])) + html_files.sort(cmp=lambda x, y: cmp(x[1], y[1])) html_files = [f[0] for f in html_files] for q in ('toc', 'index'): for f in html_files: @@ -783,8 +783,6 @@ OptionRecommendation(name='search_replace', return f, os.path.splitext(f)[1].lower()[1:] return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:] - - def get_option_by_name(self, name): for group in (self.input_options, self.pipeline_options, self.output_options, self.all_format_options): @@ -956,7 +954,6 @@ OptionRecommendation(name='search_replace', self.log.info('Input debug saved to:', out_dir) - def run(self): ''' Run the conversion pipeline @@ -965,10 +962,12 @@ OptionRecommendation(name='search_replace', self.setup_options() if 
self.opts.verbose: self.log.filter_level = self.log.DEBUG + if self.for_regex_wizard and hasattr(self.opts, 'no_process'): + self.opts.no_process = True self.flush() import cssutils, logging cssutils.log.setLevel(logging.WARN) - get_types_map() # Ensure the mimetypes module is intialized + get_types_map() # Ensure the mimetypes module is intialized if self.opts.debug_pipeline is not None: self.opts.verbose = max(self.opts.verbose, 4) @@ -1003,6 +1002,8 @@ OptionRecommendation(name='search_replace', self.ui_reporter(0.01, _('Converting input to HTML...')) ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter) self.input_plugin.report_progress = ir + if self.for_regex_wizard: + self.input_plugin.for_viewer = True with self.input_plugin: self.oeb = self.input_plugin(stream, self.opts, self.input_fmt, self.log, @@ -1014,8 +1015,12 @@ OptionRecommendation(name='search_replace', if self.input_fmt in ('recipe', 'downloaded_recipe'): self.opts_to_mi(self.user_metadata) if not hasattr(self.oeb, 'manifest'): - self.oeb = create_oebbook(self.log, self.oeb, self.opts, - encoding=self.input_plugin.output_encoding) + self.oeb = create_oebbook( + self.log, self.oeb, self.opts, + encoding=self.input_plugin.output_encoding, + for_regex_wizard=self.for_regex_wizard) + if self.for_regex_wizard: + return self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) self.opts.is_image_collection = self.input_plugin.is_image_collection pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter) @@ -1081,7 +1086,6 @@ OptionRecommendation(name='search_replace', self.dump_oeb(self.oeb, out_dir) self.log('Structured HTML written to:', out_dir) - if self.opts.extra_css and os.path.exists(self.opts.extra_css): self.opts.extra_css = open(self.opts.extra_css, 'rb').read() @@ -1161,13 +1165,20 @@ OptionRecommendation(name='search_replace', self.log(self.output_fmt.upper(), 'output written to', self.output) self.flush() +# This has to be global as create_oebbook can be called from 
other locations +# (for example in the html input plugin) +regex_wizard_callback = None +def set_regex_wizard_callback(f): + global regex_wizard_callback + regex_wizard_callback = f + def create_oebbook(log, path_or_stream, opts, reader=None, - encoding='utf-8', populate=True): + encoding='utf-8', populate=True, for_regex_wizard=False): ''' Create an OEBBook. ''' from calibre.ebooks.oeb.base import OEBBook - html_preprocessor = HTMLPreProcessor(log, opts) + html_preprocessor = HTMLPreProcessor(log, opts, regex_wizard_callback=regex_wizard_callback) if not encoding: encoding = None oeb = OEBBook(log, html_preprocessor, @@ -1182,3 +1193,4 @@ def create_oebbook(log, path_or_stream, opts, reader=None, reader()(oeb, path_or_stream) return oeb + diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 91f91c8b3d..126709200a 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -497,9 +497,11 @@ class HTMLPreProcessor(object): (re.compile('<]*?id=subtitle[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL), lambda match : '

%s

'%(match.group(1),)), ] - def __init__(self, log=None, extra_opts=None): + def __init__(self, log=None, extra_opts=None, regex_wizard_callback=None): self.log = log self.extra_opts = extra_opts + self.regex_wizard_callback = regex_wizard_callback + self.current_href = None def is_baen(self, src): return re.compile(r' Date: Sun, 12 May 2013 14:38:21 +0530 Subject: [PATCH 10/26] pep8 --- src/calibre/ebooks/oeb/parse_utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index f053b5f515..8bf9c23d98 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -44,8 +44,10 @@ META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]') def merge_multiple_html_heads_and_bodies(root, log=None): heads, bodies = xpath(root, '//h:head'), xpath(root, '//h:body') - if not (len(heads) > 1 or len(bodies) > 1): return root - for child in root: root.remove(child) + if not (len(heads) > 1 or len(bodies) > 1): + return root + for child in root: + root.remove(child) head = root.makeelement(XHTML('head')) body = root.makeelement(XHTML('body')) for h in heads: @@ -88,7 +90,7 @@ def html5_parse(data, max_nesting_depth=100): # Check that the asinine HTML 5 algorithm did not result in a tree with # insane nesting depths for x in data.iterdescendants(): - if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node + if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node depth = node_depth(x) if depth > max_nesting_depth: raise ValueError('html5lib resulted in a tree with nesting' @@ -228,7 +230,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, if idx > -1: pre = data[:idx] data = data[idx:] - if ']+)', pre): val = match.group(2) @@ -368,8 +370,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, meta.getparent().remove(meta) meta = etree.SubElement(head, XHTML('meta'), attrib={'http-equiv': 
'Content-Type'}) - meta.set('content', 'text/html; charset=utf-8') # Ensure content is second - # attribute + meta.set('content', 'text/html; charset=utf-8') # Ensure content is second attribute # Ensure has a if not xpath(data, '/h:html/h:body'): From c2fde795af6ca1e9cbe8b117e61d3214530ca91b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 14:45:25 +0530 Subject: [PATCH 11/26] pep8 --- src/calibre/ebooks/mobi/reader/markup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader/markup.py b/src/calibre/ebooks/mobi/reader/markup.py index 3330c65a0a..d558ce611a 100644 --- a/src/calibre/ebooks/mobi/reader/markup.py +++ b/src/calibre/ebooks/mobi/reader/markup.py @@ -112,7 +112,7 @@ def update_flow_links(mobi8_reader, resource_map, log): url_css_index_pattern = re.compile(r'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE) for flow in mr.flows: - if flow is None: # 0th flow is None + if flow is None: # 0th flow is None flows.append(flow) continue @@ -330,7 +330,7 @@ def expand_mobi8_markup(mobi8_reader, resource_map, log): mobi8_reader.flows = flows # write out the parts and file flows - os.mkdir('text') # directory containing all parts + os.mkdir('text') # directory containing all parts spine = [] for i, part in enumerate(parts): pi = mobi8_reader.partinfo[i] From c1d49333a0ea7e64adef606d73bc28079e4f7b86 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 15:10:40 +0530 Subject: [PATCH 12/26] pep8 --- src/calibre/ebooks/mobi/debug/mobi8.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py index e1c8ffba44..a180b11ad0 100644 --- a/src/calibre/ebooks/mobi/debug/mobi8.py +++ b/src/calibre/ebooks/mobi/debug/mobi8.py @@ -163,7 +163,8 @@ class MOBIFile(object): ext = 'dat' prefix = 'binary' suffix = '' - if sig in {b'HUFF', b'CDIC', b'INDX'}: continue + if sig in {b'HUFF', b'CDIC', 
b'INDX'}: + continue # TODO: Ignore CNCX records as well if sig == b'FONT': font = read_font_record(rec.raw) @@ -196,7 +197,6 @@ class MOBIFile(object): vals = list(index)[:-1] + [None, None, None, None] entry_map.append(Entry(*(vals[:12]))) - indexing_data = collect_indexing_data(entry_map, list(map(len, self.text_records))) self.indexing_data = [DOC + '\n' +textwrap.dedent('''\ From 689808861a304835a09ad47ae3e30e76cede8973 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 15:38:23 +0530 Subject: [PATCH 13/26] MOBI Input: Add support for MOBI/KF8 files generated with the to be released kindlegen 2.9. Fixes #1179144 (error during conversion azw3 to other formats) --- src/calibre/ebooks/mobi/reader/headers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader/headers.py b/src/calibre/ebooks/mobi/reader/headers.py index b5b55b2ba0..31646a8d7b 100644 --- a/src/calibre/ebooks/mobi/reader/headers.py +++ b/src/calibre/ebooks/mobi/reader/headers.py @@ -181,9 +181,9 @@ class BookHeader(object): self.codec = 'cp1252' if not user_encoding else user_encoding log.warn('Unknown codepage %d. Assuming %s' % (self.codepage, self.codec)) - # Some KF8 files have header length == 256 (generated by kindlegen - # 2.7?). See https://bugs.launchpad.net/bugs/1067310 - max_header_length = 0x100 + # Some KF8 files have header length == 264 (generated by kindlegen + # 2.9?). See https://bugs.launchpad.net/bugs/1179144 + max_header_length = 0x108 if (ident == 'TEXTREAD' or self.length < 0xE4 or self.length > max_header_length or From 1c225cac666e28563e6a99f720f35b1cd31c18f7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 May 2013 21:51:12 +0530 Subject: [PATCH 14/26] MOBI Output: Fix space errorneously being removed when the input document contains a tag with leading space and sub-tags. 
Fixes #1179216 (Space lost between span tags converting to mobi) --- src/calibre/ebooks/mobi/mobiml.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 9610b7c0bd..f6cd55dafe 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -16,7 +16,8 @@ from calibre.ebooks.oeb.transforms.flatcss import KeyMapper from calibre.utils.magick.draw import identify_data MBP_NS = 'http://mobipocket.com/ns/mbp' -def MBP(name): return '{%s}%s' % (MBP_NS, name) +def MBP(name): + return '{%s}%s' % (MBP_NS, name) MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS} @@ -413,7 +414,7 @@ class MobiMLizer(object): # img sizes in units other than px # See #7520 for test case try: - pixs = int(round(float(value) / \ + pixs = int(round(float(value) / (72./self.profile.dpi))) except: continue @@ -488,8 +489,6 @@ class MobiMLizer(object): if elem.text: if istate.preserve: text = elem.text - elif len(elem) > 0 and isspace(elem.text): - text = None else: text = COLLAPSE.sub(' ', elem.text) valign = style['vertical-align'] From cc223574d07cc87e0a810b16107fad70cbdbb410 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 May 2013 08:56:31 +0530 Subject: [PATCH 15/26] PDF Output: Ignore invalid links instead of erroring out on them. 
Fixes #1179314 (conversion from CHM to PDF fails) --- src/calibre/ebooks/pdf/render/links.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index 2d0b91bbfe..500bbbf6c1 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -45,11 +45,15 @@ class Links(object): href, page, rect = link p, frag = href.partition('#')[0::2] try: - link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect)) + pref = self.pdf.get_pageref(page).obj except IndexError: - self.log.warn('Unable to find page for link: %r, ignoring it' % link) - continue - self.links.append(link) + try: + pref = self.pdf.get_pageref(page-1).obj + except IndexError: + self.pdf.debug('Unable to find page for link: %r, ignoring it' % link) + continue + self.pdf.debug('The link %s points to non-existent page, moving it one page back' % href) + self.links.append(((path, p, frag or None), pref, Array(rect))) def add_links(self): for link in self.links: From ed422c7b0fb17ee6b4c7b45106d6293538e9a14f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 May 2013 18:39:33 +0530 Subject: [PATCH 16/26] DOCX Input: Lists work --- src/calibre/ebooks/docx/block_styles.py | 17 ++- src/calibre/ebooks/docx/numbering.py | 144 +++++++++++++++++++++++- src/calibre/ebooks/docx/styles.py | 33 +++++- src/calibre/ebooks/docx/to_html.py | 23 +++- 4 files changed, 200 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index 1770569b61..b501580042 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -175,6 +175,20 @@ def read_shd(parent, dest): if val: ans = simple_color(val, auto='transparent') setattr(dest, 'background_color', ans) + +def read_numbering(parent, dest): + lvl = num_id = None + for np in XPath('./w:numPr')(parent): + for ilvl 
in XPath('./w:ilvl[@w:val]')(np): + try: + lvl = int(get(ilvl, 'w:val')) + except (ValueError, TypeError): + pass + for num in XPath('./w:numId[@w:val]')(np): + num_id = get(num, 'w:val') + val = (num_id, lvl) if num_id is not None or lvl is not None else inherit + setattr(dest, 'numbering', val) + # }}} class ParagraphStyle(object): @@ -194,6 +208,7 @@ class ParagraphStyle(object): # Misc. 'text_indent', 'text_align', 'line_height', 'direction', 'background_color', + 'numbering', ) def __init__(self, pPr=None): @@ -210,7 +225,7 @@ class ParagraphStyle(object): ): setattr(self, p, binary_property(pPr, p)) - for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'): + for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'): f = globals()['read_%s' % x] f(pPr, self) diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py index fc1e65db6a..8693e2a9a1 100644 --- a/src/calibre/ebooks/docx/numbering.py +++ b/src/calibre/ebooks/docx/numbering.py @@ -6,6 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' +import re +from collections import Counter + +from lxml.html.builder import OL, UL, SPAN + from calibre.ebooks.docx.block_styles import ParagraphStyle from calibre.ebooks.docx.char_styles import RunStyle from calibre.ebooks.docx.names import XPath, get @@ -33,10 +38,26 @@ class Level(object): self.fmt = 'decimal' self.para_link = None self.paragraph_style = self.character_style = None + self.is_numbered = False + self.num_template = None if lvl is not None: self.read_from_xml(lvl) + def copy(self): + ans = Level() + for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'): + setattr(ans, x, getattr(self, x)) + return ans + + def format_template(self, counter, ilvl): + def sub(m): + x = int(m.group(1)) - 1 + if x > ilvl or x not in counter: 
+ return '' + return '%d' % (counter[x] - (0 if x == ilvl else 1)) + return re.sub(r'%(\d+)', sub, self.num_template).rstrip() + '\xa0' + def read_from_xml(self, lvl, override=False): for lr in XPath('./w:lvlRestart[@w:val]')(lvl): try: @@ -57,9 +78,13 @@ class Level(object): for lr in XPath('./w:numFmt[@w:val]')(lvl): val = get(lr, 'w:val') if val == 'bullet': + self.is_numbered = False self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc') else: + self.is_numbered = True self.fmt = STYLE_MAP.get(val, 'decimal') + if lt and re.match(r'%\d+\.$', lt) is None: + self.num_template = lt for lr in XPath('./w:pStyle[@w:val]')(lvl): self.para_link = get(lr, 'w:val') @@ -78,12 +103,6 @@ class Level(object): else: self.character_style.update(ps) - def copy(self): - ans = Level() - for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style'): - setattr(ans, x, getattr(self, x)) - return ans - class NumberingDefinition(object): def __init__(self, parent=None): @@ -107,6 +126,7 @@ class Numbering(object): def __init__(self): self.definitions = {} self.instances = {} + self.counters = {} def __call__(self, root, styles): ' Read all numbering style definitions ' @@ -131,6 +151,7 @@ class Numbering(object): if alvl is None: alvl = Level() alvl.read_from_xml(lvl, override=True) + return nd next_pass = {} for n in XPath('./w:num[@w:numId]')(root): @@ -154,3 +175,114 @@ class Numbering(object): if d is not None: self.instances[num_id] = create_instance(n, d) + for num_id, d in self.instances.iteritems(): + self.counters[num_id] = Counter({lvl:d.levels[lvl].start for lvl in d.levels}) + + def get_pstyle(self, num_id, style_id): + d = self.instances.get(num_id, None) + if d is not None: + for ilvl, lvl in d.levels.iteritems(): + if lvl.para_link == style_id: + return ilvl + + def get_para_style(self, num_id, lvl): + d = self.instances.get(num_id, None) + if d is not None: + lvl = d.levels.get(lvl, None) + return getattr(lvl, 'paragraph_style', None) + 
+ def update_counter(self, counter, levelnum, levels): + counter[levelnum] += 1 + for ilvl, lvl in levels.iteritems(): + restart = lvl.restart + if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1: + counter[ilvl] = lvl.start + + def apply_markup(self, items, body, styles, object_map): + for p, num_id, ilvl in items: + d = self.instances.get(num_id, None) + if d is not None: + lvl = d.levels.get(ilvl, None) + if lvl is not None: + counter = self.counters[num_id] + p.tag = 'li' + p.set('value', '%s' % counter[ilvl]) + p.set('list-lvl', str(ilvl)) + p.set('list-id', num_id) + if lvl.num_template is not None: + val = lvl.format_template(counter, ilvl) + p.set('list-template', val) + self.update_counter(counter, ilvl, d.levels) + + def commit(current_run): + if not current_run: + return + start = current_run[0] + parent = start.getparent() + idx = parent.index(start) + + d = self.instances[start.get('list-id')] + ilvl = int(start.get('list-lvl')) + lvl = d.levels[ilvl] + lvlid = start.get('list-id') + start.get('list-lvl') + wrap = (OL if lvl.is_numbered else UL)('\n\t') + has_template = 'list-template' in start.attrib + if has_template: + wrap.set('lvlid', lvlid) + else: + wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list')) + parent.insert(idx, wrap) + last_val = None + for child in current_run: + wrap.append(child) + child.tail = '\n\t' + if has_template: + span = SPAN() + span.text = child.text + child.text = None + for gc in child: + span.append(gc) + child.append(span) + span = SPAN(child.get('list-template')) + child.insert(0, span) + for attr in ('list-lvl', 'list-id', 'list-template'): + child.attrib.pop(attr, None) + val = int(child.get('value')) + if last_val == val - 1 or wrap.tag == 'ul': + child.attrib.pop('value') + last_val = val + current_run[-1].tail = '\n' + del current_run[:] + + parents = set() + for child in body.iterdescendants('li'): + parents.add(child.getparent()) + + for parent in parents: + 
current_run = [] + for child in parent: + if child.tag == 'li': + if current_run: + last = current_run[-1] + if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')): + commit(current_run) + current_run.append(child) + else: + commit(current_run) + commit(current_run) + + for wrap in body.xpath('//ol[@lvlid]'): + wrap.attrib.pop('lvlid') + wrap.tag = 'div' + for i, li in enumerate(wrap.iterchildren('li')): + li.tag = 'div' + li.attrib.pop('value', None) + li.set('style', 'display:table-row') + obj = object_map[li] + bs = styles.para_cache[obj] + if i == 0: + wrap.set('style', 'display:table; margin-left: %s' % (bs.css.get('margin-left', 0))) + bs.css.pop('margin-left', None) + for child in li: + child.set('style', 'display:table-cell') + diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index a17295aa61..44ae2cea89 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -198,8 +198,19 @@ class Styles(object): if default_para.character_style is not None: self.para_char_cache[p] = default_para.character_style + is_numbering = direct_formatting.numbering is not inherit + if is_numbering: + num_id, lvl = direct_formatting.numbering + if num_id is not None: + p.set('calibre_num_id', '%s:%s' % (lvl, num_id)) + if num_id is not None and lvl is not None: + ps = self.numbering.get_para_style(num_id, lvl) + if ps is not None: + parent_styles.append(ps) + for attr in ans.all_properties: - setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr)) + if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists + setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr)) return ans def resolve_run(self, r): @@ -244,10 +255,20 @@ class Styles(object): return self.resolve_run(obj) def resolve_numbering(self, numbering): - pass # TODO: Implement this + # When a numPr element appears inside a paragraph style, the lvl info + # must 
be discarded and pStyle used instead. + self.numbering = numbering + for style in self: + ps = style.paragraph_style + if ps is not None and ps.numbering is not inherit: + lvl = numbering.get_pstyle(ps.numbering[0], style.style_id) + if lvl is None: + ps.numbering = inherit + else: + ps.numbering = (ps.numbering[0], lvl) def register(self, css, prefix): - h = hash(tuple(css.iteritems())) + h = hash(frozenset(css.iteritems())) ans, _ = self.classes.get(h, (None, None)) if ans is None: self.counter[prefix] += 1 @@ -266,13 +287,15 @@ class Styles(object): self.register(css, 'text') def class_name(self, css): - h = hash(tuple(css.iteritems())) + h = hash(frozenset(css.iteritems())) return self.classes.get(h, (None, None))[0] def generate_css(self): prefix = textwrap.dedent( '''\ - p { margin: 0; padding: 0; text-indent: 1.5em } + p { text-indent: 1.5em } + + ul, ol, p { margin: 0; padding: 0 } ''') ans = [] diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 7aa0383da6..8cd79074e3 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -7,6 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' import sys, os, re +from collections import OrderedDict from lxml import html from lxml.html.builder import ( @@ -36,7 +37,7 @@ class Convert(object): self.mi = self.docx.metadata self.body = BODY() self.styles = Styles() - self.object_map = {} + self.object_map = OrderedDict() self.html = HTML( HEAD( META(charset='utf-8'), @@ -72,6 +73,19 @@ class Convert(object): pass # TODO: Last section properties else: self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag)) + + numbered = [] + for html_obj, obj in self.object_map.iteritems(): + raw = obj.get('calibre_num_id', None) + if raw is not None: + lvl, num_id = raw.partition(':')[0::2] + try: + lvl = int(lvl) + except (TypeError, ValueError): + lvl = 0 + numbered.append((html_obj, num_id, lvl)) + 
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map) + if len(self.body) > 0: self.body.text = '\n\t' for child in self.body: @@ -102,7 +116,7 @@ class Convert(object): nname = get_name(NUMBERING, 'numbering.xml') sname = get_name(STYLES, 'styles.xml') - numbering = Numbering() + numbering = self.numbering = Numbering() if sname is not None: try: @@ -133,6 +147,7 @@ class Convert(object): def convert_p(self, p): dest = P() + self.object_map[dest] = p style = self.styles.resolve_paragraph(p) for run in XPath('descendant::w:r')(p): span = self.convert_run(run) @@ -173,7 +188,6 @@ class Convert(object): wrapper = self.wrap_elems(spans, SPAN()) wrapper.set('class', cls) - self.object_map[dest] = p return dest def wrap_elems(self, elems, wrapper): @@ -188,7 +202,7 @@ class Convert(object): def convert_run(self, run): ans = SPAN() - ans.run = run + self.object_map[ans] = run text = Text(ans, 'text', []) for child in run: @@ -224,7 +238,6 @@ class Convert(object): ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup' if style.lang is not inherit: ans.lang = style.lang - self.object_map[ans] = run return ans if __name__ == '__main__': From e637b32485386b2e05820d2861a5ea50e97fb687 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 May 2013 21:46:48 +0530 Subject: [PATCH 17/26] Add mechanism for device drivers to popup a message to the user after a callback --- src/calibre/devices/interface.py | 26 +++++++++++++++----------- src/calibre/gui2/device.py | 20 +++++++++++++++----- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/calibre/devices/interface.py b/src/calibre/devices/interface.py index 2b3bbd4fd6..9b173b091e 100644 --- a/src/calibre/devices/interface.py +++ b/src/calibre/devices/interface.py @@ -107,6 +107,12 @@ class DevicePlugin(Plugin): #: :meth:`set_user_blacklisted_devices` ASK_TO_ALLOW_CONNECT = False + #: Set this to a dictionary of the form {'title':title, 'msg':msg, 'det_msg':detailed_msg} to have 
calibre popup + #: a message to the user after some callbacks are run (currently only upload_books). + #: Be careful to not spam the user with too many messages. This variable is checked after *every* callback, + #: so only set it when you really need to. + user_feedback_after_callback = None + @classmethod def get_gui_name(cls): if hasattr(cls, 'gui_name'): @@ -157,16 +163,15 @@ class DevicePlugin(Plugin): if (vid in device_id or vidd in device_id) and \ (pid in device_id or pidd in device_id) and \ self.test_bcd_windows(device_id, bcd): - if debug: - self.print_usb_device_info(device_id) - if only_presence or self.can_handle_windows(device_id, debug=debug): - try: - bcd = int(device_id.rpartition( - 'rev_')[-1].replace(':', 'a'), 16) - except: - bcd = None - return True, (vendor_id, product_id, bcd, None, - None, None) + if debug: + self.print_usb_device_info(device_id) + if only_presence or self.can_handle_windows(device_id, debug=debug): + try: + bcd = int(device_id.rpartition( + 'rev_')[-1].replace(':', 'a'), 16) + except: + bcd = None + return True, (vendor_id, product_id, bcd, None, None, None) return False, None def test_bcd(self, bcdDevice, bcd): @@ -638,7 +643,6 @@ class DevicePlugin(Plugin): ''' device_prefs.set_overrides() - # Dynamic control interface. # The following methods are probably called on the GUI thread. 
Any driver # that implements these methods must take pains to be thread safe, because diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 45778ec309..15dc1f0c0a 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -122,7 +122,8 @@ def device_name_for_plugboards(device_class): class DeviceManager(Thread): # {{{ def __init__(self, connected_slot, job_manager, open_feedback_slot, - open_feedback_msg, allow_connect_slot, sleep_time=2): + open_feedback_msg, allow_connect_slot, + after_callback_feedback_slot, sleep_time=2): ''' :sleep_time: Time to sleep between device probes in secs ''' @@ -150,6 +151,7 @@ class DeviceManager(Thread): # {{{ self.ejected_devices = set([]) self.mount_connection_requests = Queue.Queue(0) self.open_feedback_slot = open_feedback_slot + self.after_callback_feedback_slot = after_callback_feedback_slot self.open_feedback_msg = open_feedback_msg self._device_information = None self.current_library_uuid = None @@ -392,6 +394,10 @@ class DeviceManager(Thread): # {{{ self.device.set_progress_reporter(job.report_progress) self.current_job.run() self.current_job = None + feedback = getattr(self.device, 'user_feedback_after_callback', None) + if feedback is not None: + self.device.user_feedback_after_callback = None + self.after_callback_feedback_slot(feedback) else: break if do_sleep: @@ -850,7 +856,7 @@ class DeviceMixin(object): # {{{ self.device_manager = DeviceManager(FunctionDispatcher(self.device_detected), self.job_manager, Dispatcher(self.status_bar.show_message), Dispatcher(self.show_open_feedback), - FunctionDispatcher(self.allow_connect)) + FunctionDispatcher(self.allow_connect), Dispatcher(self.after_callback_feedback)) self.device_manager.start() self.device_manager.devices_initialized.wait() if tweaks['auto_connect_to_folder']: @@ -862,6 +868,10 @@ class DeviceMixin(object): # {{{ name, show_copy_button=False, override_icon=QIcon(icon)) + def after_callback_feedback(self, feedback): + title, 
msg, det_msg = feedback + info_dialog(self, feedback['title'], feedback['msg'], det_msg=feedback['det_msg']).show() + def debug_detection(self, done): self.debug_detection_callback = weakref.ref(done) self.device_manager.debug_detection(FunctionDispatcher(self.debug_detection_done)) @@ -1116,7 +1126,7 @@ class DeviceMixin(object): # {{{ return dm = self.iactions['Remove Books'].delete_memory - if dm.has_key(job): + if job in dm: paths, model = dm.pop(job) self.device_manager.remove_books_from_metadata(paths, self.booklists()) @@ -1141,7 +1151,7 @@ class DeviceMixin(object): # {{{ def dispatch_sync_event(self, dest, delete, specific): rows = self.library_view.selectionModel().selectedRows() if not rows or len(rows) == 0: - error_dialog(self, _('No books'), _('No books')+' '+\ + error_dialog(self, _('No books'), _('No books')+' '+ _('selected to send')).exec_() return @@ -1160,7 +1170,7 @@ class DeviceMixin(object): # {{{ if fmts: for f in fmts.split(','): f = f.lower() - if format_count.has_key(f): + if f in format_count: format_count[f] += 1 else: format_count[f] = 1 From ec863926661d2bc9366d2fe1b74bd091138e0495 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 08:29:58 +0530 Subject: [PATCH 18/26] Fix #1179697 (write a device driver for my device) --- src/calibre/devices/android/driver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 9d5ce152d3..2855de16ae 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -240,7 +240,8 @@ class ANDROID(USBMS): 'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS', - 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894'] + 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894', '_USB', + ] 
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', @@ -251,7 +252,9 @@ class ANDROID(USBMS): 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875', 'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E', - 'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894'] + 'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894', + '_USB', + ] OSX_MAIN_MEM = 'Android Device Main Memory' From e33ac985b4b5e6485a5ce53970093ce6e04a78ad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 09:30:36 +0530 Subject: [PATCH 19/26] On linux when searching the system for fonts, search all directories returned by fontconfig, if available, instead of a default list of directories --- src/calibre/utils/fonts/scanner.py | 84 ++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 9 deletions(-) diff --git a/src/calibre/utils/fonts/scanner.py b/src/calibre/utils/fonts/scanner.py index 827e5536d5..b5628989c2 100644 --- a/src/calibre/utils/fonts/scanner.py +++ b/src/calibre/utils/fonts/scanner.py @@ -13,13 +13,82 @@ from threading import Thread from calibre import walk, prints, as_unicode from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG, - isworker) + isworker, filesystem_encoding) from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont from calibre.utils.icu import sort_key class NoFonts(ValueError): pass + +def default_font_dirs(): + return [ + '/opt/share/fonts', + '/usr/share/fonts', + '/usr/local/share/fonts', + os.path.expanduser('~/.local/share/fonts'), + os.path.expanduser('~/.fonts') + ] + + +def fc_list(): + import ctypes + from ctypes.util import find_library + + lib = find_library('fontconfig') + if lib is None: + return default_font_dirs() + 
try: + lib = ctypes.CDLL(lib) + except: + return default_font_dirs() + + prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p) + try: + get_font_dirs = prototype(('FcConfigGetFontDirs', lib)) + except (AttributeError): + return default_font_dirs() + prototype = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_void_p) + try: + next_dir = prototype(('FcStrListNext', lib)) + except (AttributeError): + return default_font_dirs() + + prototype = ctypes.CFUNCTYPE(None, ctypes.c_void_p) + try: + end = prototype(('FcStrListDone', lib)) + except (AttributeError): + return default_font_dirs() + + str_list = get_font_dirs(ctypes.c_void_p()) + if not str_list: + return default_font_dirs() + + ans = [] + while True: + d = next_dir(str_list) + if not d: + break + if d: + try: + ans.append(d.decode(filesystem_encoding)) + except ValueError: + return default_font_dirs + end(str_list) + if len(ans) < 3: + return default_font_dirs() + parents = [] + for f in ans: + found = False + for p in parents: + if f.startswith(p): + found = True + break + if not found: + parents.append(f) + return parents + + def font_dirs(): if iswindows: winutil, err = plugins['winutil'] @@ -35,12 +104,7 @@ def font_dirs(): os.path.expanduser('~/.fonts'), os.path.expanduser('~/Library/Fonts'), ] - return [ - '/opt/share/fonts', - '/usr/share/fonts', - '/usr/local/share/fonts', - os.path.expanduser('~/.fonts') - ] + return fc_list() class Scanner(Thread): @@ -133,7 +197,8 @@ class Scanner(Thread): for family in self.find_font_families(): faces = filter(filter_faces, self.fonts_for_family(family)) - if not faces: continue + if not faces: + continue generic_family = panose_to_css_generic_family(faces[0]['panose']) if generic_family in allowed_families or generic_family == preferred_families[0]: return (family, faces) @@ -233,7 +298,8 @@ class Scanner(Thread): def build_families(self): families = defaultdict(list) for f in self.cached_fonts.itervalues(): - if not f: continue + if not f: + continue lf = 
icu_lower(f['font-family'] or '') if lf: families[lf].append(f) From 802e4c52fb841f7bf3ef92476b29796f04774595 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 09:53:42 +0530 Subject: [PATCH 20/26] Change the filesystem encoding used by python to utf-8 if it is ascii --- src/calibre/constants.py | 6 ++---- src/calibre/utils/icu.c | 15 +++++++++++++++ src/calibre/utils/icu.py | 15 +++++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 6526c2e289..4c17a90122 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -66,10 +66,8 @@ else: filesystem_encoding = 'utf-8' # On linux, unicode arguments to os file functions are coerced to an ascii # bytestring if sys.getfilesystemencoding() == 'ascii', which is - # just plain dumb. So issue a warning. - print ('WARNING: You do not have the LANG environment variable set correctly. ' - 'This will cause problems with non-ascii filenames. ' - 'Set it to something like en_US.UTF-8.\n') + # just plain dumb. 
This is fixed by the icu.py module which, when + # imported changes ascii to utf-8 except: filesystem_encoding = 'utf-8' diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index ccb1cfb5b9..aee47448fd 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -661,6 +661,17 @@ icu_set_default_encoding(PyObject *self, PyObject *args) { } // }}} +// set_default_encoding {{{ +static PyObject * +icu_set_filesystem_encoding(PyObject *self, PyObject *args) { + char *encoding; + if (!PyArg_ParseTuple(args, "s:setfilesystemencoding", &encoding)) + return NULL; + Py_FileSystemDefaultEncoding = strdup(encoding); + Py_RETURN_NONE; + +} +// }}} // set_default_encoding {{{ static PyObject * icu_get_available_transliterators(PyObject *self, PyObject *args) { @@ -707,6 +718,10 @@ static PyMethodDef icu_methods[] = { "set_default_encoding(encoding) -> Set the default encoding for the python unicode implementation." }, + {"set_filesystem_encoding", icu_set_filesystem_encoding, METH_VARARGS, + "set_filesystem_encoding(encoding) -> Set the filesystem encoding for python." + }, + {"get_available_transliterators", icu_get_available_transliterators, METH_VARARGS, "get_available_transliterators() -> Return list of available transliterators. This list is rather limited on OS X." 
}, diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index e1e6c1a1c6..1f54a04646 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -163,11 +163,22 @@ load_collator() _icu_not_ok = _icu is None or _collator is None try: - if sys.getdefaultencoding().lower() == 'ascii': + senc = sys.getdefaultencoding() + if not senc or senc.lower() == 'ascii': _icu.set_default_encoding('utf-8') + del senc except: pass +try: + fenc = sys.getfilesystemencoding() + if not fenc or fenc.lower() == 'ascii': + _icu.set_filesystem_encoding('utf-8') + del fenc +except: + pass + + # }}} ################# The string functions ######################################## @@ -247,7 +258,7 @@ def collation_order(a): ################################################################################ -def test(): # {{{ +def test(): # {{{ from calibre import prints # Data {{{ german = ''' From ffdc9d377c7540f7bab6ac68303c744676828597 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 11:42:31 +0530 Subject: [PATCH 21/26] ... 
--- src/calibre/ebooks/docx/dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/docx/dump.py b/src/calibre/ebooks/docx/dump.py index f6432125c5..6ebc2e8871 100644 --- a/src/calibre/ebooks/docx/dump.py +++ b/src/calibre/ebooks/docx/dump.py @@ -22,7 +22,7 @@ def dump(path): zf.extractall(dest) for f in walk(dest): - if f.endswith('.xml'): + if f.endswith('.xml') or f.endswith('.rels'): with open(f, 'r+b') as stream: raw = stream.read() root = etree.fromstring(raw) From d8a896616a34432bc7c4ae00ce8018619881ae7a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 16:09:12 +0530 Subject: [PATCH 22/26] DOCX Input: Fonts --- src/calibre/ebooks/docx/char_styles.py | 15 ++- src/calibre/ebooks/docx/container.py | 4 +- src/calibre/ebooks/docx/fonts.py | 132 +++++++++++++++++++++++++ src/calibre/ebooks/docx/names.py | 1 + src/calibre/ebooks/docx/styles.py | 11 ++- src/calibre/ebooks/docx/to_html.py | 18 +++- 6 files changed, 173 insertions(+), 8 deletions(-) create mode 100644 src/calibre/ebooks/docx/fonts.py diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index a9d2a43cdb..b65766e494 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -113,6 +113,14 @@ def read_vert_align(parent, dest): if val and val in {'baseline', 'subscript', 'superscript'}: ans = val setattr(dest, 'vert_align', ans) + +def read_font_family(parent, dest): + ans = inherit + for col in XPath('./w:rFonts[@w:ascii]')(parent): + val = get(col, 'w:ascii') + if val: + ans = val + setattr(dest, 'font_family', ans) # }}} class RunStyle(object): @@ -122,7 +130,7 @@ class RunStyle(object): 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color', - 'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', + 'letter_spacing', 'font_size', 'text_decoration', 
'vert_align', 'lang', 'font_family' } toggle_properties = { @@ -141,7 +149,7 @@ class RunStyle(object): ): setattr(self, p, binary_property(rPr, p)) - for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'): + for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'): f = globals()['read_%s' % x] f(rPr, self) @@ -212,6 +220,9 @@ class RunStyle(object): if self.b: c['font-weight'] = 'bold' + + if self.font_family is not inherit: + c['font-family'] = self.font_family return self._css def same_border(self, other): diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index ec0decacef..bcca336474 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -167,7 +167,9 @@ class DOCX(object): @property def document_relationships(self): - name = self.document_name + return self.get_relationships(self.document_name) + + def get_relationships(self, name): base = '/'.join(name.split('/')[:-1]) by_id, by_type = {}, {} parts = name.split('/') diff --git a/src/calibre/ebooks/docx/fonts.py b/src/calibre/ebooks/docx/fonts.py new file mode 100644 index 0000000000..4ed602c71d --- /dev/null +++ b/src/calibre/ebooks/docx/fonts.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +import os, re +from collections import namedtuple + +from calibre.ebooks.docx.block_styles import binary_property, inherit +from calibre.ebooks.docx.names import XPath, get +from calibre.utils.filenames import ascii_filename +from calibre.utils.fonts.scanner import font_scanner, NoFonts +from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font + +Embed = namedtuple('Embed', 'name key subsetted') + +def 
has_system_fonts(name): + try: + return bool(font_scanner.fonts_for_family(name)) + except NoFonts: + return False + +def get_variant(bold=False, italic=False): + return {(False, False):'Regular', (False, True):'Italic', + (True, False):'Bold', (True, True):'BoldItalic'}[(bold, italic)] + +class Family(object): + + def __init__(self, elem, embed_relationships): + self.name = self.family_name = get(elem, 'w:name') + self.alt_names = tuple(get(x, 'w:val') for x in XPath('./w:altName')(elem)) + if self.alt_names and not has_system_fonts(self.name): + for x in self.alt_names: + if has_system_fonts(x): + self.family_name = x + break + + self.embedded = {} + for x in ('Regular', 'Bold', 'Italic', 'BoldItalic'): + for y in XPath('./w:embed%s[@r:id]' % x)(elem): + rid = get(y, 'r:id') + key = get(y, 'w:fontKey') + subsetted = get(y, 'w:subsetted') in {'1', 'true', 'on'} + if rid in embed_relationships: + self.embedded[x] = Embed(embed_relationships[rid], key, subsetted) + + self.generic_family = 'auto' + for x in XPath('./w:family[@w:val]')(elem): + self.generic_family = get(x, 'w:val', 'auto') + + ntt = binary_property(elem, 'notTrueType') + self.is_ttf = ntt is inherit or not ntt + + self.panose1 = None + self.panose_name = None + for x in XPath('./w:panose1[@w:val]')(elem): + try: + v = get(x, 'w:val') + v = tuple(int(v[i:i+2], 16) for i in xrange(0, len(v), 2)) + except (TypeError, ValueError, IndexError): + pass + else: + self.panose1 = v + self.panose_name = panose_to_css_generic_family(v) + + self.css_generic_family = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace', + 'decorative':'fantasy', 'script':'cursive'}.get(self.generic_family, None) + self.css_generic_family = self.css_generic_family or self.panose_name or 'serif' + + +class Fonts(object): + + def __init__(self): + self.fonts = {} + self.used = set() + + def __call__(self, root, embed_relationships, docx, dest_dir): + for elem in XPath('//w:font[@w:name]')(root): + self.fonts[get(elem, 
'w:name')] = Family(elem, embed_relationships) + + def family_for(self, name, bold=False, italic=False): + f = self.fonts.get(name, None) + if f is None: + return 'serif' + variant = get_variant(bold, italic) + self.used.add((name, variant)) + name = f.name if variant in f.embedded else f.family_name + return '"%s", %s' % (name.replace('"', ''), f.css_generic_family) + + def embed_fonts(self, dest_dir, docx): + defs = [] + dest_dir = os.path.join(dest_dir, 'fonts') + for name, variant in self.used: + f = self.fonts[name] + if variant in f.embedded: + if not os.path.exists(dest_dir): + os.mkdir(dest_dir) + fname = self.write(name, dest_dir, docx, variant) + if fname is not None: + d = {'font-family':'"%s"' % name.replace('"', ''), 'src': 'url("fonts/%s")' % fname} + if 'Bold' in variant: + d['font-weight'] = 'bold' + if 'Italic' in variant: + d['font-style'] = 'italic' + d = ['%s: %s' % (k, v) for k, v in d.iteritems()] + d = ';\n\t'.join(d) + defs.append('@font-face {\n\t%s\n}\n' % d) + return '\n'.join(defs) + + def write(self, name, dest_dir, docx, variant): + f = self.fonts[name] + ef = f.embedded[variant] + raw = docx.read(ef.name) + prefix = raw[:32] + if ef.key: + key = re.sub(r'[^A-Fa-f0-9]', '', ef.key) + key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in xrange(0, len(key), 2)))) + prefix = bytearray(prefix) + prefix = bytes(bytearray(prefix[i]^key[i % len(key)] for i in xrange(len(prefix)))) + if not is_truetype_font(prefix): + return None + ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf' + fname = ascii_filename('%s - %s.%s' % (name, variant, ext)) + with open(os.path.join(dest_dir, fname), 'wb') as dest: + dest.write(prefix) + dest.write(raw[32:]) + + return fname + diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py index 91b051d691..da643dcc2c 100644 --- a/src/calibre/ebooks/docx/names.py +++ b/src/calibre/ebooks/docx/names.py @@ -13,6 +13,7 @@ DOCPROPS = 
'http://schemas.openxmlformats.org/package/2006/relationships/metada APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties' STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles' NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering' +FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable' namespaces = { 'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main', diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 44ae2cea89..13b9ebe58f 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -97,7 +97,8 @@ class Styles(object): def get(self, key, default=None): return self.id_map.get(key, default) - def __call__(self, root): + def __call__(self, root, fonts): + self.fonts = fonts for s in XPath('//w:style')(root): s = Style(s) if s.style_id: @@ -246,6 +247,9 @@ class Styles(object): for attr in ans.all_properties: setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr)) + if ans.font_family is not inherit: + ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i) + return ans def resolve(self, obj): @@ -290,13 +294,16 @@ class Styles(object): h = hash(frozenset(css.iteritems())) return self.classes.get(h, (None, None))[0] - def generate_css(self): + def generate_css(self, dest_dir, docx): + ef = self.fonts.embed_fonts(dest_dir, docx) prefix = textwrap.dedent( '''\ p { text-indent: 1.5em } ul, ol, p { margin: 0; padding: 0 } ''') + if ef: + prefix += '\n' + ef ans = [] for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 8cd79074e3..dbd6dce043 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -14,9 +14,10 @@ from lxml.html.builder import ( HTML, HEAD, TITLE, BODY, 
LINK, META, P, SPAN, BR) from calibre.ebooks.docx.container import DOCX, fromstring -from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING +from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS from calibre.ebooks.docx.styles import Styles, inherit from calibre.ebooks.docx.numbering import Numbering +from calibre.ebooks.docx.fonts import Fonts from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 class Text: @@ -116,7 +117,18 @@ class Convert(object): nname = get_name(NUMBERING, 'numbering.xml') sname = get_name(STYLES, 'styles.xml') + fname = get_name(FONTS, 'fontTable.xml') numbering = self.numbering = Numbering() + fonts = self.fonts = Fonts() + + if fname is not None: + embed_relationships = self.docx.get_relationships(fname)[0] + try: + raw = self.docx.read(fname) + except KeyError: + self.log.warn('Fonts table %s does not exist' % fname) + else: + fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir) if sname is not None: try: @@ -124,7 +136,7 @@ class Convert(object): except KeyError: self.log.warn('Styles %s do not exist' % sname) else: - self.styles(fromstring(raw)) + self.styles(fromstring(raw), fonts) if nname is not None: try: @@ -140,7 +152,7 @@ class Convert(object): raw = html.tostring(self.html, encoding='utf-8', doctype='') with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: f.write(raw) - css = self.styles.generate_css() + css = self.styles.generate_css(self.dest_dir, self.docx) if css: with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f: f.write(css.encode('utf-8')) From aa2aa3d2ef8bb89acf7a6e943be9a91391d9cdd0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 16:36:09 +0530 Subject: [PATCH 23/26] Ignore line height of 1 --- src/calibre/ebooks/docx/block_styles.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/docx/block_styles.py 
b/src/calibre/ebooks/docx/block_styles.py index b501580042..eef68a184f 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -271,7 +271,10 @@ class ParagraphStyle(object): if val is not inherit: c['margin-%s' % edge] = val - for x in ('text_indent', 'text_align', 'line_height', 'background_color'): + if self.line_height not in {inherit, '1'}: + c['line-height'] = self.line_height + + for x in ('text_indent', 'text_align', 'background_color'): val = getattr(self, x) if val is not inherit: c[x.replace('_', '-')] = val From 5ec61a6b299ab2114e0b7b7ae5848b733d512371 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 16:45:03 +0530 Subject: [PATCH 24/26] Dont ignore the content in tables, just extarct the content as linear blocks for now --- src/calibre/ebooks/docx/to_html.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index dbd6dce043..b4e5b0e5f7 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -14,7 +14,7 @@ from lxml.html.builder import ( HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR) from calibre.ebooks.docx.container import DOCX, fromstring -from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS +from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS from calibre.ebooks.docx.styles import Styles, inherit from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.fonts import Fonts @@ -64,16 +64,11 @@ class Convert(object): doc = self.docx.document relationships_by_id, relationships_by_type = self.docx.document_relationships self.read_styles(relationships_by_type) - for top_level in XPath('/w:document/w:body/*')(doc): - if is_tag(top_level, 'w:p'): - p = self.convert_p(top_level) - self.body.append(p) - elif is_tag(top_level, 'w:tbl'): - pass # TODO: tables - elif 
is_tag(top_level, 'w:sectPr'): - pass # TODO: Last section properties - else: - self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag)) + for wp in XPath('//w:p')(doc): + p = self.convert_p(wp) + self.body.append(p) + # TODO: tables child of (nested tables?) + # TODO: Last section properties child of numbered = [] for html_obj, obj in self.object_map.iteritems(): From 33793ff0d1135729832cd4d2c10f1c2a2a37516f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 18:01:55 +0530 Subject: [PATCH 25/26] Driver for SONY PRS-T2N --- src/calibre/devices/prst1/driver.py | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py index 72533860d4..0431ca7bfd 100644 --- a/src/calibre/devices/prst1/driver.py +++ b/src/calibre/devices/prst1/driver.py @@ -39,8 +39,8 @@ class PRST1(USBMS): path_sep = '/' booklist_class = CollectionsBookList - FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are - # used in japan + FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are + # used in japan CAN_SET_METADATA = ['collections'] CAN_DO_DEVICE_DB_PLUGBOARD = True @@ -50,10 +50,10 @@ class PRST1(USBMS): VENDOR_NAME = 'SONY' WINDOWS_MAIN_MEM = re.compile( - r'(PRS-T(1|2)&)' + r'(PRS-T(1|2|2N)&)' ) WINDOWS_CARD_A_MEM = re.compile( - r'(PRS-T(1|2)__SD&)' + r'(PRS-T(1|2|2N)__SD&)' ) MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory' STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card' @@ -66,7 +66,7 @@ class PRST1(USBMS): EXTRA_CUSTOMIZATION_MESSAGE = [ _('Comma separated list of metadata fields ' - 'to turn into collections on the device. Possibilities include: ')+\ + 'to turn into collections on the device. 
Possibilities include: ')+ 'series, tags, authors', _('Upload separate cover thumbnails for books') + ':::'+_('Normally, the SONY readers get the cover image from the' @@ -194,17 +194,17 @@ class PRST1(USBMS): time_offsets = {} for i, row in enumerate(cursor): try: - comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000); + comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000) except (OSError, IOError, TypeError): # In case the db has incorrect path info continue - device_date = int(row[1]); + device_date = int(row[1]) offset = device_date - comp_date time_offsets.setdefault(offset, 0) time_offsets[offset] = time_offsets[offset] + 1 try: - device_offset = max(time_offsets,key = lambda a: time_offsets.get(a)) + device_offset = max(time_offsets, key=lambda a: time_offsets.get(a)) debug_print("Device Offset: %d ms"%device_offset) self.device_offset = device_offset except ValueError: @@ -213,7 +213,7 @@ class PRST1(USBMS): for idx, book in enumerate(bl): query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?' t = (book.lpath,) - cursor.execute (query, t) + cursor.execute(query, t) for i, row in enumerate(cursor): book.device_collections = bl_collections.get(row[0], None) @@ -318,14 +318,14 @@ class PRST1(USBMS): ' any notes/highlights, etc.')%dbpath)+' Underlying error:' '\n'+tb) - def get_lastrowid(self, cursor): - # SQLite3 + Python has a fun issue on 32-bit systems with integer overflows. - # Issue a SQL query instead, getting the value as a string, and then converting to a long python int manually. - query = 'SELECT last_insert_rowid()' - cursor.execute(query) - row = cursor.fetchone() + def get_lastrowid(self, cursor): + # SQLite3 + Python has a fun issue on 32-bit systems with integer overflows. + # Issue a SQL query instead, getting the value as a string, and then converting to a long python int manually. 
+ query = 'SELECT last_insert_rowid()' + cursor.execute(query) + row = cursor.fetchone() - return long(row[0]) + return long(row[0]) def get_database_min_id(self, source_id): sequence_min = 0L @@ -345,7 +345,7 @@ class PRST1(USBMS): # Insert the sequence Id if it doesn't query = ('INSERT INTO sqlite_sequence (name, seq) ' 'SELECT ?, ? ' - 'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)'); + 'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)') cursor.execute(query, (table, sequence_id, table,)) cursor.close() From a597fe76bb40aa170af740b269e5cc48f8e5e633 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 18:39:58 +0530 Subject: [PATCH 26/26] DOCX Input: Cascade the font css --- src/calibre/ebooks/docx/block_styles.py | 9 +++- src/calibre/ebooks/docx/char_styles.py | 20 ++++++--- src/calibre/ebooks/docx/styles.py | 55 ++++++++++++++++++++++++- src/calibre/ebooks/docx/to_html.py | 12 +++--- 4 files changed, 81 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index eef68a184f..10dc416eec 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -208,7 +208,7 @@ class ParagraphStyle(object): # Misc. 
'text_indent', 'text_align', 'line_height', 'direction', 'background_color', - 'numbering', + 'numbering', 'font_family', 'font_size', ) def __init__(self, pPr=None): @@ -232,6 +232,8 @@ class ParagraphStyle(object): for s in XPath('./w:pStyle[@w:val]')(pPr): self.linked_style = get(s, 'w:val') + self.font_family = self.font_size = inherit + self._css = None def update(self, other): @@ -274,10 +276,13 @@ class ParagraphStyle(object): if self.line_height not in {inherit, '1'}: c['line-height'] = self.line_height - for x in ('text_indent', 'text_align', 'background_color'): + for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'): val = getattr(self, x) if val is not inherit: + if x == 'font_size': + val = '%.3gpt' % val c[x.replace('_', '-')] = val + return self._css # TODO: keepNext must be done at markup level diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index b65766e494..ca023e23af 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -172,6 +172,18 @@ class RunStyle(object): if val is inherit: setattr(self, p, getattr(parent, p)) + def get_border_css(self, ans): + for x in ('color', 'style', 'width'): + val = getattr(self, 'border_'+x) + if x == 'width' and val is not inherit: + val = '%.3gpt' % val + if val is not inherit: + ans['border-%s' % x] = val + + def clear_border_css(self): + for x in ('color', 'style', 'width'): + setattr(self, 'border_'+x, inherit) + @property def css(self): if self._css is None: @@ -196,12 +208,7 @@ class RunStyle(object): if self.vanish is True: c['display'] = 'none' - for x in ('color', 'style', 'width'): - val = getattr(self, 'border_'+x) - if x == 'width' and val is not inherit: - val = '%.3gpt' % val - if val is not inherit: - c['border-%s' % x] = val + self.get_border_css(c) if self.padding is not inherit: c['padding'] = '%.3gpt' % self.padding @@ -223,6 +230,7 @@ class RunStyle(object): if 
self.font_family is not inherit: c['font-family'] = self.font_family + return self._css def same_border(self, other): diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 13b9ebe58f..c17418d0dd 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -258,6 +258,55 @@ class Styles(object): if obj.tag.endswith('}r'): return self.resolve_run(obj) + def cascade(self, layers): + self.body_font_family = 'serif' + self.body_font_size = '10pt' + + for p, runs in layers.iteritems(): + char_styles = [self.resolve_run(r) for r in runs] + block_style = self.resolve_paragraph(p) + c = Counter() + for s in char_styles: + if s.font_family is not inherit: + c[s.font_family] += 1 + if c: + family = c.most_common(1)[0][0] + block_style.font_family = family + for s in char_styles: + if s.font_family == family: + s.font_family = inherit + + sizes = [s.font_size for s in char_styles if s.font_size is not inherit] + if sizes: + sz = block_style.font_size = sizes[0] + for s in char_styles: + if s.font_size == sz: + s.font_size = inherit + + block_styles = [self.resolve_paragraph(p) for p in layers] + c = Counter() + for s in block_styles: + if s.font_family is not inherit: + c[s.font_family] += 1 + + if c: + self.body_font_family = family = c.most_common(1)[0][0] + for s in block_styles: + if s.font_family == family: + s.font_family = inherit + + c = Counter() + for s in block_styles: + if s.font_size is not inherit: + c[s.font_size] += 1 + + if c: + sz = c.most_common(1)[0][0] + for s in block_styles: + if s.font_size == sz: + s.font_size = inherit + self.body_font_size = '%.3gpt' % sz + def resolve_numbering(self, numbering): # When a numPr element appears inside a paragraph style, the lvl info # must be discarder and pStyle used instead. 
@@ -298,12 +347,14 @@ class Styles(object): ef = self.fonts.embed_fonts(dest_dir, docx) prefix = textwrap.dedent( '''\ + body { font-family: %s; font-size: %s } + p { text-indent: 1.5em } ul, ol, p { margin: 0; padding: 0 } - ''') + ''') % (self.body_font_family, self.body_font_size) if ef: - prefix += '\n' + ef + prefix = ef + '\n' + prefix ans = [] for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index b4e5b0e5f7..902952ca4a 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -64,12 +64,15 @@ class Convert(object): doc = self.docx.document relationships_by_id, relationships_by_type = self.docx.document_relationships self.read_styles(relationships_by_type) + self.layers = OrderedDict() for wp in XPath('//w:p')(doc): p = self.convert_p(wp) self.body.append(p) # TODO: tables child of (nested tables?) # TODO: Last section properties child of + self.styles.cascade(self.layers) + numbered = [] for html_obj, obj in self.object_map.iteritems(): raw = obj.get('calibre_num_id', None) @@ -156,9 +159,11 @@ class Convert(object): dest = P() self.object_map[dest] = p style = self.styles.resolve_paragraph(p) + self.layers[p] = [] for run in XPath('descendant::w:r')(p): span = self.convert_run(run) dest.append(span) + self.layers[p].append(run) m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE) if m is not None: @@ -184,12 +189,9 @@ class Convert(object): spans = [] bs = {} for span, style in border_run: - c = style.css + style.get_border_css(bs) + style.clear_border_css() spans.append(span) - for x in ('width', 'color', 'style'): - val = c.pop('border-%s' % x, None) - if val is not None: - bs['border-%s' % x] = val if bs: cls = self.styles.register(bs, 'text_border') wrapper = self.wrap_elems(spans, SPAN())