From 74cd76d02c09436aec38fb04d38c8470c3efd2cb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 14 Jun 2013 16:35:21 +0530 Subject: [PATCH 01/25] Use a https link for the default author link template Fixes #1190952 [please use https where possible for external requests](https://bugs.launchpad.net/calibre/+bug/1190952) --- src/calibre/gui2/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 5fcde65ff5..a552ad8594 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -92,7 +92,7 @@ defs['tags_browser_partition_method'] = 'first letter' defs['tags_browser_collapse_at'] = 100 defs['tag_browser_dont_collapse'] = [] defs['edit_metadata_single_layout'] = 'default' -defs['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}' +defs['default_author_link'] = 'https://en.wikipedia.org/w/index.php?search={author}' defs['preserve_date_on_ctl'] = True defs['manual_add_auto_convert'] = False defs['cb_fullscreen'] = False From 4637e3935038bc74340cea1b434e554e02eaf76e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 14 Jun 2013 21:40:17 +0530 Subject: [PATCH 02/25] MOBI Output: Fix rasterizing of svg images MOBI Output: Fix rendering of SVG images that embed large raster images in 64bit calibre installs. Fixes #1191020 [Conversion to mobi (from ePub) removes inserted images](https://bugs.launchpad.net/calibre/+bug/1191020) --- .../ebooks/oeb/transforms/rasterize.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index d5eb7c5008..0b222deeca 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -8,9 +8,8 @@ __copyright__ = '2008, Marshall T. 
Vandegrift ' import os from urlparse import urldefrag -import base64 from lxml import etree -from PyQt4.QtCore import Qt +from PyQt4.QtCore import Qt, QUrl from PyQt4.QtCore import QByteArray from PyQt4.QtCore import QBuffer from PyQt4.QtCore import QIODevice @@ -18,11 +17,14 @@ from PyQt4.QtGui import QColor from PyQt4.QtGui import QImage from PyQt4.QtGui import QPainter from PyQt4.QtSvg import QSvgRenderer +from calibre.constants import iswindows from calibre.ebooks.oeb.base import XHTML, XLINK from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer +from calibre.ptempfile import PersistentTemporaryFile +from calibre.utils.imghdr import what IMAGE_TAGS = set([XHTML('img'), XHTML('object')]) KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align']) @@ -46,6 +48,7 @@ class SVGRasterizer(object): def __call__(self, oeb, context): oeb.logger.info('Rasterizing SVG images...') + self.temp_files = [] self.stylizer_cache = {} self.oeb = oeb self.opts = context @@ -54,6 +57,11 @@ class SVGRasterizer(object): self.dataize_manifest() self.rasterize_spine() self.rasterize_cover() + for pt in self.temp_files: + try: + os.remove(pt) + except: + pass def rasterize_svg(self, elem, width=0, height=0, format='PNG'): view_box = elem.get('viewBox', elem.get('viewbox', None)) @@ -112,9 +120,15 @@ class SVGRasterizer(object): if abshref not in hrefs: continue linkee = hrefs[abshref] - data = base64.encodestring(str(linkee)) - data = "data:%s;base64,%s" % (linkee.media_type, data) - elem.attrib[XLINK('href')] = data + data = str(linkee) + ext = what(None, data) or 'jpg' + with PersistentTemporaryFile(suffix='.'+ext) as pt: + pt.write(data) + self.temp_files.append(pt.name) + href = unicode(QUrl.fromLocalFile(pt.name).toString())[len('file://'):] + if iswindows: + href = href[1:] + elem.attrib[XLINK('href')] = href return svg def 
stylizer(self, item): From 877810c134bdcc76ee364cf3bc0ef5a7eda12b93 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 14 Jun 2013 21:48:01 +0530 Subject: [PATCH 03/25] Avoid unnecessary QUrl round-tripping --- src/calibre/ebooks/oeb/transforms/rasterize.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index 0b222deeca..acd815524e 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -9,7 +9,7 @@ __copyright__ = '2008, Marshall T. Vandegrift ' import os from urlparse import urldefrag from lxml import etree -from PyQt4.QtCore import Qt, QUrl +from PyQt4.QtCore import Qt from PyQt4.QtCore import QByteArray from PyQt4.QtCore import QBuffer from PyQt4.QtCore import QIODevice @@ -17,7 +17,6 @@ from PyQt4.QtGui import QColor from PyQt4.QtGui import QImage from PyQt4.QtGui import QPainter from PyQt4.QtSvg import QSvgRenderer -from calibre.constants import iswindows from calibre.ebooks.oeb.base import XHTML, XLINK from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME from calibre.ebooks.oeb.base import xml2str, xpath @@ -125,10 +124,7 @@ class SVGRasterizer(object): with PersistentTemporaryFile(suffix='.'+ext) as pt: pt.write(data) self.temp_files.append(pt.name) - href = unicode(QUrl.fromLocalFile(pt.name).toString())[len('file://'):] - if iswindows: - href = href[1:] - elem.attrib[XLINK('href')] = href + elem.attrib[XLINK('href')] = pt.name return svg def stylizer(self, item): From 8bc65df29a4914156b6aef2ee21e9f0328906bc3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 10:07:07 +0530 Subject: [PATCH 04/25] DOCX Input: Add support for contextual spacing DOCX Input: Add support for the Word setting "No space between paragraphs with the same style". 
Fixes #1191001 [docx conversion bug](https://bugs.launchpad.net/calibre/+bug/1191001) --- src/calibre/ebooks/docx/styles.py | 14 ++++++++++++++ src/calibre/ebooks/docx/to_html.py | 11 +++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 8e4d811803..3888496e5a 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -260,6 +260,7 @@ class Styles(object): for attr in ans.all_properties: if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr)) + ans.linked_style = direct_formatting.linked_style return ans def resolve_run(self, r): @@ -389,6 +390,19 @@ class Styles(object): else: ps.numbering = (ps.numbering[0], lvl) + def apply_contextual_spacing(self, paras): + last_para = None + for p in paras: + if last_para is not None: + ls = self.resolve_paragraph(last_para) + ps = self.resolve_paragraph(p) + if ls.linked_style is not None and ls.linked_style == ps.linked_style: + if ls.contextualSpacing: + ls.margin_bottom = 0 + if ps.contextualSpacing: + ps.margin_top = 0 + last_para = p + def register(self, css, prefix): h = hash(frozenset(css.iteritems())) ans, _ = self.classes.get(h, (None, None)) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index ad26f91d46..bc8336d768 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -86,6 +86,7 @@ class Convert(object): self.framed_map = {} self.anchor_map = {} self.link_map = defaultdict(list) + paras = [] self.log.debug('Converting Word markup to HTML') self.read_page_properties(doc) @@ -94,6 +95,8 @@ class Convert(object): if wp.tag.endswith('}p'): p = self.convert_p(wp) self.body.append(p) + paras.append(wp) + self.styles.apply_contextual_spacing(paras) notes_header = None if self.footnotes.has_notes: @@ -107,12 +110,16 @@ class 
Convert(object): dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor)) dl[-1][0].tail = ']' dl.append(DD()) + paras = [] for wp in note: if wp.tag.endswith('}tbl'): self.tables.register(wp, self.styles) self.page_map[wp] = self.current_page - p = self.convert_p(wp) - dl[-1].append(p) + else: + p = self.convert_p(wp) + dl[-1].append(p) + paras.append(wp) + self.styles.apply_contextual_spacing(paras) self.resolve_links(relationships_by_id) From ba0639c68105c79c4c70a10e0e486dc88e26a679 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 10:46:03 +0530 Subject: [PATCH 05/25] Move TOC creation into its own module --- src/calibre/ebooks/docx/to_html.py | 45 +--------------------- src/calibre/ebooks/docx/toc.py | 60 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 43 deletions(-) create mode 100644 src/calibre/ebooks/docx/toc.py diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index bc8336d768..23191864ff 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -25,9 +25,8 @@ from calibre.ebooks.docx.tables import Tables from calibre.ebooks.docx.footnotes import Footnotes from calibre.ebooks.docx.cleanup import cleanup_markup from calibre.ebooks.docx.theme import Theme +from calibre.ebooks.docx.toc import create_toc from calibre.ebooks.metadata.opf2 import OPFCreator -from calibre.ebooks.metadata.toc import TOC -from calibre.ebooks.oeb.polish.toc import elem_to_toc_text from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 class Text: @@ -267,48 +266,8 @@ class Convert(object): self.styles.resolve_numbering(numbering) - def create_toc(self): - ' Create a TOC from headings in the document ' - root = self.body - headings = ('h1', 'h2', 'h3') - tocroot = TOC() - xpaths = [XPath('//%s' % x) for x in headings] - level_prev = {i+1:None for i in xrange(len(xpaths))} - level_prev[0] = tocroot - level_item_map = 
{i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)} - item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems} - - self.idcount = 0 - - def ensure_id(elem): - ans = elem.get('id', None) - if not ans: - self.idcount += 1 - ans = 'toc_id_%d' % self.idcount - elem.set('id', ans) - return ans - - for item in descendants(root, *headings): - lvl = plvl = item_level_map.get(item, None) - if lvl is None: - continue - parent = None - while parent is None: - plvl -= 1 - parent = level_prev[plvl] - lvl = plvl + 1 - elem_id = ensure_id(item) - text = elem_to_toc_text(item) - toc = parent.add_item('index.html', elem_id, text) - level_prev[lvl] = toc - for i in xrange(lvl+1, len(xpaths)+1): - level_prev[i] = None - - if len(tuple(tocroot.flat())) > 1: - return tocroot - def write(self): - toc = self.create_toc() + toc = create_toc(self.body) raw = html.tostring(self.html, encoding='utf-8', doctype='') with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: f.write(raw) diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py new file mode 100644 index 0000000000..8036808701 --- /dev/null +++ b/src/calibre/ebooks/docx/toc.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from calibre.ebooks.docx.names import XPath, descendants +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.oeb.polish.toc import elem_to_toc_text + +class Count(object): + + __slots__ = ('val',) + + def __init__(self): + self.val = 0 + +def create_toc(body): + ' Create a TOC from headings in the document ' + headings = ('h1', 'h2', 'h3') + tocroot = TOC() + xpaths = [XPath('//%s' % x) for x in headings] + level_prev = {i+1:None for i in xrange(len(xpaths))} + level_prev[0] = tocroot + level_item_map = {i+1:frozenset(xp(body)) for i, xp in enumerate(xpaths)} + 
item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems} + + idcount = Count() + + def ensure_id(elem): + ans = elem.get('id', None) + if not ans: + idcount.val += 1 + ans = 'toc_id_%d' % idcount.val + elem.set('id', ans) + return ans + + for item in descendants(body, *headings): + lvl = plvl = item_level_map.get(item, None) + if lvl is None: + continue + parent = None + while parent is None: + plvl -= 1 + parent = level_prev[plvl] + lvl = plvl + 1 + elem_id = ensure_id(item) + text = elem_to_toc_text(item) + toc = parent.add_item('index.html', elem_id, text) + level_prev[lvl] = toc + for i in xrange(lvl+1, len(xpaths)+1): + level_prev[i] = None + + if len(tuple(tocroot.flat())) > 1: + return tocroot + + + From 58233b596cf45754583a25447611f868c83ff005 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 11:49:04 +0530 Subject: [PATCH 06/25] Bulk metadata: focus the Review instead of OK After a bulk metadata download, focus the review button on the popup notification, instead of the OK button. 
Fixes #1190931 [Newly custom added columns keep disappearing on restart](https://bugs.launchpad.net/calibre/+bug/1190931) --- src/calibre/gui2/proceed.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py index 67efe48b53..0dd1a2189d 100644 --- a/src/calibre/gui2/proceed.py +++ b/src/calibre/gui2/proceed.py @@ -19,7 +19,7 @@ from calibre.gui2.dialogs.message_box import ViewLog Question = namedtuple('Question', 'payload callback cancel_callback ' 'title msg html_log log_viewer_title log_is_file det_msg ' 'show_copy_button checkbox_msg checkbox_checked action_callback ' - 'action_label action_icon') + 'action_label action_icon focus_action') class ProceedQuestion(QDialog): @@ -155,13 +155,14 @@ class ProceedQuestion(QDialog): self.checkbox.setChecked(question.checkbox_checked) self.do_resize() self.show() - self.bb.button(self.bb.Yes).setDefault(True) - self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason) + button = self.action_button if question.focus_action and question.action_callback is not None else self.bb.button(self.bb.Yes) + button.setDefault(True) + button.setFocus(Qt.OtherFocusReason) def __call__(self, callback, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, cancel_callback=None, log_is_file=False, checkbox_msg=None, checkbox_checked=False, - action_callback=None, action_label=None, action_icon=None): + action_callback=None, action_label=None, action_icon=None, focus_action=False): ''' A non modal popup that notifies the user that a background task has been completed. This class guarantees that only a single popup is @@ -192,13 +193,14 @@ class ProceedQuestion(QDialog): exactly the same way as callback. 
:param action_label: The text on the action button :param action_icon: The icon for the action button, must be a QIcon object or None + :param focus_action: If True, the action button will be focused instead of the Yes button ''' question = Question( payload, callback, cancel_callback, title, msg, html_log, log_viewer_title, log_is_file, det_msg, show_copy_button, checkbox_msg, checkbox_checked, action_callback, action_label, - action_icon) + action_icon, focus_action) self.questions.append(question) self.show_question() From 3abe0800238675001c6280b798b4904b224ca22f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 16:13:42 +0530 Subject: [PATCH 07/25] DOCX Input: Support webHidden DOCX Input: Hide text that has been marked as not being visible in the web view in Word. --- src/calibre/ebooks/docx/char_styles.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index 02b8299c94..c9a2fee4c9 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -132,10 +132,10 @@ class RunStyle(object): all_properties = { 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', - 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', + 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'webHidden', 'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color', - 'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family' + 'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family', } toggle_properties = { @@ -150,7 +150,7 @@ class RunStyle(object): else: for p in ( 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow', - 'smallCaps', 'strike', 'vanish', + 'smallCaps', 'strike', 'vanish', 'webHidden', ): setattr(self, p, binary_property(rPr, p)) @@ -210,7 +210,7 @@ class RunStyle(object): 
c['text-shadow'] = '2px 2px' if self.smallCaps is True: c['font-variant'] = 'small-caps' - if self.vanish is True: + if self.vanish is True or self.webHidden is True: c['display'] = 'none' self.get_border_css(c) From e2823644595d1c119164f9c4928efedacb367720 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 17:12:30 +0530 Subject: [PATCH 08/25] ... --- src/calibre/ebooks/docx/styles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 3888496e5a..21f45616fa 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -397,9 +397,9 @@ class Styles(object): ls = self.resolve_paragraph(last_para) ps = self.resolve_paragraph(p) if ls.linked_style is not None and ls.linked_style == ps.linked_style: - if ls.contextualSpacing: + if ls.contextualSpacing is True: ls.margin_bottom = 0 - if ps.contextualSpacing: + if ps.contextualSpacing is True: ps.margin_top = 0 last_para = p From e444f27de8201a7e07a3c8fe87d27ea366781289 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 17:15:16 +0530 Subject: [PATCH 09/25] Remove incorrect rendering of separator field chars Unconditionally rendering the separator is wrong, for example, for the TOC field it causes an extra space at the start of the first entry. In any case, many common fields are webHidden, so they dont display anyway. 
--- src/calibre/ebooks/docx/to_html.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 23191864ff..6fd0026874 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -440,8 +440,6 @@ class Convert(object): l.set('class', 'noteref') text.add_elem(l) ans.append(text.elem) - elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate': - text.buf.append('\xa0') if text.buf: setattr(text.elem, text.attr, ''.join(text.buf)) From 8c261063b484128bc2d4333d9fa7eef1a97a84e2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 18:44:58 +0530 Subject: [PATCH 10/25] DOCX Input: Support for Word created ToC DOCX Input: Support for Table of Contents created using the Word Table of Contents tool. calibre now first looks for such a Table of Contents and only if one is not found does it generate a ToC from headings. --- src/calibre/ebooks/docx/to_html.py | 8 +-- src/calibre/ebooks/docx/toc.py | 84 +++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 6fd0026874..c3b2391d3f 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -171,7 +171,7 @@ class Convert(object): self.log.debug('Cleaning up redundant markup generated by Word') cleanup_markup(self.html, self.styles) - return self.write() + return self.write(doc) def read_page_properties(self, doc): current = [] @@ -266,8 +266,8 @@ class Convert(object): self.styles.resolve_numbering(numbering) - def write(self): - toc = create_toc(self.body) + def write(self, doc): + toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map) raw = html.tostring(self.html, encoding='utf-8', doctype='') with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: f.write(raw) @@ -367,11 +367,13 @@ class Convert(object): 
return wrapper def resolve_links(self, relationships_by_id): + self.resolved_link_map = {} for hyperlink, spans in self.link_map.iteritems(): span = spans[0] if len(spans) > 1: span = self.wrap_elems(spans, SPAN()) span.tag = 'a' + self.resolved_link_map[hyperlink] = span tgt = get(hyperlink, 'w:tgtFrame') if tgt: span.set('target', tgt) diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py index 8036808701..5936d34355 100644 --- a/src/calibre/ebooks/docx/toc.py +++ b/src/calibre/ebooks/docx/toc.py @@ -6,7 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' -from calibre.ebooks.docx.names import XPath, descendants +from collections import namedtuple + +from lxml.etree import tostring + +from calibre.ebooks.docx.names import XPath, descendants, get, ancestor from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.oeb.polish.toc import elem_to_toc_text @@ -17,7 +21,7 @@ class Count(object): def __init__(self): self.val = 0 -def create_toc(body): +def from_headings(body): ' Create a TOC from headings in the document ' headings = ('h1', 'h2', 'h3') tocroot = TOC() @@ -56,5 +60,81 @@ def create_toc(body): if len(tuple(tocroot.flat())) > 1: return tocroot +def structure_toc(entries): + indent_vals = sorted({x.indent for x in entries}) + last_found = [None for i in indent_vals] + newtoc = TOC() + + if len(indent_vals) > 6: + for x in entries: + newtoc.add_item('index.html', x.anchor, x.text) + return newtoc + + def find_parent(level): + candidates = last_found[:level] + for x in reversed(candidates): + if x is not None: + return x + return newtoc + + for item in entries: + level = indent_vals.index(item.indent) + parent = find_parent(level) + last_found[level] = parent.add_item('index.html', item.anchor, + item.text) + for i in xrange(level+1, len(last_found)): + last_found[i] = None + + return newtoc + +def link_to_txt(a, styles, object_map): + if len(a) > 
1: + for child in a: + run = object_map.get(child, None) + if run is not None: + rs = styles.resolve(run) + if rs.css.get('display', None) == 'none': + a.remove(child) + + return tostring(a, method='text', with_tail=False, encoding=unicode).strip() + +def from_toc(docx, link_map, styles, object_map): + toc_level = None + level = 0 + TI = namedtuple('TI', 'text anchor indent') + toc = [] + for tag in XPath('//*[(@w:fldCharType and name()="w:fldChar") or name()="w:hyperlink" or name()="w:instrText"]')(docx): + n = tag.tag.rpartition('}')[-1] + if n == 'fldChar': + t = get(tag, 'w:fldCharType') + if t == 'begin': + level += 1 + elif t == 'end': + level -= 1 + if toc_level is not None and level < toc_level: + break + elif n == 'instrText': + if level > 0 and tag.text and tag.text.strip().startswith('TOC '): + toc_level = level + elif n == 'hyperlink': + if toc_level is not None and level >= toc_level and tag in link_map: + a = link_map[tag] + href = a.get('href', None) + txt = link_to_txt(a, styles, object_map) + p = ancestor(tag, 'w:p') + if txt and href and p is not None: + ps = styles.resolve_paragraph(p) + try: + ml = int(ps.margin_left[:-2]) + except (TypeError, ValueError, AttributeError): + ml = 0 + if ps.text_align in {'center', 'right'}: + ml = 0 + toc.append(TI(txt, href[1:], ml)) + if toc: + return structure_toc(toc) + +def create_toc(docx, body, link_map, styles, object_map): + return from_toc(docx, link_map, styles, object_map) or from_headings(body) From ca3573b6cbe29c0a1fe82e609075f5ca314c4258 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jun 2013 21:13:32 +0530 Subject: [PATCH 11/25] E-book viewer: Make bookmark button instant popup E-book viewer: Change the bookmark button to always popup a menu when clicked, makes accessing existing bookmarks easier. 
--- src/calibre/gui2/viewer/main.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 3b63d51c15..113e1201e2 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -82,7 +82,8 @@ class History(list): return None item = self[self.forward_pos] self.back_pos = self.forward_pos - 1 - if self.back_pos < 0: self.back_pos = None + if self.back_pos < 0: + self.back_pos = None self.insert_pos = self.back_pos or 0 self.forward_pos = None if self.forward_pos > len(self) - 2 else self.forward_pos + 1 self.set_actions() @@ -268,7 +269,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.action_full_screen.shortcuts()])) self.action_back.triggered[bool].connect(self.back) self.action_forward.triggered[bool].connect(self.forward) - self.action_bookmark.triggered[bool].connect(self.bookmark) self.action_preferences.triggered.connect(self.do_config) self.pos.editingFinished.connect(self.goto_page_num) self.vertical_scrollbar.valueChanged[int].connect(lambda @@ -294,7 +294,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.toc.setCursor(Qt.PointingHandCursor) self.tool_bar.setContextMenuPolicy(Qt.PreventContextMenu) self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu) - self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup) + self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.InstantPopup) self.action_full_screen.setCheckable(True) self.full_screen_label = QLabel('''
@@ -394,7 +394,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.action_toggle_paged_mode.setToolTip(self.FLOW_MODE_TT if self.action_toggle_paged_mode.isChecked() else self.PAGED_MODE_TT) - if at_start: return + if at_start: + return self.reload() def settings_changed(self): @@ -486,8 +487,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): at_start=True) def lookup(self, word): - self.dictionary_view.setHtml('

'+ \ - _('Connecting to dict.org to lookup: %s…')%word + \ + self.dictionary_view.setHtml('

'+ + _('Connecting to dict.org to lookup: %s…')%word + '

') self.dictionary_box.show() self._lookup = Lookup(word, parent=self) @@ -964,6 +965,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): def set_bookmarks(self, bookmarks): self.bookmarks_menu.clear() + self.bookmarks_menu.addAction(_("Bookmark this location"), self.bookmark) self.bookmarks_menu.addAction(_("Manage Bookmarks"), self.manage_bookmarks) self.bookmarks_menu.addSeparator() current_page = None @@ -1202,3 +1204,4 @@ def main(args=sys.argv): if __name__ == '__main__': sys.exit(main()) + From b63e0df73fc45dc35ebbdd1b64ef6b01951ac082 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 16 Jun 2013 08:31:05 +0530 Subject: [PATCH 12/25] DOCX Input: Fix image name generation DOCX Input: When converting docx files with large numbers of unnamed images, do not crash on windows. Fixes #1191354 [Word docx conversion](https://bugs.launchpad.net/calibre/+bug/1191354) --- src/calibre/ebooks/docx/images.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index 76f43e7e0c..e24b550797 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -112,15 +112,16 @@ class Images(object): base += '.' 
+ ext exists = frozenset(self.used.itervalues()) c = 1 - while base in exists: + name = base + while name in exists: n, e = base.rpartition('.')[0::2] - base = '%s-%d.%s' % (n, c, e) + name = '%s-%d.%s' % (n, c, e) c += 1 - self.used[rid] = base - with open(os.path.join(self.dest_dir, base), 'wb') as f: + self.used[rid] = name + with open(os.path.join(self.dest_dir, name), 'wb') as f: f.write(raw) - self.all_images.add('images/' + base) - return base + self.all_images.add('images/' + name) + return name def pic_to_img(self, pic, alt=None): name = None From 1bada2b35b6fd59c2afcb86efaae30f028168375 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 16 Jun 2013 09:49:50 +0530 Subject: [PATCH 13/25] Get Books: Fix error when using internal browser Get Books: Fix error when using internal browser on some systems. I cannot replicate this error so my fix is speculative, based on the idea that not keeping explicit references to the python object is causing them to be garbage collected. Fixes #1191199 [Python exception downloading books from Barnes & Noble](https://bugs.launchpad.net/calibre/+bug/1191199) --- src/calibre/gui2/store/web_control.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/web_control.py b/src/calibre/gui2/store/web_control.py index 48e1b7dff0..8318ae9078 100644 --- a/src/calibre/gui2/store/web_control.py +++ b/src/calibre/gui2/store/web_control.py @@ -24,8 +24,10 @@ class NPWebView(QWebView): self.gui = None self.tags = '' - self.setPage(NPWebPage()) - self.page().networkAccessManager().setCookieJar(QNetworkCookieJar()) + self._page = NPWebPage() + self.setPage(self._page) + self.cookie_jar = QNetworkCookieJar() + self.page().networkAccessManager().setCookieJar(self.cookie_jar) http_proxy = get_proxies().get('http', None) if http_proxy: From 7a0675e59a94f1159eaf9d13dfa862a49cd58a1a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Jun 2013 09:29:56 +0530 Subject: [PATCH 14/25] Fix PIL import --- 
src/calibre/ebooks/textile/functions.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py index 45e73cfe8f..7380b10d1f 100755 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -86,7 +86,14 @@ def getimagesize(url): """ try: - import ImageFile + from PIL import ImageFile + except ImportError: + try: + import ImageFile + except ImportError: + return None + + try: import urllib2 except ImportError: return None @@ -220,7 +227,7 @@ class Textile(object): (re.compile(r'{(S\^|\^S)}'), r'Ŝ'), # S-circumflex (re.compile(r'{(s\^|\^s)}'), r'ŝ'), # s-circumflex - + (re.compile(r'{(S\ˇ|\ˇS)}'), r'Š'), # S-caron (re.compile(r'{(s\ˇ|\ˇs)}'), r'š'), # s-caron (re.compile(r'{(T\ˇ|\ˇT)}'), r'Ť'), # T-caron @@ -229,7 +236,7 @@ class Textile(object): (re.compile(r'{(u\°|\°u)}'), r'ů'), # u-ring (re.compile(r'{(Z\ˇ|\ˇZ)}'), r'Ž'), # Z-caron (re.compile(r'{(z\ˇ|\ˇz)}'), r'ž'), # z-caron - + (re.compile(r'{\*}'), r'•'), # bullet (re.compile(r'{Fr}'), r'₣'), # Franc (re.compile(r'{(L=|=L)}'), r'₤'), # Lira @@ -245,7 +252,7 @@ class Textile(object): (re.compile(r"{(’|'/|/')}"), r'’'), # closing-single-quote - apostrophe (re.compile(r"{(‘|\\'|'\\)}"), r'‘'), # opening-single-quote (re.compile(r'{(”|"/|/")}'), r'”'), # closing-double-quote - (re.compile(r'{(“|\\"|"\\)}'), r'“'), # opening-double-quote + (re.compile(r'{(“|\\"|"\\)}'), r'“'), # opening-double-quote ] glyph_defaults = [ (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign From 752bd9e06ea7df02296fd2c563ce1e31e8da46ff Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Jun 2013 11:03:15 +0530 Subject: [PATCH 15/25] DOCX Input: Detect likely cover image DOCX Input: If a large image that looks like a cover is present at the start of the document, remove it and use it as the cover of the output ebook. 
This can be turned off under the DOC Input section of the conversion dialog. --- .../ebooks/conversion/plugins/docx_input.py | 10 ++++- src/calibre/ebooks/docx/cleanup.py | 33 ++++++++++++++- src/calibre/ebooks/docx/to_html.py | 7 +++- src/calibre/gui2/convert/docx_input.py | 23 +++++++++++ src/calibre/gui2/convert/docx_input.ui | 41 +++++++++++++++++++ 5 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 src/calibre/gui2/convert/docx_input.py create mode 100644 src/calibre/gui2/convert/docx_input.ui diff --git a/src/calibre/ebooks/conversion/plugins/docx_input.py b/src/calibre/ebooks/conversion/plugins/docx_input.py index 7492d46c68..190a771379 100644 --- a/src/calibre/ebooks/conversion/plugins/docx_input.py +++ b/src/calibre/ebooks/conversion/plugins/docx_input.py @@ -14,9 +14,17 @@ class DOCXInput(InputFormatPlugin): description = 'Convert DOCX files (.docx) to HTML' file_types = set(['docx']) + options = { + OptionRecommendation(name='docx_no_cover', recommended_value=False, + help=_('Normally, if a large image is present at the start of the document that looks like a cover, ' + 'it will be removed from the document and used as the cover for created ebook. 
This option ' + 'turns off that behavior.')), + + } + recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) def convert(self, stream, options, file_ext, log, accelerators): from calibre.ebooks.docx.to_html import Convert - return Convert(stream, log=log)() + return Convert(stream, detect_cover=not options.docx_no_cover, log=log)() diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 2b1e095025..a55f8449d8 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' +import os def mergeable(previous, current): if previous.tail or current.tail: @@ -83,8 +84,19 @@ def lift(span): else: add_text(last_child, 'tail', span.tail) +def before_count(root, tag, limit=10): + body = root.xpath('//body[1]') + if not body: + return limit + ans = 0 + for elem in body[0].iterdescendants(): + if elem is tag: + return ans + ans += 1 + if ans > limit: + return limit -def cleanup_markup(root, styles): +def cleanup_markup(log, root, styles, dest_dir, detect_cover): # Merge consecutive spans that have the same styling current_run = [] for span in root.xpath('//span'): @@ -134,3 +146,22 @@ def cleanup_markup(root, styles): for span in root.xpath('//span[not(@class) and not(@id)]'): lift(span) + if detect_cover: + # Check if the first image in the document is possibly a cover + img = root.xpath('//img[@src][1]') + if img: + img = img[0] + path = os.path.join(dest_dir, img.get('src')) + if os.path.exists(path) and before_count(root, img, limit=10) < 5: + from calibre.utils.magick.draw import identify + try: + width, height, fmt = identify(path) + except: + width, height, fmt = 0, 0, None + is_cover = 0.8 <= height/width <= 1.8 and height*width >= 160000 + if is_cover: + log.debug('Detected an image that looks like a cover') + img.getparent().remove(img) + 
return path + + diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index c3b2391d3f..963d1fc6c8 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -40,11 +40,12 @@ class Text: class Convert(object): - def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None): + def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None): self.docx = DOCX(path_or_stream, log=log) self.ms_pat = re.compile(r'\s{2,}') self.ws_pat = re.compile(r'[\n\r\t]') self.log = self.docx.log + self.detect_cover = detect_cover self.notes_text = notes_text or _('Notes') self.dest_dir = dest_dir or os.getcwdu() self.mi = self.docx.metadata @@ -169,7 +170,7 @@ class Convert(object): break self.log.debug('Cleaning up redundant markup generated by Word') - cleanup_markup(self.html, self.styles) + self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover) return self.write(doc) @@ -280,6 +281,8 @@ class Convert(object): opf.toc = toc opf.create_manifest_from_files_in([self.dest_dir]) opf.create_spine(['index.html']) + if self.cover_image is not None: + opf.guide.set_cover(self.cover_image) with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx: opf.render(of, ncx, 'toc.ncx') return os.path.join(self.dest_dir, 'metadata.opf') diff --git a/src/calibre/gui2/convert/docx_input.py b/src/calibre/gui2/convert/docx_input.py new file mode 100644 index 0000000000..46234c6a36 --- /dev/null +++ b/src/calibre/gui2/convert/docx_input.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from calibre.gui2.convert.docx_input_ui import Ui_Form +from calibre.gui2.convert import Widget + +class PluginWidget(Widget, 
Ui_Form): + + TITLE = _('DOCX Input') + HELP = _('Options specific to')+' DOCX '+_('input') + COMMIT_NAME = 'docx_input' + ICON = I('mimetypes/docx.png') + + def __init__(self, parent, get_option, get_help, db=None, book_id=None): + Widget.__init__(self, parent, + ['docx_no_cover', ]) + self.initialize_options(get_option, get_help, db, book_id) + diff --git a/src/calibre/gui2/convert/docx_input.ui b/src/calibre/gui2/convert/docx_input.ui new file mode 100644 index 0000000000..41948118dc --- /dev/null +++ b/src/calibre/gui2/convert/docx_input.ui @@ -0,0 +1,41 @@ + + + Form + + + + 0 + 0 + 518 + 353 + + + + Form + + + + + + Do not try to autodetect a &cover from images in the document + + + + + + + Qt::Vertical + + + + 20 + 213 + + + + + + + + + From 1e5ce66ca36bbc16c479e0da0e801329a22c6387 Mon Sep 17 00:00:00 2001 From: fenuks Date: Mon, 17 Jun 2013 09:45:13 +0200 Subject: [PATCH 16/25] various minor fixes --- recipes/ekologia_pl.recipe | 4 ++- recipes/gildia_pl.recipe | 59 ++++++++++++++++++++--------------- recipes/media2.recipe | 36 ++++++++++----------- recipes/nauka_w_polsce.recipe | 2 +- recipes/polter_pl.recipe | 2 +- recipes/ppe_pl.recipe | 46 ++++++++++++--------------- recipes/pure_pc.recipe | 17 ++++++---- 7 files changed, 85 insertions(+), 81 deletions(-) diff --git a/recipes/ekologia_pl.recipe b/recipes/ekologia_pl.recipe index e925ebad6f..c053e6d5bc 100644 --- a/recipes/ekologia_pl.recipe +++ b/recipes/ekologia_pl.recipe @@ -9,13 +9,15 @@ class EkologiaPl(BasicNewsRecipe): language = 'pl' cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png' ignore_duplicate_articles = {'title', 'url'} - extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}' + extra_css = '.title {font-size: 200%;} .imagePowiazane {float:left; margin-right:5px; width: 200px;}' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True + remove_javascript = True use_embedded_content = 
False remove_attrs = ['style'] + keep_only_tags = [dict(attrs={'class':'contentParent'})] remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})] feeds = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')] diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe index 37c129aaa1..513bbe44d6 100644 --- a/recipes/gildia_pl.recipe +++ b/recipes/gildia_pl.recipe @@ -16,40 +16,47 @@ class Gildia(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} preprocess_regexps = [(re.compile(ur''), lambda match: '') ] ignore_duplicate_articles = {'title', 'url'} - remove_tags = [dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})] - keep_only_tags = dict(name='div', attrs={'class':'widetext'}) - feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')] - + remove_tags = [dict(name='div', attrs={'class':['backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})] + keep_only_tags = [dict(name='div', 
attrs={'class':'widetext'})] + feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), + (u'Literatura', u'http://www.literatura.gildia.pl/rss'), + (u'Film', u'http://www.film.gildia.pl/rss'), + (u'Horror', u'http://www.horror.gildia.pl/rss'), + (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), + (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), + (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), + (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), + (u'Techno', u'http://www.techno.gildia.pl/rss'), + (u'Historia', u'http://www.historia.gildia.pl/rss'), + (u'Magia', u'http://www.magia.gildia.pl/rss'), + (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), + (u'RPG', u'http://www.rpg.gildia.pl/rss'), + (u'LARP', u'http://www.larp.gildia.pl/rss'), + (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), + (u'Nauka', u'http://www.nauka.gildia.pl/rss'), + ] def skip_ad_pages(self, soup): content = soup.find('div', attrs={'class':'news'}) - if 'recenzj' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'recenzj' in link['href'] or 'muzyka/plyty' in link['href']: - return self.index_to_soup(link['href'], raw=True) - if 'fragmen' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'fragment' in link['href']: - return self.index_to_soup(link['href'], raw=True) - if 'relacj' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'relacj' in link['href']: - return self.index_to_soup(link['href'], raw=True) - if 'wywiad' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'wywiad' in link['href']: - return self.index_to_soup(link['href'], raw=True) - + words = ('recenzj', 'zapowied','fragmen', 'relacj', 'wywiad', 'nominacj') + for word in words: + if word in soup.title.string.lower(): + for link in content.findAll(name='a'): + if word in link['href'] or (link.string and word in link.string): + return self.index_to_soup(link['href'], raw=True) + for tag in 
content.findAll(name='a', href=re.compile('/publicystyka/')): + if 'Więcej...' == tag.string: + return self.index_to_soup(tag['href'], raw=True) def preprocess_html(self, soup): for a in soup('a'): if a.has_key('href') and not a['href'].startswith('http'): if '/gry/' in a['href']: - a['href']='http://www.gry.gildia.pl' + a['href'] + a['href'] = 'http://www.gry.gildia.pl' + a['href'] elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower(): - a['href']='http://www.literatura.gildia.pl' + a['href'] + a['href'] = 'http://www.literatura.gildia.pl' + a['href'] elif u'komiks' in soup.title.string.lower(): - a['href']='http://www.literatura.gildia.pl' + a['href'] + a['href'] = 'http://www.literatura.gildia.pl' + a['href'] else: - a['href']='http://www.gildia.pl' + a['href'] - return soup + a['href'] = 'http://www.gildia.pl' + a['href'] + return soup \ No newline at end of file diff --git a/recipes/media2.recipe b/recipes/media2.recipe index 135740a62e..d685a90803 100644 --- a/recipes/media2.recipe +++ b/recipes/media2.recipe @@ -3,33 +3,29 @@ __license__ = 'GPL v3' __copyright__ = 'teepel' -''' -media2.pl -''' - from calibre.web.feeds.news import BasicNewsRecipe class media2_pl(BasicNewsRecipe): title = u'Media2' __author__ = 'teepel ' language = 'pl' - description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' - masthead_url='http://media2.pl/res/logo/www.png' - remove_empty_feeds= True - oldest_article = 1 + description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' 
+ masthead_url = 'http://media2.pl/res/logo/www.png' + cover_url = 'http://media2.pl/res/logo/www.png' + remove_empty_feeds = True + oldest_article = 7 max_articles_per_feed = 100 - remove_javascript=True - no_stylesheets=True - simultaneous_downloads = 5 - + remove_javascript = True + no_stylesheets = True + remove_attributes = ['style'] + ignore_duplicate_articles = {'title', 'url'} extra_css = '''.news-lead{font-weight: bold; }''' - keep_only_tags =[] - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})) + keep_only_tags = [dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})] + remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : 'item-sidebar'}), dict(name = 'div', attrs = {'class' : 'news-tags'})] - remove_tags =[] - remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) - - feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')] + feeds = [(u'Media2', u'http://feeds.feedburner.com/media2'), (u'Internet', u'http://feeds.feedburner.com/media2/internet'), + (u'Media', 'http://feeds.feedburner.com/media2/media'), (u'Telekomunikacja', 'http://feeds.feedburner.com/media2/telekomunikacja'), + (u'Reklama/PR', 'http://feeds.feedburner.com/media2/reklama-pr'), (u'Technologie', 'http://feeds.feedburner.com/media2/technologie'), + (u'Badania', 'http://feeds.feedburner.com/media2/badania') + ] \ No newline at end of file diff --git a/recipes/nauka_w_polsce.recipe b/recipes/nauka_w_polsce.recipe index 715780d162..2a44aa7e84 100644 --- a/recipes/nauka_w_polsce.recipe +++ b/recipes/nauka_w_polsce.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe import re class NaukawPolsce(BasicNewsRecipe): - title = u'Nauka w Polsce' + title = u'PAP Nauka w Polsce' __author__ = 'fenuks' 
description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.' category = 'science' diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe index 1f9cef3be3..aea21dca9c 100644 --- a/recipes/polter_pl.recipe +++ b/recipes/polter_pl.recipe @@ -3,7 +3,7 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class Poltergeist(BasicNewsRecipe): - title = u'Poltergeist' + title = u'Polter.pl' __author__ = 'fenuks' description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.' category = 'fantasy, books, rpg, games' diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe index 2edc611ad7..597c9ef2d3 100644 --- a/recipes/ppe_pl.recipe +++ b/recipes/ppe_pl.recipe @@ -1,41 +1,35 @@ #!/usr/bin/env python __license__ = 'GPL v3' - +import re from calibre.web.feeds.news import BasicNewsRecipe class ppeRecipe(BasicNewsRecipe): __author__ = u'Artur Stachecki ' language = 'pl' - title = u'ppe.pl' category = u'News' description = u'Portal o konsolach i grach wideo.' 
- cover_url='' - remove_empty_feeds= True - no_stylesheets=True - oldest_article = 1 - max_articles_per_feed = 100000 - recursions = 0 + extra_css = '.categories > li {list-style: none; display: inline;} .galmini > li {list-style: none; float: left;} .calibre_navbar {clear: both;}' + remove_empty_feeds = True no_stylesheets = True + oldest_article = 7 + max_articles_per_feed = 100 remove_javascript = True - simultaneous_downloads = 2 + remove_empty_feeds = True + remove_attributes = ['style'] + + keep_only_tags = [dict(attrs={'class':'box'})] + remove_tags = [dict(attrs={'class':['voltage-1', 'voltage-2', 'encyklopedia', 'nag', 'related', 'comment_form', 'komentarze-box']})] - keep_only_tags =[] - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'})) + feeds = [ + ('Newsy', 'http://ppe.pl/rss.html'), + ('Recenzje', 'http://ppe.pl/rss-recenzje.html'), + ('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'), + ] - remove_tags =[] - remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'})) - - remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'})) - - feeds = [ - ('Newsy', 'feed://ppe.pl/rss/rss.xml'), - ] + def get_cover_url(self): + soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html') + part = soup.find(attrs={'class':'archiwum-foto'})['style'] + part = re.search("'(.+)'", part).group(1).replace('_min', '') + return 'http://www.ppe.pl' + part diff --git a/recipes/pure_pc.recipe 
b/recipes/pure_pc.recipe index 13d9307a09..167136c90f 100644 --- a/recipes/pure_pc.recipe +++ b/recipes/pure_pc.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Comment @@ -11,6 +12,7 @@ class PurePC(BasicNewsRecipe): language = 'pl' masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg' cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg' + extra_css = '.wykres_logo {float: left; margin-right: 5px;}' no_stylesheets = True keep_only_tags= [dict(id='content')] remove_tags_after= dict(attrs={'class':'fivestar-widget'}) @@ -19,11 +21,14 @@ class PurePC(BasicNewsRecipe): def append_page(self, soup, appendtag): - nexturl= appendtag.find(attrs={'class':'pager-next'}) - if nexturl: - while nexturl: - soup2 = self.index_to_soup('http://www.purepc.pl'+ nexturl.a['href']) - nexturl=soup2.find(attrs={'class':'pager-next'}) + lasturl = appendtag.find(attrs={'class':'pager-last'}) + if lasturl: + regex = re.search('(.+?2C)(\d+)', lasturl.a['href']) + baseurl = regex.group(1).replace('?page=0%2C', '?page=1%2C') + baseurl = 'http://www.purepc.pl' + baseurl + nr = int(regex.group(2)) + for page_nr in range(1, nr+1): + soup2 = self.index_to_soup(baseurl+str(page_nr)) pagetext = soup2.find(attrs={'class':'article'}) pos = len(appendtag.contents) appendtag.insert(pos, pagetext) @@ -35,4 +40,4 @@ class PurePC(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) - return soup + return soup \ No newline at end of file From b08854e60acb47bd8b78894801c194c2f3e47ee7 Mon Sep 17 00:00:00 2001 From: fenuks Date: Mon, 17 Jun 2013 09:53:53 +0200 Subject: [PATCH 17/25] new Polish news sources --- recipes/cdrinfo_pl.recipe | 65 ++++++++++++++++++++++ recipes/gazeta_pl_bydgoszcz.recipe | 88 ++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 recipes/cdrinfo_pl.recipe create mode 100644 recipes/gazeta_pl_bydgoszcz.recipe diff --git 
a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe new file mode 100644 index 0000000000..2a8b3b9a2e --- /dev/null +++ b/recipes/cdrinfo_pl.recipe @@ -0,0 +1,65 @@ +__license__ = 'GPL v3' +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Comment +class cdrinfo(BasicNewsRecipe): + title = u'CDRinfo.pl' + __author__ = 'fenuks' + description = u'Serwis poświęcony archiwizacji danych. Testy i recenzje nagrywarek. Programy do nagrywania płyt. Dyski twarde, dyski SSD i serwery sieciowe NAS. Rankingi dyskow twardych, najszybsze dyski twarde, newsy, artykuły, testy, recenzje, porady, oprogramowanie. Zestawienie nagrywarek, najnowsze biosy do nagrywarek, programy dla dysków twardych.' + category = 'it, hardware' + #publication_type = '' + language = 'pl' + #encoding = '' + #extra_css = '' + cover_url = 'http://www.cdrinfo.pl/gfx/graph3/top.jpg' + #masthead_url = '' + use_embedded_content = False + oldest_article = 777 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + remove_javascript = True + remove_attributes = ['style'] + preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com\.

', re.DOTALL), lambda match: '')] + ignore_duplicate_articles = {'title', 'url'} + + keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id='text')] + remove_tags = [dict(attrs={'class':['navigation', 'sociable']}), dict(name='hr'), dict(id='respond')] + remove_tags_after = dict(id='artnawigacja') + feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'), + (u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'), + (u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml') + ] + + def preprocess_html(self, soup): + if soup.find(id='artnawigacja'): + self.append_page(soup, soup.body) + return soup + + def append_page(self, soup, appendtag): + baseurl = 'http://cdrinfo.pl' + soup.find(name='input', attrs={'name':'ref'})['value'] + '/' + if baseurl[-2] == '/': + baseurl = baseurl[:-1] + tag = soup.find(id='artnawigacja') + div = tag.find('div', attrs={'align':'right'}) + while div: + counter = 0 + while counter < 5: + try: + soup2 = self.index_to_soup(baseurl+div.a['href']) + break + except: + counter += 1 + tag2 = soup2.find(id='artnawigacja') + div = tag2.find('div', attrs={'align':'right'}) + pagetext = soup2.find(attrs={'class':'art'}) + comments = pagetext.findAll(text=lambda text:isinstance(text, Comment)) + for comment in comments: + comment.extract() + for r in soup2.findAll(attrs={'class':'star-rating'}): + r.extract() + for r in soup2.findAll(attrs={'class':'star-rating2'}): + r.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + tag.extract() \ No newline at end of file diff --git a/recipes/gazeta_pl_bydgoszcz.recipe b/recipes/gazeta_pl_bydgoszcz.recipe new file mode 100644 index 0000000000..f86d642419 --- /dev/null +++ b/recipes/gazeta_pl_bydgoszcz.recipe @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Comment +import re +class 
gw_bydgoszcz(BasicNewsRecipe): + title = u'Gazeta Wyborcza Bydgoszcz' + __author__ = 'fenuks' + language = 'pl' + description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.' + category = 'newspaper' + publication_type = 'newspaper' + masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif' + INDEX = 'http://bydgoszcz.gazeta.pl' + cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif' + remove_empty_feeds = True + oldest_article = 3 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + + #rules for gazeta.pl + preprocess_regexps = [(re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] + keep_only_tags = [dict(id='gazeta_article')] + remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})] + remove_tags_after = dict(id='gazeta_article_body') + + feeds = [(u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')] + + def print_version(self, url): + if 'feedsportal.com' in url: + s = url.rpartition('gazeta0Bpl') + u = s[2] + if not s[0]: + u = url.rpartition('wyborcza0Bpl')[2] + u = u.replace('/l/', '/') + u = u.replace('/ia1.htm', '') + u = u.replace('0Dbo0F1', '') + u = u.replace('/story01.htm', '') + u = u.replace('0C', '/') + u = u.replace('A', '') + u = u.replace('0E', '-') + u = u.replace('0H', ',') + u = u.replace('0I', '_') + u = u.replace('0B', '.') + u = self.INDEX + u + return u + else: + return url + + def preprocess_html(self, soup): + tag = soup.find(id='Str') + if soup.find(attrs={'class': 'piano_btn_1'}): + return None + elif tag and tag.findAll('a'): + self.append_page(soup, soup.body) + return soup + + def append_page(self, soup, appendtag): + loop = False + tag = soup.find('div', attrs={'id': 'Str'}) + try: + baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content'] + except: + return 1 + link = 
tag.findAll('a')[-1] + while link: + soup2 = self.index_to_soup(baseurl + link['href']) + link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1] + if not u'następne' in link.string: + link = '' + pagetext = soup2.find(id='artykul') + comments = pagetext.findAll(text=lambda text:isinstance(text, Comment)) + for comment in comments: + comment.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + tag.extract() + + def image_url_processor(self, baseurl, url): + if url.startswith(' '): + return url.strip() + else: + return url \ No newline at end of file From 82bfa745021e8cf998303cbe7fbb13aada15c10a Mon Sep 17 00:00:00 2001 From: fenuks Date: Mon, 17 Jun 2013 09:58:02 +0200 Subject: [PATCH 18/25] new icons for recipes --- recipes/icons/cdrinfo_pl.png | Bin 0 -> 909 bytes recipes/icons/gazeta_pl_bydgoszcz.png | Bin 0 -> 294 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 recipes/icons/cdrinfo_pl.png create mode 100644 recipes/icons/gazeta_pl_bydgoszcz.png diff --git a/recipes/icons/cdrinfo_pl.png b/recipes/icons/cdrinfo_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..73dbc33692082371e7dcdc92d688a097f3c6da10 GIT binary patch literal 909 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstU$g(vPY0F z14ES>14Ba#1H&(%P{RubhEf9thF1v;3|2E37{m+a>IIpm6`n+{}${TxmQ!5qlTb2!>E^7e&MB+*Y~Fun z%Z`1OHBBBq0ZzUV$>m)@@oB4e-@NDGp>NeQK0v*yh64vv}x`hGnYsz^IWB=5- zQ)e&Ux#!^VQ)dg?XBM{4%Bk(it?Mo9n3Yo56Pe#!(lw`J)@t9FjJ)QlS@n}L>LwL+ z&dqC{7E{#bpHl7;kyX$(Gcc{vH?g$1b9Qt=OMGcZT2)t6ep6oS%(Tj`n8Fs<$eh5m zYM-dI?1qU&9kbG_dt)-o^XmGF8Yle!|38c)I}8{B5hX!>!3-=~S-pi7^?p)J-p79* zJ^ps4Qup7JyT7I}+5bDY<5$Gyf3vL2C;htf;vp}`p+8se6osDt_q+Axge&5=?sX;V z-F(#eZxuUl)nCw9!3ArZK-qvM7}A*UysW~PF< zouiwXp{Av3sNXzC#uNP^Awgk*q2Au-PlTjjVBpNiGBn6|os#-I<(1KkMxPT`%?(V9 z`c9laZ<1)B+rh|a{Q1O;jzf!NxY@i{CrxPB${UdCnK@IJxp{7FrK`)5=42P2)Txs< zb6-$gm-kNY>byA9t@rHq)d?K`#m>UU+TPOED*QBnC}Q!>*kackJ%bixj( Ofx*+&&t;ucLK6Vn`-mU_ literal 0 HcmV?d00001 diff 
--git a/recipes/icons/gazeta_pl_bydgoszcz.png b/recipes/icons/gazeta_pl_bydgoszcz.png new file mode 100644 index 0000000000000000000000000000000000000000..49d76d2ddc6f4549c7211bb82f5176d1d8413c5b GIT binary patch literal 294 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!D3?x-;bCrM;OS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5lwb?+ z32_C|R|OO``h28;EXI-`zhDN3XE)M-9L@rd$YLPv0mg18v+aNkK2I0N5RU7~6aF7` z;9_E!{avu|+_&36QyElCTq8?Mnsfm?=g|>l_ zm4QL?RMuxG8glbfGSe!78VoFT4UBXR%|i^0txQd=jLo$T41pR-ew-5nYGCkm^>bP0 Hl+XkKPAf?O literal 0 HcmV?d00001 From 45704e36c5786b34adf698fc7941177367be9dbc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jun 2013 09:00:46 +0530 Subject: [PATCH 19/25] Add Kobo Aura HD to Welcome Wizard --- src/calibre/gui2/wizard/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index 798ac5faca..f813eed892 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -139,7 +139,7 @@ class Kobo(Device): id = 'kobo' class KoboVox(Kobo): - name = 'Kobo Vox' + name = 'Kobo Vox and Kobo Aura HD' output_profile = 'tablet' id = 'kobo_vox' From ccaa960edf9733e78028cb50c73b805790196a3e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Jun 2013 10:02:20 +0530 Subject: [PATCH 20/25] pep8 --- src/calibre/gui2/dialogs/plugin_updater.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py index 3820169876..df76bec27d 100644 --- a/src/calibre/gui2/dialogs/plugin_updater.py +++ b/src/calibre/gui2/dialogs/plugin_updater.py @@ -254,7 +254,7 @@ Platforms: Windows, OSX, Linux; History: Yes; return self.installed_version is not None def is_upgrade_available(self): - return self.is_installed() and (self.installed_version < self.available_version \ + return self.is_installed() and (self.installed_version < 
self.available_version or self.is_deprecated) def is_valid_platform(self): @@ -317,7 +317,7 @@ class DisplayPluginModel(QAbstractTableModel): def data(self, index, role): if not index.isValid(): - return NONE; + return NONE row, col = index.row(), index.column() if row < 0 or row >= self.rowCount(): return NONE @@ -357,7 +357,7 @@ class DisplayPluginModel(QAbstractTableModel): else: return self._get_status_tooltip(display_plugin) elif role == Qt.ForegroundRole: - if col != 1: # Never change colour of the donation column + if col != 1: # Never change colour of the donation column if display_plugin.is_deprecated: return QVariant(QBrush(Qt.blue)) if display_plugin.is_disabled(): @@ -417,7 +417,7 @@ class DisplayPluginModel(QAbstractTableModel): icon_name = 'plugin_upgrade_invalid.png' else: icon_name = 'plugin_upgrade_ok.png' - else: # A plugin available not currently installed + else: # A plugin available not currently installed if display_plugin.is_valid_to_install(): icon_name = 'plugin_new_valid.png' else: @@ -429,11 +429,11 @@ class DisplayPluginModel(QAbstractTableModel): return QVariant(_('This plugin has been deprecated and should be uninstalled')+'\n\n'+ _('Right-click to see more options')) if not display_plugin.is_valid_platform(): - return QVariant(_('This plugin can only be installed on: %s') % \ + return QVariant(_('This plugin can only be installed on: %s') % ', '.join(display_plugin.platforms)+'\n\n'+ _('Right-click to see more options')) if numeric_version < display_plugin.calibre_required_version: - return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') % \ + return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') % self._get_display_version(display_plugin.calibre_required_version)+'\n\n'+ _('Right-click to see more options')) if display_plugin.installed_version < display_plugin.available_version: @@ -687,7 +687,7 @@ class PluginUpdaterDialog(SizePersistedDialog): def 
_install_clicked(self): display_plugin = self._selected_display_plugin() - if not question_dialog(self, _('Install %s')%display_plugin.name, '

' + \ + if not question_dialog(self, _('Install %s')%display_plugin.name, '

' + _('Installing plugins is a security risk. ' 'Plugins can contain a virus/malware. ' 'Only install it if you got it from a trusted source.' @@ -886,3 +886,4 @@ class PluginUpdaterDialog(SizePersistedDialog): pt.write(raw) pt.close() return pt.name + From 07c935b700bd6e7d35f3e587caa1a189b1a49669 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Jun 2013 10:12:03 +0530 Subject: [PATCH 21/25] Do not show builtin plugins in the get new plugins dialog If a builtin plugin with the same name as a third party plugin exists, then the builtin plugin was displayed in the get new plugins dialog as installed (happened with the new DOCX Input plugin). --- src/calibre/gui2/dialogs/plugin_updater.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py index df76bec27d..c5d79218f9 100644 --- a/src/calibre/gui2/dialogs/plugin_updater.py +++ b/src/calibre/gui2/dialogs/plugin_updater.py @@ -89,7 +89,7 @@ def get_installed_plugin_status(display_plugin): display_plugin.installed_version = None display_plugin.plugin = None for plugin in initialized_plugins(): - if plugin.name == display_plugin.name: + if plugin.name == display_plugin.name and plugin.plugin_path is not None: display_plugin.plugin = plugin display_plugin.installed_version = plugin.version break From 0e14d36438eeb7ed8f39610267a1c78b7f98889f Mon Sep 17 00:00:00 2001 From: David Forrester Date: Wed, 19 Jun 2013 14:23:43 +1000 Subject: [PATCH 22/25] SQL delete needs firmware check for older Kobo firmware Kobo driver: Fix a regression when deleting empty shelves on Kobo devices with older firmware. Fixes #1192441 [Private bug](https://bugs.launchpad.net/calibre/+bug/1192441) As reported here, http://www.mobileread.com/forums/showthread.php?t=214760, if the Kobo device is using firmware before 2.5.0, it doesn't have the Activity table. The delete from this table when maintaining shelves needs a version check around it. 
--- src/calibre/devices/kobo/driver.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index cddf6a561f..cb325efb07 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -1880,7 +1880,7 @@ class KOBOTOUCH(KOBO): # Remove any entries for the Activity table - removes tile from new home page if self.has_activity_table(): - debug_print('KoboTouch:delete_via_sql: detete from Activity') + debug_print('KoboTouch:delete_via_sql: delete from Activity') cursor.execute('delete from Activity where Id =?', t) connection.commit() @@ -2391,7 +2391,8 @@ class KOBOTOUCH(KOBO): cursor = connection.cursor() cursor.execute(delete_query) cursor.execute(update_query) - cursor.execute(delete_activity_query) + if self.has_activity_table(): + cursor.execute(delete_activity_query) connection.commit() cursor.close() From 8bd6cc840c8460279d1413cdf5ed141f0c12a9a4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Jun 2013 11:36:33 +0530 Subject: [PATCH 23/25] DOCX metadata: Be more intelligent for covers DOCX metadata: When reading covers from DOCX files use the first image as specified in the actual markup instead of just the first image in the container. 
--- src/calibre/ebooks/metadata/docx.py | 40 ++++++++++++++++++----------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py index ea34d27d3a..2c8b91bc70 100644 --- a/src/calibre/ebooks/metadata/docx.py +++ b/src/calibre/ebooks/metadata/docx.py @@ -8,29 +8,39 @@ __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' from calibre.ebooks.docx.container import DOCX +from calibre.ebooks.docx.names import XPath, get -from calibre.utils.zipfile import ZipFile from calibre.utils.magick.draw import identify_data +images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]') + +def get_cover(docx): + doc = docx.document + rid_map = docx.document_relationships[0] + for image in images(doc): + rid = get(image, 'r:embed') or get(image, 'r:id') + if rid in rid_map: + try: + raw = docx.read(rid_map[rid]) + width, height, fmt = identify_data(raw) + except Exception: + continue + if 0.8 <= height/width <= 1.8 and height*width >= 160000: + return (fmt, raw) + def get_metadata(stream): c = DOCX(stream, extract=False) mi = c.metadata + try: + cdata = get_cover(c) + except Exception: + cdata = None + import traceback + traceback.print_exc() c.close() stream.seek(0) - cdata = None - with ZipFile(stream, 'r') as zf: - for zi in zf.infolist(): - ext = zi.filename.rpartition('.')[-1].lower() - if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}: - raw = zf.read(zi) - try: - width, height, fmt = identify_data(raw) - except: - continue - if 0.8 <= height/width <= 1.8 and height*width >= 160000: - cdata = (fmt, raw) - if cdata is not None: - mi.cover_data = cdata + if cdata is not None: + mi.cover_data = cdata return mi From 8020f489ca9eb51f3b997aba384f08bd4143ebcb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Jun 2013 12:39:20 +0530 Subject: [PATCH 24/25] pep8 --- 
src/calibre/web/feeds/templates.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index a22a79ef20..68af525cfd 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -13,7 +13,7 @@ from lxml.html.builder import HTML, HEAD, TITLE, STYLE, DIV, BODY, \ from calibre import preferred_encoding, strftime, isbytestring -def CLASS(*args, **kwargs): # class is a reserved word in Python +def CLASS(*args, **kwargs): # class is a reserved word in Python kwargs['class'] = ' '.join(args) return kwargs @@ -26,7 +26,7 @@ class Template(object): self.html_lang = lang def generate(self, *args, **kwargs): - if not kwargs.has_key('style'): + if 'style' not in kwargs: kwargs['style'] = '' for key in kwargs.keys(): if isbytestring(kwargs[key]): @@ -152,8 +152,8 @@ class FeedTemplate(Template): body.append(div) if getattr(feed, 'image', None): div.append(DIV(IMG( - alt = feed.image_alt if feed.image_alt else '', - src = feed.image_url + alt=feed.image_alt if feed.image_alt else '', + src=feed.image_url ), CLASS('calibre_feed_image'))) if getattr(feed, 'description', None): @@ -261,8 +261,8 @@ class TouchscreenIndexTemplate(Template): for i, feed in enumerate(feeds): if feed: tr = TR() - tr.append(TD( CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i))) - tr.append(TD( '%s' % len(feed.articles), style="text-align:right")) + tr.append(TD(CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i))) + tr.append(TD('%s' % len(feed.articles), style="text-align:right")) toc.append(tr) div = DIV( masthead_p, @@ -307,7 +307,7 @@ class TouchscreenFeedTemplate(Template): if f > 0: link = A(CLASS('feed_link'), trim_title(feeds[f-1].title), - href = '../feed_%d/index.html' % int(f-1)) + href='../feed_%d/index.html' % int(f-1)) navbar_tr.append(TD(CLASS('feed_prev'),link)) # Up to Sections @@ -319,13 +319,12 @@ 
class TouchscreenFeedTemplate(Template): if f < len(feeds)-1: link = A(CLASS('feed_link'), trim_title(feeds[f+1].title), - href = '../feed_%d/index.html' % int(f+1)) + href='../feed_%d/index.html' % int(f+1)) navbar_tr.append(TD(CLASS('feed_next'),link)) navbar_t.append(navbar_tr) top_navbar = navbar_t bottom_navbar = copy.copy(navbar_t) - #print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True) - + # print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True) # Build the page head = HEAD(TITLE(feed.title)) @@ -342,8 +341,8 @@ class TouchscreenFeedTemplate(Template): if getattr(feed, 'image', None): div.append(DIV(IMG( - alt = feed.image_alt if feed.image_alt else '', - src = feed.image_url + alt=feed.image_alt if feed.image_alt else '', + src=feed.image_url ), CLASS('calibre_feed_image'))) if getattr(feed, 'description', None): @@ -411,6 +410,7 @@ class TouchscreenNavBarTemplate(Template): navbar_tr.append(TD(CLASS('article_next'),link)) navbar_t.append(navbar_tr) navbar.append(navbar_t) - #print "\n%s\n" % etree.tostring(navbar, pretty_print=True) + # print "\n%s\n" % etree.tostring(navbar, pretty_print=True) self.root = HTML(head, BODY(navbar)) + From adcc1739a65a5f5dc2a1bc6b0f13531534a00c98 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Jun 2013 13:12:14 +0530 Subject: [PATCH 25/25] News download: "downloaded from" for touchscreens News download: Add the "downloaded from" link at the bottom of every article when using a touchscreen output profile (like the Tablet profile). 
--- src/calibre/web/feeds/templates.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index 68af525cfd..3ee90c43a6 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -387,6 +387,14 @@ class TouchscreenNavBarTemplate(Template): navbar_t = TABLE(CLASS('touchscreen_navbar')) navbar_tr = TR() + if bottom and not url.startswith('file://'): + navbar.append(HR()) + text = 'This article was downloaded by ' + p = PT(text, STRONG(__appname__), A(url, href=url), + style='text-align:left; max-width: 100%; overflow: hidden;') + p[0].tail = ' from ' + navbar.append(p) + navbar.append(BR()) # | Previous if art > 0: link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1))