From 74cd76d02c09436aec38fb04d38c8470c3efd2cb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 14 Jun 2013 16:35:21 +0530
Subject: [PATCH 01/25] Use a https link for the default author link template

Fixes #1190952 [please use https where possible for external requests](https://bugs.launchpad.net/calibre/+bug/1190952)
---
 src/calibre/gui2/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 5fcde65ff5..a552ad8594 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -92,7 +92,7 @@ defs['tags_browser_partition_method'] = 'first letter'
 defs['tags_browser_collapse_at'] = 100
 defs['tag_browser_dont_collapse'] = []
 defs['edit_metadata_single_layout'] = 'default'
-defs['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}'
+defs['default_author_link'] = 'https://en.wikipedia.org/w/index.php?search={author}'
 defs['preserve_date_on_ctl'] = True
 defs['manual_add_auto_convert'] = False
 defs['cb_fullscreen'] = False

From 4637e3935038bc74340cea1b434e554e02eaf76e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 14 Jun 2013 21:40:17 +0530
Subject: [PATCH 02/25] MOBI Output: Fix rasterizing of svg images

MOBI Output: Fix rendering of SVG images that embed large raster images
in 64bit calibre installs.
Fixes #1191020 [Conversion to mobi (from ePub) removes inserted images](https://bugs.launchpad.net/calibre/+bug/1191020)
---
 .../ebooks/oeb/transforms/rasterize.py        | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py
index d5eb7c5008..0b222deeca 100644
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@@ -8,9 +8,8 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 
 import os
 from urlparse import urldefrag
-import base64
 from lxml import etree
-from PyQt4.QtCore import Qt
+from PyQt4.QtCore import Qt, QUrl
 from PyQt4.QtCore import QByteArray
 from PyQt4.QtCore import QBuffer
 from PyQt4.QtCore import QIODevice
@@ -18,11 +17,14 @@ from PyQt4.QtGui import QColor
 from PyQt4.QtGui import QImage
 from PyQt4.QtGui import QPainter
 from PyQt4.QtSvg import QSvgRenderer
+from calibre.constants import iswindows
 from calibre.ebooks.oeb.base import XHTML, XLINK
 from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
 from calibre.ebooks.oeb.base import xml2str, xpath
 from calibre.ebooks.oeb.base import urlnormalize
 from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.utils.imghdr import what
 
 IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
 KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
@@ -46,6 +48,7 @@ class SVGRasterizer(object):
 
     def __call__(self, oeb, context):
         oeb.logger.info('Rasterizing SVG images...')
+        self.temp_files = []
         self.stylizer_cache = {}
         self.oeb = oeb
         self.opts = context
@@ -54,6 +57,11 @@ class SVGRasterizer(object):
         self.dataize_manifest()
         self.rasterize_spine()
         self.rasterize_cover()
+        for pt in self.temp_files:
+            try:
+                os.remove(pt)
+            except:
+                pass
 
     def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
         view_box = elem.get('viewBox', elem.get('viewbox', None))
@@ -112,9 +120,15 @@ class SVGRasterizer(object):
             if abshref not in hrefs:
                 continue
             linkee = hrefs[abshref]
-            data = base64.encodestring(str(linkee))
-            data = "data:%s;base64,%s" % (linkee.media_type, data)
-            elem.attrib[XLINK('href')] = data
+            data = str(linkee)
+            ext = what(None, data) or 'jpg'
+            with PersistentTemporaryFile(suffix='.'+ext) as pt:
+                pt.write(data)
+                self.temp_files.append(pt.name)
+            href = unicode(QUrl.fromLocalFile(pt.name).toString())[len('file://'):]
+            if iswindows:
+                href = href[1:]
+            elem.attrib[XLINK('href')] = href
         return svg
 
     def stylizer(self, item):

From 877810c134bdcc76ee364cf3bc0ef5a7eda12b93 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 14 Jun 2013 21:48:01 +0530
Subject: [PATCH 03/25] Avoid unnecessary QUrl round-tripping

---
 src/calibre/ebooks/oeb/transforms/rasterize.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py
index 0b222deeca..acd815524e 100644
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@@ -9,7 +9,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import os
 from urlparse import urldefrag
 from lxml import etree
-from PyQt4.QtCore import Qt, QUrl
+from PyQt4.QtCore import Qt
 from PyQt4.QtCore import QByteArray
 from PyQt4.QtCore import QBuffer
 from PyQt4.QtCore import QIODevice
@@ -17,7 +17,6 @@ from PyQt4.QtGui import QColor
 from PyQt4.QtGui import QImage
 from PyQt4.QtGui import QPainter
 from PyQt4.QtSvg import QSvgRenderer
-from calibre.constants import iswindows
 from calibre.ebooks.oeb.base import XHTML, XLINK
 from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
 from calibre.ebooks.oeb.base import xml2str, xpath
@@ -125,10 +124,7 @@ class SVGRasterizer(object):
             with PersistentTemporaryFile(suffix='.'+ext) as pt:
                 pt.write(data)
                 self.temp_files.append(pt.name)
-            href = unicode(QUrl.fromLocalFile(pt.name).toString())[len('file://'):]
-            if iswindows:
-                href = href[1:]
-            elem.attrib[XLINK('href')] = href
+            elem.attrib[XLINK('href')] = pt.name
         return svg
 
     def stylizer(self, item):

From 8bc65df29a4914156b6aef2ee21e9f0328906bc3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 10:07:07 +0530
Subject: [PATCH 04/25] DOCX Input: Add support for contextual spacing

DOCX Input: Add support for the Word setting "No space between
paragraphs with the same style". Fixes #1191001 [docx conversion bug](https://bugs.launchpad.net/calibre/+bug/1191001)
---
 src/calibre/ebooks/docx/styles.py  | 14 ++++++++++++++
 src/calibre/ebooks/docx/to_html.py | 11 +++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index 8e4d811803..3888496e5a 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -260,6 +260,7 @@ class Styles(object):
             for attr in ans.all_properties:
                 if not (is_numbering and attr == 'text_indent'):  # skip text-indent for lists
                     setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
+            ans.linked_style = direct_formatting.linked_style
         return ans
 
     def resolve_run(self, r):
@@ -389,6 +390,19 @@ class Styles(object):
                 else:
                     ps.numbering = (ps.numbering[0], lvl)
 
+    def apply_contextual_spacing(self, paras):
+        last_para = None
+        for p in paras:
+            if last_para is not None:
+                ls = self.resolve_paragraph(last_para)
+                ps = self.resolve_paragraph(p)
+                if ls.linked_style is not None and ls.linked_style == ps.linked_style:
+                    if ls.contextualSpacing:
+                        ls.margin_bottom = 0
+                    if ps.contextualSpacing:
+                        ps.margin_top = 0
+            last_para = p
+
     def register(self, css, prefix):
         h = hash(frozenset(css.iteritems()))
         ans, _ = self.classes.get(h, (None, None))
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index ad26f91d46..bc8336d768 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -86,6 +86,7 @@ class Convert(object):
         self.framed_map = {}
         self.anchor_map = {}
         self.link_map = defaultdict(list)
+        paras = []
 
         self.log.debug('Converting Word markup to HTML')
         self.read_page_properties(doc)
@@ -94,6 +95,8 @@ class Convert(object):
             if wp.tag.endswith('}p'):
                 p = self.convert_p(wp)
                 self.body.append(p)
+                paras.append(wp)
+        self.styles.apply_contextual_spacing(paras)
 
         notes_header = None
         if self.footnotes.has_notes:
@@ -107,12 +110,16 @@ class Convert(object):
                 dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
                 dl[-1][0].tail = ']'
                 dl.append(DD())
+                paras = []
                 for wp in note:
                     if wp.tag.endswith('}tbl'):
                         self.tables.register(wp, self.styles)
                         self.page_map[wp] = self.current_page
-                    p = self.convert_p(wp)
-                    dl[-1].append(p)
+                    else:
+                        p = self.convert_p(wp)
+                        dl[-1].append(p)
+                        paras.append(wp)
+                self.styles.apply_contextual_spacing(paras)
 
         self.resolve_links(relationships_by_id)
 

From ba0639c68105c79c4c70a10e0e486dc88e26a679 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 10:46:03 +0530
Subject: [PATCH 05/25] Move TOC creation into its own module

---
 src/calibre/ebooks/docx/to_html.py | 45 +---------------------
 src/calibre/ebooks/docx/toc.py     | 60 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 43 deletions(-)
 create mode 100644 src/calibre/ebooks/docx/toc.py

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index bc8336d768..23191864ff 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -25,9 +25,8 @@ from calibre.ebooks.docx.tables import Tables
 from calibre.ebooks.docx.footnotes import Footnotes
 from calibre.ebooks.docx.cleanup import cleanup_markup
 from calibre.ebooks.docx.theme import Theme
+from calibre.ebooks.docx.toc import create_toc
 from calibre.ebooks.metadata.opf2 import OPFCreator
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 
 class Text:
@@ -267,48 +266,8 @@ class Convert(object):
 
         self.styles.resolve_numbering(numbering)
 
-    def create_toc(self):
-        ' Create a TOC from headings in the document '
-        root = self.body
-        headings = ('h1', 'h2', 'h3')
-        tocroot = TOC()
-        xpaths = [XPath('//%s' % x) for x in headings]
-        level_prev = {i+1:None for i in xrange(len(xpaths))}
-        level_prev[0] = tocroot
-        level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
-        item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
-
-        self.idcount = 0
-
-        def ensure_id(elem):
-            ans = elem.get('id', None)
-            if not ans:
-                self.idcount += 1
-                ans = 'toc_id_%d' % self.idcount
-                elem.set('id', ans)
-            return ans
-
-        for item in descendants(root, *headings):
-            lvl = plvl = item_level_map.get(item, None)
-            if lvl is None:
-                continue
-            parent = None
-            while parent is None:
-                plvl -= 1
-                parent = level_prev[plvl]
-            lvl = plvl + 1
-            elem_id = ensure_id(item)
-            text = elem_to_toc_text(item)
-            toc = parent.add_item('index.html', elem_id, text)
-            level_prev[lvl] = toc
-            for i in xrange(lvl+1, len(xpaths)+1):
-                level_prev[i] = None
-
-        if len(tuple(tocroot.flat())) > 1:
-            return tocroot
-
     def write(self):
-        toc = self.create_toc()
+        toc = create_toc(self.body)
         raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
         with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
             f.write(raw)
diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py
new file mode 100644
index 0000000000..8036808701
--- /dev/null
+++ b/src/calibre/ebooks/docx/toc.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from calibre.ebooks.docx.names import XPath, descendants
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
+
+class Count(object):
+
+    __slots__ = ('val',)
+
+    def __init__(self):
+        self.val = 0
+
+def create_toc(body):
+    ' Create a TOC from headings in the document '
+    headings = ('h1', 'h2', 'h3')
+    tocroot = TOC()
+    xpaths = [XPath('//%s' % x) for x in headings]
+    level_prev = {i+1:None for i in xrange(len(xpaths))}
+    level_prev[0] = tocroot
+    level_item_map = {i+1:frozenset(xp(body)) for i, xp in enumerate(xpaths)}
+    item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
+
+    idcount = Count()
+
+    def ensure_id(elem):
+        ans = elem.get('id', None)
+        if not ans:
+            idcount.val += 1
+            ans = 'toc_id_%d' % idcount.val
+            elem.set('id', ans)
+        return ans
+
+    for item in descendants(body, *headings):
+        lvl = plvl = item_level_map.get(item, None)
+        if lvl is None:
+            continue
+        parent = None
+        while parent is None:
+            plvl -= 1
+            parent = level_prev[plvl]
+        lvl = plvl + 1
+        elem_id = ensure_id(item)
+        text = elem_to_toc_text(item)
+        toc = parent.add_item('index.html', elem_id, text)
+        level_prev[lvl] = toc
+        for i in xrange(lvl+1, len(xpaths)+1):
+            level_prev[i] = None
+
+    if len(tuple(tocroot.flat())) > 1:
+        return tocroot
+
+
+

From 58233b596cf45754583a25447611f868c83ff005 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 11:49:04 +0530
Subject: [PATCH 06/25] Bulk metadata: focus the Review instead of OK

After a bulk metadata download, focus the review button on the
popup notification, instead of the OK button. Fixes #1190931 [Newly custom added columns keep disappearing on restart](https://bugs.launchpad.net/calibre/+bug/1190931)
---
 src/calibre/gui2/proceed.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py
index 67efe48b53..0dd1a2189d 100644
--- a/src/calibre/gui2/proceed.py
+++ b/src/calibre/gui2/proceed.py
@@ -19,7 +19,7 @@ from calibre.gui2.dialogs.message_box import ViewLog
 Question = namedtuple('Question', 'payload callback cancel_callback '
         'title msg html_log log_viewer_title log_is_file det_msg '
         'show_copy_button checkbox_msg checkbox_checked action_callback '
-        'action_label action_icon')
+        'action_label action_icon focus_action')
 
 class ProceedQuestion(QDialog):
 
@@ -155,13 +155,14 @@ class ProceedQuestion(QDialog):
                 self.checkbox.setChecked(question.checkbox_checked)
             self.do_resize()
             self.show()
-            self.bb.button(self.bb.Yes).setDefault(True)
-            self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
+            button = self.action_button if question.focus_action and question.action_callback is not None else self.bb.button(self.bb.Yes)
+            button.setDefault(True)
+            button.setFocus(Qt.OtherFocusReason)
 
     def __call__(self, callback, payload, html_log, log_viewer_title, title,
             msg, det_msg='', show_copy_button=False, cancel_callback=None,
             log_is_file=False, checkbox_msg=None, checkbox_checked=False,
-            action_callback=None, action_label=None, action_icon=None):
+            action_callback=None, action_label=None, action_icon=None, focus_action=False):
         '''
         A non modal popup that notifies the user that a background task has
         been completed. This class guarantees that only a single popup is
@@ -192,13 +193,14 @@ class ProceedQuestion(QDialog):
                                 exactly the same way as callback.
         :param action_label: The text on the action button
         :param action_icon: The icon for the action button, must be a QIcon object or None
+        :param focus_action: If True, the action button will be focused instead of the Yes button
 
         '''
         question = Question(
             payload, callback, cancel_callback, title, msg, html_log,
             log_viewer_title, log_is_file, det_msg, show_copy_button,
             checkbox_msg, checkbox_checked, action_callback, action_label,
-            action_icon)
+            action_icon, focus_action)
         self.questions.append(question)
         self.show_question()
 

From 3abe0800238675001c6280b798b4904b224ca22f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 16:13:42 +0530
Subject: [PATCH 07/25] DOCX Input: Support webHidden

DOCX Input: Hide text that has been marked as not being visible in the
web view in Word.
---
 src/calibre/ebooks/docx/char_styles.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py
index 02b8299c94..c9a2fee4c9 100644
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@@ -132,10 +132,10 @@ class RunStyle(object):
 
     all_properties = {
         'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
-        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
+        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'webHidden',
 
         'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
-        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
+        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family',
     }
 
     toggle_properties = {
@@ -150,7 +150,7 @@ class RunStyle(object):
         else:
             for p in (
                 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
-                'smallCaps', 'strike', 'vanish',
+                'smallCaps', 'strike', 'vanish', 'webHidden',
             ):
                 setattr(self, p, binary_property(rPr, p))
 
@@ -210,7 +210,7 @@ class RunStyle(object):
                 c['text-shadow'] = '2px 2px'
             if self.smallCaps is True:
                 c['font-variant'] = 'small-caps'
-            if self.vanish is True:
+            if self.vanish is True or self.webHidden is True:
                 c['display'] = 'none'
 
             self.get_border_css(c)

From e2823644595d1c119164f9c4928efedacb367720 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 17:12:30 +0530
Subject: [PATCH 08/25] ...

---
 src/calibre/ebooks/docx/styles.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index 3888496e5a..21f45616fa 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -397,9 +397,9 @@ class Styles(object):
                 ls = self.resolve_paragraph(last_para)
                 ps = self.resolve_paragraph(p)
                 if ls.linked_style is not None and ls.linked_style == ps.linked_style:
-                    if ls.contextualSpacing:
+                    if ls.contextualSpacing is True:
                         ls.margin_bottom = 0
-                    if ps.contextualSpacing:
+                    if ps.contextualSpacing is True:
                         ps.margin_top = 0
             last_para = p
 

From e444f27de8201a7e07a3c8fe87d27ea366781289 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 17:15:16 +0530
Subject: [PATCH 09/25] Remove incorrect rendering of separator field chars

Unconditionally rendering the separator is wrong, for example, for the
TOC field it causes an extra space at the start of the first entry. In
any case, many common fields are webHidden, so they don't display anyway.
---
 src/calibre/ebooks/docx/to_html.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 23191864ff..6fd0026874 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -440,8 +440,6 @@ class Convert(object):
                     l.set('class', 'noteref')
                     text.add_elem(l)
                     ans.append(text.elem)
-            elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
-                text.buf.append('\xa0')
         if text.buf:
             setattr(text.elem, text.attr, ''.join(text.buf))
 

From 8c261063b484128bc2d4333d9fa7eef1a97a84e2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 18:44:58 +0530
Subject: [PATCH 10/25] DOCX Input: Support for Word created ToC

DOCX Input: Support for Table of Contents created using the Word Table
of Contents tool. calibre now first looks for such a Table of Contents
and only if one is not found does it generate a ToC from headings.
---
 src/calibre/ebooks/docx/to_html.py |  8 +--
 src/calibre/ebooks/docx/toc.py     | 84 +++++++++++++++++++++++++++++-
 2 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 6fd0026874..c3b2391d3f 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -171,7 +171,7 @@ class Convert(object):
         self.log.debug('Cleaning up redundant markup generated by Word')
         cleanup_markup(self.html, self.styles)
 
-        return self.write()
+        return self.write(doc)
 
     def read_page_properties(self, doc):
         current = []
@@ -266,8 +266,8 @@ class Convert(object):
 
         self.styles.resolve_numbering(numbering)
 
-    def write(self):
-        toc = create_toc(self.body)
+    def write(self, doc):
+        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map)
         raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
         with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
             f.write(raw)
@@ -367,11 +367,13 @@ class Convert(object):
         return wrapper
 
     def resolve_links(self, relationships_by_id):
+        self.resolved_link_map = {}
         for hyperlink, spans in self.link_map.iteritems():
             span = spans[0]
             if len(spans) > 1:
                 span = self.wrap_elems(spans, SPAN())
             span.tag = 'a'
+            self.resolved_link_map[hyperlink] = span
             tgt = get(hyperlink, 'w:tgtFrame')
             if tgt:
                 span.set('target', tgt)
diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py
index 8036808701..5936d34355 100644
--- a/src/calibre/ebooks/docx/toc.py
+++ b/src/calibre/ebooks/docx/toc.py
@@ -6,7 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 
-from calibre.ebooks.docx.names import XPath, descendants
+from collections import namedtuple
+
+from lxml.etree import tostring
+
+from calibre.ebooks.docx.names import XPath, descendants, get, ancestor
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
 
@@ -17,7 +21,7 @@ class Count(object):
     def __init__(self):
         self.val = 0
 
-def create_toc(body):
+def from_headings(body):
     ' Create a TOC from headings in the document '
     headings = ('h1', 'h2', 'h3')
     tocroot = TOC()
@@ -56,5 +60,81 @@ def create_toc(body):
     if len(tuple(tocroot.flat())) > 1:
         return tocroot
 
+def structure_toc(entries):
+    indent_vals = sorted({x.indent for x in entries})
+    last_found = [None for i in indent_vals]
+    newtoc = TOC()
+
+    if len(indent_vals) > 6:
+        for x in entries:
+            newtoc.add_item('index.html', x.anchor, x.text)
+        return newtoc
+
+    def find_parent(level):
+        candidates = last_found[:level]
+        for x in reversed(candidates):
+            if x is not None:
+                return x
+        return newtoc
+
+    for item in entries:
+        level = indent_vals.index(item.indent)
+        parent = find_parent(level)
+        last_found[level] = parent.add_item('index.html', item.anchor,
+                    item.text)
+        for i in xrange(level+1, len(last_found)):
+            last_found[i] = None
+
+    return newtoc
+
+def link_to_txt(a, styles, object_map):
+    if len(a) > 1:
+        for child in a:
+            run = object_map.get(child, None)
+            if run is not None:
+                rs = styles.resolve(run)
+                if rs.css.get('display', None) == 'none':
+                    a.remove(child)
+
+    return tostring(a, method='text', with_tail=False, encoding=unicode).strip()
+
+def from_toc(docx, link_map, styles, object_map):
+    toc_level = None
+    level = 0
+    TI = namedtuple('TI', 'text anchor indent')
+    toc = []
+    for tag in XPath('//*[(@w:fldCharType and name()="w:fldChar") or name()="w:hyperlink" or name()="w:instrText"]')(docx):
+        n = tag.tag.rpartition('}')[-1]
+        if n == 'fldChar':
+            t = get(tag, 'w:fldCharType')
+            if t == 'begin':
+                level += 1
+            elif t == 'end':
+                level -= 1
+                if toc_level is not None and level < toc_level:
+                    break
+        elif n == 'instrText':
+            if level > 0 and tag.text and tag.text.strip().startswith('TOC '):
+                toc_level = level
+        elif n == 'hyperlink':
+            if toc_level is not None and level >= toc_level and tag in link_map:
+                a = link_map[tag]
+                href = a.get('href', None)
+                txt = link_to_txt(a, styles, object_map)
+                p = ancestor(tag, 'w:p')
+                if txt and href and p is not None:
+                    ps = styles.resolve_paragraph(p)
+                    try:
+                        ml = int(ps.margin_left[:-2])
+                    except (TypeError, ValueError, AttributeError):
+                        ml = 0
+                    if ps.text_align in {'center', 'right'}:
+                        ml = 0
+                    toc.append(TI(txt, href[1:], ml))
+    if toc:
+        return structure_toc(toc)
+
+def create_toc(docx, body, link_map, styles, object_map):
+    return from_toc(docx, link_map, styles, object_map) or from_headings(body)
 
 
From ca3573b6cbe29c0a1fe82e609075f5ca314c4258 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Jun 2013 21:13:32 +0530
Subject: [PATCH 11/25] E-book viewer: Make bookmark button instant popup

E-book viewer: Change the bookmark button to always pop up a menu when
clicked, which makes accessing existing bookmarks easier.
---
 src/calibre/gui2/viewer/main.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index 3b63d51c15..113e1201e2 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -82,7 +82,8 @@ class History(list):
             return None
         item = self[self.forward_pos]
         self.back_pos = self.forward_pos - 1
-        if self.back_pos < 0: self.back_pos = None
+        if self.back_pos < 0:
+            self.back_pos = None
         self.insert_pos = self.back_pos or 0
         self.forward_pos = None if self.forward_pos > len(self) - 2 else self.forward_pos + 1
         self.set_actions()
@@ -268,7 +269,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
                     self.action_full_screen.shortcuts()]))
         self.action_back.triggered[bool].connect(self.back)
         self.action_forward.triggered[bool].connect(self.forward)
-        self.action_bookmark.triggered[bool].connect(self.bookmark)
         self.action_preferences.triggered.connect(self.do_config)
         self.pos.editingFinished.connect(self.goto_page_num)
         self.vertical_scrollbar.valueChanged[int].connect(lambda
@@ -294,7 +294,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.toc.setCursor(Qt.PointingHandCursor)
         self.tool_bar.setContextMenuPolicy(Qt.PreventContextMenu)
         self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu)
-        self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup)
+        self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.InstantPopup)
         self.action_full_screen.setCheckable(True)
         self.full_screen_label = QLabel('''
                 <center>
@@ -394,7 +394,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.action_toggle_paged_mode.setToolTip(self.FLOW_MODE_TT if
                 self.action_toggle_paged_mode.isChecked() else
                 self.PAGED_MODE_TT)
-        if at_start: return
+        if at_start:
+            return
         self.reload()
 
     def settings_changed(self):
@@ -486,8 +487,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
                 at_start=True)
 
     def lookup(self, word):
-        self.dictionary_view.setHtml('<html><body><p>'+ \
-            _('Connecting to dict.org to lookup: <b>%s</b>&hellip;')%word + \
+        self.dictionary_view.setHtml('<html><body><p>'+
+            _('Connecting to dict.org to lookup: <b>%s</b>&hellip;')%word +
             '</p></body></html>')
         self.dictionary_box.show()
         self._lookup = Lookup(word, parent=self)
@@ -964,6 +965,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
 
     def set_bookmarks(self, bookmarks):
         self.bookmarks_menu.clear()
+        self.bookmarks_menu.addAction(_("Bookmark this location"), self.bookmark)
         self.bookmarks_menu.addAction(_("Manage Bookmarks"), self.manage_bookmarks)
         self.bookmarks_menu.addSeparator()
         current_page = None
@@ -1202,3 +1204,4 @@ def main(args=sys.argv):
 
 if __name__ == '__main__':
     sys.exit(main())
+

From b63e0df73fc45dc35ebbdd1b64ef6b01951ac082 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 16 Jun 2013 08:31:05 +0530
Subject: [PATCH 12/25] DOCX Input: Fix image name generation

DOCX Input: When converting docx files with large numbers of unnamed
images, do not crash on windows. Fixes #1191354 [Word docx conversion](https://bugs.launchpad.net/calibre/+bug/1191354)
---
 src/calibre/ebooks/docx/images.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py
index 76f43e7e0c..e24b550797 100644
--- a/src/calibre/ebooks/docx/images.py
+++ b/src/calibre/ebooks/docx/images.py
@@ -112,15 +112,16 @@ class Images(object):
         base += '.' + ext
         exists = frozenset(self.used.itervalues())
         c = 1
-        while base in exists:
+        name = base
+        while name in exists:
             n, e = base.rpartition('.')[0::2]
-            base = '%s-%d.%s' % (n, c, e)
+            name = '%s-%d.%s' % (n, c, e)
             c += 1
-        self.used[rid] = base
-        with open(os.path.join(self.dest_dir, base), 'wb') as f:
+        self.used[rid] = name
+        with open(os.path.join(self.dest_dir, name), 'wb') as f:
             f.write(raw)
-        self.all_images.add('images/' + base)
-        return base
+        self.all_images.add('images/' + name)
+        return name
 
     def pic_to_img(self, pic, alt=None):
         name = None

From 1bada2b35b6fd59c2afcb86efaae30f028168375 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 16 Jun 2013 09:49:50 +0530
Subject: [PATCH 13/25] Get Books: Fix error when using internal browser

Get Books: Fix error when using the internal browser on some systems. I
cannot replicate this error so my fix is speculative, based on the idea
that not keeping explicit references to the Python objects is causing
them to be garbage collected. Fixes #1191199 [Python exception downloading books from Barnes & Noble](https://bugs.launchpad.net/calibre/+bug/1191199)
---
 src/calibre/gui2/store/web_control.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/store/web_control.py b/src/calibre/gui2/store/web_control.py
index 48e1b7dff0..8318ae9078 100644
--- a/src/calibre/gui2/store/web_control.py
+++ b/src/calibre/gui2/store/web_control.py
@@ -24,8 +24,10 @@ class NPWebView(QWebView):
         self.gui = None
         self.tags = ''
 
-        self.setPage(NPWebPage())
-        self.page().networkAccessManager().setCookieJar(QNetworkCookieJar())
+        self._page = NPWebPage()
+        self.setPage(self._page)
+        self.cookie_jar = QNetworkCookieJar()
+        self.page().networkAccessManager().setCookieJar(self.cookie_jar)
 
         http_proxy = get_proxies().get('http', None)
         if http_proxy:

From 7a0675e59a94f1159eaf9d13dfa862a49cd58a1a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 17 Jun 2013 09:29:56 +0530
Subject: [PATCH 14/25] Fix PIL import

---
 src/calibre/ebooks/textile/functions.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
index 45e73cfe8f..7380b10d1f 100755
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@@ -86,7 +86,14 @@ def getimagesize(url):
     """
 
     try:
-        import ImageFile
+        from PIL import ImageFile
+    except ImportError:
+        try:
+            import ImageFile
+        except ImportError:
+            return None
+
+    try:
         import urllib2
     except ImportError:
         return None
@@ -220,7 +227,7 @@ class Textile(object):
 
         (re.compile(r'{(S\^|\^S)}'),     r'&#348;'),   #  S-circumflex
         (re.compile(r'{(s\^|\^s)}'),     r'&#349;'),   #  s-circumflex
-        
+
         (re.compile(r'{(S\ˇ|\ˇS)}'),     r'&#352;'),   #  S-caron
         (re.compile(r'{(s\ˇ|\ˇs)}'),     r'&#353;'),   #  s-caron
         (re.compile(r'{(T\ˇ|\ˇT)}'),     r'&#356;'),   #  T-caron
@@ -229,7 +236,7 @@ class Textile(object):
         (re.compile(r'{(u\°|\°u)}'),     r'&#367;'),   #  u-ring
         (re.compile(r'{(Z\ˇ|\ˇZ)}'),     r'&#381;'),   #  Z-caron
         (re.compile(r'{(z\ˇ|\ˇz)}'),     r'&#382;'),   #  z-caron
-        
+
         (re.compile(r'{\*}'),            r'&#8226;'),  #  bullet
         (re.compile(r'{Fr}'),            r'&#8355;'),  #  Franc
         (re.compile(r'{(L=|=L)}'),       r'&#8356;'),  #  Lira
@@ -245,7 +252,7 @@ class Textile(object):
         (re.compile(r"{(’|'/|/')}"),     r'&#8217;'),  #  closing-single-quote - apostrophe
         (re.compile(r"{(‘|\\'|'\\)}"),   r'&#8216;'),  #  opening-single-quote
         (re.compile(r'{(”|"/|/")}'),     r'&#8221;'),  #  closing-double-quote
-        (re.compile(r'{(“|\\"|"\\)}'),   r'&#8220;'),  #  opening-double-quote        
+        (re.compile(r'{(“|\\"|"\\)}'),   r'&#8220;'),  #  opening-double-quote
     ]
     glyph_defaults = [
         (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'),                   r'\1\2&#215;\3'),                       #  dimension sign

From 752bd9e06ea7df02296fd2c563ce1e31e8da46ff Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 17 Jun 2013 11:03:15 +0530
Subject: [PATCH 15/25] DOCX Input: Detect likely cover image

DOCX Input: If a large image that looks like a cover is present at the
start of the document, remove it and use it as the cover of the output
ebook. This can be turned off under the DOCX Input section of the
conversion dialog.
---
 .../ebooks/conversion/plugins/docx_input.py   | 10 ++++-
 src/calibre/ebooks/docx/cleanup.py            | 33 ++++++++++++++-
 src/calibre/ebooks/docx/to_html.py            |  7 +++-
 src/calibre/gui2/convert/docx_input.py        | 23 +++++++++++
 src/calibre/gui2/convert/docx_input.ui        | 41 +++++++++++++++++++
 5 files changed, 110 insertions(+), 4 deletions(-)
 create mode 100644 src/calibre/gui2/convert/docx_input.py
 create mode 100644 src/calibre/gui2/convert/docx_input.ui

diff --git a/src/calibre/ebooks/conversion/plugins/docx_input.py b/src/calibre/ebooks/conversion/plugins/docx_input.py
index 7492d46c68..190a771379 100644
--- a/src/calibre/ebooks/conversion/plugins/docx_input.py
+++ b/src/calibre/ebooks/conversion/plugins/docx_input.py
@@ -14,9 +14,17 @@ class DOCXInput(InputFormatPlugin):
     description = 'Convert DOCX files (.docx) to HTML'
     file_types = set(['docx'])
 
+    options = {
+        OptionRecommendation(name='docx_no_cover', recommended_value=False,
+            help=_('Normally, if a large image is present at the start of the document that looks like a cover, '
+                   'it will be removed from the document and used as the cover for created ebook. This option '
+                   'turns off that behavior.')),
+
+    }
+
     recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
 
     def convert(self, stream, options, file_ext, log, accelerators):
         from calibre.ebooks.docx.to_html import Convert
-        return Convert(stream, log=log)()
+        return Convert(stream, detect_cover=not options.docx_no_cover, log=log)()
 
diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py
index 2b1e095025..a55f8449d8 100644
--- a/src/calibre/ebooks/docx/cleanup.py
+++ b/src/calibre/ebooks/docx/cleanup.py
@@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 
+import os
 
 def mergeable(previous, current):
     if previous.tail or current.tail:
@@ -83,8 +84,19 @@ def lift(span):
         else:
             add_text(last_child, 'tail', span.tail)
 
+def before_count(root, tag, limit=10):
+    ''' Count the descendants of <body> that precede tag, capped at limit '''
+    body = root.xpath('//body[1]')
+    for ans, elem in enumerate(body[0].iterdescendants() if body else ()):
+        if elem is tag:
+            return ans
+        if ans >= limit:
+            # Too far into the document, stop scanning
+            break
+    # No body, tag not found or tag too far in: report the cap, never None
+    return limit
 
-def cleanup_markup(root, styles):
+def cleanup_markup(log, root, styles, dest_dir, detect_cover):
     # Merge consecutive spans that have the same styling
     current_run = []
     for span in root.xpath('//span'):
@@ -134,3 +146,22 @@ def cleanup_markup(root, styles):
     for span in root.xpath('//span[not(@class) and not(@id)]'):
         lift(span)
 
+    if detect_cover:
+        # Check if the first image in the document is possibly a cover
+        img = root.xpath('//img[@src][1]')
+        if img:
+            img = img[0]
+            path = os.path.join(dest_dir, img.get('src'))
+            if os.path.exists(path) and before_count(root, img, limit=10) < 5:
+                from calibre.utils.magick.draw import identify
+                try:
+                    width, height, fmt = identify(path)
+                except Exception:
+                    width, height, fmt = 0, 0, None
+                # An unreadable image is recorded as 0x0 above; it must not be
+                # used as a divisor, so treat it as "not a cover" instead
+                is_cover = width > 0 and 0.8 <= height/width <= 1.8 and height*width >= 160000
+                if is_cover:
+                    log.debug('Detected an image that looks like a cover')
+                    img.getparent().remove(img)
+                    return path
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index c3b2391d3f..963d1fc6c8 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -40,11 +40,12 @@ class Text:
 
 class Convert(object):
 
-    def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
+    def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None):
         self.docx = DOCX(path_or_stream, log=log)
         self.ms_pat = re.compile(r'\s{2,}')
         self.ws_pat = re.compile(r'[\n\r\t]')
         self.log = self.docx.log
+        self.detect_cover = detect_cover
         self.notes_text = notes_text or _('Notes')
         self.dest_dir = dest_dir or os.getcwdu()
         self.mi = self.docx.metadata
@@ -169,7 +170,7 @@ class Convert(object):
                 break
 
         self.log.debug('Cleaning up redundant markup generated by Word')
-        cleanup_markup(self.html, self.styles)
+        self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover)
 
         return self.write(doc)
 
@@ -280,6 +281,8 @@ class Convert(object):
         opf.toc = toc
         opf.create_manifest_from_files_in([self.dest_dir])
         opf.create_spine(['index.html'])
+        if self.cover_image is not None:
+            opf.guide.set_cover(self.cover_image)
         with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
             opf.render(of, ncx, 'toc.ncx')
         return os.path.join(self.dest_dir, 'metadata.opf')
diff --git a/src/calibre/gui2/convert/docx_input.py b/src/calibre/gui2/convert/docx_input.py
new file mode 100644
index 0000000000..46234c6a36
--- /dev/null
+++ b/src/calibre/gui2/convert/docx_input.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from calibre.gui2.convert.docx_input_ui import Ui_Form
+from calibre.gui2.convert import Widget
+
+class PluginWidget(Widget, Ui_Form):
+    # Conversion options page for DOCX input; exposes the single docx_no_cover option
+    TITLE = _('DOCX Input')
+    HELP = _('Options specific to')+' DOCX '+_('input')
+    COMMIT_NAME = 'docx_input'
+    ICON = I('mimetypes/docx.png')
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        Widget.__init__(self, parent,
+            ['docx_no_cover', ])
+        self.initialize_options(get_option, get_help, db, book_id)
+
diff --git a/src/calibre/gui2/convert/docx_input.ui b/src/calibre/gui2/convert/docx_input.ui
new file mode 100644
index 0000000000..41948118dc
--- /dev/null
+++ b/src/calibre/gui2/convert/docx_input.ui
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>518</width>
+    <height>353</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout_3">
+   <item>
+    <widget class="QCheckBox" name="opt_docx_no_cover">
+     <property name="text">
+      <string>Do not try to autodetect a &amp;cover from images in the document</string>
+     </property>
+    </widget>
+   </item>
+   <item>
+    <spacer name="verticalSpacer">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+     <property name="sizeHint" stdset="0">
+      <size>
+       <width>20</width>
+       <height>213</height>
+      </size>
+     </property>
+    </spacer>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>

From 1e5ce66ca36bbc16c479e0da0e801329a22c6387 Mon Sep 17 00:00:00 2001
From: fenuks <fenuks@gmail.com>
Date: Mon, 17 Jun 2013 09:45:13 +0200
Subject: [PATCH 16/25] various minor fixes

---
 recipes/ekologia_pl.recipe    |  4 ++-
 recipes/gildia_pl.recipe      | 59 ++++++++++++++++++++---------------
 recipes/media2.recipe         | 36 ++++++++++-----------
 recipes/nauka_w_polsce.recipe |  2 +-
 recipes/polter_pl.recipe      |  2 +-
 recipes/ppe_pl.recipe         | 46 ++++++++++++---------------
 recipes/pure_pc.recipe        | 17 ++++++----
 7 files changed, 85 insertions(+), 81 deletions(-)

diff --git a/recipes/ekologia_pl.recipe b/recipes/ekologia_pl.recipe
index e925ebad6f..c053e6d5bc 100644
--- a/recipes/ekologia_pl.recipe
+++ b/recipes/ekologia_pl.recipe
@@ -9,13 +9,15 @@ class EkologiaPl(BasicNewsRecipe):
     language       = 'pl'
     cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
     ignore_duplicate_articles = {'title', 'url'}
-    extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}'
+    extra_css = '.title {font-size: 200%;} .imagePowiazane {float:left; margin-right:5px; width: 200px;}'
     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True
     remove_empty_feeds = True
+    remove_javascript = True
     use_embedded_content = False
     remove_attrs = ['style']
+    keep_only_tags = [dict(attrs={'class':'contentParent'})]
     remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})]
 
     feeds          = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')]
diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe
index 37c129aaa1..513bbe44d6 100644
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@@ -16,40 +16,47 @@ class Gildia(BasicNewsRecipe):
     ignore_duplicate_articles = {'title', 'url'}
     preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '') ]
     ignore_duplicate_articles = {'title', 'url'}
-    remove_tags = [dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
-    keep_only_tags = dict(name='div', attrs={'class':'widetext'})
-    feeds          = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')]
-
+    remove_tags = [dict(name='div', attrs={'class':['backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
+    keep_only_tags = [dict(name='div', attrs={'class':'widetext'})]
+    feeds          = [(u'Gry', u'http://www.gry.gildia.pl/rss'),
+                        (u'Literatura', u'http://www.literatura.gildia.pl/rss'),
+                        (u'Film', u'http://www.film.gildia.pl/rss'),
+                        (u'Horror', u'http://www.horror.gildia.pl/rss'),
+                        (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'),
+                        (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'),
+                        (u'Manga i anime', u'http://www.manga.gildia.pl/rss'),
+                        (u'Star Wars', u'http://www.starwars.gildia.pl/rss'),
+                        (u'Techno', u'http://www.techno.gildia.pl/rss'),
+                        (u'Historia', u'http://www.historia.gildia.pl/rss'),
+                        (u'Magia', u'http://www.magia.gildia.pl/rss'),
+                        (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'),
+                        (u'RPG', u'http://www.rpg.gildia.pl/rss'),
+                        (u'LARP', u'http://www.larp.gildia.pl/rss'),
+                        (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'),
+                        (u'Nauka', u'http://www.nauka.gildia.pl/rss'),
+                    ]
 
     def skip_ad_pages(self, soup):
         content = soup.find('div', attrs={'class':'news'})
-        if 'recenzj' in soup.title.string.lower():
-            for link in content.findAll(name='a'):
-                if 'recenzj' in link['href'] or 'muzyka/plyty' in link['href']:
-                    return self.index_to_soup(link['href'], raw=True)
-        if 'fragmen' in soup.title.string.lower():
-            for link in content.findAll(name='a'):
-                if 'fragment' in link['href']:
-                    return self.index_to_soup(link['href'], raw=True)
-        if 'relacj' in soup.title.string.lower():
-            for link in content.findAll(name='a'):
-                if 'relacj' in link['href']:
-                    return self.index_to_soup(link['href'], raw=True)
-        if 'wywiad' in soup.title.string.lower():
-            for link in content.findAll(name='a'):
-                if 'wywiad' in link['href']:
-                    return self.index_to_soup(link['href'], raw=True)
-
+        words = ('recenzj', 'zapowied','fragmen', 'relacj', 'wywiad', 'nominacj')
+        for word in words:
+            if word in soup.title.string.lower():
+                for link in content.findAll(name='a'):
+                    if word in link['href'] or (link.string and word in link.string):
+                        return self.index_to_soup(link['href'], raw=True)
+        for tag in content.findAll(name='a', href=re.compile('/publicystyka/')):
+            if 'Wi&#281;cej...' == tag.string:
+                return self.index_to_soup(tag['href'], raw=True)
 
     def preprocess_html(self, soup):
         for a in soup('a'):
             if a.has_key('href') and not a['href'].startswith('http'):
                 if '/gry/' in a['href']:
-                    a['href']='http://www.gry.gildia.pl' + a['href']
+                    a['href'] = 'http://www.gry.gildia.pl' + a['href']
                 elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
-                    a['href']='http://www.literatura.gildia.pl' + a['href']
+                    a['href'] = 'http://www.literatura.gildia.pl' + a['href']
                 elif u'komiks' in soup.title.string.lower():
-                    a['href']='http://www.literatura.gildia.pl' + a['href']
+                    a['href'] = 'http://www.literatura.gildia.pl' + a['href']
                 else:
-                    a['href']='http://www.gildia.pl' + a['href']
-        return soup
+                    a['href'] = 'http://www.gildia.pl' + a['href']
+        return soup
\ No newline at end of file
diff --git a/recipes/media2.recipe b/recipes/media2.recipe
index 135740a62e..d685a90803 100644
--- a/recipes/media2.recipe
+++ b/recipes/media2.recipe
@@ -3,33 +3,29 @@
 __license__ = 'GPL v3'
 __copyright__ = 'teepel'
 
-'''
-media2.pl
-'''
-
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class media2_pl(BasicNewsRecipe):
     title = u'Media2'
     __author__ = 'teepel <teepel44@gmail.com>'
     language = 'pl'
-    description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
-    masthead_url='http://media2.pl/res/logo/www.png'
-    remove_empty_feeds= True
-    oldest_article = 1
+    description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
+    masthead_url = 'http://media2.pl/res/logo/www.png'
+    cover_url = 'http://media2.pl/res/logo/www.png'
+    remove_empty_feeds = True
+    oldest_article = 7
     max_articles_per_feed = 100
-    remove_javascript=True
-    no_stylesheets=True
-    simultaneous_downloads = 5
-
+    remove_javascript = True
+    no_stylesheets = True
+    remove_attributes = ['style']
+    ignore_duplicate_articles = {'title', 'url'}
     extra_css = '''.news-lead{font-weight: bold; }'''
 
-    keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'}))
+    keep_only_tags = [dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})]
+    remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : 'item-sidebar'}), dict(name = 'div', attrs = {'class' : 'news-tags'})]
 
-    remove_tags =[]
-    remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'}))
-
-    feeds          = [(u'Media2', u'http://feeds.feedburner.com/media2')]
+    feeds = [(u'Media2', u'http://feeds.feedburner.com/media2'), (u'Internet', u'http://feeds.feedburner.com/media2/internet'),
+            (u'Media', 'http://feeds.feedburner.com/media2/media'), (u'Telekomunikacja', 'http://feeds.feedburner.com/media2/telekomunikacja'),
+            (u'Reklama/PR', 'http://feeds.feedburner.com/media2/reklama-pr'), (u'Technologie', 'http://feeds.feedburner.com/media2/technologie'),
+            (u'Badania', 'http://feeds.feedburner.com/media2/badania')
+            ]
\ No newline at end of file
diff --git a/recipes/nauka_w_polsce.recipe b/recipes/nauka_w_polsce.recipe
index 715780d162..2a44aa7e84 100644
--- a/recipes/nauka_w_polsce.recipe
+++ b/recipes/nauka_w_polsce.recipe
@@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class NaukawPolsce(BasicNewsRecipe):
-    title = u'Nauka w Polsce'
+    title = u'PAP Nauka w Polsce'
     __author__ = 'fenuks'
     description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
     category = 'science'
diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe
index 1f9cef3be3..aea21dca9c 100644
--- a/recipes/polter_pl.recipe
+++ b/recipes/polter_pl.recipe
@@ -3,7 +3,7 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Poltergeist(BasicNewsRecipe):
-    title          = u'Poltergeist'
+    title          = u'Polter.pl'
     __author__        = 'fenuks'
     description   = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.'
     category       = 'fantasy, books, rpg, games'
diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe
index 2edc611ad7..597c9ef2d3 100644
--- a/recipes/ppe_pl.recipe
+++ b/recipes/ppe_pl.recipe
@@ -1,41 +1,35 @@
 #!/usr/bin/env  python
 
 __license__ = 'GPL v3'
-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class ppeRecipe(BasicNewsRecipe):
     __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
     language = 'pl'
-
     title = u'ppe.pl'
     category = u'News'
     description = u'Portal o konsolach i grach wideo.'
-    cover_url=''
-    remove_empty_feeds= True
-    no_stylesheets=True
-    oldest_article = 1
-    max_articles_per_feed = 100000
-    recursions = 0
+    extra_css = '.categories > li {list-style: none; display: inline;} .galmini > li {list-style: none; float: left;} .calibre_navbar {clear: both;}'
+    remove_empty_feeds = True
     no_stylesheets = True
+    oldest_article = 7
+    max_articles_per_feed = 100
     remove_javascript = True
-    simultaneous_downloads = 2
+    remove_empty_feeds = True
+    remove_attributes = ['style']
+    
+    keep_only_tags = [dict(attrs={'class':'box'})]
+    remove_tags = [dict(attrs={'class':['voltage-1', 'voltage-2', 'encyklopedia', 'nag', 'related', 'comment_form', 'komentarze-box']})]
 
-    keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'}))
+    feeds = [
+            ('Newsy', 'http://ppe.pl/rss.html'),
+            ('Recenzje', 'http://ppe.pl/rss-recenzje.html'),
+            ('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'),
+            ]
 
-    remove_tags =[]
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'}))
-
-    remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'}))
-
-    feeds          = [
-                            ('Newsy', 'feed://ppe.pl/rss/rss.xml'),
-                           ]
+    def get_cover_url(self):
+        soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html')
+        part = soup.find(attrs={'class':'archiwum-foto'})['style']
+        part = re.search("'(.+)'", part).group(1).replace('_min', '')
+        return 'http://www.ppe.pl' + part
diff --git a/recipes/pure_pc.recipe b/recipes/pure_pc.recipe
index 13d9307a09..167136c90f 100644
--- a/recipes/pure_pc.recipe
+++ b/recipes/pure_pc.recipe
@@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Comment
 
@@ -11,6 +12,7 @@ class PurePC(BasicNewsRecipe):
     language       = 'pl'
     masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
     cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
+    extra_css = '.wykres_logo {float: left; margin-right: 5px;}'
     no_stylesheets = True
     keep_only_tags= [dict(id='content')]
     remove_tags_after= dict(attrs={'class':'fivestar-widget'})
@@ -19,11 +21,14 @@ class PurePC(BasicNewsRecipe):
 
 
     def append_page(self, soup, appendtag):
-        nexturl= appendtag.find(attrs={'class':'pager-next'})
-        if nexturl:
-            while nexturl:
-                soup2 = self.index_to_soup('http://www.purepc.pl'+ nexturl.a['href'])
-                nexturl=soup2.find(attrs={'class':'pager-next'})
+        lasturl = appendtag.find(attrs={'class':'pager-last'})
+        if lasturl:
+            regex = re.search('(.+?2C)(\d+)', lasturl.a['href'])
+            baseurl = regex.group(1).replace('?page=0%2C', '?page=1%2C')
+            baseurl = 'http://www.purepc.pl' + baseurl
+            nr = int(regex.group(2))
+            for page_nr in range(1, nr+1):
+                soup2 = self.index_to_soup(baseurl+str(page_nr))
                 pagetext = soup2.find(attrs={'class':'article'})
                 pos = len(appendtag.contents)
                 appendtag.insert(pos, pagetext)
@@ -35,4 +40,4 @@ class PurePC(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         self.append_page(soup, soup.body)
-        return soup
+        return soup
\ No newline at end of file

From b08854e60acb47bd8b78894801c194c2f3e47ee7 Mon Sep 17 00:00:00 2001
From: fenuks <fenuks@gmail.com>
Date: Mon, 17 Jun 2013 09:53:53 +0200
Subject: [PATCH 17/25] new Polish news sources

---
 recipes/cdrinfo_pl.recipe          | 65 ++++++++++++++++++++++
 recipes/gazeta_pl_bydgoszcz.recipe | 88 ++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 recipes/cdrinfo_pl.recipe
 create mode 100644 recipes/gazeta_pl_bydgoszcz.recipe

diff --git a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe
new file mode 100644
index 0000000000..2a8b3b9a2e
--- /dev/null
+++ b/recipes/cdrinfo_pl.recipe
@@ -0,0 +1,65 @@
+__license__ = 'GPL v3'
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Comment
+class cdrinfo(BasicNewsRecipe):
+    # Recipe for CDRinfo.pl; articles split over several pages are merged by append_page
+    title          = u'CDRinfo.pl'
+    __author__        = 'fenuks'
+    description   = u'Serwis poświęcony archiwizacji danych. Testy i recenzje nagrywarek. Programy do nagrywania płyt.  Dyski twarde, dyski SSD i serwery sieciowe NAS. Rankingi dyskow twardych, najszybsze dyski twarde, newsy, artykuły, testy, recenzje, porady, oprogramowanie. Zestawienie nagrywarek, najnowsze biosy do nagrywarek, programy dla dysków twardych.'
+    category       = 'it, hardware'
+    language       = 'pl'
+    cover_url = 'http://www.cdrinfo.pl/gfx/graph3/top.jpg'
+    use_embedded_content = False
+    oldest_article = 777
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    remove_javascript = True
+    remove_attributes = ['style']
+    preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: '')]
+    ignore_duplicate_articles = {'title', 'url'}
+
+    # The 'ref' input is kept because append_page builds the page URLs from its value
+    keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id='text')]
+    remove_tags = [dict(attrs={'class':['navigation', 'sociable']}), dict(name='hr'), dict(id='respond')]
+    remove_tags_after = dict(id='artnawigacja')
+    feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
+            (u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
+            (u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml')
+            ]
+
+    def preprocess_html(self, soup):
+        # Extra pages are fetched only when the 'artnawigacja' element is present
+        if soup.find(id='artnawigacja'):
+            self.append_page(soup, soup.body)
+        return soup
+
+    def append_page(self, soup, appendtag):
+        # Download the pages linked from the pager and append their text to appendtag
+        baseurl = 'http://cdrinfo.pl' + soup.find(name='input', attrs={'name':'ref'})['value'] + '/'
+        if baseurl[-2] == '/':
+            baseurl = baseurl[:-1]
+        tag = soup.find(id='artnawigacja')
+        div = tag.find('div', attrs={'align':'right'})
+        while div:
+            soup2 = None
+            for attempt in range(5):  # retry failed downloads a few times
+                try:
+                    soup2 = self.index_to_soup(baseurl+div.a['href'])
+                    break
+                except Exception:
+                    continue
+            if soup2 is None:
+                break  # give up: without a fresh page the loop would never advance
+            tag2 = soup2.find(id='artnawigacja')
+            div = tag2.find('div', attrs={'align':'right'}) if tag2 is not None else None
+            pagetext = soup2.find(attrs={'class':'art'})
+            for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
+                comment.extract()
+            for r in soup2.findAll(attrs={'class':'star-rating'}):
+                r.extract()
+            for r in soup2.findAll(attrs={'class':'star-rating2'}):
+                r.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+        tag.extract()
\ No newline at end of file
diff --git a/recipes/gazeta_pl_bydgoszcz.recipe b/recipes/gazeta_pl_bydgoszcz.recipe
new file mode 100644
index 0000000000..f86d642419
--- /dev/null
+++ b/recipes/gazeta_pl_bydgoszcz.recipe
@@ -0,0 +1,88 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Comment
+import re
+class gw_bydgoszcz(BasicNewsRecipe):
+    title          = u'Gazeta Wyborcza Bydgoszcz'
+    __author__ = 'fenuks'
+    language       = 'pl'
+    description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.'
+    category = 'newspaper'
+    publication_type = 'newspaper'
+    masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif'
+    INDEX = 'http://bydgoszcz.gazeta.pl'
+    cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
+    remove_empty_feeds = True
+    oldest_article = 3
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+    ignore_duplicate_articles = {'title', 'url'}
+
+    #rules for gazeta.pl
+    preprocess_regexps = [(re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
+    keep_only_tags = [dict(id='gazeta_article')]
+    remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
+    remove_tags_after = dict(id='gazeta_article_body')
+
+    feeds          = [(u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')]
+
+    def print_version(self, url):
+        if 'feedsportal.com' in url:
+            s = url.rpartition('gazeta0Bpl')
+            u = s[2]
+            if not s[0]:
+                u = url.rpartition('wyborcza0Bpl')[2]
+            u = u.replace('/l/', '/')
+            u = u.replace('/ia1.htm', '')
+            u = u.replace('0Dbo0F1', '')
+            u = u.replace('/story01.htm', '')
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace('0E', '-')
+            u = u.replace('0H', ',')
+            u = u.replace('0I', '_')
+            u = u.replace('0B', '.')
+            u = self.INDEX + u
+            return u
+        else:
+            return url
+
+    def preprocess_html(self, soup):
+        tag = soup.find(id='Str')
+        if soup.find(attrs={'class': 'piano_btn_1'}):
+            return None
+        elif tag and tag.findAll('a'):
+            self.append_page(soup, soup.body)
+        return soup
+        
+    def append_page(self, soup, appendtag):
+        loop = False
+        tag = soup.find('div', attrs={'id': 'Str'})
+        try:
+            baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content']
+        except:
+            return 1
+        link = tag.findAll('a')[-1]
+        while link:
+            soup2 = self.index_to_soup(baseurl + link['href'])
+            link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
+            if not u'następne' in link.string:
+                link = ''
+            pagetext = soup2.find(id='artykul')
+            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
+            for comment in comments:
+                comment.extract()
+            pos = len(appendtag.contents)
+            appendtag.insert(pos, pagetext)
+        tag.extract()
+
+    def image_url_processor(self, baseurl, url):
+        if url.startswith(' '):
+            return url.strip()
+        else:
+            return url
\ No newline at end of file

From 82bfa745021e8cf998303cbe7fbb13aada15c10a Mon Sep 17 00:00:00 2001
From: fenuks <fenuks@gmail.com>
Date: Mon, 17 Jun 2013 09:58:02 +0200
Subject: [PATCH 18/25] new icons for recipes

---
 recipes/icons/cdrinfo_pl.png          | Bin 0 -> 909 bytes
 recipes/icons/gazeta_pl_bydgoszcz.png | Bin 0 -> 294 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 recipes/icons/cdrinfo_pl.png
 create mode 100644 recipes/icons/gazeta_pl_bydgoszcz.png

diff --git a/recipes/icons/cdrinfo_pl.png b/recipes/icons/cdrinfo_pl.png
new file mode 100644
index 0000000000000000000000000000000000000000..73dbc33692082371e7dcdc92d688a097f3c6da10
GIT binary patch
literal 909
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstU$g(vPY0F
z14ES>14Ba#1H&(%P{RubhEf9thF1v;3|2E37{m+a><YADU|_5b@Ck7RDmZ)L(t?#6
zW-eLRJ8Rj-Jx3R<*)n7KmStP^Pg=Zj(b5&m*KKK>IIpm6`n+{}${Tx<ikfmNS`HjN
z-8E%ye#_L^t9KUGcBWMJcFb6@dh7nmwn=%-Qzpz>mQ!5qlTb2!>E^7e&MB+*Y~Fun
z%Z`1OHBBBq0ZzUV$>m)@@oB4e-@N<q$nle<4PE{z6?s*yt+Q6I-g#uhw%tt==jFFf
z3rMZ%?43Ay>DGp>NeQK0v*yh64vv}x<aSQXuA4A@<{Wz$ugS}|xAsm6Nw2Q%nz?D$
z!F~G=`UZwtI=Fd-#5YcuJ7xBQx~>`hGnY<ZuxkI|lMykAsaXXRrp#<=>sz^IWB=5-
zQ)e&Ux#!^VQ)dg?XBM{4%Bk(it?Mo9n3Yo56Pe#!(lw`J)@t9FjJ)QlS@n}L>LwL+
z&dqC{7E{#bpHl7;kyX$(Gcc{vH?g$1b9Qt=OMGcZT2)t6ep6oS%(Tj`n8Fs<$eh5m
zYM-dI?1qU&9kbG_dt)-o^XmGF8Yle!|38c)I}8{B5hX!>!3-=~S-pi7^?p)J-p79*
zJ^ps4Qup7JyT7I}+5bDY<5$Gyf3vL2C;htf;vp}`p+8se6osDt_q+Axge&5=?sX;V
z-F(#eZxuUl<MY2yEB`&pW#;p$?+;$w<jSibn7bBe2V;`AyGxz@4##33XOE|gV~E7%
z<OBsa10%!D8#Zk$E^yirbH`?n;KZ6gfft#YoD>)nCw9!3ArZK-qvM7}A*UysW~PF<
zouiwXp{Av3sNXzC#uNP^Awgk*q2Au-PlTjjVBpNiGBn6|os#-I<(1KkMxPT`%?(V9
z`c9laZ<1)B+rh|a{Q1O;jzf!NxY@i{CrxPB${UdCnK@IJxp{7FrK`)5=42P2)Txs<
zb6-$gm-kNY>byA9t@rHq)d?K`#m>UU+TPOED*QB<fj?c(=<AmE;Xr?=mbgZgq$HN4
zS|t~y0x1R~14Cn712bJi^AH0|D^pV|QzLByBP#=ggYpGUC>nC}Q!>*kackJ%bixj(
Ofx*+&&t;ucLK6Vn`-mU_

literal 0
HcmV?d00001

diff --git a/recipes/icons/gazeta_pl_bydgoszcz.png b/recipes/icons/gazeta_pl_bydgoszcz.png
new file mode 100644
index 0000000000000000000000000000000000000000..49d76d2ddc6f4549c7211bb82f5176d1d8413c5b
GIT binary patch
literal 294
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!D3?x-;bCrM;OS+@4BLl<6e(pbstUx|vage(c
z!@6@aFM%9|WRD<U28Jp%28M<f28Lfip@tU>45bDP46hOx7_4S6Fo+k-*%fF5lwb?+
z32_C|R|OO``h28;EXI-`zhDN3XE)M-9L@rd$YLPv0mg18v+aNkK2I0N5RU7~6aF7`
z;9_E!{avu|+_&36QyElCTq8<S5=&C8l8aJ-6oZk0p|P%kiLQZhh>?Mnsfm?=g|>l_
zm4QL?RMuxG8glbfGSe!78VoFT4UBXR%|i^0txQd=jLo$T41pR-ew-5nYGCkm^>bP0
Hl+XkKPAf?O

literal 0
HcmV?d00001


From 45704e36c5786b34adf698fc7941177367be9dbc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 18 Jun 2013 09:00:46 +0530
Subject: [PATCH 19/25] Add Kobo Aura HD to Welcome Wizard

---
 src/calibre/gui2/wizard/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py
index 798ac5faca..f813eed892 100644
--- a/src/calibre/gui2/wizard/__init__.py
+++ b/src/calibre/gui2/wizard/__init__.py
@@ -139,7 +139,7 @@ class Kobo(Device):
     id = 'kobo'
 
 class KoboVox(Kobo):
-    name = 'Kobo Vox'
+    name = 'Kobo Vox and Kobo Aura HD'
     output_profile = 'tablet'
     id = 'kobo_vox'
 

From ccaa960edf9733e78028cb50c73b805790196a3e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 19 Jun 2013 10:02:20 +0530
Subject: [PATCH 20/25] pep8

---
 src/calibre/gui2/dialogs/plugin_updater.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py
index 3820169876..df76bec27d 100644
--- a/src/calibre/gui2/dialogs/plugin_updater.py
+++ b/src/calibre/gui2/dialogs/plugin_updater.py
@@ -254,7 +254,7 @@ Platforms: Windows, OSX, Linux; History: Yes;</span></i></li>
         return self.installed_version is not None
 
     def is_upgrade_available(self):
-        return self.is_installed() and (self.installed_version < self.available_version \
+        return self.is_installed() and (self.installed_version < self.available_version
                 or self.is_deprecated)
 
     def is_valid_platform(self):
@@ -317,7 +317,7 @@ class DisplayPluginModel(QAbstractTableModel):
 
     def data(self, index, role):
         if not index.isValid():
-            return NONE;
+            return NONE
         row, col = index.row(), index.column()
         if row < 0 or row >= self.rowCount():
             return NONE
@@ -357,7 +357,7 @@ class DisplayPluginModel(QAbstractTableModel):
             else:
                 return self._get_status_tooltip(display_plugin)
         elif role == Qt.ForegroundRole:
-            if col != 1: # Never change colour of the donation column
+            if col != 1:  # Never change colour of the donation column
                 if display_plugin.is_deprecated:
                     return QVariant(QBrush(Qt.blue))
                 if display_plugin.is_disabled():
@@ -417,7 +417,7 @@ class DisplayPluginModel(QAbstractTableModel):
                     icon_name = 'plugin_upgrade_invalid.png'
             else:
                 icon_name = 'plugin_upgrade_ok.png'
-        else: # A plugin available not currently installed
+        else:  # A plugin available not currently installed
             if display_plugin.is_valid_to_install():
                 icon_name = 'plugin_new_valid.png'
             else:
@@ -429,11 +429,11 @@ class DisplayPluginModel(QAbstractTableModel):
             return QVariant(_('This plugin has been deprecated and should be uninstalled')+'\n\n'+
                             _('Right-click to see more options'))
         if not display_plugin.is_valid_platform():
-            return QVariant(_('This plugin can only be installed on: %s') % \
+            return QVariant(_('This plugin can only be installed on: %s') %
                             ', '.join(display_plugin.platforms)+'\n\n'+
                             _('Right-click to see more options'))
         if numeric_version < display_plugin.calibre_required_version:
-            return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') % \
+            return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') %
                             self._get_display_version(display_plugin.calibre_required_version)+'\n\n'+
                             _('Right-click to see more options'))
         if display_plugin.installed_version < display_plugin.available_version:
@@ -687,7 +687,7 @@ class PluginUpdaterDialog(SizePersistedDialog):
 
     def _install_clicked(self):
         display_plugin = self._selected_display_plugin()
-        if not question_dialog(self, _('Install %s')%display_plugin.name, '<p>' + \
+        if not question_dialog(self, _('Install %s')%display_plugin.name, '<p>' +
                 _('Installing plugins is a <b>security risk</b>. '
                 'Plugins can contain a virus/malware. '
                     'Only install it if you got it from a trusted source.'
@@ -886,3 +886,4 @@ class PluginUpdaterDialog(SizePersistedDialog):
         pt.write(raw)
         pt.close()
         return pt.name
+

From 07c935b700bd6e7d35f3e587caa1a189b1a49669 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 19 Jun 2013 10:12:03 +0530
Subject: [PATCH 21/25] Do not show builtin plugins in the get new plugins
 dialog

If a builtin plugin with the same name as a third party plugin exists,
then the builtin plugin was displayed in the get new plugins dialog as
installed (happened with the new DOCX Input plugin).
---
 src/calibre/gui2/dialogs/plugin_updater.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py
index df76bec27d..c5d79218f9 100644
--- a/src/calibre/gui2/dialogs/plugin_updater.py
+++ b/src/calibre/gui2/dialogs/plugin_updater.py
@@ -89,7 +89,7 @@ def get_installed_plugin_status(display_plugin):
     display_plugin.installed_version = None
     display_plugin.plugin = None
     for plugin in initialized_plugins():
-        if plugin.name == display_plugin.name:
+        if plugin.name == display_plugin.name and plugin.plugin_path is not None:
             display_plugin.plugin = plugin
             display_plugin.installed_version = plugin.version
             break

From 0e14d36438eeb7ed8f39610267a1c78b7f98889f Mon Sep 17 00:00:00 2001
From: David Forrester <davidfor@internode.on.net>
Date: Wed, 19 Jun 2013 14:23:43 +1000
Subject: [PATCH 22/25] SQL delete needs firmware check for older Kobo firmware

Kobo driver: Fix a regression when deleting empty shelves on Kobo devices with older firmware.
Fixes #1192441 [Private bug](https://bugs.launchpad.net/calibre/+bug/1192441)

As reported here,
http://www.mobileread.com/forums/showthread.php?t=214760, if the Kobo
device is using firmware before 2.5.0, it doesn't have the Activity
table. The delete from this table when maintaining shelves needs a
version check around it.
---
 src/calibre/devices/kobo/driver.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py
index cddf6a561f..cb325efb07 100644
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@@ -1880,7 +1880,7 @@ class KOBOTOUCH(KOBO):
 
                     # Remove any entries for the Activity table - removes tile from new home page
                     if self.has_activity_table():
-                        debug_print('KoboTouch:delete_via_sql: detete from Activity')
+                        debug_print('KoboTouch:delete_via_sql: delete from Activity')
                         cursor.execute('delete from Activity where Id =?', t)
 
                     connection.commit()
@@ -2391,7 +2391,8 @@ class KOBOTOUCH(KOBO):
         cursor = connection.cursor()
         cursor.execute(delete_query)
         cursor.execute(update_query)
-        cursor.execute(delete_activity_query)
+        if self.has_activity_table():
+            cursor.execute(delete_activity_query)
         connection.commit()
         cursor.close()
 

From 8bd6cc840c8460279d1413cdf5ed141f0c12a9a4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 19 Jun 2013 11:36:33 +0530
Subject: [PATCH 23/25] DOCX metadata: Be more intelligent for covers

DOCX metadata: When reading covers from DOCX files use the first image
as specified in the actual markup instead of just the first image in the
container.
---
 src/calibre/ebooks/metadata/docx.py | 40 ++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py
index ea34d27d3a..2c8b91bc70 100644
--- a/src/calibre/ebooks/metadata/docx.py
+++ b/src/calibre/ebooks/metadata/docx.py
@@ -8,29 +8,39 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 from calibre.ebooks.docx.container import DOCX
+from calibre.ebooks.docx.names import XPath, get
 
-from calibre.utils.zipfile import ZipFile
 from calibre.utils.magick.draw import identify_data
 
+images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
+
+def get_cover(docx):
+    doc = docx.document
+    rid_map = docx.document_relationships[0]
+    for image in images(doc):
+        rid = get(image, 'r:embed') or get(image, 'r:id')
+        if rid in rid_map:
+            try:
+                raw = docx.read(rid_map[rid])
+                width, height, fmt = identify_data(raw)
+            except Exception:
+                continue
+            if 0.8 <= height/width <= 1.8 and height*width >= 160000:
+                return (fmt, raw)
+
 def get_metadata(stream):
     c = DOCX(stream, extract=False)
     mi = c.metadata
+    try:
+        cdata = get_cover(c)
+    except Exception:
+        cdata = None
+        import traceback
+        traceback.print_exc()
     c.close()
     stream.seek(0)
-    cdata = None
-    with ZipFile(stream, 'r') as zf:
-        for zi in zf.infolist():
-            ext = zi.filename.rpartition('.')[-1].lower()
-            if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
-                raw = zf.read(zi)
-                try:
-                    width, height, fmt = identify_data(raw)
-                except:
-                    continue
-                if 0.8 <= height/width <= 1.8 and height*width >= 160000:
-                    cdata = (fmt, raw)
-        if cdata is not None:
-            mi.cover_data = cdata
+    if cdata is not None:
+        mi.cover_data = cdata
 
     return mi
 

From 8020f489ca9eb51f3b997aba384f08bd4143ebcb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 19 Jun 2013 12:39:20 +0530
Subject: [PATCH 24/25] pep8

---
 src/calibre/web/feeds/templates.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py
index a22a79ef20..68af525cfd 100644
--- a/src/calibre/web/feeds/templates.py
+++ b/src/calibre/web/feeds/templates.py
@@ -13,7 +13,7 @@ from lxml.html.builder import HTML, HEAD, TITLE, STYLE, DIV, BODY, \
 
 from calibre import preferred_encoding, strftime, isbytestring
 
-def CLASS(*args, **kwargs): # class is a reserved word in Python
+def CLASS(*args, **kwargs):  # class is a reserved word in Python
     kwargs['class'] = ' '.join(args)
     return kwargs
 
@@ -26,7 +26,7 @@ class Template(object):
         self.html_lang = lang
 
     def generate(self, *args, **kwargs):
-        if not kwargs.has_key('style'):
+        if 'style' not in kwargs:
             kwargs['style'] = ''
         for key in kwargs.keys():
             if isbytestring(kwargs[key]):
@@ -152,8 +152,8 @@ class FeedTemplate(Template):
         body.append(div)
         if getattr(feed, 'image', None):
             div.append(DIV(IMG(
-                alt = feed.image_alt if feed.image_alt else '',
-                src = feed.image_url
+                alt=feed.image_alt if feed.image_alt else '',
+                src=feed.image_url
                 ),
                 CLASS('calibre_feed_image')))
         if getattr(feed, 'description', None):
@@ -261,8 +261,8 @@ class TouchscreenIndexTemplate(Template):
         for i, feed in enumerate(feeds):
             if feed:
                 tr = TR()
-                tr.append(TD( CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i)))
-                tr.append(TD( '%s' % len(feed.articles), style="text-align:right"))
+                tr.append(TD(CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i)))
+                tr.append(TD('%s' % len(feed.articles), style="text-align:right"))
                 toc.append(tr)
         div = DIV(
                 masthead_p,
@@ -307,7 +307,7 @@ class TouchscreenFeedTemplate(Template):
         if f > 0:
             link = A(CLASS('feed_link'),
                      trim_title(feeds[f-1].title),
-                     href = '../feed_%d/index.html' % int(f-1))
+                     href='../feed_%d/index.html' % int(f-1))
         navbar_tr.append(TD(CLASS('feed_prev'),link))
 
         # Up to Sections
@@ -319,13 +319,12 @@ class TouchscreenFeedTemplate(Template):
         if f < len(feeds)-1:
             link = A(CLASS('feed_link'),
                      trim_title(feeds[f+1].title),
-                     href = '../feed_%d/index.html' % int(f+1))
+                     href='../feed_%d/index.html' % int(f+1))
         navbar_tr.append(TD(CLASS('feed_next'),link))
         navbar_t.append(navbar_tr)
         top_navbar = navbar_t
         bottom_navbar = copy.copy(navbar_t)
-        #print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True)
-
+        # print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True)
 
         # Build the page
         head = HEAD(TITLE(feed.title))
@@ -342,8 +341,8 @@ class TouchscreenFeedTemplate(Template):
 
         if getattr(feed, 'image', None):
             div.append(DIV(IMG(
-                alt = feed.image_alt if feed.image_alt else '',
-                src = feed.image_url
+                alt=feed.image_alt if feed.image_alt else '',
+                src=feed.image_url
                 ),
                 CLASS('calibre_feed_image')))
         if getattr(feed, 'description', None):
@@ -411,6 +410,7 @@ class TouchscreenNavBarTemplate(Template):
         navbar_tr.append(TD(CLASS('article_next'),link))
         navbar_t.append(navbar_tr)
         navbar.append(navbar_t)
-        #print "\n%s\n" % etree.tostring(navbar, pretty_print=True)
+        # print "\n%s\n" % etree.tostring(navbar, pretty_print=True)
 
         self.root = HTML(head, BODY(navbar))
+

From adcc1739a65a5f5dc2a1bc6b0f13531534a00c98 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 19 Jun 2013 13:12:14 +0530
Subject: [PATCH 25/25] News download: "downloaded from" for touchscreens

News download: Add the "downloaded from" link at the bottom of every
article when using a touchscreen output profile (like the Tablet
profile).
---
 src/calibre/web/feeds/templates.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py
index 68af525cfd..3ee90c43a6 100644
--- a/src/calibre/web/feeds/templates.py
+++ b/src/calibre/web/feeds/templates.py
@@ -387,6 +387,14 @@ class TouchscreenNavBarTemplate(Template):
         navbar_t = TABLE(CLASS('touchscreen_navbar'))
         navbar_tr = TR()
 
+        if bottom and not url.startswith('file://'):
+            navbar.append(HR())
+            text = 'This article was downloaded by '
+            p = PT(text, STRONG(__appname__), A(url, href=url),
+                    style='text-align:left; max-width: 100%; overflow: hidden;')
+            p[0].tail = ' from '
+            navbar.append(p)
+            navbar.append(BR())
         # | Previous
         if art > 0:
             link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1))