From ded912be3f6dc21d1b6373c8f9365db068063afa Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 17 Sep 2010 09:59:03 -0600
Subject: [PATCH 1/3] FAQ: Suggest Font Book's "Validate fonts" for finding corrupted fonts on OS X

---
 src/calibre/manual/faq.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index b93444f4c3..781048666b 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -376,7 +376,9 @@ be printed to it. If the debug output contains a line that looks like::
 
 then the problem is probably a corrupted font cache. You can clear the cache by following these
 `instructions <http://www.macworld.com/article/139383/2009/03/fontcacheclear.html>`_. If that doesn't
-solve it, look for a corrupted font file on your system, in ~/Library/Fonts or the like.
+solve it, look for a corrupted font file on your system, in ~/Library/Fonts or the like. An easy way to
+check for corrupted fonts in OS X is to start the "Font Book" application, select all fonts, and then choose
+"Validate fonts" from the File menu.
 
 My antivirus program claims |app| is a virus/trojan?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From ffe8fe5fd23a721af0fe1d07df109d78a39c743c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 17 Sep 2010 10:42:11 -0600
Subject: [PATCH 2/3] Fix use of UTF-8 raw string

---
 src/calibre/ebooks/conversion/preprocess.py | 1 -
 src/calibre/ebooks/conversion/utils.py      | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 3e5de26766..03a0047927 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -144,7 +144,6 @@ class HTMLPreProcessor(object):
     # Fix pdftohtml markup
     PDFTOHTML  = [
                   # Fix umlauts
-                  # ¨
                   (re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
                   (re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
                   (re.compile(u'¨\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ë'),
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 8588ff65ad..37fd169cb1 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -176,8 +176,8 @@ class PreProcessor(object):
         self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
         #
         # Unwrap and/or delete soft-hyphens, hyphens
-        html = re.sub(u'­\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
-        html = re.sub(u'(?<=[-–—])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)
+        html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
+        html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)
 
         # Unwrap lines using punctation and line length
         unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
@@ -195,6 +195,6 @@ class PreProcessor(object):
         html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
 
         # put back non-breaking spaces in empty paragraphs to preserve original formatting
-        html = blankreg.sub('\n'+'\g<openline>'+' '+'\g<closeline>', html)
+        html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
 
         return html

From 2c11080dc7780deae4ebfa4e3582a7ffd2885e5a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 17 Sep 2010 10:46:49 -0600
Subject: [PATCH 3/3] taz.de RSS by Alexander Schremmer

---
 resources/recipes/taz_rss.recipe | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 resources/recipes/taz_rss.recipe

diff --git a/resources/recipes/taz_rss.recipe b/resources/recipes/taz_rss.recipe
new file mode 100644
index 0000000000..6520a23b63
--- /dev/null
+++ b/resources/recipes/taz_rss.recipe
@@ -0,0 +1,24 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Alexander Schremmer <alex@alexanderweb.de>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TazRSSRecipe(BasicNewsRecipe):
+    """Recipe that downloads the main RSS feed of taz.de (die tageszeitung)."""
+    title = u'Taz.de (die tageszeitung) RSS Feed - German'
+    __author__ = 'Alexander Schremmer'
+    language = 'de'
+    lang = 'de-DE'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    publisher = 'taz Entwicklungs GmbH & Co. Medien KG'
+
+    conversion_options = {'publisher': publisher, 'language': lang}
+
+    feeds          = [(u'TAZ main feed', u'http://www.taz.de/rss.xml')]
+    # Keep the 'sect sect_article' div; strip rack, artikelwerbung and toolbar.
+    keep_only_tags = [dict(name='div', attrs={'class': 'sect sect_article'})]
+    remove_tags_after = dict(name='div', attrs={'class': 'rack'})
+    remove_tags = [dict(name=['div'], attrs={'class': 'rack'}),
+                   dict(name=['div'], attrs={'class': 'artikelwerbung'}),
+                   dict(name=['ul'], attrs={'class': 'toolbar'})]