From c4fa7317be81a7eabdf5198efea84b29a2ab0b92 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 14 Dec 2009 11:30:26 -0700
Subject: [PATCH] Conversion pipeline: Decode XML files in a more
 fault-tolerant manner

---
 resources/recipes/guardian.recipe | 19 +++++++++----------
 src/calibre/ebooks/oeb/base.py    |  1 +
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/resources/recipes/guardian.recipe b/resources/recipes/guardian.recipe
index 1aee9bdf07..f74414a569 100644
--- a/resources/recipes/guardian.recipe
+++ b/resources/recipes/guardian.recipe
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 www.guardian.co.uk
 '''
-#from calibre import strftime
+from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Guardian(BasicNewsRecipe):
@@ -16,7 +16,7 @@ class Guardian(BasicNewsRecipe):
     language = 'en_GB'
 
     oldest_article = 7
-    max_articles_per_feed = 50
+    max_articles_per_feed = 100
     remove_javascript = True
 
     timefmt = ' [%a, %d %b %Y]'
@@ -75,9 +75,6 @@ class Guardian(BasicNewsRecipe):
 
           return soup
 
-
-
-'''
     def find_sections(self):
         soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
         # find cover pic
@@ -113,11 +110,13 @@ class Guardian(BasicNewsRecipe):
                         }
 
     def parse_index(self):
-        feeds = []
-        for title, href in self.find_sections():
-            feeds.append((title, list(self.find_articles(href))))
-        return feeds
-'''
+        try:
+            feeds = []
+            for title, href in self.find_sections():
+                feeds.append((title, list(self.find_articles(href))))
+            return feeds
+        except:
+            raise NotImplementedError
 
 
 
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index b37f51c6b5..dff2dc724b 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -759,6 +759,7 @@ class Manifest(object):
                 % (self.id, self.href, self.media_type)
 
         def _parse_xml(self, data):
+            data = xml_to_unicode(data, strip_encoding_pats=True)[0]
             parser = etree.XMLParser(recover=True)
             try:
                 return etree.fromstring(data, parser=parser)