From c70adff6bfedbce5043ec0852fc5dca9c68a9ea3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Dec 2009 12:23:39 -0700
Subject: [PATCH] Fix #4226 (London Review of Books fails to load completely.
 An index and table of contents are the only pages in the feed.)

---
 resources/recipes/lrb.recipe | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/resources/recipes/lrb.recipe b/resources/recipes/lrb.recipe
index 8c248b00f1..0076b3e697 100644
--- a/resources/recipes/lrb.recipe
+++ b/resources/recipes/lrb.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env  python
 
 __license__   = 'GPL v3'
 __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
@@ -12,30 +11,29 @@ class LondonReviewOfBooks(BasicNewsRecipe):
     title                 = u'London Review of Books'
     __author__            = u'Darko Miletic'
     description           = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
+    category              = 'news, literature, England'
+    publisher             = 'London Review of Books'
     oldest_article        = 7
     max_articles_per_feed = 100
-    language = 'en_GB'
-
+    language              = 'en_GB'
     no_stylesheets        = True
     use_embedded_content  = False
-    encoding              = 'cp1252'
+    encoding              = 'utf-8'
 
+    conversion_options = {  
+                             'comments'  : description
+                            ,'tags'      : category
+                            ,'language'  : language
+                            ,'publisher' : publisher
+                         }
+    
+    keep_only_tags = [dict(name='div' , attrs={'id'   :'main'})]
     remove_tags = [
-                    dict(name='div' , attrs={'id'   :'otherarticles'})
-                   ,dict(name='div' , attrs={'class':'pagetools'    })
-                   ,dict(name='div' , attrs={'id'   :'mainmenu'     })
-                   ,dict(name='div' , attrs={'id'   :'precontent'   })
-                   ,dict(name='div' , attrs={'class':'nocss'        })
-                   ,dict(name='span', attrs={'class':'inlineright'  })
+                    dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
+                   ,dict(name='div' , attrs={'id'   :['mainmenu','precontent','otherarticles']     })
+                   ,dict(name='span', attrs={'class':['inlineright','article-icons']})
+                   ,dict(name='ul'  , attrs={'class':'article-controls'})
+                   ,dict(name='p'   , attrs={'class':'meta-info'       })
                   ]
 
     feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
-
-    def print_version(self, url):
-        main, split, rest = url.rpartition('/')
-        return main + '/print/' + rest
-
-    def postprocess_html(self, soup, first_fetch):
-        for t in soup.findAll(['table', 'tr', 'td']):
-            t.name = 'div'
-        return soup