From cd60236542ad8460a2d01b8da75d83c16c71a587 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 9 Jul 2015 16:08:16 +0530
Subject: [PATCH] Update Economia

---
 recipes/economia.recipe | 55 ++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/recipes/economia.recipe b/recipes/economia.recipe
index 249125b76f..f2f9c228c0 100644
--- a/recipes/economia.recipe
+++ b/recipes/economia.recipe
@@ -1,17 +1,48 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from urllib import quote
 
-class AdvancedUserRecipe1314326622(BasicNewsRecipe):
-    title          = u'Economia'
-    __author__     = 'Manish Bhattarai'
+class EconomiaMagazine(BasicNewsRecipe):
+    title          = u'Economia Magazine'
+    __author__     = 'Kovid Goyal'
     description = 'Economia - Intelligence & Insight for ICAEW Members'
     language = 'en_GB'
-    oldest_article = 7
-    max_articles_per_feed = 25
-    masthead_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
-    cover_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
+    BASE = 'http://economia.icaew.com/'
     no_stylesheets = True
-    remove_empty_feeds = True
-    remove_tags_before = dict(id='content')
-    remove_tags_after  = dict(id='stars-wrapper')
-    remove_tags = [dict(attrs={'class':['floatR', 'sharethis', 'rating clearfix']})]
-    feeds          = [(u'News', u'http://feedity.com/icaew-com/VlNTVFRa.rss'),(u'Business', u'http://feedity.com/icaew-com/VlNTVFtS.rss'),(u'People', u'http://feedity.com/icaew-com/VlNTVFtX.rss'),(u'Opinion', u'http://feedity.com/icaew-com/VlNTVFtW.rss'),(u'Finance', u'http://feedity.com/icaew-com/VlNTVFtV.rss')]
+
+    keep_only_tags = [  # tag specs: h1, figure.figure, and .intro / .articleCopy
+        {'name': 'h1'},
+        {'name': 'figure', 'attrs': {'class': lambda x: x and 'figure' in x.split()}},
+        {'attrs': {'class': ['intro', 'articleCopy']}},
+    ]
+
+    def image_url_processor(cls, baseurl, iurl):
+        # Percent-encode the image path; an empty path maps to '404.jpeg'.
+        suffix = quote(iurl) if iurl else '404.jpeg'
+        return baseurl + suffix
+
+    def preprocess_raw_html(self, raw_html, url):
+        return raw_html.replace('src=""', '')  # drop every empty src="" attribute from the markup
+
+    def parse_index(self):
+        '''Return [(section_title, [article dicts])]; also sets cover_url and timefmt.'''
+        soup = self.index_to_soup(self.BASE)
+        img = soup.find('img', src=lambda x:x and 'Magazine covers' in x)
+        self.cover_url = self.BASE + quote(img['src'].encode('utf-8'))
+        # The cover image is wrapped in the link to the issue's contents page.
+        soup = self.index_to_soup(self.BASE + img.parent['href'])
+        self.timefmt = ' [%s]' % self.tag_to_string(soup.find('title')).split('|')[0].strip()
+        ans = []
+        for div in soup.findAll('div', attrs={'class':'articlePreview'}):
+            section_title = self.tag_to_string(div.find('h2')).strip()
+            self.log('Found section:', section_title)
+            articles = []
+            for li in div.findAll('li'):
+                h3 = li.find('h3')
+                a = h3.find('a', href=True) if h3 is not None else None
+                if a is None:
+                    continue  # no linked headline: skip instead of crashing the index
+                title, url = self.tag_to_string(h3), self.BASE + a['href']
+                self.log('\t', title, 'at', url)
+                articles.append({'title':title, 'url':url, 'description':self.tag_to_string(li.find('p'))})
+            if articles:
+                ans.append((section_title, articles))
+        return ans