Improved Economist

2025-11-03 19:17:02 -05:00 · 2010-04-07 18:07:12 +05:30 · 2010-04-07 18:07:12 +05:30 · 5dfc08ed4e
commit 5dfc08ed4e
parent 47cee548dc
3 changed files with 21 additions and 5 deletions
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString

-import mechanize, string, urllib, time
+import mechanize, string, urllib, time, re

 class Economist(BasicNewsRecipe):

@@ -27,6 +27,8 @@ class Economist(BasicNewsRecipe):
            dict(attrs={'class':['dblClkTrk']})]
    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
    needs_subscription = True
+    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
+        lambda x:'</html>')]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@@ -111,11 +113,15 @@ class Economist(BasicNewsRecipe):
                yield x

    def postprocess_html(self, soup, first):
+        body = soup.find('body')
+        for name, val in body.attrs:
+            del body[name]
+
        for table in list(self.eco_find_image_tables(soup)):
            caption = table.find('font')
            img = table.find('img')
            div = Tag(soup, 'div')
-            div['style'] = 'text-align:center;font-size:70%'
+            div['style'] = 'text-align:left;font-size:70%'
            ns = NavigableString(self.tag_to_string(caption))
            div.insert(0, ns)
            div.insert(1, Tag(soup, 'br'))
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.threadpool import ThreadPool, makeRequests
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString
-import time, string
+import time, string, re
 from datetime import datetime
 from lxml import html

@@ -19,9 +19,13 @@ class Economist(BasicNewsRecipe):
    remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
            dict(attrs={'class':['dblClkTrk']})]
    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
+    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
+        lambda x:'</html>')]

    def parse_index(self):
        from calibre.web.feeds.feedparser import parse
+        if self.test:
+            self.oldest_article = 14.0
        raw = self.index_to_soup(
                'http://feeds.feedburner.com/economist/full_print_edition',
                raw=True)
@@ -44,6 +48,8 @@ class Economist(BasicNewsRecipe):
            author      = item.get('author', '')

            requests.append([i, link, title, description, author, published])
+        if self.test:
+            requests = requests[:4]
        requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
                self.eco_article_failed)
        for r in requests: pool.putRequest(r)
@@ -114,11 +120,14 @@ class Economist(BasicNewsRecipe):
                yield x

    def postprocess_html(self, soup, first):
+        body = soup.find('body')
+        for name, val in body.attrs:
+            del body[name]
        for table in list(self.eco_find_image_tables(soup)):
            caption = table.find('font')
            img = table.find('img')
            div = Tag(soup, 'div')
-            div['style'] = 'text-align:center;font-size:70%'
+            div['style'] = 'text-align:left;font-size:70%'
            ns = NavigableString(self.tag_to_string(caption))
            div.insert(0, ns)
            div.insert(1, Tag(soup, 'br'))
--- a/src/calibre/devices/__init__.py
+++ b/src/calibre/devices/__init__.py
@@ -65,7 +65,8 @@ def debug(ioreg_to_tmp=False, buf=None):
            ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
            ioreg += Device.run_ioreg()
        connected_devices = []
-        for dev in device_plugins():
+        for dev in sorted(device_plugins(), cmp=lambda
+                x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
            out('Looking for', dev.__class__.__name__)
            connected, det = s.is_device_connected(dev, debug=True)
            if connected: