From 5dfc08ed4ee6c73fa9f5b2736c20fd5523af9ffa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 7 Apr 2010 18:07:12 +0530 Subject: [PATCH] Improved Economist --- resources/recipes/economist.recipe | 10 ++++++++-- resources/recipes/economist_free.recipe | 13 +++++++++++-- src/calibre/devices/__init__.py | 3 ++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/resources/recipes/economist.recipe b/resources/recipes/economist.recipe index fc29ea8a65..c0b9c464d8 100644 --- a/resources/recipes/economist.recipe +++ b/resources/recipes/economist.recipe @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import Tag, NavigableString -import mechanize, string, urllib, time +import mechanize, string, urllib, time, re class Economist(BasicNewsRecipe): @@ -27,6 +27,8 @@ class Economist(BasicNewsRecipe): dict(attrs={'class':['dblClkTrk']})] remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body') needs_subscription = True + preprocess_regexps = [(re.compile('.*', re.DOTALL), + lambda x:'')] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -111,11 +113,15 @@ class Economist(BasicNewsRecipe): yield x def postprocess_html(self, soup, first): + body = soup.find('body') + for name, val in body.attrs: + del body[name] + for table in list(self.eco_find_image_tables(soup)): caption = table.find('font') img = table.find('img') div = Tag(soup, 'div') - div['style'] = 'text-align:center;font-size:70%' + div['style'] = 'text-align:left;font-size:70%' ns = NavigableString(self.tag_to_string(caption)) div.insert(0, ns) div.insert(1, Tag(soup, 'br')) diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe index b3d5caeffc..32e108d2d6 100644 --- a/resources/recipes/economist_free.recipe +++ b/resources/recipes/economist_free.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.utils.threadpool import ThreadPool, makeRequests from calibre.ebooks.BeautifulSoup import Tag, NavigableString -import time, string +import time, string, re from datetime import datetime from lxml import html @@ -19,9 +19,13 @@ class Economist(BasicNewsRecipe): remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), dict(attrs={'class':['dblClkTrk']})] remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body') + preprocess_regexps = [(re.compile('.*', re.DOTALL), + lambda x:'')] def parse_index(self): from calibre.web.feeds.feedparser import parse + if self.test: + self.oldest_article = 14.0 raw = self.index_to_soup( 'http://feeds.feedburner.com/economist/full_print_edition', raw=True) @@ -44,6 +48,8 @@ class Economist(BasicNewsRecipe): author = item.get('author', '') requests.append([i, link, title, description, author, published]) + if self.test: + requests = requests[:4] requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found, self.eco_article_failed) for r in requests: pool.putRequest(r) @@ -114,11 +120,14 @@ class Economist(BasicNewsRecipe): yield x def postprocess_html(self, soup, first): + body = soup.find('body') + for name, val in body.attrs: + del body[name] for table in list(self.eco_find_image_tables(soup)): caption = table.find('font') img = table.find('img') div = Tag(soup, 'div') - div['style'] = 'text-align:center;font-size:70%' + div['style'] = 'text-align:left;font-size:70%' ns = NavigableString(self.tag_to_string(caption)) div.insert(0, ns) div.insert(1, Tag(soup, 'br')) diff --git a/src/calibre/devices/__init__.py b/src/calibre/devices/__init__.py index f2065dd632..bcbd9b1640 100644 --- a/src/calibre/devices/__init__.py +++ b/src/calibre/devices/__init__.py @@ -65,7 +65,8 @@ def debug(ioreg_to_tmp=False, buf=None): ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n' ioreg += Device.run_ioreg() connected_devices = [] - for dev in device_plugins(): + for dev in sorted(device_plugins(), cmp=lambda + x,y:cmp(x.__class__.__name__, y.__class__.__name__)): out('Looking for', dev.__class__.__name__) connected, det = s.is_device_connected(dev, debug=True) if connected: