Improved Economist

Kovid Goyal 2010-04-07 18:07:12 +05:30
parent 47cee548dc
commit 5dfc08ed4e
3 changed files with 21 additions and 5 deletions

View File

@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString
-import mechanize, string, urllib, time
+import mechanize, string, urllib, time, re

 class Economist(BasicNewsRecipe):
@@ -27,6 +27,8 @@ class Economist(BasicNewsRecipe):
                    dict(attrs={'class':['dblClkTrk']})]
     remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
     needs_subscription = True
+    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
+        lambda x:'</html>')]

     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -111,11 +113,15 @@ class Economist(BasicNewsRecipe):
             yield x

     def postprocess_html(self, soup, first):
         body = soup.find('body')
+        for name, val in body.attrs:
+            del body[name]
         for table in list(self.eco_find_image_tables(soup)):
             caption = table.find('font')
             img = table.find('img')
             div = Tag(soup, 'div')
-            div['style'] = 'text-align:center;font-size:70%'
+            div['style'] = 'text-align:left;font-size:70%'
             ns = NavigableString(self.tag_to_string(caption))
             div.insert(0, ns)
             div.insert(1, Tag(soup, 'br'))
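
The added preprocess_regexps pair truncates the downloaded page at the closing </html> tag, so any trailing bytes the server appends are discarded before the HTML is parsed (BasicNewsRecipe applies each compiled regex with its callable replacement to the raw page source). A minimal sketch applying the same pair by hand; the sample input string is made up for illustration:

    import re

    # Same (regex, callable) pair as in the recipe above.
    pattern = re.compile('</html>.*', re.DOTALL)
    raw = '<html><body>article text</body></html>\n<!-- trailing junk -->'
    cleaned = pattern.sub(lambda match: '</html>', raw)
    print(cleaned)  # -> <html><body>article text</body></html>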

View File

@@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.threadpool import ThreadPool, makeRequests
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString
-import time, string
+import time, string, re
 from datetime import datetime
 from lxml import html
@@ -19,9 +19,13 @@ class Economist(BasicNewsRecipe):
     remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
                    dict(attrs={'class':['dblClkTrk']})]
     remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
+    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
+        lambda x:'</html>')]

     def parse_index(self):
         from calibre.web.feeds.feedparser import parse
+        if self.test:
+            self.oldest_article = 14.0
         raw = self.index_to_soup(
                 'http://feeds.feedburner.com/economist/full_print_edition',
                 raw=True)
@@ -44,6 +48,8 @@ class Economist(BasicNewsRecipe):
             author = item.get('author', '')
             requests.append([i, link, title, description, author, published])
+        if self.test:
+            requests = requests[:4]
         requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
                 self.eco_article_failed)
         for r in requests: pool.putRequest(r)
@@ -114,11 +120,14 @@ class Economist(BasicNewsRecipe):
             yield x

     def postprocess_html(self, soup, first):
         body = soup.find('body')
+        for name, val in body.attrs:
+            del body[name]
         for table in list(self.eco_find_image_tables(soup)):
             caption = table.find('font')
             img = table.find('img')
             div = Tag(soup, 'div')
-            div['style'] = 'text-align:center;font-size:70%'
+            div['style'] = 'text-align:left;font-size:70%'
             ns = NavigableString(self.tag_to_string(caption))
             div.insert(0, ns)
             div.insert(1, Tag(soup, 'br'))
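
The loop added to postprocess_html (in both recipes) strips every attribute from the <body> tag; the bundled BeautifulSoup 3 exposes attrs as (name, value) pairs, which is why the recipe unpacks tuples. A rough equivalent using the standalone bs4 package, where attrs is a dict so the keys are copied first; bs4 and the sample markup are stand-ins for illustration, not what the recipe itself imports:

    from bs4 import BeautifulSoup  # stand-in for calibre's bundled BeautifulSoup

    soup = BeautifulSoup('<body onload="track()" class="mainbody">text</body>',
                         'html.parser')
    body = soup.find('body')
    for name in list(body.attrs):   # copy the keys before deleting
        del body[name]
    print(body)  # -> <body>text</body>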

View File

@@ -65,7 +65,8 @@ def debug(ioreg_to_tmp=False, buf=None):
         ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
         ioreg += Device.run_ioreg()
     connected_devices = []
-    for dev in device_plugins():
+    for dev in sorted(device_plugins(), cmp=lambda
+            x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
         out('Looking for', dev.__class__.__name__)
         connected, det = s.is_device_connected(dev, debug=True)
         if connected:
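
Sorting device_plugins() by class name makes the debug output list drivers in a stable, alphabetical order on every run. The cmp= keyword used here is the Python 2 spelling; a minimal sketch of the same ordering with key= (the plugin classes below are made-up stand-ins, not calibre device drivers):

    class AndroidDevice: pass
    class KindleDevice: pass
    class PocketBook: pass

    plugins = [PocketBook(), KindleDevice(), AndroidDevice()]
    for dev in sorted(plugins, key=lambda d: d.__class__.__name__):
        print('Looking for', dev.__class__.__name__)
    # -> AndroidDevice, KindleDevice, PocketBook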