mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Improved Economist
This commit is contained in:
parent
47cee548dc
commit
5dfc08ed4e
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
|
||||
import mechanize, string, urllib, time
|
||||
import mechanize, string, urllib, time, re
|
||||
|
||||
class Economist(BasicNewsRecipe):
|
||||
|
||||
@ -27,6 +27,8 @@ class Economist(BasicNewsRecipe):
|
||||
dict(attrs={'class':['dblClkTrk']})]
|
||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
||||
needs_subscription = True
|
||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||
lambda x:'</html>')]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -111,11 +113,15 @@ class Economist(BasicNewsRecipe):
|
||||
yield x
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
body = soup.find('body')
|
||||
for name, val in body.attrs:
|
||||
del body[name]
|
||||
|
||||
for table in list(self.eco_find_image_tables(soup)):
|
||||
caption = table.find('font')
|
||||
img = table.find('img')
|
||||
div = Tag(soup, 'div')
|
||||
div['style'] = 'text-align:center;font-size:70%'
|
||||
div['style'] = 'text-align:left;font-size:70%'
|
||||
ns = NavigableString(self.tag_to_string(caption))
|
||||
div.insert(0, ns)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
|
@ -1,7 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.threadpool import ThreadPool, makeRequests
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
import time, string
|
||||
import time, string, re
|
||||
from datetime import datetime
|
||||
from lxml import html
|
||||
|
||||
@ -19,9 +19,13 @@ class Economist(BasicNewsRecipe):
|
||||
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk']})]
|
||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||
lambda x:'</html>')]
|
||||
|
||||
def parse_index(self):
|
||||
from calibre.web.feeds.feedparser import parse
|
||||
if self.test:
|
||||
self.oldest_article = 14.0
|
||||
raw = self.index_to_soup(
|
||||
'http://feeds.feedburner.com/economist/full_print_edition',
|
||||
raw=True)
|
||||
@ -44,6 +48,8 @@ class Economist(BasicNewsRecipe):
|
||||
author = item.get('author', '')
|
||||
|
||||
requests.append([i, link, title, description, author, published])
|
||||
if self.test:
|
||||
requests = requests[:4]
|
||||
requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
|
||||
self.eco_article_failed)
|
||||
for r in requests: pool.putRequest(r)
|
||||
@ -114,11 +120,14 @@ class Economist(BasicNewsRecipe):
|
||||
yield x
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
body = soup.find('body')
|
||||
for name, val in body.attrs:
|
||||
del body[name]
|
||||
for table in list(self.eco_find_image_tables(soup)):
|
||||
caption = table.find('font')
|
||||
img = table.find('img')
|
||||
div = Tag(soup, 'div')
|
||||
div['style'] = 'text-align:center;font-size:70%'
|
||||
div['style'] = 'text-align:left;font-size:70%'
|
||||
ns = NavigableString(self.tag_to_string(caption))
|
||||
div.insert(0, ns)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
|
@ -65,7 +65,8 @@ def debug(ioreg_to_tmp=False, buf=None):
|
||||
ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
|
||||
ioreg += Device.run_ioreg()
|
||||
connected_devices = []
|
||||
for dev in device_plugins():
|
||||
for dev in sorted(device_plugins(), cmp=lambda
|
||||
x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
|
||||
out('Looking for', dev.__class__.__name__)
|
||||
connected, det = s.is_device_connected(dev, debug=True)
|
||||
if connected:
|
||||
|
Loading…
x
Reference in New Issue
Block a user