mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Improved Economist
This commit is contained in:
parent
47cee548dc
commit
5dfc08ed4e
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||||
|
|
||||||
import mechanize, string, urllib, time
|
import mechanize, string, urllib, time, re
|
||||||
|
|
||||||
class Economist(BasicNewsRecipe):
|
class Economist(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -27,6 +27,8 @@ class Economist(BasicNewsRecipe):
|
|||||||
dict(attrs={'class':['dblClkTrk']})]
|
dict(attrs={'class':['dblClkTrk']})]
|
||||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
|
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||||
|
lambda x:'</html>')]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
@ -111,11 +113,15 @@ class Economist(BasicNewsRecipe):
|
|||||||
yield x
|
yield x
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
|
body = soup.find('body')
|
||||||
|
for name, val in body.attrs:
|
||||||
|
del body[name]
|
||||||
|
|
||||||
for table in list(self.eco_find_image_tables(soup)):
|
for table in list(self.eco_find_image_tables(soup)):
|
||||||
caption = table.find('font')
|
caption = table.find('font')
|
||||||
img = table.find('img')
|
img = table.find('img')
|
||||||
div = Tag(soup, 'div')
|
div = Tag(soup, 'div')
|
||||||
div['style'] = 'text-align:center;font-size:70%'
|
div['style'] = 'text-align:left;font-size:70%'
|
||||||
ns = NavigableString(self.tag_to_string(caption))
|
ns = NavigableString(self.tag_to_string(caption))
|
||||||
div.insert(0, ns)
|
div.insert(0, ns)
|
||||||
div.insert(1, Tag(soup, 'br'))
|
div.insert(1, Tag(soup, 'br'))
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.utils.threadpool import ThreadPool, makeRequests
|
from calibre.utils.threadpool import ThreadPool, makeRequests
|
||||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||||
import time, string
|
import time, string, re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
@ -19,9 +19,13 @@ class Economist(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||||
dict(attrs={'class':['dblClkTrk']})]
|
dict(attrs={'class':['dblClkTrk']})]
|
||||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
||||||
|
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||||
|
lambda x:'</html>')]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
from calibre.web.feeds.feedparser import parse
|
from calibre.web.feeds.feedparser import parse
|
||||||
|
if self.test:
|
||||||
|
self.oldest_article = 14.0
|
||||||
raw = self.index_to_soup(
|
raw = self.index_to_soup(
|
||||||
'http://feeds.feedburner.com/economist/full_print_edition',
|
'http://feeds.feedburner.com/economist/full_print_edition',
|
||||||
raw=True)
|
raw=True)
|
||||||
@ -44,6 +48,8 @@ class Economist(BasicNewsRecipe):
|
|||||||
author = item.get('author', '')
|
author = item.get('author', '')
|
||||||
|
|
||||||
requests.append([i, link, title, description, author, published])
|
requests.append([i, link, title, description, author, published])
|
||||||
|
if self.test:
|
||||||
|
requests = requests[:4]
|
||||||
requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
|
requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
|
||||||
self.eco_article_failed)
|
self.eco_article_failed)
|
||||||
for r in requests: pool.putRequest(r)
|
for r in requests: pool.putRequest(r)
|
||||||
@ -114,11 +120,14 @@ class Economist(BasicNewsRecipe):
|
|||||||
yield x
|
yield x
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
|
body = soup.find('body')
|
||||||
|
for name, val in body.attrs:
|
||||||
|
del body[name]
|
||||||
for table in list(self.eco_find_image_tables(soup)):
|
for table in list(self.eco_find_image_tables(soup)):
|
||||||
caption = table.find('font')
|
caption = table.find('font')
|
||||||
img = table.find('img')
|
img = table.find('img')
|
||||||
div = Tag(soup, 'div')
|
div = Tag(soup, 'div')
|
||||||
div['style'] = 'text-align:center;font-size:70%'
|
div['style'] = 'text-align:left;font-size:70%'
|
||||||
ns = NavigableString(self.tag_to_string(caption))
|
ns = NavigableString(self.tag_to_string(caption))
|
||||||
div.insert(0, ns)
|
div.insert(0, ns)
|
||||||
div.insert(1, Tag(soup, 'br'))
|
div.insert(1, Tag(soup, 'br'))
|
||||||
|
@ -65,7 +65,8 @@ def debug(ioreg_to_tmp=False, buf=None):
|
|||||||
ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
|
ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
|
||||||
ioreg += Device.run_ioreg()
|
ioreg += Device.run_ioreg()
|
||||||
connected_devices = []
|
connected_devices = []
|
||||||
for dev in device_plugins():
|
for dev in sorted(device_plugins(), cmp=lambda
|
||||||
|
x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
|
||||||
out('Looking for', dev.__class__.__name__)
|
out('Looking for', dev.__class__.__name__)
|
||||||
connected, det = s.is_device_connected(dev, debug=True)
|
connected, det = s.is_device_connected(dev, debug=True)
|
||||||
if connected:
|
if connected:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user