mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Improved Economist
This commit is contained in:
		
							parent
							
								
									47cee548dc
								
							
						
					
					
						commit
						5dfc08ed4e
					
				@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 | 
			
		||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
 | 
			
		||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 | 
			
		||||
 | 
			
		||||
import mechanize, string, urllib, time
 | 
			
		||||
import mechanize, string, urllib, time, re
 | 
			
		||||
 | 
			
		||||
class Economist(BasicNewsRecipe):
 | 
			
		||||
 | 
			
		||||
@ -27,6 +27,8 @@ class Economist(BasicNewsRecipe):
 | 
			
		||||
            dict(attrs={'class':['dblClkTrk']})]
 | 
			
		||||
    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
 | 
			
		||||
    needs_subscription = True
 | 
			
		||||
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
 | 
			
		||||
        lambda x:'</html>')]
 | 
			
		||||
 | 
			
		||||
    def get_browser(self):
 | 
			
		||||
        br = BasicNewsRecipe.get_browser()
 | 
			
		||||
@ -111,11 +113,15 @@ class Economist(BasicNewsRecipe):
 | 
			
		||||
                yield x
 | 
			
		||||
 | 
			
		||||
    def postprocess_html(self, soup, first):
 | 
			
		||||
        body = soup.find('body')
 | 
			
		||||
        for name, val in body.attrs:
 | 
			
		||||
            del body[name]
 | 
			
		||||
 | 
			
		||||
        for table in list(self.eco_find_image_tables(soup)):
 | 
			
		||||
            caption = table.find('font')
 | 
			
		||||
            img = table.find('img')
 | 
			
		||||
            div = Tag(soup, 'div')
 | 
			
		||||
            div['style'] = 'text-align:center;font-size:70%'
 | 
			
		||||
            div['style'] = 'text-align:left;font-size:70%'
 | 
			
		||||
            ns = NavigableString(self.tag_to_string(caption))
 | 
			
		||||
            div.insert(0, ns)
 | 
			
		||||
            div.insert(1, Tag(soup, 'br'))
 | 
			
		||||
 | 
			
		||||
@ -1,7 +1,7 @@
 | 
			
		||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
			
		||||
from calibre.utils.threadpool import ThreadPool, makeRequests
 | 
			
		||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 | 
			
		||||
import time, string
 | 
			
		||||
import time, string, re
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from lxml import html
 | 
			
		||||
 | 
			
		||||
@ -19,9 +19,13 @@ class Economist(BasicNewsRecipe):
 | 
			
		||||
    remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
 | 
			
		||||
            dict(attrs={'class':['dblClkTrk']})]
 | 
			
		||||
    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
 | 
			
		||||
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
 | 
			
		||||
        lambda x:'</html>')]
 | 
			
		||||
 | 
			
		||||
    def parse_index(self):
 | 
			
		||||
        from calibre.web.feeds.feedparser import parse
 | 
			
		||||
        if self.test:
 | 
			
		||||
            self.oldest_article = 14.0
 | 
			
		||||
        raw = self.index_to_soup(
 | 
			
		||||
                'http://feeds.feedburner.com/economist/full_print_edition',
 | 
			
		||||
                raw=True)
 | 
			
		||||
@ -44,6 +48,8 @@ class Economist(BasicNewsRecipe):
 | 
			
		||||
            author      = item.get('author', '')
 | 
			
		||||
 | 
			
		||||
            requests.append([i, link, title, description, author, published])
 | 
			
		||||
        if self.test:
 | 
			
		||||
            requests = requests[:4]
 | 
			
		||||
        requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
 | 
			
		||||
                self.eco_article_failed)
 | 
			
		||||
        for r in requests: pool.putRequest(r)
 | 
			
		||||
@ -114,11 +120,14 @@ class Economist(BasicNewsRecipe):
 | 
			
		||||
                yield x
 | 
			
		||||
 | 
			
		||||
    def postprocess_html(self, soup, first):
 | 
			
		||||
        body = soup.find('body')
 | 
			
		||||
        for name, val in body.attrs:
 | 
			
		||||
            del body[name]
 | 
			
		||||
        for table in list(self.eco_find_image_tables(soup)):
 | 
			
		||||
            caption = table.find('font')
 | 
			
		||||
            img = table.find('img')
 | 
			
		||||
            div = Tag(soup, 'div')
 | 
			
		||||
            div['style'] = 'text-align:center;font-size:70%'
 | 
			
		||||
            div['style'] = 'text-align:left;font-size:70%'
 | 
			
		||||
            ns = NavigableString(self.tag_to_string(caption))
 | 
			
		||||
            div.insert(0, ns)
 | 
			
		||||
            div.insert(1, Tag(soup, 'br'))
 | 
			
		||||
 | 
			
		||||
@ -65,7 +65,8 @@ def debug(ioreg_to_tmp=False, buf=None):
 | 
			
		||||
            ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
 | 
			
		||||
            ioreg += Device.run_ioreg()
 | 
			
		||||
        connected_devices = []
 | 
			
		||||
        for dev in device_plugins():
 | 
			
		||||
        for dev in sorted(device_plugins(), cmp=lambda
 | 
			
		||||
                x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
 | 
			
		||||
            out('Looking for', dev.__class__.__name__)
 | 
			
		||||
            connected, det = s.is_device_connected(dev, debug=True)
 | 
			
		||||
            if connected:
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user