mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
New recipe for The Chicago Tribune by Kovid Goyal
This commit is contained in:
parent
af8f3b56ce
commit
d221e7e448
@ -23,7 +23,6 @@ class Article(object):
|
|||||||
try:
|
try:
|
||||||
self.title = re.sub(r'&(\S+);',
|
self.title = re.sub(r'&(\S+);',
|
||||||
entity_to_unicode, self.title)
|
entity_to_unicode, self.title)
|
||||||
print 11111, repr(self.title)
|
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
self.url = url
|
self.url = url
|
||||||
|
@ -30,7 +30,7 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
||||||
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
||||||
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
||||||
'la_republica', 'physics_today',
|
'la_republica', 'physics_today', 'chicago_tribune',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
82
src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
Normal file
82
src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urlparse import urlparse, urlunparse
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
from threading import RLock
|
||||||
|
|
||||||
|
class ChicagoTribune(BasicNewsRecipe):
    """News recipe for the Chicago Tribune RSS feeds.

    Articles are flagged as obfuscated, so each one is fetched through
    :meth:`get_obfuscated_article`, which prefers the printer-friendly
    version of the page when a link to it can be found.
    """

    title = 'Chicago Tribune'
    __author__ = 'Kovid Goyal'
    description = 'Politics, local and business news from Chicago'
    language = _('English')
    use_embedded_content = False
    articles_are_obfuscated = True
    remove_tags_before = dict(name='h1')
    # Held for the whole of get_obfuscated_article(). The attribute name
    # keeps its original spelling so that existing references still work.
    obfuctation_lock = RLock()

    feeds = [
        ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
        ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
        ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
        ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
        ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
        ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
        ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
        ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
        ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
        ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
        ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
        ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
        ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
        ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
        ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
        ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
        ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
        ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
        ('Garisson Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
        ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
        ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
        ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
        ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
        ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
        ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        # NOTE(review): this is the same URL as the 'Movies' feed above, which
        # looks like a copy/paste slip -- confirm the real Music feed URL.
        ('Music', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
        ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
        ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
        ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
    ]

    # Every temporary file created by get_obfuscated_article() is appended
    # here. The list lives on the class, so it is shared by all instances.
    temp_files = []

    def get_article_url(self, article):
        """Return the URL to download for a feed entry.

        Preference order: ``feedburner_origlink``, then ``guid``, then
        ``link``.
        """
        return article.get('feedburner_origlink',
                           article.get('guid', article.get('link')))

    def get_obfuscated_article(self, url, logger):
        """Download the article at *url* and store it in a temporary file.

        If the page has an image with ``alt="Print"`` whose parent contains
        a link, that link is followed and the resulting page is used, with
        root-relative image sources rewritten to absolute URLs. Otherwise
        the element with id ``page-title`` and the element whose class
        matches ``asset-content`` are extracted from the page itself; if
        neither exists, the whole page is used.

        :param url: URL of the article page.
        :param logger: Unused by this implementation.
        :return: Name of the temporary ``.xhtml`` file holding the article,
                 encoded as UTF-8.
        """
        with self.obfuctation_lock:
            soup = self.index_to_soup(url)

            # Look for the link to the printer-friendly version. Both the
            # image and the link next to it may be missing.
            print_link = None
            print_img = soup.find('img', alt='Print')
            if print_img is not None and print_img.parent is not None:
                print_link = print_img.parent.find('a', href=True)

            if print_link is not None:
                purl = urlparse(url)
                xurl = urlunparse(purl[:2] + (print_link['href'], '', '', ''))
                soup = self.index_to_soup(xurl)
                for img in soup.findAll('img', src=True):
                    if img['src'].startswith('/'):
                        img['src'] = urlunparse(
                            purl[:2] + (img['src'], '', '', ''))
                html = unicode(soup)
            else:
                h1 = soup.find(id='page-title')
                body = soup.find(attrs={'class': re.compile('asset-content')})
                # Skip missing pieces so that the literal text "None" never
                # ends up in the generated article.
                parts = [unicode(tag) for tag in (h1, body) if tag is not None]
                if parts:
                    html = (u'<html><head/><body>%s</body></html>'
                            % u''.join(parts))
                else:
                    # Nothing recognisable was found: keep the page as is.
                    html = unicode(soup)

            tf = PersistentTemporaryFile('_chicago_tribune.xhtml')
            self.temp_files.append(tf)
            try:
                tf.write(html.encode('utf-8'))
            finally:
                # Close the file even when the write fails.
                tf.close()
            return tf.name
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user