mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
American Prospect, FactCheck, PolitiFact by Michael Heinz
This commit is contained in:
parent
e1b988598c
commit
b1287f0a51
26
resources/recipes/aprospect.recipe
Executable file
26
resources/recipes/aprospect.recipe
Executable file
@ -0,0 +1,26 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AmericanProspect(BasicNewsRecipe):
    """Calibre news recipe for the American Prospect article feed."""

    # Recipe identity.
    title = u'American Prospect'
    __author__ = u'Michael Heinz'
    language = 'en'

    # Download limits; links inside articles are not followed.
    oldest_article = 30
    max_articles_per_feed = 100
    recursions = 0

    # Drop the site's styling and scripts from the fetched pages.
    no_stylesheets = True
    remove_javascript = True

    # (pattern, replacement) pairs run over the downloaded HTML.
    # The order of the rules is significant and must be preserved.
    preprocess_regexps = [
        # Discard everything between <body> and the article container.
        (re.compile(r'<body.*?<div class="pad_10L10R">', re.S | re.I),
         lambda _m: '<body><div>'),
        # Greedy on purpose: cuts from the first </div> through the final
        # </body>, leaving only the first container's content.
        (re.compile(r'</div>.*</body>', re.S | re.I),
         lambda _m: '</div></body>'),
        # Strip carriage returns.
        (re.compile('\r'), lambda _m: ''),
        # Remove leftover comments, <link>, <script>, <noscript>, <meta/>.
        (re.compile(r'<!-- .+? -->', re.S | re.I), lambda _m: ''),
        (re.compile(r'<link .+?>', re.S | re.I), lambda _m: ''),
        (re.compile(r'<script.*?</script>', re.S | re.I), lambda _m: ''),
        (re.compile(r'<noscript.*?</noscript>', re.S | re.I), lambda _m: ''),
        (re.compile(r'<meta .*?/>', re.S | re.I), lambda _m: ''),
    ]

    feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
|
||||
|
19
resources/recipes/factcheck.recipe
Normal file
19
resources/recipes/factcheck.recipe
Normal file
@ -0,0 +1,19 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FactCheckOrg(BasicNewsRecipe):
    """Calibre news recipe for the FactCheck.org feed."""

    title = u'Factcheck'
    __author__ = u'Michael Heinz'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    # The BasicNewsRecipe attribute is spelled `recursions`; the previous
    # `recursion` was an unused attribute that the framework never read.
    recursions = 0

    publication_type = 'magazine'
    # The site header image serves as both masthead and cover.
    masthead_url = 'http://factcheck.org/wp-content/themes/Streamline/images/headernew.jpg'
    cover_url = 'http://factcheck.org/wp-content/themes/Streamline/images/headernew.jpg'

    # Remove page furniture, matched by element id.
    remove_tags = [dict(id=['footer', 'footerabout', 'sidebar'])]

    feeds = [(u'Factcheck', u'feed://www.factcheck.org/feed/')]
|
||||
|
30
resources/recipes/politifact.recipe
Normal file
30
resources/recipes/politifact.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
# Fixed module path: the package is `calibre.web`, not `calibre.wb`.
from calibre.web.feeds.news import BasicNewsRecipe


class PolitiFactCom(BasicNewsRecipe):
    """Calibre news recipe for the PolitiFact feeds."""

    title = u'Politifact'
    __author__ = u'Michael Heinz'
    oldest_article = 21
    max_articles_per_feed = 100
    # The BasicNewsRecipe attribute is spelled `recursions`; the previous
    # `recursion` was an unused attribute that the framework never read.
    recursions = 0
    language = 'en'

    no_stylesheets = True

    publication_type = 'magazine'
    # The site flag image serves as both masthead and cover.
    masthead_url = 'http://static.politifact.com.s3.amazonaws.com/images/politifactdotcom-flag-fff_01.png'
    cover_url = 'http://static.politifact.com.s3.amazonaws.com/images/politifactdotcom-flag-fff_01.png'

    # <div> elements removed from each page, matched by class.
    remove_tags = [
        dict(name='div', attrs={'class': 'pfstoryarchive'}),
        dict(name='div', attrs={'class': 'pfhead'}),
        dict(name='div', attrs={'class': 'boxmid'}),
    ]

    # Keep only the left-hand content column.
    keep_only_tags = [dict(name='div', attrs={'class': 'pfcontentleft'})]

    feeds = [
        (u'Articles', u'http://www.politifact.com/feeds/articles/truth-o-meter/'),
        (u'Obamameter', u'http://politifact.com/feeds/updates/'),
        (u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/'),
    ]
|
||||
|
||||
|
@ -34,6 +34,6 @@ class Worker(threading.Thread):
|
||||
def run(self):
|
||||
'''Thread loops taking jobs from the queue as they become available'''
|
||||
while True:
|
||||
job = self.jobs.get(True, None)
|
||||
self.jobs.get(True, None)
|
||||
# Do job
|
||||
self.jobs.task_done()
|
@ -354,7 +354,6 @@ if another paragraph_def is found, the state changes to collect_tokens.
|
||||
def __tab_stop_func(self, line):
|
||||
"""
|
||||
"""
|
||||
type = 'tabs-%s' % self.__tab_type
|
||||
self.__att_val_dict['tabs'] += '%s:' % self.__tab_type
|
||||
self.__att_val_dict['tabs'] += '%s;' % line[20:-1]
|
||||
self.__tab_type = 'left'
|
||||
@ -373,7 +372,6 @@ if another paragraph_def is found, the state changes to collect_tokens.
|
||||
"""
|
||||
leader = self.__tab_type_dict.get(self.__token_info)
|
||||
if leader != None:
|
||||
type = 'tabs-%s' % self.__tab_type
|
||||
self.__att_val_dict['tabs'] += '%s^' % leader
|
||||
else:
|
||||
if self.__run_level > 3:
|
||||
|
@ -318,7 +318,6 @@ class Styles:
|
||||
Try to add the number to dictionary entry tabs-left, or tabs-right, etc.
|
||||
If the dictionary entry doesn't exist, create one.
|
||||
"""
|
||||
type = 'tabs-%s' % self.__tab_type
|
||||
try:
|
||||
if self.__leader_found:
|
||||
self.__styles_dict['par'][self.__styles_num]['tabs']\
|
||||
@ -362,7 +361,6 @@ class Styles:
|
||||
leader = self.__tab_type_dict.get(self.__token_info)
|
||||
if leader != None:
|
||||
leader += '^'
|
||||
type = 'tabs-%s' % self.__tab_type
|
||||
try:
|
||||
self.__styles_dict['par'][self.__styles_num]['tabs'] += ':%s;' % leader
|
||||
except KeyError:
|
||||
|
Loading…
x
Reference in New Issue
Block a user