American Prospect, FactCheck, PolitiFact by Michael Heinz

2025-07-09 03:04:10 -04:00 · 2010-05-18 18:03:16 -06:00 · 2010-05-18 18:03:16 -06:00 · b1287f0a51
commit b1287f0a51
parent e1b988598c
6 changed files with 82 additions and 11 deletions
--- a/resources/recipes/aprospect.recipe
+++ b/resources/recipes/aprospect.recipe
@ -0,0 +1,26 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class AmericanProspect(BasicNewsRecipe):
    # Recipe for The American Prospect: one RSS feed, with the page HTML
    # trimmed by the regular expressions below before conversion.
    # Attribute meanings follow the BasicNewsRecipe API documentation.
    title = u'American Prospect'
    __author__ = u'Michael Heinz'
    language = 'en'
    oldest_article = 30
    max_articles_per_feed = 100
    recursions = 0
    no_stylesheets = True
    remove_javascript = True
    feeds = [
        (u'Articles', u'feed://www.prospect.org/articles_rss.jsp'),
    ]
    # Each entry is (compiled pattern, replacement callable); every callable
    # ignores the match object and returns a fixed string.
    preprocess_regexps = [
        # Collapse everything from <body up to the first pad_10L10R div.
        (
            re.compile(r'<body.*?<div class="pad_10L10R">', re.S | re.I),
            lambda _m: '<body><div>',
        ),
        # Greedy on purpose or by accident: spans from the first </div> to
        # the last </body>.
        # NOTE(review): this discards all markup after the first closing
        # div -- confirm that is the intended cut point.
        (
            re.compile(r'</div>.*</body>', re.S | re.I),
            lambda _m: '</div></body>',
        ),
        # Strip carriage returns.
        (re.compile('\r'), lambda _m: ''),
        # Strip HTML comments of the form "<!-- ... -->".
        (re.compile(r'<!-- .+? -->', re.S | re.I), lambda _m: ''),
        # Strip <link>, <script>, <noscript> and self-closed <meta> tags.
        (re.compile(r'<link .+?>', re.S | re.I), lambda _m: ''),
        (re.compile(r'<script.*?</script>', re.S | re.I), lambda _m: ''),
        (re.compile(r'<noscript.*?</noscript>', re.S | re.I), lambda _m: ''),
        (re.compile(r'<meta .*?/>', re.S | re.I), lambda _m: ''),
    ]
--- a/resources/recipes/factcheck.recipe
+++ b/resources/recipes/factcheck.recipe
@ -0,0 +1,19 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class FactCheckOrg(BasicNewsRecipe):
    # Recipe for factcheck.org: a single site feed, with elements whose id
    # is footer, footerabout or sidebar removed from each page.
    title = u'Factcheck'
    __author__ = u'Michael Heinz'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    # Spelled `recursions` (plural) to match the attribute documented for
    # BasicNewsRecipe and used by the American Prospect recipe; the earlier
    # `recursion` spelling only created an unrelated class attribute.
    recursions = 0
    publication_type = 'magazine'
    # The site header image doubles as masthead and cover.
    masthead_url = 'http://factcheck.org/wp-content/themes/Streamline/images/headernew.jpg'
    cover_url = 'http://factcheck.org/wp-content/themes/Streamline/images/headernew.jpg'
    # Same mapping as before, written as a literal instead of dict({...}).
    remove_tags = [{'id': ['footer', 'footerabout', 'sidebar']}]
    feeds = [(u'Factcheck', u'feed://www.factcheck.org/feed/')]
--- a/resources/recipes/politifact.recipe
+++ b/resources/recipes/politifact.recipe
@ -0,0 +1,30 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class PolitiFactCom(BasicNewsRecipe):
    # Recipe for politifact.com: three feeds, keeping only the
    # pfcontentleft div of each page and dropping the archive, header and
    # boxmid divs.
    # The import above was corrected from `calibre.wb` to `calibre.web`;
    # the misspelled package made the recipe fail at import time.
    title = u'Politifact'
    __author__ = u'Michael Heinz'
    oldest_article = 21
    max_articles_per_feed = 100
    # Spelled `recursions` (plural) to match the attribute documented for
    # BasicNewsRecipe; `recursion` only created an unrelated attribute.
    recursions = 0
    language = 'en'
    no_stylesheets = True
    publication_type = 'magazine'
    # The site flag image doubles as masthead and cover.
    masthead_url = 'http://static.politifact.com.s3.amazonaws.com/images/politifactdotcom-flag-fff_01.png'
    cover_url = 'http://static.politifact.com.s3.amazonaws.com/images/politifactdotcom-flag-fff_01.png'
    remove_tags = [
        dict(name='div', attrs={'class': 'pfstoryarchive'}),
        dict(name='div', attrs={'class': 'pfhead'}),
        dict(name='div', attrs={'class': 'boxmid'}),
    ]
    keep_only_tags = [dict(name='div', attrs={'class': 'pfcontentleft'})]
    feeds = [
        (u'Articles', u'http://www.politifact.com/feeds/articles/truth-o-meter/'),
        (u'Obamameter', u'http://politifact.com/feeds/updates/'),
        (u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/'),
    ]
--- a/src/calibre/devices/manager.py
+++ b/src/calibre/devices/manager.py
@ -10,7 +10,7 @@ import threading, Queue
 class DeviceManager(object):
-    
+
    def __init__(self):
        self.devices = []
        self.device_jobs = Queue(0)
@ -21,19 +21,19 @@ class Job(object):
    def __init__(self, func, args):
        self.completed = False
        self.exception = None
-        
+
 class Worker(threading.Thread):
-    
+
-    def __init__(self, jobs):        
+    def __init__(self, jobs):
        self.jobs = jobs
        self.results = []
        threading.Thread.__init__(self)
        self.setDaemon(True)
-        
+
    def run(self):
        '''Thread loops taking jobs from the queue as they become available'''
        while True:
-            job = self.jobs.get(True, None)
+            self.jobs.get(True, None)
            # Do job
-            self.jobs.task_done()
+            self.jobs.task_done()
--- a/src/calibre/ebooks/rtf2xml/paragraph_def.py
+++ b/src/calibre/ebooks/rtf2xml/paragraph_def.py
@ -354,7 +354,6 @@ if another paragraph_def is found, the state changes to collect_tokens.
    def __tab_stop_func(self, line):
        """
        """
        type = 'tabs-%s' % self.__tab_type
        self.__att_val_dict['tabs'] += '%s:' % self.__tab_type
        self.__att_val_dict['tabs'] += '%s;' % line[20:-1]
        self.__tab_type = 'left'
@ -373,7 +372,6 @@ if another paragraph_def is found, the state changes to collect_tokens.
        """
        leader = self.__tab_type_dict.get(self.__token_info)
        if leader != None:
            type = 'tabs-%s' % self.__tab_type
            self.__att_val_dict['tabs'] += '%s^' % leader
        else:
            if self.__run_level > 3:
--- a/src/calibre/ebooks/rtf2xml/styles.py
+++ b/src/calibre/ebooks/rtf2xml/styles.py
@ -318,7 +318,6 @@ class Styles:
            Try to add the number to dictionary entry tabs-left, or tabs-right, etc.
            If the dictionary entry doesn't exist, create one.
        """
        type = 'tabs-%s' % self.__tab_type
        try:
            if self.__leader_found:
                self.__styles_dict['par'][self.__styles_num]['tabs']\
@ -362,7 +361,6 @@ class Styles:
        leader = self.__tab_type_dict.get(self.__token_info)
        if leader != None:
            leader += '^'
            type = 'tabs-%s' % self.__tab_type
            try:
                self.__styles_dict['par'][self.__styles_num]['tabs'] += ':%s;' % leader
            except KeyError: