# calibre/recipes/upi.recipe

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

import re
from calibre.web.feeds.news import BasicNewsRecipe


class UnitedPressInternational(BasicNewsRecipe):
    """News recipe for United Press International (upi.com).

    Declares the feeds to download, a set of regular-expression clean-ups
    applied to the downloaded HTML, and the rule for turning an article URL
    into the URL that is actually fetched.
    """

    title = 'United Press International'
    max_articles_per_feed = 15
    html2lrf_options = [
        '--override-css= "H1 {font-family: Arial; font-weight: bold; color: #000000; size: 10pt;}"']
    language = 'en'

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled with IGNORECASE and DOTALL, so tag case does not matter and
    # `.` also matches newlines (the spans below cross many lines).
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Empty out the document head.
            (r'<HEAD>.*?</HEAD>', lambda match: '<HEAD></HEAD>'),
            # Drop everything from the RSS sidebar background div up to the
            # "start Entries" marker.
            (r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->',
             lambda match: ''),
            # Drop everything after the end of the content area, keeping
            # only the closing body tag.
            (r'<!-- end apple-rss-content-area -->.*?</body>',
             lambda match: '</body>'),
            # Remove all script elements.
            (r'<script.*?>.*?</script>', lambda match: ''),
            # Replace the opening body tag, and everything up to the first
            # link to the UPI home page, with a plain styled body tag.
            (r'<body onload=.*?>.*?<a href="http://www.upi.com">',
             lambda match: '<body style="font: 8pt arial;">'),
            # Remove a trailing external script block through end of body.
            (r'<script src="http://www.g.*?>.*?</body>', lambda match: ''),
            # Shrink 16pt spans to 12pt.
            (r'<span style="font: 16pt arial',
             lambda match: '<span style="font: 12pt arial'),
        ]
    ]

    def get_feeds(self):
        """Return the list of ``(section title, feed URL)`` pairs to fetch."""
        return [
            ('Top Stories', 'http://www.upi.com/rss/NewsTrack/Top_News/'),
            ('Science', 'http://www.upi.com/rss/NewsTrack/Science/'),
            # Fixed spelling: this title was previously 'Heatlth'.
            ('Health', 'http://www.upi.com/rss/NewsTrack/Health/'),
            ('Quirks', 'http://www.upi.com/rss/NewsTrack/Quirks/'),
        ]

    def print_version(self, url):
        """Return *url* with ``'print/'`` appended.

        NOTE(review): presumably this addresses UPI's printer-friendly page
        and relies on *url* ending in ``/`` -- confirm against live feeds.
        """
        return url + 'print/'