#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1.03'
__date__ = '27, September 2010'
__docformat__ = 'restructuredtext en'

import datetime
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre recipe for The Atlanta Journal-Constitution (ajc.com).

    Pulls a configurable set of ajc.com / accessatlanta.com RSS feeds,
    keeps only the article header and article text of each page, and
    strips image-credit spans from the result.
    """

    # Timestamp taken once, when the class body is evaluated.
    now = datetime.datetime.now()

    # --- Publication metadata -------------------------------------------
    title = 'The AJC'
    timefmt = ' [%a,%d %B %Y %I:%M %p]'
    __author__ = 'TonytheBookworm'
    language = 'en'
    description = 'News from Atlanta and USA'
    publisher = 'The Atlanta Journal'
    category = 'news, politics, USA'

    # --- Download limits and presentation -------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'

    # Replacement styling for headlines, bylines and body text.
    extra_css = '''
        h1.articleHeadline{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2.articleSubheadline{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p.byline{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
        p.organization{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
        p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    # --- Page clean-up ---------------------------------------------------
    # Only the article header block and the article text are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['cxArticleHeader']}},
        {'attrs': {'id': ['cxArticleText']}},
    ]

    # Related-article lists, feed teasers, enlarge widgets and the article
    # toolbar are dropped from whatever was kept above.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'cxArticleList'}},
        {'name': 'div', 'attrs': {'class': 'cxFeedTease'}},
        {'name': 'div', 'attrs': {'class': 'cxElementEnlarge'}},
        {'name': 'div', 'attrs': {'id': 'cxArticleTools'}},
    ]

    # --- Feeds -----------------------------------------------------------
    feeds = [
        ('Breaking News', 'http://www.ajc.com/genericList-rss.do?source=61499'),

        # ---------------------------------------------------------------
        # Area feeds. Enable whichever ones you want to read by removing
        # the pound sign in front of them. Currently only the Cobb area
        # is enabled.
        # ---------------------------------------------------------------
        # ('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'),
        # ('Clayton', 'http://www.ajc.com/section-rss.do?source=clayton'),
        # ('DeKalb', 'http://www.ajc.com/section-rss.do?source=dekalb'),
        # ('Gwinnett', 'http://www.ajc.com/section-rss.do?source=gwinnett'),
        # ('North Fulton', 'http://www.ajc.com/section-rss.do?source=north-fulton'),
        # ('Metro', 'http://www.ajc.com/section-rss.do?source=news'),
        # ('Cherokee', 'http://www.ajc.com/section-rss.do?source=cherokee'),
        ('Cobb', 'http://www.ajc.com/section-rss.do?source=cobb'),
        # ('Fayette', 'http://www.ajc.com/section-rss.do?source=fayette'),
        # ('Henry', 'http://www.ajc.com/section-rss.do?source=henry'),
        # ('Q & A', 'http://www.ajc.com/genericList-rss.do?source=77197'),
        ('Opinions', 'http://www.ajc.com/section-rss.do?source=opinion'),
        ('Ga Politics', 'http://www.ajc.com/section-rss.do?source=georgia-politics-elections'),

        # ---------------------------------------------------------------
        # Sports feeds. Only the Falcons and high-school feeds are enabled;
        # enable any other team by removing the pound sign in front of it.
        # ---------------------------------------------------------------
        # ('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'),
        # ('Braves', 'http://www.ajc.com/genericList-rss.do?source=61457'),
        ('Falcons', 'http://www.ajc.com/genericList-rss.do?source=61458'),
        # ('Hawks', 'http://www.ajc.com/genericList-rss.do?source=61522'),
        # ('Dawgs', 'http://www.ajc.com/genericList-rss.do?source=61492'),
        # ('Yellowjackets', 'http://www.ajc.com/genericList-rss.do?source=61523'),
        ('Highschool', 'http://www.ajc.com/section-rss.do?source=high-school'),
        ('Events', 'http://www.accessatlanta.com/section-rss.do?source=events'),
        ('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'),
    ]

    def postprocess_html(self, soup, first):
        """Remove image-credit spans from a downloaded page.

        :param soup: parsed page; must provide ``findAll``.
        :param first: not used by this recipe.
        :return: the same ``soup`` object, modified in place.
        """
        image_credits = soup.findAll(
            'span', attrs={'class': ['imageCredit rightFloat']}
        )
        for span in image_credits:
            span.extract()
        return soup

    # Disabled alternative, kept for reference: request the
    # '?printArticle=y' variant of each article URL instead.
    # def print_version(self, url):
    #     return url.partition('?')[0] + '?printArticle=y'