# Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-06-23 15:30:45 -04:00 (113 lines, 5.1 KiB, Python)
#!/usr/bin/env python
|
|
__license__ = 'GPL v3'
|
|
__author__ = 'Tony Stegall'
|
|
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
|
|
__version__ = '1.03'
|
|
__date__ = '27, September 2010'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
|
|
|
|
import datetime
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre recipe for 'The AJC', built from ajc.com and accessatlanta.com RSS feeds.

    The class is purely declarative apart from ``postprocess_html``: the
    attributes below configure the ``BasicNewsRecipe`` base class (metadata,
    which parts of each article page to keep or drop, and the feed list).
    Enable or disable individual feeds by (un)commenting entries in ``feeds``.
    """

    # Timestamp captured once, when the class body is executed.
    # NOTE(review): not referenced anywhere else in this recipe; kept so the
    # class attribute remains available to any external user.
    now = datetime.datetime.now()

    # --- Publication metadata -------------------------------------------
    title = 'The AJC'
    timefmt = ' [%a,%d %B %Y %I:%M %p]'
    __author__ = 'TonytheBookworm'
    language = 'en'
    description = 'News from Atlanta and USA'
    publisher = 'The Atlanta Journal'
    category = 'news, politics, USA'

    # --- Download limits / rendering options ----------------------------
    # Per the calibre recipe API, oldest_article is expressed in days.
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'

    # Replacement styling applied to the downloaded articles.
    extra_css = '''
                    h1.articleHeadline{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2.articleSubheadline{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}

                    p.byline{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
                    p.organization{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}


                    p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''

    # Only the article header block and the article text container are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['cxArticleHeader']}),
        dict(attrs={'id': ['cxArticleText']}),
    ]

    # Page furniture removed from within the kept content.
    remove_tags = [
        dict(name='div', attrs={'class': 'cxArticleList'}),
        dict(name='div', attrs={'class': 'cxFeedTease'}),
        dict(name='div', attrs={'class': 'cxElementEnlarge'}),
        dict(name='div', attrs={'id': 'cxArticleTools'}),
    ]

    feeds = [
        ('Breaking News', 'http://www.ajc.com/genericList-rss.do?source=61499'),
        # -------------------------------------------------------------------
        # Here are the different area feeds. Choose whichever one you wish to
        # read by simply removing the pound sign from it. It is currently
        # set to only get the Cobb area.
        # --------------------------------------------------------------------
        # ('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'),
        # ('Clayton', 'http://www.ajc.com/section-rss.do?source=clayton'),
        # ('DeKalb', 'http://www.ajc.com/section-rss.do?source=dekalb'),
        # ('Gwinnett', 'http://www.ajc.com/section-rss.do?source=gwinnett'),
        # ('North Fulton', 'http://www.ajc.com/section-rss.do?source=north-fulton'),
        # ('Metro', 'http://www.ajc.com/section-rss.do?source=news'),
        # ('Cherokee', 'http://www.ajc.com/section-rss.do?source=cherokee'),
        ('Cobb', 'http://www.ajc.com/section-rss.do?source=cobb'),
        # ('Fayette', 'http://www.ajc.com/section-rss.do?source=fayette'),
        # ('Henry', 'http://www.ajc.com/section-rss.do?source=henry'),
        # ('Q & A', 'http://www.ajc.com/genericList-rss.do?source=77197'),
        ('Opinions', 'http://www.ajc.com/section-rss.do?source=opinion'),
        ('Ga Politics', 'http://www.ajc.com/section-rss.do?source=georgia-politics-elections'),
        # ------------------------------------------------------------------------
        # Here are the different sports feeds. Only the Falcons and Highschool
        # feeds are enabled, but again you can enable whichever team you like
        # by removing the pound sign.
        # ------------------------------------------------------------------------
        # ('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'),
        # ('Braves', 'http://www.ajc.com/genericList-rss.do?source=61457'),
        ('Falcons', 'http://www.ajc.com/genericList-rss.do?source=61458'),
        # ('Hawks', 'http://www.ajc.com/genericList-rss.do?source=61522'),
        # ('Dawgs', 'http://www.ajc.com/genericList-rss.do?source=61492'),
        # ('Yellowjackets', 'http://www.ajc.com/genericList-rss.do?source=61523'),
        ('Highschool', 'http://www.ajc.com/section-rss.do?source=high-school'),
        ('Events', 'http://www.accessatlanta.com/section-rss.do?source=events'),
        ('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'),
    ]

    def postprocess_html(self, soup, first):
        """Remove image-credit spans from a downloaded page.

        :param soup: parsed HTML tree of the page; modified in place.
        :param first: unused by this implementation.
        :return: the same ``soup`` object, with every
            ``<span class="imageCredit rightFloat">`` detached from the tree.
        """
        for credit_tag in soup.findAll('span', attrs={'class': ['imageCredit rightFloat']}):
            credit_tag.extract()

        return soup

    # Disabled alternative: fetch the print view by dropping the query string
    # from the article URL and appending '?printArticle=y'.
    # def print_version(self, url):
    #     return url.partition('?')[0] +'?printArticle=y'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|