diff --git a/recipes/fairbanks_daily.recipe b/recipes/fairbanks_daily.recipe
new file mode 100644
index 0000000000..282925728e
--- /dev/null
+++ b/recipes/fairbanks_daily.recipe
@@ -0,0 +1,128 @@
+#import re  # only needed if the preprocess_regexps (re.compile) experiments below are enabled
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class FairbanksDailyNewsminer(BasicNewsRecipe):
+    title = u'Fairbanks Daily News-Miner'
+ __author__ = 'Roger'
+ oldest_article = 7
+ max_articles_per_feed = 100
+
+    description = "'The voice of Interior Alaska since 1903'"
+ publisher = 'http://www.newsminer.com/'
+ category = 'news, Alaska, Fairbanks'
+ language = 'en'
+ #extra_css = '''
+ # p{font-weight: normal;text-align: justify}
+ # '''
+
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ encoding = 'utf8'
+ conversion_options = {'linearize_tables':True}
+    # TODO: I don't see any photos in my MOBI file with this masthead_url!
+ masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
+
+
+    # To omit the view count, the post count and the pipe dividers that follow
+    # the article title and date, a regex or manual processing is needed to
+    # extract just the "story_item_date updated" span (which contains the
+    # date).  Everything else on that line is not needed.
+ #
+ # HTML line containing story_item_date:
+ #
+    #   Aug 22, 2011 | 2370 views | 52 | 9 |
+
+    # The following was suggested, but it also needs self and soup to be
+    # defined (see the preprocess_html sketch further below):
+ #date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
+
+    #preprocess_regexps = [(re.compile(r'<span[^>]*addthis_separator*>'), lambda match: '') ]
+    #preprocess_regexps = [(re.compile(r'<span class="addthis_separator">|</span>'), lambda match: '') ]
+
+ #preprocess_regexps = [
+ # (re.compile(r'.*?', re.IGNORECASE | re.DOTALL), lambda match : ''),
+ # ]
+
+ #def get_browser(self):
+    #def preprocess_html(self, soup):
+    #    date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
+    #    return soup
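+
+    # A possible sketch of the idea above (untested; assumes the BeautifulSoup
+    # tag API bundled with calibre, i.e. nextSibling and extract()): keep only
+    # the date on the byline row by dropping the siblings that follow the
+    # "story_item_date updated" span (the "| NNNN views | NN" fragments).
+    def preprocess_html(self, soup):
+        date_span = soup.find('span', attrs={'class':'story_item_date updated'})
+        if date_span is not None:
+            # Remove everything after the date span on the same byline row
+            while date_span.nextSibling is not None:
+                date_span.nextSibling.extract()
+        return soup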
+
+
+ # Try to keep some tags - some might not be needed here
+ keep_only_tags = [
+ #date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'})),
+ dict(name='div', attrs={'class':'hnews hentry item'}),
+ dict(name='div', attrs={'class':'story_item_headline entry-title'}),
+ #dict(name='span', attrs={'class':'story_item_date updated'}),
+ dict(name='div', attrs={'class':'full_story'})
+ ]
+ #remove_tags = [
+ # dict(name='div', attrs={'class':'story_tools'}),
+ # dict(name='p', attrs={'class':'ad_label'}),
+ # ]
+
+ # Try to remove some bothersome tags
+ remove_tags = [
+ #dict(name='img', attrs={'alt'}),
+ dict(name='img', attrs={'class':'dont_touch_me'}),
+ dict(name='span', attrs={'class':'number_recommendations'}),
+ #dict(name='div', attrs={'class':'signature_line'}),
+ dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
+ dict(name='div', attrs={'class':['addthis_toolbox','addthis_default_style']}),
+ dict(name='span', attrs={'class':'addthis_separator'}),
+ dict(name='div', attrs={'class':'related_content'}),
+ dict(name='div', attrs={'class':'comments_container'}),
+ dict(name='div', attrs={'id':'comments_container'})
+ ]
+
+
+    # This one works, but it keeps only the title and date and clips the article content!
+ #remove_tags_after = [
+ # dict(name='span', attrs={'class':'story_item_date updated'})
+ # ]
+
+ #remove_tags_after = [
+ # dict(name='div', attrs={'class':'advertisement'}),
+ # ]
+
+    # Try clipping tags before and after the date to avoid pulling in the image/view/post counts that follow it?
+ #remove_tags_before = [
+ # dict(name='span', attrs={'class':'story_item_date updated'})
+ # ]
+
+    # TODO: tweak the appearance with extra_css (e.g. make article titles bold)?
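+
+    # A possible sketch for the TODO above (the class names are taken from
+    # keep_only_tags; the styling values themselves are guesses):
+    extra_css = '''
+        .story_item_headline, .entry-title {font-weight: bold}
+        p {font-weight: normal; text-align: justify}
+        '''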
+
+
+    # Comment out or uncomment any of the following RSS feeds to suit your
+    # liking.
+    #
+    # TODO: When more than one RSS feed is added, the newline between entries
+    # is omitted in the Table of Contents / Index of Articles.
+    #
+    # TODO: Some random bits of text trail the last page (or the TOC in MOBI
+    # files); these are fragments of public posts and comments and also need
+    # to be removed.
+ #
+ feeds = [
+ (u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
+ (u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'),
+ (u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
+ (u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
+ (u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
+ # (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
+ (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
+ # (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
+ # (u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
+ (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
+ # (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
+ # (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
+ # (u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
+ # (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
+ (u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin')
+ ]
+
diff --git a/recipes/msdnmag_en.recipe b/recipes/msdnmag_en.recipe
index 341ca027f6..cf9cfc4f6a 100644
--- a/recipes/msdnmag_en.recipe
+++ b/recipes/msdnmag_en.recipe
@@ -6,7 +6,7 @@ __copyright__ = '2009, Darko Miletic '
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class MSDNMagazine_en(BasicNewsRecipe):
title = 'MSDN Magazine'
@@ -21,7 +21,7 @@ class MSDNMagazine_en(BasicNewsRecipe):
use_embedded_content = False
encoding = 'utf-8'
language = 'en'
-
+
base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'
@@ -32,15 +32,15 @@ class MSDNMagazine_en(BasicNewsRecipe):
dict(name='div', attrs={'class':'DivRatingsOnly'})
,dict(name='div', attrs={'class':'ShareThisButton4'})
]
-
+
def find_articles(self):
idx_contents = self.browser.open(self.rss_url).read()
idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
-
+
for article in idx.findAll('item'):
desc_html = self.tag_to_string(article.find('description'))
description = self.tag_to_string(BeautifulSoup(desc_html))
-
+
a = {
'title': self.tag_to_string(article.find('title')),
'url': self.tag_to_string(article.find('link')),
@@ -52,14 +52,14 @@ class MSDNMagazine_en(BasicNewsRecipe):
def parse_index(self):
soup = self.index_to_soup(self.base_url)
-
+
#find issue name, eg "August 2011"
issue_name = self.tag_to_string(soup.find('h1'))
-
+
# find cover pic
img = soup.find('img',attrs ={'alt':issue_name})
if img is not None:
self.cover_url = img['src']
return [(issue_name, list(self.find_articles()))]
-
+