#!/usr/bin/env python
# Provenance: added to calibre in commit
# 3a5de491d08eeeb83b65c22e772e235d2dbce579 by Kovid Goyal
# (Sat, 16 Oct 2010 22:13:39 -0600), subject "Malaysian Mirror by Tony Stegall".
# Original path: resources/recipes/malaysian_mirror.recipe
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1'
__date__ = '16, October 2010'
__docformat__ = 'English'


from calibre.web.feeds.news import BasicNewsRecipe


class MalaysianMirror(BasicNewsRecipe):
    """News recipe that builds its feeds by scraping malaysianmirror.com.

    The article list is not taken from RSS: ``parse_index`` walks a fixed set
    of section front pages and ``make_links`` extracts the article links
    found on each of them.
    """

    title = 'MalaysianMirror'
    __author__ = 'Tonythebookworm'
    description = 'The Pulse of the Nation'
    language = 'en'
    no_stylesheets = True
    publisher = 'Tonythebookworm'
    category = 'news'
    use_embedded_content = False
    oldest_article = 24

    remove_javascript = True
    remove_empty_feeds = True
    conversion_options = {'linearize_tables': True}
    extra_css = '''
        #content_heading{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}

        td{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}

        #content_body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='table', attrs={'class': ['contentpaneopen']}),
    ]
    remove_tags = [dict(name='table', attrs={'class': ['buttonheading']})]

    max_articles_per_feed = 10

    # Base address of the site. The links found on the section pages do not
    # include it, so it is prepended when the article URLs are built.
    INDEX = 'http://www.malaysianmirror.com'

    def parse_index(self):
        """Build the feed list from the hard-coded section front pages.

        :return: List of ``(section title, articles)`` tuples, where
                 ``articles`` is the list produced by ``make_links``.
                 Sections for which no article was found are left out.
        """
        feeds = []
        for title, url in [
            (u"Media Buzz", u"http://www.malaysianmirror.com/media-buzz-front"),
            (u"Life Style", u"http://www.malaysianmirror.com/lifestylefront"),
            (u"Features", u"http://www.malaysianmirror.com/featurefront"),
        ]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect the articles linked from one section front page.

        Every ``<div class="contentheading">`` on the page is inspected and
        the first anchor inside it is taken as the article link.

        :param url: Address of the section front page to scan.
        :return: List of dicts with the keys ``title``, ``url``,
                 ``description`` and ``date``; the last two are always
                 empty strings.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for heading in soup.findAll('div', attrs={'class': 'contentheading'}):
            link = heading.find('a')
            href = link.get('href') if link is not None else None
            if not href:
                # A heading without a usable link gives nothing to download.
                continue
            if href.startswith(('http://', 'https://')):
                # Already absolute: prepending INDEX would corrupt it.
                article_url = href
            else:
                article_url = self.INDEX + href
            current_articles.append({
                'title': self.tag_to_string(link),
                'url': article_url,
                'description': '',
                'date': '',
            })
        return current_articles

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        :param soup: Parsed article page.
        :return: The same ``soup`` object, modified in place.
        """
        for item in soup.findAll(attrs={'style': True}):
            del item['style']
        return soup