diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index d6ba724256..d5cd3c8cd7 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -54,6 +54,7 @@ recipe_modules = ['recipe_' + r for r in (
            'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
            'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress',
            'volksrant', 'theeconomictimes_india', 'ourdailybread',
+           'monitor', 'republika',
        )]
 
 
diff --git a/src/calibre/web/feeds/recipes/recipe_monitor.py b/src/calibre/web/feeds/recipes/recipe_monitor.py
new file mode 100644
index 0000000000..b2a6bd20a0
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_monitor.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+monitorcg.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+
+
+class MonitorCG(BasicNewsRecipe):
+    """Recipe for monitorcg.com.
+
+    No feeds are declared; parse_index() reads the front page for the link
+    to an issue page and lists every link found inside that page's content.
+    """
+
+    title = 'Monitor online'
+    __author__ = 'Darko Miletic'
+    description = 'News from Montenegro'
+    publisher = 'MONITOR d.o.o. Podgorica'
+    category = 'news, politics, Montenegro'
+    oldest_article = 15
+    max_articles_per_feed = 150
+    no_stylesheets = True
+    encoding = 'utf-8'
+    use_embedded_content = False
+    language = _('Montenegrin')
+    lang = 'sr-Latn-Me'
+    INDEX = 'http://www.monitorcg.com'
+
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': lang,
+        'pretty_print': True,
+    }
+
+    # Map U+0110 (capital D with stroke) to U+00D0 (capital Eth).
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'ja-current-content'})]
+
+    remove_tags = [
+        dict(name=['object', 'link', 'embed']),
+        dict(attrs={'class': ['buttonheading', 'article-section']}),
+    ]
+
+    def preprocess_html(self, soup):
+        """Stamp the page with the recipe language and a UTF-8 charset.
+
+        Sets the lang attributes on <html>, puts Content-Language and
+        Content-Type meta tags at the front of <head>, and returns the
+        result of adeify_images().
+        """
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang'] = self.lang
+        mlang = Tag(soup, 'meta', [("http-equiv", "Content-Language"), ("content", self.lang)])
+        mcharset = Tag(soup, 'meta', [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
+        soup.head.insert(0, mlang)
+        soup.head.insert(1, mcharset)
+        return self.adeify_images(soup)
+
+    def parse_index(self):
+        """Scrape the front page and the linked issue page for articles.
+
+        Returns a list of (feed title, articles) tuples; each article is a
+        dict with 'title', 'date', 'url' and 'description' keys. The list
+        is empty when the front page has no issue teaser. As a side effect,
+        cover_url is set from the first image on the issue page.
+        """
+        totalfeeds = []
+        soup = self.index_to_soup(self.INDEX)
+        cover_item = soup.find('div', attrs={'class': 'ja-catslwi'})
+        # No teaser means no issue URL, hence nothing to fetch.
+        if not cover_item:
+            return totalfeeds
+        # The issue path is embedded in the teaser's onclick handler.
+        dt = cover_item['onclick'].partition("location.href=")[2]
+        curl = self.INDEX + dt.strip("'")
+        lfeeds = [(u'Svi clanci', curl)]
+        for feedtitle, feedurl in lfeeds:
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            contitem = soup.find('div', attrs={'class': 'article-content'})
+            if contitem:
+                img = contitem.find('img')
+                if img:
+                    self.cover_url = self.INDEX + img['src']
+                # href=True skips anchors that have no link target.
+                for item in contitem.findAll('a', href=True):
+                    articles.append({
+                        'title': self.tag_to_string(item),
+                        'date': '',
+                        'url': self.INDEX + item['href'],
+                        'description': '',
+                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
diff --git a/src/calibre/web/feeds/recipes/recipe_republika.py b/src/calibre/web/feeds/recipes/recipe_republika.py
new file mode 100644
index 0000000000..65577c9119
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_republika.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+republika.co.yu
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Republika(BasicNewsRecipe):
+    """Recipe for republika.co.yu.
+
+    parse_index() gathers the 'naslovLink' anchors from every page returned
+    by get_feeds(); preprocess_html() renames table elements to div.
+    """
+
+    title = 'Republika'
+    __author__ = 'Darko Miletic'
+    description = 'Glasilo gradjanskog samooslobadjanja. Protiv stihije straha, mrznje i nasilja'
+    publisher = ' Zadruga Res Publica'
+    category = 'news, politics, Serbia'
+    language = _('Serbian')
+    lang = 'sr-Latn-RS'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    encoding = 'cp1250'
+    use_embedded_content = False
+    INDEX = u'http://www.republika.co.yu/'
+    extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .naslov{font-size: x-large; font-weight: bold} .autor{font-size: small; font-weight: bold} '
+
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': lang,
+        'pretty_print': True,
+    }
+
+    # Map U+0110 (capital D with stroke) to U+00D0 (capital Eth).
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    keep_only_tags = [
+        dict(attrs={'class': 'naslov'}),
+        dict(attrs={'class': 'text1'}),
+    ]
+
+    remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])]
+
+    feeds = [(u'Svi clanci', INDEX)]
+
+    def preprocess_html(self, soup):
+        """Rename table elements to div and delete their layout attributes."""
+        attribs = [
+            'style', 'font', 'valign',
+            'colspan', 'width', 'height',
+            'rowspan', 'summary', 'align',
+            'cellspacing', 'cellpadding',
+            'frames', 'rules', 'border',
+        ]
+        table_tags = ['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']
+        for item in soup.body.findAll(name=table_tags):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                    del item[attrib]
+        return soup
+
+    def parse_index(self):
+        """List the articles linked from each page returned by get_feeds().
+
+        Returns a list of (feed title, articles) tuples; each article is a
+        dict with 'title', 'date', 'url' and 'description' keys.
+        """
+        totalfeeds = []
+        for feedtitle, feedurl in self.get_feeds():
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            soup = self.index_to_soup(feedurl)
+            articles = []
+            # 'href': True skips anchors that have no link target.
+            for item in soup.findAll('a', attrs={'class': 'naslovLink', 'href': True}):
+                articles.append({
+                    'title': self.tag_to_string(item),
+                    'date': '',
+                    'url': item['href'],
+                    'description': '',
+                })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds