From 45ed449af18527de267b49b987e64a2a0d8af791 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 3 Jun 2009 11:20:41 -0700
Subject: [PATCH] New recipe for The Budget Fashionista by Darko Miletic

---
 src/calibre/web/feeds/recipes/__init__.py     |  1 +
 .../web/feeds/recipes/recipe_publico.py       |  6 +-
 .../recipes/recipe_the_budget_fashionista.py  | 71 +++++++++++++++++++
 3 files changed, 75 insertions(+), 3 deletions(-)
 create mode 100644 src/calibre/web/feeds/recipes/recipe_the_budget_fashionista.py

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 31ce75356c..a6dabf230f 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -45,6 +45,7 @@ recipe_modules = ['recipe_' + r for r in (
                 'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
                 'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
                 'climate_progress', 'carta', 'slashdot', 'publico',
+                'the_budget_fashionista',
                )]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_publico.py b/src/calibre/web/feeds/recipes/recipe_publico.py
index dd63949d30..17e168955f 100644
--- a/src/calibre/web/feeds/recipes/recipe_publico.py
+++ b/src/calibre/web/feeds/recipes/recipe_publico.py
@@ -23,11 +23,11 @@ class Publico(BasicNewsRecipe):
     feeds = [
               (u'Geral', u'http://feeds.feedburner.com/PublicoUltimaHora'),
               (u'Internacional', u'http://www.publico.clix.pt/rss.ashx?idCanal=11'),
-              (u'Política', u'http://www.publico.clix.pt/rss.ashx?idCanal=12'),
-              (u'Ciências', u'http://www.publico.clix.pt/rss.ashx?idCanal=13'),
+              (u'Pol\xedtica', u'http://www.publico.clix.pt/rss.ashx?idCanal=12'),
+              (u'Ci\xeancias', u'http://www.publico.clix.pt/rss.ashx?idCanal=13'),
               (u'Desporto', u'http://desporto.publico.pt/rss.ashx'),
               (u'Economia', u'http://www.publico.clix.pt/rss.ashx?idCanal=57'),
-              (u'Educação', u'http://www.publico.clix.pt/rss.ashx?idCanal=58'),
+              (u'Educa\xe7\xe3o', u'http://www.publico.clix.pt/rss.ashx?idCanal=58'),
               (u'Local', u'http://www.publico.clix.pt/rss.ashx?idCanal=59'),
               (u'Media e Tecnologia', u'http://www.publico.clix.pt/rss.ashx?idCanal=61'),
               (u'Sociedade', u'http://www.publico.clix.pt/rss.ashx?idCanal=62')
diff --git a/src/calibre/web/feeds/recipes/recipe_the_budget_fashionista.py b/src/calibre/web/feeds/recipes/recipe_the_budget_fashionista.py
new file mode 100644
index 0000000000..113cf9ce43
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_the_budget_fashionista.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+www.thebudgetfashionista.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class TheBudgetFashionista(BasicNewsRecipe):
+    # Recipe for www.thebudgetfashionista.com: reads the site's Atom feed and
+    # keeps only the div with id "singlepost" from each article page.
+    title = 'The Budget Fashionista'
+    __author__ = 'Darko Miletic'
+    description = 'Saving your money since 2003'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf-8'
+    publisher = 'TBF GROUP, LLC.'
+    category = 'news, fashion, cosmetics, women'
+    lang = 'en-US'
+    language = _('English')
+
+    # NOTE(review): as written this pattern begins with a bare quantifier, which
+    # re.compile rejects ("nothing to repeat"). Text before "{0,1}" looks lost;
+    # restore the intended pattern before applying this patch.
+    preprocess_regexps = [(re.compile(r"{0,1}", re.DOTALL|re.IGNORECASE),lambda match: '')]
+
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+    keep_only_tags = [dict(name='div', attrs={'id':'singlepost'})]
+    remove_tags_after = dict(name='div', attrs={'id':'postnav'})
+    remove_tags = [
+                     dict(name=['object','link','script','iframe','form'])
+                    ,dict(name='div', attrs={'id':'postnav'})
+                  ]
+
+    feeds = [(u'Articles', u'http://www.thebudgetfashionista.com/feeds/atom/')]
+
+    def preprocess_html(self, soup):
+        # Remove the inline style attribute from every tag that carries one.
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def postprocess_html(self, soup, x):
+        # Re-attach the body element under html and put the article div back inside it.
+        body = soup.find('body')
+        post = soup.find('div', attrs={'id':'singlepost'})
+        if post and body:
+            post.extract()
+            body.extract()
+            soup.html.append(body)
+            body.insert(1,post)
+        # Insert Content-Language and Content-Type meta tags at the top of head.
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return self.adeify_images(soup)