diff --git a/resources/images/news/biggovernment.png b/resources/images/news/biggovernment.png
new file mode 100644
index 0000000000..d5c2442ebb
Binary files /dev/null and b/resources/images/news/biggovernment.png differ
diff --git a/resources/images/news/eluniversal.png b/resources/images/news/eluniversal.png
new file mode 100644
index 0000000000..cd970ab9e5
Binary files /dev/null and b/resources/images/news/eluniversal.png differ
diff --git a/resources/images/news/propublica.png b/resources/images/news/propublica.png
new file mode 100644
index 0000000000..02954be4ea
Binary files /dev/null and b/resources/images/news/propublica.png differ
diff --git a/resources/recipes/biggovernment.recipe b/resources/recipes/biggovernment.recipe
new file mode 100644
index 0000000000..f14b78f1b8
--- /dev/null
+++ b/resources/recipes/biggovernment.recipe
@@ -0,0 +1,30 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class BigGovernmentRecipe(BasicNewsRecipe):
+    '''Recipe for the Big Government blog, read from its FeedBurner feed.'''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'en_US'
+    version = 1
+
+    title = u'Big Government'
+    publisher = u'Andrew Breitbart'
+    category = u'Political blog'
+    description = u'Political news from the USA'
+
+    oldest_article = 30
+    max_articles_per_feed = 100
+    use_embedded_content = True
+
+    feeds = [(u'Big Government', u'http://feeds.feedburner.com/BigGovernment')]
+
+    # Metadata handed to the e-book conversion step.
+    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
+                          'publisher': publisher}
+
+    extra_css = '''
+        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
+        img {float: left; margin-right: 0.5em;}
+    '''
diff --git a/resources/recipes/eluniversalimpresa.recipe b/resources/recipes/eluniversalimpresa.recipe
new file mode 100644
index 0000000000..c7046a31c4
--- /dev/null
+++ b/resources/recipes/eluniversalimpresa.recipe
@@ -0,0 +1,113 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElUniversalImpresaRecipe(BasicNewsRecipe):
+    '''Recipe for the print edition of El Universal (Mexico).
+
+    The feeds are scraped from the print-edition overview page, see
+    parse_index(); article URLs are rewritten by print_version().
+    '''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'es'
+    version = 1
+
+    title = u'El Universal (Edici\u00F3n Impresa)'
+    publisher = u'El Universal'
+    category = u'News, Mexico'
+    description = u'News from Mexico'
+
+    remove_empty_feeds = True
+    remove_javascript = True
+
+    # Site root; the hrefs found on the overview page are appended to it.
+    INDEX = 'http://www.eluniversal.com.mx'
+
+    extra_css = '''
+        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
+    '''
+
+    # The content is Spanish, so the metadata language follows `language`
+    # rather than a hard-coded 'en'.
+    conversion_options = {'comments': description, 'tags': category, 'language': language,
+                          'publisher': publisher, 'linearize_tables': True}
+
+    def parse_index(self):
+        '''Build the feed list from the print-edition overview page.
+
+        Returns a list of (feed title, articles) tuples; every article is a
+        dict with the keys 'title', 'date', 'url' and 'description'. The list
+        is empty when the expected table is not on the page.
+        '''
+        soup = self.index_to_soup('http://www.eluniversal.com.mx/edicion_impresa.html')
+        index = []
+
+        table = soup.find('table', attrs = {'width': '500'})
+        if table is None:
+            # Without the overview table there is nothing to scrape.
+            return index
+
+        # Front page: each 'arnegro12' cell holds the headline link plus the
+        # summary text.
+        articles = []
+        for td in table.findAll('td', attrs = {'class': 'arnegro12'}):
+            a = td.a
+            if a is None or not a.get('href'):
+                continue
+            # Detach the link so that only the summary text is left in the cell.
+            a.extract()
+            articles.append({'title': self.tag_to_string(a), 'date': None,
+                             'url': self.INDEX + a['href'],
+                             'description': self.tag_to_string(td)})
+
+        index.append(('Primera Plana', articles))
+
+        # Other sections: in each attribute-less cell the first link gives the
+        # feed title and the remaining links are its articles.
+        for td in table.findAll(lambda tag: tag.name == 'td' and len(tag.attrs) == 0):
+            articles = []
+            feedTitle = None
+            for a in td.findAll('a'):
+                if not feedTitle:
+                    feedTitle = self.tag_to_string(a)
+                    continue
+                if not a.get('href'):
+                    continue
+
+                articles.append({'title': self.tag_to_string(a), 'date': None,
+                                 'url': self.INDEX + a['href'], 'description': ''})
+
+            index.append((feedTitle, articles))
+
+        return index
+
+    def print_version(self, url):
+        '''Return the 'vi_' variant of an article URL, or None for 'wcarton' URLs.'''
+        if 'wcarton' in url:
+            return None
+
+        # Prefix the last path segment with 'vi_'.
+        main, sep, article_id = url.rpartition('/')
+
+        return main + '/vi_' + article_id
+
+    def preprocess_html(self, soup):
+        '''Drop the first table and all empty paragraphs; turn the headline font tag into <h1>.'''
+        table = soup.find('table')
+        if table is not None:
+            table.extract()
+
+        for p in soup.findAll('p'):
+            if self.tag_to_string(p).strip() == '':
+                p.extract()
+
+        tag = soup.find('font', attrs = {'color': '#0F046A'})
+        if tag:
+            # NOTE(review): 'helvetica,' and 'sans-serif' look like pieces of an
+            # unquoted face value that ended up as attributes of their own - confirm.
+            for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']:
+                if tag.has_key(attr):
+                    del tag[attr]
+            tag.name = 'h1'
+
+        return soup
diff --git a/resources/recipes/journalofaccountancy.recipe b/resources/recipes/journalofaccountancy.recipe
new file mode 100644
index 0000000000..51a6ac8d29
--- /dev/null
+++ b/resources/recipes/journalofaccountancy.recipe
@@ -0,0 +1,47 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JournalOfAccountancyRecipe(BasicNewsRecipe):
+    '''Recipe for the Journal of Accountancy, read from its FeedBurner feed.'''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'en'
+    version = 1
+
+    title = u'Journal of Accountancy'
+    publisher = u'AICPA'
+    category = u'News, Accountancy'
+    description = u'Publication of the American Institute of Certified Public Accountants'
+
+    use_embedded_content = False
+    remove_empty_feeds = True
+    oldest_article = 30
+    max_articles_per_feed = 100
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    extra_css = '''
+        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
+        div#Rubricname {font-size: small; color: #666666; margin-bottom: 1em;}
+        div#Headline {font-size: x-large; font-weight: bold; margin-bottom: 0.6em}
+        div#SubHeadline {font-size: medium; font-weight: bold; margin-bottom: 1em}
+        div#Authorname, div#Date {font-size: x-small; color: #696969;}
+    '''
+
+    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
+                          'publisher': publisher}
+
+    # The divs to keep from each article page; extra_css styles most of them by id.
+    keep_only_tags = [
+        dict(name = 'div', attrs = {'id': 'Rubricname'}),
+        dict(name = 'div', attrs = {'id': 'Headline'}),
+        dict(name = 'div', attrs = {'id': 'SubHeadline'}),
+        dict(name = 'div', attrs = {'id': 'Authorname'}),
+        dict(name = 'div', attrs = {'id': 'Date'}),
+        dict(name = 'div', attrs = {'id': 'BodyContent'}),
+    ]
+
+    remove_attributes = ['style']
+
+    feeds = [(u'Journal of Accountancy', u'http://feeds2.feedburner.com/JournalOfAccountancy')]
diff --git a/resources/recipes/propublica.recipe b/resources/recipes/propublica.recipe
new file mode 100644
index 0000000000..1e1f0af7a9
--- /dev/null
+++ b/resources/recipes/propublica.recipe
@@ -0,0 +1,62 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class ProPublicaRecipe(BasicNewsRecipe):
+    '''Recipe for ProPublica, read from its topic feeds.'''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'en_US'
+    version = 1
+
+    title = u'Pro Publica'
+    publisher = u'ProPublica.org'
+    category = u'Political blog'
+    description = u'Independent investigative journalism in the public interest.'
+
+    oldest_article = 14
+    max_articles_per_feed = 100
+    use_embedded_content = False
+
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+
+    keep_only_tags = [dict(name = 'div', attrs = {'class': 'article'})]
+
+    # Page furniture to strip (sharing boxes, tag lists, ...), going by the ids and classes.
+    remove_tags = [
+        dict(name = 'div', attrs = {'id': 'rollups'}),
+        dict(name = 'div', attrs = {'class': 'follow_info'}),
+        dict(name = 'ul', attrs = {'class': 'long-tools-top'}),
+        dict(name = 'ul', attrs = {'id': 'share-box'}),
+        dict(name = 'div', attrs = {'class': 'tags'}),
+        dict(name = 'ul', attrs = {'class': 'long-tools'}),
+        dict(name = 'ul', attrs = {'id': 'share-box2'}),
+        dict(name = 'p', attrs = {'id': 'original-url'}),
+    ]
+
+    feeds = [
+        (u'Top Stories', u'http://feeds.propublica.org/propublica/main'),
+        (u'Stimulus', u'http://feeds.propublica.org/propublica/watchdog/stimulus'),
+        (u'Bailout', u'http://feeds.propublica.org/propublica/watchdog/bailout'),
+        (u'Business', u'http://feeds.propublica.org/propublica/business-money'),
+        (u'Justice', u'http://feeds.propublica.org/propublica/justice-law'),
+        (u'Energy & Environment', u'http://feeds.propublica.org/propublica/energy-environment'),
+        (u'Government & Politics', u'http://feeds.propublica.org/propublica/government-politics'),
+        (u'Health & Science', u'http://feeds.propublica.org/propublica/health-science'),
+        (u'Media & Technology', u'http://feeds.propublica.org/propublica/media-technology'),
+        (u'National Security', u'http://feeds.propublica.org/propublica/national-security'),
+    ]
+
+    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
+                          'publisher': publisher}
+
+    extra_css = '''
+        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
+        img {float: left; margin-right: 0.5em;}
+        h1 {text-align: left;}
+        a, a[href] {text-decoration: none; color: blue;}
+        div.cat {font-size: x-small; color: #666666; margin-bottom: 0.1em;}
+        div.info {font-size: small; color: #696969;}
+    '''