From 619227f3202afafad1dc7f45918a9dc2feb135a9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 22 Aug 2020 08:50:27 +0530
Subject: [PATCH] Google News and MyDealz by Volker Heggemann

---
 recipes/google_news.recipe | 96 ++++++++++++++++++++++++++++++++++++++
 recipes/my_dealz_de.recipe | 55 ++++++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 recipes/google_news.recipe
 create mode 100644 recipes/my_dealz_de.recipe

diff --git a/recipes/google_news.recipe b/recipes/google_news.recipe
new file mode 100644
index 0000000000..df382e79f8
--- /dev/null
+++ b/recipes/google_news.recipe
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import unicode_literals, division, absolute_import, print_function
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import datetime
+import json
+
+try:
+    from urllib.parse import quote, quote_plus
+except ImportError:  # Python 2
+    from urllib import quote, quote_plus
+
+# Search topics: one extra feed is generated per entry. Change these to
+# anything Google News should be searched for.
+terms_to_search_for = (
+    'computer',
+    'books',
+)
+
+
+class google_news_de(BasicNewsRecipe):
+    """Google News recipe whose feed list is built at download time.
+
+    The feeds depend on the location reported by geolocation-db.com and on
+    the entries of ``terms_to_search_for``; see ``get_feeds``.
+    """
+
+    # Title of the recipe - this is a sample
+    title = 'Google News'
+    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png'
+    # Author
+    __author__ = 'Volker Heggemann, VoHe'
+    # Oldest article to download (in days) ---- can be edited by user
+    oldest_article = 2
+    # Self-explanatory ---- can be edited by user
+    max_articles_per_feed = 200
+    # Speeds up the download on fast computers; be careful (tested up to 20)
+    # ---- can be edited by user
+    simultaneous_downloads = 10
+    # Description; some readers show this on the title page
+    description = u'Google News filter by your own recipe. Please read it in calibre software!'
+    # Append the fetch date so daily downloads are easier to tell apart
+    # ---- can be edited by user
+    description = description + ' fetched: ' + \
+        datetime.now().strftime("%Y-%m-%d")  # add ' %H:%M:%S' for the time
+    # What is the content about?
+    category = u'NEWS'
+    # Self-explanatory ---- can be edited by user
+    use_embedded_content = False
+    remove_javascript = True
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # Remove the rubbish (in the ebook)
+    auto_cleanup = True
+
+    # The feed titles and URLs follow.
+    # Feel free to add or remove what you need ---- can be edited by user
+    def get_feeds(self):
+        """Build ``self.feeds`` for the detected location and search terms.
+
+        The country code and city are read from geolocation-db.com. A top-news
+        feed is always added, a local feed is added when a city is reported,
+        and one search feed is added per entry in ``terms_to_search_for``.
+
+        Returns the result of ``BasicNewsRecipe.get_feeds``.
+        """
+        url = "https://geolocation-db.com/json"
+        data = json.loads(self.index_to_soup(url, raw=True))
+        # NOTE(review): assumes the reply always carries 'country_code' and
+        # 'city', and that the country code is usable as Google News language
+        # and edition code and as a news.google.<cc> domain - confirm.
+        country_code = str(data['country_code']).lower()  # for me this is de
+        city = data['city']
+        self.feeds = [
+            ('Google news Topnews for ' + country_code,
+             'https://news.google.com/news?pz=1&cf=all&ned=' + country_code +
+             '&hl=' + country_code + '&output=rss'),
+        ]
+        if city:
+            location = '{},{}'.format(city, country_code)
+            # City names may contain spaces or non-ASCII characters, so
+            # percent-encode the city before it goes into the URL path.
+            self.feeds.append(
+                ('Google news for ' + location, 'https://news.google.' +
+                 country_code + '/news/rss/headlines/section/geo/' +
+                 quote(city.encode('utf-8')) + ',' + country_code),
+            )
+        for searchfor in terms_to_search_for:
+            # Encode the term so spaces and '&' cannot break the query string.
+            self.feeds.append(
+                ('Google news intrested in ' + searchfor,
+                 'https://news.google.com/news?cf=all&hl=' + country_code +
+                 '&pz=1&ned=' + country_code + '&q=' +
+                 quote_plus(searchfor.encode('utf-8')) + '&output=rss'))
+        return BasicNewsRecipe.get_feeds(self)
diff --git a/recipes/my_dealz_de.recipe b/recipes/my_dealz_de.recipe
new file mode 100644
index 0000000000..8783956b1b
--- /dev/null
+++ b/recipes/my_dealz_de.recipe
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import datetime
+
+
+class MyDealzDE(BasicNewsRecipe):
+    """Recipe for the MyDealz (mydealz.de) shopping-deals RSS feed."""
+
+    # Title of the recipe
+    title = 'MyDealz'
+    # Author
+    __author__ = 'Volker Heggemann, VoHe'
+    # Oldest article to download (in days) ---- can be edited by user
+    oldest_article = 5
+    # Self-explanatory ---- can be edited by user
+    max_articles_per_feed = 100
+    # Cover picture
+    cover_url = 'https://pbs.twimg.com/profile_images/817053687545741313/0wFqvfqC_400x400.jpg'
+    # Speeds up the download on fast computers; be careful (tested up to 20)
+    # ---- can be edited by user
+    simultaneous_downloads = 10
+    # Description; some readers show this on the title page
+    description = u'MyDealz - Shopping Deals for Germany'
+    # Append the fetch date so daily downloads are easier to tell apart
+    # ---- can be edited by user
+    description = description + ' fetched: ' + \
+        datetime.now().strftime("%Y-%m-%d")  # add ' %H:%M:%S' for the time
+    # Who published the content?
+    publisher = u'https://www.mydealz.de'
+    # What is the content about?
+    category = u'Shopping'
+    # Self-explanatory ---- can be edited by user
+    use_embedded_content = False
+    # Self-explanatory ---- can be edited by user
+    language = 'de'
+    # Encoding of the content, e.g. utf-8, None, ...
+    # ---- can be edited by user
+    encoding = 'utf-8'
+    # Removes javascript - why keep it, we only want static content
+    remove_javascript = True
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # Remove the rubbish (in the ebook)
+    auto_cleanup = True
+
+    # The feed titles and URLs follow.
+    # Feel free to add or remove what you need ---- can be edited by user
+    # Tip: when experimenting, comment all feeds out first and add them back
+    # one by one.
+    feeds = [
+        ('MyDealz', 'https://www.mydealz.de/rss/alle'),
+    ]