Google News and MyDealz by Volker Heggemann

2025-08-30 23:00:21 -04:00 · 2020-08-22 08:50:27 +05:30 · 2020-08-22 08:50:27 +05:30 · 619227f320
commit 619227f320
parent 9d881ed2fc
2 changed files with 127 additions and 0 deletions
--- a/recipes/google_news.recipe
+++ b/recipes/google_news.recipe
@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import unicode_literals, division, absolute_import, print_function
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import datetime
+import json
+
+# a serarch topic, filled into the string below. You can change that to anything google news should be searched for...
+terms_to_search_for = (
+    'computer',
+    'books',
+)
+
+
+class google_news_de(BasicNewsRecipe):
+    # Titel of the Recipe - this is a sample
+    title = 'Google News'
+    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png'
+    # Author
+    __author__ = 'Volker Heggemann, VoHe'
+    # oldes article to download (in days) 									---- can be edit by user
+    oldest_article = 2
+    # describes itself, 						 							---- can be edit by user
+    max_articles_per_feed = 200
+    # speed up the download on fast computers be carefull (I test max.20)
+    # ---- can be edit by user
+    simultaneous_downloads = 10
+    # description, some Reader show this in titlepage
+    description = u'Google News filter by your own recipe. Please read it in calibre software!'
+    # add date to description so for dayly downloads you can find them easier
+    # ---- can be edit by user
+    description = description + ' fetched: ' + \
+        datetime.now().strftime("%Y-%m-%d")  # %H:%M:%S")
+    # What is the content of?
+    category = u'NEWS'
+    # describes itself, 						 							---- can be edit by user
+    use_embedded_content = False
+    remove_javascript = True
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # remove the rubbish (in ebook)
+    auto_cleanup = True
+
+    # now the content description and URL follows
+    # feel free to add, wipe out what you need	 							---- can be edit by user
+    #
+    def get_feeds(self):
+        url = "https://geolocation-db.com/json"
+        data = self.index_to_soup(url, raw=True)
+        data = json.loads(data)
+        country_code = str(data['country_code']).lower()  # for me this is de
+        city = data['city']
+        self.feeds = [
+            ('Google news Topnews for ' + country_code,
+             'https://news.google.com/news?pz=1&cf=all&ned=' + country_code +
+             '&hl=' + country_code + '&output=rss'),
+        ]
+        if city:
+            location = '{},{}'.format(city, country_code)
+            self.feeds.append(
+                ('Google news for ' + location, 'https://news.google.' +
+                country_code + '/news/rss/headlines/section/geo/' + location),
+            )
+        for searchfor in terms_to_search_for:
+            self.feeds.append(
+                ('Google news intrested in ' + searchfor,
+                 'https://news.google.com/news?cf=all&hl=' + country_code +
+                 '+&pz=1&ned=' + country_code + '&q=' + searchfor + '&output=rss'))
+        return BasicNewsRecipe.get_feeds(self)
--- a/recipes/my_dealz_de.recipe
+++ b/recipes/my_dealz_de.recipe
@ -0,0 +1,57 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import datetime
+
+
+class MyDealzDE(BasicNewsRecipe):
+    # Titel of the Recipe
+    title = 'MyDealz'
+    # Author
+    __author__ = 'Volker Heggemann, VoHe'
+    # oldes article to download (in days) 									---- can be edit by user
+    oldest_article = 5
+    # describes itself, 						 							---- can be edit by user
+    max_articles_per_feed = 100
+    # Cover Picture
+    cover_url = 'https://pbs.twimg.com/profile_images/817053687545741313/0wFqvfqC_400x400.jpg'
+    # speed up the download on fast computers be carefull (I test max.20)
+    # ---- can be edit by user
+    simultaneous_downloads = 10
+    # description, some Reader show this in titlepage
+    description = u'MyDealz - Shopping Deals for Germany'
+    # add date to description so for dayly downloads you can find them easier
+    # ---- can be edit by user
+    description = description + ' fetched: ' + \
+        datetime.now().strftime("%Y-%m-%d")  # %H:%M:%S")
+    # Who published the content?
+    publisher = u'https://www.mydealz.de'
+    # What is the content of?
+    category = u'Shopping'
+    # describes itself, 						 							---- can be edit by user
+    use_embedded_content = False
+    # describes itself, 						 							---- can be edit by user
+    language = 'de'
+    # encoding of content. e.g. utf-8, None, ...
+    # ---- can be edit by user
+    encoding = 'utf-8'
+    # Removes javascript- why keep this, we only want static content
+    remove_javascript = True
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # remove the rubbish (in ebook)
+    auto_cleanup = True
+    # now the content description and URL follows
+    # feel free to add, wipe out what you need	 							---- can be edit by user
+    #
+    # some of this are double
+    #
+    #
+    # Make some tests, may you first comment all of them out, and step by step you add what you'll need?
+    #
+
+    feeds = [
+        ('MyDealz', 'https://www.mydealz.de/rss/alle'),
+    ]