Mirror of https://github.com/kovidgoyal/calibre.git
Google News and MyDealz by Volker Heggemann
This commit is contained in:
parent 9d881ed2fc
commit 619227f320
recipes/google_news.recipe (Normal file, 70 lines added)
@@ -0,0 +1,70 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime
import json

# a search topic, filled into the URL string below. You can change it to anything Google News should be searched for...
terms_to_search_for = (
    'computer',
    'books',
)


class google_news_de(BasicNewsRecipe):
    # Title of the recipe - this is a sample
    title = 'Google News'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png'
    # Author
    __author__ = 'Volker Heggemann, VoHe'
    # oldest article to download (in days) ---- can be edited by the user
    oldest_article = 2
    # describes itself ---- can be edited by the user
    max_articles_per_feed = 200
    # speeds up the download on fast computers, but be careful (I tested max. 20)
    # ---- can be edited by the user
    simultaneous_downloads = 10
    # description, some readers show this on the title page
    description = u'Google News filtered by your own recipe. Please read it in the calibre software!'
    # add the date to the description so daily downloads are easier to find
    # ---- can be edited by the user
    description = description + ' fetched: ' + \
        datetime.now().strftime("%Y-%m-%d")  # %H:%M:%S")
    # What is the content about?
    category = u'NEWS'
    # describes itself ---- can be edited by the user
    use_embedded_content = False
    remove_javascript = True
    # Removes empty feeds - why keep them!?
    remove_empty_feeds = True

    # remove the rubbish (in the ebook)
    auto_cleanup = True

    # now the content description and URLs follow
    # feel free to add or remove what you need ---- can be edited by the user
    #
    def get_feeds(self):
        url = "https://geolocation-db.com/json"
        data = self.index_to_soup(url, raw=True)
        data = json.loads(data)
        country_code = str(data['country_code']).lower()  # for me this is de
        city = data['city']
        self.feeds = [
            ('Google news Topnews for ' + country_code,
             'https://news.google.com/news?pz=1&cf=all&ned=' + country_code +
             '&hl=' + country_code + '&output=rss'),
        ]
        if city:
            location = '{},{}'.format(city, country_code)
            self.feeds.append(
                ('Google news for ' + location, 'https://news.google.' +
                 country_code + '/news/rss/headlines/section/geo/' + location),
            )
        for searchfor in terms_to_search_for:
            self.feeds.append(
                ('Google news interested in ' + searchfor,
                 'https://news.google.com/news?cf=all&hl=' + country_code +
                 '+&pz=1&ned=' + country_code + '&q=' + searchfor + '&output=rss'))
        return BasicNewsRecipe.get_feeds(self)
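The feed list in get_feeds() above is built at download time: the recipe asks geolocation-db.com for the reader's country code and city and assembles Google News RSS URLs from them, plus one search feed per entry in terms_to_search_for. The standalone sketch below (an illustration, not part of the commit) prints the URLs that would be produced; it assumes the geolocation endpoint returns JSON with 'country_code' and 'city' keys, exactly as the recipe does, and uses urllib instead of calibre's index_to_soup so it can run outside calibre.

import json
from urllib.request import urlopen

# Illustrative preview helper: mirrors the URL construction in get_feeds().
data = json.loads(urlopen('https://geolocation-db.com/json').read())
country_code = str(data['country_code']).lower()
city = data.get('city')

urls = ['https://news.google.com/news?pz=1&cf=all&ned=' + country_code +
        '&hl=' + country_code + '&output=rss']
if city:
    location = '{},{}'.format(city, country_code)
    urls.append('https://news.google.' + country_code +
                '/news/rss/headlines/section/geo/' + location)
for searchfor in ('computer', 'books'):
    urls.append('https://news.google.com/news?cf=all&hl=' + country_code +
                '+&pz=1&ned=' + country_code + '&q=' + searchfor + '&output=rss')

for url in urls:
    print(url)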
recipes/my_dealz_de.recipe (Normal file, 57 lines added)
@@ -0,0 +1,57 @@
#!/usr/bin/env python
from __future__ import unicode_literals, division, absolute_import, print_function

from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime


class MyDealzDE(BasicNewsRecipe):
    # Title of the recipe
    title = 'MyDealz'
    # Author
    __author__ = 'Volker Heggemann, VoHe'
    # oldest article to download (in days) ---- can be edited by the user
    oldest_article = 5
    # describes itself ---- can be edited by the user
    max_articles_per_feed = 100
    # Cover picture
    cover_url = 'https://pbs.twimg.com/profile_images/817053687545741313/0wFqvfqC_400x400.jpg'
    # speeds up the download on fast computers, but be careful (I tested max. 20)
    # ---- can be edited by the user
    simultaneous_downloads = 10
    # description, some readers show this on the title page
    description = u'MyDealz - Shopping Deals for Germany'
    # add the date to the description so daily downloads are easier to find
    # ---- can be edited by the user
    description = description + ' fetched: ' + \
        datetime.now().strftime("%Y-%m-%d")  # %H:%M:%S")
    # Who published the content?
    publisher = u'https://www.mydealz.de'
    # What is the content about?
    category = u'Shopping'
    # describes itself ---- can be edited by the user
    use_embedded_content = False
    # describes itself ---- can be edited by the user
    language = 'de'
    # encoding of the content, e.g. utf-8, None, ...
    # ---- can be edited by the user
    encoding = 'utf-8'
    # Removes javascript - why keep it, we only want static content
    remove_javascript = True
    # Removes empty feeds - why keep them!?
    remove_empty_feeds = True

    # remove the rubbish (in the ebook)
    auto_cleanup = True
    # now the content description and URLs follow
    # feel free to add or remove what you need ---- can be edited by the user
    #
    # some of these settings are duplicated across recipes
    #
    #
    # Do some tests: maybe comment all of them out first, then add back, step by step, what you need
    #

    feeds = [
        ('MyDealz', 'https://www.mydealz.de/rss/alle'),
    ]
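Either recipe can be tried out from the command line before adding it to the calibre GUI: calibre's ebook-convert tool accepts a recipe file directly, and its --test switch limits the download to a couple of articles per feed so the run stays short. A typical invocation (adjust the path to wherever the recipe file lives):

    ebook-convert recipes/my_dealz_de.recipe mydealz.epub --test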