calibre/recipes/google_news.recipe

#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime
import json

# Search topics. get_feeds() adds one Google News search feed per entry, with
# the entry filled into the search URL. Change these to anything Google News
# should be searched for.
terms_to_search_for = (
    'computer',
    'books',
)


class google_news_de(BasicNewsRecipe):
    """Google News recipe whose feeds are localised via an IP geolocation lookup.

    The feed list is not static: get_feeds() builds it at download time from
    the country code and city reported by https://geolocation-db.com/json plus
    one search feed per entry in ``terms_to_search_for``.
    """

    # Title of the recipe - this is a sample
    title = 'Google News'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png'
    # Author
    __author__ = 'Volker Heggemann, VoHe'
    # Oldest article to download (in days)            ---- can be edited by user
    oldest_article = 2
    # Maximum number of articles per feed             ---- can be edited by user
    max_articles_per_feed = 200
    # Speeds up the download on fast computers; be careful (tested up to 20)
    #                                                 ---- can be edited by user
    simultaneous_downloads = 10
    # Description; some readers show this on the title page
    description = u'Google News filter by your own recipe. Please read it in calibre software!'
    # Append the fetch date so daily downloads are easier to tell apart.
    # This is evaluated once, when the class body is executed.
    #                                                 ---- can be edited by user
    description = description + ' fetched: ' + \
        datetime.now().strftime("%Y-%m-%d")  # %H:%M:%S")
    # What kind of content is this?
    category = u'NEWS'
    # Articles are fetched from the linked pages      ---- can be edited by user
    use_embedded_content = False
    remove_javascript = True
    # Removes empty feeds - why keep them!?
    remove_empty_feeds = True

    # Remove the rubbish (in the ebook)
    auto_cleanup = True

    # The feed titles and URLs are assembled below.
    # Feel free to add or remove feeds as needed      ---- can be edited by user
    def get_feeds(self):
        """Build ``self.feeds`` for the detected location and the search terms.

        Downloads https://geolocation-db.com/json, reads ``country_code`` and
        ``city`` from the JSON reply, and creates:

        * one top-news feed for the country,
        * one local-news feed when a city is reported,
        * one search feed per entry in ``terms_to_search_for``.

        Returns whatever ``BasicNewsRecipe.get_feeds`` returns for that list.
        Errors from the geolocation download or from JSON parsing are not
        caught here and propagate to the caller.
        """
        # Function-scope import keeps this block self-contained. The fallback
        # covers Python 2, which the file's __future__ imports still target.
        try:
            from urllib.parse import quote, quote_plus
        except ImportError:
            from urllib import quote, quote_plus

        raw = self.index_to_soup("https://geolocation-db.com/json", raw=True)
        geo = json.loads(raw)
        country_code = str(geo['country_code']).lower()  # e.g. 'de'
        # The city is optional, so a missing key is treated like an empty one.
        city = geo.get('city')

        self.feeds = [
            ('Google news Topnews for ' + country_code,
             'https://news.google.com/news?pz=1&cf=all&ned=' + country_code +
             '&hl=' + country_code + '&output=rss'),
        ]

        if city:
            location = '{},{}'.format(city, country_code)
            # Percent-encode the path segment (spaces, umlauts, ...) but keep
            # the comma separating city and country. Encoding to UTF-8 bytes
            # first makes quote() behave the same on Python 2 and 3.
            location_path = quote(location.encode('utf-8'), safe=',')
            # NOTE(review): this assumes Google News is reachable under the
            # bare country-code domain (news.google.<cc>); that may not hold
            # for every country - confirm.
            self.feeds.append(
                ('Google news for ' + location,
                 'https://news.google.' + country_code +
                 '/news/rss/headlines/section/geo/' + location_path),
            )

        for searchfor in terms_to_search_for:
            # Encode the term so spaces, '&', '#', etc. cannot break the query
            # string. The 'hl' value no longer carries the stray '+' (an
            # encoded trailing space) that the original URL contained.
            query = quote_plus(searchfor.encode('utf-8'))
            self.feeds.append(
                ('Google news intrested in ' + searchfor,
                 'https://news.google.com/news?cf=all&hl=' + country_code +
                 '&pz=1&ned=' + country_code + '&q=' + query + '&output=rss'))

        return BasicNewsRecipe.get_feeds(self)