mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
93 lines
3.5 KiB
Python
93 lines
3.5 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
import json
|
|
|
|
from calibre.ptempfile import PersistentTemporaryFile
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
# Search topics, filled into the feed URLs below. You can change them to anything Google News should be searched for...
|
|
terms_to_search_for = ('computer', 'books')  # edit freely; one entry per search topic
|
|
|
|
|
|
class google_news_de(BasicNewsRecipe):
    """Recipe that assembles Google News RSS feeds for the detected location.

    The feed list is built at run time in ``get_feeds``: one top-news feed,
    an optional feed for the detected city, and one search feed per entry of
    the module-level ``terms_to_search_for``. Article links are resolved in
    ``get_obfuscated_article`` before the article is downloaded.
    """

    # Title of the recipe - this is a sample
    title = 'Google News'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png'
    # Author
    __author__ = 'Volker Heggemann, VoHe, unkn0wn'
    # Oldest article to download (in days) ---- can be edited by the user
    oldest_article = 1.25
    # Maximum number of articles per feed ---- can be edited by the user
    max_articles_per_feed = 200
    # Speeds up the download on fast computers; be careful (tested with max. 20)
    # ---- can be edited by the user
    simultaneous_downloads = 10
    # Description; some readers show this on the title page
    description = u'Google News filter by your own recipe. Please read it in calibre software!'
    # Category of the content
    category = u'NEWS'
    # ---- can be edited by the user
    use_embedded_content = False
    remove_javascript = True
    # Removes empty feeds - why keep them!?
    remove_empty_feeds = True

    # Remove the rubbish (in the ebook)
    auto_cleanup = True

    # Feed links are not the final article URLs; they are resolved by
    # get_obfuscated_article() below.
    articles_are_obfuscated = True

    def get_obfuscated_article(self, url):
        """Fetch the real article behind a feed link into a temporary file.

        :param url: Article URL as listed in the feed.
        :return: Path of a temporary ``.html`` file containing the raw bytes
            read from the resolved article link.

        The article is aborted through ``self.abort_article`` when the
        landing page contains no link, or when the resolved link contains one
        of the skipped sections. If the first request fails with an error that
        carries no ``location`` header, that error is re-raised unchanged.
        """
        # Add sections you want to skip
        skip_sections = ('/video/', '/videos/', '/media/', 'podcast-')

        br = self.get_browser()
        try:
            br.open(url)
        except Exception as e:
            # The error raised for the feed link is expected to carry the next
            # hop in its ``location`` header (presumably a redirect response
            # -- confirm against the browser's behaviour). Anything without
            # that header is a genuine failure and must not be masked.
            headers = getattr(e, 'hdrs', None)
            target = headers.get('location') if headers is not None else None
            if not target:
                raise
            url = target

        soup = self.index_to_soup(url)
        link = soup.find('a', href=True)
        if link is None:
            # Nothing to follow on the landing page: skip this article instead
            # of failing on a None subscript.
            self.log('Aborting Article ', url)
            self.abort_article('no article link found')
        href = link['href']
        if any(section in href for section in skip_sections):
            self.log('Aborting Article ', href)
            self.abort_article('skipping video links')

        self.log('Found link: ', href)
        html = br.open(href).read()
        pt = PersistentTemporaryFile('.html')
        try:
            pt.write(html)
        finally:
            # Always release the handle, even if the write fails.
            pt.close()
        return pt.name

    # Now the feed titles and URLs follow.
    # Feel free to add or wipe out what you need ---- can be edited by the user
    def get_feeds(self):
        """Populate ``self.feeds`` for the detected location and delegate.

        The country code and city are read from the JSON returned by
        geolocation-db.com. ``self.feeds`` is then filled with a top-news
        feed, a city feed (only when a city was reported) and one search feed
        per entry of ``terms_to_search_for``. Search terms and the city are
        percent-encoded before being placed into a URL; feed titles keep the
        readable text.

        :return: The result of ``BasicNewsRecipe.get_feeds(self)``.
        """
        # Imported locally so this method carries its own dependency.
        try:
            from urllib.parse import quote, quote_plus
        except ImportError:  # Python 2 fallback
            from urllib import quote, quote_plus

        url = "https://geolocation-db.com/json"
        data = json.loads(self.index_to_soup(url, raw=True))
        country_code = str(data['country_code']).lower()  # for me this is de
        # A response without a city simply gets no local feed.
        city = data.get('city')

        self.feeds = [
            ('Google news Topnews for ' + country_code,
             'https://news.google.com/news?pz=1&cf=all&ned=' + country_code +
             '&hl=' + country_code + '&output=rss'),
        ]

        if city:
            location = '{},{}'.format(city, country_code)
            # Keep the comma literal, escape everything else that is unsafe in
            # a URL path (spaces, non-ASCII characters, ...).
            self.feeds.append(
                ('Google news for ' + location,
                 'https://news.google.' + country_code +
                 '/news/rss/headlines/section/geo/' + quote(location, safe=',')),
            )

        for searchfor in terms_to_search_for:
            # NOTE(review): the '+' after the hl value looks like a typo but is
            # kept unchanged -- confirm before removing it.
            self.feeds.append(
                ('Google news interested in ' + searchfor,
                 'https://news.google.com/news?cf=all&hl=' + country_code +
                 '+&pz=1&ned=' + country_code + '&q=' + quote_plus(searchfor) +
                 '&output=rss'),
            )

        return BasicNewsRecipe.get_feeds(self)
|