calibre/recipes/dunyahalleri.recipe
Kovid Goyal 29cd8d64ea
Change shebangs to python from python2
Also remove a few other miscellaneous references to python2
2020-08-22 18:47:51 +05:30

206 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""www.dunyahalleri.com"""
import locale
import os
import re
from shutil import copyfile
from calibre import strftime
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.recipes import BasicNewsRecipe
from PIL import Image, ImageDraw, ImageFont
__license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name* that belongs to *soup*.

    Uses the soup's own ``new_tag`` factory when it has one; otherwise
    falls back to constructing a ``Tag`` directly.

    *attrs* is an iterable of ``(key, value)`` pairs (empty by default).
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: build the Tag by hand, passing
        # None instead of an empty attribute collection.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class DunyaHalleri(BasicNewsRecipe):
    """Recipe for www.dunyahalleri.com.

    Besides the usual feed configuration, this recipe rewrites each article
    (title heading, featured image, galleries, embedded videos) in
    ``preprocess_html`` and draws its own cover in ``default_cover``.
    """

    title = 'Dünya Halleri'
    description = 'Gözden Kaçanlar Rehberi'
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5
    needs_subscription = False
    scale_news_images = True
    remove_tags_before = dict(name='span', attrs={'itemprop': 'reviewBody'})
    remove_tags_after = dict(
        name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'})
    remove_tags = [
        dict(name=['script', 'noscript', 'style', 'footer']),
        dict(attrs={'class': ['jsharedaddy sd-sharing-enabled',
                              'cb-sticky-sidebar',
                              'sharedaddy sd-sharing-enabled']}),
        dict(id=['jp-relatedposts', 'tldr-post-summary',
                 'tldr-post-summary-buttons']),
    ]
    encoding = 'utf_8'
    no_stylesheets = True
    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'
    feeds = [
        (u"Genel Gündem",
         'https://www.dunyahalleri.com/genel-gundem/feed/'),
        (u"Teknoloji / Bilim",
         'https://www.dunyahalleri.com/teknoloji-bilim/feed/'),
        (u"İnternet / Girişimler",
         'https://www.dunyahalleri.com/internet-girisimler/feed/'),
        (u"Tasarım / İnovasyon",
         'https://www.dunyahalleri.com/tasarim-inovasyon/feed/'),
        (u"Kültür / Sanat", 'https://www.dunyahalleri.com/kultur-sanat/feed/'),
    ]
    oldest_article = 7
    max_articles_per_feed = 50
    COVER_WIDTH, COVER_HEIGHT = 590, 750
    masthead_url = 'https://www.dunyahalleri.com/wp-content/uploads/2016/07/dh-logo-transparan.png'
    cover_url = ''
    cover_img_url = 'https://i0.wp.com/www.dunyahalleri.com/wp-content/uploads/2016/04/dh-favico-v2.png'
    cover_img_path = ''

    # Matches youtube embed/watch URLs and captures the video id as "vid".
    YOUTUBE_RE = re.compile(
        r'https\:\/\/(www\.)?youtube\.com\/(embed\/|watch\?v\=)'
        r'(?P<vid>.*?)(([\?\&].*)|$|\n)')
    # default cover image for non-youtube embedded pages
    DEFAULT_VIDEO_IMG = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'

    def __init__(self, *args, **kwargs):
        """Initialise the recipe, set the time locale and tune Kindle output."""
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        try:
            locale.setlocale(locale.LC_TIME, self.locale)
        except locale.Error:
            # The requested locale is not installed on this system; carry on
            # with the current locale instead of failing the whole recipe.
            self.log.warn(
                'Locale %s is not available, dates will not be localized'
                % self.locale)
        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    def preprocess_html(self, soup):
        """Normalise an article page and return the modified *soup*.

        Inserts the article title and the ``og:image`` picture at the top of
        the review body, flattens image galleries into a paragraph of plain
        images and replaces every iframe with a linked preview image.
        """
        span = soup.find('span', {'itemprop': 'reviewBody'})
        if span is not None:
            # title insert
            title_tag = soup.title
            if title_tag is not None and title_tag.contents:
                # str.replace returns a new string; keep the result so the
                # site suffix is really stripped from the heading.
                article_title = title_tag.contents[0].replace(
                    u' - Dünya Halleri', '')
                h2 = new_tag(soup, 'h2')
                h2.append(article_title)
                span.insert(0, h2)

            # featured image insert
            meta = soup.find('meta', {'property': 'og:image'})
            if meta is not None and meta.get('content'):
                img = new_tag(soup, 'img', attrs=[('src', meta['content'])])
                span.insert(1, img)

        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = new_tag(soup, 'p')
            for gallery_img in div.findAll('img'):
                # Keep only the src attribute by building a fresh tag; this
                # avoids depending on how the parser stores tag attributes.
                src = gallery_img.get('src')
                attrs = [('src', src)] if src is not None else []
                p.append(new_tag(soup, 'img', attrs=attrs))
            div.replaceWith(p)

        # youtube embedded normalization
        # this block finds the cover image for each embedded youtube video
        # then changes it to "a href" and "img"
        for iframe in soup.findAll('iframe'):
            iframe_src = iframe.get('src')
            if not iframe_src:
                # nothing to link to, leave the iframe untouched
                continue
            m = self.YOUTUBE_RE.match(iframe_src)
            if m:
                # youtube
                vid = m.group('vid')
                img_src = 'https://img.youtube.com/vi/' + vid + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + vid
            else:
                # not youtube
                img_src = self.DEFAULT_VIDEO_IMG
                a_href = iframe_src

            img = new_tag(soup, 'img', attrs=[('src', img_src)])
            caption = new_tag(soup, 'pre', attrs=[('class', 'caption')])
            caption.append('Video: ' + a_href)
            a = new_tag(
                soup, 'a', attrs=[('href', a_href), ('target', '_blank')])
            a.append(img)
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version
    # https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        """Return the URL of the logo used on the generated cover, or None."""
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        """Download the cover logo and store its path in ``cover_img_path``.

        Temporarily points ``cover_url`` at the logo URL so that
        ``_download_cover`` fetches it, then copies the result to
        ``cover_img.jpg`` in the output directory. ``cover_url`` is always
        restored, even when the download or the copy fails.
        """
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except Exception:
            # Best effort: without the previous value we restore None below.
            self.log.warn('Could not determine the current cover url')
        self.cover_url = self.get_cover_img_url()
        try:
            self._download_cover()
            outfile = os.path.join(self.output_dir, 'cover_img.jpg')
            copyfile(self.cover_path, outfile)
        finally:
            self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        """Download the cover logo; on failure log it and clear the path."""
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except Exception:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        """Draw *text* horizontally centred at *top* and return its height.

        *draw* is the ``ImageDraw`` object of the cover and *text_size* the
        font size used for the text.
        """
        font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        if hasattr(draw, 'textbbox'):
            # textsize() is gone from recent Pillow releases. The legacy
            # height was measured from the drawing origin, so use the bbox
            # bottom to keep the layout unchanged (NOTE(review): confirm
            # against the Pillow version bundled with calibre).
            bbox_left, _, bbox_right, bbox_bottom = draw.textbbox(
                (0, 0), text, font=font)
            width, height = bbox_right - bbox_left, bbox_bottom
        else:
            width, height = draw.textsize(text, font=font)
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        """Render the cover as JPEG into *cover_file* and return True.

        The cover is a white page with the title and the current date at the
        top, the site address at the bottom and, when it could be
        downloaded, the logo centred on the page.
        """
        title = self.title
        date = strftime('%d %B %Y')
        author = u'www.dunyahalleri.com'
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        self.draw_text(draw, date, 32, bottom)
        # the site address is pinned near the bottom edge
        self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            with Image.open(logo_file, 'r') as logo:
                width, height = logo.size
                left = max(int((self.COVER_WIDTH - width) / 2.), 0)
                top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
                img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True