#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""www.dunyahalleri.com"""

import locale
import os
import re

from shutil import copyfile

from calibre import strftime
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.recipes import BasicNewsRecipe
from PIL import Image, ImageDraw, ImageFont

__license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'


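# Create a tag on either BeautifulSoup API: bs4 soups expose new_tag(),
# older soups need a Tag constructed directly.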
def new_tag(soup, name, attrs=()):
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


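# Recipe for www.dunyahalleri.com: fetches the site's category RSS feeds
# and generates a simple cover image with PIL.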
class DunyaHalleri(BasicNewsRecipe):
    title = 'Dünya Halleri'
    description = 'Gözden Kaçanlar Rehberi'
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5

    needs_subscription = False
    scale_news_images = True

    remove_tags_before = dict(name='span', attrs={'itemprop': 'reviewBody'})
    remove_tags_after = dict(
        name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'})
    remove_tags = [dict(name=['script', 'noscript', 'style', 'footer']),
                   dict(attrs={'class': ['jsharedaddy sd-sharing-enabled',
                                         'cb-sticky-sidebar', 'sharedaddy sd-sharing-enabled']}),
                   dict(id=['jp-relatedposts', 'tldr-post-summary', 'tldr-post-summary-buttons'])]
    encoding = 'utf_8'
    no_stylesheets = True

    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'
    feeds = [(u"Genel Gündem",
              'https://www.dunyahalleri.com/genel-gundem/feed/'),
             (u"Teknoloji / Bilim",
              'https://www.dunyahalleri.com/teknoloji-bilim/feed/'),
             (u"İnternet / Girişimler",
              'https://www.dunyahalleri.com/internet-girisimler/feed/'),
             (u"Tasarım / İnovasyon",
              'https://www.dunyahalleri.com/tasarim-inovasyon/feed/'),
             (u"Kültür / Sanat", 'https://www.dunyahalleri.com/kultur-sanat/feed/')]
    oldest_article = 7
    max_articles_per_feed = 50

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    masthead_url = 'https://www.dunyahalleri.com/wp-content/uploads/2016/07/dh-logo-transparan.png'
    cover_url = ''
    cover_img_url = 'https://i0.wp.com/www.dunyahalleri.com/wp-content/uploads/2016/04/dh-favico-v2.png'
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        locale.setlocale(locale.LC_TIME, self.locale)

        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

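    # Clean up each article page: prepend the article title and featured
    # image, flatten image galleries into plain <p>/<img> markup and turn
    # embedded video iframes into linked thumbnail images.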
    def preprocess_html(self, soup):
        span = soup.findAll('span', {'itemprop': 'reviewBody'}, limit=1)[0]

        # title insert
        article_title = soup.title.contents[0]
        article_title = article_title.replace(u' - Dünya Halleri', '')
        h2 = new_tag(soup, 'h2')
        h2.append(article_title)
        span.insert(0, h2)

        # featured image insert
        meta = soup.findAll('meta', {'property': 'og:image'}, limit=1)
        if meta:
            img = new_tag(soup, 'img')
            img.attrs = [('src', meta[0]['content'])]
            span.insert(1, img)

        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = new_tag(soup, 'p')
            for img in div.findAll('img'):
                img.attrs = [(key, value)
                             for key, value in img.attrs if key in ['src']]
                p.append(img)
            div.replaceWith(p)

        # youtube embed normalization
        # this block finds the cover image for each embedded youtube video
        # then changes it to "a href" and "img"
        for iframe in soup.findAll('iframe'):
            a = new_tag(soup, 'a')
            caption = new_tag(soup, 'pre')
            img = new_tag(soup, 'img')

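            # pull the video id out of either
            # https://www.youtube.com/watch?v=<id> or
            # https://www.youtube.com/embed/<id> style URLs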
            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                iframe['src'])
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = iframe['src']

            img.attrs = [('src', img_src)]
            caption.append('Video: ' + a_href)
            caption.attrs = [('class', 'caption')]
            a.attrs = [('href', a_href), ('target', '_blank')]
            a.append(img)
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version
    # https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        return getattr(self, 'cover_img_url', None)

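    # Temporarily point cover_url at cover_img_url so the standard
    # _download_cover() machinery fetches the logo, then restore it.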
    def _download_cover_img(self):
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except:
            pass
        new_cu = self.get_cover_img_url()
        self.cover_url = new_cu
        self._download_cover()

        outfile = os.path.join(self.output_dir, 'cover_img.jpg')
        copyfile(self.cover_path, outfile)
        self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

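    # Draw one horizontally centred line of text and return its height so
    # the caller can stack lines top to bottom.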
    def draw_text(self, draw, text, text_size, top):
        font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        width, height = draw.textsize(text, font=font)
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

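    # Generated cover: title, date and site name drawn onto a blank canvas,
    # with the site logo pasted in the centre.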
    def default_cover(self, cover_file):
        title = self.title
        date = strftime('%d %B %Y')
        author = u'www.dunyahalleri.com'
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        bottom += self.draw_text(draw, date, 32, bottom)
        bottom += self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True