mirror of https://github.com/kovidgoyal/calibre.git
added dunyahalleri and dunyahalleri_haftaninozeti
This commit is contained in:
parent 08471c12a4
commit 85a0304bf6
recipes/dunyahalleri.recipe (new file, 199 lines)
@@ -0,0 +1,199 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""www.dunyahalleri.com"""
import locale
import os
import re
import time

from shutil import copyfile

from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.recipes import BasicNewsRecipe
from PIL import Image, ImageDraw, ImageFont

__license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'

class DunyaHalleri(BasicNewsRecipe):
    title = 'Dünya Halleri'
    description = 'Gözden Kaçanlar Rehberi'
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5

    needs_subscription = False
    scale_news_images = True

    remove_tags_before = dict(name='span', attrs={'itemprop': 'reviewBody'})
    remove_tags_after = dict(
        name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'})
    remove_tags = [dict(name=['script', 'noscript', 'style', 'footer']),
                   dict(attrs={'class': ['jsharedaddy sd-sharing-enabled',
                                         'cb-sticky-sidebar', 'sharedaddy sd-sharing-enabled']}),
                   dict(id=['jp-relatedposts', 'tldr-post-summary', 'tldr-post-summary-buttons'])]
    encoding = 'utf_8'
    no_stylesheets = True

    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'
    feeds = [("Genel Gündem".decode('utf-8', 'replace'),
              'https://www.dunyahalleri.com/genel-gundem/feed/'),
             ("Teknoloji / Bilim".decode('utf-8', 'replace'),
              'https://www.dunyahalleri.com/teknoloji-bilim/feed/'),
             ("İnternet / Girişimler".decode('utf-8', 'replace'),
              'https://www.dunyahalleri.com/internet-girisimler/feed/'),
             ("Tasarım / İnovasyon".decode('utf-8', 'replace'),
              'https://www.dunyahalleri.com/tasarim-inovasyon/feed/'),
             ("Kültür / Sanat".decode('utf-8', 'replace'), 'https://www.dunyahalleri.com/kultur-sanat/feed/')]
    oldest_article = 7
    max_articles_per_feed = 50

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    masthead_url = 'https://www.dunyahalleri.com/wp-content/uploads/2016/07/dh-logo-transparan.png'
    cover_url = ''
    cover_img_url = 'https://i0.wp.com/www.dunyahalleri.com/wp-content/uploads/2016/04/dh-favico-v2.png'
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        locale.setlocale(locale.LC_TIME, self.locale)

        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    def preprocess_html(self, soup):
        span = soup.findAll('span', {'itemprop': 'reviewBody'}, limit=1)[0]

        # title insert
        article_title = soup.title.contents[0]
        article_title = article_title.replace(
            ' - Dünya Halleri'.decode('utf-8', 'replace'), '')
        h2 = Tag(soup, 'h2')
        h2.append(article_title)
        span.insert(0, h2)

        # featured image insert
        meta = soup.find('meta', {'property': 'og:image'})
        if meta:
            img = Tag(soup, 'img')
            img.attrs = [('src', meta['content'])]
            span.insert(1, img)

        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = Tag(soup, 'p')
            for img in div.findAll('img'):
                img.attrs = [(key, value)
                             for key, value in img.attrs if key in ['src']]
                p.append(img)
            div.replaceWith(p)

        # embedded youtube normalization
        # this block finds the cover image for each embedded youtube video,
        # then replaces the iframe with an "a href" wrapping an "img"
        for iframe in soup.findAll('iframe'):
            a = Tag(soup, 'a')
            caption = Tag(soup, 'pre')
            img = Tag(soup, 'img')

            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                iframe['src'])
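            # illustrative example (added, not in the original recipe): for a
            # src of 'https://www.youtube.com/embed/abc123?rel=0' the named
            # group 'vid' captures 'abc123', so the thumbnail below resolves
            # to 'https://img.youtube.com/vi/abc123/0.jpg'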
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = iframe['src']

            img.attrs = [('src', img_src)]
            caption.append('Video: ' + a_href)
            caption.attrs = [('class', 'caption')]
            a.attrs = [('href', a_href), ('target', '_blank')]
            a.append(img)
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version:
    # https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except:
            pass
        new_cu = self.get_cover_img_url()
        self.cover_url = new_cu
        self._download_cover()

        outfile = os.path.join(self.output_dir, 'cover_img.jpg')
        copyfile(self.cover_path, outfile)
        self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        width, height = draw.textsize(text, font=font)
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        title = self.title
        date = time.strftime(
            '%d %B %Y').decode('utf8', 'replace')
        author = 'www.dunyahalleri.com'.decode('utf8', 'replace')
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        bottom += self.draw_text(draw, date, 32, bottom)
        bottom += self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True

recipes/dunyahalleri_haftaninozeti.recipe (new file, 266 lines)
@@ -0,0 +1,266 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""www.dunyahalleri.com/haftanin-ozeti"""
import locale
import os
import re

from shutil import copyfile

from contextlib import closing
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
from calibre.web.feeds.recipes import BasicNewsRecipe
from PIL import Image, ImageDraw, ImageFont

__license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'

class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
    title = 'Dünya Halleri - Haftanın Özeti'
    description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,'
                   ' site, yazılım, donanım, cihaz, video ve trendler...')
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5

    needs_subscription = False
    scale_news_images = True

    remove_tags_before = dict(name='section', attrs={'itemprop': 'articleBody'})
    remove_tags_after = dict(name='div', attrs={'class': 'cb-alert cb-blue'})
    remove_tags = [dict(name=['ol', 'h4', 'script', 'noscript', 'style', 'footer']),
                   dict(name='h1', attrs={
                       'class': 'entry-title cb-entry-title entry-title cb-title'}),
                   dict(attrs={'class': ['cb-alert cb-blue', 'woo-sc-box info ',
                                         'sharedaddy sd-sharing-enabled', 'jp-relatedposts']}),
                   dict(id=['post-pagination', 'plp_inital_pagination'])]
    encoding = 'utf_8'
    no_stylesheets = True
    INDEX = 'https://www.dunyahalleri.com/haftanin-ozeti/feed/'
    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    issue_title = ''
    issue_date = ''
    masthead_url = ''
    cover_url = ''
    cover_img_url = ''
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        locale.setlocale(locale.LC_TIME, self.locale)

        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    # BeautifulSoup XML parser extension.
    # Running index_to_soup on XML/RSS produces lots of garbage nodes and
    # reshapes the tree to suit HTML. This function is a near copy of
    # index_to_soup, but it uses BeautifulStoneSoup instead of BeautifulSoup.
    def xml_to_soup(self, url_or_raw, raw=False):
        if re.match(r'\w+://', url_or_raw):
            br = self.clone_browser(self.browser)
            open_func = getattr(br, 'open_novisit', br.open)
            with closing(open_func(url_or_raw)) as f:
                _raw = f.read()
            if not _raw:
                raise RuntimeError(
                    'Could not fetch index from %s' % url_or_raw)
        else:
            _raw = url_or_raw

        if raw:
            return _raw

        if not isinstance(_raw, unicode) and self.encoding:
            if callable(self.encoding):
                _raw = self.encoding(_raw)
            else:
                _raw = _raw.decode(self.encoding, 'replace')

        from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
        from calibre.utils.cleantext import clean_xml_chars

        if isinstance(_raw, unicode):
            _raw = strip_encoding_declarations(_raw)
        else:
            _raw = xml_to_unicode(
                _raw, strip_encoding_pats=True, resolve_entities=True)[0]

        _raw = clean_xml_chars(_raw)
        return BeautifulStoneSoup(_raw)  # <== the difference
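
    # added note (not in the original recipe): because BeautifulStoneSoup
    # parses the feed as plain XML, parse_index below can address it directly
    # as index.rss.channel and channel.item instead of scraping mangled HTML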

    def parse_index(self):
        from dateutil.parser import parse

        # RSS parsing
        index = self.xml_to_soup(self.INDEX)

        channel = index.rss.channel

        self.description = channel.description.contents[0]
        self.masthead_url = channel.url.contents[0]

        item = channel.item
        self.issue_title = item.title.contents[0]
        self.issue_date = parse(item.pubdate.contents[0])

        base_url = item.link.contents[0]
        cover_img_desc = BeautifulSoup(item.description.contents[0])
        # this is necessary for the cover generator
        self.cover_img_url = cover_img_desc.img['src']

        soup = self.index_to_soup(base_url)
        articles = {}
        ans = []

        for li in soup.findNext('ol').findAll('li'):
            a = li.find('a', href=True)

            if not a:
                url = base_url
                feed = self.tag_to_string(li, use_alt=True).strip()
            else:
                url = base_url + re.sub(r'\.\/', '', a['href'])
                feed = self.tag_to_string(a, use_alt=True).strip()
            pubdate = self.issue_date.strftime('%a, %d %b')

            title = self.issue_title + \
                ' (' + self.issue_date.strftime('%d %B %Y') + ')'

            if feed not in articles:
                articles[feed] = []
                ans.append(feed)

            articles[feed].append(
                dict(title=title, url=url, date=pubdate, description='', content=''))

        ans = [(key, articles[key]) for key in ans if key in articles]
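        # added note (not in the original recipe): ans is a list of
        # (section_title, [article_dict, ...]) tuples, the structure calibre
        # expects parse_index to return, one section per entry of the summary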
        return ans

    def preprocess_html(self, soup):
        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = Tag(soup, 'p')
            for img in div.findAll('img'):
                img.attrs = [(key, value)
                             for key, value in img.attrs if key in ['src']]
                p.append(img)
            div.replaceWith(p)

        # embedded youtube normalization
        # this block finds the cover image for each embedded youtube video,
        # then replaces the iframe with an "a href" wrapping an "img"
        for iframe in soup.findAll('iframe'):
            a = Tag(soup, 'a')
            caption = Tag(soup, 'pre')
            img = Tag(soup, 'img')

            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                iframe['src'])
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = iframe['src']

            img.attrs = [('src', img_src)]
            caption.append('Video: ' + a_href)
            caption.attrs = [('class', 'caption')]
            a.attrs = [('href', a_href), ('target', '_blank')]
            a.append(img)
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version: https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except:
            pass
        new_cu = self.get_cover_img_url()
        self.cover_url = new_cu
        self._download_cover()

        outfile = os.path.join(self.output_dir, 'cover_img.jpg')
        copyfile(self.cover_path, outfile)
        self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        width, height = draw.textsize(text, font=font)
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        title = self.issue_title
        date = self.issue_date.strftime(
            '%d %B %Y').decode('utf8', 'replace')
        author = 'www.dunyahalleri.com/haftanin-ozeti'.decode(
            'utf8', 'replace')
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        bottom += self.draw_text(draw, date, 32, bottom)
        bottom += self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            logo = logo.resize(
                (self.COVER_WIDTH, (self.COVER_WIDTH * height / width)), Image.ANTIALIAS)
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True