From f0d694a58d0b19e59ed1a212521632275bebf3c3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 9 Jun 2019 13:25:36 +0530
Subject: [PATCH] Remove non-working recipe

---
 recipes/hurriyet_daily_news.recipe | 266 -----------------------------
 1 file changed, 266 deletions(-)
 delete mode 100644 recipes/hurriyet_daily_news.recipe

diff --git a/recipes/hurriyet_daily_news.recipe b/recipes/hurriyet_daily_news.recipe
deleted file mode 100644
index c7ea9582e4..0000000000
--- a/recipes/hurriyet_daily_news.recipe
+++ /dev/null
@@ -1,266 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-__license__ = 'GPL v3'
-__copyright__ = '2014, spswerling'
-'''
-www.hurriyetdailynews.com
-'''
-import os
-import string
-import inspect
-import datetime
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
-
-class HurriyetDailyNews_en(BasicNewsRecipe):
-    title = u'Hurriyet Daily News'
-    __author__ = u'spswerling'
-    description = 'a Turkey based daily in english'
-    description = 'English version of Turkish Daily "Hurriyet"'
-    no_stylesheets = True
-    encoding = 'utf-8'
-    category = 'news'
-    language = 'en_TR'
-    publication_type = 'newspaper'
-    cover_img_url = 'http://www.hurriyetdailynews.com/images/design/logo-hurriyet-daily-news.png'
-    masthead_url = cover_img_url
-    remove_empty_feeds = True
-
-    # on kindle, images can make things kind of fat. Slim them down.
-    recursions = 0
-    oldest_article = 1
-    compress_news_images = True
-    compress_news_images_max_size = 7
-    scale_news_images = (150, 200)  # (kindle touch: 600x800)
-    useHighResImages = False
-    oldest_article = 1.5
-    max_articles_per_section = 25
-    max_articles_per_subsection = 7
-
-    sections = [
-        u'turkey',
-        u'economy',
-        u'world',
-        u'sports',
-        # u'life',
-        u'opinion',
-        # u'arts/culture'
-    ]
-
-    # util for creating remove_tags and keep_tags style regex matchers
-    def tag_matcher(elt, attr, str):
-        return dict(name=elt, attrs={attr: re.compile(str, re.IGNORECASE)})
-
-    keep_only_tags = [tag_matcher('div', 'class', 'NewsDetail')]
-
-    remove_tags = [
-        tag_matcher('div', 'class', 'Carousel'),
-        tag_matcher('div', 'class', 'ShareIt'),
-        tag_matcher('div', 'class', 'tmz'),
-        tag_matcher('span', 'id', 'comment'),
-        tag_matcher('h2', 'class', 'NewSpot'),
-        tag_matcher('h2', 'class', 'pv-gallery'),
-    ]
-
-    articles = {}
-    subsection_links = {}
-    urls_done = []
-    links_per_section = {}
-
-    def parse_index(self):
-        section_links = self.section_links_from_home_page()
-        for section_link in section_links:
-            self.articles[self.section_name(section_link)] = []
-            subsection_links = self.find_subsection_links(section_link)
-            for subsection_link in subsection_links:
-                sub_name = self.subsection_name(subsection_link)
-                self.subsection_links[sub_name] = []
-                self.parse_subsection(section_link, subsection_link)
-        ans = []
-        for k in self.articles:
-            ans.append((string.capwords(k), self.articles[k]))
-        return ans
-
-    def section_links_from_home_page(self):
-
-        def include_link(link):
-            return self.text(link).lower() in self.sections
-
-        url = 'http://www.hurriyetdailynews.com/'
-        try:
-            self._p('hitting home page ' + url)
-            soup = self.index_to_soup(url)
-        except:
-            self._p('Unable to spider home page')
-            return []
-
-        self._p('Got home page. hunt down section links.')
-
-        regex = re.compile('rmRootLink', re.IGNORECASE)
-        links = soup.findAll('a', {'class': regex})
-
-        filtered_links = list(filter(include_link, links))
-        self._p(' all sections: ' + ', '.join(map(self.text, links)))
-        self._p(' filtered sections: ' +
-                ', '.join(map(self.text, filtered_links)))
-
-        return filtered_links
-
-    def find_subsection_links(self, section_link):
-        self._p('find subsection links for section ' + str(section_link))
-        url = self.abs_url(section_link['href'])
-        try:
-            self._p('hitting ' + url)
-            soup = self.index_to_soup(url)
-        except:
-            self._p('Unable to spider subsection')
-            return []
-        self._p('Got ' + url)
-
-        div = soup.find('div', {'class': 'SeffafLink'})
-        if not div:
-            self._p('could not find any subsections')
-            return [section_link]
-        links = div.findAll('a')
-        self._p(' subsection links: ' + ', '.join(map(self.text, links)))
-        return links
-
-    def parse_subsection(self, section_link, subsection_link):
-
-        section = self.section_name(section_link)
-        if len(self.articles[section]) > self.max_articles_per_section:
-            return
-
-        # tmp dbg
-        # if not self.subsection_name(subsection_link) == 'arts':
-        #     return
-
-        self._p('hit section ' + section +
-                ', subsect ' + self.subsection_name(subsection_link))
-        url = self.abs_url(subsection_link['href'])
-        try:
-            self._p('hitting ' + url)
-            soup = self.index_to_soup(url)
-        except:
-            self._p('Unable to spider section')
-            return []
-
-        self._p('Process links ')
-        for link in soup.findAll('a'):
-            if 'NewsDetail' in str(link.get('id')):
-                self.process_link(section_link, subsection_link, link)
-
-    def process_link(self, section_link, subsection_link, link):
-        section = self.section_name(section_link)
-        subsection = self.subsection_name(subsection_link)
-        title = link['title'] or self.text(link)
-        href = link.get('href')
-        if not href:
-            self._p("BAD HREF: " + str(link))
-            return
-        self.queue_article_link(section, subsection, href, title)
-
-    def queue_article_link(self, section, subsection, url, title):
-        full_url = self.abs_url(url)
-        if full_url in self.urls_done:
-            # self._p('Skip (already Qd): ' + ' - '.join([section, subsection, title, url]))
-            return
-
-        self.urls_done.append(full_url)
-        if len(self.articles[section]) >= self.max_articles_per_section:
-            return
-        if len(self.subsection_links[subsection]) >= \
-                self.max_articles_per_subsection:
-            return
-        self._p('Q: ' + ' - '.join([section, subsection, title, url]))
-        full_title = string.capwords(subsection + ' - ' + title)
-        self.subsection_links[subsection].append(url)
-        self.articles[section].append(
-            dict(title=full_title,
-                 url=full_url,
-                 date='',
-                 description='',
-                 author='',
-                 content=''))
-
-    def text(self, n):
-        return self.tag_to_string(n).strip()
-
-    def abs_url(self, url):
-        if 'www.hurriyetdailynews.com' in url:
-            abs_url = url
-        elif url[0] == '/':
-            abs_url = 'http://www.hurriyetdailynews.com' + url
-        else:
-            abs_url = 'http://www.hurriyetdailynews.com/' + url
-        if '#' in abs_url:
-            abs_url = ''.join(abs_url.split('#')[0:-1])
-
-        return abs_url
-
-    def section_name(self, link):
-        return self.text(link).lower()
-
-    def subsection_name(self, link):
-        from_fn = str(os.path.splitext(link['href'])[0]).split('/')[-1]
-        return from_fn
-
-    def preprocess_raw_html(self, raw_html, url):
-        reason_to_skip = self.should_skip_article(BeautifulSoup(raw_html))
-        if reason_to_skip:
-            self._p('Skipping article: ' + reason_to_skip + ', ' + url)
-            # Next line will show up as an error in the logs, but ignore, see
-            # http://www.mobileread.com/forums/showthread.php?p=2931136
-            return None
-        else:
-            return super(self.__class__, self).preprocess_raw_html(raw_html, url)
-
-    def should_skip_article(self, soup):
-        date = self.scrape_article_date(soup)
-        if not date:
-            return False
-
-        age = (datetime.datetime.now() - date).days
-        if (age > self.oldest_article):
-            return "too old"
-        return False
-
-    def date_from_string(self, datestring):
-        try:
-            # eg: September/17/2014
-            dt = datetime.datetime.strptime(datestring, "%B/%d/%Y")
-        except:
-            try:
-                # eg: September 17/2014
-                dt = datetime.datetime.strptime(datestring, "%B %d/%Y")
-            except:
-                dt = None
-        if dt:
-            self._p('From string "' + datestring + '", datetime: ' + str(dt))
-        else:
-            self._p('Could not get datetime from ' + datestring)
-        return dt
-
-    def scrape_article_date(self, soup):
-        dnode = soup.find('p', {'class': 'dateagency'}) or \
-            soup.find('p', {'class': 'Tarih'})
-        if dnode:
-            dstring = self.text(dnode)
-            return self.date_from_string(dstring)
-        else:
-            return None
-
-    def _dbg_soup_node(self, node):
-        s = ' cls: ' + str(node.get('class')).strip() + \
-            ' id: ' + str(node.get('id')).strip() + \
-            ' txt: ' + self.text(node)
-        return s
-
-    def _p(self, msg):
-        curframe = inspect.currentframe()
-        calframe = inspect.getouterframes(curframe, 2)
-        calname = calframe[1][3].upper()
-        print('[' + calname + '] ' + msg[0:120])
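
Note: should anyone want to resurrect this, a minimal feed-based sketch of a
replacement recipe follows. It is an illustration only: the RSS URL below is
an assumption (the removed recipe scraped section pages directly, and the
site's current feed layout has not been verified). The remaining attributes
are standard BasicNewsRecipe options, several carried over from the recipe
removed above.

    #!/usr/bin/env python
    # Hypothetical feed-driven replacement sketch; confirm the feed URL
    # actually exists before relying on it.
    from calibre.web.feeds.news import BasicNewsRecipe


    class HurriyetDailyNews(BasicNewsRecipe):
        title = 'Hurriyet Daily News'
        description = 'English version of the Turkish daily "Hurriyet"'
        language = 'en_TR'
        oldest_article = 1.5           # days, as in the removed recipe
        max_articles_per_feed = 25
        no_stylesheets = True
        compress_news_images = True
        auto_cleanup = True            # let calibre extract the article body
        # Assumed feed URL -- not verified against the live site.
        feeds = [('Turkey', 'https://www.hurriyetdailynews.com/rss/turkey')]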