From e0002deb1fba920695c88147b415d583ac79f517 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 22 Apr 2012 12:48:29 +0530
Subject: [PATCH] Sol Haber by Onur Gungor

---
 recipes/sol_haber.recipe | 141 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 recipes/sol_haber.recipe

diff --git a/recipes/sol_haber.recipe b/recipes/sol_haber.recipe
new file mode 100644
index 0000000000..29db88019c
--- /dev/null
+++ b/recipes/sol_haber.recipe
@@ -0,0 +1,141 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Onur Gungor onurgu@gmail.com'
+__docformat__ = 'restructuredtext en'
+
+'''
+www.sol.org.tr
+'''
+
+import datetime
+
+import re
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class SolHaberRecipe(BasicNewsRecipe):
+    title = u'soL Haber'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    language = 'tr'
+    __author__ = 'Onur Güngör'
+    description = "Hayata soL'dan bakın.."
+    publisher = 'soL Haber'
+    tags = 'news, haberler, siyaset, türkiye, turkey, politics'
+
+    conversion_options = {
+        'comment': description,
+        'tags': tags,
+        'publisher': publisher,
+        'language': language
+    }
+
+    category_dict = {'sonuncu-kavga': 'Sonuncu Kavga',
+                     'devlet-ve-siyaset': 'Devlet ve Siyaset',
+                     'ekonomi': 'Ekonomi',
+                     'enternasyonal-gundem': 'Enternasyonel Gündem',
+                     'kent-gundemleri': 'Kent Gündemleri',
+                     'kultur-sanat': 'Kültür Sanat',
+                     'dunyadan': 'Dünyadan',
+                     'serbest-kursu': 'Serbest Kürsü',
+                     'medya': 'Medya',
+                     'liseliler': 'Liseliler',
+                     'yazarlar': 'Köşe Yazıları'}
+
+    end_date = datetime.date.today().isoformat()
+    start_date = (datetime.date.today() - datetime.timedelta(days=1)).isoformat()
+
+    section_tuples = [
+        ['Köşe Yazıları', 'http://haber.sol.org.tr/arsiv?icerik=kose_yazisi&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
+        ['Haberler', 'http://haber.sol.org.tr/arsiv?icerik=haber&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
+        ['soL postal', 'http://haber.sol.org.tr/arsiv?icerik=postal&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
+        ['Bizim Amerika', 'http://haber.sol.org.tr/arsiv?icerik=bizim_amerika&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)]]
+
+    # Disable stylesheets from site.
+    no_stylesheets = True
+
+    cover_margins = (20, 20, '#ffffff')
+
+    storybody_reg_exp = r'^\s*(haber|kose)\s*$'
+
+    comments_reg_exp = r'^\s*makale-elestiri\s*$'
+
+    remove_tags = [dict(name='div', attrs={'class': re.compile(comments_reg_exp, re.IGNORECASE)})]
+
+    keep_only_tags = [dict(name='div', attrs={'class': re.compile(storybody_reg_exp, re.IGNORECASE)})]
+
+    def get_masthead_title(self):
+        return self.title + "(" + self.end_date + ")"
+
+    def parse_index(self):
+        result = []
+        articles_dict = dict()
+
+        author_regexp = re.compile('^http://.*?/yazarlar/(.*?)/.*$')
+        category_regexp = re.compile('^http://.*?/(.+?)/.*$')
+
+        for section_tuple in self.section_tuples:
+            section_title = section_tuple[0]
+            section_index_url = section_tuple[1]
+
+            self.log('Bölüm:', section_title, 'URL:', section_index_url)
+
+            soup = self.index_to_soup(section_index_url)
+
+            logo = soup.find('div', id='logo').find('img', src=True)
+            if logo is not None:
+                self.cover_url = logo['src']
+                if self.cover_url.startswith('/'):
+                    self.cover_url = 'http://haber.sol.org.tr' + self.cover_url
+
+            view_content = soup.find('div', id='ana-icerik').find('div', attrs={'class': 'view-content'})
+            if view_content is None:
+                break
+            rows = view_content.find('tbody').findAll('tr')
+
+            self.log('Row sayısı', len(rows))
+            for row in rows:
+                cells = row.findAll('td')
+
+                a = cells[1].find('a', href=True)
+
+                url = a['href']
+                title = self.tag_to_string(a)
+
+                if url.startswith('/'):
+                    url = 'http://haber.sol.org.tr' + url
+
+                category = section_title
+                category_match_result = category_regexp.match(url)
+                if category_match_result:
+                    category = category_match_result.group(1)
+
+                date = self.tag_to_string(cells[2])
+
+                author = 'soL haber'
+                author_match_result = author_regexp.match(url)
+                if author_match_result:
+                    author = author_match_result.group(1)
+
+                self.log('\tFound article:', title, 'at', url, 'published at ', date, 'by', author)
+                article = {'title': title, 'url': url, 'description': None, 'date': date, 'author': author}
+                if category in articles_dict:
+                    articles_dict[category].append(article)
+                else:
+                    articles_dict[category] = [article]
+
+        for category in articles_dict.keys():
+            if category in self.category_dict:
+                result.append((self.category_dict[category], articles_dict[category]))
+            else:
+                result.append((category, articles_dict[category]))
+
+        return result
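
Note on the archive URLs built in section_tuples above: the doubled %% is a literal percent sign under Python's %-formatting, so after the start/end dates are substituted the %%5B / %%5D sequences come out as the URL-encoded brackets [ and ] of the tarih[min][date] / tarih[max][date] query parameters. A minimal standalone sketch of the expansion, not part of the patch; the dates here are illustrative placeholders:

    # Sketch: how one of the section_tuples URL templates expands.
    start_date, end_date = '2012-04-21', '2012-04-22'
    url = ('http://haber.sol.org.tr/arsiv?icerik=haber'
           '&tarih%%5Bmin%%5D%%5Bdate%%5D=%s'
           '&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date))
    print(url)
    # http://haber.sol.org.tr/arsiv?icerik=haber&tarih%5Bmin%5D%5Bdate%5D=2012-04-21&tarih%5Bmax%5D%5Bdate%5D=2012-04-22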