mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
146 lines
5.2 KiB
Plaintext
146 lines
5.2 KiB
Plaintext
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||
from __future__ import unicode_literals
|
||
|
||
__license__ = 'GPL v3'
|
||
__copyright__ = '2012, Onur Gungor onurgu@gmail.com'
|
||
__docformat__ = 'restructuredtext en'
|
||
|
||
'''
|
||
www.sol.org.tr
|
||
'''
|
||
|
||
import datetime
|
||
|
||
import re
|
||
|
||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||
|
||
|
||
class SolHaberRecipe(BasicNewsRecipe):
    '''
    Recipe for soL Haber (haber.sol.org.tr).

    The site's archive listing pages are fetched for a one-day window
    (yesterday .. today) and every article row found there is grouped by
    the first path component of its URL.
    '''

    title = u'soL Haber'
    oldest_article = 7
    max_articles_per_feed = 100

    language = 'tr'
    __author__ = 'Onur Güngör'
    # The apostrophe must live inside a double-quoted literal: writing it as
    # 'soL''dan' makes Python concatenate two adjacent literals and silently
    # drop the apostrophe.
    description = "Hayata soL'dan bakın.."
    publisher = 'soL Haber'
    tags = 'news, haberler, siyaset, türkiye, turkey, politics'

    conversion_options = {
        'comment': description,
        'tags': tags,
        'publisher': publisher,
        'language': language,
    }

    # Maps the first path component of an article URL to the section title
    # shown to the reader.
    category_dict = {
        'sonuncu-kavga': 'Sonuncu Kavga',
        'devlet-ve-siyaset': 'Devlet ve Siyaset',
        'ekonomi': 'Ekonomi',
        'enternasyonal-gundem': 'Enternasyonel Gündem',
        'kent-gundemleri': 'Kent Gündemleri',
        'kultur-sanat': 'Kültür Sanat',
        'dunyadan': 'Dünyadan',
        'serbest-kursu': 'Serbest Kürsü',
        'medya': 'Medya',
        'liseliler': 'Liseliler',
        'yazarlar': 'Köşe Yazıları',
    }

    # Date window of the archive queries, as ISO strings. Evaluated once,
    # when the class body is executed.
    end_date = datetime.date.today().isoformat()
    start_date = (datetime.date.today() -
                  datetime.timedelta(days=1)).isoformat()

    # Prefix used to turn site-relative links into absolute ones.
    _site_root = 'http://haber.sol.org.tr'

    # Archive listing URL template: (content type, min date, max date).
    # '%%5B' / '%%5D' become the literal URL escapes '%5B' / '%5D' once the
    # template has been %-formatted.
    _archive_url = (
        _site_root + '/arsiv?icerik=%s'
        '&tarih%%5Bmin%%5D%%5Bdate%%5D=%s'
        '&tarih%%5Bmax%%5D%%5Bdate%%5D=%s'
    )

    # [fallback section title, archive listing URL] pairs read by
    # parse_index().
    section_tuples = [
        ['Köşe Yazıları',
         _archive_url % ('kose_yazisi', start_date, end_date)],
        ['Haberler',
         _archive_url % ('haber', start_date, end_date)],
        ['soL postal',
         _archive_url % ('postal', start_date, end_date)],
        ['Bizim Amerika',
         _archive_url % ('bizim_amerika', start_date, end_date)],
    ]

    # Disable stylesheets from site.
    no_stylesheets = True

    cover_margins = (20, 20, '#ffffff')

    # Matches a class attribute that is exactly "haber" or "kose"
    # (surrounding whitespace allowed).
    storybody_reg_exp = r'^\s*(haber|kose)\s*$'

    # Matches a class attribute that is exactly "makale-elestiri".
    comments_reg_exp = r'^\s*makale-elestiri\s*$'

    remove_tags = [
        dict(name='div',
             attrs={'class': re.compile(comments_reg_exp, re.IGNORECASE)})]

    keep_only_tags = [
        dict(name='div',
             attrs={'class': re.compile(storybody_reg_exp, re.IGNORECASE)})]

    def get_masthead_title(self):
        '''Return the recipe title followed by the end date in parentheses.'''
        return self.title + "(" + self.end_date + ")"

    def _absolute_url(self, url):
        '''Prefix a site-relative URL (leading "/") with the site root.'''
        if url.startswith('/'):
            return self._site_root + url
        return url

    def parse_index(self):
        '''
        Scrape every archive listing in ``section_tuples``.

        Returns a list of ``(section title, [article, ...])`` tuples, where
        each article is a dict with the keys ``title``, ``url``,
        ``description``, ``date`` and ``author``. Articles are grouped by
        the first path component of their URL; when the URL does not match,
        the title of the listing they were found on is used instead.

        Listings or rows that lack the expected markup are skipped rather
        than aborting the whole download.
        '''
        author_regexp = re.compile(r'^http://.*?/yazarlar/(.*?)/.*$')
        category_regexp = re.compile(r'^http://.*?/(.+?)/.*$')

        articles_dict = {}

        for section_title, section_index_url in self.section_tuples:
            self.log('Bölüm:', section_title, 'URL:', section_index_url)

            soup = self.index_to_soup(section_index_url)

            # Use the site logo as cover image when the page carries one.
            logo_div = soup.find('div', id='logo')
            logo = None
            if logo_div is not None:
                logo = logo_div.find('img', src=True)
            if logo is not None:
                self.cover_url = self._absolute_url(logo['src'])

            # Walk down to the listing table; any missing level means this
            # listing has nothing to offer.
            main_div = soup.find('div', id='ana-icerik')
            view_content = None
            if main_div is not None:
                view_content = main_div.find(
                    'div', attrs={'class': 'view-content'})
            tbody = None
            if view_content is not None:
                tbody = view_content.find('tbody')
            if tbody is None:
                # Skip only this listing: the remaining sections may still
                # contain articles.
                continue

            rows = tbody.findAll('tr')
            self.log('Row sayısı', len(rows))

            for row in rows:
                cells = row.findAll('td')
                # The link is read from the second cell and the date from
                # the third; ignore rows that are too short.
                if len(cells) < 3:
                    continue

                a = cells[1].find('a', href=True)
                if a is None:
                    continue

                url = self._absolute_url(a['href'])
                title = self.tag_to_string(a)

                category = section_title
                category_match_result = category_regexp.match(url)
                if category_match_result:
                    category = category_match_result.group(1)

                date = self.tag_to_string(cells[2])

                author = 'soL haber'
                author_match_result = author_regexp.match(url)
                if author_match_result:
                    author = author_match_result.group(1)

                self.log('\tFound article:', title, 'at', url,
                         'published at ', date, 'by', author)

                article = {'title': title, 'url': url,
                           'description': None, 'date': date,
                           'author': author}
                articles_dict.setdefault(category, []).append(article)

        # Replace known URL slugs by their display titles; unknown
        # categories are passed through unchanged.
        return [(self.category_dict.get(category, category), articles)
                for category, articles in articles_dict.items()]
|