calibre/recipes/tvxs.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

72 lines
3.2 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class TVXS(BasicNewsRecipe):
title = 'TVXS'
__author__ = 'hargikas'
description = 'News from Greece'
max_articles_per_feed = 100
oldest_article = 3
publisher = 'TVXS'
category = 'news, sport, greece'
language = 'el'
encoding = None
use_embedded_content = False
remove_empty_feeds = True
conversion_options = {'smarten_punctuation': True}
no_stylesheets = True
publication_type = 'newspaper'
remove_tags_before = dict(name='h1', attrs={'class': 'print-title'})
remove_tags_after = dict(name='div', attrs={
'class': 'field field-type-relevant-content field-field-relevant-articles'})
remove_tags = [dict(name='div', attrs={'class': 'field field-type-relevant-content field-field-relevant-articles'}),
dict(name='div', attrs={
'class': 'field field-type-filefield field-field-image-gallery'}),
dict(name='div', attrs={'class': 'filefield-file'})]
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding',
'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
table { width: 100%; } \
td img { display: block; margin: 5px auto; } \
ul { padding-top: 10px; } \
ol { padding-top: 10px; } \
li { padding-top: 5px; padding-bottom: 5px; } \
h1 { text-align: center; font-size: 125%; font-weight: bold; } \
h2, h3, h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
feeds = [(u'Ελλάδα', 'http://tvxs.gr/feeds/2/feed.xml'),
(u'Κόσμος', 'http://tvxs.gr/feeds/5/feed.xml'),
(u'Τοπικά Νέα', 'http://tvxs.gr/feeds/5363/feed.xml'),
(u'Sci Tech', 'http://tvxs.gr/feeds/26/feed.xml'),
(u'Αθλητικά', 'http://tvxs.gr/feeds/243/feed.xml'),
(u'Internet & ΜΜΕ', 'http://tvxs.gr/feeds/32/feed.xml'),
(u'Καλά Νέα', 'http://tvxs.gr/feeds/914/feed.xml'),
(u'Απόψεις', 'http://tvxs.gr/feeds/1109/feed.xml'),
(u'Πολιτισμός', 'http://tvxs.gr/feeds/1317/feed.xml'),
(u'Greenlife', 'http://tvxs.gr/feeds/3/feed.xml'),
(u'Ιστορία', 'http://tvxs.gr/feeds/1573/feed.xml'),
(u'Χιούμορ', 'http://tvxs.gr/feeds/692/feed.xml')]
def print_version(self, url):
br = self.get_browser()
response = br.open(url)
data = response.read()
pos_1 = data.find('<a href="/print/')
if pos_1 == -1:
return url
pos_2 = data.find('">', pos_1)
if pos_2 == -1:
return url
pos_1 += len('<a href="')
new_url = data[pos_1:pos_2]
print_url = "http://tvxs.gr" + new_url
return print_url