diff --git a/recipes/tvxs.recipe b/recipes/tvxs.recipe index 76e35e30b4..eb7812fe4f 100644 --- a/recipes/tvxs.recipe +++ b/recipes/tvxs.recipe @@ -1,5 +1,6 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +import re from calibre.web.feeds.recipes import BasicNewsRecipe class TVXS(BasicNewsRecipe): @@ -8,19 +9,30 @@ class TVXS(BasicNewsRecipe): description = 'News from Greece' max_articles_per_feed = 100 oldest_article = 3 - simultaneous_downloads = 1 publisher = 'TVXS' - category = 'news, GR' + category = 'news, sport, greece' language = 'el' encoding = None use_embedded_content = False remove_empty_feeds = True - #conversion_options = { 'linearize_tables': True} + conversion_options = {'smarten_punctuation': True} no_stylesheets = True + publication_type = 'newspaper' remove_tags_before = dict(name='h1',attrs={'class':'print-title'}) remove_tags_after = dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'}) - remove_attributes = ['width', 'src', 'header', 'footer'] - + remove_tags = [dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'}), + dict(name='div',attrs={'class':'field field-type-filefield field-field-image-gallery'}), + dict(name='div',attrs={'class':'filefield-file'})] + remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height'] + extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ + table { width: 100%; } \ + td img { display: block; margin: 5px auto; } \ + ul { padding-top: 10px; } \ + ol { padding-top: 10px; } \ + li { padding-top: 5px; padding-bottom: 5px; } \ + h1 { text-align: center; font-size: 125%; font-weight: bold; } \ + h2, h3, h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }' + preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), (re.compile(r'', re.IGNORECASE), lambda m: '')] feeds = [(u'Ελλάδα', 'http://tvxs.gr/feeds/2/feed.xml'), (u'Κόσμος', 'http://tvxs.gr/feeds/5/feed.xml'), @@ -35,17 +47,10 @@ class TVXS(BasicNewsRecipe): (u'Ιστορία', 'http://tvxs.gr/feeds/1573/feed.xml'), (u'Χιούμορ', 'http://tvxs.gr/feeds/692/feed.xml')] - def print_version(self, url): - import urllib2, urlparse, StringIO, gzip - - fp = urllib2.urlopen(url) - data = fp.read() - if fp.info()['content-encoding'] == 'gzip': - gzip_data = StringIO.StringIO(data) - gzipper = gzip.GzipFile(fileobj=gzip_data) - data = gzipper.read() - fp.close() + br = self.get_browser() + response = br.open(url) + data = response.read() pos_1 = data.find('