diff --git a/src/calibre/ebooks/oeb/transforms/linearize_tables.py b/src/calibre/ebooks/oeb/transforms/linearize_tables.py index 5842db90e5..92c3a00c7b 100644 --- a/src/calibre/ebooks/oeb/transforms/linearize_tables.py +++ b/src/calibre/ebooks/oeb/transforms/linearize_tables.py @@ -6,15 +6,19 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -from calibre.ebooks.oeb.base import OEB_DOCS, XPNSMAP +from calibre.ebooks.oeb.base import OEB_DOCS, XPath class LinearizeTables(object): def linearize(self, root): - for x in root.xpath('//h:table|//h:td|//h:tr|//h:th', - namespaces=XPNSMAP): + for x in XPath('//h:table|//h:td|//h:tr|//h:th|//h:caption|' + '//h:tbody|//h:tfoot|//h:thead|//h:colgroup|//h:col')(root): x.tag = 'div' - for attr in ('valign', 'colspan', 'rowspan', 'width', 'halign'): + for attr in ('style', 'font', 'valign', + 'colspan', 'width', 'height', + 'rowspan', 'summary', 'align', + 'cellspacing', 'cellpadding', + 'frames', 'rules', 'border'): if attr in x.attrib: del x.attrib[attr] diff --git a/src/calibre/gui2/images/news/cubadebate.png b/src/calibre/gui2/images/news/cubadebate.png new file mode 100644 index 0000000000..4f8fc9e4fc Binary files /dev/null and b/src/calibre/gui2/images/news/cubadebate.png differ diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 1588a4c8ab..d6ba724256 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -48,7 +48,7 @@ recipe_modules = ['recipe_' + r for r in ( 'the_budget_fashionista', 'elperiodico_catalan', 'elperiodico_spanish', 'expansion_spanish', 'lavanguardia', 'marca', 'kellog_faculty', 'kellog_insight', 'noaa', - '7dias', 'buenosaireseconomico', 'huntechnet', + '7dias', 'buenosaireseconomico', 'huntechnet', 'cubadebate', 'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres', 'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate', 
'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna', diff --git a/src/calibre/web/feeds/recipes/recipe_cubadebate.py b/src/calibre/web/feeds/recipes/recipe_cubadebate.py new file mode 100644 index 0000000000..13747348fd --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_cubadebate.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +cubadebate.cu +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class CubaDebate(BasicNewsRecipe): + title = 'CubaDebate' + __author__ = 'Darko Miletic' + description = 'Contra el Terorismo Mediatico' + oldest_article = 15 + language = _('Spanish') + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + publisher = 'Cubadebate' + category = 'news, politics, Cuba' + encoding = 'utf-8' + extra_css = ' #BlogTitle{font-size: x-large; font-weight: bold} ' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : 'es' + ,'publisher' : publisher + ,'pretty_print': True + } + + keep_only_tags = [dict(name='div', attrs={'id':'Outline'})] + remove_tags_after = dict(name='div',attrs={'id':'BlogContent'}) + remove_tags = [dict(name='link')] + + feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')] + + def print_version(self, url): + return url + 'print/' + + def preprocess_html(self, soup): + return self.adeify_images(soup)