From 382dbb3a6dc3efb28f7f4e0ffcae0cbd82719b29 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 10 Apr 2009 08:45:16 -0700 Subject: [PATCH] Fix #2257 (Updated recipes with fixes for epub image problem) --- .../web/feeds/recipes/recipe_24sata_rs.py | 21 ++++++++++++-- src/calibre/web/feeds/recipes/recipe_blic.py | 29 +++++++++++++++---- src/calibre/web/feeds/recipes/recipe_nspm.py | 22 ++++++++++++-- .../web/feeds/recipes/recipe_tomshardware.py | 13 ++++++++- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py index ac4ee2b860..88860bf493 100644 --- a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py +++ b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py @@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic ' import re from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Ser24Sata(BasicNewsRecipe): title = '24 Sata - Sr' @@ -39,14 +40,30 @@ class Ser24Sata(BasicNewsRecipe): feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + def preprocess_html(self, soup): soup.html['xml:lang'] = 'sr-Latn-RS' soup.html['lang'] = 'sr-Latn-RS' mtag = '\n' soup.head.insert(0,mtag) - return soup + return self.cleanup_image_tags(soup) def print_version(self, url): article, sep, rest = url.partition('#') - return article.replace('/show.php','/_print.php') + article_base, sep2, article_id = article.partition('id=') + return 'http://www.24sata.co.rs/_print.php?id=' + article_id diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py index 05d4e43865..e4e4987dec 100644 --- a/src/calibre/web/feeds/recipes/recipe_blic.py +++ b/src/calibre/web/feeds/recipes/recipe_blic.py @@ -8,11 +8,12 @@ blic.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Blic(BasicNewsRecipe): - title = u'Blic' - __author__ = u'Darko Miletic' - description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' + title = 'Blic' + __author__ = 'Darko Miletic' + description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' publisher = 'RINGIER d.o.o.' category = 'news, politics, Serbia' oldest_article = 2 @@ -21,7 +22,7 @@ class Blic(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False language = _('Serbian') - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} ' html2lrf_options = [ '--comment' , description @@ -30,7 +31,7 @@ class Blic(BasicNewsRecipe): , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -44,10 +45,26 @@ class Blic(BasicNewsRecipe): start_url, question, rest_url = url.partition('?') return u'http://www.blic.rs/_print.php?' + rest_url + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + + def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] - return soup + return self.cleanup_image_tags(soup) \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nspm.py b/src/calibre/web/feeds/recipes/recipe_nspm.py index 0ff80b8a93..360fb35c35 100644 --- a/src/calibre/web/feeds/recipes/recipe_nspm.py +++ b/src/calibre/web/feeds/recipes/recipe_nspm.py @@ -8,9 +8,10 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Nspm(BasicNewsRecipe): - title = u'Nova srpska politicka misao' + title = 'Nova srpska politicka misao' __author__ = 'Darko Miletic' description = 'Casopis za politicku teoriju i drustvena istrazivanja' publisher = 'NSPM' @@ -36,7 +37,7 @@ class Nspm(BasicNewsRecipe): preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ - dict(name=['a','img','link','object','embed']) + dict(name=['link','object','embed']) ,dict(name='td', attrs={'class':'buttonheading'}) ] @@ -50,6 +51,21 @@ class Nspm(BasicNewsRecipe): def print_version(self, url): return url.replace('.html','/stampa.html') + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + def preprocess_html(self, soup): lng = 'sr-Latn-RS' soup.html['xml:lang'] = lng @@ -59,4 +75,4 @@ class Nspm(BasicNewsRecipe): ftag['content'] = lng for item in soup.findAll(style=True): del item['style'] - return soup + return self.cleanup_image_tags(soup) diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware.py b/src/calibre/web/feeds/recipes/recipe_tomshardware.py index 13d164d0e6..af080ccbb7 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware.py @@ -63,6 +63,17 @@ class Tomshardware(BasicNewsRecipe): rind = 'http://www.tomshardware.com/review_print.php?p1=' return rind + article_id + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + return soup + def preprocess_html(self, soup): del(soup.body['onload']) - return soup + for item in soup.findAll(style=True): + del item['style'] + for it in soup.findAll('span'): + it.name="div" + return self.cleanup_image_tags(soup)