diff --git a/resources/recipes/24sata_rs.recipe b/resources/recipes/24sata_rs.recipe index df1f92bfaa..0f879036ea 100644 --- a/resources/recipes/24sata_rs.recipe +++ b/resources/recipes/24sata_rs.recipe @@ -21,6 +21,7 @@ class Ser24Sata(BasicNewsRecipe): encoding = 'utf-8' use_embedded_content = False language = 'sr' + publication_type = 'newspaper' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' conversion_options = { diff --git a/resources/recipes/b92.recipe b/resources/recipes/b92.recipe index b17440e596..20b844b57d 100644 --- a/resources/recipes/b92.recipe +++ b/resources/recipes/b92.recipe @@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class B92(BasicNewsRecipe): title = 'B92' __author__ = 'Darko Miletic' - description = 'Dnevne vesti iz Srbije i sveta' + description = 'B92 info, najnovije vesti iz Srbije, regiona i sveta' publisher = 'B92' category = 'news, politics, Serbia' oldest_article = 2 @@ -19,6 +19,7 @@ class B92(BasicNewsRecipe): use_embedded_content = False encoding = 'cp1250' language = 'sr' + publication_type = 'newsportal' extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} ' conversion_options = { diff --git a/resources/recipes/beta.recipe b/resources/recipes/beta.recipe index 49da10e11a..039e8cef93 100644 --- a/resources/recipes/beta.recipe +++ b/resources/recipes/beta.recipe @@ -18,6 +18,7 @@ class Danas(BasicNewsRecipe): no_stylesheets = False use_embedded_content = True language = 'sr' + publication_type = 'newsportal' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: 
sans1, sans-serif} img{margin-bottom: 0.8em} ' conversion_options = { diff --git a/resources/recipes/blic.recipe b/resources/recipes/blic.recipe index f784c031a3..0c955bebde 100644 --- a/resources/recipes/blic.recipe +++ b/resources/recipes/blic.recipe @@ -20,7 +20,8 @@ class Blic(BasicNewsRecipe): use_embedded_content = False masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png' language = 'sr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} ' + publication_type = 'newspaper' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description @@ -41,4 +42,6 @@ class Blic(BasicNewsRecipe): return url + '/print' def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] return self.adeify_images(soup) diff --git a/resources/recipes/cetnixploitation.recipe b/resources/recipes/cetnixploitation.recipe index ee95ef599f..edc7233245 100644 --- a/resources/recipes/cetnixploitation.recipe +++ b/resources/recipes/cetnixploitation.recipe @@ -15,6 +15,7 @@ class Chetnixploitation(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 language = 'sr' + publication_type = 'blog' encoding = 'utf-8' no_stylesheets = True use_embedded_content = True @@ -32,5 +33,3 @@ class Chetnixploitation(BasicNewsRecipe): def preprocess_html(self, soup): return self.adeify_images(soup) - - diff --git a/resources/recipes/danas.recipe 
b/resources/recipes/danas.recipe index a8cd8a5a3d..d82928e323 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2008-2010, Darko Miletic ' ''' @@ -20,7 +21,9 @@ class Danas(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'http://www.danas.rs/images/basic/danas.gif' language = 'sr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} ' + publication_type = 'newspaper' + remove_empty_feeds = True + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description @@ -38,10 +41,10 @@ class Danas(BasicNewsRecipe): ,dict(name=['object','link','iframe']) ] - feeds = [ + feeds = [ 
(u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27') ,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' ) - ,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24') + ,(u'Društvo', u'http://www.danas.rs/rss/rss.asp?column_id=24') ,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' ) ,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' ) ,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25') @@ -51,13 +54,14 @@ class Danas(BasicNewsRecipe): ,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42') ,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19') ,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' ) + ,(u'Famozno' , u'http://www.danas.rs/rss/rss.asp?column_id=47') ] def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - return soup + return self.adeify_images(soup) def print_version(self, url): return url + '&action=print' - + diff --git a/resources/recipes/e_novine.recipe b/resources/recipes/e_novine.recipe index 7d39e448d2..6a7041baae 100644 --- a/resources/recipes/e_novine.recipe +++ b/resources/recipes/e_novine.recipe @@ -20,6 +20,7 @@ class E_novine(BasicNewsRecipe): encoding = 'utf-8' use_embedded_content = False language = 'sr' + publication_type = 'newsportal' masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif' extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} ' diff --git a/resources/recipes/glas_srpske.recipe b/resources/recipes/glas_srpske.recipe index 157584720a..27d65f861c 100644 --- a/resources/recipes/glas_srpske.recipe +++ b/resources/recipes/glas_srpske.recipe @@ -22,6 +22,7 @@ class GlasSrpske(BasicNewsRecipe): use_embedded_content = False masthead_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png' language = 'sr' + publication_type =
'newspaper' INDEX = 'http://www.glassrpske.com' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} ' diff --git a/resources/recipes/glasjavnosti.recipe b/resources/recipes/glasjavnosti.recipe index 15b1042818..61675ea236 100644 --- a/resources/recipes/glasjavnosti.recipe +++ b/resources/recipes/glasjavnosti.recipe @@ -18,6 +18,7 @@ class GlasJavnosti(BasicNewsRecipe): no_stylesheets = False use_embedded_content = False language = 'sr' + publication_type = 'newspaper' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} ' conversion_options = { diff --git a/resources/recipes/nin.recipe b/resources/recipes/nin.recipe index a349f0e11f..9e1aa57733 100644 --- a/resources/recipes/nin.recipe +++ b/resources/recipes/nin.recipe @@ -25,7 +25,8 @@ class Nin(BasicNewsRecipe): LOGIN = PREFIX + '/?logout=true' use_embedded_content = False language = 'sr' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} ' + publication_type = 'magazine' + extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; 
color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} ' conversion_options = { 'comment' : description diff --git a/resources/recipes/novosti.recipe b/resources/recipes/novosti.recipe index 61bb8ffd06..3c770e883b 100644 --- a/resources/recipes/novosti.recipe +++ b/resources/recipes/novosti.recipe @@ -1,14 +1,12 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' novosti.rs ''' import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Novosti(BasicNewsRecipe): title = 'Vecernje Novosti' @@ -21,17 +19,16 @@ class Novosti(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'utf-8' - language = 'sr' - - lang = 'sr-Latn-RS' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + language = 'sr' + publication_type = 'newspaper' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language + , 'linearize_tables' : True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -42,17 +39,6 @@ class Novosti(BasicNewsRecipe): feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] def preprocess_html(self, soup): - mlang = 
Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] - return soup + for item in soup.findAll(style=True): + del item['style'] + return self.adeify_images(soup) diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe index f5f6c10b72..636f34aac2 100644 --- a/resources/recipes/nspm.recipe +++ b/resources/recipes/nspm.recipe @@ -20,6 +20,7 @@ class Nspm(BasicNewsRecipe): INDEX = 'http://www.nspm.rs/?alphabet=l' encoding = 'utf-8' language = 'sr' + publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' diff --git a/resources/recipes/pescanik.recipe b/resources/recipes/pescanik.recipe index 5281aa579d..3ab9c24de3 100644 --- a/resources/recipes/pescanik.recipe +++ b/resources/recipes/pescanik.recipe @@ -1,14 +1,11 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' pescanik.net ''' import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Pescanik(BasicNewsRecipe): title = 'Pescanik' @@ -21,17 +18,16 @@ class Pescanik(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'utf-8' - language = 'sr' - - lang = 'sr-Latn-RS' - extra_css = 
'@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}' + language = 'sr' + publication_type = 'newsportal' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,"Lucida Grande",Tahoma,Verdana,sans1,sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language + , 'linearize_tables' : True } @@ -50,17 +46,4 @@ class Pescanik(BasicNewsRecipe): return nurl + '&pop=1&page=0' def preprocess_html(self, soup): - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] return self.adeify_images(soup) diff --git a/resources/recipes/thecultofghoul.recipe b/resources/recipes/thecultofghoul.recipe index 8a5f507971..3d78c02db6 100644 --- a/resources/recipes/thecultofghoul.recipe +++ b/resources/recipes/thecultofghoul.recipe @@ -18,6 +18,7 @@ class TheCultOfGhoul(BasicNewsRecipe): encoding = 'utf-8' no_stylesheets = True 
use_embedded_content = True + publication_type = 'blog' extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' conversion_options = { diff --git a/resources/recipes/vijesti.recipe b/resources/recipes/vijesti.recipe index cc28959e93..969b300486 100644 --- a/resources/recipes/vijesti.recipe +++ b/resources/recipes/vijesti.recipe @@ -1,7 +1,6 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' vijesti.me @@ -9,7 +8,6 @@ vijesti.me import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Vijesti(BasicNewsRecipe): title = 'Vijesti' @@ -22,17 +20,16 @@ class Vijesti(BasicNewsRecipe): no_stylesheets = True encoding = 'cp1250' use_embedded_content = False - language = 'sr' - - lang ='sr-Latn-Me' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + language = 'sr' + publication_type = 'newspaper' + masthead_url = 'http://www.vijesti.me/img/logo.gif' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language } preprocess_regexps = 
[(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -44,15 +41,5 @@ class Vijesti(BasicNewsRecipe): feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )] def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) return self.adeify_images(soup) - def get_article_url(self, article): - raw = article.get('link', None) - return raw.replace('.cg.yu','.me') - diff --git a/resources/recipes/vreme.recipe b/resources/recipes/vreme.recipe index 6350fa67c2..a54353b78c 100644 --- a/resources/recipes/vreme.recipe +++ b/resources/recipes/vreme.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2008-2009, Darko Miletic ' ''' @@ -9,7 +7,6 @@ vreme.com import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Vreme(BasicNewsRecipe): title = 'Vreme' @@ -24,22 +21,23 @@ class Vreme(BasicNewsRecipe): LOGIN = 'http://www.vreme.com/account/login.php?url=%2F' use_embedded_content = False encoding = 'utf-8' - language = 'sr' - - lang = 'sr-Latn-RS' - direction = 'ltr' + language = 'sr' + publication_type = 'magazine' + masthead_url = 'http://www.vreme.com/g/vreme-logo.gif' extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: 
sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language + , 'linearize_tables' : True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + remove_tags_before = dict(attrs={'class':'toc-heading'}) + remove_tags_after = dict(attrs={'class':'footer' }) def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -79,31 +77,6 @@ class Vreme(BasicNewsRecipe): def print_version(self, url): return url + '&print=yes' - def preprocess_html(self, soup): - del soup.body['text' ] - del soup.body['bgcolor'] - del soup.body['onload' ] - soup.html['lang'] = self.lang - soup.html['dir' ] = self.direction - - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] - - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - return soup - def get_cover_url(self): cover_url = None soup = self.index_to_soup(self.INDEX)