diff --git a/resources/images/library.png b/resources/images/library.png index e093247162..721ef0546d 100644 Binary files a/resources/images/library.png and b/resources/images/library.png differ diff --git a/resources/recipes/24sata_rs.recipe b/resources/recipes/24sata_rs.recipe index b306c3ee6c..df1f92bfaa 100644 --- a/resources/recipes/24sata_rs.recipe +++ b/resources/recipes/24sata_rs.recipe @@ -1,7 +1,6 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' 24sata.rs @@ -9,7 +8,6 @@ __copyright__ = '2009, Darko Miletic ' import re from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Ser24Sata(BasicNewsRecipe): title = '24 Sata - Sr' @@ -17,22 +15,20 @@ class Ser24Sata(BasicNewsRecipe): description = '24 sata portal vesti iz Srbije' publisher = 'Ringier d.o.o.' category = 'news, politics, entertainment, Serbia' - oldest_article = 7 + oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True encoding = 'utf-8' use_embedded_content = False - language = 'sr' - - lang = 'sr-Latn-RS' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + language = 'sr' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language + , 'linearize_tables' : True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -40,25 +36,6 @@ class Ser24Sata(BasicNewsRecipe): feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] - - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) return self.adeify_images(soup) def print_version(self, url): diff --git a/resources/recipes/b92.recipe b/resources/recipes/b92.recipe index 612aee4d67..b17440e596 100644 --- a/resources/recipes/b92.recipe +++ b/resources/recipes/b92.recipe @@ -1,7 +1,6 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' b92.net ''' @@ -19,16 +18,15 @@ class B92(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'cp1250' - language = 'sr' - - lang = 'sr-Latn-RS' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + language = 'sr' + extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang + , 'language' : language + , 'linearize_tables' : True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -50,20 +48,5 @@ class B92(BasicNewsRecipe): return url + '&version=print' def preprocess_html(self, soup): - del soup.body['onload'] - for item in soup.findAll('font'): - item.name='div' - if item.has_key('size'): - del item['size'] - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] - return soup + return self.adeify_images(soup) + diff --git a/resources/recipes/beta.recipe b/resources/recipes/beta.recipe index 8c0b36297c..49da10e11a 100644 --- a/resources/recipes/beta.recipe +++ b/resources/recipes/beta.recipe @@ -1,13 +1,11 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' beta.rs ''' import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Danas(BasicNewsRecipe): title = 'BETA' @@ -19,18 +17,14 @@ class Danas(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = False use_embedded_content = True - language = 'sr' - - lang = 'sr-Latn-RS' - direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + language = 'sr' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language } @@ -43,9 +37,4 @@ class Danas(BasicNewsRecipe): ] def preprocess_html(self, soup): - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) return self.adeify_images(soup) diff --git a/resources/recipes/blic.recipe b/resources/recipes/blic.recipe index 4b0880237d..f784c031a3 100644 --- a/resources/recipes/blic.recipe +++ b/resources/recipes/blic.recipe @@ -14,14 +14,13 @@ class Blic(BasicNewsRecipe): description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' publisher = 'RINGIER d.o.o.' category = 'news, politics, Serbia' - delay = 1 oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png' language = 'sr' - - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} ' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description @@ -31,13 +30,15 @@ class Blic(BasicNewsRecipe): } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - remove_tags_before = dict(name='div', attrs={'id':'article_info'}) + remove_tags = [dict(name=['object','link'])] + remove_attributes = ['width','height'] feeds = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')] - remove_tags = [dict(name=['object','link'])] def print_version(self, url): return url + '/print' + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/cetnixploitation.recipe b/resources/recipes/cetnixploitation.recipe new file mode 100644 index 0000000000..ee95ef599f --- /dev/null +++ b/resources/recipes/cetnixploitation.recipe @@ -0,0 +1,36 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +chetnixploitation.blogspot.com +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class Chetnixploitation(BasicNewsRecipe): + title = 'Chetnixploitation' + __author__ = 'Darko Miletic' + description = 'Filmski blog' + oldest_article = 7 + max_articles_per_feed = 100 + language = 'sr' + encoding = 'utf-8' + no_stylesheets = True + use_embedded_content = True + extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' + + conversion_options = { + 'comment' : description + , 'tags' : 'film, blog, cetnici, srbija, ex-yu' + , 'publisher': 'Son of Man' + , 'language' : language + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')] + + def preprocess_html(self, soup): + return self.adeify_images(soup) + + diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 081c46a5d2..a8cd8a5a3d 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -20,7 +20,7 @@ class Danas(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'http://www.danas.rs/images/basic/danas.gif' language = 'sr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} ' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description @@ -38,7 +38,7 @@ class Danas(BasicNewsRecipe): ,dict(name=['object','link','iframe']) ] - feeds = [ + feeds = [ (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27') ,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' ) ,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24') @@ -60,4 +60,4 @@ class Danas(BasicNewsRecipe): def print_version(self, url): return url + '&action=print' - + diff --git a/resources/recipes/e_novine.recipe b/resources/recipes/e_novine.recipe index 2bea48ec82..7d39e448d2 100644 --- a/resources/recipes/e_novine.recipe +++ b/resources/recipes/e_novine.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' e-novine.com @@ -9,7 +7,6 @@ e-novine.com import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class E_novine(BasicNewsRecipe): title = 'E-Novine' @@ -20,40 +17,38 @@ class E_novine(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True - encoding = 'cp1250' + encoding = 'utf-8' use_embedded_content = False - language = 'sr' - - lang = 'sr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + language = 'sr' + masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif' + extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})] + keep_only_tags = [ + dict(name='div', attrs={'class':'article_head'}) + ,dict(name='div', attrs={'id':'article_body'}) + ] - remove_tags = [dict(name=['object','link','embed','iframe'])] + remove_tags = [ + dict(name=['object','link','embed','iframe']) + ,dict(attrs={'id':'box_article_tools'}) + ] + remove_attributes = ['height','width','lang'] - feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )] + feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss' )] def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) for item in soup.findAll(style=True): del item['style'] - ftag = soup.find('div', attrs={'id':'css_47_0_2844H'}) - if ftag: - it = ftag.div - it.extract() - ftag.div.extract() - ftag.insert(0,it) - return soup + return self.adeify_images(soup) + + def print_version(self, url): + return url + '?print' diff --git a/resources/recipes/glas_srpske.recipe b/resources/recipes/glas_srpske.recipe index a63d3c1242..157584720a 100644 --- a/resources/recipes/glas_srpske.recipe +++ b/resources/recipes/glas_srpske.recipe @@ -1,7 +1,6 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' glassrpske.com @@ -9,7 +8,6 @@ glassrpske.com import re from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class GlasSrpske(BasicNewsRecipe): title = 'Glas Srpske' @@ -22,20 +20,16 @@ class GlasSrpske(BasicNewsRecipe): no_stylesheets = True encoding = 'utf-8' use_embedded_content = False - cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png' - lang = 'sr-BA' - language = 'sr' - + masthead_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png' + language = 'sr' INDEX = 'http://www.glassrpske.com' - - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -63,11 +57,7 @@ class GlasSrpske(BasicNewsRecipe): ] def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) - return soup + return self.adeify_images(soup) def parse_index(self): totalfeeds = [] diff --git a/resources/recipes/glasjavnosti.recipe b/resources/recipes/glasjavnosti.recipe index 9a50aa3bed..15b1042818 100644 --- a/resources/recipes/glasjavnosti.recipe +++ b/resources/recipes/glasjavnosti.recipe @@ -1,7 +1,6 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' www.glas-javnosti.rs ''' @@ -18,18 +17,14 @@ class GlasJavnosti(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = False use_embedded_content = False - language = 'sr' - - lang = 'sr-Latn-RS' - direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + language = 'sr' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language } diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe index c7314bcf37..f5f6c10b72 100644 --- a/resources/recipes/nspm.recipe +++ b/resources/recipes/nspm.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' nspm.rs ''' @@ -21,17 +19,16 @@ class Nspm(BasicNewsRecipe): use_embedded_content = False INDEX = 'http://www.nspm.rs/?alphabet=l' encoding = 'utf-8' - language = 'sr' - - lang = 'sr-Latn-RS' + language = 'sr' + masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + , 'language' : language + , 'linearize_tables' : True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -39,6 +36,8 @@ class Nspm(BasicNewsRecipe): dict(name=['link','object','embed']) ,dict(name='td', attrs={'class':'buttonheading'}) ] + remove_tags_after = dict(attrs={'class':'article_separator'}) + remove_attributes = ['width','height'] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -51,17 +50,6 @@ class Nspm(BasicNewsRecipe): return url.replace('.html','/stampa.html') def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] + for item in soup.body.findAll(style=True): + del item['style'] return self.adeify_images(soup) diff --git a/resources/recipes/thecultofghoul.recipe b/resources/recipes/thecultofghoul.recipe new file mode 100644 index 0000000000..8a5f507971 --- /dev/null +++ b/resources/recipes/thecultofghoul.recipe @@ -0,0 +1,39 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +cultofghoul.blogspot.com +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class TheCultOfGhoul(BasicNewsRecipe): + title = 'The Cult of Ghoul' + __author__ = 'Darko Miletic' + description = 'Filmski blog' + oldest_article = 7 + max_articles_per_feed = 100 + language = 'sr' + encoding = 'utf-8' + no_stylesheets = True + use_embedded_content = True + extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' + + conversion_options = { + 'comment' : description + , 'tags' : 'film, blog, srbija, strava, uzas' + , 'publisher': 'Dejan Ognjanovic' + , 'language' : language + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + feeds = [(u'Posts', u'http://cultofghoul.blogspot.com/feeds/posts/default')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return self.adeify_images(soup) + + diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe index 3ced77023d..25f175f78b 100644 --- a/resources/recipes/wsj.recipe +++ b/resources/recipes/wsj.recipe @@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe): br.select_form(nr=0) br['user'] = self.username br['password'] = self.password - br.submit() + res = br.submit() + raw = res.read() + if 'Welcome,' not in raw: + raise ValueError('Failed to log in to wsj.com, check your ' + 'username and password') return br def postprocess_html(self, soup, first): @@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe): soup = self.wsj_get_index() year = strftime('%Y') - for x in soup.findAll('td', attrs={'class':'b14'}): + for x in soup.findAll('td', height='25', attrs={'class':'b14'}): txt = self.tag_to_string(x).strip() + txt = txt.replace(u'\xa0', ' ') + txt = txt.encode('ascii', 'ignore') if year in txt: self.timefmt = ' [%s]'%txt break diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 040f28549e..5a3a209f5c 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -70,9 +70,10 @@ class PML2PMLZ(FileTypePlugin): pmlz = zipfile.ZipFile(of.name, 'w') pmlz.write(pmlfile, os.path.basename(pmlfile)) - pml_img = os.path.basename(pmlfile)[0] + '_img' - img_dir = pml_img if os.path.exists(pml_img) else 'images' if \ - os.path.exists('images') else '' + pml_img = os.path.splitext(pmlfile)[0] + '_img' + i_img = os.path.join(os.path.dirname(pmlfile),'images') + img_dir = pml_img if os.path.isdir(pml_img) else i_img if \ + os.path.isdir(i_img) else '' if img_dir: for image in glob.glob(os.path.join(img_dir, '*.png')): pmlz.write(image, os.path.join('images', (os.path.basename(image)))) diff --git a/src/calibre/devices/usbms/cli.py b/src/calibre/devices/usbms/cli.py index 829267a0b8..1554d6fce0 100644 --- a/src/calibre/devices/usbms/cli.py +++ b/src/calibre/devices/usbms/cli.py @@ -49,7 +49,7 @@ class CLI(object): d = os.path.dirname(path) if not os.path.exists(d): os.makedirs(d) - with open(path, 'wb') as dest: + with open(path, 'w+b') as dest: try: shutil.copyfileobj(infile, dest) except IOError: diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index 2d22190ad4..4f808e3fb0 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -70,11 +70,14 @@ def is_recipe(filename): filename.rpartition('.')[0].endswith('_recipe_out') def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False): - pos = stream.tell() + pos = 0 + if hasattr(stream, 'tell'): + pos = stream.tell() try: return _get_metadata(stream, stream_type, use_libprs_metadata) finally: - stream.seek(pos) + if hasattr(stream, 'seek'): + stream.seek(pos) def _get_metadata(stream, stream_type, use_libprs_metadata): diff --git a/src/calibre/ebooks/metadata/rar.py b/src/calibre/ebooks/metadata/rar.py index 896e3d7777..d23577eab1 100644 --- a/src/calibre/ebooks/metadata/rar.py +++ b/src/calibre/ebooks/metadata/rar.py @@ -8,9 +8,10 @@ Read metadata from RAR archives ''' import os -from cStringIO import StringIO -from calibre.ptempfile import PersistentTemporaryFile + +from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory from calibre.libunrar import extract_member, names +from calibre import CurrentDir def get_metadata(stream): from calibre.ebooks.metadata.archive import is_comic @@ -32,8 +33,10 @@ def get_metadata(stream): stream_type = stream_type[1:] if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub', 'rb', 'imp', 'pdf', 'lrf'): - data = extract_member(path, match=None, name=f)[1] - stream = StringIO(data) + with TemporaryDirectory() as tdir: + with CurrentDir(tdir): + stream = extract_member(path, match=None, name=f, + as_file=True)[1] return get_metadata(stream, stream_type) raise ValueError('No ebook found in RAR archive') diff --git a/src/calibre/ebooks/metadata/zip.py b/src/calibre/ebooks/metadata/zip.py index db9d751f3a..08ac132d53 100644 --- a/src/calibre/ebooks/metadata/zip.py +++ b/src/calibre/ebooks/metadata/zip.py @@ -3,9 +3,10 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' import os -from zipfile import ZipFile -from cStringIO import StringIO +from calibre.utils.zipfile import ZipFile +from calibre.ptempfile import TemporaryDirectory +from calibre import CurrentDir def get_metadata(stream): from calibre.ebooks.metadata.meta import get_metadata @@ -23,8 +24,10 @@ def get_metadata(stream): stream_type = stream_type[1:] if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub', 'rb', 'imp', 'pdf', 'lrf'): - stream = StringIO(zf.read(f)) - return get_metadata(stream, stream_type) + with TemporaryDirectory() as tdir: + with CurrentDir(tdir): + path = zf.extract(f) + return get_metadata(open(path, 'rb'), stream_type) raise ValueError('No ebook found in ZIP archive') diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 88396b4346..ae175f1493 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -796,10 +796,11 @@ class MobiReader(object): def get_metadata(stream): from calibre.utils.logging import Log log = Log() - mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) try: mh = MetadataHeader(stream, log) + if mh.title and mh.title != _('Unknown'): + mi.title = mh.title if mh.exth is not None: if mh.exth.mi is not None: @@ -818,10 +819,15 @@ def get_metadata(stream): else: data = mh.section_data(mh.first_image_index) buf = cStringIO.StringIO(data) - im = PILImage.open(buf) - obuf = cStringIO.StringIO() - im.convert('RGBA').save(obuf, format='JPEG') - mi.cover_data = ('jpg', obuf.getvalue()) + try: + im = PILImage.open(buf) + except: + log.exception('Failed to read MOBI cover') + else: + obuf = cStringIO.StringIO() + im.convert('RGB').save(obuf, format='JPEG') + mi.cover_data = ('jpg', obuf.getvalue()) except: - log.exception() + log.filter_level = Log.DEBUG + log.exception('Failed to read MOBI metadata') return mi diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index d57ed136f6..3df24fcc86 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -131,7 +131,7 @@ class PMLMLizer(object): if item.href in self.link_hrefs.keys(): toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title)) else: - self.oeb.warn('Ignoring toc item: %s not found in document.' % item) + self.oeb_book.warn('Ignoring toc item: %s not found in document.' % item) return ''.join(toc) def get_text(self): diff --git a/src/calibre/libunrar.py b/src/calibre/libunrar.py index 06732b931a..bf38a47d64 100644 --- a/src/calibre/libunrar.py +++ b/src/calibre/libunrar.py @@ -217,33 +217,55 @@ def names(path): finally: _libunrar.RARCloseArchive(arc_data) -def extract_member(path, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I), name=None): +def _extract_member(path, match, name): + + def is_match(fname): + return (name is not None and fname == name) or \ + (match is not None and match.search(fname) is not None) + + open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None) + arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data)) + try: + if open_archive_data.OpenResult != 0: + raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path)) + header_data = RARHeaderDataEx(CmtBuf=None) + first = True + while True: + if _libunrar.RARReadHeaderEx(arc_data, byref(header_data)) != 0: + raise UnRARException('%s has no files'%path if first + else 'No match found in %s'%path) + file_name = header_data.FileNameW + if is_match(file_name): + PFCode = _libunrar.RARProcessFileW(arc_data, RAR_EXTRACT, None, None) + if PFCode != 0: + raise UnRARException(_interpret_process_file_error(PFCode)) + abspath = os.path.abspath(*file_name.split('/')) + return abspath + else: + PFCode = _libunrar.RARProcessFileW(arc_data, RAR_SKIP, None, None) + if PFCode != 0: + raise UnRARException(_interpret_process_file_error(PFCode)) + first = False + + finally: + _libunrar.RARCloseArchive(arc_data) + +def extract_member(path, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I), + name=None, as_file=False): if hasattr(path, 'read'): data = path.read() f = NamedTemporaryFile(suffix='.rar') f.write(data) f.flush() path = f.name - with TemporaryDirectory('_libunrar') as dir: - with CurrentDir(dir): - open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None) - arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data)) - try: - if open_archive_data.OpenResult != 0: - raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path)) - header_data = RARHeaderDataEx(CmtBuf=None) - while True: - if _libunrar.RARReadHeaderEx(arc_data, byref(header_data)) != 0: - raise UnRARException('%s has no files'%path) - PFCode = _libunrar.RARProcessFileW(arc_data, RAR_EXTRACT, None, None) - if PFCode != 0: - raise UnRARException(_interpret_process_file_error(PFCode)) - file_name = header_data.FileNameW - if (name is not None and file_name == name) or \ - (match is not None and match.search(file_name)): - return header_data.FileNameW.replace('/', os.sep), \ - open(os.path.join(dir, *header_data.FileNameW.split('/')), 'rb').read() - finally: - _libunrar.RARCloseArchive(arc_data) + path = os.path.abspath(path) + if as_file: + path = _extract_member(path, match, name) + return path, open(path, 'rb') + else: + with TemporaryDirectory('_libunrar') as tdir: + with CurrentDir(tdir): + path = _extract_member(path, match, name) + return path, open(path, 'rb').read() diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 6e9c72de26..8b1757371d 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -19,15 +19,13 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, from calibre.ebooks.metadata.opf2 import OPFCreator from calibre import entity_to_unicode from calibre.web import Recipe -from calibre.ebooks import render_html from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import RecursiveFetcher from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending -from calibre.ptempfile import PersistentTemporaryFile, \ - PersistentTemporaryDirectory +from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.date import now as nowf class BasicNewsRecipe(Recipe): @@ -928,63 +926,52 @@ class BasicNewsRecipe(Recipe): ''' Create a generic cover for recipes that dont have a cover ''' - from calibre.gui2 import is_ok_to_use_qt - if not is_ok_to_use_qt(): - return False - img_data = open(I('library.png'), 'rb').read() - tdir = PersistentTemporaryDirectory('_default_cover') - img = os.path.join(tdir, 'logo.png') - with open(img, 'wb') as g: - g.write(img_data) - img = os.path.basename(img) - html= u'''\ - - - - - - -

%(title)s

-

-
-
- calibre -
-
-

%(date)s

-




-

%(author)s

-








-

Produced by %(app)s

-
-
- - - '''%dict(title=self.title if isinstance(self.title, unicode) else self.title.decode(preferred_encoding, 'replace'), - author=self.__author__ if isinstance(self.__author__, unicode) else self.__author__.decode(preferred_encoding, 'replace'), - date=strftime(self.timefmt), - app=__appname__ +' '+__version__, - img=img) - hf = os.path.join(tdir, 'cover.htm') - with open(hf, 'wb') as f: - f.write(html.encode('utf-8')) - renderer = render_html(hf) - if renderer.tb is not None: - self.log.warning('Failed to render default cover') - self.log.debug(renderer.tb) - else: - cover_file.write(renderer.data) + try: + try: + from PIL import Image, ImageDraw, ImageFont + Image, ImageDraw, ImageFont + except ImportError: + import Image, ImageDraw, ImageFont + font_path = P('fonts/liberation/LiberationSerif-Bold.ttf') + title = self.title if isinstance(self.title, unicode) else \ + self.title.decode(preferred_encoding, 'replace') + date = strftime(self.timefmt) + app = '['+__appname__ +' '+__version__+']' + + COVER_WIDTH, COVER_HEIGHT = 590, 750 + img = Image.new('RGB', (COVER_WIDTH, COVER_HEIGHT), 'white') + draw = ImageDraw.Draw(img) + # Title + font = ImageFont.truetype(font_path, 44) + width, height = draw.textsize(title, font=font) + left = max(int((COVER_WIDTH - width)/2.), 0) + top = 15 + draw.text((left, top), title, fill=(0,0,0), font=font) + bottom = top + height + # Date + font = ImageFont.truetype(font_path, 32) + width, height = draw.textsize(date, font=font) + left = max(int((COVER_WIDTH - width)/2.), 0) + draw.text((left, bottom+15), date, fill=(0,0,0), font=font) + # Vanity + font = ImageFont.truetype(font_path, 28) + width, height = draw.textsize(app, font=font) + left = max(int((COVER_WIDTH - width)/2.), 0) + top = COVER_HEIGHT - height - 15 + draw.text((left, top), app, fill=(0,0,0), font=font) + # Logo + logo = Image.open(I('library.png'), 'r') + width, height = logo.size + left = max(int((COVER_WIDTH - width)/2.), 0) + top = max(int((COVER_HEIGHT - height)/2.), 0) + img.paste(logo, (left, top)) + img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE) + + img.convert('RGB').save(cover_file, 'JPEG') cover_file.flush() + except: + self.log.exception('Failed to generate default cover') + return False return True def get_masthead_title(self):