KG updates

2025-12-09 14:45:01 -05:00 · 2010-02-20 17:08:21 -07:00 · 2010-02-20 17:08:21 -07:00 · c88bcb7396
commit c88bcb7396
parent 3282f39baf 374badc1ba
22 changed files with 275 additions and 251 deletions
--- a/resources/images/library.png
+++ b/resources/images/library.png
--- a/resources/recipes/24sata_rs.recipe
+++ b/resources/recipes/24sata_rs.recipe
@ -1,7 +1,6 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

 '''
 24sata.rs
@ -9,7 +8,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Ser24Sata(BasicNewsRecipe):
    title                 = '24 Sata - Sr'
@ -17,22 +15,20 @@ class Ser24Sata(BasicNewsRecipe):
    description           = '24 sata portal vesti iz Srbije'
    publisher             = 'Ringier d.o.o.'
    category              = 'news, politics, entertainment, Serbia'
-    oldest_article        = 7
+    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    language = 'sr'
-
-    lang                  = 'sr-Latn-RS'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    language              = 'sr'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
+                        , 'linearize_tables' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -40,25 +36,6 @@ class Ser24Sata(BasicNewsRecipe):
    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-
-        attribs = [  'style','font','valign'
-                    ,'colspan','width','height'
-                    ,'rowspan','summary','align'
-                    ,'cellspacing','cellpadding'
-                    ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                   del item[attrib]
-
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

    def print_version(self, url):
--- a/resources/recipes/b92.recipe
+++ b/resources/recipes/b92.recipe
@ -1,7 +1,6 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 b92.net
 '''
@ -19,16 +18,15 @@ class B92(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1250'
-    language = 'sr'
-
-    lang                  = 'sr-Latn-RS'
-    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    language              = 'sr'
+    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} '
    
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
+                        , 'language'         : language
+                        , 'linearize_tables' : True
                        }
    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -50,20 +48,5 @@ class B92(BasicNewsRecipe):
        return url + '&version=print'

    def preprocess_html(self, soup):
-        del soup.body['onload']
-        for item in soup.findAll('font'):
-            item.name='div'
-            if item.has_key('size'):
-               del item['size']
-        attribs = [  'style','font','valign'
-                    ,'colspan','width','height'
-                    ,'rowspan','summary','align'
-                    ,'cellspacing','cellpadding'
-                    ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                   del item[attrib]                           
-        return soup
+        return self.adeify_images(soup)
+
--- a/resources/recipes/beta.recipe
+++ b/resources/recipes/beta.recipe
@ -1,13 +1,11 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 beta.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Danas(BasicNewsRecipe):
    title                 = 'BETA'
@ -19,18 +17,14 @@ class Danas(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = True
-    language = 'sr'
-
-    lang                  = 'sr-Latn-RS'
-    direction             = 'ltr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    language              = 'sr'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
                        }


@ -43,9 +37,4 @@ class Danas(BasicNewsRecipe):
                     ]

    def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)
--- a/resources/recipes/blic.recipe
+++ b/resources/recipes/blic.recipe
@ -14,14 +14,13 @@ class Blic(BasicNewsRecipe):
    description           = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
    publisher             = 'RINGIER d.o.o.'
    category              = 'news, politics, Serbia'
-    delay                 = 1
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
+    masthead_url          = 'http://www.blic.rs/resources/images/header/header_back.png'
    language              = 'sr'
-
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'  : description
@ -31,13 +30,15 @@ class Blic(BasicNewsRecipe):
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-
    remove_tags_before = dict(name='div', attrs={'id':'article_info'})
+    remove_tags        = [dict(name=['object','link'])]
+    remove_attributes  = ['width','height']

    feeds              = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]

-    remove_tags        = [dict(name=['object','link'])]

    def print_version(self, url):
        return url + '/print'

+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
--- a/resources/recipes/cetnixploitation.recipe
+++ b/resources/recipes/cetnixploitation.recipe
@ -0,0 +1,36 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+chetnixploitation.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Chetnixploitation(BasicNewsRecipe):
+    title                 = 'Chetnixploitation'
+    __author__            = 'Darko Miletic'
+    description           = 'Filmski blog'    
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    language              = 'sr'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    use_embedded_content  = True
+    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
+
+    conversion_options = {
+                          'comment'  : description
+                        , 'tags'     : 'film, blog, cetnici, srbija, ex-yu'
+                        , 'publisher': 'Son of Man'
+                        , 'language' : language
+                        }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+    feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
+
+
--- a/resources/recipes/danas.recipe
+++ b/resources/recipes/danas.recipe
@ -20,7 +20,7 @@ class Danas(BasicNewsRecipe):
    encoding              = 'utf-8'
    masthead_url          = 'http://www.danas.rs/images/basic/danas.gif'
    language              = 'sr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'          : description
@ -38,7 +38,7 @@ class Danas(BasicNewsRecipe):
                    ,dict(name=['object','link','iframe'])
                  ]

-    feeds          = [
+    feeds          = [ 
                        (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
                       ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
                       ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
@ -60,4 +60,4 @@ class Danas(BasicNewsRecipe):

    def print_version(self, url):
        return url + '&action=print'
-
+        
--- a/resources/recipes/e_novine.recipe
+++ b/resources/recipes/e_novine.recipe
@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

 '''
 e-novine.com
@ -9,7 +7,6 @@ e-novine.com

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class E_novine(BasicNewsRecipe):
    title                 = 'E-Novine'
@ -20,40 +17,38 @@ class E_novine(BasicNewsRecipe):
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
-    encoding              = 'cp1250'
+    encoding              = 'utf-8'
    use_embedded_content  = False
-    language = 'sr'
-
-    lang                  = 'sr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    language              = 'sr'
+    masthead_url          = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'
+    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
+    keep_only_tags = [
+                         dict(name='div', attrs={'class':'article_head'})
+                        ,dict(name='div', attrs={'id':'article_body'})
+                     ]

-    remove_tags = [dict(name=['object','link','embed','iframe'])]
+    remove_tags = [
+                     dict(name=['object','link','embed','iframe'])
+                    ,dict(attrs={'id':'box_article_tools'})
+                  ]
+    remove_attributes = ['height','width','lang']

-    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
+    feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss' )]

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
-        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
-        if ftag:
-           it = ftag.div
-           it.extract()
-           ftag.div.extract()
-           ftag.insert(0,it)
-        return soup
+        return self.adeify_images(soup)
+
+    def print_version(self, url):
+        return url + '?print'
--- a/resources/recipes/glas_srpske.recipe
+++ b/resources/recipes/glas_srpske.recipe
@ -1,7 +1,6 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

 '''
 glassrpske.com
@ -9,7 +8,6 @@ glassrpske.com

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class GlasSrpske(BasicNewsRecipe):
    title                 = 'Glas Srpske'
@ -22,20 +20,16 @@ class GlasSrpske(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    cover_url             = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
-    lang                  = 'sr-BA'
-    language = 'sr'
-
+    masthead_url          = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
+    language              = 'sr'
    INDEX                 = 'http://www.glassrpske.com'
-
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -63,11 +57,7 @@ class GlasSrpske(BasicNewsRecipe):
            ]

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
-        return soup
+        return self.adeify_images(soup)

    def parse_index(self):
        totalfeeds = []
--- a/resources/recipes/glasjavnosti.recipe
+++ b/resources/recipes/glasjavnosti.recipe
@ -1,7 +1,6 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.glas-javnosti.rs
 '''
@ -18,18 +17,14 @@ class GlasJavnosti(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language = 'sr'
-
-    lang                  = 'sr-Latn-RS'
-    direction             = 'ltr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    language              = 'sr'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
                        }


--- a/resources/recipes/nspm.recipe
+++ b/resources/recipes/nspm.recipe
@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 nspm.rs
 '''
@ -21,17 +19,16 @@ class Nspm(BasicNewsRecipe):
    use_embedded_content  = False
    INDEX                 = 'http://www.nspm.rs/?alphabet=l'
    encoding              = 'utf-8'
-    language = 'sr'
-
-    lang                  = 'sr-Latn-RS'
+    language              = 'sr'
+    masthead_url          = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
+                        , 'linearize_tables' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -39,6 +36,8 @@ class Nspm(BasicNewsRecipe):
                            dict(name=['link','object','embed'])
                           ,dict(name='td', attrs={'class':'buttonheading'})
                         ]
+    remove_tags_after = dict(attrs={'class':'article_separator'})
+    remove_attributes = ['width','height']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -51,17 +50,6 @@ class Nspm(BasicNewsRecipe):
        return url.replace('.html','/stampa.html')

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        attribs = [  'style','font','valign'
-                    ,'colspan','width','height'
-                    ,'rowspan','summary','align'
-                    ,'cellspacing','cellpadding'
-                    ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                   del item[attrib]
+        for item in soup.body.findAll(style=True):
+            del item['style']
        return self.adeify_images(soup)
--- a/resources/recipes/thecultofghoul.recipe
+++ b/resources/recipes/thecultofghoul.recipe
@ -0,0 +1,39 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+cultofghoul.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheCultOfGhoul(BasicNewsRecipe):
+    title                 = 'The Cult of Ghoul'
+    __author__            = 'Darko Miletic'
+    description           = 'Filmski blog'    
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    language              = 'sr'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    use_embedded_content  = True
+    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
+
+    conversion_options = {
+                          'comment'  : description
+                        , 'tags'     : 'film, blog, srbija, strava, uzas'
+                        , 'publisher': 'Dejan Ognjanovic'
+                        , 'language' : language
+                        }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    feeds = [(u'Posts', u'http://cultofghoul.blogspot.com/feeds/posts/default')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
+
+
--- a/resources/recipes/wsj.recipe
+++ b/resources/recipes/wsj.recipe
@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
                br.select_form(nr=0)
                br['user']   = self.username
                br['password'] = self.password
-                br.submit()
+                res = br.submit()
+                raw = res.read()
+                if 'Welcome,' not in raw:
+                    raise ValueError('Failed to log in to wsj.com, check your '
+                            'username and password')
            return br

        def postprocess_html(self, soup, first):
@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
            soup = self.wsj_get_index()

            year = strftime('%Y')
-            for x in soup.findAll('td', attrs={'class':'b14'}):
+            for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
                txt = self.tag_to_string(x).strip()
+                txt = txt.replace(u'\xa0', ' ')
+                txt = txt.encode('ascii', 'ignore')
                if year in txt:
                    self.timefmt = ' [%s]'%txt
                    break
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -70,9 +70,10 @@ class PML2PMLZ(FileTypePlugin):
        pmlz = zipfile.ZipFile(of.name, 'w')
        pmlz.write(pmlfile, os.path.basename(pmlfile))

-        pml_img = os.path.basename(pmlfile)[0] + '_img'
-        img_dir = pml_img if os.path.exists(pml_img) else 'images' if \
-            os.path.exists('images') else ''
+        pml_img = os.path.splitext(pmlfile)[0] + '_img'
+        i_img = os.path.join(os.path.dirname(pmlfile),'images')
+        img_dir = pml_img if os.path.isdir(pml_img) else i_img if \
+            os.path.isdir(i_img) else ''
        if img_dir:
            for image in glob.glob(os.path.join(img_dir, '*.png')):
                pmlz.write(image, os.path.join('images', (os.path.basename(image))))
--- a/src/calibre/devices/usbms/cli.py
+++ b/src/calibre/devices/usbms/cli.py
@ -49,7 +49,7 @@ class CLI(object):
        d = os.path.dirname(path)
        if not os.path.exists(d):
            os.makedirs(d)
-        with open(path, 'wb') as dest:
+        with open(path, 'w+b') as dest:
            try:
                shutil.copyfileobj(infile, dest)
            except IOError:
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -70,11 +70,14 @@ def is_recipe(filename):
        filename.rpartition('.')[0].endswith('_recipe_out')

 def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
-    pos = stream.tell()
+    pos = 0
+    if hasattr(stream, 'tell'):
+        pos = stream.tell()
    try:
        return _get_metadata(stream, stream_type, use_libprs_metadata)
    finally:
-        stream.seek(pos)
+        if hasattr(stream, 'seek'):
+            stream.seek(pos)


 def _get_metadata(stream, stream_type, use_libprs_metadata):
--- a/src/calibre/ebooks/metadata/rar.py
+++ b/src/calibre/ebooks/metadata/rar.py
@ -8,9 +8,10 @@ Read metadata from RAR archives
 '''

 import os
-from cStringIO import StringIO
-from calibre.ptempfile import PersistentTemporaryFile
+
+from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
 from calibre.libunrar import extract_member, names
+from calibre import CurrentDir

 def get_metadata(stream):
    from calibre.ebooks.metadata.archive import is_comic
@ -32,8 +33,10 @@ def get_metadata(stream):
            stream_type = stream_type[1:]
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                               'rb', 'imp', 'pdf', 'lrf'):
-                data = extract_member(path, match=None, name=f)[1]
-                stream = StringIO(data)
+                with TemporaryDirectory() as tdir:
+                    with CurrentDir(tdir):
+                       stream = extract_member(path, match=None, name=f,
+                               as_file=True)[1]
                return get_metadata(stream, stream_type)
    raise ValueError('No ebook found in RAR archive')

--- a/src/calibre/ebooks/metadata/zip.py
+++ b/src/calibre/ebooks/metadata/zip.py
@ -3,9 +3,10 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 import os
-from zipfile import ZipFile
-from cStringIO import StringIO

+from calibre.utils.zipfile import ZipFile
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir

 def get_metadata(stream):
    from calibre.ebooks.metadata.meta import get_metadata
@ -23,8 +24,10 @@ def get_metadata(stream):
            stream_type = stream_type[1:]
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                               'rb', 'imp', 'pdf', 'lrf'):
-                stream = StringIO(zf.read(f))
-                return get_metadata(stream, stream_type)
+                with TemporaryDirectory() as tdir:
+                    with CurrentDir(tdir):
+                        path = zf.extract(f)
+                        return get_metadata(open(path, 'rb'), stream_type)
    raise ValueError('No ebook found in ZIP archive')


--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -796,10 +796,11 @@ class MobiReader(object):
 def get_metadata(stream):
    from calibre.utils.logging import Log
    log = Log()
-
    mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    try:
        mh = MetadataHeader(stream, log)
+        if mh.title and mh.title != _('Unknown'):
+            mi.title = mh.title

        if mh.exth is not None:
            if mh.exth.mi is not None:
@ -818,10 +819,15 @@ def get_metadata(stream):
        else:
            data  = mh.section_data(mh.first_image_index)
        buf = cStringIO.StringIO(data)
-        im = PILImage.open(buf)
-        obuf = cStringIO.StringIO()
-        im.convert('RGBA').save(obuf, format='JPEG')
-        mi.cover_data = ('jpg', obuf.getvalue())
+        try:
+            im = PILImage.open(buf)
+        except:
+            log.exception('Failed to read MOBI cover')
+        else:
+            obuf = cStringIO.StringIO()
+            im.convert('RGB').save(obuf, format='JPEG')
+            mi.cover_data = ('jpg', obuf.getvalue())
    except:
-        log.exception()
+        log.filter_level = Log.DEBUG
+        log.exception('Failed to read MOBI metadata')
    return mi
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -131,7 +131,7 @@ class PMLMLizer(object):
                if item.href in self.link_hrefs.keys():
                    toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
                else:
-                    self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
+                    self.oeb_book.warn('Ignoring toc item: %s not found in document.' % item)
        return ''.join(toc)

    def get_text(self):
--- a/src/calibre/libunrar.py
+++ b/src/calibre/libunrar.py
@ -217,33 +217,86 @@ def names(path):
    finally:
        _libunrar.RARCloseArchive(arc_data)

-def extract_member(path, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I), name=None):
+def _extract_member(path, match, name):
+    '''
+    Extract the first matching member of the RAR archive at `path` and return
+    the absolute path of the extracted file.
+
+    A member is selected when its name equals `name`, or when the compiled
+    regular expression `match` is found in its name. Either criterion may
+    be None, which disables it. No destination is passed to unrar, so the
+    file is written relative to the current working directory.
+
+    Raises UnRARException if the archive cannot be opened, if a member
+    cannot be processed, or if the headers run out before anything matches.
+    '''
+
+    def is_match(fname):
+        return (name is not None and fname == name) or \
+               (match is not None and match.search(fname) is not None)
+
+    open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None)
+    arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data))
+    try:
+        if open_archive_data.OpenResult != 0:
+            raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path))
+        header_data = RARHeaderDataEx(CmtBuf=None)
+        first = True
+        while True:
+            if _libunrar.RARReadHeaderEx(arc_data, byref(header_data)) != 0:
+                # `first` only selects which of the two messages is reported
+                raise UnRARException('%s has no files'%path if first
+                        else 'No match found in %s'%path)
+            file_name = header_data.FileNameW
+            if is_match(file_name):
+                PFCode = _libunrar.RARProcessFileW(arc_data, RAR_EXTRACT, None, None)
+                if PFCode != 0:
+                    raise UnRARException(_interpret_process_file_error(PFCode))
+                # os.path.abspath() takes exactly one argument: join the
+                # '/'-separated member name into a native path first,
+                # otherwise any member inside a folder raises TypeError.
+                return os.path.abspath(os.path.join(*file_name.split('/')))
+            else:
+                PFCode = _libunrar.RARProcessFileW(arc_data, RAR_SKIP, None, None)
+                if PFCode != 0:
+                    raise UnRARException(_interpret_process_file_error(PFCode))
+            first = False
+
+    finally:
+        _libunrar.RARCloseArchive(arc_data)
+
+def extract_member(path, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I),
+        name=None, as_file=False):
+    '''
+    Extract a single member from a RAR archive.
+
+    :param path: Path to the archive, or a file-like object holding it (its
+                 contents are first copied to a temporary .rar file).
+    :param match: Compiled regular expression searched for in member names,
+                  or None. The default matches common image extensions.
+    :param name: Exact member name to extract, or None.
+    :param as_file: If True, extract into the current working directory and
+                    return an open file object instead of the file contents.
+    :return: ``(path, data)``, where `path` is the absolute path the member
+             was extracted to and `data` is its contents (or, with `as_file`,
+             a binary file object that the caller must close). Without
+             `as_file` the extraction happens inside a TemporaryDirectory,
+             which presumably is removed on exit: treat `path` as a name only.
+    '''
    if hasattr(path, 'read'):
        data = path.read()
        f = NamedTemporaryFile(suffix='.rar')
        f.write(data)
        f.flush()
        path = f.name
-    with TemporaryDirectory('_libunrar') as dir:
-        with CurrentDir(dir):
-            open_archive_data = RAROpenArchiveDataEx(ArcName=path, OpenMode=RAR_OM_EXTRACT, CmtBuf=None)
-            arc_data = _libunrar.RAROpenArchiveEx(byref(open_archive_data))
-            try:
-                if open_archive_data.OpenResult != 0:
-                    raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path))
-                header_data = RARHeaderDataEx(CmtBuf=None)
-                while True:
-                    if _libunrar.RARReadHeaderEx(arc_data, byref(header_data)) != 0:
-                        raise UnRARException('%s has no files'%path)
-                    PFCode = _libunrar.RARProcessFileW(arc_data, RAR_EXTRACT, None, None)
-                    if PFCode != 0:
-                        raise UnRARException(_interpret_process_file_error(PFCode))
-                    file_name = header_data.FileNameW
-                    if (name is not None and file_name == name) or \
-                       (match is not None and match.search(file_name)):
-                        return header_data.FileNameW.replace('/', os.sep), \
-                                open(os.path.join(dir, *header_data.FileNameW.split('/')), 'rb').read()
-            finally:
-                _libunrar.RARCloseArchive(arc_data)

+    path = os.path.abspath(path)
+    if as_file:
+        member = _extract_member(path, match, name)
+        return member, open(member, 'rb')
+    with TemporaryDirectory('_libunrar') as tdir:
+        with CurrentDir(tdir):
+            member = _extract_member(path, match, name)
+            # Close the handle before the temporary directory is torn down
+            with open(member, 'rb') as src:
+                return member, src.read()

--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -19,15 +19,13 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData,
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre import entity_to_unicode
 from calibre.web import Recipe
-from calibre.ebooks import render_html
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
 from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
 from calibre.web.fetch.simple import option_parser as web2disk_option_parser
 from calibre.web.fetch.simple import RecursiveFetcher
 from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
-from calibre.ptempfile import PersistentTemporaryFile, \
-                              PersistentTemporaryDirectory
+from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.date import now as nowf

 class BasicNewsRecipe(Recipe):
@ -928,63 +926,52 @@ class BasicNewsRecipe(Recipe):
        '''
        Create a generic cover for recipes that dont have a cover
        '''
-        from calibre.gui2 import is_ok_to_use_qt
-        if not is_ok_to_use_qt():
-            return False
-        img_data = open(I('library.png'), 'rb').read()
-        tdir = PersistentTemporaryDirectory('_default_cover')
-        img = os.path.join(tdir, 'logo.png')
-        with open(img, 'wb') as g:
-            g.write(img_data)
-        img = os.path.basename(img)
-        html= u'''\
-        <html>
-            <head>
-                <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
-                <style type="text/css">
-                    body {
-                        background: white no-repeat fixed center center;
-                        text-align: center;
-                        vertical-align: center;
-                        overflow: hidden;
-                        font-size: 18px;
-                    }
-                    h1 { font-family: serif; }
-                    h2, h4 { font-family: monospace; }
-                </style>
-            </head>
-            <body>
-                <h1>%(title)s</h1>
-                <br/><br/>
-                <div style="position:relative">
-                    <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
-                        <img src="%(img)s" alt="calibre" style="opacity:0.3"/>
-                    </div>
-                    <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
-                        <h2>%(date)s</h2>
-                        <br/><br/><br/><br/><br/>
-                        <h3>%(author)s</h3>
-                        <br/><br/></br/><br/><br/><br/><br/><br/><br/>
-                        <h4>Produced by %(app)s</h4>
-                    </div>
-                </div>
-            </body>
-        </html>
-        '''%dict(title=self.title if isinstance(self.title, unicode) else self.title.decode(preferred_encoding, 'replace'),
-                 author=self.__author__ if isinstance(self.__author__, unicode) else self.__author__.decode(preferred_encoding, 'replace'),
-                 date=strftime(self.timefmt),
-                 app=__appname__ +' '+__version__,
-                 img=img)
-        hf = os.path.join(tdir, 'cover.htm')
-        with open(hf, 'wb') as f:
-            f.write(html.encode('utf-8'))
-        renderer = render_html(hf)
-        if renderer.tb is not None:
-            self.log.warning('Failed to render default cover')
-            self.log.debug(renderer.tb)
-        else:
-            cover_file.write(renderer.data)
+        try:
+            try:
+                from PIL import Image, ImageDraw, ImageFont
+                Image, ImageDraw, ImageFont
+            except ImportError:
+                import Image, ImageDraw, ImageFont
+            font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
+            title = self.title if isinstance(self.title, unicode) else \
+                    self.title.decode(preferred_encoding, 'replace')
+            date = strftime(self.timefmt)
+            app = '['+__appname__ +' '+__version__+']'
+
+            COVER_WIDTH, COVER_HEIGHT = 590, 750
+            img = Image.new('RGB', (COVER_WIDTH, COVER_HEIGHT), 'white')
+            draw = ImageDraw.Draw(img)
+            # Title
+            font = ImageFont.truetype(font_path, 44)
+            width, height = draw.textsize(title, font=font)
+            left = max(int((COVER_WIDTH - width)/2.), 0)
+            top = 15
+            draw.text((left, top), title, fill=(0,0,0), font=font)
+            bottom = top + height
+            # Date
+            font = ImageFont.truetype(font_path, 32)
+            width, height = draw.textsize(date, font=font)
+            left = max(int((COVER_WIDTH - width)/2.), 0)
+            draw.text((left, bottom+15), date, fill=(0,0,0), font=font)
+            # Vanity
+            font = ImageFont.truetype(font_path, 28)
+            width, height = draw.textsize(app, font=font)
+            left = max(int((COVER_WIDTH - width)/2.), 0)
+            top = COVER_HEIGHT - height - 15
+            draw.text((left, top), app, fill=(0,0,0), font=font)
+            # Logo
+            logo = Image.open(I('library.png'), 'r')
+            width, height = logo.size
+            left = max(int((COVER_WIDTH - width)/2.), 0)
+            top = max(int((COVER_HEIGHT - height)/2.), 0)
+            img.paste(logo, (left, top))
+            img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
+
+            img.convert('RGB').save(cover_file, 'JPEG')
            cover_file.flush()
+        except:
+            self.log.exception('Failed to generate default cover')
+            return False
        return True

    def get_masthead_title(self):