Update all Serbian and Croatian recipes to work with calibre 0.6

2025-07-09 03:04:10 -04:00 · 2009-08-16 15:56:04 -06:00 · 2009-08-16 15:56:04 -06:00 · fd2888af18
commit fd2888af18
parent 1ae3724038
27 changed files with 501 additions and 404 deletions
--- a/src/calibre/web/feeds/recipes/recipe_24sata.py
+++ b/src/calibre/web/feeds/recipes/recipe_24sata.py
@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Cro24Sata(BasicNewsRecipe):
    title                 = '24 Sata - Hr'
@ -22,18 +23,18 @@ class Cro24Sata(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    remove_javascript     = True    
    language              = _('Croatian')
+    lang                  = 'hr-HR'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@ -45,9 +46,11 @@ class Cro24Sata(BasicNewsRecipe):
    feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]

    def preprocess_html(self, soup):
-        soup.html['lang']     = 'hr-HR'
-        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
+        soup.html['lang']     = self.lang
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py
+++ b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py
@ -17,53 +17,51 @@ class Ser24Sata(BasicNewsRecipe):
    description           = '24 sata portal vesti iz Srbije'
    publisher             = 'Ringier d.o.o.'
    category              = 'news, politics, entertainment, Serbia'
-    oldest_article        = 1
+    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    remove_javascript     = True
    language              = _('Serbian')
-
+    lang                  = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]

-    def cleanup_image_tags(self,soup):
-        for item in soup.findAll('img'):
-            for attrib in ['height','width','border','align']:
-                if item.has_key(attrib):
-                   del item[attrib]
-            oldParent = item.parent
-            myIndex = oldParent.contents.index(item)
-            item.extract()
-            divtag = Tag(soup,'div')
-            brtag  = Tag(soup,'br')
-            oldParent.insert(myIndex,divtag)
-            divtag.append(item)
-            divtag.append(brtag)
-        return soup
-
    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = 'sr-Latn-RS'
-        soup.html['lang']     = 'sr-Latn-RS'
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
-        return self.cleanup_image_tags(soup)
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang']     = self.lang
+        
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
+        
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return self.adeify_images(soup)

    def print_version(self, url):
-        article, sep, rest = url.partition('#')
-        article_base, sep2, article_id = article.partition('id=')
-        return 'http://www.24sata.co.rs/_print.php?id=' + article_id
+        article    = url.partition('#')[0]
+        article_id = article.partition('id=')[2]
+        return 'http://www.24sata.rs/_print.php?id=' + article_id

--- a/src/calibre/web/feeds/recipes/recipe_b92.py
+++ b/src/calibre/web/feeds/recipes/recipe_b92.py
@ -14,23 +14,21 @@ class B92(BasicNewsRecipe):
    description           = 'Dnevne vesti iz Srbije i sveta'    
    publisher             = 'B92'
    category              = 'news, politics, Serbia'
-    oldest_article        = 1
+    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    remove_javascript     = True
    encoding              = 'cp1250'
    language              = _('Serbian')
+    lang                  = 'sr-Latn-RS'
    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        }
    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    
@ -39,6 +37,7 @@ class B92(BasicNewsRecipe):
    remove_tags = [
                     dict(name='ul', attrs={'class':'comment-nav'})
                    ,dict(name=['embed','link','base']            )
+                    ,dict(name='div', attrs={'class':'udokum'}    )
                  ]

    feeds          = [
@ -51,14 +50,19 @@ class B92(BasicNewsRecipe):

    def preprocess_html(self, soup):
        del soup.body['onload']
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll(align=True):
-            del item['align']
        for item in soup.findAll('font'):
-            item.name='p'
+            item.name='div'
            if item.has_key('size'):
               del item['size']
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                           
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_blic.py
+++ b/src/calibre/web/feeds/recipes/recipe_blic.py
@ -26,15 +26,13 @@ class Blic(BasicNewsRecipe):
    lang                  = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' 
-    
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        }
+        
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags     = [dict(name='div', attrs={'class':'single_news'})]
@ -44,14 +42,21 @@ class Blic(BasicNewsRecipe):
    remove_tags        = [dict(name=['object','link'])]
    
    def print_version(self, url):
-        start_url, question, rest_url = url.partition('?')
+        rest_url = url.partition('?')[2]
        return u'http://www.blic.rs/_print.php?' + rest_url

    def preprocess_html(self, soup):
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
-        for item in soup.findAll(style=True):
-            del item['style']
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
        return self.adeify_images(soup)

    def get_article_url(self, article):
--- a/src/calibre/web/feeds/recipes/recipe_borba.py
+++ b/src/calibre/web/feeds/recipes/recipe_borba.py
@ -17,24 +17,23 @@ class Borba(BasicNewsRecipe):
    publisher             = 'IP Novine Borba'
    category              = 'news, politics, Serbia'    
    language              = _('Serbian')
-    oldest_article        = 1
+    lang                  = _('sr-Latn-RS')
+    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
-    encoding              = 'utf8'
-    remove_javascript     = True
+    encoding              = 'utf-8'
    use_embedded_content  = False
    cover_url             = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
    INDEX                 = u'http://www.borba.rs/'
-    extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'
+    extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@ -60,14 +59,17 @@ class Borba(BasicNewsRecipe):
            ]

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = 'sr-Latn-ME'
-        soup.html['lang']     = 'sr-Latn-ME'
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll(font=True):
-            del item['font']
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]            
        return soup

    def parse_index(self):
--- a/src/calibre/web/feeds/recipes/recipe_danas.py
+++ b/src/calibre/web/feeds/recipes/recipe_danas.py
@ -7,9 +7,10 @@ danas.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Danas(BasicNewsRecipe):
-    title                 = u'Danas'
+    title                 = 'Danas'
    __author__            = 'Darko Miletic'
    description           = 'Vesti'
    publisher             = 'Danas d.o.o.'
@ -17,19 +18,19 @@ class Danas(BasicNewsRecipe):
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = False
-    remove_javascript     = True
    use_embedded_content  = False
    language              = _('Serbian')
+    lang                  = 'sr-Latn-RS'
+    direction             = 'ltr'    
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
 

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,8 +45,17 @@ class Danas(BasicNewsRecipe):
    feeds          = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
-        soup.head.insert(0,mtag)    
-        for item in soup.findAll(style=True):
-            del item['style']
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_dnevni_avaz.py
+++ b/src/calibre/web/feeds/recipes/recipe_dnevni_avaz.py
@ -9,6 +9,7 @@ dnevniavaz.ba

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class DnevniAvaz(BasicNewsRecipe):
    title                 = 'Dnevni Avaz'
@ -25,17 +26,18 @@ class DnevniAvaz(BasicNewsRecipe):
    cover_url             = 'http://www.dnevniavaz.ba/img/logo.gif'
    lang                  = 'bs-BA'
    language              = _('Bosnian')
+    direction             = 'ltr'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
-
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
+  
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
@ -47,9 +49,20 @@ class DnevniAvaz(BasicNewsRecipe):
              ,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
            ]

+    def replace_tagname(self,soup,tagname,tagid,newtagname):
+        headtag = soup.find(tagname,attrs={'id':tagid})
+        if headtag:
+           headtag.name = newtagname
+        return
+        
    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang']     = self.lang
-        mtag = '<meta http-equiv="Content-Language" content="bs-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
-        return soup
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        self.replace_tagname(soup,'div','fullarticle-title'  ,'h1')
+        self.replace_tagname(soup,'div','fullarticle-leading','h3')
+        self.replace_tagname(soup,'div','fullarticle-date'   ,'h5')
+        return self.adeify_images(soup)
--- a/src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py
+++ b/src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py
@ -9,6 +9,7 @@ dnevnik.hr

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class DnevnikCro(BasicNewsRecipe):
    title                 = 'Dnevnik - Hr'
@ -22,19 +23,18 @@ class DnevnikCro(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    remove_javascript     = True    
    language              = _('Croatian')
-
+    lang                  = 'hr-HR'
+    direction             = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@ -51,10 +51,24 @@ class DnevnikCro(BasicNewsRecipe):
    feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]

    def preprocess_html(self, soup):
-        soup.html['lang']     = 'hr-HR'
-        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+        
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
+        
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return self.adeify_images(soup)

--- a/src/calibre/web/feeds/recipes/recipe_e_novine.py
+++ b/src/calibre/web/feeds/recipes/recipe_e_novine.py
@ -9,6 +9,7 @@ e-novine.com

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class E_novine(BasicNewsRecipe):
    title                 = 'E-Novine'
@ -16,23 +17,22 @@ class E_novine(BasicNewsRecipe):
    description           = 'News from Serbia'
    publisher             = 'E-novine'
    category              = 'news, politics, Balcans'
-    oldest_article        = 1
+    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1250'
-    cover_url             = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
-    remove_javascript     = True
    use_embedded_content  = False
    language              = _('Serbian')
+    lang                  = 'sr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@ -43,10 +43,10 @@ class E_novine(BasicNewsRecipe):
    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = 'sr-Latn-ME'
-        soup.html['lang']     = 'sr-Latn-ME'
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
-        soup.head.insert(0,mtag)
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang']     = self.lang
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
--- a/src/calibre/web/feeds/recipes/recipe_glas_srpske.py
+++ b/src/calibre/web/feeds/recipes/recipe_glas_srpske.py
@ -9,6 +9,7 @@ glassrpske.com

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class GlasSrpske(BasicNewsRecipe):
    title                 = 'Glas Srpske'
@ -21,7 +22,6 @@ class GlasSrpske(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    remove_javascript     = True    
    cover_url             = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
    lang                  = 'sr-BA'
    language              = _('Serbian')
@ -29,13 +29,13 @@ class GlasSrpske(BasicNewsRecipe):

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    
@ -64,8 +64,8 @@ class GlasSrpske(BasicNewsRecipe):
    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang']     = self.lang
-        mtag = '<meta http-equiv="Content-Language" content="sr-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
        return soup        
        
    def parse_index(self):
--- a/src/calibre/web/feeds/recipes/recipe_hrt.py
+++ b/src/calibre/web/feeds/recipes/recipe_hrt.py
@ -24,13 +24,13 @@ class HRT(BasicNewsRecipe):
    lang                  = 'hr-HR'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
--- a/src/calibre/web/feeds/recipes/recipe_jutarnji.py
+++ b/src/calibre/web/feeds/recipes/recipe_jutarnji.py
@ -8,32 +8,32 @@ jutarnji.hr

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Jutarnji(BasicNewsRecipe):
-    title                 = u'Jutarnji'
-    __author__            = u'Darko Miletic'
-    description           = u'Hrvatski portal'
+    title                 = 'Jutarnji'
+    __author__            = 'Darko Miletic'
+    description           = 'Hrvatski portal'
    publisher             = 'Jutarnji.hr'
    category              = 'news, politics, Croatia'    
-    oldest_article        = 1
+    oldest_article        = 2
    max_articles_per_feed = 100
-    simultaneous_downloads = 2
    delay                 = 1
    language              = _('Croatian')
    no_stylesheets        = True
    use_embedded_content  = False
-    remove_javascript     = True
    encoding              = 'cp1250'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    lang                  = 'hr-HR'
+    direction             = 'ltr'    
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .vijestnaslov{font-size: x-large; font-weight: bold}'
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }


    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -59,11 +59,24 @@ class Jutarnji(BasicNewsRecipe):
        return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']        
-        for item in soup.findAll(width=True):
-            del item['width']        
-        return soup
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+        
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
+        
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return self.adeify_images(soup)
        
--- a/src/calibre/web/feeds/recipes/recipe_nacional_cro.py
+++ b/src/calibre/web/feeds/recipes/recipe_nacional_cro.py
@@ -9,6 +9,7 @@ nacional.hr

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class NacionalCro(BasicNewsRecipe):
    title                 = 'Nacional - Hr'
@@ -22,19 +23,20 @@ class NacionalCro(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    remove_javascript     = True    
    language              = _('Croatian')
+    lang                 = 'hr-HR'
+    direction            = 'ltr'    

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
-     
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
+        
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [dict(name=['object','link','embed'])]
@@ -42,9 +44,12 @@ class NacionalCro(BasicNewsRecipe):
    feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]

    def preprocess_html(self, soup):
-        soup.html['lang']     = 'hr-HR'
-        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_nin.py
+++ b/src/calibre/web/feeds/recipes/recipe_nin.py
@@ -26,21 +26,19 @@ class Nin(BasicNewsRecipe):
    INDEX                  = PREFIX + '/?change_lang=ls'
    LOGIN                  = PREFIX + '/?logout=true'
    FEED                   = PREFIX + '/misc/rss.php?feed=RSS2.0'
-    remove_javascript      = True
    use_embedded_content   = False
    language               = _('Serbian')
    lang                   = 'sr-Latn-RS'
    direction              = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
                          
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    
@@ -74,12 +72,20 @@ class Nin(BasicNewsRecipe):
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
-        for item in soup.findAll(style=True):
-            del item['style']        
+        soup.head.insert(1,mcharset)        
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]            
        return soup

    def get_article_url(self, article):
        raw = article.get('link',  None)         
        return raw.replace('.co.yu','.co.rs')
-        
--- a/src/calibre/web/feeds/recipes/recipe_novosti.py
+++ b/src/calibre/web/feeds/recipes/recipe_novosti.py
@@ -8,30 +8,30 @@ novosti.rs

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Novosti(BasicNewsRecipe):
-    title                 = u'Vecernje Novosti'
-    __author__            = u'Darko Miletic'
-    description           = u'Vesti'
+    title                 = 'Vecernje Novosti'
+    __author__            = 'Darko Miletic'
+    description           = 'Vesti'
    publisher             = 'Kompanija Novosti'
    category              = 'news, politics, Serbia'        
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    encoding              = 'utf8'
-    remove_javascript     = True
+    encoding              = 'utf-8'
    language              = _('Serbian')
+    lang                  = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@@ -41,8 +41,17 @@ class Novosti(BasicNewsRecipe):
    feeds              = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
-        soup.head.insert(0,mtag)    
-        for item in soup.findAll(style=True):
-            del item['style']        
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                                    
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_nspm.py
+++ b/src/calibre/web/feeds/recipes/recipe_nspm.py
@@ -21,19 +21,18 @@ class Nspm(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    INDEX                 = 'http://www.nspm.rs/?alphabet=l'
-    encoding              = 'utf8'
-    remove_javascript     = True
+    encoding              = 'utf-8'
    language              = _('Serbian')
+    lang                  = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    remove_tags        = [
@@ -51,28 +50,18 @@ class Nspm(BasicNewsRecipe):
    def print_version(self, url):
        return url.replace('.html','/stampa.html')

-    def cleanup_image_tags(self,soup):
-        for item in soup.findAll('img'):
-            for attrib in ['height','width','border','align']:
-                if item.has_key(attrib):
-                   del item[attrib]
-            oldParent = item.parent
-            myIndex = oldParent.contents.index(item)
-            item.extract()
-            divtag = Tag(soup,'div')
-            brtag  = Tag(soup,'br')
-            oldParent.insert(myIndex,divtag)
-            divtag.append(item)
-            divtag.append(brtag)
-        return soup
-
    def preprocess_html(self, soup):
-        lng = 'sr-Latn-RS'
-        soup.html['xml:lang'] = lng
-        soup.html['lang']     = lng
-        ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
-        if ftag:
-           ftag['content'] = lng
-        for item in soup.findAll(style=True):
-            del item['style']     
-        return self.cleanup_image_tags(soup)
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang']     = self.lang
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
+        return self.adeify_images(soup)
--- a/src/calibre/web/feeds/recipes/recipe_pescanik.py
+++ b/src/calibre/web/feeds/recipes/recipe_pescanik.py
@@ -8,6 +8,7 @@ pescanik.net

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Pescanik(BasicNewsRecipe):
    title                 = 'Pescanik'
@@ -19,20 +20,18 @@ class Pescanik(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    remove_javascript     = True
-    encoding              = 'utf8'
-    cover_url             = "http://pescanik.net/templates/ja_teline/images/logo.png"
+    encoding              = 'utf-8'
    language              = _('Serbian')
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    lang                  = 'sr-Latn-RS'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}'
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
    
    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -40,18 +39,27 @@ class Pescanik(BasicNewsRecipe):
    remove_tags = [
                     dict(name='td'  , attrs={'class':'buttonheading'})
                    ,dict(name='span', attrs={'class':'article_seperator'})
-                    ,dict(name=['object','link','img','h4','ul'])
+                    ,dict(name=['object','link','h4','ul'])
                  ]

-    feeds       = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
+    feeds       = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]

    def print_version(self, url):
        nurl = url.replace('/index.php','/index2.php')        
        return nurl + '&pop=1&page=0'

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
-        soup.head.insert(0,mtag)    
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
+        return self.adeify_images(soup)
--- a/src/calibre/web/feeds/recipes/recipe_pobjeda.py
+++ b/src/calibre/web/feeds/recipes/recipe_pobjeda.py
@@ -19,22 +19,20 @@ class Pobjeda(BasicNewsRecipe):
    publisher             = 'Pobjeda a.d.'
    category              = 'news, politics, Montenegro'    
    no_stylesheets        = True
-    remove_javascript     = True
-    encoding              = 'utf8'
-    remove_javascript     = True
+    encoding              = 'utf-8'
    use_embedded_content  = False
-    language              = _('Serbian')
+    language              = _('Montenegrin')
    lang                  = 'sr-Latn-Me'
    INDEX                 = u'http://www.pobjeda.co.me'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
--- a/src/calibre/web/feeds/recipes/recipe_politika.py
+++ b/src/calibre/web/feeds/recipes/recipe_politika.py
@@ -1,15 +1,16 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 politika.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Politika(BasicNewsRecipe):
-    title                 = u'Politika Online'
+    title                 = 'Politika Online'
    __author__            = 'Darko Miletic'
    description           = 'Najstariji dnevni list na Balkanu'
    publisher             = 'Politika novine i Magazini d.o.o'
@@ -21,16 +22,18 @@ class Politika(BasicNewsRecipe):
    remove_javascript     = True
    encoding              = 'utf8'
    language              = _('Serbian')
+    lang                 = 'sr-Latn-RS'
+    direction            = 'ltr'    
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
-

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@@ -55,11 +58,13 @@ class Politika(BasicNewsRecipe):
                     ]

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
-        soup.head.insert(0,mtag)    
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction      
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div',attrs={'class':'content_center_border'})
        if ftag.has_key('align'):
           del ftag['align']
-        return soup
+        return self.adeify_images(soup)
--- a/src/calibre/web/feeds/recipes/recipe_pressonline.py
+++ b/src/calibre/web/feeds/recipes/recipe_pressonline.py
@@ -9,6 +9,7 @@ pressonline.rs

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class PressOnline(BasicNewsRecipe):
    title                 = 'Press Online'
@@ -19,20 +20,21 @@ class PressOnline(BasicNewsRecipe):
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
-    encoding              = 'utf8'
+    encoding              = 'utf-8'
    use_embedded_content  = True
-    cover_url             = 'http://www.pressonline.rs/img/logo.gif'
    language              = _('Serbian')
+    lang                  = 'sr-Latn-RS'
+    direction             = 'ltr'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@@ -57,10 +59,8 @@ class PressOnline(BasicNewsRecipe):
            ]

    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = 'sr-Latn-RS'
-        soup.html['lang']     = 'sr-Latn-RS'
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
-        for img in soup.findAll('img', align=True):
-            del img['align']
-        return soup        
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
+        return self.adeify_images(soup)
--- a/src/calibre/web/feeds/recipes/recipe_rts.py
+++ b/src/calibre/web/feeds/recipes/recipe_rts.py
@@ -24,13 +24,13 @@ class RTS(BasicNewsRecipe):
    lang                  = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
--- a/src/calibre/web/feeds/recipes/recipe_spiegel_int.py
+++ b/src/calibre/web/feeds/recipes/recipe_spiegel_int.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 spiegel.de
 '''
@@ -9,21 +9,25 @@ spiegel.de
 from calibre.web.feeds.news import BasicNewsRecipe

 class Spiegel_int(BasicNewsRecipe):
-    title                 = u'Spiegel Online International'
+    title                 = 'Spiegel Online International'
    __author__            = 'Darko Miletic'
    description           = "News and POV from Europe's largest newsmagazine"
    oldest_article        = 7
    max_articles_per_feed = 100
-    language = _('English')
+    language              = _('English')
    no_stylesheets        = True
    use_embedded_content  = False
-    cover_url = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif'
-    html2lrf_options = [
-                          '--comment', description
-                        , '--base-font-size', '10'
-                        , '--category', 'news, politics, Germany'
-                        , '--publisher', 'SPIEGEL ONLINE GmbH'
-                        ]
+    publisher             = 'SPIEGEL ONLINE GmbH'
+    category              = 'news, politics, Germany'
+    lang                  = 'en'
+    
+    conversion_options = {  
+                             'comments'    : description
+                            ,'tags'        : category
+                            ,'language'    : lang
+                            ,'publisher'   : publisher
+                            ,'pretty_print': True
+                         }

    remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})

--- a/src/calibre/web/feeds/recipes/recipe_tanjug.py
+++ b/src/calibre/web/feeds/recipes/recipe_tanjug.py
@@ -7,6 +7,7 @@ tanjug.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class Tanjug(BasicNewsRecipe):
    title                 = 'Tanjug'
@@ -14,21 +15,22 @@ class Tanjug(BasicNewsRecipe):
    description           = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
    publisher             = 'Tanjug'
    category              = 'news, politics, Serbia'
-    oldest_article        = 1
+    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = True
    encoding              = 'utf-8'
    lang                  = 'sr-Latn-RS'
    language              = _('Serbian')
+    direction             = 'ltr'             
    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    
@@ -37,7 +39,7 @@ class Tanjug(BasicNewsRecipe):
    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'    ] = self.lang
-        soup.html['dir'     ] = "ltr"
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
-        soup.head.insert(0,mtag)
-        return soup
+        soup.html['dir'     ] = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        soup.head.insert(0,mlang)
+        return self.adeify_images(soup)
--- a/src/calibre/web/feeds/recipes/recipe_twitchfilms.py
+++ b/src/calibre/web/feeds/recipes/recipe_twitchfilms.py
@@ -20,14 +20,15 @@ class Twitchfilm(BasicNewsRecipe):
    publisher             = 'Twitch'
    category              = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
    language              = _('English')
+    lang                  = 'en-US'

-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }

    remove_tags = [dict(name='div', attrs={'class':'feedflare'})]

@@ -36,6 +37,6 @@ class Twitchfilm(BasicNewsRecipe):
    def preprocess_html(self, soup):
        mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
        soup.head.insert(0,mtag)
-        soup.html['lang'] = 'en-US'
-        return soup
+        soup.html['lang'] = self.lang
+        return self.adeify_images(soup)

--- a/src/calibre/web/feeds/recipes/recipe_vecernji_list.py
+++ b/src/calibre/web/feeds/recipes/recipe_vecernji_list.py
@@ -9,6 +9,7 @@ www.vecernji.hr

 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class VecernjiList(BasicNewsRecipe):
    title                 = 'Vecernji List'
@ -18,23 +19,23 @@ class VecernjiList(BasicNewsRecipe):
    category              = 'news, politics, Croatia'    
    oldest_article        = 2
    max_articles_per_feed = 100
-    delay                 = 4
+    delay                 = 1
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    remove_javascript     = True    
    language              = _('Croatian')
+    lang                 = 'hr-HR'
+    direction            = 'ltr'    

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@ -46,13 +47,16 @@ class VecernjiList(BasicNewsRecipe):
    feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]

    def preprocess_html(self, soup):
-        soup.html['lang']     = 'hr-HR'
-        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+                
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return self.adeify_images(soup)

    def print_version(self, url):
-        return url.replace('/index.do','/print.do')
+        artid = url.rpartition('-')[2]
+        return 'http://www.vecernji.hr/index.php?cmd=show_clanak&action=print_popup&clanak_id='+artid
        
--- a/src/calibre/web/feeds/recipes/recipe_vijesti.py
+++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py
@ -20,22 +20,19 @@ class Vijesti(BasicNewsRecipe):
    oldest_article        = 2
    max_articles_per_feed = 150
    no_stylesheets        = True
-    remove_javascript     = True
    encoding              = 'cp1250'
-    cover_url             = 'http://www.vijesti.me/img/logo.gif'
-    remove_javascript     = True
    use_embedded_content  = False
-    language              = _('Serbian')
+    language              = _('Montenegrin')
    lang                  ='sr-Latn-Me'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

--- a/src/calibre/web/feeds/recipes/recipe_vreme.py
+++ b/src/calibre/web/feeds/recipes/recipe_vreme.py
@ -22,22 +22,20 @@ class Vreme(BasicNewsRecipe):
    needs_subscription   = True    
    INDEX                = 'http://www.vreme.com'
    LOGIN                = 'http://www.vreme.com/account/login.php?url=%2F'
-    remove_javascript    = True
    use_embedded_content = False
    encoding             = 'utf-8'
    language             = _('Serbian')
    lang                 = 'sr-Latn-RS'
    direction            = 'ltr'    
-    extra_css            = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} ' 
+    extra_css            = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} ' 
    
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        , 'pretty_print'     : True
+                        }
    
    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -84,12 +82,21 @@ class Vreme(BasicNewsRecipe):
        del soup.body['text'   ]
        del soup.body['bgcolor']
        del soup.body['onload' ]
-        for item in soup.findAll(face=True):
-            del item['face']
-        for item in soup.findAll(size=True):
-            del item['size']
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
+        
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]                        
+        
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)