Update all Serbian and Croatian recipes to work with calibre 0.6

This commit is contained in:
Kovid Goyal 2009-08-16 15:56:04 -06:00
parent 1ae3724038
commit fd2888af18
27 changed files with 501 additions and 404 deletions

View File

@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Cro24Sata(BasicNewsRecipe): class Cro24Sata(BasicNewsRecipe):
title = '24 Sata - Hr' title = '24 Sata - Hr'
@ -22,18 +23,18 @@ class Cro24Sata(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_javascript = True
language = _('Croatian') language = _('Croatian')
lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -45,9 +46,11 @@ class Cro24Sata(BasicNewsRecipe):
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')] feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR' soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup

View File

@ -17,53 +17,51 @@ class Ser24Sata(BasicNewsRecipe):
description = '24 sata portal vesti iz Srbije' description = '24 sata portal vesti iz Srbije'
publisher = 'Ringier d.o.o.' publisher = 'Ringier d.o.o.'
category = 'news, politics, entertainment, Serbia' category = 'news, politics, entertainment, Serbia'
oldest_article = 1 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_javascript = True
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS' soup.html['xml:lang'] = self.lang
soup.html['lang'] = 'sr-Latn-RS' soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag) attribs = [ 'style','font','valign'
return self.cleanup_image_tags(soup) ,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)
def print_version(self, url): def print_version(self, url):
article, sep, rest = url.partition('#') article = url.partition('#')[0]
article_base, sep2, article_id = article.partition('id=') article_id = article.partition('id=')[2]
return 'http://www.24sata.co.rs/_print.php?id=' + article_id return 'http://www.24sata.rs/_print.php?id=' + article_id

View File

@ -14,23 +14,21 @@ class B92(BasicNewsRecipe):
description = 'Dnevne vesti iz Srbije i sveta' description = 'Dnevne vesti iz Srbije i sveta'
publisher = 'B92' publisher = 'B92'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 1 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] }
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -39,6 +37,7 @@ class B92(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='ul', attrs={'class':'comment-nav'}) dict(name='ul', attrs={'class':'comment-nav'})
,dict(name=['embed','link','base'] ) ,dict(name=['embed','link','base'] )
,dict(name='div', attrs={'class':'udokum'} )
] ]
feeds = [ feeds = [
@ -51,14 +50,19 @@ class B92(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
del soup.body['onload'] del soup.body['onload']
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(align=True):
del item['align']
for item in soup.findAll('font'): for item in soup.findAll('font'):
item.name='p' item.name='div'
if item.has_key('size'): if item.has_key('size'):
del item['size'] del item['size']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup return soup

View File

@ -26,15 +26,13 @@ class Blic(BasicNewsRecipe):
lang = 'sr-Latn-RS' lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} ' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] }
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})] keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
@ -44,14 +42,21 @@ class Blic(BasicNewsRecipe):
remove_tags = [dict(name=['object','link'])] remove_tags = [dict(name=['object','link'])]
def print_version(self, url): def print_version(self, url):
start_url, question, rest_url = url.partition('?') rest_url = url.partition('?')[2]
return u'http://www.blic.rs/_print.php?' + rest_url return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup): def preprocess_html(self, soup):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) attribs = [ 'style','font','valign'
soup.head.insert(0,mlang) ,'colspan','width','height'
for item in soup.findAll(style=True): ,'rowspan','summary','align'
del item['style'] ,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return self.adeify_images(soup) return self.adeify_images(soup)
def get_article_url(self, article): def get_article_url(self, article):

View File

@ -17,24 +17,23 @@ class Borba(BasicNewsRecipe):
publisher = 'IP Novine Borba' publisher = 'IP Novine Borba'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
language = _('Serbian') language = _('Serbian')
oldest_article = 1 lang = _('sr-Latn-RS')
oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf-8'
remove_javascript = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg' cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
INDEX = u'http://www.borba.rs/' INDEX = u'http://www.borba.rs/'
extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}' extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -60,14 +59,17 @@ class Borba(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME' attribs = [ 'style','font','valign'
soup.html['lang'] = 'sr-Latn-ME' ,'colspan','width','height'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>' ,'rowspan','summary','align'
soup.head.insert(0,mtag) ,'cellspacing','cellpadding'
for item in soup.findAll(style=True): ,'frames','rules','border'
del item['style'] ]
for item in soup.findAll(font=True): for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
del item['font'] item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup return soup
def parse_index(self): def parse_index(self):

View File

@ -7,9 +7,10 @@ danas.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Danas(BasicNewsRecipe): class Danas(BasicNewsRecipe):
title = u'Danas' title = 'Danas'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Vesti' description = 'Vesti'
publisher = 'Danas d.o.o.' publisher = 'Danas d.o.o.'
@ -17,19 +18,19 @@ class Danas(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = False no_stylesheets = False
remove_javascript = True
use_embedded_content = False use_embedded_content = False
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,8 +45,17 @@ class Danas(BasicNewsRecipe):
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')] feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
for item in soup.findAll(style=True): attribs = [ 'style','font','valign'
del item['style'] ,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup return soup

View File

@ -9,6 +9,7 @@ dnevniavaz.ba
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class DnevniAvaz(BasicNewsRecipe): class DnevniAvaz(BasicNewsRecipe):
title = 'Dnevni Avaz' title = 'Dnevni Avaz'
@ -25,17 +26,18 @@ class DnevniAvaz(BasicNewsRecipe):
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif' cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
lang = 'bs-BA' lang = 'bs-BA'
language = _('Bosnian') language = _('Bosnian')
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})] keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
@ -47,9 +49,20 @@ class DnevniAvaz(BasicNewsRecipe):
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno') ,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
] ]
def replace_tagname(self,soup,tagname,tagid,newtagname):
headtag = soup.find(tagname,attrs={'id':tagid})
if headtag:
headtag.name = newtagname
return
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="bs-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
return soup soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
self.replace_tagname(soup,'div','fullarticle-leading','h3')
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ dnevnik.hr
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class DnevnikCro(BasicNewsRecipe): class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr' title = 'Dnevnik - Hr'
@ -22,19 +23,18 @@ class DnevnikCro(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_javascript = True
language = _('Croatian') language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -51,10 +51,24 @@ class DnevnikCro(BasicNewsRecipe):
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')] feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR' soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' soup.html['dir' ] = self.direction
soup.head.insert(0,mtag)
for item in soup.findAll(style=True): attribs = [ 'style','font','valign'
del item['style'] ,'colspan','width','height'
return soup ,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ e-novine.com
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class E_novine(BasicNewsRecipe): class E_novine(BasicNewsRecipe):
title = 'E-Novine' title = 'E-Novine'
@ -16,23 +17,22 @@ class E_novine(BasicNewsRecipe):
description = 'News from Serbia' description = 'News from Serbia'
publisher = 'E-novine' publisher = 'E-novine'
category = 'news, politics, Balcans' category = 'news, politics, Balcans'
oldest_article = 1 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'cp1250' encoding = 'cp1250'
cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
remove_javascript = True
use_embedded_content = False use_embedded_content = False
language = _('Serbian') language = _('Serbian')
lang = 'sr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -43,10 +43,10 @@ class E_novine(BasicNewsRecipe):
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )] feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME' soup.html['xml:lang'] = self.lang
soup.html['lang'] = 'sr-Latn-ME' soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'}) ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})

View File

@ -9,6 +9,7 @@ glassrpske.com
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class GlasSrpske(BasicNewsRecipe): class GlasSrpske(BasicNewsRecipe):
title = 'Glas Srpske' title = 'Glas Srpske'
@ -21,7 +22,6 @@ class GlasSrpske(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_javascript = True
cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png' cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
lang = 'sr-BA' lang = 'sr-BA'
language = _('Serbian') language = _('Serbian')
@ -29,13 +29,13 @@ class GlasSrpske(BasicNewsRecipe):
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -64,8 +64,8 @@ class GlasSrpske(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="sr-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
return soup return soup
def parse_index(self): def parse_index(self):

View File

@ -24,13 +24,13 @@ class HRT(BasicNewsRecipe):
lang = 'hr-HR' lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -8,32 +8,32 @@ jutarnji.hr
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Jutarnji(BasicNewsRecipe): class Jutarnji(BasicNewsRecipe):
title = u'Jutarnji' title = 'Jutarnji'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Hrvatski portal' description = 'Hrvatski portal'
publisher = 'Jutarnji.hr' publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia' category = 'news, politics, Croatia'
oldest_article = 1 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
simultaneous_downloads = 2
delay = 1 delay = 1
language = _('Croatian') language = _('Croatian')
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .vijestnaslov{font-size: x-large; font-weight: bold}'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -59,11 +59,24 @@ class Jutarnji(BasicNewsRecipe):
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>' soup.html['lang'] = self.lang
soup.head.insert(0,mtag) soup.html['dir' ] = self.direction
for item in soup.findAll(style=True):
del item['style'] attribs = [ 'style','font','valign'
for item in soup.findAll(width=True): ,'colspan','width','height'
del item['width'] ,'rowspan','summary','align'
return soup ,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ nacional.hr
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class NacionalCro(BasicNewsRecipe): class NacionalCro(BasicNewsRecipe):
title = 'Nacional - Hr' title = 'Nacional - Hr'
@ -22,19 +23,20 @@ class NacionalCro(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_javascript = True
language = _('Croatian') language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name=['object','link','embed'])] remove_tags = [dict(name=['object','link','embed'])]
@ -42,9 +44,12 @@ class NacionalCro(BasicNewsRecipe):
feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')] feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR' soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' soup.html['dir' ] = self.direction
soup.head.insert(0,mtag) mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup

View File

@ -26,21 +26,19 @@ class Nin(BasicNewsRecipe):
INDEX = PREFIX + '/?change_lang=ls' INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true' LOGIN = PREFIX + '/?logout=true'
FEED = PREFIX + '/misc/rss.php?feed=RSS2.0' FEED = PREFIX + '/misc/rss.php?feed=RSS2.0'
remove_javascript = True
use_embedded_content = False use_embedded_content = False
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS' lang = 'sr-Latn-RS'
direction = 'ltr' direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -74,12 +72,20 @@ class Nin(BasicNewsRecipe):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang) soup.head.insert(0,mlang)
soup.head.insert(1,mcharset) soup.head.insert(1,mcharset)
for item in soup.findAll(style=True): attribs = [ 'style','font','valign'
del item['style'] ,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup return soup
def get_article_url(self, article): def get_article_url(self, article):
raw = article.get('link', None) raw = article.get('link', None)
return raw.replace('.co.yu','.co.rs') return raw.replace('.co.yu','.co.rs')

View File

@ -8,30 +8,30 @@ novosti.rs
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Novosti(BasicNewsRecipe): class Novosti(BasicNewsRecipe):
title = u'Vecernje Novosti' title = 'Vecernje Novosti'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Vesti' description = 'Vesti'
publisher = 'Kompanija Novosti' publisher = 'Kompanija Novosti'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf-8'
remove_javascript = True
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -41,8 +41,17 @@ class Novosti(BasicNewsRecipe):
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
for item in soup.findAll(style=True): attribs = [ 'style','font','valign'
del item['style'] ,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup return soup

View File

@ -21,19 +21,18 @@ class Nspm(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l' INDEX = 'http://www.nspm.rs/?alphabet=l'
encoding = 'utf8' encoding = 'utf-8'
remove_javascript = True
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [ remove_tags = [
@ -51,28 +50,18 @@ class Nspm(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('.html','/stampa.html') return url.replace('.html','/stampa.html')
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
lng = 'sr-Latn-RS' soup.html['xml:lang'] = self.lang
soup.html['xml:lang'] = lng soup.html['lang'] = self.lang
soup.html['lang'] = lng attribs = [ 'style','font','valign'
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'}) ,'colspan','width','height'
if ftag: ,'rowspan','summary','align'
ftag['content'] = lng ,'cellspacing','cellpadding'
for item in soup.findAll(style=True): ,'frames','rules','border'
del item['style'] ]
return self.cleanup_image_tags(soup) for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return self.adeify_images(soup)

View File

@ -8,6 +8,7 @@ pescanik.net
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Pescanik(BasicNewsRecipe): class Pescanik(BasicNewsRecipe):
title = 'Pescanik' title = 'Pescanik'
@ -19,20 +20,18 @@ class Pescanik(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True encoding = 'utf-8'
encoding = 'utf8'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
language = _('Serbian') language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -40,18 +39,27 @@ class Pescanik(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'}) dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'}) ,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link','img','h4','ul']) ,dict(name=['object','link','h4','ul'])
] ]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')] feeds = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
def print_version(self, url): def print_version(self, url):
nurl = url.replace('/index.php','/index2.php') nurl = url.replace('/index.php','/index2.php')
return nurl + '&pop=1&page=0' return nurl + '&pop=1&page=0'
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
for item in soup.findAll(style=True): attribs = [ 'style','font','valign'
del item['style'] ,'colspan','width','height'
return soup ,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return self.adeify_images(soup)

View File

@ -19,22 +19,20 @@ class Pobjeda(BasicNewsRecipe):
publisher = 'Pobjeda a.d.' publisher = 'Pobjeda a.d.'
category = 'news, politics, Montenegro' category = 'news, politics, Montenegro'
no_stylesheets = True no_stylesheets = True
remove_javascript = True encoding = 'utf-8'
encoding = 'utf8'
remove_javascript = True
use_embedded_content = False use_embedded_content = False
language = _('Serbian') language = _('Montenegrin')
lang = 'sr-Latn-Me' lang = 'sr-Latn-Me'
INDEX = u'http://www.pobjeda.co.me' INDEX = u'http://www.pobjeda.co.me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
politika.rs politika.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Politika(BasicNewsRecipe): class Politika(BasicNewsRecipe):
title = u'Politika Online' title = 'Politika Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Najstariji dnevni list na Balkanu' description = 'Najstariji dnevni list na Balkanu'
publisher = 'Politika novine i Magazini d.o.o' publisher = 'Politika novine i Magazini d.o.o'
@ -21,16 +22,18 @@ class Politika(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
encoding = 'utf8' encoding = 'utf8'
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -55,11 +58,13 @@ class Politika(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>' soup.html['lang'] = self.lang
soup.head.insert(0,mtag) soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
ftag = soup.find('div',attrs={'class':'content_center_border'}) ftag = soup.find('div',attrs={'class':'content_center_border'})
if ftag.has_key('align'): if ftag.has_key('align'):
del ftag['align'] del ftag['align']
return soup return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ pressonline.rs
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class PressOnline(BasicNewsRecipe): class PressOnline(BasicNewsRecipe):
title = 'Press Online' title = 'Press Online'
@ -19,20 +20,21 @@ class PressOnline(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf-8'
use_embedded_content = True use_embedded_content = True
cover_url = 'http://www.pressonline.rs/img/logo.gif'
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -57,10 +59,8 @@ class PressOnline(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS' soup.html['lang'] = self.lang
soup.html['lang'] = 'sr-Latn-RS' soup.html['dir' ] = self.direction
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
for img in soup.findAll('img', align=True): return self.adeify_images(soup)
del img['align']
return soup

View File

@ -24,13 +24,13 @@ class RTS(BasicNewsRecipe):
lang = 'sr-Latn-RS' lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
spiegel.de spiegel.de
''' '''
@ -9,21 +9,25 @@ spiegel.de
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_int(BasicNewsRecipe): class Spiegel_int(BasicNewsRecipe):
title = u'Spiegel Online International' title = 'Spiegel Online International'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = "News and POV from Europe's largest newsmagazine" description = "News and POV from Europe's largest newsmagazine"
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
language = _('English') language = _('English')
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif' publisher = 'SPIEGEL ONLINE GmbH'
html2lrf_options = [ category = 'news, politics, Germany'
'--comment', description lang = 'en'
, '--base-font-size', '10'
, '--category', 'news, politics, Germany' conversion_options = {
, '--publisher', 'SPIEGEL ONLINE GmbH' 'comments' : description
] ,'tags' : category
,'language' : lang
,'publisher' : publisher
,'pretty_print': True
}
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'}) remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})

View File

@ -7,6 +7,7 @@ tanjug.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Tanjug(BasicNewsRecipe): class Tanjug(BasicNewsRecipe):
title = 'Tanjug' title = 'Tanjug'
@ -14,21 +15,22 @@ class Tanjug(BasicNewsRecipe):
description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta' description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
publisher = 'Tanjug' publisher = 'Tanjug'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 1 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = True use_embedded_content = True
encoding = 'utf-8' encoding = 'utf-8'
lang = 'sr-Latn-RS' lang = 'sr-Latn-RS'
language = _('Serbian') language = _('Serbian')
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -37,7 +39,7 @@ class Tanjug(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang' ] = self.lang soup.html['lang' ] = self.lang
soup.html['dir' ] = "ltr" soup.html['dir' ] = self.direction
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>' mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mtag) soup.head.insert(0,mlang)
return soup return self.adeify_images(soup)

View File

@ -20,14 +20,15 @@ class Twitchfilm(BasicNewsRecipe):
publisher = 'Twitch' publisher = 'Twitch'
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk' category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
language = _('English') language = _('English')
lang = 'en-US'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' }
remove_tags = [dict(name='div', attrs={'class':'feedflare'})] remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
@ -36,6 +37,6 @@ class Twitchfilm(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')]) mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
soup.html['lang'] = 'en-US' soup.html['lang'] = self.lang
return soup return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ www.vecernji.hr
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class VecernjiList(BasicNewsRecipe): class VecernjiList(BasicNewsRecipe):
title = 'Vecernji List' title = 'Vecernji List'
@ -18,23 +19,23 @@ class VecernjiList(BasicNewsRecipe):
category = 'news, politics, Croatia' category = 'news, politics, Croatia'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
delay = 4 delay = 1
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_javascript = True
language = _('Croatian') language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -46,13 +47,16 @@ class VecernjiList(BasicNewsRecipe):
feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')] feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR' soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' soup.html['dir' ] = self.direction
soup.head.insert(0,mtag)
for item in soup.findAll(style=True): mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
del item['style'] mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
return soup soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)
def print_version(self, url): def print_version(self, url):
return url.replace('/index.do','/print.do') artid = url.rpartition('-')[2]
return 'http://www.vecernji.hr/index.php?cmd=show_clanak&action=print_popup&clanak_id='+artid

View File

@ -20,22 +20,19 @@ class Vijesti(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 150 max_articles_per_feed = 150
no_stylesheets = True no_stylesheets = True
remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
cover_url = 'http://www.vijesti.me/img/logo.gif'
remove_javascript = True
use_embedded_content = False use_embedded_content = False
language = _('Serbian') language = _('Montenegrin')
lang ='sr-Latn-Me' lang ='sr-Latn-Me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
, 'pretty_print' : True
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -22,22 +22,20 @@ class Vreme(BasicNewsRecipe):
needs_subscription = True needs_subscription = True
INDEX = 'http://www.vreme.com' INDEX = 'http://www.vreme.com'
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F' LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
remove_javascript = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = _('Serbian') language = _('Serbian')
lang = 'sr-Latn-RS' lang = 'sr-Latn-RS'
direction = 'ltr' direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} ' extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
, '--ignore-tables' , 'language' : lang
] , 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -84,12 +82,21 @@ class Vreme(BasicNewsRecipe):
del soup.body['text' ] del soup.body['text' ]
del soup.body['bgcolor'] del soup.body['bgcolor']
del soup.body['onload' ] del soup.body['onload' ]
for item in soup.findAll(face=True):
del item['face']
for item in soup.findAll(size=True):
del item['size']
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang) soup.head.insert(0,mlang)