Update all Serbian and Croatian recipes to work with calibre 0.6

This commit is contained in:
Kovid Goyal 2009-08-16 15:56:04 -06:00
parent 1ae3724038
commit fd2888af18
27 changed files with 501 additions and 404 deletions

View File

@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Cro24Sata(BasicNewsRecipe):
title = '24 Sata - Hr'
@ -22,18 +23,18 @@ class Cro24Sata(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -45,9 +46,11 @@ class Cro24Sata(BasicNewsRecipe):
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -17,53 +17,51 @@ class Ser24Sata(BasicNewsRecipe):
description = '24 sata portal vesti iz Srbije'
publisher = 'Ringier d.o.o.'
category = 'news, politics, entertainment, Serbia'
oldest_article = 1
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
return self.cleanup_image_tags(soup)
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)
def print_version(self, url):
article, sep, rest = url.partition('#')
article_base, sep2, article_id = article.partition('id=')
return 'http://www.24sata.co.rs/_print.php?id=' + article_id
article = url.partition('#')[0]
article_id = article.partition('id=')[2]
return 'http://www.24sata.rs/_print.php?id=' + article_id

View File

@ -14,23 +14,21 @@ class B92(BasicNewsRecipe):
description = 'Dnevne vesti iz Srbije i sveta'
publisher = 'B92'
category = 'news, politics, Serbia'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -39,6 +37,7 @@ class B92(BasicNewsRecipe):
remove_tags = [
dict(name='ul', attrs={'class':'comment-nav'})
,dict(name=['embed','link','base'] )
,dict(name='div', attrs={'class':'udokum'} )
]
feeds = [
@ -51,14 +50,19 @@ class B92(BasicNewsRecipe):
def preprocess_html(self, soup):
del soup.body['onload']
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(align=True):
del item['align']
for item in soup.findAll('font'):
item.name='p'
item.name='div'
if item.has_key('size'):
del item['size']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup

View File

@ -26,15 +26,13 @@ class Blic(BasicNewsRecipe):
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
@ -44,14 +42,21 @@ class Blic(BasicNewsRecipe):
remove_tags = [dict(name=['object','link'])]
def print_version(self, url):
start_url, question, rest_url = url.partition('?')
rest_url = url.partition('?')[2]
return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return self.adeify_images(soup)
def get_article_url(self, article):

View File

@ -17,24 +17,23 @@ class Borba(BasicNewsRecipe):
publisher = 'IP Novine Borba'
category = 'news, politics, Serbia'
language = _('Serbian')
oldest_article = 1
lang = _('sr-Latn-RS')
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
use_embedded_content = False
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
INDEX = u'http://www.borba.rs/'
extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -60,14 +59,17 @@ class Borba(BasicNewsRecipe):
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(font=True):
del item['font']
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def parse_index(self):

View File

@ -7,9 +7,10 @@ danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Danas(BasicNewsRecipe):
title = u'Danas'
title = 'Danas'
__author__ = 'Darko Miletic'
description = 'Vesti'
publisher = 'Danas d.o.o.'
@ -17,19 +18,19 @@ class Danas(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = False
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,8 +45,17 @@ class Danas(BasicNewsRecipe):
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup

View File

@ -9,6 +9,7 @@ dnevniavaz.ba
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class DnevniAvaz(BasicNewsRecipe):
title = 'Dnevni Avaz'
@ -25,17 +26,18 @@ class DnevniAvaz(BasicNewsRecipe):
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
lang = 'bs-BA'
language = _('Bosnian')
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
@ -47,9 +49,20 @@ class DnevniAvaz(BasicNewsRecipe):
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
]
def replace_tagname(self,soup,tagname,tagid,newtagname):
headtag = soup.find(tagname,attrs={'id':tagid})
if headtag:
headtag.name = newtagname
return
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="bs-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
return soup
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
self.replace_tagname(soup,'div','fullarticle-leading','h3')
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ dnevnik.hr
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr'
@ -22,19 +23,18 @@ class DnevnikCro(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -51,10 +51,24 @@ class DnevnikCro(BasicNewsRecipe):
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ e-novine.com
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class E_novine(BasicNewsRecipe):
title = 'E-Novine'
@ -16,23 +17,22 @@ class E_novine(BasicNewsRecipe):
description = 'News from Serbia'
publisher = 'E-novine'
category = 'news, politics, Balcans'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1250'
cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
lang = 'sr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -43,10 +43,10 @@ class E_novine(BasicNewsRecipe):
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})

View File

@ -9,6 +9,7 @@ glassrpske.com
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class GlasSrpske(BasicNewsRecipe):
title = 'Glas Srpske'
@ -21,7 +22,6 @@ class GlasSrpske(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
lang = 'sr-BA'
language = _('Serbian')
@ -29,13 +29,13 @@ class GlasSrpske(BasicNewsRecipe):
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -64,8 +64,8 @@ class GlasSrpske(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Language" content="sr-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
return soup
def parse_index(self):

View File

@ -24,13 +24,13 @@ class HRT(BasicNewsRecipe):
lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -8,32 +8,32 @@ jutarnji.hr
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Jutarnji(BasicNewsRecipe):
title = u'Jutarnji'
__author__ = u'Darko Miletic'
description = u'Hrvatski portal'
title = 'Jutarnji'
__author__ = 'Darko Miletic'
description = 'Hrvatski portal'
publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
simultaneous_downloads = 2
delay = 1
language = _('Croatian')
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .vijestnaslov{font-size: x-large; font-weight: bold}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -59,11 +59,24 @@ class Jutarnji(BasicNewsRecipe):
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ nacional.hr
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class NacionalCro(BasicNewsRecipe):
title = 'Nacional - Hr'
@ -22,19 +23,20 @@ class NacionalCro(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name=['object','link','embed'])]
@ -42,9 +44,12 @@ class NacionalCro(BasicNewsRecipe):
feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
def preprocess_html(self, soup):
soup.html['lang'] = 'hr-HR'
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -26,21 +26,19 @@ class Nin(BasicNewsRecipe):
INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true'
FEED = PREFIX + '/misc/rss.php?feed=RSS2.0'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -74,12 +72,20 @@ class Nin(BasicNewsRecipe):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
soup.head.insert(1,mcharset)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def get_article_url(self, article):
raw = article.get('link', None)
return raw.replace('.co.yu','.co.rs')

View File

@ -8,30 +8,30 @@ novosti.rs
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Novosti(BasicNewsRecipe):
title = u'Vecernje Novosti'
__author__ = u'Darko Miletic'
description = u'Vesti'
title = 'Vecernje Novosti'
__author__ = 'Darko Miletic'
description = 'Vesti'
publisher = 'Kompanija Novosti'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -41,8 +41,17 @@ class Novosti(BasicNewsRecipe):
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup

View File

@ -21,19 +21,18 @@ class Nspm(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l'
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
language = _('Serbian')
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [
@ -51,28 +50,18 @@ class Nspm(BasicNewsRecipe):
def print_version(self, url):
return url.replace('.html','/stampa.html')
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup):
    """Tag the document with the recipe language and flatten table markup.

    The method previously carried two bodies back to back; the first one
    returned early, so the table flattening and the ``adeify_images`` call
    never ran. Only the ``self.lang``-based implementation is kept.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    soup.html['xml:lang'] = self.lang
    soup.html['lang'] = self.lang
    # Presentational attributes that are dropped from the former table
    # elements once they have been renamed to <div>.
    attribs = [ 'style','font','valign'
               ,'colspan','width','height'
               ,'rowspan','summary','align'
               ,'cellspacing','cellpadding'
               ,'frames','rules','border'
              ]
    table_tags = ['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']
    for item in soup.body.findAll(name=table_tags):
        # Turn every table-related element into a plain block container.
        item.name = 'div'
        for attrib in attribs:
            if item.has_key(attrib):
                del item[attrib]
    return self.adeify_images(soup)

View File

@ -8,6 +8,7 @@ pescanik.net
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Pescanik(BasicNewsRecipe):
title = 'Pescanik'
@ -19,20 +20,18 @@ class Pescanik(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'utf8'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
encoding = 'utf-8'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -40,18 +39,27 @@ class Pescanik(BasicNewsRecipe):
remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link','img','h4','ul'])
,dict(name=['object','link','h4','ul'])
]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
feeds = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
def print_version(self, url):
    """Return the pop-up/print address of an article.

    '/index.php' is swapped for '/index2.php' and the query suffix
    '&pop=1&page=0' is appended to the result.
    """
    return url.replace('/index.php', '/index2.php') + '&pop=1&page=0'
def preprocess_html(self, soup):
    """Add a Content-Language meta tag and flatten table markup.

    The method previously carried two bodies back to back: the first
    inserted a raw string (not a Tag) into <head> and returned early, so
    the Tag-based code below it was unreachable. Only the Tag-based
    implementation is kept.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    soup.head.insert(0,mlang)
    # Presentational attributes that are dropped from the former table
    # elements once they have been renamed to <div>.
    attribs = [ 'style','font','valign'
               ,'colspan','width','height'
               ,'rowspan','summary','align'
               ,'cellspacing','cellpadding'
               ,'frames','rules','border'
              ]
    table_tags = ['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']
    for item in soup.body.findAll(name=table_tags):
        # Turn every table-related element into a plain block container.
        item.name = 'div'
        for attrib in attribs:
            if item.has_key(attrib):
                del item[attrib]
    return self.adeify_images(soup)

View File

@ -19,22 +19,20 @@ class Pobjeda(BasicNewsRecipe):
publisher = 'Pobjeda a.d.'
category = 'news, politics, Montenegro'
no_stylesheets = True
remove_javascript = True
encoding = 'utf8'
remove_javascript = True
encoding = 'utf-8'
use_embedded_content = False
language = _('Serbian')
language = _('Montenegrin')
lang = 'sr-Latn-Me'
INDEX = u'http://www.pobjeda.co.me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
politika.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Politika(BasicNewsRecipe):
title = u'Politika Online'
title = 'Politika Online'
__author__ = 'Darko Miletic'
description = 'Najstariji dnevni list na Balkanu'
publisher = 'Politika novine i Magazini d.o.o'
@ -21,16 +22,18 @@ class Politika(BasicNewsRecipe):
remove_javascript = True
encoding = 'utf8'
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -55,11 +58,13 @@ class Politika(BasicNewsRecipe):
]
def preprocess_html(self, soup):
    """Set language/direction metadata and strip inline styling.

    Changes compared with the previous text of this method:
    * the raw-string meta insert and the superseded ``return soup`` are
      removed, leaving a single Tag-based insert and a single return;
    * the 'content_center_border' div is checked for None before
      ``has_key`` is called, so pages without that div no longer raise
      AttributeError.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    soup.html['lang'] = self.lang
    soup.html['dir'] = self.direction
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    soup.head.insert(0,mlang)
    for item in soup.findAll(style=True):
        del item['style']
    ftag = soup.find('div',attrs={'class':'content_center_border'})
    # soup.find returns None when the div is absent.
    if ftag is not None and ftag.has_key('align'):
        del ftag['align']
    return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ pressonline.rs
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class PressOnline(BasicNewsRecipe):
title = 'Press Online'
@ -19,20 +20,21 @@ class PressOnline(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
encoding = 'utf-8'
use_embedded_content = True
cover_url = 'http://www.pressonline.rs/img/logo.gif'
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -57,10 +59,8 @@ class PressOnline(BasicNewsRecipe):
]
def preprocess_html(self, soup):
    """Set language/direction on <html> and add a Content-Language meta tag.

    The method previously carried two bodies back to back: the first used
    hard-coded 'sr-Latn-RS' strings, inserted a raw string into <head>
    and returned early, making the ``self.lang``-based code unreachable.
    Only the latter is kept.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    soup.html['lang'] = self.lang
    soup.html['dir'] = self.direction
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    soup.head.insert(0,mlang)
    return self.adeify_images(soup)

View File

@ -24,13 +24,13 @@ class RTS(BasicNewsRecipe):
lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
spiegel.de
'''
@ -9,21 +9,25 @@ spiegel.de
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_int(BasicNewsRecipe):
title = u'Spiegel Online International'
title = 'Spiegel Online International'
__author__ = 'Darko Miletic'
description = "News and POV from Europe's largest newsmagazine"
oldest_article = 7
max_articles_per_feed = 100
language = _('English')
language = _('English')
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Germany'
, '--publisher', 'SPIEGEL ONLINE GmbH'
]
publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany'
lang = 'en'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : lang
,'publisher' : publisher
,'pretty_print': True
}
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})

View File

@ -7,6 +7,7 @@ tanjug.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Tanjug(BasicNewsRecipe):
title = 'Tanjug'
@ -14,21 +15,22 @@ class Tanjug(BasicNewsRecipe):
description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
publisher = 'Tanjug'
category = 'news, politics, Serbia'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = True
encoding = 'utf-8'
lang = 'sr-Latn-RS'
language = _('Serbian')
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -37,7 +39,7 @@ class Tanjug(BasicNewsRecipe):
def preprocess_html(self, soup):
    """Set language/direction on <html> and add a Content-Language meta tag.

    The previous text assigned 'dir' twice, inserted a raw string into
    <head> and returned before the Tag-based insert and the
    ``adeify_images`` call could run. The direction now comes only from
    ``self.direction`` and there is a single return.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    soup.html['xml:lang'] = self.lang
    soup.html['lang'] = self.lang
    soup.html['dir'] = self.direction
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    soup.head.insert(0,mlang)
    return self.adeify_images(soup)

View File

@ -20,14 +20,15 @@ class Twitchfilm(BasicNewsRecipe):
publisher = 'Twitch'
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
language = _('English')
lang = 'en-US'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
@ -36,6 +37,6 @@ class Twitchfilm(BasicNewsRecipe):
def preprocess_html(self, soup):
    """Declare the charset in <head> and set the document language.

    Fixes compared with the previous text of this method:
    * the meta tag's second attribute was spelled 'context'; the
      http-equiv value belongs in the 'content' attribute;
    * the superseded hard-coded 'en-US' assignment and its early
      ``return soup`` are removed so ``adeify_images`` is reached.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('content','text/html; charset=utf-8')])
    soup.head.insert(0,mtag)
    soup.html['lang'] = self.lang
    return self.adeify_images(soup)

View File

@ -9,6 +9,7 @@ www.vecernji.hr
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class VecernjiList(BasicNewsRecipe):
title = 'Vecernji List'
@ -18,23 +19,23 @@ class VecernjiList(BasicNewsRecipe):
category = 'news, politics, Croatia'
oldest_article = 2
max_articles_per_feed = 100
delay = 4
delay = 1
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
remove_javascript = True
language = _('Croatian')
lang = 'hr-HR'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -46,13 +47,16 @@ class VecernjiList(BasicNewsRecipe):
feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
def preprocess_html(self, soup):
    """Set language/direction on <html> and add language/charset meta tags.

    The method previously carried two bodies back to back: the first
    inserted a raw two-tag string into <head>, stripped style attributes
    and returned early, so the Tag-based code was unreachable. Only the
    Tag-based implementation is kept.

    Returns whatever the inherited ``self.adeify_images(soup)`` returns
    (NOTE(review): expected to be the processed soup - confirm against
    BasicNewsRecipe).
    """
    soup.html['lang'] = self.lang
    soup.html['dir'] = self.direction
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
    # Language first, charset second, both ahead of the existing <head> content.
    soup.head.insert(0,mlang)
    soup.head.insert(1,mcharset)
    return self.adeify_images(soup)
def print_version(self, url):
    """Return the print pop-up address for an article URL.

    The article id is taken as everything after the last '-' in ``url``
    (the whole URL when it contains no '-') and appended to the site's
    print_popup endpoint. The superseded '/index.do' -> '/print.do'
    return, which made this code unreachable, has been removed.
    """
    artid = url.rpartition('-')[2]
    return 'http://www.vecernji.hr/index.php?cmd=show_clanak&action=print_popup&clanak_id='+artid

View File

@ -20,22 +20,19 @@ class Vijesti(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 150
no_stylesheets = True
remove_javascript = True
encoding = 'cp1250'
cover_url = 'http://www.vijesti.me/img/logo.gif'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
language = _('Montenegrin')
lang ='sr-Latn-Me'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -22,22 +22,20 @@ class Vreme(BasicNewsRecipe):
needs_subscription = True
INDEX = 'http://www.vreme.com'
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
remove_javascript = True
use_embedded_content = False
encoding = 'utf-8'
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -84,12 +82,21 @@ class Vreme(BasicNewsRecipe):
del soup.body['text' ]
del soup.body['bgcolor']
del soup.body['onload' ]
for item in soup.findAll(face=True):
del item['face']
for item in soup.findAll(size=True):
del item['size']
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
soup.head.insert(0,mlang)