mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update all Serbian and Croatin recipes to work with calibre 0.6
This commit is contained in:
parent
1ae3724038
commit
fd2888af18
@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Cro24Sata(BasicNewsRecipe):
|
||||
title = '24 Sata - Hr'
|
||||
@ -22,18 +23,18 @@ class Cro24Sata(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
language = _('Croatian')
|
||||
lang = 'hr-HR'
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -45,9 +46,11 @@ class Cro24Sata(BasicNewsRecipe):
|
||||
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = 'hr-HR'
|
||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
@ -17,53 +17,51 @@ class Ser24Sata(BasicNewsRecipe):
|
||||
description = '24 sata portal vesti iz Srbije'
|
||||
publisher = 'Ringier d.o.o.'
|
||||
category = 'news, politics, entertainment, Serbia'
|
||||
oldest_article = 1
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
language = _('Serbian')
|
||||
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
||||
|
||||
def cleanup_image_tags(self,soup):
|
||||
for item in soup.findAll('img'):
|
||||
for attrib in ['height','width','border','align']:
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
oldParent = item.parent
|
||||
myIndex = oldParent.contents.index(item)
|
||||
item.extract()
|
||||
divtag = Tag(soup,'div')
|
||||
brtag = Tag(soup,'br')
|
||||
oldParent.insert(myIndex,divtag)
|
||||
divtag.append(item)
|
||||
divtag.append(brtag)
|
||||
return soup
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
||||
soup.html['lang'] = 'sr-Latn-RS'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
return self.cleanup_image_tags(soup)
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
return self.adeify_images(soup)
|
||||
|
||||
def print_version(self, url):
|
||||
article, sep, rest = url.partition('#')
|
||||
article_base, sep2, article_id = article.partition('id=')
|
||||
return 'http://www.24sata.co.rs/_print.php?id=' + article_id
|
||||
article = url.partition('#')[0]
|
||||
article_id = article.partition('id=')[2]
|
||||
return 'http://www.24sata.rs/_print.php?id=' + article_id
|
||||
|
||||
|
@ -14,23 +14,21 @@ class B92(BasicNewsRecipe):
|
||||
description = 'Dnevne vesti iz Srbije i sveta'
|
||||
publisher = 'B92'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 1
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -39,6 +37,7 @@ class B92(BasicNewsRecipe):
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'class':'comment-nav'})
|
||||
,dict(name=['embed','link','base'] )
|
||||
,dict(name='div', attrs={'class':'udokum'} )
|
||||
]
|
||||
|
||||
feeds = [
|
||||
@ -51,14 +50,19 @@ class B92(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
del soup.body['onload']
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(align=True):
|
||||
del item['align']
|
||||
for item in soup.findAll('font'):
|
||||
item.name='p'
|
||||
item.name='div'
|
||||
if item.has_key('size'):
|
||||
del item['size']
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return soup
|
||||
|
@ -26,14 +26,12 @@ class Blic(BasicNewsRecipe):
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -44,14 +42,21 @@ class Blic(BasicNewsRecipe):
|
||||
remove_tags = [dict(name=['object','link'])]
|
||||
|
||||
def print_version(self, url):
|
||||
start_url, question, rest_url = url.partition('?')
|
||||
rest_url = url.partition('?')[2]
|
||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return self.adeify_images(soup)
|
||||
|
||||
def get_article_url(self, article):
|
||||
|
@ -17,24 +17,23 @@ class Borba(BasicNewsRecipe):
|
||||
publisher = 'IP Novine Borba'
|
||||
category = 'news, politics, Serbia'
|
||||
language = _('Serbian')
|
||||
oldest_article = 1
|
||||
lang = _('sr-Latn-RS')
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
|
||||
INDEX = u'http://www.borba.rs/'
|
||||
extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'
|
||||
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -60,14 +59,17 @@ class Borba(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn-ME'
|
||||
soup.html['lang'] = 'sr-Latn-ME'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(font=True):
|
||||
del item['font']
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return soup
|
||||
|
||||
def parse_index(self):
|
||||
|
@ -7,9 +7,10 @@ danas.rs
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Danas(BasicNewsRecipe):
|
||||
title = u'Danas'
|
||||
title = 'Danas'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Vesti'
|
||||
publisher = 'Danas d.o.o.'
|
||||
@ -17,19 +18,19 @@ class Danas(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -44,8 +45,17 @@ class Danas(BasicNewsRecipe):
|
||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return soup
|
@ -9,6 +9,7 @@ dnevniavaz.ba
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class DnevniAvaz(BasicNewsRecipe):
|
||||
title = 'Dnevni Avaz'
|
||||
@ -25,16 +26,17 @@ class DnevniAvaz(BasicNewsRecipe):
|
||||
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
|
||||
lang = 'bs-BA'
|
||||
language = _('Bosnian')
|
||||
direction = 'ltr'
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -47,9 +49,20 @@ class DnevniAvaz(BasicNewsRecipe):
|
||||
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
|
||||
]
|
||||
|
||||
def replace_tagname(self,soup,tagname,tagid,newtagname):
|
||||
headtag = soup.find(tagname,attrs={'id':tagid})
|
||||
if headtag:
|
||||
headtag.name = newtagname
|
||||
return
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mtag = '<meta http-equiv="Content-Language" content="bs-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
|
||||
self.replace_tagname(soup,'div','fullarticle-leading','h3')
|
||||
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
|
||||
return self.adeify_images(soup)
|
||||
|
@ -9,6 +9,7 @@ dnevnik.hr
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class DnevnikCro(BasicNewsRecipe):
|
||||
title = 'Dnevnik - Hr'
|
||||
@ -22,19 +23,18 @@ class DnevnikCro(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
language = _('Croatian')
|
||||
|
||||
lang = 'hr-HR'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -51,10 +51,24 @@ class DnevnikCro(BasicNewsRecipe):
|
||||
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = 'hr-HR'
|
||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
@ -9,6 +9,7 @@ e-novine.com
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class E_novine(BasicNewsRecipe):
|
||||
title = 'E-Novine'
|
||||
@ -16,23 +17,22 @@ class E_novine(BasicNewsRecipe):
|
||||
description = 'News from Serbia'
|
||||
publisher = 'E-novine'
|
||||
category = 'news, politics, Balcans'
|
||||
oldest_article = 1
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1250'
|
||||
cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = _('Serbian')
|
||||
lang = 'sr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -43,10 +43,10 @@ class E_novine(BasicNewsRecipe):
|
||||
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn-ME'
|
||||
soup.html['lang'] = 'sr-Latn-ME'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
|
||||
|
@ -9,6 +9,7 @@ glassrpske.com
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class GlasSrpske(BasicNewsRecipe):
|
||||
title = 'Glas Srpske'
|
||||
@ -21,7 +22,6 @@ class GlasSrpske(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
|
||||
lang = 'sr-BA'
|
||||
language = _('Serbian')
|
||||
@ -29,13 +29,13 @@ class GlasSrpske(BasicNewsRecipe):
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -64,8 +64,8 @@ class GlasSrpske(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
return soup
|
||||
|
||||
def parse_index(self):
|
||||
|
@ -24,13 +24,13 @@ class HRT(BasicNewsRecipe):
|
||||
lang = 'hr-HR'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
@ -8,32 +8,32 @@ jutarnji.hr
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Jutarnji(BasicNewsRecipe):
|
||||
title = u'Jutarnji'
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u'Hrvatski portal'
|
||||
title = 'Jutarnji'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Hrvatski portal'
|
||||
publisher = 'Jutarnji.hr'
|
||||
category = 'news, politics, Croatia'
|
||||
oldest_article = 1
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 2
|
||||
delay = 1
|
||||
language = _('Croatian')
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
lang = 'hr-HR'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .vijestnaslov{font-size: x-large; font-weight: bold}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -59,11 +59,24 @@ class Jutarnji(BasicNewsRecipe):
|
||||
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(width=True):
|
||||
del item['width']
|
||||
return soup
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
return self.adeify_images(soup)
|
||||
|
@ -9,6 +9,7 @@ nacional.hr
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class NacionalCro(BasicNewsRecipe):
|
||||
title = 'Nacional - Hr'
|
||||
@ -22,18 +23,19 @@ class NacionalCro(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
language = _('Croatian')
|
||||
lang = 'hr-HR'
|
||||
direction = 'ltr'
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -42,9 +44,12 @@ class NacionalCro(BasicNewsRecipe):
|
||||
feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = 'hr-HR'
|
||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
@ -26,21 +26,19 @@ class Nin(BasicNewsRecipe):
|
||||
INDEX = PREFIX + '/?change_lang=ls'
|
||||
LOGIN = PREFIX + '/?logout=true'
|
||||
FEED = PREFIX + '/misc/rss.php?feed=RSS2.0'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -75,11 +73,19 @@ class Nin(BasicNewsRecipe):
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return soup
|
||||
|
||||
def get_article_url(self, article):
|
||||
raw = article.get('link', None)
|
||||
return raw.replace('.co.yu','.co.rs')
|
||||
|
@ -8,30 +8,30 @@ novosti.rs
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Novosti(BasicNewsRecipe):
|
||||
title = u'Vecernje Novosti'
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u'Vesti'
|
||||
title = 'Vecernje Novosti'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Vesti'
|
||||
publisher = 'Kompanija Novosti'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -41,8 +41,17 @@ class Novosti(BasicNewsRecipe):
|
||||
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return soup
|
||||
|
@ -21,19 +21,18 @@ class Nspm(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
remove_tags = [
|
||||
@ -51,28 +50,18 @@ class Nspm(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
return url.replace('.html','/stampa.html')
|
||||
|
||||
def cleanup_image_tags(self,soup):
|
||||
for item in soup.findAll('img'):
|
||||
for attrib in ['height','width','border','align']:
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
oldParent = item.parent
|
||||
myIndex = oldParent.contents.index(item)
|
||||
item.extract()
|
||||
divtag = Tag(soup,'div')
|
||||
brtag = Tag(soup,'br')
|
||||
oldParent.insert(myIndex,divtag)
|
||||
divtag.append(item)
|
||||
divtag.append(brtag)
|
||||
return soup
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
lng = 'sr-Latn-RS'
|
||||
soup.html['xml:lang'] = lng
|
||||
soup.html['lang'] = lng
|
||||
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
|
||||
if ftag:
|
||||
ftag['content'] = lng
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return self.cleanup_image_tags(soup)
|
||||
return self.adeify_images(soup)
|
||||
|
@ -8,6 +8,7 @@ pescanik.net
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Pescanik(BasicNewsRecipe):
|
||||
title = 'Pescanik'
|
||||
@ -19,20 +20,18 @@ class Pescanik(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
||||
encoding = 'utf-8'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -40,18 +39,27 @@ class Pescanik(BasicNewsRecipe):
|
||||
remove_tags = [
|
||||
dict(name='td' , attrs={'class':'buttonheading'})
|
||||
,dict(name='span', attrs={'class':'article_seperator'})
|
||||
,dict(name=['object','link','img','h4','ul'])
|
||||
,dict(name=['object','link','h4','ul'])
|
||||
]
|
||||
|
||||
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||
feeds = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||
|
||||
def print_version(self, url):
|
||||
nurl = url.replace('/index.php','/index2.php')
|
||||
return nurl + '&pop=1&page=0'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
return self.adeify_images(soup)
|
||||
|
@ -19,22 +19,20 @@ class Pobjeda(BasicNewsRecipe):
|
||||
publisher = 'Pobjeda a.d.'
|
||||
category = 'news, politics, Montenegro'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
language = _('Serbian')
|
||||
language = _('Montenegrin')
|
||||
lang = 'sr-Latn-Me'
|
||||
INDEX = u'http://www.pobjeda.co.me'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
@ -1,15 +1,16 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
politika.rs
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Politika(BasicNewsRecipe):
|
||||
title = u'Politika Online'
|
||||
title = 'Politika Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Najstariji dnevni list na Balkanu'
|
||||
publisher = 'Politika novine i Magazini d.o.o'
|
||||
@ -21,15 +22,17 @@ class Politika(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -55,11 +58,13 @@ class Politika(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
||||
if ftag.has_key('align'):
|
||||
del ftag['align']
|
||||
return soup
|
||||
return self.adeify_images(soup)
|
||||
|
@ -9,6 +9,7 @@ pressonline.rs
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class PressOnline(BasicNewsRecipe):
|
||||
title = 'Press Online'
|
||||
@ -19,20 +20,21 @@ class PressOnline(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = True
|
||||
cover_url = 'http://www.pressonline.rs/img/logo.gif'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -57,10 +59,8 @@ class PressOnline(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
||||
soup.html['lang'] = 'sr-Latn-RS'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
for img in soup.findAll('img', align=True):
|
||||
del img['align']
|
||||
return soup
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
return self.adeify_images(soup)
|
@ -24,13 +24,13 @@ class RTS(BasicNewsRecipe):
|
||||
lang = 'sr-Latn-RS'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
spiegel.de
|
||||
'''
|
||||
@ -9,21 +9,25 @@ spiegel.de
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Spiegel_int(BasicNewsRecipe):
|
||||
title = u'Spiegel Online International'
|
||||
title = 'Spiegel Online International'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = "News and POV from Europe's largest newsmagazine"
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = _('English')
|
||||
language = _('English')
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif'
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, politics, Germany'
|
||||
, '--publisher', 'SPIEGEL ONLINE GmbH'
|
||||
]
|
||||
publisher = 'SPIEGEL ONLINE GmbH'
|
||||
category = 'news, politics, Germany'
|
||||
lang = 'en'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : lang
|
||||
,'publisher' : publisher
|
||||
,'pretty_print': True
|
||||
}
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
||||
|
||||
|
@ -7,6 +7,7 @@ tanjug.rs
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Tanjug(BasicNewsRecipe):
|
||||
title = 'Tanjug'
|
||||
@ -14,21 +15,22 @@ class Tanjug(BasicNewsRecipe):
|
||||
description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
|
||||
publisher = 'Tanjug'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 1
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = True
|
||||
encoding = 'utf-8'
|
||||
lang = 'sr-Latn-RS'
|
||||
language = _('Serbian')
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -37,7 +39,7 @@ class Tanjug(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang' ] = self.lang
|
||||
soup.html['dir' ] = "ltr"
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
return self.adeify_images(soup)
|
||||
|
@ -20,14 +20,15 @@ class Twitchfilm(BasicNewsRecipe):
|
||||
publisher = 'Twitch'
|
||||
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
|
||||
language = _('English')
|
||||
lang = 'en-US'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
|
||||
|
||||
@ -36,6 +37,6 @@ class Twitchfilm(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['lang'] = 'en-US'
|
||||
return soup
|
||||
soup.html['lang'] = self.lang
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
@ -9,6 +9,7 @@ www.vecernji.hr
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class VecernjiList(BasicNewsRecipe):
|
||||
title = 'Vecernji List'
|
||||
@ -18,23 +19,23 @@ class VecernjiList(BasicNewsRecipe):
|
||||
category = 'news, politics, Croatia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
delay = 4
|
||||
delay = 1
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
language = _('Croatian')
|
||||
lang = 'hr-HR'
|
||||
direction = 'ltr'
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -46,13 +47,16 @@ class VecernjiList(BasicNewsRecipe):
|
||||
feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = 'hr-HR'
|
||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
return self.adeify_images(soup)
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('/index.do','/print.do')
|
||||
artid = url.rpartition('-')[2]
|
||||
return 'http://www.vecernji.hr/index.php?cmd=show_clanak&action=print_popup&clanak_id='+artid
|
||||
|
@ -20,22 +20,19 @@ class Vijesti(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 150
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
cover_url = 'http://www.vijesti.me/img/logo.gif'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = _('Serbian')
|
||||
language = _('Montenegrin')
|
||||
lang ='sr-Latn-Me'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
|
@ -22,22 +22,20 @@ class Vreme(BasicNewsRecipe):
|
||||
needs_subscription = True
|
||||
INDEX = 'http://www.vreme.com'
|
||||
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
|
||||
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -84,12 +82,21 @@ class Vreme(BasicNewsRecipe):
|
||||
del soup.body['text' ]
|
||||
del soup.body['bgcolor']
|
||||
del soup.body['onload' ]
|
||||
for item in soup.findAll(face=True):
|
||||
del item['face']
|
||||
for item in soup.findAll(size=True):
|
||||
del item['size']
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
|
||||
attribs = [ 'style','font','valign'
|
||||
,'colspan','width','height'
|
||||
,'rowspan','summary','align'
|
||||
,'cellspacing','cellpadding'
|
||||
,'frames','rules','border'
|
||||
]
|
||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||
item.name = 'div'
|
||||
for attrib in attribs:
|
||||
if item.has_key(attrib):
|
||||
del item[attrib]
|
||||
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
|
Loading…
x
Reference in New Issue
Block a user