mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update all Serbian and Croatian recipes to work with calibre 0.6
This commit is contained in:
parent
1ae3724038
commit
fd2888af18
@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Cro24Sata(BasicNewsRecipe):
|
class Cro24Sata(BasicNewsRecipe):
|
||||||
title = '24 Sata - Hr'
|
title = '24 Sata - Hr'
|
||||||
@ -22,18 +23,18 @@ class Cro24Sata(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
language = _('Croatian')
|
language = _('Croatian')
|
||||||
|
lang = 'hr-HR'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -45,9 +46,11 @@ class Cro24Sata(BasicNewsRecipe):
|
|||||||
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = 'hr-HR'
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
@ -17,53 +17,51 @@ class Ser24Sata(BasicNewsRecipe):
|
|||||||
description = '24 sata portal vesti iz Srbije'
|
description = '24 sata portal vesti iz Srbije'
|
||||||
publisher = 'Ringier d.o.o.'
|
publisher = 'Ringier d.o.o.'
|
||||||
category = 'news, politics, entertainment, Serbia'
|
category = 'news, politics, entertainment, Serbia'
|
||||||
oldest_article = 1
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
||||||
|
|
||||||
def cleanup_image_tags(self,soup):
|
|
||||||
for item in soup.findAll('img'):
|
|
||||||
for attrib in ['height','width','border','align']:
|
|
||||||
if item.has_key(attrib):
|
|
||||||
del item[attrib]
|
|
||||||
oldParent = item.parent
|
|
||||||
myIndex = oldParent.contents.index(item)
|
|
||||||
item.extract()
|
|
||||||
divtag = Tag(soup,'div')
|
|
||||||
brtag = Tag(soup,'br')
|
|
||||||
oldParent.insert(myIndex,divtag)
|
|
||||||
divtag.append(item)
|
|
||||||
divtag.append(brtag)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = 'sr-Latn-RS'
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
|
||||||
soup.head.insert(0,mtag)
|
attribs = [ 'style','font','valign'
|
||||||
return self.cleanup_image_tags(soup)
|
,'colspan','width','height'
|
||||||
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
|
|
||||||
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
article, sep, rest = url.partition('#')
|
article = url.partition('#')[0]
|
||||||
article_base, sep2, article_id = article.partition('id=')
|
article_id = article.partition('id=')[2]
|
||||||
return 'http://www.24sata.co.rs/_print.php?id=' + article_id
|
return 'http://www.24sata.rs/_print.php?id=' + article_id
|
||||||
|
|
||||||
|
@ -14,23 +14,21 @@ class B92(BasicNewsRecipe):
|
|||||||
description = 'Dnevne vesti iz Srbije i sveta'
|
description = 'Dnevne vesti iz Srbije i sveta'
|
||||||
publisher = 'B92'
|
publisher = 'B92'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 1
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
}
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -39,6 +37,7 @@ class B92(BasicNewsRecipe):
|
|||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ul', attrs={'class':'comment-nav'})
|
dict(name='ul', attrs={'class':'comment-nav'})
|
||||||
,dict(name=['embed','link','base'] )
|
,dict(name=['embed','link','base'] )
|
||||||
|
,dict(name='div', attrs={'class':'udokum'} )
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -51,14 +50,19 @@ class B92(BasicNewsRecipe):
|
|||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
del soup.body['onload']
|
del soup.body['onload']
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
|
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
for item in soup.findAll(align=True):
|
|
||||||
del item['align']
|
|
||||||
for item in soup.findAll('font'):
|
for item in soup.findAll('font'):
|
||||||
item.name='p'
|
item.name='div'
|
||||||
if item.has_key('size'):
|
if item.has_key('size'):
|
||||||
del item['size']
|
del item['size']
|
||||||
|
attribs = [ 'style','font','valign'
|
||||||
|
,'colspan','width','height'
|
||||||
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
@ -26,15 +26,13 @@ class Blic(BasicNewsRecipe):
|
|||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
}
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
||||||
@ -44,14 +42,21 @@ class Blic(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(name=['object','link'])]
|
remove_tags = [dict(name=['object','link'])]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
start_url, question, rest_url = url.partition('?')
|
rest_url = url.partition('?')[2]
|
||||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
attribs = [ 'style','font','valign'
|
||||||
soup.head.insert(0,mlang)
|
,'colspan','width','height'
|
||||||
for item in soup.findAll(style=True):
|
,'rowspan','summary','align'
|
||||||
del item['style']
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
|
@ -17,24 +17,23 @@ class Borba(BasicNewsRecipe):
|
|||||||
publisher = 'IP Novine Borba'
|
publisher = 'IP Novine Borba'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
oldest_article = 1
|
lang = _('sr-Latn-RS')
|
||||||
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
|
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
|
||||||
INDEX = u'http://www.borba.rs/'
|
INDEX = u'http://www.borba.rs/'
|
||||||
extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'
|
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -60,14 +59,17 @@ class Borba(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-ME'
|
attribs = [ 'style','font','valign'
|
||||||
soup.html['lang'] = 'sr-Latn-ME'
|
,'colspan','width','height'
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
,'rowspan','summary','align'
|
||||||
soup.head.insert(0,mtag)
|
,'cellspacing','cellpadding'
|
||||||
for item in soup.findAll(style=True):
|
,'frames','rules','border'
|
||||||
del item['style']
|
]
|
||||||
for item in soup.findAll(font=True):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
del item['font']
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
@ -7,9 +7,10 @@ danas.rs
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
|
class Danas(BasicNewsRecipe):
|
||||||
title = u'Danas'
|
title = 'Danas'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Vesti'
|
description = 'Vesti'
|
||||||
publisher = 'Danas d.o.o.'
|
publisher = 'Danas d.o.o.'
|
||||||
@ -17,19 +18,19 @@ class Danas(BasicNewsRecipe):
|
|||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
@ -44,8 +45,17 @@ class Danas(BasicNewsRecipe):
|
|||||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
attribs = [ 'style','font','valign'
|
||||||
del item['style']
|
,'colspan','width','height'
|
||||||
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
return soup
|
return soup
|
@ -9,6 +9,7 @@ dnevniavaz.ba
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class DnevniAvaz(BasicNewsRecipe):
|
class DnevniAvaz(BasicNewsRecipe):
|
||||||
title = 'Dnevni Avaz'
|
title = 'Dnevni Avaz'
|
||||||
@ -25,17 +26,18 @@ class DnevniAvaz(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
|
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
|
||||||
lang = 'bs-BA'
|
lang = 'bs-BA'
|
||||||
language = _('Bosnian')
|
language = _('Bosnian')
|
||||||
|
direction = 'ltr'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
|
keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
|
||||||
@ -47,9 +49,20 @@ class DnevniAvaz(BasicNewsRecipe):
|
|||||||
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
|
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def replace_tagname(self,soup,tagname,tagid,newtagname):
|
||||||
|
headtag = soup.find(tagname,attrs={'id':tagid})
|
||||||
|
if headtag:
|
||||||
|
headtag.name = newtagname
|
||||||
|
return
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="bs-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
return soup
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
|
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
|
||||||
|
self.replace_tagname(soup,'div','fullarticle-leading','h3')
|
||||||
|
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
@ -9,6 +9,7 @@ dnevnik.hr
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class DnevnikCro(BasicNewsRecipe):
|
class DnevnikCro(BasicNewsRecipe):
|
||||||
title = 'Dnevnik - Hr'
|
title = 'Dnevnik - Hr'
|
||||||
@ -22,19 +23,18 @@ class DnevnikCro(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
language = _('Croatian')
|
language = _('Croatian')
|
||||||
|
lang = 'hr-HR'
|
||||||
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -51,10 +51,24 @@ class DnevnikCro(BasicNewsRecipe):
|
|||||||
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
|
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = 'hr-HR'
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
soup.html['dir' ] = self.direction
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
for item in soup.findAll(style=True):
|
attribs = [ 'style','font','valign'
|
||||||
del item['style']
|
,'colspan','width','height'
|
||||||
return soup
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
|
|
||||||
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ e-novine.com
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class E_novine(BasicNewsRecipe):
|
class E_novine(BasicNewsRecipe):
|
||||||
title = 'E-Novine'
|
title = 'E-Novine'
|
||||||
@ -16,23 +17,22 @@ class E_novine(BasicNewsRecipe):
|
|||||||
description = 'News from Serbia'
|
description = 'News from Serbia'
|
||||||
publisher = 'E-novine'
|
publisher = 'E-novine'
|
||||||
category = 'news, politics, Balcans'
|
category = 'news, politics, Balcans'
|
||||||
oldest_article = 1
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -43,10 +43,10 @@ class E_novine(BasicNewsRecipe):
|
|||||||
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
|
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-ME'
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = 'sr-Latn-ME'
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
|
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
|
||||||
|
@ -9,6 +9,7 @@ glassrpske.com
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class GlasSrpske(BasicNewsRecipe):
|
class GlasSrpske(BasicNewsRecipe):
|
||||||
title = 'Glas Srpske'
|
title = 'Glas Srpske'
|
||||||
@ -21,7 +22,6 @@ class GlasSrpske(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
|
cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
|
||||||
lang = 'sr-BA'
|
lang = 'sr-BA'
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
@ -29,13 +29,13 @@ class GlasSrpske(BasicNewsRecipe):
|
|||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -64,8 +64,8 @@ class GlasSrpske(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-BA"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
@ -24,13 +24,13 @@ class HRT(BasicNewsRecipe):
|
|||||||
lang = 'hr-HR'
|
lang = 'hr-HR'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
@ -8,32 +8,32 @@ jutarnji.hr
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Jutarnji(BasicNewsRecipe):
|
class Jutarnji(BasicNewsRecipe):
|
||||||
title = u'Jutarnji'
|
title = 'Jutarnji'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = u'Hrvatski portal'
|
description = 'Hrvatski portal'
|
||||||
publisher = 'Jutarnji.hr'
|
publisher = 'Jutarnji.hr'
|
||||||
category = 'news, politics, Croatia'
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 1
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
simultaneous_downloads = 2
|
|
||||||
delay = 1
|
delay = 1
|
||||||
language = _('Croatian')
|
language = _('Croatian')
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
lang = 'hr-HR'
|
||||||
|
direction = 'ltr'
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .vijestnaslov{font-size: x-large; font-weight: bold}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
@ -59,11 +59,24 @@ class Jutarnji(BasicNewsRecipe):
|
|||||||
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
|
soup.html['lang'] = self.lang
|
||||||
soup.head.insert(0,mtag)
|
soup.html['dir' ] = self.direction
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
attribs = [ 'style','font','valign'
|
||||||
for item in soup.findAll(width=True):
|
,'colspan','width','height'
|
||||||
del item['width']
|
,'rowspan','summary','align'
|
||||||
return soup
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
|
|
||||||
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
|
@ -9,6 +9,7 @@ nacional.hr
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class NacionalCro(BasicNewsRecipe):
|
class NacionalCro(BasicNewsRecipe):
|
||||||
title = 'Nacional - Hr'
|
title = 'Nacional - Hr'
|
||||||
@ -22,19 +23,20 @@ class NacionalCro(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
language = _('Croatian')
|
language = _('Croatian')
|
||||||
|
lang = 'hr-HR'
|
||||||
|
direction = 'ltr'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','embed'])]
|
remove_tags = [dict(name=['object','link','embed'])]
|
||||||
@ -42,9 +44,12 @@ class NacionalCro(BasicNewsRecipe):
|
|||||||
feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
|
feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = 'hr-HR'
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
soup.html['dir' ] = self.direction
|
||||||
soup.head.insert(0,mtag)
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
@ -26,21 +26,19 @@ class Nin(BasicNewsRecipe):
|
|||||||
INDEX = PREFIX + '/?change_lang=ls'
|
INDEX = PREFIX + '/?change_lang=ls'
|
||||||
LOGIN = PREFIX + '/?logout=true'
|
LOGIN = PREFIX + '/?logout=true'
|
||||||
FEED = PREFIX + '/misc/rss.php?feed=RSS2.0'
|
FEED = PREFIX + '/misc/rss.php?feed=RSS2.0'
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -74,12 +72,20 @@ class Nin(BasicNewsRecipe):
|
|||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
attribs = [ 'style','font','valign'
|
||||||
del item['style']
|
,'colspan','width','height'
|
||||||
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
raw = article.get('link', None)
|
raw = article.get('link', None)
|
||||||
return raw.replace('.co.yu','.co.rs')
|
return raw.replace('.co.yu','.co.rs')
|
||||||
|
|
@ -8,30 +8,30 @@ novosti.rs
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Novosti(BasicNewsRecipe):
|
class Novosti(BasicNewsRecipe):
|
||||||
title = u'Vecernje Novosti'
|
title = 'Vecernje Novosti'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = u'Vesti'
|
description = 'Vesti'
|
||||||
publisher = 'Kompanija Novosti'
|
publisher = 'Kompanija Novosti'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -41,8 +41,17 @@ class Novosti(BasicNewsRecipe):
|
|||||||
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
attribs = [ 'style','font','valign'
|
||||||
del item['style']
|
,'colspan','width','height'
|
||||||
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
@ -21,19 +21,18 @@ class Nspm(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -51,28 +50,18 @@ class Nspm(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('.html','/stampa.html')
|
return url.replace('.html','/stampa.html')
|
||||||
|
|
||||||
def cleanup_image_tags(self,soup):
|
|
||||||
for item in soup.findAll('img'):
|
|
||||||
for attrib in ['height','width','border','align']:
|
|
||||||
if item.has_key(attrib):
|
|
||||||
del item[attrib]
|
|
||||||
oldParent = item.parent
|
|
||||||
myIndex = oldParent.contents.index(item)
|
|
||||||
item.extract()
|
|
||||||
divtag = Tag(soup,'div')
|
|
||||||
brtag = Tag(soup,'br')
|
|
||||||
oldParent.insert(myIndex,divtag)
|
|
||||||
divtag.append(item)
|
|
||||||
divtag.append(brtag)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
lng = 'sr-Latn-RS'
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['xml:lang'] = lng
|
soup.html['lang'] = self.lang
|
||||||
soup.html['lang'] = lng
|
attribs = [ 'style','font','valign'
|
||||||
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
|
,'colspan','width','height'
|
||||||
if ftag:
|
,'rowspan','summary','align'
|
||||||
ftag['content'] = lng
|
,'cellspacing','cellpadding'
|
||||||
for item in soup.findAll(style=True):
|
,'frames','rules','border'
|
||||||
del item['style']
|
]
|
||||||
return self.cleanup_image_tags(soup)
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
@ -8,6 +8,7 @@ pescanik.net
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Pescanik(BasicNewsRecipe):
|
class Pescanik(BasicNewsRecipe):
|
||||||
title = 'Pescanik'
|
title = 'Pescanik'
|
||||||
@ -19,20 +20,18 @@ class Pescanik(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
encoding = 'utf-8'
|
||||||
encoding = 'utf8'
|
|
||||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
lang = 'sr-Latn-RS'
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
@ -40,18 +39,27 @@ class Pescanik(BasicNewsRecipe):
|
|||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='td' , attrs={'class':'buttonheading'})
|
dict(name='td' , attrs={'class':'buttonheading'})
|
||||||
,dict(name='span', attrs={'class':'article_seperator'})
|
,dict(name='span', attrs={'class':'article_seperator'})
|
||||||
,dict(name=['object','link','img','h4','ul'])
|
,dict(name=['object','link','h4','ul'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
feeds = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
nurl = url.replace('/index.php','/index2.php')
|
nurl = url.replace('/index.php','/index2.php')
|
||||||
return nurl + '&pop=1&page=0'
|
return nurl + '&pop=1&page=0'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
attribs = [ 'style','font','valign'
|
||||||
del item['style']
|
,'colspan','width','height'
|
||||||
return soup
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
@ -19,22 +19,20 @@ class Pobjeda(BasicNewsRecipe):
|
|||||||
publisher = 'Pobjeda a.d.'
|
publisher = 'Pobjeda a.d.'
|
||||||
category = 'news, politics, Montenegro'
|
category = 'news, politics, Montenegro'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
encoding = 'utf-8'
|
||||||
encoding = 'utf8'
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Serbian')
|
language = _('Montenegrin')
|
||||||
lang = 'sr-Latn-Me'
|
lang = 'sr-Latn-Me'
|
||||||
INDEX = u'http://www.pobjeda.co.me'
|
INDEX = u'http://www.pobjeda.co.me'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
politika.rs
|
politika.rs
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Politika(BasicNewsRecipe):
|
class Politika(BasicNewsRecipe):
|
||||||
title = u'Politika Online'
|
title = 'Politika Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Najstariji dnevni list na Balkanu'
|
description = 'Najstariji dnevni list na Balkanu'
|
||||||
publisher = 'Politika novine i Magazini d.o.o'
|
publisher = 'Politika novine i Magazini d.o.o'
|
||||||
@ -21,16 +22,18 @@ class Politika(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -55,11 +58,13 @@ class Politika(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
soup.html['lang'] = self.lang
|
||||||
soup.head.insert(0,mtag)
|
soup.html['dir' ] = self.direction
|
||||||
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
||||||
if ftag.has_key('align'):
|
if ftag.has_key('align'):
|
||||||
del ftag['align']
|
del ftag['align']
|
||||||
return soup
|
return self.adeify_images(soup)
|
||||||
|
@ -9,6 +9,7 @@ pressonline.rs
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class PressOnline(BasicNewsRecipe):
|
class PressOnline(BasicNewsRecipe):
|
||||||
title = 'Press Online'
|
title = 'Press Online'
|
||||||
@ -19,20 +20,21 @@ class PressOnline(BasicNewsRecipe):
|
|||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
cover_url = 'http://www.pressonline.rs/img/logo.gif'
|
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
lang = 'sr-Latn-RS'
|
||||||
|
direction = 'ltr'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -57,10 +59,8 @@ class PressOnline(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
soup.html['lang'] = self.lang
|
||||||
soup.html['lang'] = 'sr-Latn-RS'
|
soup.html['dir' ] = self.direction
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
for img in soup.findAll('img', align=True):
|
return self.adeify_images(soup)
|
||||||
del img['align']
|
|
||||||
return soup
|
|
@ -24,13 +24,13 @@ class RTS(BasicNewsRecipe):
|
|||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
}
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
spiegel.de
|
spiegel.de
|
||||||
'''
|
'''
|
||||||
@ -9,21 +9,25 @@ spiegel.de
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Spiegel_int(BasicNewsRecipe):
|
class Spiegel_int(BasicNewsRecipe):
|
||||||
title = u'Spiegel Online International'
|
title = 'Spiegel Online International'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "News and POV from Europe's largest newsmagazine"
|
description = "News and POV from Europe's largest newsmagazine"
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = _('English')
|
language = _('English')
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif'
|
publisher = 'SPIEGEL ONLINE GmbH'
|
||||||
html2lrf_options = [
|
category = 'news, politics, Germany'
|
||||||
'--comment', description
|
lang = 'en'
|
||||||
, '--base-font-size', '10'
|
|
||||||
, '--category', 'news, politics, Germany'
|
conversion_options = {
|
||||||
, '--publisher', 'SPIEGEL ONLINE GmbH'
|
'comments' : description
|
||||||
]
|
,'tags' : category
|
||||||
|
,'language' : lang
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'pretty_print': True
|
||||||
|
}
|
||||||
|
|
||||||
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ tanjug.rs
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Tanjug(BasicNewsRecipe):
|
class Tanjug(BasicNewsRecipe):
|
||||||
title = 'Tanjug'
|
title = 'Tanjug'
|
||||||
@ -14,21 +15,22 @@ class Tanjug(BasicNewsRecipe):
|
|||||||
description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
|
description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
|
||||||
publisher = 'Tanjug'
|
publisher = 'Tanjug'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 1
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -37,7 +39,7 @@ class Tanjug(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang' ] = self.lang
|
soup.html['lang' ] = self.lang
|
||||||
soup.html['dir' ] = "ltr"
|
soup.html['dir' ] = self.direction
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>'
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mlang)
|
||||||
return soup
|
return self.adeify_images(soup)
|
||||||
|
@ -20,14 +20,15 @@ class Twitchfilm(BasicNewsRecipe):
|
|||||||
publisher = 'Twitch'
|
publisher = 'Twitch'
|
||||||
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
|
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
|
||||||
language = _('English')
|
language = _('English')
|
||||||
|
lang = 'en-US'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
}
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
|
remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
|
||||||
|
|
||||||
@ -36,6 +37,6 @@ class Twitchfilm(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
|
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
soup.html['lang'] = 'en-US'
|
soup.html['lang'] = self.lang
|
||||||
return soup
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ www.vecernji.hr
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class VecernjiList(BasicNewsRecipe):
|
class VecernjiList(BasicNewsRecipe):
|
||||||
title = 'Vecernji List'
|
title = 'Vecernji List'
|
||||||
@ -18,23 +19,23 @@ class VecernjiList(BasicNewsRecipe):
|
|||||||
category = 'news, politics, Croatia'
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
delay = 4
|
delay = 1
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
language = _('Croatian')
|
language = _('Croatian')
|
||||||
|
lang = 'hr-HR'
|
||||||
|
direction = 'ltr'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -46,13 +47,16 @@ class VecernjiList(BasicNewsRecipe):
|
|||||||
feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
|
feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = 'hr-HR'
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
soup.html['dir' ] = self.direction
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
for item in soup.findAll(style=True):
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
del item['style']
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
return soup
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/index.do','/print.do')
|
artid = url.rpartition('-')[2]
|
||||||
|
return 'http://www.vecernji.hr/index.php?cmd=show_clanak&action=print_popup&clanak_id='+artid
|
||||||
|
|
@ -20,22 +20,19 @@ class Vijesti(BasicNewsRecipe):
|
|||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 150
|
max_articles_per_feed = 150
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
cover_url = 'http://www.vijesti.me/img/logo.gif'
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Serbian')
|
language = _('Montenegrin')
|
||||||
lang ='sr-Latn-Me'
|
lang ='sr-Latn-Me'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment', description
|
'comment' : description
|
||||||
, '--category', category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : lang
|
||||||
|
, 'pretty_print' : True
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
|
@ -22,22 +22,20 @@ class Vreme(BasicNewsRecipe):
|
|||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
INDEX = 'http://www.vreme.com'
|
INDEX = 'http://www.vreme.com'
|
||||||
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
|
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = _('Serbian')
|
language = _('Serbian')
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
|
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , category
|
, 'tags' : category
|
||||||
, '--publisher', publisher
|
, 'publisher' : publisher
|
||||||
, '--ignore-tables'
|
, 'language' : lang
|
||||||
]
|
, 'pretty_print' : True
|
||||||
|
}
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
@ -84,12 +82,21 @@ class Vreme(BasicNewsRecipe):
|
|||||||
del soup.body['text' ]
|
del soup.body['text' ]
|
||||||
del soup.body['bgcolor']
|
del soup.body['bgcolor']
|
||||||
del soup.body['onload' ]
|
del soup.body['onload' ]
|
||||||
for item in soup.findAll(face=True):
|
|
||||||
del item['face']
|
|
||||||
for item in soup.findAll(size=True):
|
|
||||||
del item['size']
|
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
|
|
||||||
|
attribs = [ 'style','font','valign'
|
||||||
|
,'colspan','width','height'
|
||||||
|
,'rowspan','summary','align'
|
||||||
|
,'cellspacing','cellpadding'
|
||||||
|
,'frames','rules','border'
|
||||||
|
]
|
||||||
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
|
item.name = 'div'
|
||||||
|
for attrib in attribs:
|
||||||
|
if item.has_key(attrib):
|
||||||
|
del item[attrib]
|
||||||
|
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user