Implement #1843 (Various updated recipes for better EPUB support)

This commit is contained in:
Kovid Goyal 2009-02-13 10:36:34 -08:00
parent 2ecb5f82c8
commit 578cc310c2
15 changed files with 160 additions and 161 deletions

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
b92.net b92.net
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class B92(BasicNewsRecipe): class B92(BasicNewsRecipe):
title = 'B92' title = 'B92'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -22,19 +21,22 @@ class B92(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://static.b92.net/images/fp/logo.gif' cover_url = 'http://static.b92.net/images/fp/logo.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ] keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [ feeds = [
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml') (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' ) ,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
@ -54,9 +56,10 @@ class B92(BasicNewsRecipe):
return nurl return nurl
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn' lng = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn' soup.html['xml:lang'] = lng
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>' soup.html['lang'] = lng
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
@ -64,4 +67,3 @@ class B92(BasicNewsRecipe):
del item['align'] del item['align']
item.insert(0,'<br /><br />') item.insert(0,'<br /><br />')
return soup return soup
language = _('Serbian')

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
blic.rs blic.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe): class Blic(BasicNewsRecipe):
title = u'Blic' title = u'Blic'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
@ -21,15 +20,17 @@ class Blic(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,10 +45,9 @@ class Blic(BasicNewsRecipe):
return u'http://www.blic.rs/_print.php?' + rest_url return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>' mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Serbian')

View File

@ -1,14 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
danas.rs danas.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe): class Danas(BasicNewsRecipe):
title = u'Danas' title = u'Danas'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -20,15 +19,17 @@ class Danas(BasicNewsRecipe):
no_stylesheets = False no_stylesheets = False
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -43,9 +44,8 @@ class Danas(BasicNewsRecipe):
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')] feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>' mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Serbian')

View File

@ -5,9 +5,8 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
elargentino.com elargentino.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class ElArgentino(BasicNewsRecipe): class ElArgentino(BasicNewsRecipe):
title = 'ElArgentino.com' title = 'ElArgentino.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -21,9 +20,10 @@ class ElArgentino(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png' cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
language = _('Spanish')
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--category', category , '--category', category
, '--publisher', publisher , '--publisher', publisher
] ]
@ -59,5 +59,3 @@ class ElArgentino(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Spanish')

View File

@ -1,14 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
granma.cubaweb.cu granma.cubaweb.cu
''' '''
import urllib import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Granma(BasicNewsRecipe): class Granma(BasicNewsRecipe):
title = 'Diario Granma' title = 'Diario Granma'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -21,18 +19,21 @@ class Granma(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg' cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
language = _('Spanish')
remove_javascript = True remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='table', attrs={'height':'466'})] keep_only_tags = [dict(name='table', attrs={'height':'466'})]
remove_tags = [dict(name=['embed','link','object'])]
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )] feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
@ -48,5 +49,4 @@ class Granma(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Spanish')

View File

@ -6,29 +6,36 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
infobae.com infobae.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Infobae(BasicNewsRecipe): class Infobae(BasicNewsRecipe):
title = 'Infobae.com' title = 'Infobae.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas' description = 'Informacion Libre las 24 horas'
publisher = 'Infobae.com' publisher = 'Infobae.com'
category = 'news, politics, Argentina' category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = _('Spanish')
encoding = 'iso-8859-1' encoding = 'iso-8859-1'
cover_url = 'http://www.infobae.com/imgs/header/header.gif' cover_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
remove_tags = [
dict(name=['embed','link','object'])
,dict(name='a', attrs={'onclick':'javascript:window.print()'})
]
feeds = [ feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
@ -48,5 +55,3 @@ class Infobae(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Spanish')

View File

@ -1,47 +1,46 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
jutarnji.hr jutarnji.hr
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Jutarnji(BasicNewsRecipe): class Jutarnji(BasicNewsRecipe):
title = u'Jutarnji' title = u'Jutarnji'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
description = u'Hrvatski portal' description = u'Hrvatski portal'
publisher = 'Jutarnji.hr' publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia' category = 'news, politics, Croatia'
oldest_article = 2 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
simultaneous_downloads = 1 simultaneous_downloads = 2
delay = 1 delay = 1
language = _('Croatian') language = _('Croatian')
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [ remove_tags = [
dict(name='embed') dict(name=['embed','hr','link','object'])
,dict(name='a', attrs={'class':'a11'}) ,dict(name='a', attrs={'class':'a11'})
,dict(name='hr')
] ]
feeds = [ feeds = [
@ -60,13 +59,11 @@ class Jutarnji(BasicNewsRecipe):
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
for item in soup.findAll(width=True): for item in soup.findAll(width=True):
del item['width'] del item['width']
return soup return soup

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
juventudrebelde.cu juventudrebelde.cu
''' '''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Juventudrebelde(BasicNewsRecipe): class Juventudrebelde(BasicNewsRecipe):
title = 'Juventud Rebelde' title = 'Juventud Rebelde'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -20,17 +20,18 @@ class Juventudrebelde(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
language = _('Spanish')
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg') cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
@ -50,5 +51,4 @@ class Juventudrebelde(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Spanish')

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
nin.co.yu nin.co.yu
''' '''
import re, urllib import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe): class Nin(BasicNewsRecipe):
title = 'NIN online' title = 'NIN online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -27,15 +26,17 @@ class Nin(BasicNewsRecipe):
LOGIN = PREFIX + '/?logout=true' LOGIN = PREFIX + '/?logout=true'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -69,5 +70,3 @@ class Nin(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Serbian')

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
novosti.rs novosti.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Novosti(BasicNewsRecipe): class Novosti(BasicNewsRecipe):
title = u'Vecernje Novosti' title = u'Vecernje Novosti'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
@ -22,15 +21,17 @@ class Novosti(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
remove_javascript = True remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -40,10 +41,8 @@ class Novosti(BasicNewsRecipe):
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>' mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Serbian')

View File

@ -1,41 +1,44 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
nspm.rs nspm.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Nspm(BasicNewsRecipe): class Nspm(BasicNewsRecipe):
title = u'Nova srpska politicka misao' title = u'Nova srpska politicka misao'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Casopis za politicku teoriju i drustvena istrazivanja' description = 'Casopis za politicku teoriju i drustvena istrazivanja'
publisher = 'NSPM' publisher = 'NSPM'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 7 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l' INDEX = 'http://www.nspm.rs/?alphabet=l'
encoding = 'utf8' encoding = 'utf8'
remove_javascript = True remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name='a')] remove_tags = [
dict(name=['a','img','link','object','embed'])
,dict(name='td', attrs={'class':'buttonheading'})
]
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
@ -48,13 +51,12 @@ class Nspm(BasicNewsRecipe):
return url.replace('.html','/stampa.html') return url.replace('.html','/stampa.html')
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS' lng = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS' soup.html['xml:lang'] = lng
soup.html['lang'] = lng
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'}) ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
if ftag: if ftag:
ftag['content'] = 'sr-Latn-RS' ftag['content'] = lng
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Serbian')

View File

@ -1,45 +1,46 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
pescanik.net pescanik.net
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe): class Pescanik(BasicNewsRecipe):
title = 'Pescanik' title = 'Pescanik'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Pescanik' description = 'Pescanik'
publisher = 'Pescanik' publisher = 'Pescanik'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 7 oldest_article = 5
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True remove_javascript = True
encoding = 'utf8' encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [ remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'}) dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'}) ,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link']) ,dict(name=['object','link','img','h4','ul'])
] ]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')] feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
@ -54,5 +55,3 @@ class Pescanik(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup
language = _('Serbian')

View File

@ -6,9 +6,8 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
politika.rs politika.rs
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Politika(BasicNewsRecipe): class Politika(BasicNewsRecipe):
title = u'Politika Online' title = u'Politika Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -16,16 +15,16 @@ class Politika(BasicNewsRecipe):
publisher = 'Politika novine i Magazini d.o.o' publisher = 'Politika novine i Magazini d.o.o'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
language = _('Serbian')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True remove_javascript = True
encoding = 'utf8' encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--category', category , '--category', category
, '--publisher', publisher , '--publisher', publisher
] ]
@ -61,6 +60,6 @@ class Politika(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
ftag = soup.find('div',attrs={'class':'content_center_border'}) ftag = soup.find('div',attrs={'class':'content_center_border'})
if ftag: if ftag.has_key('align'):
ftag['align'] = 'left' del ftag['align']
return soup return soup

View File

@ -4,13 +4,12 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
vijesti.cg.yu vijesti.me
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Vijesti(BasicNewsRecipe): class Vijesti(BasicNewsRecipe):
title = 'Vijesti' title = 'Vijesti'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -22,13 +21,14 @@ class Vijesti(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif' cover_url = 'http://www.vijesti.me/img/logo.gif'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--category', category , '--category', category
, '--publisher', publisher , '--publisher', publisher
] ]
@ -39,12 +39,9 @@ class Vijesti(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})] keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
remove_tags = [ remove_tags = [dict(name=['object','link','embed'])]
dict(name='div', attrs={'align':'right'})
,dict(name=['object','link'])
]
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )] feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME' soup.html['xml:lang'] = 'sr-Latn-ME'
@ -56,5 +53,3 @@ class Vijesti(BasicNewsRecipe):
del item['align'] del item['align']
item.insert(0,'<br /><br />') item.insert(0,'<br /><br />')
return soup return soup
language = _('Serbian')

View File

@ -1,16 +1,15 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
vreme.com vreme.com
''' '''
import re import re
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Vreme(BasicNewsRecipe): class Vreme(BasicNewsRecipe):
title = 'Vreme' title = 'Vreme'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -24,15 +23,17 @@ class Vreme(BasicNewsRecipe):
LOGIN = 'http://www.vreme.com/account/index.php' LOGIN = 'http://www.vreme.com/account/index.php'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment' , description
, '--category', category , '--category' , category
, '--publisher', publisher , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -87,14 +88,19 @@ class Vreme(BasicNewsRecipe):
del soup.body['text' ] del soup.body['text' ]
del soup.body['bgcolor'] del soup.body['bgcolor']
del soup.body['onload' ] del soup.body['onload' ]
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>' for item in soup.findAll('table'):
if item.has_key('width'):
del item['width']
if item.has_key('height'):
del item['height']
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
tbl = soup.body.table tbl = soup.body.table
tbbb = soup.find('td') tbbb = soup.find('td')
if tbbb: if tbbb:
tbbb.extract() tbbb.extract()
tbl.extract() tbl.extract()
soup.body.insert(0,tbbb) soup.body.insert(0,tbbb)
return soup return soup
def get_cover_url(self): def get_cover_url(self):
@ -104,5 +110,3 @@ class Vreme(BasicNewsRecipe):
if cover_item: if cover_item:
cover_url = self.INDEX + cover_item['src'] cover_url = self.INDEX + cover_item['src']
return cover_url return cover_url
language = _('Serbian')