Implement #1843 (Various updated recipes for better EPUB support)

This commit is contained in:
Kovid Goyal 2009-02-13 10:36:34 -08:00
parent 2ecb5f82c8
commit 578cc310c2
15 changed files with 160 additions and 161 deletions

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
b92.net
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class B92(BasicNewsRecipe):
@ -22,19 +21,22 @@ class B92(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://static.b92.net/images/fp/logo.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
@ -54,9 +56,10 @@ class B92(BasicNewsRecipe):
return nurl
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn'
soup.html['lang'] = 'sr-Latn'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
lng = 'sr-Latn-RS'
soup.html['xml:lang'] = lng
soup.html['lang'] = lng
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
@ -64,4 +67,3 @@ class B92(BasicNewsRecipe):
del item['align']
item.insert(0,'<br /><br />')
return soup
language = _('Serbian')

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe):
@ -21,15 +20,17 @@ class Blic(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -44,10 +45,9 @@ class Blic(BasicNewsRecipe):
return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,12 +1,11 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe):
@ -20,15 +19,17 @@ class Danas(BasicNewsRecipe):
no_stylesheets = False
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -43,9 +44,8 @@ class Danas(BasicNewsRecipe):
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -5,7 +5,6 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElArgentino(BasicNewsRecipe):
@ -21,6 +20,7 @@ class ElArgentino(BasicNewsRecipe):
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
language = _('Spanish')
html2lrf_options = [
'--comment', description
@ -59,5 +59,3 @@ class ElArgentino(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,14 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
granma.cubaweb.cu
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Granma(BasicNewsRecipe):
title = 'Diario Granma'
__author__ = 'Darko Miletic'
@ -21,19 +19,22 @@ class Granma(BasicNewsRecipe):
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
language = _('Spanish')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
remove_tags = [dict(name=['embed','link','object'])]
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
@ -49,4 +50,3 @@ class Granma(BasicNewsRecipe):
del item['style']
return soup
language = _('Spanish')

View File

@ -14,21 +14,28 @@ class Infobae(BasicNewsRecipe):
description = 'Informacion Libre las 24 horas'
publisher = 'Infobae.com'
category = 'news, politics, Argentina'
oldest_article = 2
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = _('Spanish')
encoding = 'iso-8859-1'
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
remove_tags = [
dict(name=['embed','link','object'])
,dict(name='a', attrs={'onclick':'javascript:window.print()'})
]
feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
@ -48,5 +55,3 @@ class Infobae(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
jutarnji.hr
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Jutarnji(BasicNewsRecipe):
@ -16,32 +15,32 @@ class Jutarnji(BasicNewsRecipe):
description = u'Hrvatski portal'
publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia'
oldest_article = 2
oldest_article = 1
max_articles_per_feed = 100
simultaneous_downloads = 1
simultaneous_downloads = 2
delay = 1
language = _('Croatian')
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [
dict(name='embed')
dict(name=['embed','hr','link','object'])
,dict(name='a', attrs={'class':'a11'})
,dict(name='hr')
]
feeds = [
@ -60,9 +59,7 @@ class Jutarnji(BasicNewsRecipe):
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']

View File

@ -1,12 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
juventudrebelde.cu
'''
from calibre import strftime
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Juventudrebelde(BasicNewsRecipe):
@ -20,17 +20,18 @@ class Juventudrebelde(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
language = _('Spanish')
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
@ -51,4 +52,3 @@ class Juventudrebelde(BasicNewsRecipe):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
nin.co.yu
'''
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe):
@ -27,15 +26,17 @@ class Nin(BasicNewsRecipe):
LOGIN = PREFIX + '/?logout=true'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -69,5 +70,3 @@ class Nin(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
novosti.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Novosti(BasicNewsRecipe):
@ -22,15 +21,17 @@ class Novosti(BasicNewsRecipe):
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -40,10 +41,8 @@ class Novosti(BasicNewsRecipe):
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
nspm.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Nspm(BasicNewsRecipe):
@ -16,26 +15,30 @@ class Nspm(BasicNewsRecipe):
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
publisher = 'NSPM'
category = 'news, politics, Serbia'
oldest_article = 7
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l'
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name='a')]
remove_tags = [
dict(name=['a','img','link','object','embed'])
,dict(name='td', attrs={'class':'buttonheading'})
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -48,13 +51,12 @@ class Nspm(BasicNewsRecipe):
return url.replace('.html','/stampa.html')
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS'
lng = 'sr-Latn-RS'
soup.html['xml:lang'] = lng
soup.html['lang'] = lng
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
if ftag:
ftag['content'] = 'sr-Latn-RS'
ftag['content'] = lng
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
pescanik.net
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe):
@ -16,30 +15,32 @@ class Pescanik(BasicNewsRecipe):
description = 'Pescanik'
publisher = 'Pescanik'
category = 'news, politics, Serbia'
oldest_article = 7
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link'])
,dict(name=['object','link','img','h4','ul'])
]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
@ -54,5 +55,3 @@ class Pescanik(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
politika.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Politika(BasicNewsRecipe):
@ -16,13 +15,13 @@ class Politika(BasicNewsRecipe):
publisher = 'Politika novine i Magazini d.o.o'
category = 'news, politics, Serbia'
oldest_article = 2
language = _('Serbian')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
@ -61,6 +60,6 @@ class Politika(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div',attrs={'class':'content_center_border'})
if ftag:
ftag['align'] = 'left'
if ftag.has_key('align'):
del ftag['align']
return soup

View File

@ -4,11 +4,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
vijesti.cg.yu
vijesti.me
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Vijesti(BasicNewsRecipe):
@ -22,10 +21,11 @@ class Vijesti(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
encoding = 'cp1250'
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
cover_url = 'http://www.vijesti.me/img/logo.gif'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
@ -39,12 +39,9 @@ class Vijesti(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
remove_tags = [
dict(name='div', attrs={'align':'right'})
,dict(name=['object','link'])
]
remove_tags = [dict(name=['object','link','embed'])]
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )]
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-ME'
@ -56,5 +53,3 @@ class Vijesti(BasicNewsRecipe):
del item['align']
item.insert(0,'<br /><br />')
return soup
language = _('Serbian')

View File

@ -1,14 +1,13 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
vreme.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Vreme(BasicNewsRecipe):
@ -24,15 +23,17 @@ class Vreme(BasicNewsRecipe):
LOGIN = 'http://www.vreme.com/account/index.php'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
'--comment' , description
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -87,7 +88,12 @@ class Vreme(BasicNewsRecipe):
del soup.body['text' ]
del soup.body['bgcolor']
del soup.body['onload' ]
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
for item in soup.findAll('table'):
if item.has_key('width'):
del item['width']
if item.has_key('height'):
del item['height']
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
tbl = soup.body.table
tbbb = soup.find('td')
@ -104,5 +110,3 @@ class Vreme(BasicNewsRecipe):
if cover_item:
cover_url = self.INDEX + cover_item['src']
return cover_url
language = _('Serbian')