mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #1843 (Various updated recipes for better EPUB support)
This commit is contained in:
parent
2ecb5f82c8
commit
578cc310c2
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
b92.net
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class B92(BasicNewsRecipe):
|
||||
@ -22,19 +21,22 @@ class B92(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://static.b92.net/images/fp/logo.gif'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
feeds = [
|
||||
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
||||
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
||||
@ -54,9 +56,10 @@ class B92(BasicNewsRecipe):
|
||||
return nurl
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn'
|
||||
soup.html['lang'] = 'sr-Latn'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
lng = 'sr-Latn-RS'
|
||||
soup.html['xml:lang'] = lng
|
||||
soup.html['lang'] = lng
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
@ -64,4 +67,3 @@ class B92(BasicNewsRecipe):
|
||||
del item['align']
|
||||
item.insert(0,'<br /><br />')
|
||||
return soup
|
||||
language = _('Serbian')
|
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blic.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Blic(BasicNewsRecipe):
|
||||
@ -21,15 +20,17 @@ class Blic(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -44,10 +45,9 @@ class Blic(BasicNewsRecipe):
|
||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -1,12 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
danas.rs
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Danas(BasicNewsRecipe):
|
||||
@ -20,15 +19,17 @@ class Danas(BasicNewsRecipe):
|
||||
no_stylesheets = False
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -43,9 +44,8 @@ class Danas(BasicNewsRecipe):
|
||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
language = _('Serbian')
|
@ -5,7 +5,6 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
elargentino.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElArgentino(BasicNewsRecipe):
|
||||
@ -21,6 +20,7 @@ class ElArgentino(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
||||
language = _('Spanish')
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
@ -59,5 +59,3 @@ class ElArgentino(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,14 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
granma.cubaweb.cu
|
||||
'''
|
||||
import urllib
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Granma(BasicNewsRecipe):
|
||||
title = 'Diario Granma'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -21,6 +19,7 @@ class Granma(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
||||
language = _('Spanish')
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
@ -30,10 +29,12 @@ class Granma(BasicNewsRecipe):
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
|
||||
|
||||
remove_tags = [dict(name=['embed','link','object'])]
|
||||
|
||||
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
|
||||
|
||||
|
||||
@ -49,4 +50,3 @@ class Granma(BasicNewsRecipe):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -14,10 +14,11 @@ class Infobae(BasicNewsRecipe):
|
||||
description = 'Informacion Libre las 24 horas'
|
||||
publisher = 'Infobae.com'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = _('Spanish')
|
||||
encoding = 'iso-8859-1'
|
||||
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||
remove_javascript = True
|
||||
@ -26,9 +27,15 @@ class Infobae(BasicNewsRecipe):
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['embed','link','object'])
|
||||
,dict(name='a', attrs={'onclick':'javascript:window.print()'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
||||
@ -48,5 +55,3 @@ class Infobae(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
jutarnji.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Jutarnji(BasicNewsRecipe):
|
||||
@ -16,32 +15,32 @@ class Jutarnji(BasicNewsRecipe):
|
||||
description = u'Hrvatski portal'
|
||||
publisher = 'Jutarnji.hr'
|
||||
category = 'news, politics, Croatia'
|
||||
oldest_article = 2
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 1
|
||||
simultaneous_downloads = 2
|
||||
delay = 1
|
||||
language = _('Croatian')
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='embed')
|
||||
dict(name=['embed','hr','link','object'])
|
||||
,dict(name='a', attrs={'class':'a11'})
|
||||
,dict(name='hr')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
@ -60,9 +59,7 @@ class Jutarnji(BasicNewsRecipe):
|
||||
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
|
@ -1,12 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
juventudrebelde.cu
|
||||
'''
|
||||
from calibre import strftime
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Juventudrebelde(BasicNewsRecipe):
|
||||
@ -20,6 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
language = _('Spanish')
|
||||
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
||||
remove_javascript = True
|
||||
|
||||
@ -30,7 +31,7 @@ class Juventudrebelde(BasicNewsRecipe):
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
||||
|
||||
@ -51,4 +52,3 @@ class Juventudrebelde(BasicNewsRecipe):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
nin.co.yu
|
||||
'''
|
||||
|
||||
import re, urllib
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Nin(BasicNewsRecipe):
|
||||
@ -27,15 +26,17 @@ class Nin(BasicNewsRecipe):
|
||||
LOGIN = PREFIX + '/?logout=true'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -69,5 +70,3 @@ class Nin(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
novosti.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Novosti(BasicNewsRecipe):
|
||||
@ -22,15 +21,17 @@ class Novosti(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -40,10 +41,8 @@ class Novosti(BasicNewsRecipe):
|
||||
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
nspm.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Nspm(BasicNewsRecipe):
|
||||
@ -16,14 +15,15 @@ class Nspm(BasicNewsRecipe):
|
||||
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
||||
publisher = 'NSPM'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 7
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
@ -32,10 +32,13 @@ class Nspm(BasicNewsRecipe):
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
remove_tags = [dict(name='a')]
|
||||
remove_tags = [
|
||||
dict(name=['a','img','link','object','embed'])
|
||||
,dict(name='td', attrs={'class':'buttonheading'})
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -48,13 +51,12 @@ class Nspm(BasicNewsRecipe):
|
||||
return url.replace('.html','/stampa.html')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
||||
soup.html['lang'] = 'sr-Latn-RS'
|
||||
lng = 'sr-Latn-RS'
|
||||
soup.html['xml:lang'] = lng
|
||||
soup.html['lang'] = lng
|
||||
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
|
||||
if ftag:
|
||||
ftag['content'] = 'sr-Latn-RS'
|
||||
ftag['content'] = lng
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
pescanik.net
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Pescanik(BasicNewsRecipe):
|
||||
@ -16,30 +15,32 @@ class Pescanik(BasicNewsRecipe):
|
||||
description = 'Pescanik'
|
||||
publisher = 'Pescanik'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 7
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='td' , attrs={'class':'buttonheading'})
|
||||
,dict(name='span', attrs={'class':'article_seperator'})
|
||||
,dict(name=['object','link'])
|
||||
,dict(name=['object','link','img','h4','ul'])
|
||||
]
|
||||
|
||||
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||
@ -54,5 +55,3 @@ class Pescanik(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
politika.rs
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Politika(BasicNewsRecipe):
|
||||
@ -16,13 +15,13 @@ class Politika(BasicNewsRecipe):
|
||||
publisher = 'Politika novine i Magazini d.o.o'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
language = _('Serbian')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
@ -61,6 +60,6 @@ class Politika(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
||||
if ftag:
|
||||
ftag['align'] = 'left'
|
||||
if ftag.has_key('align'):
|
||||
del ftag['align']
|
||||
return soup
|
||||
|
@ -4,11 +4,10 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
vijesti.cg.yu
|
||||
vijesti.me
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Vijesti(BasicNewsRecipe):
|
||||
@ -22,10 +21,11 @@ class Vijesti(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
|
||||
cover_url = 'http://www.vijesti.me/img/logo.gif'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
@ -39,12 +39,9 @@ class Vijesti(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'align':'right'})
|
||||
,dict(name=['object','link'])
|
||||
]
|
||||
remove_tags = [dict(name=['object','link','embed'])]
|
||||
|
||||
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
|
||||
feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn-ME'
|
||||
@ -56,5 +53,3 @@ class Vijesti(BasicNewsRecipe):
|
||||
del item['align']
|
||||
item.insert(0,'<br /><br />')
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -1,14 +1,13 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
vreme.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre import strftime
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Vreme(BasicNewsRecipe):
|
||||
@ -24,15 +23,17 @@ class Vreme(BasicNewsRecipe):
|
||||
LOGIN = 'http://www.vreme.com/account/index.php'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
@ -87,7 +88,12 @@ class Vreme(BasicNewsRecipe):
|
||||
del soup.body['text' ]
|
||||
del soup.body['bgcolor']
|
||||
del soup.body['onload' ]
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
for item in soup.findAll('table'):
|
||||
if item.has_key('width'):
|
||||
del item['width']
|
||||
if item.has_key('height'):
|
||||
del item['height']
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
tbl = soup.body.table
|
||||
tbbb = soup.find('td')
|
||||
@ -104,5 +110,3 @@ class Vreme(BasicNewsRecipe):
|
||||
if cover_item:
|
||||
cover_url = self.INDEX + cover_item['src']
|
||||
return cover_url
|
||||
|
||||
language = _('Serbian')
|
Loading…
x
Reference in New Issue
Block a user