mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #1843 (Various updated recipes for better EPUB support)
This commit is contained in:
parent
2ecb5f82c8
commit
578cc310c2
@ -1,15 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
b92.net
|
b92.net
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class B92(BasicNewsRecipe):
|
class B92(BasicNewsRecipe):
|
||||||
title = 'B92'
|
title = 'B92'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -22,19 +21,22 @@ class B92(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://static.b92.net/images/fp/logo.gif'
|
cover_url = 'http://static.b92.net/images/fp/logo.gif'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment' , description
|
||||||
|
, '--category' , category
|
||||||
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description
|
|
||||||
, '--category', category
|
|
||||||
, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
||||||
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
||||||
@ -54,9 +56,10 @@ class B92(BasicNewsRecipe):
|
|||||||
return nurl
|
return nurl
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn'
|
lng = 'sr-Latn-RS'
|
||||||
soup.html['lang'] = 'sr-Latn'
|
soup.html['xml:lang'] = lng
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
soup.html['lang'] = lng
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
@ -64,4 +67,3 @@ class B92(BasicNewsRecipe):
|
|||||||
del item['align']
|
del item['align']
|
||||||
item.insert(0,'<br /><br />')
|
item.insert(0,'<br /><br />')
|
||||||
return soup
|
return soup
|
||||||
language = _('Serbian')
|
|
@ -1,15 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
blic.rs
|
blic.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Blic(BasicNewsRecipe):
|
class Blic(BasicNewsRecipe):
|
||||||
title = u'Blic'
|
title = u'Blic'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
@ -21,15 +20,17 @@ class Blic(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -44,10 +45,9 @@ class Blic(BasicNewsRecipe):
|
|||||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Serbian')
|
|
@ -1,14 +1,13 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
danas.rs
|
danas.rs
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
|
class Danas(BasicNewsRecipe):
|
||||||
title = u'Danas'
|
title = u'Danas'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -20,15 +19,17 @@ class Danas(BasicNewsRecipe):
|
|||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
@ -43,9 +44,8 @@ class Danas(BasicNewsRecipe):
|
|||||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
language = _('Serbian')
|
|
@ -5,9 +5,8 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
'''
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class ElArgentino(BasicNewsRecipe):
|
class ElArgentino(BasicNewsRecipe):
|
||||||
title = 'ElArgentino.com'
|
title = 'ElArgentino.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -21,9 +20,10 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
||||||
|
language = _('Spanish')
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
@ -59,5 +59,3 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
@ -1,14 +1,12 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
granma.cubaweb.cu
|
granma.cubaweb.cu
|
||||||
'''
|
'''
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Granma(BasicNewsRecipe):
|
class Granma(BasicNewsRecipe):
|
||||||
title = 'Diario Granma'
|
title = 'Diario Granma'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -21,18 +19,21 @@ class Granma(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
||||||
|
language = _('Spanish')
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
|
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['embed','link','object'])]
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
|
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
|
||||||
|
|
||||||
@ -48,5 +49,4 @@ class Granma(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
@ -6,29 +6,36 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
infobae.com
|
infobae.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Infobae(BasicNewsRecipe):
|
class Infobae(BasicNewsRecipe):
|
||||||
title = 'Infobae.com'
|
title = 'Infobae.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
publisher = 'Infobae.com'
|
publisher = 'Infobae.com'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
language = _('Spanish')
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'iso-8859-1'
|
||||||
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['embed','link','object'])
|
||||||
|
,dict(name='a', attrs={'onclick':'javascript:window.print()'})
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
||||||
@ -48,5 +55,3 @@ class Infobae(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
@ -1,47 +1,46 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
jutarnji.hr
|
jutarnji.hr
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Jutarnji(BasicNewsRecipe):
|
class Jutarnji(BasicNewsRecipe):
|
||||||
title = u'Jutarnji'
|
title = u'Jutarnji'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
description = u'Hrvatski portal'
|
description = u'Hrvatski portal'
|
||||||
publisher = 'Jutarnji.hr'
|
publisher = 'Jutarnji.hr'
|
||||||
category = 'news, politics, Croatia'
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 2
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 2
|
||||||
delay = 1
|
delay = 1
|
||||||
language = _('Croatian')
|
language = _('Croatian')
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='embed')
|
dict(name=['embed','hr','link','object'])
|
||||||
,dict(name='a', attrs={'class':'a11'})
|
,dict(name='a', attrs={'class':'a11'})
|
||||||
,dict(name='hr')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -60,13 +59,11 @@ class Jutarnji(BasicNewsRecipe):
|
|||||||
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="hr-HR"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
|
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll(width=True):
|
for item in soup.findAll(width=True):
|
||||||
del item['width']
|
del item['width']
|
||||||
return soup
|
return soup
|
||||||
|
|
@ -1,14 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
juventudrebelde.cu
|
juventudrebelde.cu
|
||||||
'''
|
'''
|
||||||
from calibre import strftime
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Juventudrebelde(BasicNewsRecipe):
|
class Juventudrebelde(BasicNewsRecipe):
|
||||||
title = 'Juventud Rebelde'
|
title = 'Juventud Rebelde'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -20,17 +20,18 @@ class Juventudrebelde(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
language = _('Spanish')
|
||||||
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
||||||
|
|
||||||
@ -50,5 +51,4 @@ class Juventudrebelde(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
@ -1,15 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
nin.co.yu
|
nin.co.yu
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re, urllib
|
import re, urllib
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Nin(BasicNewsRecipe):
|
class Nin(BasicNewsRecipe):
|
||||||
title = 'NIN online'
|
title = 'NIN online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -27,15 +26,17 @@ class Nin(BasicNewsRecipe):
|
|||||||
LOGIN = PREFIX + '/?logout=true'
|
LOGIN = PREFIX + '/?logout=true'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -69,5 +70,3 @@ class Nin(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Serbian')
|
|
@ -1,15 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
novosti.rs
|
novosti.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Novosti(BasicNewsRecipe):
|
class Novosti(BasicNewsRecipe):
|
||||||
title = u'Vecernje Novosti'
|
title = u'Vecernje Novosti'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
@ -22,15 +21,17 @@ class Novosti(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -40,10 +41,8 @@ class Novosti(BasicNewsRecipe):
|
|||||||
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Serbian')
|
|
@ -1,41 +1,44 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
nspm.rs
|
nspm.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Nspm(BasicNewsRecipe):
|
class Nspm(BasicNewsRecipe):
|
||||||
title = u'Nova srpska politicka misao'
|
title = u'Nova srpska politicka misao'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
||||||
publisher = 'NSPM'
|
publisher = 'NSPM'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 7
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
remove_tags = [dict(name='a')]
|
remove_tags = [
|
||||||
|
dict(name=['a','img','link','object','embed'])
|
||||||
|
,dict(name='td', attrs={'class':'buttonheading'})
|
||||||
|
]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
@ -48,13 +51,12 @@ class Nspm(BasicNewsRecipe):
|
|||||||
return url.replace('.html','/stampa.html')
|
return url.replace('.html','/stampa.html')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
lng = 'sr-Latn-RS'
|
||||||
soup.html['lang'] = 'sr-Latn-RS'
|
soup.html['xml:lang'] = lng
|
||||||
|
soup.html['lang'] = lng
|
||||||
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
|
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
|
||||||
if ftag:
|
if ftag:
|
||||||
ftag['content'] = 'sr-Latn-RS'
|
ftag['content'] = lng
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Serbian')
|
|
@ -1,45 +1,46 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
pescanik.net
|
pescanik.net
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Pescanik(BasicNewsRecipe):
|
class Pescanik(BasicNewsRecipe):
|
||||||
title = 'Pescanik'
|
title = 'Pescanik'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Pescanik'
|
description = 'Pescanik'
|
||||||
publisher = 'Pescanik'
|
publisher = 'Pescanik'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 7
|
oldest_article = 5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
||||||
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='td' , attrs={'class':'buttonheading'})
|
dict(name='td' , attrs={'class':'buttonheading'})
|
||||||
,dict(name='span', attrs={'class':'article_seperator'})
|
,dict(name='span', attrs={'class':'article_seperator'})
|
||||||
,dict(name=['object','link'])
|
,dict(name=['object','link','img','h4','ul'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||||
@ -54,5 +55,3 @@ class Pescanik(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Serbian')
|
|
@ -6,9 +6,8 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
politika.rs
|
politika.rs
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Politika(BasicNewsRecipe):
|
class Politika(BasicNewsRecipe):
|
||||||
title = u'Politika Online'
|
title = u'Politika Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -16,16 +15,16 @@ class Politika(BasicNewsRecipe):
|
|||||||
publisher = 'Politika novine i Magazini d.o.o'
|
publisher = 'Politika novine i Magazini d.o.o'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Serbian')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
@ -61,6 +60,6 @@ class Politika(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
||||||
if ftag:
|
if ftag.has_key('align'):
|
||||||
ftag['align'] = 'left'
|
del ftag['align']
|
||||||
return soup
|
return soup
|
||||||
|
@ -4,13 +4,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
vijesti.cg.yu
|
vijesti.me
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Vijesti(BasicNewsRecipe):
|
class Vijesti(BasicNewsRecipe):
|
||||||
title = 'Vijesti'
|
title = 'Vijesti'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -22,13 +21,14 @@ class Vijesti(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
|
cover_url = 'http://www.vijesti.me/img/logo.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
@ -39,12 +39,9 @@ class Vijesti(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [dict(name=['object','link','embed'])]
|
||||||
dict(name='div', attrs={'align':'right'})
|
|
||||||
,dict(name=['object','link'])
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
|
feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-ME'
|
soup.html['xml:lang'] = 'sr-Latn-ME'
|
||||||
@ -56,5 +53,3 @@ class Vijesti(BasicNewsRecipe):
|
|||||||
del item['align']
|
del item['align']
|
||||||
item.insert(0,'<br /><br />')
|
item.insert(0,'<br /><br />')
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Serbian')
|
|
@ -1,16 +1,15 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
vreme.com
|
vreme.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Vreme(BasicNewsRecipe):
|
class Vreme(BasicNewsRecipe):
|
||||||
title = 'Vreme'
|
title = 'Vreme'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -24,15 +23,17 @@ class Vreme(BasicNewsRecipe):
|
|||||||
LOGIN = 'http://www.vreme.com/account/index.php'
|
LOGIN = 'http://www.vreme.com/account/index.php'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
language = _('Serbian')
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment' , description
|
||||||
, '--category', category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -87,14 +88,19 @@ class Vreme(BasicNewsRecipe):
|
|||||||
del soup.body['text' ]
|
del soup.body['text' ]
|
||||||
del soup.body['bgcolor']
|
del soup.body['bgcolor']
|
||||||
del soup.body['onload' ]
|
del soup.body['onload' ]
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
for item in soup.findAll('table'):
|
||||||
|
if item.has_key('width'):
|
||||||
|
del item['width']
|
||||||
|
if item.has_key('height'):
|
||||||
|
del item['height']
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
tbl = soup.body.table
|
tbl = soup.body.table
|
||||||
tbbb = soup.find('td')
|
tbbb = soup.find('td')
|
||||||
if tbbb:
|
if tbbb:
|
||||||
tbbb.extract()
|
tbbb.extract()
|
||||||
tbl.extract()
|
tbl.extract()
|
||||||
soup.body.insert(0,tbbb)
|
soup.body.insert(0,tbbb)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
@ -104,5 +110,3 @@ class Vreme(BasicNewsRecipe):
|
|||||||
if cover_item:
|
if cover_item:
|
||||||
cover_url = self.INDEX + cover_item['src']
|
cover_url = self.INDEX + cover_item['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
language = _('Serbian')
|
|
Loading…
x
Reference in New Issue
Block a user