mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1742214 [Please remove following recipes from Calibre](https://bugs.launchpad.net/calibre/+bug/1742214)
This commit is contained in:
parent
6250465558
commit
61ca420d4b
@ -1,48 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
24sata.rs
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Ser24Sata(BasicNewsRecipe):
    """Recipe that builds an e-book from the RSS feeds of 24sata.rs."""

    # Publication metadata.
    title = '24 Sata - Sr'
    __author__ = 'Darko Miletic'
    description = '24 sata portal vesti iz Srbije'
    publisher = 'Ringier d.o.o.'
    category = 'news, politics, entertainment, Serbia'
    language = 'sr'
    publication_type = 'newsportal'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        body{font-family: serif1, serif}
        """

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [
        (u'Vesti', u'http://www.24sata.rs/rss/vesti.xml'),
        (u'Sport', u'http://www.24sata.rs/rss/sport.xml'),
        (u'Šou', u'http://www.24sata.rs/rss/sou.xml'),
        (u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'),
        (u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with a ``print`` segment inserted before its last path segment."""
        head, _sep, tail = url.rpartition('/')
        return '/'.join((head, 'print', tail))
|
|
@ -1,51 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
akter.co.rs
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Akter(BasicNewsRecipe):
    """Recipe for the weekly ("Nedeljnik") feed of akter.co.rs."""

    # Publication metadata.
    title = 'AKTER - Nedeljnik'
    __author__ = 'Darko Miletic'
    description = 'AKTER - nedeljni politicki magazin savremene Srbije'
    publisher = 'Akter Media Group d.o.o.'
    category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'  # noqa
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.akter.co.rs/gfx/logoneover.png'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 8
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True

    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Tahoma,Geneva,sans1,sans-serif}
        img{margin-bottom: 0.8em; display: block;}
        """

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Only the <div id="section_to_print"> element is kept from each page.
    keep_only_tags = [dict(name='div', attrs={'id': 'section_to_print'})]

    feeds = [(u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik')]

    def print_version(self, url):
        """Return ``url`` with ``print-`` prefixed to its last path segment."""
        head, _sep, tail = url.rpartition('/')
        return '%s/print-%s' % (head, tail)

    def get_cover_url(self):
        """Return the cover image URL found on the weekly page, or None.

        The image is the first ``<img>`` inside the ``div.lastissue``
        element; its ``src`` is appended to the site root.
        """
        page = self.index_to_soup('http://www.akter.co.rs/weekly.html')
        issue_box = page.find('div', attrs={'class': 'lastissue'})
        if not issue_box:
            return None
        cover_img = issue_box.find('img')
        if not cover_img:
            return None
        return 'http://www.akter.co.rs' + cover_img['src']
|
|
@ -1,41 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
akter.co.rs
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Akter(BasicNewsRecipe):
    """Recipe for the daily ("Dnevnik") feed of akter.co.rs."""

    # Publication metadata.
    title = 'AKTER - Dnevnik'
    __author__ = 'Darko Miletic'
    description = 'AKTER - Najnovije vesti iz Srbije'
    publisher = 'Akter Media Group d.o.o.'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.akter.co.rs/gfx/logodnover.png'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 8
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True

    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Tahoma,Geneva,sans1,sans-serif}
        img{margin-bottom: 0.8em; display: block;}
        """

    # Book metadata, copied from the attributes defined above (no tags here).
    conversion_options = {
        'comment': description,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Only the <div id="section_to_print"> element is kept from each page.
    keep_only_tags = [dict(name='div', attrs={'id': 'section_to_print'})]

    feeds = [(u'Vesti', u'http://akter.co.rs/rss/dnevni')]

    def print_version(self, url):
        """Return ``url`` with ``print-`` prefixed to its last path segment."""
        head, _sep, tail = url.rpartition('/')
        return '%s/print-%s' % (head, tail)
|
|
@ -1,63 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
www.alo.rs
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Alo_Novine(BasicNewsRecipe):
    """Recipe for www.alo.rs, fetching the print view of each feed article."""

    # Publication metadata.
    title = 'Alo!'
    __author__ = 'Darko Miletic'
    description = 'News Portal from Serbia'
    publisher = 'Alo novine d.o.o.'
    category = 'news, politics, Serbia'
    language = 'sr'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    use_embedded_content = False
    no_stylesheets = True

    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
        .lead {font-size: 1.3em}
        h1{color: #DB0700}
        .article_uvod{font-style: italic; font-size: 1.2em}
        img{margin-bottom: 0.8em} """

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [dict(name=['object', 'link', 'embed'])]
    remove_attributes = ['height', 'width']

    feeds = [
        (u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti'),
        (u'Politika', u'http://www.alo.rs/rss/politika'),
        (u'Vesti', u'http://www.alo.rs/rss/vesti'),
        (u'Sport', u'http://www.alo.rs/rss/sport'),
        (u'Ljudi', u'http://www.alo.rs/rss/ljudi'),
        (u'Saveti', u'http://www.alo.rs/rss/saveti'),
    ]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag and return ``soup``."""
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup

    def print_version(self, url):
        """Return the print-template URL for an article.

        The article id is taken from the second-to-last ``/``-separated
        segment of ``url``.
        """
        article_id = url.rpartition('/')[0].rpartition('/')[2]
        return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + article_id

    def image_url_processor(self, baseurl, url):
        """Return ``url`` with each ``alo.rs//`` collapsed to ``alo.rs/``."""
        return url.replace('alo.rs//', 'alo.rs/')
|
|
@ -1,37 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
beta.rs
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
    """Recipe for the BETA news agency (beta.rs), using content embedded in its feeds."""

    # Publication metadata.
    title = 'BETA'
    __author__ = 'Darko Miletic'
    description = 'Novinska Agencija'
    publisher = 'Beta'
    category = 'news, politics, Serbia'
    language = 'sr'
    publication_type = 'newsportal'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = True
    no_stylesheets = False

    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif} '
        'img{margin-bottom: 0.8em} '
    )

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [
        (u'Vesti dana', u'http://www.beta.rs/rssvd.asp'),
        (u'Ekonomija', u'http://www.beta.rs/rssek.asp'),
        (u'Sport', u'http://www.beta.rs/rsssp.asp'),
    ]

    def preprocess_html(self, soup):
        """Pass ``soup`` through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
|
@ -1,33 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
beta.rs
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
    """Recipe for the English feed of the BETA news agency (beta.rs)."""

    # Publication metadata.
    title = 'BETA - English'
    __author__ = 'Darko Miletic'
    description = 'Serbian news agency'
    publisher = 'Beta'
    category = 'news, politics, Serbia'
    language = 'en'

    # Same value as ``language``; this is the name conversion_options reads.
    lang = 'en'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = True
    no_stylesheets = False

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    feeds = [(u'News', u'http://www.beta.rs/rssen.asp')]

    def preprocess_html(self, soup):
        """Pass ``soup`` through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
|
@ -1,53 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
e-novine.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class E_novine(BasicNewsRecipe):
    """Recipe for e-novine.com, fetching the ``?print`` view of each article."""

    # Publication metadata.
    title = 'E-Novine'
    __author__ = 'Darko Miletic'
    description = 'News from Serbia'
    publisher = 'E-novine'
    category = 'news, politics, Balcans'
    language = 'sr'
    publication_type = 'newsportal'
    masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    extra_css = (
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        '.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} '
        'img{float: none; margin-bottom: 0.8em} '
    )  # noqa

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Page cleanup: keep the article head and body, drop embeds and the tools box.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_head'}),
        dict(name='div', attrs={'id': 'article_body'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'embed', 'iframe']),
        dict(attrs={'id': 'box_article_tools'}),
    ]
    remove_attributes = ['height', 'width', 'lang']

    feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss')]

    def preprocess_html(self, soup):
        """Delete inline ``style`` attributes, then return ``adeify_images(soup)``."""
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return self.adeify_images(soup)

    def print_version(self, url):
        """Return ``url`` with ``?print`` appended."""
        return '%s?print' % url
|
|
@ -1,48 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
emg.rs/vesti
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class emportal_rs(BasicNewsRecipe):
    """Recipe for the emg.rs/vesti business news feeds (Ekonom:east)."""

    # Publication metadata.
    title = 'Ekonom:east Vesti'
    __author__ = 'Darko Miletic'
    description = 'Vasa dnevna doza poslovnih informacija iz Srbije, regiona i sveta. Vesti, Berze, Dogadaji, Casopisi.'
    publisher = 'Ekonom:east Media Group'
    category = 'Ekonom:east Media Group, Ekonomist, Budelar, Bankar, EMportal, Preduzeca, Moja Posla, EU praktikum, ekonomija, Srbija, Beograd, investicije, finansije, energetika, berza'  # noqa
    language = 'sr'
    masthead_url = 'http://www.emg.rs/img/emportal-rss.png'

    # Download settings.
    encoding = 'utf8'
    oldest_article = 2
    max_articles_per_feed = 200
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True

    extra_css = (
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: Arial,Helvetica,sans1,sans-serif } '
        '.article_description{font-family: sans1, sans-serif} '
    )  # noqa

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Page cleanup rules, all keyed on CSS class names.
    keep_only_tags = [dict(attrs={'class': 'text'})]
    remove_tags = [dict(attrs={'class': ['text-share']})]
    remove_tags_after = dict(attrs={'class': 'text-share'})
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Srbija', u'http://www.emg.rs/vesti/srbija/rss.xml'),
        (u'Region', u'http://www.emg.rs/vesti/region/rss.xml'),
        (u'Svet', u'http://www.emg.rs/vesti/svet/rss.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with every ``.html`` replaced by ``.print.html``."""
        printable = url.replace('.html', '.print.html')
        return printable

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag and return ``soup``."""
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup
|
|
@ -1,82 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
glassrpske.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class GlasSrpske(BasicNewsRecipe):
    """Recipe for glassrpske.com.

    The entries in ``feeds`` are section pages rather than RSS documents:
    ``parse_index`` downloads each one and builds the article list from its
    ``div.gl_rub`` elements.
    """

    title = 'Glas Srpske'
    __author__ = 'Darko Miletic'
    description = 'Latest news from republika srpska'
    publisher = 'GLAS SRPSKE'
    # The original string had a stray double comma after "Banja Luka",
    # which produced an empty tag entry.
    category = 'Novine, Dnevne novine, Vijesti, Novosti, Ekonomija, Sport, Crna Hronika, Banja Luka, Republika Srpska, Bosna i Hercegovina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    masthead_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
    language = 'sr'
    publication_type = 'newspaper'
    # Site root; article hrefs found on the section pages are appended to it.
    INDEX = 'http://www.glassrpske.com'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} '  # noqa

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class': 'gl_cv paragraf'})]

    remove_tags = [dict(name=['object', 'link', 'base'])]

    feeds = [
        (u'Novosti', u'http://www.glassrpske.com/vijest/2/novosti/lat/'),
        (u'Drustvo', u'http://www.glassrpske.com/vijest/3/drustvo/lat/'),
        (u'Biznis', u'http://www.glassrpske.com/vijest/4/ekonomija/lat/'),
        (u'Kroz RS', u'http://www.glassrpske.com/vijest/5/krozrs/lat/'),
        (u'Hronika', u'http://www.glassrpske.com/vijest/6/hronika/lat/'),
        (u'Srbija', u'http://www.glassrpske.com/vijest/8/srbija/lat/'),
        (u'Region', u'http://www.glassrpske.com/vijest/18/region/lat/'),
        (u'Svijet', u'http://www.glassrpske.com/vijest/12/svijet/lat/'),
        (u'Kultura', u'http://www.glassrpske.com/vijest/9/kultura/lat/'),
        (u'Banja Luka', u'http://www.glassrpske.com/vijest/10/banjaluka/lat/'),
        (u'Jet Set', u'http://www.glassrpske.com/vijest/11/jetset/lat/'),
        (u'Muzika', u'http://www.glassrpske.com/vijest/19/muzika/lat/'),
        (u'Sport', u'http://www.glassrpske.com/vijest/13/sport/lat/'),
        (u'Kolumne', u'http://www.glassrpske.com/vijest/16/kolumne/lat/'),
        (u'Plus', u'http://www.glassrpske.com/vijest/7/plus/lat/')
    ]

    def preprocess_html(self, soup):
        """Pass ``soup`` through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)

    def parse_index(self):
        """Build the article index by scraping every section page.

        Returns a list of ``(feed title, articles)`` tuples. Each article
        is a dict with ``title``, ``date``, ``url`` and ``description``
        keys. ``description`` is the text of the entry's first ``<p>``;
        ``date`` is the part of that text before the first ``|``.

        Entries without a link (no ``<a>`` or no ``href``) are skipped
        instead of raising, and a missing ``<p>`` yields an empty
        description.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed') + ' %s...' %
                                 (feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class': 'gl_rub'}):
                atag = item.find('a')
                href = atag.get('href') if atag is not None else None
                if not href:
                    # Nothing to download for this entry.
                    continue
                ptag = item.find('p')
                # Convert the paragraph once; it feeds both fields below.
                description = self.tag_to_string(ptag) if ptag is not None else ''
                articles.append({
                    'title': self.tag_to_string(atag),
                    'date': description.partition('|')[0],
                    'url': self.INDEX + href,
                    'description': description,
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
|
@ -1,71 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.glas-javnosti.rs
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class GlasJavnosti(BasicNewsRecipe):
    """Recipe for www.glas-javnosti.rs, one feed per site section."""

    # Publication metadata.
    title = 'Glas Javnosti'
    __author__ = 'Darko Miletic'
    description = 'Glas javnosti - Mi ne ulepsavamo stvarnost'
    publisher = 'Glas Javnosti'
    category = 'news, politics, Serbia'
    language = 'sr'
    publication_type = 'newspaper'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = False

    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif} '
        'img{margin-bottom: 0.8em} '
    )  # noqa

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Page cleanup rules.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'above-content'}),
        dict(name='div', attrs={'class': 'node'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'img']),
        dict(name='div', attrs={'class': ['links', 'meta']}),
        dict(name='div', attrs={'id': 'block-block-12'}),
    ]
    remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align']

    feeds = [
        (u'Politika', u'http://www.glas-javnosti.rs/aktuelni-clanci/2'),
        (u'Tema', u'http://www.glas-javnosti.rs/aktuelni-clanci/48'),
        (u'Drustvo', u'http://www.glas-javnosti.rs/aktuelni-clanci/17'),
        (u'Ekonomija', u'http://www.glas-javnosti.rs/aktuelni-clanci/16'),
        (u'Dosije', u'http://www.glas-javnosti.rs/aktuelni-clanci/65'),
        (u'Svet', u'http://www.glas-javnosti.rs/aktuelni-clanci/18'),
        (u'Hronika', u'http://www.glas-javnosti.rs/aktuelni-clanci/19'),
        (u'Kultura', u'http://www.glas-javnosti.rs/aktuelni-clanci/6'),
        (u'Ljudi i Dogadjaji', u'http://www.glas-javnosti.rs/aktuelni-clanci/37'),
        (u'Putovanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/113'),
        (u'Feljton', u'http://www.glas-javnosti.rs/aktuelni-clanci/49'),
        (u'Sport', u'http://www.glas-javnosti.rs/aktuelni-clanci/1'),
        (u'Lov i Ribolov', u'http://www.glas-javnosti.rs/aktuelni-clanci/591'),
        (u'Nedelja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1862'),
        (u'Glasno', u'http://www.glas-javnosti.rs/aktuelni-clanci/590'),
        (u'Tehnologija', u'http://www.glas-javnosti.rs/aktuelni-clanci/609'),
        (u'Reflektor', u'http://www.glas-javnosti.rs/aktuelni-clanci/717'),
        (u'Saznanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1694'),
        (u'Beograd', u'http://www.glas-javnosti.rs/aktuelni-clanci/40'),
        (u'Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/114'),
        (u'Zapadna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/41'),
        (u'Istocna i Juzna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/42'),
        (u'Sumadija i Pomoravlje', u'http://www.glas-javnosti.rs/aktuelni-clanci/43'),
        (u'Vojvodina', u'http://www.glas-javnosti.rs/aktuelni-clanci/44'),
        (u'Republika Srpska', u'http://www.glas-javnosti.rs/aktuelni-clanci/45'),
        (u'Slobodno Vreme', u'http://www.glas-javnosti.rs/aktuelni-clanci/61'),
        (u'Konjske Snage', u'http://www.glas-javnosti.rs/aktuelni-clanci/46'),
    ]
|
|
@ -1,68 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
vesti.krstarica.com
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Krstarica(BasicNewsRecipe):
    """Recipe for the vesti.krstarica.com news feeds."""

    # Publication metadata.
    title = 'Krstarica - Vesti'
    __author__ = 'Darko Miletic'
    description = 'Dnevne vesti iz Srbije i sveta'
    publisher = 'Krstarica'
    category = 'news, politics, Serbia'
    language = 'sr'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: serif1, serif}'
    )  # noqa

    # Option list and option string built from the metadata above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="%s"\ncomments="%s"\ntags="%s"\n'
        'override_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
    ) % (publisher, description, category)

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [
        (u'Vesti dana', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=aktuelno&lang=0'),
        (u'Srbija', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=scg&lang=0'),
        (u'Svet', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=svet&lang=0'),
        (u'Politika', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=politika&lang=0'),
        (u'Ekonomija', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=ekonomija&lang=0'),
        (u'Drustvo', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=drustvo&lang=0'),
        (u'Kultura', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=kultura&lang=0'),
        (u'Nauka i Tehnologija', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=nauka&lang=0'),
        (u'Medicina', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=medicina&lang=0'),
        (u'Sport', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=sport&lang=0'),
        (u'Zanimljivosti', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=zanimljivosti&lang=0'),
    ]

    def preprocess_html(self, soup):
        """Rearrange the page around its article container and strip attributes.

        A UTF-8 content-type meta string is inserted at the start of
        ``<head>``. If the page has an ``<h4>``, its grandparent element
        is detached, all ``<table>`` and ``<center>`` elements are
        removed, and the detached element is re-inserted into ``<body>``
        at position 1 and renamed to ``div``. Finally every ``style``
        and ``align`` attribute is deleted.
        """
        soup.head.insert(
            0, '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>')

        heading = soup.find('h4')
        if heading:
            container = heading.parent.parent
            container.extract()
            for clutter in soup.findAll(['table', 'center']):
                clutter.extract()
            soup.body.insert(1, container)
            container.name = 'div'

        # NOTE(review): these loops are assumed to run for every page, not
        # only when an <h4> was found -- the source indentation is lost; confirm.
        for attr_name in ('style', 'align'):
            for tag in soup.findAll(**{attr_name: True}):
                del tag[attr_name]
        return soup
|
|
@ -1,62 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
libartes.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Libartes(BasicNewsRecipe):
    """Recipe for the libartes.com magazine.

    There is no RSS feed: ``parse_index`` reads the home page and collects
    every ``<a class="belad">`` link as an article.
    """

    title = 'Libartes'
    __author__ = 'Darko Miletic'
    description = 'Elektronski časopis Libartes delo je kulturnih entuzijasta, umetnika i teoretičara umetnosti i književnosti. Časopis Libartes izlazi tromesečno i bavi se različitim granama umetnosti - književnošću, muzikom, filmom, likovnim umetnostima, dizajnom i arhitekturom.'  # noqa
    publisher = 'Libartes'
    category = 'literatura, knjizevnost, film, dizajn, arhitektura, muzika'
    no_stylesheets = True
    # Home page URL (note the trailing slash); also the base for relative links.
    INDEX = 'http://libartes.com/'
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://libartes.com/index_files/logo.gif'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: "Times New Roman",Times,serif1, serif}
        img{display:block}
        .naslov{font-size: xx-large; font-weight: bold}
        .nag{font-size: large; font-weight: bold}
        """

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    remove_tags_before = dict(attrs={'id': 'nav'})
    remove_tags_after = dict(attrs={'id': 'fb'})
    keep_only_tags = [dict(name='div', attrs={'id': 'center_content'})]
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'embed', 'meta']),
        dict(attrs={'id': 'nav'}),
    ]

    def _absolute_url(self, href):
        """Return ``href`` as an absolute URL, resolving it against ``INDEX``."""
        if href.startswith(('http://', 'https://')):
            # Already absolute; prefixing INDEX would corrupt it.
            return href
        # INDEX ends with '/', so drop leading slashes to avoid '//' in the path.
        return self.INDEX + href.lstrip('/')

    def parse_index(self):
        """Build the single-feed article index from the home page.

        Returns ``[('Casopis Libartes', articles)]``. Each article is a
        dict with ``title``, ``date``, ``url`` and an empty
        ``description``. ``date`` is the current time formatted with
        ``self.timefmt`` and is the same for every article.
        """
        soup = self.index_to_soup(self.INDEX)
        # The date is loop-invariant, so format it once.
        date = strftime(self.timefmt)
        articles = []
        for feed_link in soup.findAll(name='a', attrs={'class': 'belad'}, href=True):
            articles.append({
                'title': self.tag_to_string(feed_link),
                'date': date,
                'url': self._absolute_url(feed_link['href']),
                'description': '',
            })
        return [('Casopis Libartes', articles)]
|
|
@ -1,64 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
www.monitor.co.me
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class MonitorCG(BasicNewsRecipe):
    """Recipe for the weekly www.monitor.co.me, one RSS feed per site section."""

    # Publication metadata.
    title = 'MONITOR online'
    __author__ = 'Darko Miletic'
    description = 'Nezavisni nedjeljnik Monitor'
    publisher = '"Monitor" D.O.O. Podgorica'
    category = 'news, politics, Montenegro'
    language = 'sr'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 15
    max_articles_per_feed = 150
    use_embedded_content = False
    auto_cleanup = False
    no_stylesheets = True
    remove_empty_feeds = True

    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        h2{font-family: Cambria,"Times New Roman",Times,serif1,serif}
        body{font-family: Arial,sans1,sans-serif}
        img{display: block}
        """

    # Book metadata, copied from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'pretty_print': True,
    }

    # Substitution rule: every U+0110 character becomes U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Page cleanup rules.
    keep_only_tags = [
        dict(attrs={'class': ['contentheading', 'article-meta', 'article-content']}),
    ]
    remove_attributes = ['width', 'height', 'font', 'border', 'align']

    # NOTE(review): 'Fokus' and 'Monitoring' point at the same URL
    # (id=19&Itemid=252) -- confirm whether one of them should differ.
    feeds = [
        (u'Danas, Sjutra', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=5&Itemid=27&format=feed&type=rss'),
        (u'Duhankesa', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=13&Itemid=37&format=feed&type=rss'),
        (u'Znaci prepoznavanja', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=6&Itemid=358&format=feed&type=rss'),
        (u'Paralele', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=8&Itemid=359&format=feed&type=rss'),
        (u'Razbijeno ogledalo', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=18&Itemid=354&format=feed&type=rss'),
        (u'Tržište', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=26&Itemid=371&format=feed&type=rss'),
        (u'Feljton', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=29&Itemid=471&format=feed&type=rss'),
        (u'Monitor', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=1&Itemid=1852&format=feed&type=rss'),
        (u'Altervizija', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=31&Itemid=2623&format=feed&type=rss'),
        (u'Fenomeni', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=35&Itemid=3549&format=feed&type=rss'),
        (u'Fokus', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=19&Itemid=252&format=feed&type=rss'),
        (u'Monitoring', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=19&Itemid=252&format=feed&type=rss'),
        (u'Profil', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=21&Itemid=256&format=feed&type=rss'),
        (u'Intervju', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=27&Itemid=404&format=feed&type=rss'),
        (u'Društvo', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=14&Itemid=2&format=feed&type=rss'),
        (u'Region', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=12&Itemid=53&format=feed&type=rss'),
        (u'Svijet', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=11&Itemid=360&format=feed&type=rss'),
        (u'Kultura', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=9&Itemid=361&format=feed&type=rss'),
    ]
|
|
@ -1,100 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.standard.rs
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class NoviStandard(BasicNewsRecipe):
    # Recipe for the Serbian magazine Novi Standard (www.standard.rs).
    title = 'Novi Standard'
    __author__ = 'Darko Miletic'
    description = 'NoviStandard - energija je neunistiva!'
    publisher = 'Novi Standard'
    category = 'news, politics, Serbia'
    no_stylesheets = True
    delay = 1
    oldest_article = 15
    encoding = 'utf-8'
    publication_type = 'magazine'
    # Login is attempted in get_browser() only when both a username and a
    # password have been supplied.
    needs_subscription = 'optional'
    remove_empty_feeds = True
    INDEX = 'http://www.standard.rs/'
    use_embedded_content = False
    language = 'sr'
    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
        .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
        .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
        .contentheading{color: gray; font-size: x-large}
        .article-meta, .createdby{color: red}
        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Replaces U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    def get_browser(self):
        """Return a browser that has opened INDEX and, if credentials are set, logged in."""
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.select_form(name='login')
            br['username'] = self.username
            br['passwd'] = self.password
            br.submit()
        return br

    keep_only_tags = [
        dict(attrs={'class': ['contentheading', 'article-meta', 'article-content']})]
    remove_tags_after = dict(attrs={'class': 'extravote-container'})
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'meta', 'base']), dict(
            attrs={'class': 'extravote-container'})
    ]
    remove_attributes = ['border', 'background',
                         'height', 'width', 'align', 'valign', 'lang']
    feeds = [

        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss'),
        (u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss'),
        (u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss'),
        (u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss'),
        (u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss'),
        (u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss'),
        (u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss'),
        (u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss'),
        (u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss')
    ]

    def preprocess_html(self, soup):
        """Clean up an article's parsed markup and return the same soup.

        Drops inline styles and childless divs, unwraps links (links that
        contain an image become plain divs, all others are replaced by their
        text) and gives every image an ``alt`` attribute.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('div'):
            if not item.contents:
                item.extract()
        for item in soup.findAll('a'):
            if item.string is not None:
                # The link holds a single text node: keep only that text.
                item.replaceWith(item.string)
            elif item.find('img'):
                # Keep the image but turn the anchor into an attribute-less div.
                item.name = 'div'
                item.attrs = []
            else:
                item.replaceWith(self.tag_to_string(item))
        for item in soup.findAll('img'):
            if not item.has_key('alt'):  # noqa
                item['alt'] = 'image'
        return soup
|
|
@ -1,51 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
english.pravda.ru
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Pravda_eng(BasicNewsRecipe):
    # Recipe for the English edition of Pravda (english.pravda.ru).

    # --- publication metadata ---
    title = 'Pravda in English'
    __author__ = 'Darko Miletic'
    description = 'News from Russia and rest of the world'
    publisher = 'PRAVDA.Ru'
    category = 'news, politics, Russia'
    language = 'en_RU'
    publication_type = 'newspaper'
    masthead_url = 'http://english.pravda.ru/pix/logo.gif'

    # --- download settings ---
    oldest_article = 2
    max_articles_per_feed = 200
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # --- styling and clean-up ---
    no_stylesheets = True
    extra_css = """
        body{font-family: Arial,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """
    remove_attributes = ['lang', 'style']
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]

    # Conversion metadata is taken from the attributes declared above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    feeds = [

        (u'World', u'http://english.pravda.ru/world/export-articles.xml'),
        (u'Russia', u'http://english.pravda.ru/russia/export-articles.xml'),
        (u'Society', u'http://english.pravda.ru/society/export-articles.xml'),
        (u'Incidents', u'http://english.pravda.ru/hotspots/export-articles.xml'),
        (u'Opinion', u'http://english.pravda.ru/opinion/export-articles.xml'),
        (u'Science', u'http://english.pravda.ru/science/export-articles.xml'),
        (u'Business', u'http://english.pravda.ru/business/export-articles.xml'),
        (u'Economics', u'http://english.pravda.ru/russia/economics/export-articles.xml'),
        (u'Politics', u'http://english.pravda.ru/russia/politics/export-articles.xml')
    ]

    def print_version(self, url):
        """Return the article URL with the ``?mode=print`` query appended."""
        print_suffix = '?mode=print'
        return url + print_suffix
|
|
@ -1,44 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
tanjug.rs
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
|
||||||
|
|
||||||
class Tanjug(BasicNewsRecipe):
    # Recipe for the Tanjug news agency feed (tanjug.rs).

    # --- publication metadata ---
    title = 'Tanjug'
    __author__ = 'Darko Miletic'
    description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
    publisher = 'Tanjug'
    category = 'news, politics, Serbia'
    language = 'sr'
    # Written onto the <html> element and a Content-Language meta tag by
    # preprocess_html().
    lang = 'sr-Latn-RS'
    direction = 'ltr'

    # --- download settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = True
    encoding = 'utf-8'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'  # noqa

    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=lang,
        pretty_print=True,
    )

    # Replaces U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Vesti', u'http://www.tanjug.rs/StaticPages/RssTanjug.aspx')]

    def preprocess_html(self, soup):
        """Stamp language/direction attributes on the document and return it via adeify_images()."""
        root = soup.html
        for attr_name, attr_value in (
            ('xml:lang', self.lang),
            ('lang', self.lang),
            ('dir', self.direction),
        ):
            root[attr_name] = attr_value
        # Prepend a Content-Language <meta> tag to <head>.
        content_language = Tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        soup.head.insert(0, content_language)
        return self.adeify_images(soup)
|
|
Loading…
x
Reference in New Issue
Block a user