Fix #1742214 [Please remove following recipes from Calibre](https://bugs.launchpad.net/calibre/+bug/1742214)

This commit is contained in:
Kovid Goyal 2018-01-09 23:48:05 +05:30
parent 6250465558
commit 61ca420d4b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
16 changed files with 0 additions and 916 deletions

View File

@ -1,48 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
24sata.rs
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Ser24Sata(BasicNewsRecipe):
    """Recipe for the 24sata.rs news portal, driven by the site's RSS feeds."""

    # Publication metadata
    title = '24 Sata - Sr'
    __author__ = 'Darko Miletic'
    description = '24 sata portal vesti iz Srbije'
    publisher = 'Ringier d.o.o.'
    category = 'news, politics, entertainment, Serbia'
    language = 'sr'
    publication_type = 'newsportal'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n'
        'body{font-family: serif1, serif}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    feeds = [
        (u'Vesti', u'http://www.24sata.rs/rss/vesti.xml'),
        (u'Sport', u'http://www.24sata.rs/rss/sport.xml'),
        (u'Šou', u'http://www.24sata.rs/rss/sou.xml'),
        (u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'),
        (u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with ``/print/`` placed before its last path segment."""
        pieces = url.rpartition('/')
        return pieces[0] + '/print/' + pieces[2]

View File

@ -1,51 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
akter.co.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Akter(BasicNewsRecipe):
    """Recipe for the weekly edition of AKTER (akter.co.rs)."""

    # Publication metadata
    title = 'AKTER - Nedeljnik'
    __author__ = 'Darko Miletic'
    description = 'AKTER - nedeljni politicki magazin savremene Srbije'
    publisher = 'Akter Media Group d.o.o.'
    category = (
        'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, '
        'biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, '
        'tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, '
        'zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, '
        'vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, '
        'komentar, reportaza, arhiva vesti, news, serbia, politics'
    )
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.akter.co.rs/gfx/logoneover.png'

    # Download settings
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}\n'
        'body{font-family: Tahoma,Geneva,sans1,sans-serif}\n'
        'img{margin-bottom: 0.8em; display: block;}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id': 'section_to_print'}),
    ]

    feeds = [
        (u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik'),
    ]

    def print_version(self, url):
        """Return ``url`` with ``print-`` prefixed to its last path segment."""
        pieces = url.rpartition('/')
        return pieces[0] + '/print-' + pieces[2]

    def get_cover_url(self):
        """Return the cover image URL from the weekly page, or None.

        The image is the first ``<img>`` inside the ``div.lastissue``
        element; None is returned when either element is missing.
        """
        weekly_page = self.index_to_soup('http://www.akter.co.rs/weekly.html')
        issue_box = weekly_page.find('div', attrs={'class': 'lastissue'})
        if not issue_box:
            return None
        cover_img = issue_box.find('img')
        if not cover_img:
            return None
        return 'http://www.akter.co.rs' + cover_img['src']

View File

@ -1,41 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
akter.co.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Akter(BasicNewsRecipe):
    """Recipe for the daily news feed of AKTER (akter.co.rs)."""

    # Publication metadata
    title = 'AKTER - Dnevnik'
    __author__ = 'Darko Miletic'
    description = 'AKTER - Najnovije vesti iz Srbije'
    publisher = 'Akter Media Group d.o.o.'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.akter.co.rs/gfx/logodnover.png'

    # Download settings
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}\n'
        'body{font-family: Tahoma,Geneva,sans1,sans-serif}\n'
        'img{margin-bottom: 0.8em; display: block;}\n'
    )

    conversion_options = {
        'comment': description,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id': 'section_to_print'}),
    ]

    feeds = [
        (u'Vesti', u'http://akter.co.rs/rss/dnevni'),
    ]

    def print_version(self, url):
        """Return ``url`` with ``print-`` prefixed to its last path segment."""
        pieces = url.rpartition('/')
        return pieces[0] + '/print-' + pieces[2]

View File

@ -1,63 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.alo.rs
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Alo_Novine(BasicNewsRecipe):
    """Recipe for the Serbian news portal www.alo.rs."""

    # Publication metadata
    title = 'Alo!'
    __author__ = 'Darko Miletic'
    description = "News Portal from Serbia"
    publisher = 'Alo novine d.o.o.'
    category = 'news, politics, Serbia'
    language = 'sr'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces (note the
    # trailing space and the absence of a final newline).
    extra_css = (
        '\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}\n'
        '.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}\n'
        '.lead {font-size: 1.3em}\n'
        'h1{color: #DB0700}\n'
        '.article_uvod{font-style: italic; font-size: 1.2em}\n'
        'img{margin-bottom: 0.8em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    remove_tags = [
        dict(name=['object', 'link', 'embed']),
    ]
    remove_attributes = ['height', 'width']

    feeds = [
        (u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti'),
        (u'Politika', u'http://www.alo.rs/rss/politika'),
        (u'Vesti', u'http://www.alo.rs/rss/vesti'),
        (u'Sport', u'http://www.alo.rs/rss/sport'),
        (u'Ljudi', u'http://www.alo.rs/rss/ljudi'),
        (u'Saveti', u'http://www.alo.rs/rss/saveti'),
    ]

    def preprocess_html(self, soup):
        """Drop the ``style`` attribute from every element and return ``soup``."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def print_version(self, url):
        """Return the print.php URL for the article.

        The article id is taken to be the second-to-last ``/``-separated
        segment of ``url``.
        """
        article_id = url.rpartition('/')[0].rpartition('/')[2]
        return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + article_id

    def image_url_processor(self, baseurl, url):
        """Collapse the doubled slash after ``alo.rs`` in image URLs."""
        return url.replace('alo.rs//', 'alo.rs/')

View File

@ -1,37 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
beta.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe):
    """Recipe for the BETA news agency (beta.rs), using embedded feed content."""
    # NOTE(review): the class is called Danas although the title is 'BETA';
    # the name is left untouched because it is the block's identifier.

    # Publication metadata
    title = 'BETA'
    __author__ = 'Darko Miletic'
    description = 'Novinska Agencija'
    publisher = 'Beta'
    category = 'news, politics, Serbia'
    language = 'sr'
    publication_type = 'newsportal'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = True

    # One rule per piece; every piece ends with the single space that
    # separated the rules in the one-line form.
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif} '
        'img{margin-bottom: 0.8em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    feeds = [
        (u'Vesti dana', u'http://www.beta.rs/rssvd.asp'),
        (u'Ekonomija', u'http://www.beta.rs/rssek.asp'),
        (u'Sport', u'http://www.beta.rs/rsssp.asp'),
    ]

    def preprocess_html(self, soup):
        """Hand the parsed page to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)

View File

@ -1,33 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
beta.rs
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe):
    """Recipe for the English-language feed of the BETA news agency (beta.rs)."""
    # NOTE(review): the class is called Danas although the title is
    # 'BETA - English'; the name is left untouched because it is the
    # block's identifier.

    # Publication metadata
    title = 'BETA - English'
    __author__ = 'Darko Miletic'
    description = 'Serbian news agency'
    publisher = 'Beta'
    category = 'news, politics, Serbia'
    language = 'en'
    lang = 'en'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = True

    # The 'language' entry is fed from ``lang`` rather than ``language``.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    feeds = [
        (u'News', u'http://www.beta.rs/rssen.asp'),
    ]

    def preprocess_html(self, soup):
        """Hand the parsed page to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)

View File

@ -1,53 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
e-novine.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class E_novine(BasicNewsRecipe):
    """Recipe for e-novine.com, fetched through the site's ``?print`` pages."""

    # Publication metadata
    title = 'E-Novine'
    __author__ = 'Darko Miletic'
    description = 'News from Serbia'
    publisher = 'E-novine'
    category = 'news, politics, Balcans'
    language = 'sr'
    publication_type = 'newsportal'
    masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # One rule per piece; the leading space and the space after every
    # rule are part of the value.
    extra_css = (
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        '.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} '
        'img{float: none; margin-bottom: 0.8em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_head'}),
        dict(name='div', attrs={'id': 'article_body'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'embed', 'iframe']),
        dict(attrs={'id': 'box_article_tools'}),
    ]
    remove_attributes = ['height', 'width', 'lang']

    feeds = [
        (u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss'),
    ]

    def preprocess_html(self, soup):
        """Strip every ``style`` attribute, then pass the page to ``adeify_images``."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)

    def print_version(self, url):
        """Return ``url`` with ``?print`` appended."""
        return url + '?print'

View File

@ -1,48 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
emg.rs/vesti
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class emportal_rs(BasicNewsRecipe):
    """Recipe for the Ekonom:east news pages (emg.rs/vesti)."""

    # Publication metadata
    title = 'Ekonom:east Vesti'
    __author__ = 'Darko Miletic'
    description = 'Vasa dnevna doza poslovnih informacija iz Srbije, regiona i sveta. Vesti, Berze, Dogadaji, Casopisi.'
    publisher = 'Ekonom:east Media Group'
    category = (
        'Ekonom:east Media Group, Ekonomist, Budelar, Bankar, EMportal, '
        'Preduzeca, Moja Posla, EU praktikum, ekonomija, Srbija, Beograd, '
        'investicije, finansije, energetika, berza'
    )
    language = 'sr'
    masthead_url = 'http://www.emg.rs/img/emportal-rss.png'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # One rule per piece; the leading space and the space after every
    # rule are part of the value.
    extra_css = (
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: Arial,Helvetica,sans1,sans-serif } '
        '.article_description{font-family: sans1, sans-serif} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(attrs={'class': 'text'}),
    ]
    remove_tags = [
        dict(attrs={'class': ['text-share']}),
    ]
    remove_tags_after = dict(attrs={'class': 'text-share'})
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Srbija', u'http://www.emg.rs/vesti/srbija/rss.xml'),
        (u'Region', u'http://www.emg.rs/vesti/region/rss.xml'),
        (u'Svet', u'http://www.emg.rs/vesti/svet/rss.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with every ``.html`` replaced by ``.print.html``."""
        return url.replace('.html', '.print.html')

    def preprocess_html(self, soup):
        """Drop the ``style`` attribute from every element and return ``soup``."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,82 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
glassrpske.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class GlasSrpske(BasicNewsRecipe):
    """Recipe for Glas Srpske (glassrpske.com).

    The entries in ``feeds`` are section pages rather than RSS documents;
    ``parse_index`` scrapes each of them for article teasers.
    """

    # Publication metadata
    title = 'Glas Srpske'
    __author__ = 'Darko Miletic'
    description = 'Latest news from republika srpska'
    publisher = 'GLAS SRPSKE'
    category = 'Novine, Dnevne novine, Vijesti, Novosti, Ekonomija, Sport, Crna Hronika, Banja Luka,, Republika Srpska, Bosna i Hercegovina'
    language = 'sr'
    publication_type = 'newspaper'
    masthead_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # Site root; teaser hrefs are appended to it in parse_index.
    INDEX = 'http://www.glassrpske.com'

    # One rule per piece; every piece ends with the single space that
    # separated the rules in the one-line form.
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: serif1, serif} '
        'img{margin-bottom: 0.8em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'gl_cv paragraf'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'base']),
    ]

    feeds = [
        (u'Novosti', u'http://www.glassrpske.com/vijest/2/novosti/lat/'),
        (u'Drustvo', u'http://www.glassrpske.com/vijest/3/drustvo/lat/'),
        (u'Biznis', u'http://www.glassrpske.com/vijest/4/ekonomija/lat/'),
        (u'Kroz RS', u'http://www.glassrpske.com/vijest/5/krozrs/lat/'),
        (u'Hronika', u'http://www.glassrpske.com/vijest/6/hronika/lat/'),
        (u'Srbija', u'http://www.glassrpske.com/vijest/8/srbija/lat/'),
        (u'Region', u'http://www.glassrpske.com/vijest/18/region/lat/'),
        (u'Svijet', u'http://www.glassrpske.com/vijest/12/svijet/lat/'),
        (u'Kultura', u'http://www.glassrpske.com/vijest/9/kultura/lat/'),
        (u'Banja Luka', u'http://www.glassrpske.com/vijest/10/banjaluka/lat/'),
        (u'Jet Set', u'http://www.glassrpske.com/vijest/11/jetset/lat/'),
        (u'Muzika', u'http://www.glassrpske.com/vijest/19/muzika/lat/'),
        (u'Sport', u'http://www.glassrpske.com/vijest/13/sport/lat/'),
        (u'Kolumne', u'http://www.glassrpske.com/vijest/16/kolumne/lat/'),
        (u'Plus', u'http://www.glassrpske.com/vijest/7/plus/lat/'),
    ]

    def preprocess_html(self, soup):
        """Hand the parsed page to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)

    def parse_index(self):
        """Scrape every section page and return ``[(feed title, articles), ...]``.

        Each ``div.gl_rub`` on a section page is treated as one teaser: its
        first ``<a>`` supplies the link and title, its first ``<p>`` the
        description, and the part of that text before the first ``|`` the
        date. Teasers that have no ``<a>`` with an ``href`` are skipped, so a
        single malformed box cannot abort the whole download.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0, _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class': 'gl_rub'}):
                atag = item.find('a')
                if atag is None or not atag.get('href'):
                    # Nothing to link to; indexing atag here would raise.
                    continue
                ptag = item.find('p')
                # Convert the paragraph once and reuse it for both fields.
                summary = self.tag_to_string(ptag) if ptag is not None else ''
                articles.append({
                    'title': self.tag_to_string(atag),
                    'date': summary.partition('|')[0],
                    'url': self.INDEX + atag['href'],
                    'description': summary,
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

View File

@ -1,71 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.glas-javnosti.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GlasJavnosti(BasicNewsRecipe):
    """Recipe for Glas Javnosti (www.glas-javnosti.rs)."""

    # Publication metadata
    title = 'Glas Javnosti'
    __author__ = 'Darko Miletic'
    description = 'Glas javnosti - Mi ne ulepsavamo stvarnost'
    publisher = 'Glas Javnosti'
    category = 'news, politics, Serbia'
    language = 'sr'
    publication_type = 'newspaper'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False

    # One rule per piece; every piece ends with the single space that
    # separated the rules in the one-line form.
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif} '
        'img{margin-bottom: 0.8em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id': 'above-content'}),
        dict(name='div', attrs={'class': 'node'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'img']),
        dict(name='div', attrs={'class': ['links', 'meta']}),
        dict(name='div', attrs={'id': 'block-block-12'}),
    ]
    remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align']

    feeds = [
        (u'Politika', u'http://www.glas-javnosti.rs/aktuelni-clanci/2'),
        (u'Tema', u'http://www.glas-javnosti.rs/aktuelni-clanci/48'),
        (u'Drustvo', u'http://www.glas-javnosti.rs/aktuelni-clanci/17'),
        (u'Ekonomija', u'http://www.glas-javnosti.rs/aktuelni-clanci/16'),
        (u'Dosije', u'http://www.glas-javnosti.rs/aktuelni-clanci/65'),
        (u'Svet', u'http://www.glas-javnosti.rs/aktuelni-clanci/18'),
        (u'Hronika', u'http://www.glas-javnosti.rs/aktuelni-clanci/19'),
        (u'Kultura', u'http://www.glas-javnosti.rs/aktuelni-clanci/6'),
        (u'Ljudi i Dogadjaji', u'http://www.glas-javnosti.rs/aktuelni-clanci/37'),
        (u'Putovanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/113'),
        (u'Feljton', u'http://www.glas-javnosti.rs/aktuelni-clanci/49'),
        (u'Sport', u'http://www.glas-javnosti.rs/aktuelni-clanci/1'),
        (u'Lov i Ribolov', u'http://www.glas-javnosti.rs/aktuelni-clanci/591'),
        (u'Nedelja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1862'),
        (u'Glasno', u'http://www.glas-javnosti.rs/aktuelni-clanci/590'),
        (u'Tehnologija', u'http://www.glas-javnosti.rs/aktuelni-clanci/609'),
        (u'Reflektor', u'http://www.glas-javnosti.rs/aktuelni-clanci/717'),
        (u'Saznanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1694'),
        (u'Beograd', u'http://www.glas-javnosti.rs/aktuelni-clanci/40'),
        (u'Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/114'),
        (u'Zapadna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/41'),
        (u'Istocna i Juzna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/42'),
        (u'Sumadija i Pomoravlje', u'http://www.glas-javnosti.rs/aktuelni-clanci/43'),
        (u'Vojvodina', u'http://www.glas-javnosti.rs/aktuelni-clanci/44'),
        (u'Republika Srpska', u'http://www.glas-javnosti.rs/aktuelni-clanci/45'),
        (u'Slobodno Vreme', u'http://www.glas-javnosti.rs/aktuelni-clanci/61'),
        (u'Konjske Snage', u'http://www.glas-javnosti.rs/aktuelni-clanci/46'),
    ]

View File

@ -1,68 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
vesti.krstarica.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Krstarica(BasicNewsRecipe):
    """Recipe for the Krstarica news pages (vesti.krstarica.com)."""

    # Publication metadata
    title = 'Krstarica - Vesti'
    __author__ = 'Darko Miletic'
    description = 'Dnevne vesti iz Srbije i sveta'
    publisher = 'Krstarica'
    category = 'news, politics, Serbia'
    language = 'sr'

    # Download settings
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    encoding = 'utf-8'

    # One rule per piece; the last piece carries no trailing space.
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: serif1, serif}'
    )

    # NOTE(review): this recipe sets html2lrf_options / html2epub_options
    # where its siblings set conversion_options; presumably these are
    # legacy names - confirm whether anything still reads them.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em}"'
    )

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    feeds = [
        (u'Vesti dana', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=aktuelno&lang=0'),
        (u'Srbija', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=scg&lang=0'),
        (u'Svet', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=svet&lang=0'),
        (u'Politika', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=politika&lang=0'),
        (u'Ekonomija', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=ekonomija&lang=0'),
        (u'Drustvo', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=drustvo&lang=0'),
        (u'Kultura', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=kultura&lang=0'),
        (u'Nauka i Tehnologija', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=nauka&lang=0'),
        (u'Medicina', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=medicina&lang=0'),
        (u'Sport', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=sport&lang=0'),
        (u'Zanimljivosti', u'http://vesti.krstarica.com/index.php?rss=1&rubrika=zanimljivosti&lang=0'),
    ]

    def preprocess_html(self, soup):
        """Rearrange the page around its first ``<h4>`` and strip layout attributes.

        A Content-Type meta string is inserted at the start of ``<head>``.
        When an ``<h4>`` exists, its grandparent element is detached, all
        ``<table>`` and ``<center>`` elements are removed, and the detached
        element is put back into ``<body>`` at position 1, renamed to
        ``div``. Finally every ``style`` and ``align`` attribute is deleted.
        """
        soup.head.insert(
            0, '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>')
        heading = soup.find('h4')
        # NOTE(review): nesting reconstructed - every statement that uses
        # the detached container is assumed to sit under this `if`.
        if heading:
            container = heading.parent.parent
            container.extract()
            for clutter in soup.findAll(['table', 'center']):
                clutter.extract()
            soup.body.insert(1, container)
            container.name = 'div'
        for attr_name in ('style', 'align'):
            for tag in soup.findAll(**{attr_name: True}):
                del tag[attr_name]
        return soup

View File

@ -1,62 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
'''
libartes.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Libartes(BasicNewsRecipe):
    """Recipe for the Libartes magazine (libartes.com), indexed from its front page."""

    # Publication metadata
    title = 'Libartes'
    __author__ = 'Darko Miletic'
    description = 'Elektronski časopis Libartes delo je kulturnih entuzijasta, umetnika i teoretičara umetnosti i književnosti. Časopis Libartes izlazi tromesečno i bavi se različitim granama umetnosti - književnošću, muzikom, filmom, likovnim umetnostima, dizajnom i arhitekturom.'  # noqa
    publisher = 'Libartes'
    category = 'literatura, knjizevnost, film, dizajn, arhitektura, muzika'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://libartes.com/index_files/logo.gif'

    # Download settings
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Front page; also the prefix for links that do not already start with it.
    INDEX = 'http://libartes.com/'

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}\n'
        'body{font-family: "Times New Roman",Times,serif1, serif}\n'
        'img{display:block}\n'
        '.naslov{font-size: xx-large; font-weight: bold}\n'
        '.nag{font-size: large; font-weight: bold}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    remove_tags_before = dict(attrs={'id': 'nav'})
    remove_tags_after = dict(attrs={'id': 'fb'})
    keep_only_tags = [
        dict(name='div', attrs={'id': 'center_content'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'embed', 'meta']),
        dict(attrs={'id': 'nav'}),
    ]

    def parse_index(self):
        """Return one feed, 'Casopis Libartes', listing every ``a.belad`` link on INDEX.

        Links that do not already start with INDEX get INDEX prepended.
        Each article is stamped with ``strftime(self.timefmt)`` and an
        empty description.
        """
        entries = []
        front_page = self.index_to_soup(self.INDEX)
        for link in front_page.findAll(name='a', attrs={'class': 'belad'}, href=True):
            href = link['href']
            target = href if href.startswith(self.INDEX) else self.INDEX + href
            entries.append({
                'title': self.tag_to_string(link),
                'date': strftime(self.timefmt),
                'url': target,
                'description': '',
            })
        return [('Casopis Libartes', entries)]

View File

@ -1,64 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.monitor.co.me
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MonitorCG(BasicNewsRecipe):
    """Recipe for the Montenegrin weekly MONITOR (www.monitor.co.me)."""

    # Publication metadata
    title = 'MONITOR online'
    __author__ = 'Darko Miletic'
    description = 'Nezavisni nedjeljnik Monitor'
    publisher = '"Monitor" D.O.O. Podgorica'
    category = 'news, politics, Montenegro'
    language = 'sr'

    # Download settings
    oldest_article = 15
    max_articles_per_feed = 150
    no_stylesheets = True
    encoding = 'utf-8'
    auto_cleanup = False
    use_embedded_content = False
    remove_empty_feeds = True

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}\n'
        'h2{font-family: Cambria,"Times New Roman",Times,serif1,serif}\n'
        'body{font-family: Arial,sans1,sans-serif}\n'
        'img{display: block}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'pretty_print': True,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(attrs={'class': ['contentheading', 'article-meta', 'article-content']}),
    ]
    remove_attributes = ['width', 'height', 'font', 'border', 'align']

    # Every feed URL has the same shape and differs only in the ``id`` and
    # ``Itemid`` query values, so they are filled into one template. The
    # template is deleted afterwards to leave no extra class attribute.
    _section_feed = (
        u'http://www.monitor.co.me/index.php?option=com_content'
        u'&view=section&layout=blog&id=%d&Itemid=%d&format=feed&type=rss'
    )
    # NOTE(review): 'Fokus' and 'Monitoring' use the same id/Itemid pair -
    # confirm whether one of them was meant to point at another section.
    feeds = [
        (u'Danas, Sjutra', _section_feed % (5, 27)),
        (u'Duhankesa', _section_feed % (13, 37)),
        (u'Znaci prepoznavanja', _section_feed % (6, 358)),
        (u'Paralele', _section_feed % (8, 359)),
        (u'Razbijeno ogledalo', _section_feed % (18, 354)),
        (u'Tržište', _section_feed % (26, 371)),
        (u'Feljton', _section_feed % (29, 471)),
        (u'Monitor', _section_feed % (1, 1852)),
        (u'Altervizija', _section_feed % (31, 2623)),
        (u'Fenomeni', _section_feed % (35, 3549)),
        (u'Fokus', _section_feed % (19, 252)),
        (u'Monitoring', _section_feed % (19, 252)),
        (u'Profil', _section_feed % (21, 256)),
        (u'Intervju', _section_feed % (27, 404)),
        (u'Društvo', _section_feed % (14, 2)),
        (u'Region', _section_feed % (12, 53)),
        (u'Svijet', _section_feed % (11, 360)),
        (u'Kultura', _section_feed % (9, 361)),
    ]
    del _section_feed

View File

@ -1,100 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.standard.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class NoviStandard(BasicNewsRecipe):
    """Recipe for the Novi Standard magazine (www.standard.rs).

    Login is optional: when both a username and a password are set, the
    browser submits the site's ``login`` form before any feed is fetched.
    """

    # Publication metadata
    title = 'Novi Standard'
    __author__ = 'Darko Miletic'
    description = 'NoviStandard - energija je neunistiva!'
    publisher = 'Novi Standard'
    category = 'news, politics, Serbia'
    language = 'sr'
    # Assigned once; the earlier duplicate assignment of the same value
    # has been dropped.
    publication_type = 'magazine'
    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'

    # Download settings
    no_stylesheets = True
    delay = 1
    oldest_article = 15
    encoding = 'utf-8'
    needs_subscription = 'optional'
    remove_empty_feeds = True
    use_embedded_content = False

    # Page opened by get_browser before the optional login.
    INDEX = 'http://www.standard.rs/'

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}\n'
        'body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}\n'
        '.dropcap{font-family: Georgia,Times,serif1,serif; display:inline}\n'
        '.dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}\n'
        '.contentheading{color: gray; font-size: x-large}\n'
        '.article-meta, .createdby{color: red}\n'
        'img{margin-top:0.5em; margin-bottom: 0.7em; display: block}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    keep_only_tags = [
        dict(attrs={'class': ['contentheading', 'article-meta', 'article-content']}),
    ]
    remove_tags_after = dict(attrs={'class': 'extravote-container'})
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'meta', 'base']),
        dict(attrs={'class': 'extravote-container'}),
    ]
    remove_attributes = [
        'border', 'background', 'height', 'width', 'align', 'valign', 'lang',
    ]

    feeds = [
        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss'),
        (u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss'),
        (u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss'),
        (u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss'),
        (u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss'),
        (u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss'),
        (u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss'),
        (u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss'),
        (u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss'),
    ]

    def get_browser(self):
        """Return a browser that has opened INDEX and, if credentials exist, logged in."""
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.select_form(name='login')
            br['username'] = self.username
            br['passwd'] = self.password
            br.submit()
        return br

    def preprocess_html(self, soup):
        """Clean one article page in place and return it.

        Steps, in order: delete every ``style`` attribute; remove ``div``
        elements with no contents; flatten links (a link whose ``.string``
        is set, or that holds no image, is replaced by its text, while a
        link wrapping an image becomes an attribute-less ``div``); give
        every image lacking ``alt`` the value ``'image'``.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('div'):
            if not item.contents:
                item.extract()
        for item in soup.findAll('a'):
            # Local is named link_text so the builtin ``str`` is not shadowed.
            if item.string is not None:
                link_text = item.string
                item.replaceWith(link_text)
            elif item.find('img'):
                # Image lookup happens only on the branch that needs it.
                item.name = 'div'
                item.attrs = []
            else:
                link_text = self.tag_to_string(item)
                item.replaceWith(link_text)
        for item in soup.findAll('img'):
            # ``get`` replaces the Python-2-only ``has_key`` lookup.
            if item.get('alt') is None:
                item['alt'] = 'image'
        return soup

View File

@ -1,51 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
english.pravda.ru
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Pravda_eng(BasicNewsRecipe):
    """Recipe for the English edition of Pravda (english.pravda.ru)."""

    # Publication metadata
    title = 'Pravda in English'
    __author__ = 'Darko Miletic'
    description = 'News from Russia and rest of the world'
    publisher = 'PRAVDA.Ru'
    category = 'news, politics, Russia'
    language = 'en_RU'
    publication_type = 'newspaper'
    masthead_url = 'http://english.pravda.ru/pix/logo.gif'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # Stylesheet text, assembled piecewise so the resulting value is the
    # same as the flush-left multi-line literal it replaces.
    extra_css = (
        '\n'
        'body{font-family: Arial,sans-serif }\n'
        'img{margin-bottom: 0.4em; display:block}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_attributes = ['lang', 'style']
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]

    feeds = [
        (u'World', u'http://english.pravda.ru/world/export-articles.xml'),
        (u'Russia', u'http://english.pravda.ru/russia/export-articles.xml'),
        (u'Society', u'http://english.pravda.ru/society/export-articles.xml'),
        (u'Incidents', u'http://english.pravda.ru/hotspots/export-articles.xml'),
        (u'Opinion', u'http://english.pravda.ru/opinion/export-articles.xml'),
        (u'Science', u'http://english.pravda.ru/science/export-articles.xml'),
        (u'Business', u'http://english.pravda.ru/business/export-articles.xml'),
        (u'Economics', u'http://english.pravda.ru/russia/economics/export-articles.xml'),
        (u'Politics', u'http://english.pravda.ru/russia/politics/export-articles.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with ``?mode=print`` appended."""
        return url + '?mode=print'

View File

@ -1,44 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
tanjug.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Tanjug(BasicNewsRecipe):
    """Recipe for the TANJUG news agency (tanjug.rs), using embedded feed content."""

    # Publication metadata
    title = 'Tanjug'
    __author__ = 'Darko Miletic'
    description = 'Novinska agencija TANJUG - Dnevne vesti iz Srbije i sveta'
    publisher = 'Tanjug'
    category = 'news, politics, Serbia'
    language = 'sr'
    # Written onto <html> and into a meta tag by preprocess_html, and also
    # used for the 'language' conversion option.
    lang = 'sr-Latn-RS'
    direction = 'ltr'

    # Download settings
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = True
    encoding = 'utf-8'

    # One rule per piece; the last piece carries no trailing space.
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: serif1, serif}'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # (compiled pattern, replacement callable): U+0110 is swapped for U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    feeds = [
        (u'Vesti', u'http://www.tanjug.rs/StaticPages/RssTanjug.aspx'),
    ]

    def preprocess_html(self, soup):
        """Tag the page with language/direction info, then run ``adeify_images``.

        ``xml:lang``, ``lang`` and ``dir`` are set on ``<html>`` and a
        Content-Language ``<meta>`` is inserted at the start of ``<head>``.
        """
        html_attrs = (
            ('xml:lang', self.lang),
            ('lang', self.lang),
            ('dir', self.direction),
        )
        for attr_name, attr_value in html_attrs:
            soup.html[attr_name] = attr_value
        language_meta = Tag(
            soup,
            'meta',
            [('http-equiv', 'Content-Language'), ('content', self.lang)],
        )
        soup.head.insert(0, language_meta)
        return self.adeify_images(soup)