Merge branch 'master' of https://github.com/unkn0w7n/calibre
@ -1,61 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
# Last Edited: 2018-02-13 Carlos Alves <carlosalves90@gmail.com>
|
||||
##
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
180.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Noticias(BasicNewsRecipe):
    """Calibre news recipe for the headlines feed of 180.com.uy."""

    # --- Publication metadata -------------------------------------------
    title = '180.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'utf-8'

    # --- Download settings ----------------------------------------------
    use_embedded_content = False
    oldest_article = 2
    max_articles_per_feed = 100
    # NOTE(review): the documented BasicNewsRecipe attribute appears to be
    # `recursions`; confirm whether this spelling is intentional before
    # renaming, since enabling link recursion would change what is fetched.
    recursion = 5

    # --- Markup clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    remove_tags_after = {'name': 'article'}
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'nota'}},
        {'name': 'h3'},
        {'name': 'h4'},
        {'name': 'article'},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'items'}},
    ]
    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    # Stylesheet applied to the downloaded articles.
    extra_css = (
        '\n'
        'h1{font-family: Georgia,"Times New Roman",Times,serif}\n'
        'h3{font-family: Georgia,"Times New Roman",Times,serif}\n'
        'h2{font-family: Georgia,"Times New Roman",Times,serif}\n'
        'p{font-family: Verdana,Arial,Helvetica,sans-serif}\n'
        'body{font-family: Verdana,Arial,Helvetica,sans-serif}\n'
        'img{margin-bottom: 0.4em; display:block;}\n'
    )

    feeds = [
        (u'Titulares', u'http://www.180.com.uy/feed.php'),
    ]

    def get_cover_url(self):
        """Return ``None``: this recipe does not supply a cover URL."""
        return None

    def preprocess_html(self, soup):
        """Drop every inline ``style`` attribute from *soup* and return it."""
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
|
@ -1,68 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
24sata.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name* that belongs to *soup*.

    When *soup* exposes a ``new_tag`` factory it is used, with *attrs*
    converted to a dict.  Otherwise the tag is built by calling ``Tag``
    directly, passing ``None`` in place of an empty *attrs*.

    :param soup: the parsed document the tag is created for
    :param name: tag name, e.g. ``'meta'``
    :param attrs: iterable of ``(attribute, value)`` pairs
    :return: the newly created tag (not yet inserted into the tree)
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on the soup object: construct the tag ourselves.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
|
||||
|
||||
|
||||
class Cro24Sata(BasicNewsRecipe):
    """Calibre news recipe for the Croatian news portal 24sata.hr."""

    title = '24 Sata - Hr'
    __author__ = 'Darko Miletic'
    description = "News Portal from Croatia"
    publisher = '24sata.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'hr'

    # Language tag written into the generated HTML by preprocess_html().
    lang = 'hr-HR'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa

    # The key is 'comments' (plural), matching the other recipes in this
    # collection; the previous 'comment' key did not correspond to a
    # conversion option.
    conversion_options = {
        'comments': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
    }

    # Replace U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [
        dict(name=['object', 'link', 'embed']),
        dict(name='table', attrs={'class': 'enumbox'}),
    ]

    feeds = [(u'Najnovije Vijesti',
              u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]

    def preprocess_html(self, soup):
        """Add language/charset metadata to *soup* and strip inline styles.

        Sets ``lang`` on ``<html>`` and inserts Content-Language and
        Content-Type ``<meta>`` tags at the top of ``<head>``.  Either step
        is skipped when the corresponding element is missing, so malformed
        pages no longer raise.

        :param soup: parsed article document
        :return: the same *soup*, modified in place
        """
        if soup.html is not None:
            soup.html['lang'] = self.lang
        head = soup.head
        if head is not None:
            mlang = new_tag(soup, 'meta', [
                ("http-equiv", "Content-Language"), ("content", self.lang)])
            mcharset = new_tag(soup, 'meta', [
                ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
            head.insert(0, mlang)
            head.insert(1, mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        """Return the print-view URL for the article at *url*."""
        return url + '&action=ispis'
|
@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
sapteseri.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class SapteSeri(BasicNewsRecipe):
    """Calibre news recipe for the Bucharest events feed of sapteseri.ro."""

    # --- Publication metadata -------------------------------------------
    title = u'Sapte Seri'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Sapte Seri'
    publisher = u'Sapte Seri'
    category = 'Ziare,Oras,Distractie,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Markup clean-up --------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [
        {'name': 'h1', 'attrs': {'id': 'title'}},
        {'name': 'div', 'attrs': {'class': 'mt10 mb10'}},
        {'name': 'div', 'attrs': {'class': 'mb20 mt10'}},
        {'name': 'div', 'attrs': {'class': 'mt5 mb20'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['entityimgworking']}},
    ]

    feeds = [
        (
            u'Ce se intampla azi in Bucuresti',
            u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/',
        ),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Aabenraa
|
||||
'''
|
||||
|
||||
|
||||
class AabenraaLokalavisen_dk(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of Lokalavisen Aabenraa."""

    # Publication metadata.
    title = 'Lokalavisen Aabenraa'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Aabenraa og omegn på aabenraa.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (section title, RSS URL) pairs.
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Aabenraa',
            'http://aabenraa.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Aabenraa',
            'http://aabenraa.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Aabenraa',
            'http://aabenraa.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Aabenraa',
            'http://aabenraa.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Aabenraa',
            'http://aabenraa.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Aabenraa',
            'http://aabenraa.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Aarhus
|
||||
'''
|
||||
|
||||
|
||||
class AarhusLokalavisen_dk(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of Lokalavisen Aarhus."""

    # Publication metadata.
    title = 'Lokalavisen Aarhus'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Aarhus Midt, Nord, Vest og Syd på aarhus.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (section title, RSS URL) pairs.
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Aarhus',
            'http://aarhus.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Aarhus',
            'http://aarhus.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Aarhus',
            'http://aarhus.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Aarhus',
            'http://aarhus.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Aarhus',
            'http://aarhus.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Aarhus',
            'http://aarhus.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Aarhus Midt
|
||||
'''
|
||||
|
||||
|
||||
class AarhusmidtLokalavisen_dk(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of Lokalavisen Aarhus Midt."""

    # Publication metadata.
    title = 'Lokalavisen Aarhus Midt'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Aarhus Midt på aarhusmidt.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (section title, RSS URL) pairs.
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Aarhus Midt',
            'http://aarhusmidt.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Aarhus Midt',
            'http://aarhusmidt.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Aarhus Midt',
            'http://aarhusmidt.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Aarhus Midt',
            'http://aarhusmidt.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Aarhus Midt',
            'http://aarhusmidt.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Aarhus Midt',
            'http://aarhusmidt.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Aarhus Nord
|
||||
'''
|
||||
|
||||
|
||||
class AarhusnordLokalavisen_dk(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of Lokalavisen Aarhus Nord."""

    # Publication metadata.
    title = 'Lokalavisen Aarhus Nord'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Aarhus Nord på aarhusnord.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (section title, RSS URL) pairs.
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Aarhus Nord',
            'http://aarhusnord.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Aarhus Nord',
            'http://aarhusnord.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Aarhus Nord',
            'http://aarhusnord.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Aarhus Nord',
            'http://aarhusnord.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Aarhus Nord',
            'http://aarhusnord.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Aarhus Nord',
            'http://aarhusnord.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Aarhus Syd
|
||||
'''
|
||||
|
||||
|
||||
class AarhussydLokalavisen_dk(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of Lokalavisen Aarhus Syd."""

    # Publication metadata.
    title = 'Lokalavisen Aarhus Syd'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Aarhus Syd på aarhussyd.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (section title, RSS URL) pairs.
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Aarhus Syd',
            'http://aarhussyd.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Aarhus Syd',
            'http://aarhussyd.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Aarhus Syd',
            'http://aarhussyd.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Aarhus Syd',
            'http://aarhussyd.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Aarhus Syd',
            'http://aarhussyd.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Aarhus Syd',
            'http://aarhussyd.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Aarhus Vest
|
||||
'''
|
||||
|
||||
|
||||
class AarhusvestLokalavisen_dk(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of Lokalavisen Aarhus Vest."""

    __author__ = 'CoderAllan.github.com'
    # Title was previously truncated to "Lokalavisen Aarhus Ves"; the
    # description and every feed below name the edition "Aarhus Vest".
    title = 'Lokalavisen Aarhus Vest'
    description = 'Lokale og regionale nyheder, sport og kultur fra Aarhus Vest på aarhusvest.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    # (section title, RSS URL) pairs.
    feeds = [
        ('Seneste nyt fra Lokalavisen Aarhus Vest', 'http://aarhusvest.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Aarhus Vest', 'http://aarhusvest.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Aarhus Vest', 'http://aarhusvest.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Aarhus Vest', 'http://aarhusvest.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Aarhus Vest', 'http://aarhusvest.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Aarhus Vest', 'http://aarhusvest.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,50 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
abc.com.py
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ABC_py(BasicNewsRecipe):
    """Calibre news recipe for the Paraguayan newspaper ABC Color (abc.com.py)."""

    title = 'ABC Color'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Paraguay y el resto del mundo'
    publisher = 'ABC'
    category = 'news, politics, Paraguay'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es_PY'
    remove_empty_feeds = True
    masthead_url = 'http://www.abc.com.py/plantillas/img/abc-logo.png'
    publication_type = 'newspaper'

    # Stylesheet applied to the downloaded articles.
    extra_css = (
        '\n'
        'body{font-family: UnitSlabProMedium,"Times New Roman",serif }\n'
        'img{margin-bottom: 0.4em; display: block;}\n'
    )

    # The key is 'comments' (plural), matching the other recipes in this
    # collection; the previous 'comment' key did not correspond to a
    # conversion option.
    conversion_options = {
        'comments': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        dict(name=['form', 'iframe', 'embed',
                   'object', 'link', 'base', 'table']),
        dict(attrs={'class': ['es-carousel-wrapper']}),
        dict(attrs={'id': ['tools', 'article-banner-1']}),
    ]
    keep_only_tags = [dict(attrs={'id': 'article'})]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Ultimo momento', u'http://www.abc.com.py/rss.xml'),
        (u'Nacionales', u'http://www.abc.com.py/nacionales/rss.xml'),
        (u'Mundo', u'http://www.abc.com.py/internacionales/rss.xml'),
        (u'Deportes', u'http://www.abc.com.py/deportes/rss.xml'),
        (u'Espectaculos', u'http://www.abc.com.py/espectaculos/rss.xml'),
        (u'TecnoCiencia', u'http://www.abc.com.py/ciencia/rss.xml'),
    ]
|
@ -1,20 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1599499742(BasicNewsRecipe):
    """Calibre news recipe for the main RSS feed of Aftonbladet."""

    # Publication metadata.
    title = 'Aftonbladet'
    __author__ = 'Jonatan Nyberg'
    description = u'Nyheter från Sveriges största nyhetssajt.'
    publisher = 'Aftonbladet'
    category = 'news'
    language = 'sv'
    cover_url = 'https://gfx.aftonbladet-cdn.se/hyper-assets/f684737c60484ef64ab63a9e73a54d8b.jpg'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    # (section title, RSS URL) pairs.
    feeds = [
        (
            'Aftonbladet',
            'http://www.aftonbladet.se/rss.xml',
        ),
    ]
|
@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'Creative Commons Attribution 4.0 International License'
|
||||
__author__ = 'John McDole'
|
||||
__copyright__ = ''
|
||||
__version__ = '0.1'
|
||||
__date__ = '2015/01/10'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Build a tag named *name* for the document *soup*.

    Delegates to ``soup.new_tag`` (with *attrs* turned into a dict) when the
    soup object has such an attribute; falls back to instantiating ``Tag``
    directly, passing ``None`` instead of an empty *attrs*.

    :param soup: the parsed document the tag is created for
    :param name: tag name, e.g. ``'div'``
    :param attrs: iterable of ``(attribute, value)`` pairs
    :return: the newly created, still detached tag
    """
    make_tag = getattr(soup, 'new_tag', None)
    if make_tag is None:
        # The soup object offers no factory method; call the constructor.
        fallback_attrs = attrs or None
        return Tag(soup, name, attrs=fallback_attrs)
    attr_map = dict(attrs)
    return make_tag(name, attrs=attr_map)
|
||||
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for The Atlanta Journal-Constitution (ajc.com).

    Premium and cross-linked articles are skipped, and the author links of
    each story are collapsed into a single "by: ..." line placed after the
    story's meta block.
    """

    now = datetime.datetime.now()
    title = 'The AJC'
    timefmt = ' [%a,%d %B %Y %I:%M %p]'
    __author__ = 'John McDole'
    language = 'en'
    description = 'The Atlanta Journal-Constitution; Metro Atlanta & Georgia'
    publisher = 'The Atlanta Journal-Constitution'
    publication_type = 'newspaper'
    category = 'news, politics, USA'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    # The AJC lists identical articles in multiple feeds; this removes them
    # based on their title and URL.
    ignore_duplicate_articles = {'title', 'url'}

    # And this says "Hey, AJC, different feeds should mean something!"
    remove_empty_feeds = True

    # Sets whether a feed has full articles embedded in it. The AJC feeds do
    # not.
    use_embedded_content = False

    masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'

    # Pick your poison. Business seems to be mostly cross-linked articles.
    # Premium and cross-linked articles will be dropped.
    feeds = [
        ('Breaking News', 'http://www.ajc.com/list/rss/online/ajc-auto-list-iphone-topnews/aFKq/'),
        ('Metro and Georgia',
         'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'),
        ('Business', 'http://www.ajc.com/feeds/categories/business/'),
        ('Health', 'http://www.ajc.com/feeds/categories/health/'),
        # ('Braves', 'http://www.ajc.com/list/rss/sports/baseball/atlanta-braves-news/aGpN/'),
        # ('Falcons', 'http://www.ajc.com/list/rss/sports/football/falcons-news/aGK4/'),
        # ('Georgia Tech Yellow Jackets', 'http://www.ajc.com/list/rss/sports/college/georgia-tech-headlines/aGK6/'),
    ]

    # Patterns matched against the ``class`` attribute of story <div>s.
    headline_reg_exp = '^.*cm-story-headline.*$'
    story_body_reg_exp = '^.*cm-story-body.*$'
    author_reg_exp = '^.*cm-story-author.*$'

    keep_only_tags = [
        dict(name='div', attrs={'class': re.compile(
            headline_reg_exp, re.IGNORECASE)}),
        dict(name='div', attrs={'class': 'cm-story-meta'}),
        dict(name='div', attrs={'class': re.compile(
            author_reg_exp, re.IGNORECASE)}),
        dict(name='meta', attrs={'name': 'description'}),
        dict(name='div', attrs={'class': re.compile(
            story_body_reg_exp, re.IGNORECASE)}),
    ]

    premium_reg_exp = '^.*cmPremiumContent.*$'
    footer_reg_exp = '^.*cm-story-footer.*$'

    remove_tags = [
        dict(name='div', attrs={'class': re.compile(
            footer_reg_exp, re.IGNORECASE)}),
        dict(name='div', attrs={'class': 'cm-inline-related-group'}),
    ]

    extra_css = (
        'body { font-family: verdana, helvetica, sans-serif; } '
        '.cm-story-headline h1 { text-align: center; font-size: 175%; font-weight: bold; } '
        '.cm-story-meta { font-size: 80%; } '
        '.cm-related-caption, .cmPhotoImageAttribution, img { display: block; font-size: 75%; font-style: italic; text-align: center; margin: 5px auto;} '
        '.cm-story-author { display: block; font-size: 80%; font-style: italic; }'
    )

    def preprocess_html(self, soup):
        """Skip premium and cross-linked articles; return other pages as-is.

        Returning ``None`` from this hook led to errors further down the
        pipeline, so unwanted articles are dropped with ``abort_article``.

        :param soup: parsed article document
        :return: *soup* unchanged when the article should be kept
        """
        premium = soup.find('div', attrs={'class': re.compile(
            self.premium_reg_exp, re.IGNORECASE)})
        if premium is not None:
            self.abort_article('Premium content')
        crosslink = soup.find('a', attrs={'class': 'cm-feed-story-more-link'})
        if crosslink is not None:
            self.abort_article('Cross-linked article')
        return soup

    def populate_article_metadata(self, article, soup, first):
        """Fill in summary, thumbnail and author for *article* from *soup*.

        * The summary comes from ``<meta name="description">`` tags that
          carry a ``content`` attribute (the last one wins).
        * The thumbnail is the image inside ``div.cm-story-photo`` or, when
          there is no such div, the first image in the document.
        * Author ``<div>``s are removed from the document and replaced by a
          single ``div.cm-story-author`` reading "by: name, name", inserted
          right after ``div.cm-story-meta`` when that div exists.

        :param article: article object whose metadata is updated in place
        :param soup: parsed article document, modified in place
        :param first: unused here; part of the hook signature
        """
        for meta in soup.findAll('meta', attrs={'name': 'description'}):
            summary = meta.get('content')
            if summary:
                article.text_summary = summary
                article.summary = summary

        photo = soup.find('div', attrs={'class': 'cm-story-photo'})
        lead = photo.find('img') if photo is not None else soup.find('img')
        if lead is not None and lead.get('src'):
            self.add_toc_thumbnail(article, lead['src'])

        names = []
        author_pattern = re.compile(self.author_reg_exp, re.IGNORECASE)
        for div in soup.findAll('div', attrs={'class': author_pattern}):
            div.extract()
            for auth in div.findAll('a'):
                # Depending on the parser, ``class`` is either a list of
                # class names or one whitespace-separated string.
                classes = auth.get('class') or []
                if not isinstance(classes, (list, tuple)):
                    classes = classes.split()
                if 'cm-source-image' in classes:
                    continue
                name = self.tag_to_string(auth, use_alt=False).strip()
                if name:
                    names.append(name)

        byline = ', '.join(names)
        article.author = byline
        if not byline:
            return

        tag = new_tag(soup, 'div', [('class', 'cm-story-author')])
        tag.append("by: ")
        tag.append(byline)
        meta = soup.find('div', attrs={'class': 'cm-story-meta'})
        if meta is None or meta.parent is None:
            # Nowhere to anchor the byline; the author metadata is still set.
            return
        meta_idx = meta.parent.contents.index(meta)
        meta.parent.insert(meta_idx + 1, tag)
|
@ -1,22 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
ajiajin.com/blog
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AjiajinBlog(BasicNewsRecipe):
    """Calibre news recipe for the Ajiajin blog feed."""

    # Publication metadata.
    title = u'Ajiajin blog'
    __author__ = 'Hiroshi Miura'
    description = 'The next generation internet trends in Japan and Asia'
    publisher = ''
    category = 'internet, asia, japan'
    publication_type = 'blog'
    language = 'en'
    encoding = 'utf-8'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100

    # (section title, RSS URL) pairs.
    feeds = [
        (u'blog', u'http://feeds.feedburner.com/Asiajin'),
    ]
|
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 405 B |
Before Width: | Height: | Size: 245 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 830 B |
Before Width: | Height: | Size: 846 B |
Before Width: | Height: | Size: 136 B |