MiDDay by calibre Periodicals

This commit is contained in:
Kovid Goyal 2010-03-30 19:37:38 +05:30
parent 8767957773
commit 91a75aa0c9
10 changed files with 119 additions and 73 deletions

View File

@ -9,15 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class heiseDe(BasicNewsRecipe):
title = 'heise'
description = 'Computernews from Germany'
__author__ = 'Oliver Niesner'
use_embedded_content = False
language = 'de'
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 40
no_stylesheets = True
remove_tags = [dict(id='navi_top'),
dict(id='navi_bottom'),
dict(id='logo'),
@ -35,8 +36,8 @@ class heiseDe(BasicNewsRecipe):
dict(name='p', attrs={'class':'news_navi'}),
dict(name='div', attrs={'class':'news_foren'})]
remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})]
feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]
feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]

View File

@ -4,7 +4,7 @@ import re
class SmeRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'Abelturd'
language = 'cz'
language = 'cs'
version = 1
title = u'iLiteratura.cz'

View File

@ -9,6 +9,7 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
description = 'Medical news'
timefmt = ' [%d %b, %Y]'
needs_subscription = True
language = 'en'
no_stylesheets = True
#remove_tags_before = dict(name='div', attrs={'align':'center'})

View File

@ -0,0 +1,13 @@
from calibre.web.feeds.news import CalibrePeriodical
class MiDDay(CalibrePeriodical):
    # Recipe for the MiDDay periodical. Only metadata is declared here; no
    # methods are overridden, so all behaviour is inherited from
    # CalibrePeriodical.

    # Slug naming this periodical on the calibre Periodicals service.
    calibre_periodicals_slug = 'midday'

    title = 'MiDDay'
    language = 'en_IN'

    # User-visible blurb. The pieces are joined with newlines so the
    # resulting text keeps the same line breaks as the original literal.
    description = '\n'.join((
        'Get your dose of the latest news, views and fun - from the',
        'world of politics, sports and Bollywood to the cartoons, comics and games of',
        'the entertainment section - Indias leading tabloid has it all. To subscribe',
        'visit <a href="http://news.calibre-ebook.com/periodical/midday">calibre',
        'Periodicals</a>.',
    ))

View File

@ -18,7 +18,7 @@ class NursingTimes(BasicNewsRecipe):
encoding = 'utf-8'
publisher = 'emap'
category = 'news, health, nursing, UK'
language = 'en-UK'
language = 'en_GB'
needs_subscription = True
LOGIN = 'http://www.nursingtimes.net/sign-in'

View File

@ -6,6 +6,7 @@ class TaNea(BasicNewsRecipe):
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
language = 'el'
remove_tags_before = dict(name='div',attrs={'id':'print-body'})
remove_tags_after = dict(name='div',attrs={'id':'text'})

View File

@ -8,43 +8,44 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisNews(BasicNewsRecipe):
title = u'Telepolis (News)'
__author__ = 'Gerhard Aigner'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
description = 'News from telepolis'
category = 'news'
oldest_article = 7
max_articles_per_feed = 100
recursion = 0
no_stylesheets = True
encoding = "utf-8"
title = u'Telepolis (News)'
__author__ = 'Gerhard Aigner'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
description = 'News from telepolis'
category = 'news'
oldest_article = 7
max_articles_per_feed = 100
recursion = 0
no_stylesheets = True
encoding = "utf-8"
language = 'de_AT'
use_embedded_content = False
remove_empty_feeds = True
use_embedded_content = False
remove_empty_feeds = True
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
def get_article_url(self, article):
'''if the linked article is of kind artikel don't take it'''
if (article.link.count('artikel') > 0) :
return None
return article.link
def get_article_url(self, article):
    '''
    Return the link of ``article``, or None when that link contains the
    text 'artikel' (such entries are not taken by this recipe).
    '''
    link = article.link
    if 'artikel' in link:
        return None
    return link
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return soup
def preprocess_html(self, soup):
    '''
    Insert a Content-Type meta tag naming ``self.encoding`` as the first
    child of ``soup.head`` and return the same ``soup`` object.
    '''
    meta_markup = ''.join((
        '<meta http-equiv="Content-Type" content="text/html; charset=',
        self.encoding,
        '">',
    ))
    head = soup.head
    head.insert(0, meta_markup)
    return soup

View File

@ -8,36 +8,37 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisArtikel(BasicNewsRecipe):
title = u'Telepolis (Artikel)'
__author__ = 'Gerhard Aigner'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
category = 'news'
description = 'Telepolis Artikel'
oldest_article = 7
max_articles_per_feed = 100
recursion = 0
no_stylesheets = True
title = u'Telepolis (Artikel)'
__author__ = 'Gerhard Aigner'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
category = 'news'
description = 'Telepolis Artikel'
language = 'de_AT'
oldest_article = 7
max_articles_per_feed = 100
recursion = 0
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
use_embedded_content = False
remove_empty_feeds = True
remove_tags_before = dict(name='h1')
remove_tags = [dict(name='img')]
remove_tags_before = dict(name='h1')
remove_tags = [dict(name='img')]
feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]
feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher]
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
def print_version(self, url):
p = re.compile(r'\d{5}', re.DOTALL|re.IGNORECASE)
m = p.search(url)
return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="+ m.group() +"&mode=print"
def print_version(self, url):
    '''
    Build the print-view URL for an article.

    The first run of five digits found in ``url`` is used as the
    ``artikelnr`` query parameter. If ``url`` contains no such run the
    search yields None and the ``.group()`` call raises AttributeError.
    '''
    found = re.search(r'\d{5}', url, re.DOTALL|re.IGNORECASE)
    article_number = found.group()
    prefix = "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="
    suffix = "&mode=print"
    return prefix + article_number + suffix

View File

@ -1359,25 +1359,51 @@ class AutomaticNewsRecipe(BasicNewsRecipe):
self.web2disk_options.keep_only_tags = []
return BasicNewsRecipe.fetch_embedded_article(self, article, dir, f, a, num_of_feeds)
class DownloadedNewsRecipe(BasicNewsRecipe):
class LoginFailed(ValueError):
pass
def get_downloaded_recipe(self):
'Return path on local filesystem to downloaded recipe'
raise NotImplementedError
class CalibrePeriodical(BasicNewsRecipe):
#: Set this to the slug for the calibre periodical
calibre_periodicals_slug = None
LOG_IN = 'http://news.calibre-ebook.com/accounts/login'
needs_subscription = True
__author__ = 'calibre Periodicals'
def get_browser(self):
    '''
    Return a browser that has logged in to the calibre Periodicals service.

    The page at ``self.LOG_IN`` is opened, its ``login`` form is filled
    with ``self.username`` and ``self.password`` and submitted.

    Raises LoginFailed when the response to the submission does not
    contain a link to ``/my-account``.
    '''
    browser = BasicNewsRecipe.get_browser(self)
    browser.open(self.LOG_IN)
    browser.select_form(name='login')
    browser['username'] = self.username
    browser['password'] = self.password
    response = browser.submit()
    page = response.read()
    # The account link is only looked for in the post-login page; its
    # absence is treated as a failed login.
    if 'href="/my-account"' in page:
        return browser
    raise LoginFailed(
        'Failed to log in, check your username and password for'
        ' the calibre Periodicals service.')
def download(self):
import cStringIO
self.log('Fetching downloaded recipe')
rpath = self.get_downloaded_recipe()
raw = self.browser.open_novisit(
'http://news.calibre-ebook.com/subscribed_files/%s/0/temp.downloaded_recipe'
% self.calibre_periodicals_slug
).read()
f = cStringIO.StringIO(raw)
from calibre.utils.zipfile import ZipFile
zf = ZipFile(rpath)
zf = ZipFile(f)
zf.extractall()
zf.close()
from calibre.web.feeds.recipes import compile_recipe
from glob import glob
try:
recipe = compile_recipe(open(glob('*.downloaded_recipe')[0],
recipe = compile_recipe(open(glob('*.recipe')[0],
'rb').read())
self.conversion_options = recipe.conversion_options
except:
self.log.exception('Failed to compile downloaded recipe')
return os.path.abspath('index.html')

View File

@ -5,14 +5,16 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Builtin recipes.
'''
import re, imp, inspect, time, os
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, \
AutomaticNewsRecipe, CalibrePeriodical
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre import __appname__, english_sort
BeautifulSoup, time, english_sort
basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe)
basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
CalibrePeriodical)
_tdir = None
_crep = 0
def compile_recipe(src):