# Mirror of https://github.com/kovidgoyal/calibre.git
# (synced 2025-09-29 15:31:08 -04:00; 66 lines, 1.9 KiB, Python)
#!/usr/bin/env python2
# Recipe metadata.
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '14, January 2010'

# Bare string literal recording the site this recipe targets. Because it
# follows the assignments above, it is not the module docstring.
'''
http://media.digitalartsonline.co.uk/
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

# Module-level globals. Attribute lookups on a recipe instance (for example
# ``self.temp_files``) search the instance and its classes, not this module's
# namespace, so these two names are not visible through ``self``.
# NOTE(review): presumably these were meant to be class attributes of the
# recipe class defined in this file -- confirm before moving them, since
# doing so would change how articles are fetched.
temp_files = []
articles_are_obfuscated = True


class digiArts(BasicNewsRecipe):
    """Calibre news recipe for Digital Arts magazine.

    Articles are discovered through the RSS feed listed in ``feeds``; the
    ``keep_only_tags`` / ``remove_tags`` / ``remove_tags_after`` selectors
    describe which parts of each downloaded page are kept or dropped.
    """

    __author__ = 'Lorenzo Vigentini'
    description = 'Digital Arts - comprehensive coverage of the art of graphic design, 3D, animation, video, effects, web and interactive design, in print and online.'  # noqa
    cover_url = 'http://media.digitalartsonline.co.uk/graphics/logo_digital_arts.gif'

    # Catalogue metadata (the trailing space in the title is original).
    title = 'Digital Arts Magazine '
    publisher = 'IDG Communication'
    category = 'Multimedia, photo, video, computing, product reviews, editing, cameras, production'

    language = 'en'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = False
    # NOTE(review): the documented BasicNewsRecipe option appears to be
    # spelled ``recursions``; confirm whether ``recursion`` has any effect.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # get_obfuscated_article() appends to ``self.temp_files``. The module-level
    # list of the same name is not reachable through ``self``, so provide the
    # list on the class (harmless if the base class already defines one).
    temp_files = []

    def get_obfuscated_article(self, url):
        """Save the print view of an article to a temporary file.

        Opens ``url`` with ``'&print'`` appended, follows a link from the
        browser, and writes the response body to a new
        ``PersistentTemporaryFile`` that is also recorded in
        ``self.temp_files``.

        NOTE(review): this class does not set ``articles_are_obfuscated``
        (only a module-level variable of that name exists), so confirm
        whether this method is ever invoked by the framework.

        :param url: URL of the article, as a string.
        :return: the ``name`` of the temporary file holding the HTML.
        """
        br = self.get_browser()
        br.open(url + '&print')

        # NOTE(review): both a positional URL string and ``nr=0`` are passed
        # to follow_link(); verify this matches the browser's API.
        response = br.follow_link(url, nr=0)
        html = response.read()

        article_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(article_file)
        try:
            article_file.write(html)
        finally:
            # Close the handle even if the write fails so it is not leaked.
            article_file.close()
        return article_file.name

    keep_only_tags = [
        dict(name='div', attrs={'id': ['articleHeader', 'articleContent']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['submissionBar', 'mpuContainer']}),
        dict(name='div', attrs={'id': ['articleSidebar', 'articleFooter']})
    ]
    remove_tags_after = [
        dict(name='p', attrs={'id': 'articlePageList'})
    ]
    feeds = [
        (u'Content', u'http://rss.feedsportal.com/c/662/f/8410/index.rss')
    ]