mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix Wired recipe
This commit is contained in:
parent
e89c1392a1
commit
3829b4c20f
@@ -2,35 +2,24 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Wired(BasicNewsRecipe):
|
class Wired(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Wired.com'
|
title = 'Wired.com'
|
||||||
__author__ = 'David Chen <SonyReader<at>DaveChen<dot>org>'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'Technology news'
|
description = 'Technology news'
|
||||||
timefmt = ' [%Y%b%d %H%M]'
|
timefmt = ' [%Y%b%d %H%M]'
|
||||||
language = _('English')
|
language = _('English')
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#html2lrf_options = ['--base-font-size', '16']
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
remove_tags_before = dict(name='div', id='content')
|
||||||
|
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
|
||||||
[
|
'footer', 'advertisement', 'blog_subscription_unit',
|
||||||
|
'brightcove_component']),
|
||||||
## Remove any banners/links/ads/cruft before the body of the article.
|
{'class':'entryActions'},
|
||||||
(r'<body.*?((<div id="article_body">)|(<div id="st-page-maincontent">)|(<div id="containermain">)|(<p class="ap-story-p">)|(<!-- img_nav -->))', lambda match: '<body><div>'),
|
dict(name=['noscript', 'script'])]
|
||||||
|
|
||||||
## Remove any links/ads/comments/cruft from the end of the body of the article.
|
|
||||||
(r'((<!-- end article content -->)|(<div id="st-custom-afterpagecontent">)|(<p class="ap-story-p">©)|(<div class="entry-footer">)|(<div id="see_also">)|(<p>Via <a href=)|(<div id="ss_nav">)).*?</html>', lambda match : '</div></body></html>'),
|
|
||||||
|
|
||||||
## Correctly embed in-line images by removing the surrounding javascript that will be ignored in the conversion
|
|
||||||
(r'<a.*?onclick.*?>.*?(<img .*?>)', lambda match: match.group(1),),
|
|
||||||
|
|
||||||
]
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top News', 'http://feeds.wired.com/wired/index'),
|
('Top News', 'http://feeds.wired.com/wired/index'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user