diff --git a/recipes/nme.recipe b/recipes/nme.recipe
index b5f685b911..d65f7fe619 100644
--- a/recipes/nme.recipe
+++ b/recipes/nme.recipe
@@ -1,25 +1,91 @@
+import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import Tag
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'New Musical Express Magazine'
- description = 'Author D.Asbury. UK Rock & Pop Mag. '
- __author__ = 'Dave Asbury'
- # last updated 17/5/13 News feed url altered
+ description = 'UK Rock & Pop Mag.'  # reused below as the 'comment' conversion option
+ __author__ = 'Dave Asbury, Inge Aning'
+ category = 'Music, Film, Tv'  # reused below as the 'tags' conversion option
+ publisher = 'Time Inc. (UK) Ltd.'  # reused below as the 'publisher' conversion option
+ '''
+ ' updated 11/3/2015
+ ' feeds url
+ ' cover and masthead url
+ ' fix for a bug that prevented some pages from rendering
+ ' changes to website
+ '''
+
remove_empty_feeds = True
- remove_javascript = True
+ encoding = 'utf-8'  # encoding override for downloaded pages (see calibre BasicNewsRecipe.encoding)
+ remove_javascript = True
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 20
- #auto_cleanup = True
- language = 'en_GB'
+ auto_cleanup = False  # now set explicitly; the replaced line was only a commented-out `auto_cleanup = True`
+ language = 'en'  # NOTE(review): replaces the region-specific 'en_GB' - confirm dropping the region is intended
compress_news_images = True
+ simultaneous_downloads = 20  # NOTE(review): presumably the number of parallel fetches - confirm the site tolerates 20
+ use_embedded_content = False  # presumably forces fetching article pages instead of using feed-embedded content - confirm
+ recursions = 0  # presumably the link-follow depth on article pages (0 = follow none) - confirm
+
+ conversion_options = {  # conversion metadata; every value reuses a class attribute defined above
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ }
+
+ feeds = [  # (section title, RSS feed URL) pairs
+ (u'NME News',u'http://www.nme.com/rss/news'),
+ (u'Reviews',u'http://www.nme.com/rss/reviews'),
+ (u'Blogs',u'http://www.nme.com/rss/blogs'),
+ ]
+
+ keep_only_tags = [  # tag specs for the part of each page that is kept
+ dict(name='div',attrs={'id':'content'}),  # matches <div id="content">
+ ]
+
+ remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
+ 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']  # HTML attribute names to strip; note 'alt' is included alongside the layout attributes
+
+ remove_tags = [  # tag specs to drop; each matches a tag name plus a class/id given as an exact string or a case-insensitive regex
+ dict(name='meta'),  # every <meta> tag, no attribute filter
+ dict(name='span',attrs={'class':'article_info'}),
+ dict(name='div',attrs={'class':'breadcrumbs'}),
+ dict(name='div',attrs={'class':'mugshot'}),
+ dict(name='div',attrs={'class':'header'}),
+ dict(name='div',attrs={'class':re.compile('youtube.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'class':re.compile('socialbuttons.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'class':'clear_both'}),
+ dict(name='div',attrs={'class':re.compile('headline.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'class':'member-signedout'}),
+ dict(name='div',attrs={'class':re.compile('prev_next.*',re.IGNORECASE)}),  # same pattern is also applied to <ul> further down
+ dict(name='div',attrs={'class':re.compile('article_related.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'class':re.compile('feature_bar.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'class':re.compile('ebay.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'id':re.compile('morenews.*',re.IGNORECASE)}),  # from here the div filters match on id rather than class
+ dict(name='div',attrs={'id':re.compile('ticketspopup.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'id':re.compile('ratemy_logprompt.*',re.IGNORECASE)}),
+ dict(name='div',attrs={'id':re.compile('related_artist.*',re.IGNORECASE)}),
+ dict(name='img',attrs={'class':re.compile('video_play_large.*',re.IGNORECASE)}),
+ dict(name='ul',attrs={'class':re.compile('prev_next.*',re.IGNORECASE)}),
+ dict(name='ul',attrs={'class':re.compile('nme_store.*',re.IGNORECASE)}),
+ dict(name='p',attrs={'class':re.compile('top',re.IGNORECASE)}),  # NOTE(review): only pattern without a trailing '.*' - confirm this is intentional
+ dict(name='table',attrs={'class':re.compile('tickets.*',re.IGNORECASE)}),
+ ]
+
+ masthead_url = 'http://default.media.ipcdigital.co.uk/300/000001014/e1ab_orh100000w300/NME-logo.jpg'  # hard-coded image URL (file name indicates the NME logo)
+
def get_cover_url(self):
- soup = self.index_to_soup('http://www.nme.com/component/subscribe')
- cov = soup.find(attrs={'id' : 'magazine_cover'})
+ magazine_page_raw = self.index_to_soup('http://www.nme.com/magazine', raw=True)
+ magazine_page_raw = re.sub(r'', re.DOTALL|re.IGNORECASE), lambda h1: ''),
+ (re.compile(r'',re.IGNORECASE), lambda h2: ''),
+ (re.compile(r'p:not(.date){
+ font-weight:bold;
+ }
+ '''