From 29f33524ffb3b9af36bc41fd9b14cd9b4e1f4aa0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 5 Dec 2010 17:42:49 -0700
Subject: [PATCH] Fix #7810 (Updated recipe for The New Yorker)

---
 resources/recipes/new_yorker.recipe | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/new_yorker.recipe b/resources/recipes/new_yorker.recipe
index 1a2091cd52..0c95aa358d 100644
--- a/resources/recipes/new_yorker.recipe
+++ b/resources/recipes/new_yorker.recipe
@@ -22,8 +22,19 @@ class NewYorker(BasicNewsRecipe):
     masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
     extra_css = """
         body {font-family: "Times New Roman",Times,serif}
-        .articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
-        .rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
+        .articleauthor{color: #9F9F9F;
+            font-family: Arial, sans-serif;
+            font-size: small;
+            text-transform: uppercase}
+        .rubric,.dd,h6#credit{color: #CD0021;
+            font-family: Arial, sans-serif;
+            font-size: small;
+            text-transform: uppercase}
+        .descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
+        .dd,h6#credit{color: gray}
+        .c{display: block}
+        .caption,h2#articleintro{font-style: italic}
+        .caption{font-size: small}
     """
 
     conversion_options = {
@@ -39,7 +50,7 @@ class NewYorker(BasicNewsRecipe):
     ]
     remove_tags = [
         dict(name=['meta','iframe','base','link','embed','object'])
-        ,dict(attrs={'class':['utils','articleRailLinks','icons'] })
+        ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] })
         ,dict(attrs={'id':['show-header','show-footer'] })
     ]
     remove_attributes = ['lang']
@@ -59,3 +70,13 @@ class NewYorker(BasicNewsRecipe):
             cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
         return cover_url
 
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        auth = soup.find(attrs={'id':'articleauthor'})
+        if auth:
+            alink = auth.find('a')
+            if alink and alink.string is not None:
+                txt = alink.string
+                alink.replaceWith(txt)
+        return soup