from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


def new_tag(soup, name, attrs=()):
    """Create a new tag called *name* that belongs to *soup*.

    When *soup* exposes its own ``new_tag`` factory, that factory is used
    and *attrs* is passed to it as a dict. Otherwise the tag is built
    directly with the imported ``Tag`` class, passing ``None`` in place of
    an empty *attrs*.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))


class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Recipe for the BuckMasters 'Recipes' feed."""

    # Descriptive metadata for the recipe.
    title = 'BuckMasters In The Kitchen'
    language = 'en'
    __author__ = 'TonytheBookworm & Starson17'
    description = 'Learn how to cook all those outdoor varments'
    publisher = 'BuckMasters.com'
    category = 'food,cooking,recipes'

    # Download limits and conversion settings.
    oldest_article = 365
    max_articles_per_feed = 100
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.buckmasters.com/Portals/_default/Skins/BM_10/images/header_bg.jpg'

    # Tag matchers used to select the article content.
    keep_only_tags = [
        {'name': 'table', 'attrs': {'class': ['containermaster_black']}},
    ]
    remove_tags_after = [
        {'name': 'div', 'attrs': {'align': ['left']}},
    ]

    feeds = [
        ('Recipes', 'http://www.buckmasters.com/DesktopModules/DnnForge%20-%20NewsArticles/RSS.aspx?TabID=292&ModuleID=658&MaxCount=25'),
    ]

    def preprocess_html(self, soup):
        """Remove the selected menu entry and give each image its own paragraph.

        * The parent of the first ``<a class="MenuTopSelected">`` element,
          if one is found, is removed from the tree.
        * An image whose parent is an ``<a>`` has that link replaced by a
          new ``<p>`` containing only the image.
        * An image whose parent is a ``<p>`` with non-empty text is moved
          into a new ``<p>``; a new ``<div>`` takes the old paragraph's
          place and holds the image paragraph followed by the old one.

        Returns the same *soup* object, modified in place.
        """
        selected_menu = soup.find('a', attrs={'class': ['MenuTopSelected']})
        if selected_menu:
            selected_menu.parent.extract()

        for image in soup.findAll('img'):
            holder = image.parent
            holder_name = holder.name

            if holder_name == 'a':
                paragraph = new_tag(soup, 'p')
                paragraph.insert(0, image)
                holder.replaceWith(paragraph)
                continue

            # Only paragraphs that carry text need the image split out.
            if holder_name != 'p' or self.tag_to_string(holder) == '':
                continue

            wrapper = new_tag(soup, 'div')
            paragraph = new_tag(soup, 'p')
            paragraph.insert(0, image)
            # Swap the wrapper in first, then re-home the old paragraph
            # inside it, after the image paragraph.
            holder.replaceWith(wrapper)
            wrapper.insert(0, paragraph)
            wrapper.insert(1, holder)

        return soup