From c68a5c8ab17f97f639328cfe2bcdba1a9f343640 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Mar 2019 08:06:25 +0530 Subject: [PATCH] Port Tag creation in recipes to work with any version of BeautifulSoup --- recipes/24sata.recipe | 11 +++++++++-- recipes/ajc.recipe | 9 ++++++++- recipes/ap.recipe | 9 ++++++++- recipes/bloomberg_columns.recipe | 9 ++++++++- recipes/boston.com.recipe | 9 ++++++++- recipes/buckmasters.recipe | 21 ++++++++++++++------- recipes/calgary_herald.recipe | 15 +++++++++++---- recipes/climate_progress.recipe | 11 +++++++++-- recipes/degentenaar.recipe | 11 +++++++++-- recipes/dnevni_avaz.recipe | 11 +++++++++-- recipes/dnevnik_cro.recipe | 11 +++++++++-- recipes/dunyahalleri.recipe | 19 +++++++++++++------ recipes/dunyahalleri_haftaninozeti.recipe | 15 +++++++++++---- recipes/economist.recipe | 11 +++++++++-- recipes/economist_free.recipe | 11 +++++++++-- recipes/edmonton_journal.recipe | 15 +++++++++++---- recipes/elperiodico_catalan.recipe | 9 ++++++++- recipes/elperiodico_spanish.recipe | 9 ++++++++- recipes/eltiempo_hn.recipe | 11 +++++++++-- recipes/estadao.recipe | 11 +++++++++-- recipes/fastcompany.recipe | 11 +++++++++-- recipes/fokkeensukke.recipe | 9 ++++++++- recipes/glennbeck.recipe | 9 ++++++++- recipes/hln.recipe | 11 +++++++++-- recipes/hoy.recipe | 9 ++++++++- recipes/hrt.recipe | 11 +++++++++-- recipes/independent.recipe | 9 ++++++++- recipes/joop.recipe | 9 ++++++++- recipes/jutarnji.recipe | 11 +++++++++-- recipes/laprensa_hn.recipe | 11 +++++++++-- recipes/latribuna.recipe | 11 +++++++++-- recipes/lavanguardia.recipe | 9 ++++++++- recipes/lenta_ru.recipe | 13 ++++++++++--- recipes/levante.recipe | 9 ++++++++- recipes/moneycontrol.recipe | 12 ++++++++++-- recipes/montreal_gazette.recipe | 15 +++++++++++---- recipes/nacional_cro.recipe | 11 +++++++++-- recipes/natgeo.recipe | 9 ++++++++- recipes/ncrnext.recipe | 13 ++++++++++--- recipes/new_yorker.recipe | 13 ++++++++++--- recipes/noaa.recipe | 11 +++++++++-- 
recipes/nrc.nl.recipe | 9 ++++++++- recipes/nspm.recipe | 13 ++++++++++--- recipes/nytimes.recipe | 9 ++++++++- recipes/nytimes_sub.recipe | 9 ++++++++- recipes/nzz_webpaper.recipe | 9 ++++++++- recipes/ottawa_citizen.recipe | 15 +++++++++++---- recipes/pagina_12_print_ed.recipe | 11 +++++++++-- recipes/pobjeda.recipe | 11 +++++++++-- recipes/pressonline.recipe | 9 ++++++++- recipes/revista_muy.recipe | 9 ++++++++- recipes/rts.recipe | 11 +++++++++-- recipes/sarajevo_x.recipe | 9 ++++++++- recipes/scmp.recipe | 9 ++++++++- recipes/southernstar.recipe | 11 +++++++++-- recipes/tijd.recipe | 11 +++++++++-- recipes/uncrate.recipe | 11 +++++++++-- recipes/vancouver_province.recipe | 15 +++++++++++---- recipes/vancouver_sun.recipe | 15 +++++++++++---- recipes/vecernji_list.recipe | 11 +++++++++-- recipes/vedomosti.recipe | 9 ++++++++- recipes/veintitres.recipe | 11 +++++++++-- recipes/vic_times.recipe | 11 +++++++++-- recipes/vrijnederland.recipe | 9 ++++++++- 64 files changed, 580 insertions(+), 131 deletions(-) diff --git a/recipes/24sata.recipe b/recipes/24sata.recipe index d7f080e139..487c29b1a0 100644 --- a/recipes/24sata.recipe +++ b/recipes/24sata.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Cro24Sata(BasicNewsRecipe): title = '24 Sata - Hr' __author__ = 'Darko Miletic' @@ -46,9 +53,9 @@ class Cro24Sata(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, 
mcharset) diff --git a/recipes/ajc.recipe b/recipes/ajc.recipe index 0b884373b7..95be4ab47a 100644 --- a/recipes/ajc.recipe +++ b/recipes/ajc.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class AdvancedUserRecipe1282101454(BasicNewsRecipe): now = datetime.datetime.now() title = 'The AJC' @@ -118,7 +125,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): comma = ', ' article.author = names if len(names) > 0: - tag = Tag(soup, 'div', [('class', 'cm-story-author')]) + tag = new_tag(soup, 'div', [('class', 'cm-story-author')]) tag.append("by: ") tag.append(names) meta = soup.find('div', attrs={'class': 'cm-story-meta'}) diff --git a/recipes/ap.recipe b/recipes/ap.recipe index 6d395af474..f3d232b7b4 100644 --- a/recipes/ap.recipe +++ b/recipes/ap.recipe @@ -17,6 +17,13 @@ def classes(classes): ) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class AssociatedPress(BasicNewsRecipe): title = u'Associated Press' @@ -76,7 +83,7 @@ class AssociatedPress(BasicNewsRecipe): def preprocess_html(self, soup, *a): for meta in soup.findAll('meta', attrs=dict(name="twitter:image:alt")): for div in soup.findAll(**classes('LeadFeature')): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') img['src'] = meta['content'] div.insert(0, img) return soup diff --git a/recipes/bloomberg_columns.recipe b/recipes/bloomberg_columns.recipe index 552e3278d0..301f26349e 100644 --- a/recipes/bloomberg_columns.recipe +++ b/recipes/bloomberg_columns.recipe @@ -127,6 +127,13 @@ class BloombergContributor: return self._name +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + 
if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class BloombergContributors(BasicNewsRecipe): title = u'Bloomberg, Editorial Contributors' description = 'Articles from Bloomberg.com contributors' @@ -175,7 +182,7 @@ class BloombergContributors(BasicNewsRecipe): .strftime("%B %d, %Y %I:%M %p") + " UTC" except: parsed_time = time_stamp - insert_tag = Tag(soup, "p", [("class", "user-inserted")]) + insert_tag = new_tag(soup, "p", [("class", "user-inserted")]) insert_tag.insert(0, parsed_time) soup.time.replaceWith(insert_tag) diff --git a/recipes/boston.com.recipe b/recipes/boston.com.recipe index 05835bdfda..9f3fe59065 100644 --- a/recipes/boston.com.recipe +++ b/recipes/boston.com.recipe @@ -10,6 +10,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class BostonGlobeSubscription(BasicNewsRecipe): title = "Boston Globe Subscription" @@ -204,7 +211,7 @@ class BostonGlobeSubscription(BasicNewsRecipe): imgLink = main.find("a", "comic") img = imgLink.img - body = Tag(soup, "body") + body = new_tag(soup, "body") body.insert(0, title) body.insert(1, byline) body.insert(2, img) diff --git a/recipes/buckmasters.recipe b/recipes/buckmasters.recipe index d72d2f52e1..ef198c0556 100644 --- a/recipes/buckmasters.recipe +++ b/recipes/buckmasters.recipe @@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class AdvancedUserRecipe1282101454(BasicNewsRecipe): title = 'BuckMasters In The Kitchen' language = 'en' @@ -28,15 +35,15 @@ class 
AdvancedUserRecipe1282101454(BasicNewsRecipe): for img_tag in soup.findAll('img'): parent_tag = img_tag.parent if parent_tag.name == 'a': - new_tag = Tag(soup, 'p') - new_tag.insert(0, img_tag) - parent_tag.replaceWith(new_tag) + ntag = new_tag(soup, 'p') + ntag.insert(0, img_tag) + parent_tag.replaceWith(ntag) elif parent_tag.name == 'p': if not self.tag_to_string(parent_tag) == '': - new_div = Tag(soup, 'div') - new_tag = Tag(soup, 'p') - new_tag.insert(0, img_tag) + new_div = new_tag(soup, 'div') + ntag = new_tag(soup, 'p') + ntag.insert(0, img_tag) parent_tag.replaceWith(new_div) - new_div.insert(0, new_tag) + new_div.insert(0, ntag) new_div.insert(1, parent_tag) return soup diff --git a/recipes/calgary_herald.recipe b/recipes/calgary_herald.recipe index d3c4e9d554..a0e3be0642 100644 --- a/recipes/calgary_herald.recipe +++ b/recipes/calgary_herald.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class CanWestPaper(BasicNewsRecipe): postmedia_index_pages = [ @@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe): pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps if (soup.find('div', attrs={'id': 'storycontent'}) is None): - allpics = Tag(soup, 'div') + allpics = new_tag(soup, 'div') first_img = pgall.find('div', 'storyimage') if first_img is not None: first_img.extract() tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) if tlist is not None: for atag in tlist.findAll('a'): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') srcpre, sep, srcpost = atag.img[ 'src'].partition('?') img['src'] = srcpre - pdesc = Tag(soup, 'p') + pdesc = new_tag(soup, 'p') pdesc.insert(0, atag.img['alt']) pdesc['class'] = 'photocaption' - div = 
Tag(soup, 'div') + div = new_tag(soup, 'div') div.insert(0, pdesc) div.insert(0, img) allpics.append(div) diff --git a/recipes/climate_progress.recipe b/recipes/climate_progress.recipe index db652acafc..09e6beb20f 100644 --- a/recipes/climate_progress.recipe +++ b/recipes/climate_progress.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class ClimateProgress(BasicNewsRecipe): title = 'Climate Progress' __author__ = 'Darko Miletic' @@ -47,9 +54,9 @@ class ClimateProgress(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/degentenaar.recipe b/recipes/degentenaar.recipe index 2c08ee7859..1b64d86e45 100644 --- a/recipes/degentenaar.recipe +++ b/recipes/degentenaar.recipe @@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class DeGentenaarOnline(BasicNewsRecipe): title = 'De Gentenaar' __author__ = 'Darko Miletic' @@ -69,9 +76,9 @@ class DeGentenaarOnline(BasicNewsRecipe): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - 
mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/dnevni_avaz.recipe b/recipes/dnevni_avaz.recipe index 45c63466d6..45916f70a7 100644 --- a/recipes/dnevni_avaz.recipe +++ b/recipes/dnevni_avaz.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class DnevniAvaz(BasicNewsRecipe): title = 'Dnevni Avaz' __author__ = 'Darko Miletic' @@ -57,9 +64,9 @@ class DnevniAvaz(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/dnevnik_cro.recipe b/recipes/dnevnik_cro.recipe index 02f4a3bcd8..b0a948dab5 100644 --- a/recipes/dnevnik_cro.recipe +++ b/recipes/dnevnik_cro.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class DnevnikCro(BasicNewsRecipe): title = 'Dnevnik - Hr' __author__ = 'Darko Miletic' @@ -58,9 +65,9 @@ class DnevnikCro(BasicNewsRecipe): item[attrib] = '' del item[attrib] - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), 
("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/dunyahalleri.recipe b/recipes/dunyahalleri.recipe index af34bb1b1f..055bbabbff 100644 --- a/recipes/dunyahalleri.recipe +++ b/recipes/dunyahalleri.recipe @@ -16,6 +16,13 @@ __license__ = 'GPL v3' __copyright__ = '2017, sukru alatas / alatas.org' +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class DunyaHalleri(BasicNewsRecipe): title = 'Dünya Halleri' description = 'Gözden Kaçanlar Rehberi' @@ -78,20 +85,20 @@ class DunyaHalleri(BasicNewsRecipe): # title insert article_title = soup.title.contents[0] article_title.replace(' - Dünya Halleri'.decode('utf-8', 'replace'), '') - h2 = Tag(soup, 'h2') + h2 = new_tag(soup, 'h2') h2.append(article_title) span.insert(0, h2) # featured image insert meta = soup.findAll('meta', {'property': 'og:image'}, limit=1)[0] if meta: - img = Tag(soup, 'img') + img = new_tag(soup, 'img') img.attrs = [('src', meta['content'])] span.insert(1, img) # gallery normalization for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}): - p = Tag(soup, 'p') + p = new_tag(soup, 'p') for img in div.findAll('img'): img.attrs = [(key, value) for key, value in img.attrs if key in ['src']] @@ -102,9 +109,9 @@ class DunyaHalleri(BasicNewsRecipe): # this block finds the cover image for each embeded youtube video then # changes it to "a href" and "img" for iframe in soup.findAll('iframe'): - a = Tag(soup, 'a') - caption = Tag(soup, 'pre') - img = Tag(soup, 'img') + a = new_tag(soup, 'a') + caption = new_tag(soup, 'pre') + img = new_tag(soup, 'img') m = re.match( r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)' diff --git 
a/recipes/dunyahalleri_haftaninozeti.recipe b/recipes/dunyahalleri_haftaninozeti.recipe index 68007f8c8f..37d90bda4f 100644 --- a/recipes/dunyahalleri_haftaninozeti.recipe +++ b/recipes/dunyahalleri_haftaninozeti.recipe @@ -16,6 +16,13 @@ __license__ = 'GPL v3' __copyright__ = '2017, sukru alatas / alatas.org' +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): title = 'Dünya Halleri - Haftanın Özeti' description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,' @@ -156,7 +163,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): def preprocess_html(self, soup): # gallery normalization for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}): - p = Tag(soup, 'p') + p = new_tag(soup, 'p') for img in div.findAll('img'): img.attrs = [(key, value) for key, value in img.attrs if key in ['src']] @@ -167,9 +174,9 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): # this block finds the cover image for each embeded youtube video then # changes it to "a href" and "img" for iframe in soup.findAll('iframe'): - a = Tag(soup, 'a') - caption = Tag(soup, 'pre') - img = Tag(soup, 'img') + a = new_tag(soup, 'a') + caption = new_tag(soup, 'pre') + img = new_tag(soup, 'img') m = re.match( r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)' diff --git a/recipes/economist.recipe b/recipes/economist.recipe index 53cd62d2b0..0b9a789a5b 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -19,6 +19,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NoArticles(Exception): pass @@ -266,11 +273,11 @@ class 
Economist(BasicNewsRecipe): for table in list(self.eco_find_image_tables(soup)): caption = table.find('font') img = table.find('img') - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div['style'] = 'text-align:left;font-size:70%' ns = NavigableString(self.tag_to_string(caption)) div.insert(0, ns) - div.insert(1, Tag(soup, 'br')) + div.insert(1, new_tag(soup, 'br')) del img['width'] del img['height'] img.extract() diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index 53cd62d2b0..0b9a789a5b 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -19,6 +19,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NoArticles(Exception): pass @@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe): for table in list(self.eco_find_image_tables(soup)): caption = table.find('font') img = table.find('img') - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div['style'] = 'text-align:left;font-size:70%' ns = NavigableString(self.tag_to_string(caption)) div.insert(0, ns) - div.insert(1, Tag(soup, 'br')) + div.insert(1, new_tag(soup, 'br')) del img['width'] del img['height'] img.extract() diff --git a/recipes/edmonton_journal.recipe b/recipes/edmonton_journal.recipe index 9e7ae425bf..5e6b671dd3 100644 --- a/recipes/edmonton_journal.recipe +++ b/recipes/edmonton_journal.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class CanWestPaper(BasicNewsRecipe): postmedia_index_pages = [ @@ -218,21 +225,21 @@ class 
CanWestPaper(BasicNewsRecipe): pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps if (soup.find('div', attrs={'id': 'storycontent'}) is None): - allpics = Tag(soup, 'div') + allpics = new_tag(soup, 'div') first_img = pgall.find('div', 'storyimage') if first_img is not None: first_img.extract() tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) if tlist is not None: for atag in tlist.findAll('a'): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') srcpre, sep, srcpost = atag.img[ 'src'].partition('?') img['src'] = srcpre - pdesc = Tag(soup, 'p') + pdesc = new_tag(soup, 'p') pdesc.insert(0, atag.img['alt']) pdesc['class'] = 'photocaption' - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div.insert(0, pdesc) div.insert(0, img) allpics.append(div) diff --git a/recipes/elperiodico_catalan.recipe b/recipes/elperiodico_catalan.recipe index 5ba20c3724..6d8be7749a 100644 --- a/recipes/elperiodico_catalan.recipe +++ b/recipes/elperiodico_catalan.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class ElPeriodico_cat(BasicNewsRecipe): title = 'El Periodico de Catalunya' __author__ = 'Jordi Balcells/Darko Miletic' @@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe): return url.replace('/default.asp?', '/print.asp?') def preprocess_html(self, soup): - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mcharset) for item in soup.findAll(style=True): diff --git a/recipes/elperiodico_spanish.recipe b/recipes/elperiodico_spanish.recipe index dd5bc8f134..eb8f0fab6a 100644 --- a/recipes/elperiodico_spanish.recipe +++ b/recipes/elperiodico_spanish.recipe 
@@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class ElPeriodico_cat(BasicNewsRecipe): title = 'El Periodico de Catalunya' __author__ = 'Jordi Balcells/Darko Miletic' @@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe): return url.replace('/default.asp?', '/print.asp?') def preprocess_html(self, soup): - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mcharset) for item in soup.findAll(style=True): diff --git a/recipes/eltiempo_hn.recipe b/recipes/eltiempo_hn.recipe index 1a8d3808a3..8ca1264097 100644 --- a/recipes/eltiempo_hn.recipe +++ b/recipes/eltiempo_hn.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class ElTiempoHn(BasicNewsRecipe): title = 'El Tiempo - Honduras' __author__ = 'Darko Miletic' @@ -36,9 +43,9 @@ class ElTiempoHn(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/estadao.recipe b/recipes/estadao.recipe index b2f8215528..5e7bf6b43f 100644 --- a/recipes/estadao.recipe +++ b/recipes/estadao.recipe @@ -6,6 +6,13 @@ from 
calibre.utils.magick import Image, PixelWand from urllib2 import Request, urlopen, URLError +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Estadao(BasicNewsRecipe): THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here LANGUAGE = 'pt_br' @@ -76,11 +83,11 @@ class Estadao(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] if not soup.find(attrs={'http-equiv': 'Content-Language'}): - meta0 = Tag(soup, 'meta', [ + meta0 = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.LANGHTM)]) soup.head.insert(0, meta0) if not soup.find(attrs={'http-equiv': 'Content-Type'}): - meta1 = Tag(soup, 'meta', [ + meta1 = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)]) soup.head.insert(0, meta1) return soup diff --git a/recipes/fastcompany.recipe b/recipes/fastcompany.recipe index cfd9dd65a6..e19510aff1 100644 --- a/recipes/fastcompany.recipe +++ b/recipes/fastcompany.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class FastCompany(BasicNewsRecipe): title = 'Fast Company' __author__ = 'Darko Miletic' @@ -43,9 +50,9 @@ class FastCompany(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) 
soup.head.insert(1, mcharset) diff --git a/recipes/fokkeensukke.recipe b/recipes/fokkeensukke.recipe index 906b55c03d..d8fac65ff1 100644 --- a/recipes/fokkeensukke.recipe +++ b/recipes/fokkeensukke.recipe @@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class FokkeEnSukkeRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'kwetal' @@ -79,7 +86,7 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe): if img: title = img['alt'] - tag = Tag(soup, 'div', [('class', 'title')]) + tag = new_tag(soup, 'div', [('class', 'title')]) tag.insert(0, title) cartoon.insert(0, tag) diff --git a/recipes/glennbeck.recipe b/recipes/glennbeck.recipe index 0b40251333..186becf2f5 100644 --- a/recipes/glennbeck.recipe +++ b/recipes/glennbeck.recipe @@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, Comment +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class GlennBeckRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'kwetal' @@ -71,7 +78,7 @@ class GlennBeckRecipe(BasicNewsRecipe): if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == ' '): # This is our content; ignore the rest. 
- para = Tag(freshSoup, 'p') + para = new_tag(freshSoup, 'p') para.append(raw) freshSoup.body.append(para) counter += 1 diff --git a/recipes/hln.recipe b/recipes/hln.recipe index f1369f049d..0ecc477c8a 100644 --- a/recipes/hln.recipe +++ b/recipes/hln.recipe @@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class HLN_be(BasicNewsRecipe): title = 'Het Belang Van Limburg' __author__ = 'Darko Miletic and Sujata Raman' @@ -53,9 +60,9 @@ class HLN_be(BasicNewsRecipe): del item['style'] soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/hoy.recipe b/recipes/hoy.recipe index 20e1b3ed22..104e04ab7c 100644 --- a/recipes/hoy.recipe +++ b/recipes/hoy.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Hoy(BasicNewsRecipe): title = 'HOY' __author__ = 'Fco Javier Nieto' @@ -61,7 +68,7 @@ class Hoy(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['dir'] = self.direction - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mcharset) for item in soup.findAll(style=True): diff --git a/recipes/hrt.recipe 
b/recipes/hrt.recipe index f84133e939..d2fcf3bcdc 100644 --- a/recipes/hrt.recipe +++ b/recipes/hrt.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class HRT(BasicNewsRecipe): title = 'HRT: Vesti' __author__ = 'Darko Miletic' @@ -62,9 +69,9 @@ class HRT(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/independent.recipe b/recipes/independent.recipe index 6ca4273895..a4569715fb 100644 --- a/recipes/independent.recipe +++ b/recipes/independent.recipe @@ -11,6 +11,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class TheIndependentNew(BasicNewsRecipe): title = u'The Independent' @@ -65,7 +72,7 @@ class TheIndependentNew(BasicNewsRecipe): for li in div.findAll('li', attrs={'data-gallery-legend': True}): src = imgs.get(li['data-gallery-legend']) if src is not None: - img = Tag(soup, 'img') + img = new_tag(soup, 'img') img['src'] = src img['style'] = 'display:block' li.append(img) diff --git a/recipes/joop.recipe b/recipes/joop.recipe index d2cd92ef4c..4cbdf39eca 100644 --- a/recipes/joop.recipe +++ b/recipes/joop.recipe @@ -3,6 +3,13 @@ from calibre.ebooks.BeautifulSoup import Tag 
import re +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class JoopRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'kwetal' @@ -89,7 +96,7 @@ class JoopRecipe(BasicNewsRecipe): span = h2.find('span', 'info') if span: txt = span.find(text=True) - div = Tag(soup, 'div', attrs=[('class', 'joop_date')]) + div = new_tag(soup, 'div', attrs=[('class', 'joop_date')]) div.append(txt) h2.replaceWith(div) diff --git a/recipes/jutarnji.recipe b/recipes/jutarnji.recipe index 9397cb3067..ddfb0e6592 100644 --- a/recipes/jutarnji.recipe +++ b/recipes/jutarnji.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Jutarnji(BasicNewsRecipe): title = 'Jutarnji' __author__ = 'Darko Miletic' @@ -68,9 +75,9 @@ class Jutarnji(BasicNewsRecipe): item[attrib] = '' del item[attrib] - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/laprensa_hn.recipe b/recipes/laprensa_hn.recipe index 88c3e58c54..37e50e4b5a 100644 --- a/recipes/laprensa_hn.recipe +++ b/recipes/laprensa_hn.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class 
LaPrensaHn(BasicNewsRecipe): title = 'La Prensa - Honduras' __author__ = 'Darko Miletic' @@ -46,9 +53,9 @@ class LaPrensaHn(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/latribuna.recipe b/recipes/latribuna.recipe index 3d8709739d..5176c4c4b1 100644 --- a/recipes/latribuna.recipe +++ b/recipes/latribuna.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class LaTribuna(BasicNewsRecipe): title = 'La Tribuna - Honduras' __author__ = 'Darko Miletic' @@ -49,9 +56,9 @@ class LaTribuna(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/lavanguardia.recipe b/recipes/lavanguardia.recipe index c01dd3c57f..6165d1a648 100644 --- a/recipes/lavanguardia.recipe +++ b/recipes/lavanguardia.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return 
impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class LaVanguardia(BasicNewsRecipe): title = 'La Vanguardia Digital' __author__ = 'Darko Miletic' @@ -61,7 +68,7 @@ class LaVanguardia(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['dir'] = self.direction - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mcharset) for item in soup.findAll(style=True): diff --git a/recipes/lenta_ru.recipe b/recipes/lenta_ru.recipe index 583954ea75..a680013cae 100644 --- a/recipes/lenta_ru.recipe +++ b/recipes/lenta_ru.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe import re +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class LentaRURecipe(BasicNewsRecipe): title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438' __author__ = 'Nikolai Kotchetkov' @@ -113,7 +120,7 @@ class LentaRURecipe(BasicNewsRecipe): def postprocess_html(self, soup, first_fetch): - contents = Tag(soup, 'div') + contents = new_tag(soup, 'div') # Extract tags with given attributes extractElements = {'div': [{'id': 'readers-block'}]} @@ -155,13 +162,13 @@ class LentaRURecipe(BasicNewsRecipe): # Place article picture after date pic = soup.find('img') if pic: - picDiv = Tag(soup, 'div') + picDiv = new_tag(soup, 'div') picDiv['style'] = 'width: 100%; text-align: center;' pic.extract() picDiv.insert(0, pic) title = pic.get('title', None) if title: - titleDiv = Tag(soup, 'div') + titleDiv = new_tag(soup, 'div') titleDiv['style'] = 'font-size: 0.5em;' titleDiv.insert(0, title) picDiv.insert(1, titleDiv) diff --git a/recipes/levante.recipe b/recipes/levante.recipe index 33652afb00..ab236393cf 100644 --- a/recipes/levante.recipe +++ b/recipes/levante.recipe @@ -2,6 +2,13 @@ from 
calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class LevanteRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'kwetal' @@ -92,7 +99,7 @@ class LevanteRecipe(BasicNewsRecipe): # Nuke some real crappy html theirHead = soup.head theirHead.extract() - myHead = Tag(soup, 'head') + myHead = new_tag(soup, 'head') soup.insert(0, myHead) return soup diff --git a/recipes/moneycontrol.recipe b/recipes/moneycontrol.recipe index 8cc0cfdeac..1c893e6d44 100644 --- a/recipes/moneycontrol.recipe +++ b/recipes/moneycontrol.recipe @@ -1,4 +1,12 @@ from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + + +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) class MoneyControlRecipe(BasicNewsRecipe): @@ -37,7 +45,7 @@ class MoneyControlRecipe(BasicNewsRecipe): headline = soup.find('td', attrs = {'class': 'heading'}) if headline: - h1 = Tag(freshSoup, 'h1') + h1 = new_tag(freshSoup, 'h1') # Convert to string before adding it to the document! 
h1.append(self.tag_to_string(headline)) freshSoup.body.append(h1) @@ -47,7 +55,7 @@ class MoneyControlRecipe(BasicNewsRecipe): # We have some weird pagebreak marker here; it will not find all of them however continue - para = Tag(freshSoup, 'p') + para = new_tag(freshSoup, 'p') # Convert to string; this will loose all formatting but also all illegal markup para.append(self.tag_to_string(p)) diff --git a/recipes/montreal_gazette.recipe b/recipes/montreal_gazette.recipe index d1366c4c07..456f082dfd 100644 --- a/recipes/montreal_gazette.recipe +++ b/recipes/montreal_gazette.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class CanWestPaper(BasicNewsRecipe): postmedia_index_pages = [ @@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe): pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps if (soup.find('div', attrs={'id': 'storycontent'}) is None): - allpics = Tag(soup, 'div') + allpics = new_tag(soup, 'div') first_img = pgall.find('div', 'storyimage') if first_img is not None: first_img.extract() tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) if tlist is not None: for atag in tlist.findAll('a'): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') srcpre, sep, srcpost = atag.img[ 'src'].partition('?') img['src'] = srcpre - pdesc = Tag(soup, 'p') + pdesc = new_tag(soup, 'p') pdesc.insert(0, atag.img['alt']) pdesc['class'] = 'photocaption' - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div.insert(0, pdesc) div.insert(0, img) allpics.append(div) diff --git a/recipes/nacional_cro.recipe b/recipes/nacional_cro.recipe index e6c33008fa..e11c4028a4 100644 --- a/recipes/nacional_cro.recipe +++ b/recipes/nacional_cro.recipe @@ 
-12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NacionalCro(BasicNewsRecipe): title = 'Nacional - Hr' __author__ = 'Darko Miletic' @@ -44,9 +51,9 @@ class NacionalCro(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe index f185b3c731..af9b4e4c9f 100644 --- a/recipes/natgeo.recipe +++ b/recipes/natgeo.recipe @@ -13,6 +13,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NatGeo(BasicNewsRecipe): title = u'National Geographic' description = 'Daily news articles from The National Geographic' @@ -64,7 +71,7 @@ class NatGeo(BasicNewsRecipe): idx = url.find('.jpg/{width') if idx != -1: url = url[:idx + 4] - img = Tag(soup, "img") + img = new_tag(soup, "img") img['src'] = url div.append(img) diff --git a/recipes/ncrnext.recipe b/recipes/ncrnext.recipe index 96d3337703..a28879028f 100644 --- a/recipes/ncrnext.recipe +++ b/recipes/ncrnext.recipe @@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) 
+ if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NrcNextRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'kwetal' @@ -100,20 +107,20 @@ class NrcNextRecipe(BasicNewsRecipe): if tag: h2 = tag.find('h2', 'vlag') if h2: - new_h2 = Tag(soup, 'h2', attrs=[('class', 'vlag')]) + new_h2 = new_tag(soup, 'h2', attrs=[('class', 'vlag')]) new_h2.append(self.tag_to_string(h2)) h2.replaceWith(new_h2) else: h2 = tag.find('h2') if h2: - new_h2 = Tag(soup, 'h2', attrs=[ + new_h2 = new_tag(soup, 'h2', attrs=[ ('class', 'sub_title')]) new_h2.append(self.tag_to_string(h2)) h2.replaceWith(new_h2) h1 = tag.find('h1') if h1: - new_h1 = Tag(soup, 'h1') + new_h1 = new_tag(soup, 'h1') new_h1.append(self.tag_to_string(h1)) h1.replaceWith(new_h1) diff --git a/recipes/new_yorker.recipe b/recipes/new_yorker.recipe index 93f5d16203..d19cd42c86 100644 --- a/recipes/new_yorker.recipe +++ b/recipes/new_yorker.recipe @@ -23,6 +23,13 @@ def absurl(x): return x +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NewYorker(BasicNewsRecipe): title = u'New Yorker Magazine' @@ -114,12 +121,12 @@ class NewYorker(BasicNewsRecipe): title = soup.find('meta', itemprop='name') if title: if self.featured_image: - img = Tag(soup, 'img') + img = new_tag(soup, 'img') img['src'] = self.featured_image - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div.append(img) body.insert(0, div) - h1 = Tag(soup, 'h1') + h1 = new_tag(soup, 'h1') h1.append(title.get('content')) body.insert(0, h1) for attr in 'srcset data-src-mobile'.split(): diff --git a/recipes/noaa.recipe b/recipes/noaa.recipe index 87377dca72..c156d215eb 100644 --- a/recipes/noaa.recipe +++ b/recipes/noaa.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def 
new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NOAA(BasicNewsRecipe): title = 'NOAA Online' __author__ = 'Darko Miletic' @@ -32,9 +39,9 @@ class NOAA(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/nrc.nl.recipe b/recipes/nrc.nl.recipe index c3993c6e69..cfa3751b71 100644 --- a/recipes/nrc.nl.recipe +++ b/recipes/nrc.nl.recipe @@ -8,6 +8,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Pagina12(BasicNewsRecipe): title = 'NRC' __author__ = 'Darko Miletic' @@ -42,7 +49,7 @@ class Pagina12(BasicNewsRecipe): div = soup.find( 'div', attrs={'class': lambda x: x and 'featured-img' in x}) if div is not None: - img = Tag(soup, 'img') + img = new_tag(soup, 'img') img['src'] = src div.append(img) return soup diff --git a/recipes/nspm.recipe b/recipes/nspm.recipe index f2e41e9d1d..ca1089ff7e 100644 --- a/recipes/nspm.recipe +++ b/recipes/nspm.recipe @@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import NavigableString, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Nspm(BasicNewsRecipe): 
title = 'Nova srpska politicka misao' __author__ = 'Darko Miletic' @@ -62,7 +69,7 @@ class Nspm(BasicNewsRecipe): def preprocess_html(self, soup): atitle = soup.body.find('a', attrs={'class': 'contentpagetitle'}) if atitle: - cleanTitle = Tag(soup, 'h1', [('class', 'contentpagetitle')]) + cleanTitle = new_tag(soup, 'h1', [('class', 'contentpagetitle')]) cnt = NavigableString(self.tag_to_string(atitle)) cleanTitle.append(cnt) @@ -73,12 +80,12 @@ class Nspm(BasicNewsRecipe): crdate = soup.body.find('td', attrs={'class': 'createdate'}) if crdate: - cleanCrdate = Tag(soup, 'div', [('class', 'createdate')]) + cleanCrdate = new_tag(soup, 'div', [('class', 'createdate')]) cnt = NavigableString(self.tag_to_string(crdate)) cleanCrdate.append(cnt) # get the dependant element - artText = Tag(soup, 'div', [('class', 'text')]) + artText = new_tag(soup, 'div', [('class', 'text')]) textHolderp = crdate.parent textHolder = textHolderp.nextSibling while textHolder and (not isinstance(textHolder, Tag) or (textHolder.name != textHolderp.name)): diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index 8ef35ee375..53cd997b4e 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -67,6 +67,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NewYorkTimes(BasicNewsRecipe): if is_web_edition: @@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe): keep_only_tags = [ dict(id='story'), ] - body = Tag(soup, 'body') + body = new_tag(soup, 'body') for spec in keep_only_tags: for tag in soup.find('body').findAll(**spec): body.insert(len(body.contents), tag) diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 0670acc39f..6dfa18d188 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -67,6 +67,13 
@@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class NewYorkTimes(BasicNewsRecipe): if is_web_edition: @@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe): keep_only_tags = [ dict(id='story'), ] - body = Tag(soup, 'body') + body = new_tag(soup, 'body') for spec in keep_only_tags: for tag in soup.find('body').findAll(**spec): body.insert(len(body.contents), tag) diff --git a/recipes/nzz_webpaper.recipe b/recipes/nzz_webpaper.recipe index f65d4af015..8847e26547 100644 --- a/recipes/nzz_webpaper.recipe +++ b/recipes/nzz_webpaper.recipe @@ -13,6 +13,13 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.web.feeds.recipes import BasicNewsRecipe +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Nzz(BasicNewsRecipe): title = 'NZZ Webpaper' __author__ = 'Bernd Leinfelder' @@ -56,7 +63,7 @@ class Nzz(BasicNewsRecipe): for span in soup.findAll('span', attrs={'data-src-640': True}): imgSrc = span['data-src-640'] # print "image source: "+ imgSrc - imgTag = Tag(soup, "img", [("src", imgSrc)]) + imgTag = new_tag(soup, "img", [("src", imgSrc)]) span.replaceWith(imgTag) # print soup.prettify() diff --git a/recipes/ottawa_citizen.recipe b/recipes/ottawa_citizen.recipe index a5f16c5bef..ac5f4ca441 100644 --- a/recipes/ottawa_citizen.recipe +++ b/recipes/ottawa_citizen.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + 
class CanWestPaper(BasicNewsRecipe): postmedia_index_pages = [ @@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe): pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps if (soup.find('div', attrs={'id': 'storycontent'}) is None): - allpics = Tag(soup, 'div') + allpics = new_tag(soup, 'div') first_img = pgall.find('div', 'storyimage') if first_img is not None: first_img.extract() tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) if tlist is not None: for atag in tlist.findAll('a'): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') srcpre, sep, srcpost = atag.img[ 'src'].partition('?') img['src'] = srcpre - pdesc = Tag(soup, 'p') + pdesc = new_tag(soup, 'p') pdesc.insert(0, atag.img['alt']) pdesc['class'] = 'photocaption' - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div.insert(0, pdesc) div.insert(0, img) allpics.append(div) diff --git a/recipes/pagina_12_print_ed.recipe b/recipes/pagina_12_print_ed.recipe index fd25d69b3a..0c1f38bc5b 100644 --- a/recipes/pagina_12_print_ed.recipe +++ b/recipes/pagina_12_print_ed.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag, NavigableString +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Pagina12(BasicNewsRecipe): title = 'Pagina/12 - Edicion Impresa' @@ -84,10 +91,10 @@ class Pagina12(BasicNewsRecipe): if img is not None: img.extract() caption = self.tag_to_string(table).strip() - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div['style'] = 'text-align:center' div.insert(0, img) - div.insert(1, Tag(soup, 'br')) + div.insert(1, new_tag(soup, 'br')) if caption: div.insert(2, NavigableString(caption)) table.replaceWith(div) diff --git a/recipes/pobjeda.recipe b/recipes/pobjeda.recipe index 5671bf30dc..1bb4c1813e 100644 --- 
a/recipes/pobjeda.recipe +++ b/recipes/pobjeda.recipe @@ -13,6 +13,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Pobjeda(BasicNewsRecipe): title = 'Pobjeda Online' __author__ = 'Darko Miletic' @@ -59,9 +66,9 @@ class Pobjeda(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/pressonline.recipe b/recipes/pressonline.recipe index bdae80d3fc..98f9c84147 100644 --- a/recipes/pressonline.recipe +++ b/recipes/pressonline.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class PressOnline(BasicNewsRecipe): title = 'Press Online' __author__ = 'Darko Miletic' @@ -60,7 +67,7 @@ class PressOnline(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) soup.head.insert(0, mlang) return self.adeify_images(soup) diff --git a/recipes/revista_muy.recipe b/recipes/revista_muy.recipe index 7e344efd8f..ed5b087861 100644 --- a/recipes/revista_muy.recipe +++ b/recipes/revista_muy.recipe @@ -3,6 
+3,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class RevistaMuyInteresante(BasicNewsRecipe): title = 'Revista Muy Interesante' @@ -24,7 +31,7 @@ class RevistaMuyInteresante(BasicNewsRecipe): for img_tag in soup.findAll('img'): imagen = img_tag - new_tag = Tag(soup, 'p') - img_tag.replaceWith(new_tag) + p_tag = new_tag(soup, 'p') + img_tag.replaceWith(p_tag) div = soup.find(attrs={'class': 'article_category'}) div.insert(0, imagen) diff --git a/recipes/rts.recipe b/recipes/rts.recipe index 29c10f168b..2d42bea682 100644 --- a/recipes/rts.recipe +++ b/recipes/rts.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class RTS(BasicNewsRecipe): title = 'RTS: Vesti' __author__ = 'Darko Miletic' @@ -50,9 +57,9 @@ class RTS(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/sarajevo_x.recipe b/recipes/sarajevo_x.recipe index 5fa8181608..21524ecab6 100644 --- a/recipes/sarajevo_x.recipe +++ b/recipes/sarajevo_x.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag, NavigableString +def new_tag(soup, name, attrs=()): + impl =
getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class SarajevoX(BasicNewsRecipe): title = 'Sarajevo-x.com' __author__ = 'Darko Miletic' @@ -57,7 +64,7 @@ class SarajevoX(BasicNewsRecipe): if sp: sp else: - mtag = Tag(soup, 'div', [ + mtag = new_tag(soup, 'div', [ ("id", "opisslike"), ("class", "opscitech")]) mopis = NavigableString("Opis") mtag.insert(0, mopis) diff --git a/recipes/scmp.recipe b/recipes/scmp.recipe index 46c5d24aad..735ac9a79f 100644 --- a/recipes/scmp.recipe +++ b/recipes/scmp.recipe @@ -12,6 +12,13 @@ def classes(classes): 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class SCMP(BasicNewsRecipe): title = 'South China Morning Post' __author__ = 'llam' @@ -71,7 +78,7 @@ class SCMP(BasicNewsRecipe): wrapper = soup.find(**classes('image-wrapper__placeholder')) if wrapper is not None: p = wrapper.parent - img = Tag(soup, 'img') + img = new_tag(soup, 'img') img['src'] = meta['content'] p.append(img) wrapper.extract() diff --git a/recipes/southernstar.recipe b/recipes/southernstar.recipe index 38ed08970b..8bcd12e885 100644 --- a/recipes/southernstar.recipe +++ b/recipes/southernstar.recipe @@ -14,6 +14,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag, NavigableString +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class TheSouthernStar(BasicNewsRecipe): title = 'The Southern Star' @@ -117,10 +124,10 @@ class TheSouthernStar(BasicNewsRecipe): if img is not None: img.extract() caption = self.tag_to_string(table).strip() - div = Tag(soup, 'div') + div = new_tag(soup, 
'div') div['style'] = 'text-align:center' div.insert(0, img) - div.insert(1, Tag(soup, 'br')) + div.insert(1, new_tag(soup, 'br')) if caption: div.insert(2, NavigableString(caption)) table.replaceWith(div) diff --git a/recipes/tijd.recipe b/recipes/tijd.recipe index a2dd9aa7d4..86eff286c4 100644 --- a/recipes/tijd.recipe +++ b/recipes/tijd.recipe @@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class DeTijd(BasicNewsRecipe): title = 'De Tijd' __author__ = 'Darko Miletic' @@ -68,11 +75,11 @@ class DeTijd(BasicNewsRecipe): del item['style'] soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag( + mlang = new_tag( soup, 'meta', [("http-equiv", "Content-Language"), ("content", self.lang)] ) - mcharset = Tag( + mcharset = new_tag( soup, 'meta', [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")] ) diff --git a/recipes/uncrate.recipe b/recipes/uncrate.recipe index 09a8ad079d..cb174b8892 100644 --- a/recipes/uncrate.recipe +++ b/recipes/uncrate.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Uncrate(BasicNewsRecipe): title = 'Uncrate' __author__ = 'Darko Miletic' @@ -47,9 +54,9 @@ class Uncrate(BasicNewsRecipe): feeds = [(u'Articles', u'http://feeds.feedburner.com/uncrate')] def preprocess_html(self, soup): - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", 
"Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/vancouver_province.recipe b/recipes/vancouver_province.recipe index 588133290d..39ce9681e6 100644 --- a/recipes/vancouver_province.recipe +++ b/recipes/vancouver_province.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class CanWestPaper(BasicNewsRecipe): postmedia_index_pages = [ @@ -231,21 +238,21 @@ class CanWestPaper(BasicNewsRecipe): pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps if (soup.find('div', attrs={'id': 'storycontent'}) is None): - allpics = Tag(soup, 'div') + allpics = new_tag(soup, 'div') first_img = pgall.find('div', 'storyimage') if first_img is not None: first_img.extract() tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) if tlist is not None: for atag in tlist.findAll('a'): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') srcpre, sep, srcpost = atag.img[ 'src'].partition('?') img['src'] = srcpre - pdesc = Tag(soup, 'p') + pdesc = new_tag(soup, 'p') pdesc.insert(0, atag.img['alt']) pdesc['class'] = 'photocaption' - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div.insert(0, pdesc) div.insert(0, img) allpics.append(div) diff --git a/recipes/vancouver_sun.recipe b/recipes/vancouver_sun.recipe index e0c9e4fdce..45e2b8f9b5 100644 --- a/recipes/vancouver_sun.recipe +++ b/recipes/vancouver_sun.recipe @@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, 
attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class CanWestPaper(BasicNewsRecipe): compress_news_images = True @@ -219,21 +226,21 @@ class CanWestPaper(BasicNewsRecipe): pgall = soup.find('div', attrs={'id': 'storyphoto'}) if pgall is not None: # photo gallery perhaps if (soup.find('div', attrs={'id': 'storycontent'}) is None): - allpics = Tag(soup, 'div') + allpics = new_tag(soup, 'div') first_img = pgall.find('div', 'storyimage') if first_img is not None: first_img.extract() tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) if tlist is not None: for atag in tlist.findAll('a'): - img = Tag(soup, 'img') + img = new_tag(soup, 'img') srcpre, sep, srcpost = atag.img[ 'src'].partition('?') img['src'] = srcpre - pdesc = Tag(soup, 'p') + pdesc = new_tag(soup, 'p') pdesc.insert(0, atag.img['alt']) pdesc['class'] = 'photocaption' - div = Tag(soup, 'div') + div = new_tag(soup, 'div') div.insert(0, pdesc) div.insert(0, img) allpics.append(div) diff --git a/recipes/vecernji_list.recipe b/recipes/vecernji_list.recipe index ae627c155e..95a7a0cc70 100644 --- a/recipes/vecernji_list.recipe +++ b/recipes/vecernji_list.recipe @@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class VecernjiList(BasicNewsRecipe): title = 'Vecernji List' __author__ = 'Darko Miletic' @@ -48,9 +55,9 @@ class VecernjiList(BasicNewsRecipe): soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) 
diff --git a/recipes/vedomosti.recipe b/recipes/vedomosti.recipe index f3e67fef11..0270e221b1 100644 --- a/recipes/vedomosti.recipe +++ b/recipes/vedomosti.recipe @@ -9,6 +9,13 @@ from calibre.ebooks.BeautifulSoup import Tag from calibre.web.feeds.news import BasicNewsRecipe +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class VedomostiRecipe(BasicNewsRecipe): title = u'Ведомости' __author__ = 'Nikolai Kotchetkov' @@ -145,7 +152,7 @@ class VedomostiRecipe(BasicNewsRecipe): if newstop: img = newstop.find('img') if img: - imgDiv = Tag(soup, 'div') + imgDiv = new_tag(soup, 'div') imgDiv['class'] = 'article_img' if img.get('width'): diff --git a/recipes/veintitres.recipe b/recipes/veintitres.recipe index 372ce2fb91..a3f3618954 100644 --- a/recipes/veintitres.recipe +++ b/recipes/veintitres.recipe @@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class Veintitres(BasicNewsRecipe): title = 'Veintitres' __author__ = 'Darko Miletic' @@ -52,9 +59,9 @@ class Veintitres(BasicNewsRecipe): del item['style'] soup.html['lang'] = self.lang soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ + mlang = new_tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ + mcharset = new_tag(soup, 'meta', [ ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) soup.head.insert(0, mlang) soup.head.insert(1, mcharset) diff --git a/recipes/vic_times.recipe b/recipes/vic_times.recipe index 822b50f503..148985c43e 100644 --- a/recipes/vic_times.recipe +++ b/recipes/vic_times.recipe @@ -12,6 +12,13 @@ from 
calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class TimesColonist(BasicNewsRecipe): # Customization -- remove sections you don't want. @@ -179,7 +186,7 @@ class TimesColonist(BasicNewsRecipe): authstr = re.sub('/ *Times Colonist', '/', authstr, flags=re.IGNORECASE) authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE) - newdiv = Tag(soup, 'div') + newdiv = new_tag(soup, 'div') newdiv.insert(0, authstr) newdiv['class'] = 'byline' byline.replaceWith(newdiv) @@ -187,7 +194,7 @@ class TimesColonist(BasicNewsRecipe): capstr = self.tag_to_string(caption, False) capstr = re.sub('Photograph by.*$', '', capstr, flags=re.IGNORECASE) - newdiv = Tag(soup, 'div') + newdiv = new_tag(soup, 'div') newdiv.insert(0, capstr) newdiv['class'] = 'caption' caption.replaceWith(newdiv) diff --git a/recipes/vrijnederland.recipe b/recipes/vrijnederland.recipe index 26be59406f..bd5c7e80ce 100644 --- a/recipes/vrijnederland.recipe +++ b/recipes/vrijnederland.recipe @@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class VrijNederlandRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'kwetal' @@ -73,7 +80,7 @@ class VrijNederlandRecipe(BasicNewsRecipe): # altogether theirHead = soup.head theirHead.extract() - myHead = Tag(soup, 'head') + myHead = new_tag(soup, 'head') soup.insert(0, myHead) return soup