Port Tag creation in recipes to work with any version of BeautifulSoup

This commit is contained in:
Kovid Goyal 2019-03-23 08:06:25 +05:30
parent 930624d2be
commit c68a5c8ab1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
64 changed files with 580 additions and 131 deletions

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class Cro24Sata(BasicNewsRecipe): class Cro24Sata(BasicNewsRecipe):
title = '24 Sata - Hr' title = '24 Sata - Hr'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -46,9 +53,9 @@ class Cro24Sata(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class AdvancedUserRecipe1282101454(BasicNewsRecipe): class AdvancedUserRecipe1282101454(BasicNewsRecipe):
now = datetime.datetime.now() now = datetime.datetime.now()
title = 'The AJC' title = 'The AJC'
@ -118,7 +125,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
comma = ', ' comma = ', '
article.author = names article.author = names
if len(names) > 0: if len(names) > 0:
tag = Tag(soup, 'div', [('class', 'cm-story-author')]) tag = new_tag(soup, 'div', [('class', 'cm-story-author')])
tag.append("by: ") tag.append("by: ")
tag.append(names) tag.append(names)
meta = soup.find('div', attrs={'class': 'cm-story-meta'}) meta = soup.find('div', attrs={'class': 'cm-story-meta'})

View File

@ -17,6 +17,13 @@ def classes(classes):
) )
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class AssociatedPress(BasicNewsRecipe): class AssociatedPress(BasicNewsRecipe):
title = u'Associated Press' title = u'Associated Press'
@ -76,7 +83,7 @@ class AssociatedPress(BasicNewsRecipe):
def preprocess_html(self, soup, *a): def preprocess_html(self, soup, *a):
for meta in soup.findAll('meta', attrs=dict(name="twitter:image:alt")): for meta in soup.findAll('meta', attrs=dict(name="twitter:image:alt")):
for div in soup.findAll(**classes('LeadFeature')): for div in soup.findAll(**classes('LeadFeature')):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
img['src'] = meta['content'] img['src'] = meta['content']
div.insert(0, img) div.insert(0, img)
return soup return soup

View File

@ -127,6 +127,13 @@ class BloombergContributor:
return self._name return self._name
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class BloombergContributors(BasicNewsRecipe): class BloombergContributors(BasicNewsRecipe):
title = u'Bloomberg, Editorial Contributors' title = u'Bloomberg, Editorial Contributors'
description = 'Articles from Bloomberg.com contributors' description = 'Articles from Bloomberg.com contributors'
@ -175,7 +182,7 @@ class BloombergContributors(BasicNewsRecipe):
.strftime("%B %d, %Y %I:%M %p") + " UTC" .strftime("%B %d, %Y %I:%M %p") + " UTC"
except: except:
parsed_time = time_stamp parsed_time = time_stamp
insert_tag = Tag(soup, "p", [("class", "user-inserted")]) insert_tag = new_tag(soup, "p", [("class", "user-inserted")])
insert_tag.insert(0, parsed_time) insert_tag.insert(0, parsed_time)
soup.time.replaceWith(insert_tag) soup.time.replaceWith(insert_tag)

View File

@ -10,6 +10,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class BostonGlobeSubscription(BasicNewsRecipe): class BostonGlobeSubscription(BasicNewsRecipe):
title = "Boston Globe Subscription" title = "Boston Globe Subscription"
@ -204,7 +211,7 @@ class BostonGlobeSubscription(BasicNewsRecipe):
imgLink = main.find("a", "comic") imgLink = main.find("a", "comic")
img = imgLink.img img = imgLink.img
body = Tag(soup, "body") body = new_tag(soup, "body")
body.insert(0, title) body.insert(0, title)
body.insert(1, byline) body.insert(1, byline)
body.insert(2, img) body.insert(2, img)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class AdvancedUserRecipe1282101454(BasicNewsRecipe): class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'BuckMasters In The Kitchen' title = 'BuckMasters In The Kitchen'
language = 'en' language = 'en'
@ -28,15 +35,15 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
for img_tag in soup.findAll('img'): for img_tag in soup.findAll('img'):
parent_tag = img_tag.parent parent_tag = img_tag.parent
if parent_tag.name == 'a': if parent_tag.name == 'a':
new_tag = Tag(soup, 'p') ntag = new_tag(soup, 'p')
new_tag.insert(0, img_tag) ntag.insert(0, img_tag)
parent_tag.replaceWith(new_tag) parent_tag.replaceWith(ntag)
elif parent_tag.name == 'p': elif parent_tag.name == 'p':
if not self.tag_to_string(parent_tag) == '': if not self.tag_to_string(parent_tag) == '':
new_div = Tag(soup, 'div') new_div = new_tag(soup, 'div')
new_tag = Tag(soup, 'p') ntag = new_tag(soup, 'p')
new_tag.insert(0, img_tag) ntag.insert(0, img_tag)
parent_tag.replaceWith(new_div) parent_tag.replaceWith(new_div)
new_div.insert(0, new_tag) new_div.insert(0, ntag)
new_div.insert(1, parent_tag) new_div.insert(1, parent_tag)
return soup return soup

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [ postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None): if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div') allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage') first_img = pgall.find('div', 'storyimage')
if first_img is not None: if first_img is not None:
first_img.extract() first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None: if tlist is not None:
for atag in tlist.findAll('a'): for atag in tlist.findAll('a'):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[ srcpre, sep, srcpost = atag.img[
'src'].partition('?') 'src'].partition('?')
img['src'] = srcpre img['src'] = srcpre
pdesc = Tag(soup, 'p') pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt']) pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption' pdesc['class'] = 'photocaption'
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.insert(0, pdesc) div.insert(0, pdesc)
div.insert(0, img) div.insert(0, img)
allpics.append(div) allpics.append(div)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class ClimateProgress(BasicNewsRecipe): class ClimateProgress(BasicNewsRecipe):
title = 'Climate Progress' title = 'Climate Progress'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -47,9 +54,9 @@ class ClimateProgress(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class DeGentenaarOnline(BasicNewsRecipe): class DeGentenaarOnline(BasicNewsRecipe):
title = 'De Gentenaar' title = 'De Gentenaar'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -69,9 +76,9 @@ class DeGentenaarOnline(BasicNewsRecipe):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class DnevniAvaz(BasicNewsRecipe): class DnevniAvaz(BasicNewsRecipe):
title = 'Dnevni Avaz' title = 'Dnevni Avaz'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -57,9 +64,9 @@ class DnevniAvaz(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class DnevnikCro(BasicNewsRecipe): class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr' title = 'Dnevnik - Hr'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -58,9 +65,9 @@ class DnevnikCro(BasicNewsRecipe):
item[attrib] = '' item[attrib] = ''
del item[attrib] del item[attrib]
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -16,6 +16,13 @@ __license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org' __copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class DunyaHalleri(BasicNewsRecipe): class DunyaHalleri(BasicNewsRecipe):
title = 'Dünya Halleri' title = 'Dünya Halleri'
description = 'Gözden Kaçanlar Rehberi' description = 'Gözden Kaçanlar Rehberi'
@ -78,20 +85,20 @@ class DunyaHalleri(BasicNewsRecipe):
# title insert # title insert
article_title = soup.title.contents[0] article_title = soup.title.contents[0]
article_title.replace(' - Dünya Halleri'.decode('utf-8', 'replace'), '') article_title.replace(' - Dünya Halleri'.decode('utf-8', 'replace'), '')
h2 = Tag(soup, 'h2') h2 = new_tag(soup, 'h2')
h2.append(article_title) h2.append(article_title)
span.insert(0, h2) span.insert(0, h2)
# featured image insert # featured image insert
meta = soup.findAll('meta', {'property': 'og:image'}, limit=1)[0] meta = soup.findAll('meta', {'property': 'og:image'}, limit=1)[0]
if meta: if meta:
img = Tag(soup, 'img') img = new_tag(soup, 'img')
img.attrs = [('src', meta['content'])] img.attrs = [('src', meta['content'])]
span.insert(1, img) span.insert(1, img)
# gallery normalization # gallery normalization
for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}): for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
p = Tag(soup, 'p') p = new_tag(soup, 'p')
for img in div.findAll('img'): for img in div.findAll('img'):
img.attrs = [(key, value) img.attrs = [(key, value)
for key, value in img.attrs if key in ['src']] for key, value in img.attrs if key in ['src']]
@ -102,9 +109,9 @@ class DunyaHalleri(BasicNewsRecipe):
# this block finds the cover image for each embeded youtube video then # this block finds the cover image for each embeded youtube video then
# changes it to "a href" and "img" # changes it to "a href" and "img"
for iframe in soup.findAll('iframe'): for iframe in soup.findAll('iframe'):
a = Tag(soup, 'a') a = new_tag(soup, 'a')
caption = Tag(soup, 'pre') caption = new_tag(soup, 'pre')
img = Tag(soup, 'img') img = new_tag(soup, 'img')
m = re.match( m = re.match(
r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)' r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'

View File

@ -16,6 +16,13 @@ __license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org' __copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
title = 'Dünya Halleri - Haftanın Özeti' title = 'Dünya Halleri - Haftanın Özeti'
description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,' description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,'
@ -156,7 +163,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
# gallery normalization # gallery normalization
for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}): for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
p = Tag(soup, 'p') p = new_tag(soup, 'p')
for img in div.findAll('img'): for img in div.findAll('img'):
img.attrs = [(key, value) img.attrs = [(key, value)
for key, value in img.attrs if key in ['src']] for key, value in img.attrs if key in ['src']]
@ -167,9 +174,9 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
# this block finds the cover image for each embeded youtube video then # this block finds the cover image for each embeded youtube video then
# changes it to "a href" and "img" # changes it to "a href" and "img"
for iframe in soup.findAll('iframe'): for iframe in soup.findAll('iframe'):
a = Tag(soup, 'a') a = new_tag(soup, 'a')
caption = Tag(soup, 'pre') caption = new_tag(soup, 'pre')
img = Tag(soup, 'img') img = new_tag(soup, 'img')
m = re.match( m = re.match(
r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)' r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'

View File

@ -19,6 +19,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class NoArticles(Exception): class NoArticles(Exception):
pass pass
@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe):
for table in list(self.eco_find_image_tables(soup)): for table in list(self.eco_find_image_tables(soup)):
caption = table.find('font') caption = table.find('font')
img = table.find('img') img = table.find('img')
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div['style'] = 'text-align:left;font-size:70%' div['style'] = 'text-align:left;font-size:70%'
ns = NavigableString(self.tag_to_string(caption)) ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns) div.insert(0, ns)
div.insert(1, Tag(soup, 'br')) div.insert(1, new_tag(soup, 'br'))
del img['width'] del img['width']
del img['height'] del img['height']
img.extract() img.extract()

View File

@ -19,6 +19,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class NoArticles(Exception): class NoArticles(Exception):
pass pass
@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe):
for table in list(self.eco_find_image_tables(soup)): for table in list(self.eco_find_image_tables(soup)):
caption = table.find('font') caption = table.find('font')
img = table.find('img') img = table.find('img')
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div['style'] = 'text-align:left;font-size:70%' div['style'] = 'text-align:left;font-size:70%'
ns = NavigableString(self.tag_to_string(caption)) ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns) div.insert(0, ns)
div.insert(1, Tag(soup, 'br')) div.insert(1, new_tag(soup, 'br'))
del img['width'] del img['width']
del img['height'] del img['height']
img.extract() img.extract()

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [ postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None): if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div') allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage') first_img = pgall.find('div', 'storyimage')
if first_img is not None: if first_img is not None:
first_img.extract() first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None: if tlist is not None:
for atag in tlist.findAll('a'): for atag in tlist.findAll('a'):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[ srcpre, sep, srcpost = atag.img[
'src'].partition('?') 'src'].partition('?')
img['src'] = srcpre img['src'] = srcpre
pdesc = Tag(soup, 'p') pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt']) pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption' pdesc['class'] = 'photocaption'
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.insert(0, pdesc) div.insert(0, pdesc)
div.insert(0, img) div.insert(0, img)
allpics.append(div) allpics.append(div)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class ElPeriodico_cat(BasicNewsRecipe): class ElPeriodico_cat(BasicNewsRecipe):
title = 'El Periodico de Catalunya' title = 'El Periodico de Catalunya'
__author__ = 'Jordi Balcells/Darko Miletic' __author__ = 'Jordi Balcells/Darko Miletic'
@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
return url.replace('/default.asp?', '/print.asp?') return url.replace('/default.asp?', '/print.asp?')
def preprocess_html(self, soup): def preprocess_html(self, soup):
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset) soup.head.insert(0, mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class ElPeriodico_cat(BasicNewsRecipe): class ElPeriodico_cat(BasicNewsRecipe):
title = 'El Periodico de Catalunya' title = 'El Periodico de Catalunya'
__author__ = 'Jordi Balcells/Darko Miletic' __author__ = 'Jordi Balcells/Darko Miletic'
@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
return url.replace('/default.asp?', '/print.asp?') return url.replace('/default.asp?', '/print.asp?')
def preprocess_html(self, soup): def preprocess_html(self, soup):
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset) soup.head.insert(0, mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class ElTiempoHn(BasicNewsRecipe): class ElTiempoHn(BasicNewsRecipe):
title = 'El Tiempo - Honduras' title = 'El Tiempo - Honduras'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -36,9 +43,9 @@ class ElTiempoHn(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -6,6 +6,13 @@ from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError from urllib2 import Request, urlopen, URLError
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class Estadao(BasicNewsRecipe): class Estadao(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br' LANGUAGE = 'pt_br'
@ -76,11 +83,11 @@ class Estadao(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
if not soup.find(attrs={'http-equiv': 'Content-Language'}): if not soup.find(attrs={'http-equiv': 'Content-Language'}):
meta0 = Tag(soup, 'meta', [ meta0 = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.LANGHTM)]) ("http-equiv", "Content-Language"), ("content", self.LANGHTM)])
soup.head.insert(0, meta0) soup.head.insert(0, meta0)
if not soup.find(attrs={'http-equiv': 'Content-Type'}): if not soup.find(attrs={'http-equiv': 'Content-Type'}):
meta1 = Tag(soup, 'meta', [ meta1 = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)])
soup.head.insert(0, meta1) soup.head.insert(0, meta1)
return soup return soup

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class FastCompany(BasicNewsRecipe): class FastCompany(BasicNewsRecipe):
title = 'Fast Company' title = 'Fast Company'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -43,9 +50,9 @@ class FastCompany(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class FokkeEnSukkeRecipe(BasicNewsRecipe): class FokkeEnSukkeRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -79,7 +86,7 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe):
if img: if img:
title = img['alt'] title = img['alt']
tag = Tag(soup, 'div', [('class', 'title')]) tag = new_tag(soup, 'div', [('class', 'title')])
tag.insert(0, title) tag.insert(0, title)
cartoon.insert(0, tag) cartoon.insert(0, tag)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, Comment from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, Comment
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class GlennBeckRecipe(BasicNewsRecipe): class GlennBeckRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -71,7 +78,7 @@ class GlennBeckRecipe(BasicNewsRecipe):
if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == ' '): if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == ' '):
# This is our content; ignore the rest. # This is our content; ignore the rest.
para = Tag(freshSoup, 'p') para = new_tag(freshSoup, 'p')
para.append(raw) para.append(raw)
freshSoup.body.append(para) freshSoup.body.append(para)
counter += 1 counter += 1

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Works with any version of BeautifulSoup: when ``soup`` exposes a
    ``new_tag`` factory it is used, otherwise the ``Tag`` constructor is
    called directly.

    :param soup: the parsed document the tag is created for.
    :param name: tag name, e.g. ``'div'``.
    :param attrs: iterable of ``(key, value)`` attribute pairs; ``None`` or
        an empty iterable means no attributes.
    :return: whatever the underlying factory or constructor returns.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit ``attrs=None`` from raising
        # TypeError in dict(); the factory always receives a fresh dict.
        return impl(name, attrs=dict(attrs or ()))
    # The fallback constructor is handed None rather than an empty container.
    return Tag(soup, name, attrs=attrs or None)
class HLN_be(BasicNewsRecipe): class HLN_be(BasicNewsRecipe):
title = 'Het Belang Van Limburg' title = 'Het Belang Van Limburg'
__author__ = 'Darko Miletic and Sujata Raman' __author__ = 'Darko Miletic and Sujata Raman'
@ -53,9 +60,9 @@ class HLN_be(BasicNewsRecipe):
del item['style'] del item['style']
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Hoy(BasicNewsRecipe): class Hoy(BasicNewsRecipe):
title = 'HOY' title = 'HOY'
__author__ = 'Fco Javier Nieto' __author__ = 'Fco Javier Nieto'
@ -61,7 +68,7 @@ class Hoy(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset) soup.head.insert(0, mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class HRT(BasicNewsRecipe): class HRT(BasicNewsRecipe):
title = 'HRT: Vesti' title = 'HRT: Vesti'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -62,9 +69,9 @@ class HRT(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class TheIndependentNew(BasicNewsRecipe): class TheIndependentNew(BasicNewsRecipe):
title = u'The Independent' title = u'The Independent'
@ -65,7 +72,7 @@ class TheIndependentNew(BasicNewsRecipe):
for li in div.findAll('li', attrs={'data-gallery-legend': True}): for li in div.findAll('li', attrs={'data-gallery-legend': True}):
src = imgs.get(li['data-gallery-legend']) src = imgs.get(li['data-gallery-legend'])
if src is not None: if src is not None:
img = Tag(soup, 'img') img = new_tag(soup, 'img')
img['src'] = src img['src'] = src
img['style'] = 'display:block' img['style'] = 'display:block'
li.append(img) li.append(img)

View File

@ -3,6 +3,13 @@ from calibre.ebooks.BeautifulSoup import Tag
import re import re
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class JoopRecipe(BasicNewsRecipe): class JoopRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -89,7 +96,7 @@ class JoopRecipe(BasicNewsRecipe):
span = h2.find('span', 'info') span = h2.find('span', 'info')
if span: if span:
txt = span.find(text=True) txt = span.find(text=True)
div = Tag(soup, 'div', attrs=[('class', 'joop_date')]) div = new_tag(soup, 'div', attrs=[('class', 'joop_date')])
div.append(txt) div.append(txt)
h2.replaceWith(div) h2.replaceWith(div)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Jutarnji(BasicNewsRecipe): class Jutarnji(BasicNewsRecipe):
title = 'Jutarnji' title = 'Jutarnji'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -68,9 +75,9 @@ class Jutarnji(BasicNewsRecipe):
item[attrib] = '' item[attrib] = ''
del item[attrib] del item[attrib]
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class LaPrensaHn(BasicNewsRecipe): class LaPrensaHn(BasicNewsRecipe):
title = 'La Prensa - Honduras' title = 'La Prensa - Honduras'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -46,9 +53,9 @@ class LaPrensaHn(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class LaTribuna(BasicNewsRecipe): class LaTribuna(BasicNewsRecipe):
title = 'La Tribuna - Honduras' title = 'La Tribuna - Honduras'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -49,9 +56,9 @@ class LaTribuna(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class LaVanguardia(BasicNewsRecipe): class LaVanguardia(BasicNewsRecipe):
title = 'La Vanguardia Digital' title = 'La Vanguardia Digital'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -61,7 +68,7 @@ class LaVanguardia(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset) soup.head.insert(0, mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re import re
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class LentaRURecipe(BasicNewsRecipe): class LentaRURecipe(BasicNewsRecipe):
title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438' title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438'
__author__ = 'Nikolai Kotchetkov' __author__ = 'Nikolai Kotchetkov'
@ -113,7 +120,7 @@ class LentaRURecipe(BasicNewsRecipe):
def postprocess_html(self, soup, first_fetch): def postprocess_html(self, soup, first_fetch):
contents = Tag(soup, 'div') contents = new_tag(soup, 'div')
# Extract tags with given attributes # Extract tags with given attributes
extractElements = {'div': [{'id': 'readers-block'}]} extractElements = {'div': [{'id': 'readers-block'}]}
@ -155,13 +162,13 @@ class LentaRURecipe(BasicNewsRecipe):
# Place article picture after date # Place article picture after date
pic = soup.find('img') pic = soup.find('img')
if pic: if pic:
picDiv = Tag(soup, 'div') picDiv = new_tag(soup, 'div')
picDiv['style'] = 'width: 100%; text-align: center;' picDiv['style'] = 'width: 100%; text-align: center;'
pic.extract() pic.extract()
picDiv.insert(0, pic) picDiv.insert(0, pic)
title = pic.get('title', None) title = pic.get('title', None)
if title: if title:
titleDiv = Tag(soup, 'div') titleDiv = new_tag(soup, 'div')
titleDiv['style'] = 'font-size: 0.5em;' titleDiv['style'] = 'font-size: 0.5em;'
titleDiv.insert(0, title) titleDiv.insert(0, title)
picDiv.insert(1, titleDiv) picDiv.insert(1, titleDiv)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class LevanteRecipe(BasicNewsRecipe): class LevanteRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -92,7 +99,7 @@ class LevanteRecipe(BasicNewsRecipe):
# Nuke some real crappy html # Nuke some real crappy html
theirHead = soup.head theirHead = soup.head
theirHead.extract() theirHead.extract()
myHead = Tag(soup, 'head') myHead = new_tag(soup, 'head')
soup.insert(0, myHead) soup.insert(0, myHead)
return soup return soup

View File

@ -1,4 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class MoneyControlRecipe(BasicNewsRecipe): class MoneyControlRecipe(BasicNewsRecipe):
@ -37,7 +45,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
headline = soup.find('td', attrs = {'class': 'heading'}) headline = soup.find('td', attrs = {'class': 'heading'})
if headline: if headline:
h1 = Tag(freshSoup, 'h1') h1 = new_tag(freshSoup, 'h1')
# Convert to string before adding it to the document! # Convert to string before adding it to the document!
h1.append(self.tag_to_string(headline)) h1.append(self.tag_to_string(headline))
freshSoup.body.append(h1) freshSoup.body.append(h1)
@ -47,7 +55,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
# We have some weird pagebreak marker here; it will not find all of them however # We have some weird pagebreak marker here; it will not find all of them however
continue continue
para = Tag(freshSoup, 'p') para = new_tag(freshSoup, 'p')
# Convert to string; this will loose all formatting but also all illegal markup # Convert to string; this will loose all formatting but also all illegal markup
para.append(self.tag_to_string(p)) para.append(self.tag_to_string(p))

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [ postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None): if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div') allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage') first_img = pgall.find('div', 'storyimage')
if first_img is not None: if first_img is not None:
first_img.extract() first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None: if tlist is not None:
for atag in tlist.findAll('a'): for atag in tlist.findAll('a'):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[ srcpre, sep, srcpost = atag.img[
'src'].partition('?') 'src'].partition('?')
img['src'] = srcpre img['src'] = srcpre
pdesc = Tag(soup, 'p') pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt']) pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption' pdesc['class'] = 'photocaption'
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.insert(0, pdesc) div.insert(0, pdesc)
div.insert(0, img) div.insert(0, img)
allpics.append(div) allpics.append(div)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NacionalCro(BasicNewsRecipe): class NacionalCro(BasicNewsRecipe):
title = 'Nacional - Hr' title = 'Nacional - Hr'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -44,9 +51,9 @@ class NacionalCro(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -13,6 +13,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NatGeo(BasicNewsRecipe): class NatGeo(BasicNewsRecipe):
title = u'National Geographic' title = u'National Geographic'
description = 'Daily news articles from The National Geographic' description = 'Daily news articles from The National Geographic'
@ -64,7 +71,7 @@ class NatGeo(BasicNewsRecipe):
idx = url.find('.jpg/{width') idx = url.find('.jpg/{width')
if idx != -1: if idx != -1:
url = url[:idx + 4] url = url[:idx + 4]
img = Tag(soup, "img") img = new_tag(soup, "img")
img['src'] = url img['src'] = url
div.append(img) div.append(img)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NrcNextRecipe(BasicNewsRecipe): class NrcNextRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -100,20 +107,20 @@ class NrcNextRecipe(BasicNewsRecipe):
if tag: if tag:
h2 = tag.find('h2', 'vlag') h2 = tag.find('h2', 'vlag')
if h2: if h2:
new_h2 = Tag(soup, 'h2', attrs=[('class', 'vlag')]) new_h2 = new_tag(soup, 'h2', attrs=[('class', 'vlag')])
new_h2.append(self.tag_to_string(h2)) new_h2.append(self.tag_to_string(h2))
h2.replaceWith(new_h2) h2.replaceWith(new_h2)
else: else:
h2 = tag.find('h2') h2 = tag.find('h2')
if h2: if h2:
new_h2 = Tag(soup, 'h2', attrs=[ new_h2 = new_tag(soup, 'h2', attrs=[
('class', 'sub_title')]) ('class', 'sub_title')])
new_h2.append(self.tag_to_string(h2)) new_h2.append(self.tag_to_string(h2))
h2.replaceWith(new_h2) h2.replaceWith(new_h2)
h1 = tag.find('h1') h1 = tag.find('h1')
if h1: if h1:
new_h1 = Tag(soup, 'h1') new_h1 = new_tag(soup, 'h1')
new_h1.append(self.tag_to_string(h1)) new_h1.append(self.tag_to_string(h1))
h1.replaceWith(new_h1) h1.replaceWith(new_h1)

View File

@ -23,6 +23,13 @@ def absurl(x):
return x return x
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NewYorker(BasicNewsRecipe): class NewYorker(BasicNewsRecipe):
title = u'New Yorker Magazine' title = u'New Yorker Magazine'
@ -114,12 +121,12 @@ class NewYorker(BasicNewsRecipe):
title = soup.find('meta', itemprop='name') title = soup.find('meta', itemprop='name')
if title: if title:
if self.featured_image: if self.featured_image:
img = Tag(soup, 'img') img = new_tag(soup, 'img')
img['src'] = self.featured_image img['src'] = self.featured_image
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.append(img) div.append(img)
body.insert(0, div) body.insert(0, div)
h1 = Tag(soup, 'h1') h1 = new_tag(soup, 'h1')
h1.append(title.get('content')) h1.append(title.get('content'))
body.insert(0, h1) body.insert(0, h1)
for attr in 'srcset data-src-mobile'.split(): for attr in 'srcset data-src-mobile'.split():

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NOAA(BasicNewsRecipe): class NOAA(BasicNewsRecipe):
title = 'NOAA Online' title = 'NOAA Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -32,9 +39,9 @@ class NOAA(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -8,6 +8,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Pagina12(BasicNewsRecipe): class Pagina12(BasicNewsRecipe):
title = 'NRC' title = 'NRC'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -42,7 +49,7 @@ class Pagina12(BasicNewsRecipe):
div = soup.find( div = soup.find(
'div', attrs={'class': lambda x: x and 'featured-img' in x}) 'div', attrs={'class': lambda x: x and 'featured-img' in x})
if div is not None: if div is not None:
img = Tag(soup, 'img') img = new_tag(soup, 'img')
img['src'] = src img['src'] = src
div.append(img) div.append(img)
return soup return soup

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString, Tag from calibre.ebooks.BeautifulSoup import NavigableString, Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Nspm(BasicNewsRecipe): class Nspm(BasicNewsRecipe):
title = 'Nova srpska politicka misao' title = 'Nova srpska politicka misao'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -62,7 +69,7 @@ class Nspm(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
atitle = soup.body.find('a', attrs={'class': 'contentpagetitle'}) atitle = soup.body.find('a', attrs={'class': 'contentpagetitle'})
if atitle: if atitle:
cleanTitle = Tag(soup, 'h1', [('class', 'contentpagetitle')]) cleanTitle = new_tag(soup, 'h1', [('class', 'contentpagetitle')])
cnt = NavigableString(self.tag_to_string(atitle)) cnt = NavigableString(self.tag_to_string(atitle))
cleanTitle.append(cnt) cleanTitle.append(cnt)
@ -73,12 +80,12 @@ class Nspm(BasicNewsRecipe):
crdate = soup.body.find('td', attrs={'class': 'createdate'}) crdate = soup.body.find('td', attrs={'class': 'createdate'})
if crdate: if crdate:
cleanCrdate = Tag(soup, 'div', [('class', 'createdate')]) cleanCrdate = new_tag(soup, 'div', [('class', 'createdate')])
cnt = NavigableString(self.tag_to_string(crdate)) cnt = NavigableString(self.tag_to_string(crdate))
cleanCrdate.append(cnt) cleanCrdate.append(cnt)
# get the dependant element # get the dependant element
artText = Tag(soup, 'div', [('class', 'text')]) artText = new_tag(soup, 'div', [('class', 'text')])
textHolderp = crdate.parent textHolderp = crdate.parent
textHolder = textHolderp.nextSibling textHolder = textHolderp.nextSibling
while textHolder and (not isinstance(textHolder, Tag) or (textHolder.name != textHolderp.name)): while textHolder and (not isinstance(textHolder, Tag) or (textHolder.name != textHolderp.name)):

View File

@ -67,6 +67,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NewYorkTimes(BasicNewsRecipe): class NewYorkTimes(BasicNewsRecipe):
if is_web_edition: if is_web_edition:
@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(id='story'), dict(id='story'),
] ]
body = Tag(soup, 'body') body = new_tag(soup, 'body')
for spec in keep_only_tags: for spec in keep_only_tags:
for tag in soup.find('body').findAll(**spec): for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag) body.insert(len(body.contents), tag)

View File

@ -67,6 +67,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class NewYorkTimes(BasicNewsRecipe): class NewYorkTimes(BasicNewsRecipe):
if is_web_edition: if is_web_edition:
@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(id='story'), dict(id='story'),
] ]
body = Tag(soup, 'body') body = new_tag(soup, 'body')
for spec in keep_only_tags: for spec in keep_only_tags:
for tag in soup.find('body').findAll(**spec): for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag) body.insert(len(body.contents), tag)

View File

@ -13,6 +13,13 @@ from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Nzz(BasicNewsRecipe): class Nzz(BasicNewsRecipe):
title = 'NZZ Webpaper' title = 'NZZ Webpaper'
__author__ = 'Bernd Leinfelder' __author__ = 'Bernd Leinfelder'
@ -56,7 +63,7 @@ class Nzz(BasicNewsRecipe):
for span in soup.findAll('span', attrs={'data-src-640': True}): for span in soup.findAll('span', attrs={'data-src-640': True}):
imgSrc = span['data-src-640'] imgSrc = span['data-src-640']
# print "image source: "+ imgSrc # print "image source: "+ imgSrc
imgTag = Tag(soup, "img", [("src", imgSrc)]) imgTag = new_tag(soup, "img", [("src", imgSrc)])
span.replaceWith(imgTag) span.replaceWith(imgTag)
# print soup.prettify() # print soup.prettify()

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [ postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None): if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div') allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage') first_img = pgall.find('div', 'storyimage')
if first_img is not None: if first_img is not None:
first_img.extract() first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None: if tlist is not None:
for atag in tlist.findAll('a'): for atag in tlist.findAll('a'):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[ srcpre, sep, srcpost = atag.img[
'src'].partition('?') 'src'].partition('?')
img['src'] = srcpre img['src'] = srcpre
pdesc = Tag(soup, 'p') pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt']) pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption' pdesc['class'] = 'photocaption'
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.insert(0, pdesc) div.insert(0, pdesc)
div.insert(0, img) div.insert(0, img)
allpics.append(div) allpics.append(div)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString from calibre.ebooks.BeautifulSoup import Tag, NavigableString
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Pagina12(BasicNewsRecipe): class Pagina12(BasicNewsRecipe):
title = 'Pagina/12 - Edicion Impresa' title = 'Pagina/12 - Edicion Impresa'
@ -84,10 +91,10 @@ class Pagina12(BasicNewsRecipe):
if img is not None: if img is not None:
img.extract() img.extract()
caption = self.tag_to_string(table).strip() caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div['style'] = 'text-align:center' div['style'] = 'text-align:center'
div.insert(0, img) div.insert(0, img)
div.insert(1, Tag(soup, 'br')) div.insert(1, new_tag(soup, 'br'))
if caption: if caption:
div.insert(2, NavigableString(caption)) div.insert(2, NavigableString(caption))
table.replaceWith(div) table.replaceWith(div)

View File

@ -13,6 +13,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class Pobjeda(BasicNewsRecipe): class Pobjeda(BasicNewsRecipe):
title = 'Pobjeda Online' title = 'Pobjeda Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -59,9 +66,9 @@ class Pobjeda(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class PressOnline(BasicNewsRecipe): class PressOnline(BasicNewsRecipe):
title = 'Press Online' title = 'Press Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -60,7 +67,7 @@ class PressOnline(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -3,6 +3,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* on any BeautifulSoup version.

    When *soup* provides its own ``new_tag`` factory it is used, with
    *attrs* (an iterable of ``(key, value)`` pairs) converted to a dict;
    otherwise the ``Tag`` class is instantiated directly.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No factory on the soup object: build the Tag ourselves, passing
    # None rather than an empty attribute sequence.
    return Tag(soup, name, attrs=attrs or None)
class RevistaMuyInteresante(BasicNewsRecipe): class RevistaMuyInteresante(BasicNewsRecipe):
title = 'Revista Muy Interesante' title = 'Revista Muy Interesante'
@ -24,7 +31,7 @@ class RevistaMuyInteresante(BasicNewsRecipe):
for img_tag in soup.findAll('img'): for img_tag in soup.findAll('img'):
imagen = img_tag imagen = img_tag
new_tag = Tag(soup, 'p') new_tag = new_tag(soup, 'p')
img_tag.replaceWith(new_tag) img_tag.replaceWith(new_tag)
div = soup.find(attrs={'class': 'article_category'}) div = soup.find(attrs={'class': 'article_category'})
div.insert(0, imagen) div.insert(0, imagen)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class RTS(BasicNewsRecipe): class RTS(BasicNewsRecipe):
title = 'RTS: Vesti' title = 'RTS: Vesti'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -50,9 +57,9 @@ class RTS(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString from calibre.ebooks.BeautifulSoup import Tag, NavigableString
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class SarajevoX(BasicNewsRecipe): class SarajevoX(BasicNewsRecipe):
title = 'Sarajevo-x.com' title = 'Sarajevo-x.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -57,7 +64,7 @@ class SarajevoX(BasicNewsRecipe):
if sp: if sp:
sp sp
else: else:
mtag = Tag(soup, 'div', [ mtag = new_tag(soup, 'div', [
("id", "opisslike"), ("class", "opscitech")]) ("id", "opisslike"), ("class", "opscitech")])
mopis = NavigableString("Opis") mopis = NavigableString("Opis")
mtag.insert(0, mopis) mtag.insert(0, mopis)

View File

@ -12,6 +12,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class SCMP(BasicNewsRecipe): class SCMP(BasicNewsRecipe):
title = 'South China Morning Post' title = 'South China Morning Post'
__author__ = 'llam' __author__ = 'llam'
@ -71,7 +78,7 @@ class SCMP(BasicNewsRecipe):
wrapper = soup.find(**classes('image-wrapper__placeholder')) wrapper = soup.find(**classes('image-wrapper__placeholder'))
if wrapper is not None: if wrapper is not None:
p = wrapper.parent p = wrapper.parent
img = Tag(soup, 'img') img = new_tag(soup, 'img')
img['src'] = meta['content'] img['src'] = meta['content']
p.append(img) p.append(img)
wrapper.extract() wrapper.extract()

View File

@ -14,6 +14,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString from calibre.ebooks.BeautifulSoup import Tag, NavigableString
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class TheSouthernStar(BasicNewsRecipe): class TheSouthernStar(BasicNewsRecipe):
title = 'The Southern Star' title = 'The Southern Star'
@ -117,10 +124,10 @@ class TheSouthernStar(BasicNewsRecipe):
if img is not None: if img is not None:
img.extract() img.extract()
caption = self.tag_to_string(table).strip() caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div['style'] = 'text-align:center' div['style'] = 'text-align:center'
div.insert(0, img) div.insert(0, img)
div.insert(1, Tag(soup, 'br')) div.insert(1, new_tag(soup, 'br'))
if caption: if caption:
div.insert(2, NavigableString(caption)) div.insert(2, NavigableString(caption))
table.replaceWith(div) table.replaceWith(div)

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class DeTijd(BasicNewsRecipe): class DeTijd(BasicNewsRecipe):
title = 'De Tijd' title = 'De Tijd'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -68,11 +75,11 @@ class DeTijd(BasicNewsRecipe):
del item['style'] del item['style']
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag( mlang = new_tag(
soup, 'meta', [("http-equiv", "Content-Language"), soup, 'meta', [("http-equiv", "Content-Language"),
("content", self.lang)] ("content", self.lang)]
) )
mcharset = Tag( mcharset = new_tag(
soup, 'meta', [("http-equiv", "Content-Type"), soup, 'meta', [("http-equiv", "Content-Type"),
("content", "text/html; charset=utf-8")] ("content", "text/html; charset=utf-8")]
) )

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class Uncrate(BasicNewsRecipe): class Uncrate(BasicNewsRecipe):
title = 'Uncrate' title = 'Uncrate'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -47,9 +54,9 @@ class Uncrate(BasicNewsRecipe):
feeds = [(u'Articles', u'http://feeds.feedburner.com/uncrate')] feeds = [(u'Articles', u'http://feeds.feedburner.com/uncrate')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [ postmedia_index_pages = [
@ -231,21 +238,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None): if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div') allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage') first_img = pgall.find('div', 'storyimage')
if first_img is not None: if first_img is not None:
first_img.extract() first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None: if tlist is not None:
for atag in tlist.findAll('a'): for atag in tlist.findAll('a'):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[ srcpre, sep, srcpost = atag.img[
'src'].partition('?') 'src'].partition('?')
img['src'] = srcpre img['src'] = srcpre
pdesc = Tag(soup, 'p') pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt']) pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption' pdesc['class'] = 'photocaption'
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.insert(0, pdesc) div.insert(0, pdesc)
div.insert(0, img) div.insert(0, img)
allpics.append(div) allpics.append(div)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
compress_news_images = True compress_news_images = True
@ -219,21 +226,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'}) pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None): if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div') allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage') first_img = pgall.find('div', 'storyimage')
if first_img is not None: if first_img is not None:
first_img.extract() first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'}) tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None: if tlist is not None:
for atag in tlist.findAll('a'): for atag in tlist.findAll('a'):
img = Tag(soup, 'img') img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[ srcpre, sep, srcpost = atag.img[
'src'].partition('?') 'src'].partition('?')
img['src'] = srcpre img['src'] = srcpre
pdesc = Tag(soup, 'p') pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt']) pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption' pdesc['class'] = 'photocaption'
div = Tag(soup, 'div') div = new_tag(soup, 'div')
div.insert(0, pdesc) div.insert(0, pdesc)
div.insert(0, img) div.insert(0, img)
allpics.append(div) allpics.append(div)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class VecernjiList(BasicNewsRecipe): class VecernjiList(BasicNewsRecipe):
title = 'Vecernji List' title = 'Vecernji List'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -48,9 +55,9 @@ class VecernjiList(BasicNewsRecipe):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -9,6 +9,13 @@ from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class VedomostiRecipe(BasicNewsRecipe): class VedomostiRecipe(BasicNewsRecipe):
title = u'Ведомости' title = u'Ведомости'
__author__ = 'Nikolai Kotchetkov' __author__ = 'Nikolai Kotchetkov'
@ -145,7 +152,7 @@ class VedomostiRecipe(BasicNewsRecipe):
if newstop: if newstop:
img = newstop.find('img') img = newstop.find('img')
if img: if img:
imgDiv = Tag(soup, 'div') imgDiv = new_tag(soup, 'div')
imgDiv['class'] = 'article_img' imgDiv['class'] = 'article_img'
if img.get('width'): if img.get('width'):

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class Veintitres(BasicNewsRecipe): class Veintitres(BasicNewsRecipe):
title = 'Veintitres' title = 'Veintitres'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -52,9 +59,9 @@ class Veintitres(BasicNewsRecipe):
del item['style'] del item['style']
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class TimesColonist(BasicNewsRecipe): class TimesColonist(BasicNewsRecipe):
# Customization -- remove sections you don't want. # Customization -- remove sections you don't want.
@ -179,7 +186,7 @@ class TimesColonist(BasicNewsRecipe):
authstr = re.sub('/ *Times Colonist', '/', authstr = re.sub('/ *Times Colonist', '/',
authstr, flags=re.IGNORECASE) authstr, flags=re.IGNORECASE)
authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE) authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE)
newdiv = Tag(soup, 'div') newdiv = new_tag(soup, 'div')
newdiv.insert(0, authstr) newdiv.insert(0, authstr)
newdiv['class'] = 'byline' newdiv['class'] = 'byline'
byline.replaceWith(newdiv) byline.replaceWith(newdiv)
@ -187,7 +194,7 @@ class TimesColonist(BasicNewsRecipe):
capstr = self.tag_to_string(caption, False) capstr = self.tag_to_string(caption, False)
capstr = re.sub('Photograph by.*$', '', capstr = re.sub('Photograph by.*$', '',
capstr, flags=re.IGNORECASE) capstr, flags=re.IGNORECASE)
newdiv = Tag(soup, 'div') newdiv = new_tag(soup, 'div')
newdiv.insert(0, capstr) newdiv.insert(0, capstr)
newdiv['class'] = 'caption' newdiv['class'] = 'caption'
caption.replaceWith(newdiv) caption.replaceWith(newdiv)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new ``name`` tag for ``soup`` on any BeautifulSoup version.

    ``attrs`` is an iterable of ``(key, value)`` pairs; an empty value or
    ``None`` means the tag gets no attributes.
    """
    # Use the soup's own ``new_tag`` factory when it provides one.
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()`` keeps attrs=None from raising inside dict(),
        # matching the fallback below, which already tolerates it.
        return factory(name, attrs=dict(attrs or ()))
    # No factory on this soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
class VrijNederlandRecipe(BasicNewsRecipe): class VrijNederlandRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -73,7 +80,7 @@ class VrijNederlandRecipe(BasicNewsRecipe):
# altogether # altogether
theirHead = soup.head theirHead = soup.head
theirHead.extract() theirHead.extract()
myHead = Tag(soup, 'head') myHead = new_tag(soup, 'head')
soup.insert(0, myHead) soup.insert(0, myHead)
return soup return soup