Port Tag creation in recipes to work with any version of BeautifulSoup

This commit is contained in:
Kovid Goyal 2019-03-23 08:06:25 +05:30
parent 930624d2be
commit c68a5c8ab1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
64 changed files with 580 additions and 131 deletions

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class Cro24Sata(BasicNewsRecipe):
title = '24 Sata - Hr'
__author__ = 'Darko Miletic'
@ -46,9 +53,9 @@ class Cro24Sata(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
now = datetime.datetime.now()
title = 'The AJC'
@ -118,7 +125,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
comma = ', '
article.author = names
if len(names) > 0:
tag = Tag(soup, 'div', [('class', 'cm-story-author')])
tag = new_tag(soup, 'div', [('class', 'cm-story-author')])
tag.append("by: ")
tag.append(names)
meta = soup.find('div', attrs={'class': 'cm-story-meta'})

View File

@ -17,6 +17,13 @@ def classes(classes):
)
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class AssociatedPress(BasicNewsRecipe):
title = u'Associated Press'
@ -76,7 +83,7 @@ class AssociatedPress(BasicNewsRecipe):
def preprocess_html(self, soup, *a):
for meta in soup.findAll('meta', attrs=dict(name="twitter:image:alt")):
for div in soup.findAll(**classes('LeadFeature')):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
img['src'] = meta['content']
div.insert(0, img)
return soup

View File

@ -127,6 +127,13 @@ class BloombergContributor:
return self._name
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class BloombergContributors(BasicNewsRecipe):
title = u'Bloomberg, Editorial Contributors'
description = 'Articles from Bloomberg.com contributors'
@ -175,7 +182,7 @@ class BloombergContributors(BasicNewsRecipe):
.strftime("%B %d, %Y %I:%M %p") + " UTC"
except:
parsed_time = time_stamp
insert_tag = Tag(soup, "p", [("class", "user-inserted")])
insert_tag = new_tag(soup, "p", [("class", "user-inserted")])
insert_tag.insert(0, parsed_time)
soup.time.replaceWith(insert_tag)

View File

@ -10,6 +10,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class BostonGlobeSubscription(BasicNewsRecipe):
title = "Boston Globe Subscription"
@ -204,7 +211,7 @@ class BostonGlobeSubscription(BasicNewsRecipe):
imgLink = main.find("a", "comic")
img = imgLink.img
body = Tag(soup, "body")
body = new_tag(soup, "body")
body.insert(0, title)
body.insert(1, byline)
body.insert(2, img)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'BuckMasters In The Kitchen'
language = 'en'
@ -28,15 +35,15 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
for img_tag in soup.findAll('img'):
parent_tag = img_tag.parent
if parent_tag.name == 'a':
new_tag = Tag(soup, 'p')
new_tag.insert(0, img_tag)
parent_tag.replaceWith(new_tag)
ntag = new_tag(soup, 'p')
ntag.insert(0, img_tag)
parent_tag.replaceWith(ntag)
elif parent_tag.name == 'p':
if not self.tag_to_string(parent_tag) == '':
new_div = Tag(soup, 'div')
new_tag = Tag(soup, 'p')
new_tag.insert(0, img_tag)
new_div = new_tag(soup, 'div')
ntag = new_tag(soup, 'p')
ntag.insert(0, img_tag)
parent_tag.replaceWith(new_div)
new_div.insert(0, new_tag)
new_div.insert(0, ntag)
new_div.insert(1, parent_tag)
return soup

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div')
allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[
'src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup, 'p')
pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption'
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.insert(0, pdesc)
div.insert(0, img)
allpics.append(div)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class ClimateProgress(BasicNewsRecipe):
title = 'Climate Progress'
__author__ = 'Darko Miletic'
@ -47,9 +54,9 @@ class ClimateProgress(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class DeGentenaarOnline(BasicNewsRecipe):
title = 'De Gentenaar'
__author__ = 'Darko Miletic'
@ -69,9 +76,9 @@ class DeGentenaarOnline(BasicNewsRecipe):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class DnevniAvaz(BasicNewsRecipe):
title = 'Dnevni Avaz'
__author__ = 'Darko Miletic'
@ -57,9 +64,9 @@ class DnevniAvaz(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr'
__author__ = 'Darko Miletic'
@ -58,9 +65,9 @@ class DnevnikCro(BasicNewsRecipe):
item[attrib] = ''
del item[attrib]
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -16,6 +16,13 @@ __license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class DunyaHalleri(BasicNewsRecipe):
title = 'Dünya Halleri'
description = 'Gözden Kaçanlar Rehberi'
@ -78,20 +85,20 @@ class DunyaHalleri(BasicNewsRecipe):
# title insert
article_title = soup.title.contents[0]
article_title.replace(' - Dünya Halleri'.decode('utf-8', 'replace'), '')
h2 = Tag(soup, 'h2')
h2 = new_tag(soup, 'h2')
h2.append(article_title)
span.insert(0, h2)
# featured image insert
meta = soup.findAll('meta', {'property': 'og:image'}, limit=1)[0]
if meta:
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
img.attrs = [('src', meta['content'])]
span.insert(1, img)
# gallery normalization
for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
p = Tag(soup, 'p')
p = new_tag(soup, 'p')
for img in div.findAll('img'):
img.attrs = [(key, value)
for key, value in img.attrs if key in ['src']]
@ -102,9 +109,9 @@ class DunyaHalleri(BasicNewsRecipe):
# this block finds the cover image for each embeded youtube video then
# changes it to "a href" and "img"
for iframe in soup.findAll('iframe'):
a = Tag(soup, 'a')
caption = Tag(soup, 'pre')
img = Tag(soup, 'img')
a = new_tag(soup, 'a')
caption = new_tag(soup, 'pre')
img = new_tag(soup, 'img')
m = re.match(
r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'

View File

@ -16,6 +16,13 @@ __license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
title = 'Dünya Halleri - Haftanın Özeti'
description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,'
@ -156,7 +163,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
def preprocess_html(self, soup):
# gallery normalization
for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
p = Tag(soup, 'p')
p = new_tag(soup, 'p')
for img in div.findAll('img'):
img.attrs = [(key, value)
for key, value in img.attrs if key in ['src']]
@ -167,9 +174,9 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
# this block finds the cover image for each embeded youtube video then
# changes it to "a href" and "img"
for iframe in soup.findAll('iframe'):
a = Tag(soup, 'a')
caption = Tag(soup, 'pre')
img = Tag(soup, 'img')
a = new_tag(soup, 'a')
caption = new_tag(soup, 'pre')
img = new_tag(soup, 'img')
m = re.match(
r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'

View File

@ -19,6 +19,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class NoArticles(Exception):
    # Custom exception type with no extra behaviour; where it is raised and
    # caught lies outside this excerpt.
    pass
@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe):
for table in list(self.eco_find_image_tables(soup)):
caption = table.find('font')
img = table.find('img')
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div['style'] = 'text-align:left;font-size:70%'
ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
div.insert(1, new_tag(soup, 'br'))
del img['width']
del img['height']
img.extract()

View File

@ -19,6 +19,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class NoArticles(Exception):
    # Custom exception type with no extra behaviour; where it is raised and
    # caught lies outside this excerpt.
    pass
@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe):
for table in list(self.eco_find_image_tables(soup)):
caption = table.find('font')
img = table.find('img')
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div['style'] = 'text-align:left;font-size:70%'
ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
div.insert(1, new_tag(soup, 'br'))
del img['width']
del img['height']
img.extract()

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div')
allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[
'src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup, 'p')
pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption'
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.insert(0, pdesc)
div.insert(0, img)
allpics.append(div)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class ElPeriodico_cat(BasicNewsRecipe):
title = 'El Periodico de Catalunya'
__author__ = 'Jordi Balcells/Darko Miletic'
@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
return url.replace('/default.asp?', '/print.asp?')
def preprocess_html(self, soup):
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset)
for item in soup.findAll(style=True):

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class ElPeriodico_cat(BasicNewsRecipe):
title = 'El Periodico de Catalunya'
__author__ = 'Jordi Balcells/Darko Miletic'
@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
return url.replace('/default.asp?', '/print.asp?')
def preprocess_html(self, soup):
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset)
for item in soup.findAll(style=True):

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class ElTiempoHn(BasicNewsRecipe):
title = 'El Tiempo - Honduras'
__author__ = 'Darko Miletic'
@ -36,9 +43,9 @@ class ElTiempoHn(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -6,6 +6,13 @@ from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class Estadao(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
@ -76,11 +83,11 @@ class Estadao(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv': 'Content-Language'}):
meta0 = Tag(soup, 'meta', [
meta0 = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.LANGHTM)])
soup.head.insert(0, meta0)
if not soup.find(attrs={'http-equiv': 'Content-Type'}):
meta1 = Tag(soup, 'meta', [
meta1 = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)])
soup.head.insert(0, meta1)
return soup

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class FastCompany(BasicNewsRecipe):
title = 'Fast Company'
__author__ = 'Darko Miletic'
@ -43,9 +50,9 @@ class FastCompany(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class FokkeEnSukkeRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
@ -79,7 +86,7 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe):
if img:
title = img['alt']
tag = Tag(soup, 'div', [('class', 'title')])
tag = new_tag(soup, 'div', [('class', 'title')])
tag.insert(0, title)
cartoon.insert(0, tag)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, Comment
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class GlennBeckRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
@ -71,7 +78,7 @@ class GlennBeckRecipe(BasicNewsRecipe):
if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == ' '):
# This is our content; ignore the rest.
para = Tag(freshSoup, 'p')
para = new_tag(freshSoup, 'p')
para.append(raw)
freshSoup.body.append(para)
counter += 1

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class HLN_be(BasicNewsRecipe):
title = 'Het Belang Van Limburg'
__author__ = 'Darko Miletic and Sujata Raman'
@ -53,9 +60,9 @@ class HLN_be(BasicNewsRecipe):
del item['style']
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class Hoy(BasicNewsRecipe):
title = 'HOY'
__author__ = 'Fco Javier Nieto'
@ -61,7 +68,7 @@ class Hoy(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['dir'] = self.direction
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset)
for item in soup.findAll(style=True):

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class HRT(BasicNewsRecipe):
title = 'HRT: Vesti'
__author__ = 'Darko Miletic'
@ -62,9 +69,9 @@ class HRT(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class TheIndependentNew(BasicNewsRecipe):
title = u'The Independent'
@ -65,7 +72,7 @@ class TheIndependentNew(BasicNewsRecipe):
for li in div.findAll('li', attrs={'data-gallery-legend': True}):
src = imgs.get(li['data-gallery-legend'])
if src is not None:
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
img['src'] = src
img['style'] = 'display:block'
li.append(img)

View File

@ -3,6 +3,13 @@ from calibre.ebooks.BeautifulSoup import Tag
import re
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class JoopRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
@ -89,7 +96,7 @@ class JoopRecipe(BasicNewsRecipe):
span = h2.find('span', 'info')
if span:
txt = span.find(text=True)
div = Tag(soup, 'div', attrs=[('class', 'joop_date')])
div = new_tag(soup, 'div', attrs=[('class', 'joop_date')])
div.append(txt)
h2.replaceWith(div)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class Jutarnji(BasicNewsRecipe):
title = 'Jutarnji'
__author__ = 'Darko Miletic'
@ -68,9 +75,9 @@ class Jutarnji(BasicNewsRecipe):
item[attrib] = ''
del item[attrib]
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class LaPrensaHn(BasicNewsRecipe):
title = 'La Prensa - Honduras'
__author__ = 'Darko Miletic'
@ -46,9 +53,9 @@ class LaPrensaHn(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class LaTribuna(BasicNewsRecipe):
title = 'La Tribuna - Honduras'
__author__ = 'Darko Miletic'
@ -49,9 +56,9 @@ class LaTribuna(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class LaVanguardia(BasicNewsRecipe):
title = 'La Vanguardia Digital'
__author__ = 'Darko Miletic'
@ -61,7 +68,7 @@ class LaVanguardia(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['dir'] = self.direction
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mcharset)
for item in soup.findAll(style=True):

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class LentaRURecipe(BasicNewsRecipe):
title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438'
__author__ = 'Nikolai Kotchetkov'
@ -113,7 +120,7 @@ class LentaRURecipe(BasicNewsRecipe):
def postprocess_html(self, soup, first_fetch):
contents = Tag(soup, 'div')
contents = new_tag(soup, 'div')
# Extract tags with given attributes
extractElements = {'div': [{'id': 'readers-block'}]}
@ -155,13 +162,13 @@ class LentaRURecipe(BasicNewsRecipe):
# Place article picture after date
pic = soup.find('img')
if pic:
picDiv = Tag(soup, 'div')
picDiv = new_tag(soup, 'div')
picDiv['style'] = 'width: 100%; text-align: center;'
pic.extract()
picDiv.insert(0, pic)
title = pic.get('title', None)
if title:
titleDiv = Tag(soup, 'div')
titleDiv = new_tag(soup, 'div')
titleDiv['style'] = 'font-size: 0.5em;'
titleDiv.insert(0, title)
picDiv.insert(1, titleDiv)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class LevanteRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
@ -92,7 +99,7 @@ class LevanteRecipe(BasicNewsRecipe):
# Nuke some real crappy html
theirHead = soup.head
theirHead.extract()
myHead = Tag(soup, 'head')
myHead = new_tag(soup, 'head')
soup.insert(0, myHead)
return soup

View File

@ -1,4 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class MoneyControlRecipe(BasicNewsRecipe):
@ -37,7 +45,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
headline = soup.find('td', attrs = {'class': 'heading'})
if headline:
h1 = Tag(freshSoup, 'h1')
h1 = new_tag(freshSoup, 'h1')
# Convert to string before adding it to the document!
h1.append(self.tag_to_string(headline))
freshSoup.body.append(h1)
@ -47,7 +55,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
# We have some weird pagebreak marker here; it will not find all of them however
continue
para = Tag(freshSoup, 'p')
para = new_tag(freshSoup, 'p')
# Convert to string; this will loose all formatting but also all illegal markup
para.append(self.tag_to_string(p))

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div')
allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[
'src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup, 'p')
pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption'
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.insert(0, pdesc)
div.insert(0, img)
allpics.append(div)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` that belongs to ``soup``.

    Uses ``soup.new_tag()`` when the soup object provides it (as
    BeautifulSoup 4 does), otherwise falls back to constructing ``Tag``
    directly.

    :param soup: The soup object the tag is created for.
    :param name: The tag name, e.g. ``'meta'``.
    :param attrs: Attributes as an iterable of ``(key, value)`` pairs or a
        mapping; empty by default.
    :return: The newly created tag; it is not inserted into the tree.
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # soup.new_tag() is handed the attributes as a dict.
        return impl(name, attrs=dict(attrs))
    # No factory method on the soup: build the Tag directly, passing None
    # rather than an empty attribute collection.
    return Tag(soup, name, attrs=attrs or None)
class NacionalCro(BasicNewsRecipe):
title = 'Nacional - Hr'
__author__ = 'Darko Miletic'
@ -44,9 +51,9 @@ class NacionalCro(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -13,6 +13,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class NatGeo(BasicNewsRecipe):
title = u'National Geographic'
description = 'Daily news articles from The National Geographic'
@ -64,7 +71,7 @@ class NatGeo(BasicNewsRecipe):
idx = url.find('.jpg/{width')
if idx != -1:
url = url[:idx + 4]
img = Tag(soup, "img")
img = new_tag(soup, "img")
img['src'] = url
div.append(img)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class NrcNextRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
@ -100,20 +107,20 @@ class NrcNextRecipe(BasicNewsRecipe):
if tag:
h2 = tag.find('h2', 'vlag')
if h2:
new_h2 = Tag(soup, 'h2', attrs=[('class', 'vlag')])
new_h2 = new_tag(soup, 'h2', attrs=[('class', 'vlag')])
new_h2.append(self.tag_to_string(h2))
h2.replaceWith(new_h2)
else:
h2 = tag.find('h2')
if h2:
new_h2 = Tag(soup, 'h2', attrs=[
new_h2 = new_tag(soup, 'h2', attrs=[
('class', 'sub_title')])
new_h2.append(self.tag_to_string(h2))
h2.replaceWith(new_h2)
h1 = tag.find('h1')
if h1:
new_h1 = Tag(soup, 'h1')
new_h1 = new_tag(soup, 'h1')
new_h1.append(self.tag_to_string(h1))
h1.replaceWith(new_h1)

View File

@ -23,6 +23,13 @@ def absurl(x):
return x
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class NewYorker(BasicNewsRecipe):
title = u'New Yorker Magazine'
@ -114,12 +121,12 @@ class NewYorker(BasicNewsRecipe):
title = soup.find('meta', itemprop='name')
if title:
if self.featured_image:
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
img['src'] = self.featured_image
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.append(img)
body.insert(0, div)
h1 = Tag(soup, 'h1')
h1 = new_tag(soup, 'h1')
h1.append(title.get('content'))
body.insert(0, h1)
for attr in 'srcset data-src-mobile'.split():

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class NOAA(BasicNewsRecipe):
title = 'NOAA Online'
__author__ = 'Darko Miletic'
@ -32,9 +39,9 @@ class NOAA(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -8,6 +8,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Pagina12(BasicNewsRecipe):
title = 'NRC'
__author__ = 'Darko Miletic'
@ -42,7 +49,7 @@ class Pagina12(BasicNewsRecipe):
div = soup.find(
'div', attrs={'class': lambda x: x and 'featured-img' in x})
if div is not None:
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
img['src'] = src
div.append(img)
return soup

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Nspm(BasicNewsRecipe):
title = 'Nova srpska politicka misao'
__author__ = 'Darko Miletic'
@ -62,7 +69,7 @@ class Nspm(BasicNewsRecipe):
def preprocess_html(self, soup):
atitle = soup.body.find('a', attrs={'class': 'contentpagetitle'})
if atitle:
cleanTitle = Tag(soup, 'h1', [('class', 'contentpagetitle')])
cleanTitle = new_tag(soup, 'h1', [('class', 'contentpagetitle')])
cnt = NavigableString(self.tag_to_string(atitle))
cleanTitle.append(cnt)
@ -73,12 +80,12 @@ class Nspm(BasicNewsRecipe):
crdate = soup.body.find('td', attrs={'class': 'createdate'})
if crdate:
cleanCrdate = Tag(soup, 'div', [('class', 'createdate')])
cleanCrdate = new_tag(soup, 'div', [('class', 'createdate')])
cnt = NavigableString(self.tag_to_string(crdate))
cleanCrdate.append(cnt)
# get the dependent element
artText = Tag(soup, 'div', [('class', 'text')])
artText = new_tag(soup, 'div', [('class', 'text')])
textHolderp = crdate.parent
textHolder = textHolderp.nextSibling
while textHolder and (not isinstance(textHolder, Tag) or (textHolder.name != textHolderp.name)):

View File

@ -67,6 +67,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class NewYorkTimes(BasicNewsRecipe):
if is_web_edition:
@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
keep_only_tags = [
dict(id='story'),
]
body = Tag(soup, 'body')
body = new_tag(soup, 'body')
for spec in keep_only_tags:
for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag)

View File

@ -67,6 +67,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class NewYorkTimes(BasicNewsRecipe):
if is_web_edition:
@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
keep_only_tags = [
dict(id='story'),
]
body = Tag(soup, 'body')
body = new_tag(soup, 'body')
for spec in keep_only_tags:
for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag)

View File

@ -13,6 +13,13 @@ from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.recipes import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Nzz(BasicNewsRecipe):
title = 'NZZ Webpaper'
__author__ = 'Bernd Leinfelder'
@ -56,7 +63,7 @@ class Nzz(BasicNewsRecipe):
for span in soup.findAll('span', attrs={'data-src-640': True}):
imgSrc = span['data-src-640']
# print "image source: "+ imgSrc
imgTag = Tag(soup, "img", [("src", imgSrc)])
imgTag = new_tag(soup, "img", [("src", imgSrc)])
span.replaceWith(imgTag)
# print soup.prettify()

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div')
allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[
'src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup, 'p')
pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption'
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.insert(0, pdesc)
div.insert(0, img)
allpics.append(div)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Pagina12(BasicNewsRecipe):
title = 'Pagina/12 - Edicion Impresa'
@ -84,10 +91,10 @@ class Pagina12(BasicNewsRecipe):
if img is not None:
img.extract()
caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div['style'] = 'text-align:center'
div.insert(0, img)
div.insert(1, Tag(soup, 'br'))
div.insert(1, new_tag(soup, 'br'))
if caption:
div.insert(2, NavigableString(caption))
table.replaceWith(div)

View File

@ -13,6 +13,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Pobjeda(BasicNewsRecipe):
title = 'Pobjeda Online'
__author__ = 'Darko Miletic'
@ -59,9 +66,9 @@ class Pobjeda(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class PressOnline(BasicNewsRecipe):
title = 'Press Online'
__author__ = 'Darko Miletic'
@ -60,7 +67,7 @@ class PressOnline(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
soup.head.insert(0, mlang)
return self.adeify_images(soup)

View File

@ -3,6 +3,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class RevistaMuyInteresante(BasicNewsRecipe):
title = 'Revista Muy Interesante'
@ -24,7 +31,7 @@ class RevistaMuyInteresante(BasicNewsRecipe):
for img_tag in soup.findAll('img'):
imagen = img_tag
new_tag = Tag(soup, 'p')
# Bind the result to a name other than ``new_tag``: assigning to the helper's
# own name would shadow it (UnboundLocalError inside a function, and a
# non-callable Tag on any later iteration).
para_tag = new_tag(soup, 'p')
img_tag.replaceWith(para_tag)
div = soup.find(attrs={'class': 'article_category'})
div.insert(0, imagen)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class RTS(BasicNewsRecipe):
title = 'RTS: Vesti'
__author__ = 'Darko Miletic'
@ -50,9 +57,9 @@ class RTS(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class SarajevoX(BasicNewsRecipe):
title = 'Sarajevo-x.com'
__author__ = 'Darko Miletic'
@ -57,7 +64,7 @@ class SarajevoX(BasicNewsRecipe):
if sp:
sp
else:
mtag = Tag(soup, 'div', [
mtag = new_tag(soup, 'div', [
("id", "opisslike"), ("class", "opscitech")])
mopis = NavigableString("Opis")
mtag.insert(0, mopis)

View File

@ -12,6 +12,13 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class SCMP(BasicNewsRecipe):
title = 'South China Morning Post'
__author__ = 'llam'
@ -71,7 +78,7 @@ class SCMP(BasicNewsRecipe):
wrapper = soup.find(**classes('image-wrapper__placeholder'))
if wrapper is not None:
p = wrapper.parent
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
img['src'] = meta['content']
p.append(img)
wrapper.extract()

View File

@ -14,6 +14,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class TheSouthernStar(BasicNewsRecipe):
title = 'The Southern Star'
@ -117,10 +124,10 @@ class TheSouthernStar(BasicNewsRecipe):
if img is not None:
img.extract()
caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div['style'] = 'text-align:center'
div.insert(0, img)
div.insert(1, Tag(soup, 'br'))
div.insert(1, new_tag(soup, 'br'))
if caption:
div.insert(2, NavigableString(caption))
table.replaceWith(div)

View File

@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class DeTijd(BasicNewsRecipe):
title = 'De Tijd'
__author__ = 'Darko Miletic'
@ -68,11 +75,11 @@ class DeTijd(BasicNewsRecipe):
del item['style']
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(
mlang = new_tag(
soup, 'meta', [("http-equiv", "Content-Language"),
("content", self.lang)]
)
mcharset = Tag(
mcharset = new_tag(
soup, 'meta', [("http-equiv", "Content-Type"),
("content", "text/html; charset=utf-8")]
)

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Uncrate(BasicNewsRecipe):
title = 'Uncrate'
__author__ = 'Darko Miletic'
@ -47,9 +54,9 @@ class Uncrate(BasicNewsRecipe):
feeds = [(u'Articles', u'http://feeds.feedburner.com/uncrate')]
def preprocess_html(self, soup):
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
@ -231,21 +238,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div')
allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[
'src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup, 'p')
pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption'
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.insert(0, pdesc)
div.insert(0, img)
allpics.append(div)

View File

@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class CanWestPaper(BasicNewsRecipe):
compress_news_images = True
@ -219,21 +226,21 @@ class CanWestPaper(BasicNewsRecipe):
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
allpics = Tag(soup, 'div')
allpics = new_tag(soup, 'div')
first_img = pgall.find('div', 'storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup, 'img')
img = new_tag(soup, 'img')
srcpre, sep, srcpost = atag.img[
'src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup, 'p')
pdesc = new_tag(soup, 'p')
pdesc.insert(0, atag.img['alt'])
pdesc['class'] = 'photocaption'
div = Tag(soup, 'div')
div = new_tag(soup, 'div')
div.insert(0, pdesc)
div.insert(0, img)
allpics.append(div)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class VecernjiList(BasicNewsRecipe):
title = 'Vecernji List'
__author__ = 'Darko Miletic'
@ -48,9 +55,9 @@ class VecernjiList(BasicNewsRecipe):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -9,6 +9,13 @@ from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class VedomostiRecipe(BasicNewsRecipe):
title = u'Ведомости'
__author__ = 'Nikolai Kotchetkov'
@ -145,7 +152,7 @@ class VedomostiRecipe(BasicNewsRecipe):
if newstop:
img = newstop.find('img')
if img:
imgDiv = Tag(soup, 'div')
imgDiv = new_tag(soup, 'div')
imgDiv['class'] = 'article_img'
if img.get('width'):

View File

@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Veintitres(BasicNewsRecipe):
title = 'Veintitres'
__author__ = 'Darko Miletic'
@ -52,9 +59,9 @@ class Veintitres(BasicNewsRecipe):
del item['style']
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = Tag(soup, 'meta', [
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)

View File

@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class TimesColonist(BasicNewsRecipe):
# Customization -- remove sections you don't want.
@ -179,7 +186,7 @@ class TimesColonist(BasicNewsRecipe):
authstr = re.sub('/ *Times Colonist', '/',
authstr, flags=re.IGNORECASE)
authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE)
newdiv = Tag(soup, 'div')
newdiv = new_tag(soup, 'div')
newdiv.insert(0, authstr)
newdiv['class'] = 'byline'
byline.replaceWith(newdiv)
@ -187,7 +194,7 @@ class TimesColonist(BasicNewsRecipe):
capstr = self.tag_to_string(caption, False)
capstr = re.sub('Photograph by.*$', '',
capstr, flags=re.IGNORECASE)
newdiv = Tag(soup, 'div')
newdiv = new_tag(soup, 'div')
newdiv.insert(0, capstr)
newdiv['class'] = 'caption'
caption.replaceWith(newdiv)

View File

@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Build a ``name`` tag attached to ``soup``.

    ``attrs`` may be anything ``dict()`` accepts (e.g. a sequence of
    ``(key, value)`` pairs).  When ``soup`` provides its own ``new_tag``
    factory it is called with the attributes as a dict; otherwise ``Tag``
    is instantiated directly, with an empty ``attrs`` passed as ``None``.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: fall back to the Tag constructor.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class VrijNederlandRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
@ -73,7 +80,7 @@ class VrijNederlandRecipe(BasicNewsRecipe):
# altogether
theirHead = soup.head
theirHead.extract()
myHead = Tag(soup, 'head')
myHead = new_tag(soup, 'head')
soup.insert(0, myHead)
return soup