mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Port Tag creation in recipes to work with any version of BeautifulSoup
This commit is contained in:
parent
930624d2be
commit
c68a5c8ab1
@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Cro24Sata(BasicNewsRecipe):
|
||||
title = '24 Sata - Hr'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -46,9 +53,9 @@ class Cro24Sata(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
now = datetime.datetime.now()
|
||||
title = 'The AJC'
|
||||
@ -118,7 +125,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
comma = ', '
|
||||
article.author = names
|
||||
if len(names) > 0:
|
||||
tag = Tag(soup, 'div', [('class', 'cm-story-author')])
|
||||
tag = new_tag(soup, 'div', [('class', 'cm-story-author')])
|
||||
tag.append("by: ")
|
||||
tag.append(names)
|
||||
meta = soup.find('div', attrs={'class': 'cm-story-meta'})
|
||||
|
@ -17,6 +17,13 @@ def classes(classes):
|
||||
)
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class AssociatedPress(BasicNewsRecipe):
|
||||
|
||||
title = u'Associated Press'
|
||||
@ -76,7 +83,7 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup, *a):
|
||||
for meta in soup.findAll('meta', attrs=dict(name="twitter:image:alt")):
|
||||
for div in soup.findAll(**classes('LeadFeature')):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
img['src'] = meta['content']
|
||||
div.insert(0, img)
|
||||
return soup
|
||||
|
@ -127,6 +127,13 @@ class BloombergContributor:
|
||||
return self._name
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class BloombergContributors(BasicNewsRecipe):
|
||||
title = u'Bloomberg, Editorial Contributors'
|
||||
description = 'Articles from Bloomberg.com contributors'
|
||||
@ -175,7 +182,7 @@ class BloombergContributors(BasicNewsRecipe):
|
||||
.strftime("%B %d, %Y %I:%M %p") + " UTC"
|
||||
except:
|
||||
parsed_time = time_stamp
|
||||
insert_tag = Tag(soup, "p", [("class", "user-inserted")])
|
||||
insert_tag = new_tag(soup, "p", [("class", "user-inserted")])
|
||||
insert_tag.insert(0, parsed_time)
|
||||
soup.time.replaceWith(insert_tag)
|
||||
|
||||
|
@ -10,6 +10,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class BostonGlobeSubscription(BasicNewsRecipe):
|
||||
|
||||
title = "Boston Globe Subscription"
|
||||
@ -204,7 +211,7 @@ class BostonGlobeSubscription(BasicNewsRecipe):
|
||||
imgLink = main.find("a", "comic")
|
||||
img = imgLink.img
|
||||
|
||||
body = Tag(soup, "body")
|
||||
body = new_tag(soup, "body")
|
||||
body.insert(0, title)
|
||||
body.insert(1, byline)
|
||||
body.insert(2, img)
|
||||
|
@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
title = 'BuckMasters In The Kitchen'
|
||||
language = 'en'
|
||||
@ -28,15 +35,15 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
for img_tag in soup.findAll('img'):
|
||||
parent_tag = img_tag.parent
|
||||
if parent_tag.name == 'a':
|
||||
new_tag = Tag(soup, 'p')
|
||||
new_tag.insert(0, img_tag)
|
||||
parent_tag.replaceWith(new_tag)
|
||||
ntag = new_tag(soup, 'p')
|
||||
ntag.insert(0, img_tag)
|
||||
parent_tag.replaceWith(ntag)
|
||||
elif parent_tag.name == 'p':
|
||||
if not self.tag_to_string(parent_tag) == '':
|
||||
new_div = Tag(soup, 'div')
|
||||
new_tag = Tag(soup, 'p')
|
||||
new_tag.insert(0, img_tag)
|
||||
new_div = new_tag(soup, 'div')
|
||||
ntag = new_tag(soup, 'p')
|
||||
ntag.insert(0, img_tag)
|
||||
parent_tag.replaceWith(new_div)
|
||||
new_div.insert(0, new_tag)
|
||||
new_div.insert(0, ntag)
|
||||
new_div.insert(1, parent_tag)
|
||||
return soup
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
postmedia_index_pages = [
|
||||
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
|
||||
allpics = Tag(soup, 'div')
|
||||
allpics = new_tag(soup, 'div')
|
||||
first_img = pgall.find('div', 'storyimage')
|
||||
if first_img is not None:
|
||||
first_img.extract()
|
||||
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
|
||||
if tlist is not None:
|
||||
for atag in tlist.findAll('a'):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
srcpre, sep, srcpost = atag.img[
|
||||
'src'].partition('?')
|
||||
img['src'] = srcpre
|
||||
pdesc = Tag(soup, 'p')
|
||||
pdesc = new_tag(soup, 'p')
|
||||
pdesc.insert(0, atag.img['alt'])
|
||||
pdesc['class'] = 'photocaption'
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.insert(0, pdesc)
|
||||
div.insert(0, img)
|
||||
allpics.append(div)
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class ClimateProgress(BasicNewsRecipe):
|
||||
title = 'Climate Progress'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -47,9 +54,9 @@ class ClimateProgress(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class DeGentenaarOnline(BasicNewsRecipe):
|
||||
title = 'De Gentenaar'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -69,9 +76,9 @@ class DeGentenaarOnline(BasicNewsRecipe):
|
||||
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class DnevniAvaz(BasicNewsRecipe):
|
||||
title = 'Dnevni Avaz'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -57,9 +64,9 @@ class DnevniAvaz(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class DnevnikCro(BasicNewsRecipe):
|
||||
title = 'Dnevnik - Hr'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -58,9 +65,9 @@ class DnevnikCro(BasicNewsRecipe):
|
||||
item[attrib] = ''
|
||||
del item[attrib]
|
||||
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -16,6 +16,13 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2017, sukru alatas / alatas.org'
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class DunyaHalleri(BasicNewsRecipe):
|
||||
title = 'Dünya Halleri'
|
||||
description = 'Gözden Kaçanlar Rehberi'
|
||||
@ -78,20 +85,20 @@ class DunyaHalleri(BasicNewsRecipe):
|
||||
# title insert
|
||||
article_title = soup.title.contents[0]
|
||||
article_title.replace(' - Dünya Halleri'.decode('utf-8', 'replace'), '')
|
||||
h2 = Tag(soup, 'h2')
|
||||
h2 = new_tag(soup, 'h2')
|
||||
h2.append(article_title)
|
||||
span.insert(0, h2)
|
||||
|
||||
# featured image insert
|
||||
meta = soup.findAll('meta', {'property': 'og:image'}, limit=1)[0]
|
||||
if meta:
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
img.attrs = [('src', meta['content'])]
|
||||
span.insert(1, img)
|
||||
|
||||
# gallery normalization
|
||||
for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
|
||||
p = Tag(soup, 'p')
|
||||
p = new_tag(soup, 'p')
|
||||
for img in div.findAll('img'):
|
||||
img.attrs = [(key, value)
|
||||
for key, value in img.attrs if key in ['src']]
|
||||
@ -102,9 +109,9 @@ class DunyaHalleri(BasicNewsRecipe):
|
||||
# this block finds the cover image for each embedded youtube video then
|
||||
# changes it to "a href" and "img"
|
||||
for iframe in soup.findAll('iframe'):
|
||||
a = Tag(soup, 'a')
|
||||
caption = Tag(soup, 'pre')
|
||||
img = Tag(soup, 'img')
|
||||
a = new_tag(soup, 'a')
|
||||
caption = new_tag(soup, 'pre')
|
||||
img = new_tag(soup, 'img')
|
||||
|
||||
m = re.match(
|
||||
r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
|
||||
|
@ -16,6 +16,13 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2017, sukru alatas / alatas.org'
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
|
||||
title = 'Dünya Halleri - Haftanın Özeti'
|
||||
description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,'
|
||||
@ -156,7 +163,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
# gallery normalization
|
||||
for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
|
||||
p = Tag(soup, 'p')
|
||||
p = new_tag(soup, 'p')
|
||||
for img in div.findAll('img'):
|
||||
img.attrs = [(key, value)
|
||||
for key, value in img.attrs if key in ['src']]
|
||||
@ -167,9 +174,9 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
|
||||
# this block finds the cover image for each embedded youtube video then
|
||||
# changes it to "a href" and "img"
|
||||
for iframe in soup.findAll('iframe'):
|
||||
a = Tag(soup, 'a')
|
||||
caption = Tag(soup, 'pre')
|
||||
img = Tag(soup, 'img')
|
||||
a = new_tag(soup, 'a')
|
||||
caption = new_tag(soup, 'pre')
|
||||
img = new_tag(soup, 'img')
|
||||
|
||||
m = re.match(
|
||||
r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
|
||||
|
@ -19,6 +19,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NoArticles(Exception):
|
||||
pass
|
||||
|
||||
@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe):
|
||||
for table in list(self.eco_find_image_tables(soup)):
|
||||
caption = table.find('font')
|
||||
img = table.find('img')
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div['style'] = 'text-align:left;font-size:70%'
|
||||
ns = NavigableString(self.tag_to_string(caption))
|
||||
div.insert(0, ns)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
div.insert(1, new_tag(soup, 'br'))
|
||||
del img['width']
|
||||
del img['height']
|
||||
img.extract()
|
||||
|
@ -19,6 +19,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NoArticles(Exception):
|
||||
pass
|
||||
|
||||
@ -266,11 +273,11 @@ class Economist(BasicNewsRecipe):
|
||||
for table in list(self.eco_find_image_tables(soup)):
|
||||
caption = table.find('font')
|
||||
img = table.find('img')
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div['style'] = 'text-align:left;font-size:70%'
|
||||
ns = NavigableString(self.tag_to_string(caption))
|
||||
div.insert(0, ns)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
div.insert(1, new_tag(soup, 'br'))
|
||||
del img['width']
|
||||
del img['height']
|
||||
img.extract()
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
postmedia_index_pages = [
|
||||
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
|
||||
allpics = Tag(soup, 'div')
|
||||
allpics = new_tag(soup, 'div')
|
||||
first_img = pgall.find('div', 'storyimage')
|
||||
if first_img is not None:
|
||||
first_img.extract()
|
||||
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
|
||||
if tlist is not None:
|
||||
for atag in tlist.findAll('a'):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
srcpre, sep, srcpost = atag.img[
|
||||
'src'].partition('?')
|
||||
img['src'] = srcpre
|
||||
pdesc = Tag(soup, 'p')
|
||||
pdesc = new_tag(soup, 'p')
|
||||
pdesc.insert(0, atag.img['alt'])
|
||||
pdesc['class'] = 'photocaption'
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.insert(0, pdesc)
|
||||
div.insert(0, img)
|
||||
allpics.append(div)
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class ElPeriodico_cat(BasicNewsRecipe):
|
||||
title = 'El Periodico de Catalunya'
|
||||
__author__ = 'Jordi Balcells/Darko Miletic'
|
||||
@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
|
||||
return url.replace('/default.asp?', '/print.asp?')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class ElPeriodico_cat(BasicNewsRecipe):
|
||||
title = 'El Periodico de Catalunya'
|
||||
__author__ = 'Jordi Balcells/Darko Miletic'
|
||||
@ -58,7 +65,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
|
||||
return url.replace('/default.asp?', '/print.asp?')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class ElTiempoHn(BasicNewsRecipe):
|
||||
title = 'El Tiempo - Honduras'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -36,9 +43,9 @@ class ElTiempoHn(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -6,6 +6,13 @@ from calibre.utils.magick import Image, PixelWand
|
||||
from urllib2 import Request, urlopen, URLError
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Estadao(BasicNewsRecipe):
|
||||
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
|
||||
LANGUAGE = 'pt_br'
|
||||
@ -76,11 +83,11 @@ class Estadao(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
if not soup.find(attrs={'http-equiv': 'Content-Language'}):
|
||||
meta0 = Tag(soup, 'meta', [
|
||||
meta0 = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.LANGHTM)])
|
||||
soup.head.insert(0, meta0)
|
||||
if not soup.find(attrs={'http-equiv': 'Content-Type'}):
|
||||
meta1 = Tag(soup, 'meta', [
|
||||
meta1 = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)])
|
||||
soup.head.insert(0, meta1)
|
||||
return soup
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class FastCompany(BasicNewsRecipe):
|
||||
title = 'Fast Company'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -43,9 +50,9 @@ class FastCompany(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class FokkeEnSukkeRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -79,7 +86,7 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe):
|
||||
if img:
|
||||
title = img['alt']
|
||||
|
||||
tag = Tag(soup, 'div', [('class', 'title')])
|
||||
tag = new_tag(soup, 'div', [('class', 'title')])
|
||||
tag.insert(0, title)
|
||||
cartoon.insert(0, tag)
|
||||
|
||||
|
@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, Comment
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class GlennBeckRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -71,7 +78,7 @@ class GlennBeckRecipe(BasicNewsRecipe):
|
||||
|
||||
if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == ' '):
|
||||
# This is our content; ignore the rest.
|
||||
para = Tag(freshSoup, 'p')
|
||||
para = new_tag(freshSoup, 'p')
|
||||
para.append(raw)
|
||||
freshSoup.body.append(para)
|
||||
counter += 1
|
||||
|
@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class HLN_be(BasicNewsRecipe):
|
||||
title = 'Het Belang Van Limburg'
|
||||
__author__ = 'Darko Miletic and Sujata Raman'
|
||||
@ -53,9 +60,9 @@ class HLN_be(BasicNewsRecipe):
|
||||
del item['style']
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Hoy(BasicNewsRecipe):
|
||||
title = 'HOY'
|
||||
__author__ = 'Fco Javier Nieto'
|
||||
@ -61,7 +68,7 @@ class Hoy(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['dir'] = self.direction
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class HRT(BasicNewsRecipe):
|
||||
title = 'HRT: Vesti'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -62,9 +69,9 @@ class HRT(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -11,6 +11,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class TheIndependentNew(BasicNewsRecipe):
|
||||
|
||||
title = u'The Independent'
|
||||
@ -65,7 +72,7 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
for li in div.findAll('li', attrs={'data-gallery-legend': True}):
|
||||
src = imgs.get(li['data-gallery-legend'])
|
||||
if src is not None:
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
img['src'] = src
|
||||
img['style'] = 'display:block'
|
||||
li.append(img)
|
||||
|
@ -3,6 +3,13 @@ from calibre.ebooks.BeautifulSoup import Tag
|
||||
import re
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class JoopRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -89,7 +96,7 @@ class JoopRecipe(BasicNewsRecipe):
|
||||
span = h2.find('span', 'info')
|
||||
if span:
|
||||
txt = span.find(text=True)
|
||||
div = Tag(soup, 'div', attrs=[('class', 'joop_date')])
|
||||
div = new_tag(soup, 'div', attrs=[('class', 'joop_date')])
|
||||
div.append(txt)
|
||||
h2.replaceWith(div)
|
||||
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Jutarnji(BasicNewsRecipe):
|
||||
title = 'Jutarnji'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -68,9 +75,9 @@ class Jutarnji(BasicNewsRecipe):
|
||||
item[attrib] = ''
|
||||
del item[attrib]
|
||||
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class LaPrensaHn(BasicNewsRecipe):
|
||||
title = 'La Prensa - Honduras'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -46,9 +53,9 @@ class LaPrensaHn(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name*, whichever BeautifulSoup API is present.

    ``attrs`` is an iterable of ``(key, value)`` pairs (or a mapping); an
    empty value or an explicit ``None`` means "no attributes".
    """
    # Prefer the soup's own new_tag() factory when the object provides one.
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        # ``attrs or ()`` keeps an explicit None from crashing dict().
        return impl(name, attrs=dict(attrs or ()))
    # No factory on the soup: construct the Tag directly.
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class LaTribuna(BasicNewsRecipe):
|
||||
title = 'La Tribuna - Honduras'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -49,9 +56,9 @@ class LaTribuna(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class LaVanguardia(BasicNewsRecipe):
|
||||
title = 'La Vanguardia Digital'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -61,7 +68,7 @@ class LaVanguardia(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['dir'] = self.direction
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class LentaRURecipe(BasicNewsRecipe):
|
||||
title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438'
|
||||
__author__ = 'Nikolai Kotchetkov'
|
||||
@ -113,7 +120,7 @@ class LentaRURecipe(BasicNewsRecipe):
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
|
||||
contents = Tag(soup, 'div')
|
||||
contents = new_tag(soup, 'div')
|
||||
|
||||
# Extract tags with given attributes
|
||||
extractElements = {'div': [{'id': 'readers-block'}]}
|
||||
@ -155,13 +162,13 @@ class LentaRURecipe(BasicNewsRecipe):
|
||||
# Place article picture after date
|
||||
pic = soup.find('img')
|
||||
if pic:
|
||||
picDiv = Tag(soup, 'div')
|
||||
picDiv = new_tag(soup, 'div')
|
||||
picDiv['style'] = 'width: 100%; text-align: center;'
|
||||
pic.extract()
|
||||
picDiv.insert(0, pic)
|
||||
title = pic.get('title', None)
|
||||
if title:
|
||||
titleDiv = Tag(soup, 'div')
|
||||
titleDiv = new_tag(soup, 'div')
|
||||
titleDiv['style'] = 'font-size: 0.5em;'
|
||||
titleDiv.insert(0, title)
|
||||
picDiv.insert(1, titleDiv)
|
||||
|
@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class LevanteRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -92,7 +99,7 @@ class LevanteRecipe(BasicNewsRecipe):
|
||||
# Nuke some real crappy html
|
||||
theirHead = soup.head
|
||||
theirHead.extract()
|
||||
myHead = Tag(soup, 'head')
|
||||
myHead = new_tag(soup, 'head')
|
||||
soup.insert(0, myHead)
|
||||
|
||||
return soup
|
||||
|
@ -1,4 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class MoneyControlRecipe(BasicNewsRecipe):
|
||||
@ -37,7 +45,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
|
||||
|
||||
headline = soup.find('td', attrs = {'class': 'heading'})
|
||||
if headline:
|
||||
h1 = Tag(freshSoup, 'h1')
|
||||
h1 = new_tag(freshSoup, 'h1')
|
||||
# Convert to string before adding it to the document!
|
||||
h1.append(self.tag_to_string(headline))
|
||||
freshSoup.body.append(h1)
|
||||
@ -47,7 +55,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
|
||||
# We have some weird pagebreak marker here; it will not find all of them however
|
||||
continue
|
||||
|
||||
para = Tag(freshSoup, 'p')
|
||||
para = new_tag(freshSoup, 'p')
|
||||
# Convert to string; this will loose all formatting but also all illegal markup
|
||||
para.append(self.tag_to_string(p))
|
||||
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
postmedia_index_pages = [
|
||||
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
|
||||
allpics = Tag(soup, 'div')
|
||||
allpics = new_tag(soup, 'div')
|
||||
first_img = pgall.find('div', 'storyimage')
|
||||
if first_img is not None:
|
||||
first_img.extract()
|
||||
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
|
||||
if tlist is not None:
|
||||
for atag in tlist.findAll('a'):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
srcpre, sep, srcpost = atag.img[
|
||||
'src'].partition('?')
|
||||
img['src'] = srcpre
|
||||
pdesc = Tag(soup, 'p')
|
||||
pdesc = new_tag(soup, 'p')
|
||||
pdesc.insert(0, atag.img['alt'])
|
||||
pdesc['class'] = 'photocaption'
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.insert(0, pdesc)
|
||||
div.insert(0, img)
|
||||
allpics.append(div)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NacionalCro(BasicNewsRecipe):
|
||||
title = 'Nacional - Hr'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -44,9 +51,9 @@ class NacionalCro(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -13,6 +13,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NatGeo(BasicNewsRecipe):
|
||||
title = u'National Geographic'
|
||||
description = 'Daily news articles from The National Geographic'
|
||||
@ -64,7 +71,7 @@ class NatGeo(BasicNewsRecipe):
|
||||
idx = url.find('.jpg/{width')
|
||||
if idx != -1:
|
||||
url = url[:idx + 4]
|
||||
img = Tag(soup, "img")
|
||||
img = new_tag(soup, "img")
|
||||
img['src'] = url
|
||||
div.append(img)
|
||||
|
||||
|
@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NrcNextRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -100,20 +107,20 @@ class NrcNextRecipe(BasicNewsRecipe):
|
||||
if tag:
|
||||
h2 = tag.find('h2', 'vlag')
|
||||
if h2:
|
||||
new_h2 = Tag(soup, 'h2', attrs=[('class', 'vlag')])
|
||||
new_h2 = new_tag(soup, 'h2', attrs=[('class', 'vlag')])
|
||||
new_h2.append(self.tag_to_string(h2))
|
||||
h2.replaceWith(new_h2)
|
||||
else:
|
||||
h2 = tag.find('h2')
|
||||
if h2:
|
||||
new_h2 = Tag(soup, 'h2', attrs=[
|
||||
new_h2 = new_tag(soup, 'h2', attrs=[
|
||||
('class', 'sub_title')])
|
||||
new_h2.append(self.tag_to_string(h2))
|
||||
h2.replaceWith(new_h2)
|
||||
|
||||
h1 = tag.find('h1')
|
||||
if h1:
|
||||
new_h1 = Tag(soup, 'h1')
|
||||
new_h1 = new_tag(soup, 'h1')
|
||||
new_h1.append(self.tag_to_string(h1))
|
||||
h1.replaceWith(new_h1)
|
||||
|
||||
|
@ -23,6 +23,13 @@ def absurl(x):
|
||||
return x
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NewYorker(BasicNewsRecipe):
|
||||
|
||||
title = u'New Yorker Magazine'
|
||||
@ -114,12 +121,12 @@ class NewYorker(BasicNewsRecipe):
|
||||
title = soup.find('meta', itemprop='name')
|
||||
if title:
|
||||
if self.featured_image:
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
img['src'] = self.featured_image
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.append(img)
|
||||
body.insert(0, div)
|
||||
h1 = Tag(soup, 'h1')
|
||||
h1 = new_tag(soup, 'h1')
|
||||
h1.append(title.get('content'))
|
||||
body.insert(0, h1)
|
||||
for attr in 'srcset data-src-mobile'.split():
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NOAA(BasicNewsRecipe):
|
||||
title = 'NOAA Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -32,9 +39,9 @@ class NOAA(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -8,6 +8,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
title = 'NRC'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -42,7 +49,7 @@ class Pagina12(BasicNewsRecipe):
|
||||
div = soup.find(
|
||||
'div', attrs={'class': lambda x: x and 'featured-img' in x})
|
||||
if div is not None:
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
img['src'] = src
|
||||
div.append(img)
|
||||
return soup
|
||||
|
@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Nspm(BasicNewsRecipe):
|
||||
title = 'Nova srpska politicka misao'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -62,7 +69,7 @@ class Nspm(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
atitle = soup.body.find('a', attrs={'class': 'contentpagetitle'})
|
||||
if atitle:
|
||||
cleanTitle = Tag(soup, 'h1', [('class', 'contentpagetitle')])
|
||||
cleanTitle = new_tag(soup, 'h1', [('class', 'contentpagetitle')])
|
||||
cnt = NavigableString(self.tag_to_string(atitle))
|
||||
cleanTitle.append(cnt)
|
||||
|
||||
@ -73,12 +80,12 @@ class Nspm(BasicNewsRecipe):
|
||||
|
||||
crdate = soup.body.find('td', attrs={'class': 'createdate'})
|
||||
if crdate:
|
||||
cleanCrdate = Tag(soup, 'div', [('class', 'createdate')])
|
||||
cleanCrdate = new_tag(soup, 'div', [('class', 'createdate')])
|
||||
cnt = NavigableString(self.tag_to_string(crdate))
|
||||
cleanCrdate.append(cnt)
|
||||
|
||||
# get the dependant element
|
||||
artText = Tag(soup, 'div', [('class', 'text')])
|
||||
artText = new_tag(soup, 'div', [('class', 'text')])
|
||||
textHolderp = crdate.parent
|
||||
textHolder = textHolderp.nextSibling
|
||||
while textHolder and (not isinstance(textHolder, Tag) or (textHolder.name != textHolderp.name)):
|
||||
|
@ -67,6 +67,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NewYorkTimes(BasicNewsRecipe):
|
||||
|
||||
if is_web_edition:
|
||||
@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
||||
keep_only_tags = [
|
||||
dict(id='story'),
|
||||
]
|
||||
body = Tag(soup, 'body')
|
||||
body = new_tag(soup, 'body')
|
||||
for spec in keep_only_tags:
|
||||
for tag in soup.find('body').findAll(**spec):
|
||||
body.insert(len(body.contents), tag)
|
||||
|
@ -67,6 +67,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class NewYorkTimes(BasicNewsRecipe):
|
||||
|
||||
if is_web_edition:
|
||||
@ -116,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
||||
keep_only_tags = [
|
||||
dict(id='story'),
|
||||
]
|
||||
body = Tag(soup, 'body')
|
||||
body = new_tag(soup, 'body')
|
||||
for spec in keep_only_tags:
|
||||
for tag in soup.find('body').findAll(**spec):
|
||||
body.insert(len(body.contents), tag)
|
||||
|
@ -13,6 +13,13 @@ from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Nzz(BasicNewsRecipe):
|
||||
title = 'NZZ Webpaper'
|
||||
__author__ = 'Bernd Leinfelder'
|
||||
@ -56,7 +63,7 @@ class Nzz(BasicNewsRecipe):
|
||||
for span in soup.findAll('span', attrs={'data-src-640': True}):
|
||||
imgSrc = span['data-src-640']
|
||||
# print "image source: "+ imgSrc
|
||||
imgTag = Tag(soup, "img", [("src", imgSrc)])
|
||||
imgTag = new_tag(soup, "img", [("src", imgSrc)])
|
||||
span.replaceWith(imgTag)
|
||||
|
||||
# print soup.prettify()
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
postmedia_index_pages = [
|
||||
@ -218,21 +225,21 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
|
||||
allpics = Tag(soup, 'div')
|
||||
allpics = new_tag(soup, 'div')
|
||||
first_img = pgall.find('div', 'storyimage')
|
||||
if first_img is not None:
|
||||
first_img.extract()
|
||||
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
|
||||
if tlist is not None:
|
||||
for atag in tlist.findAll('a'):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
srcpre, sep, srcpost = atag.img[
|
||||
'src'].partition('?')
|
||||
img['src'] = srcpre
|
||||
pdesc = Tag(soup, 'p')
|
||||
pdesc = new_tag(soup, 'p')
|
||||
pdesc.insert(0, atag.img['alt'])
|
||||
pdesc['class'] = 'photocaption'
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.insert(0, pdesc)
|
||||
div.insert(0, img)
|
||||
allpics.append(div)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
|
||||
title = 'Pagina/12 - Edicion Impresa'
|
||||
@ -84,10 +91,10 @@ class Pagina12(BasicNewsRecipe):
|
||||
if img is not None:
|
||||
img.extract()
|
||||
caption = self.tag_to_string(table).strip()
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div['style'] = 'text-align:center'
|
||||
div.insert(0, img)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
div.insert(1, new_tag(soup, 'br'))
|
||||
if caption:
|
||||
div.insert(2, NavigableString(caption))
|
||||
table.replaceWith(div)
|
||||
|
@ -13,6 +13,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Pobjeda(BasicNewsRecipe):
|
||||
title = 'Pobjeda Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -59,9 +66,9 @@ class Pobjeda(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class PressOnline(BasicNewsRecipe):
|
||||
title = 'Press Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -60,7 +67,7 @@ class PressOnline(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
soup.head.insert(0, mlang)
|
||||
return self.adeify_images(soup)
|
||||
|
@ -3,6 +3,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class RevistaMuyInteresante(BasicNewsRecipe):
|
||||
|
||||
title = 'Revista Muy Interesante'
|
||||
@ -24,7 +31,7 @@ class RevistaMuyInteresante(BasicNewsRecipe):
|
||||
|
||||
for img_tag in soup.findAll('img'):
|
||||
imagen = img_tag
|
||||
new_tag = Tag(soup, 'p')
|
||||
# Use a distinct local name: assigning to ``new_tag`` inside this method
# would make it a local variable, so the call on the right-hand side would
# raise UnboundLocalError instead of reaching the module-level helper.
nt = new_tag(soup, 'p')
img_tag.replaceWith(nt)
|
||||
div = soup.find(attrs={'class': 'article_category'})
|
||||
div.insert(0, imagen)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class RTS(BasicNewsRecipe):
|
||||
title = 'RTS: Vesti'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -50,9 +57,9 @@ class RTS(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class SarajevoX(BasicNewsRecipe):
|
||||
title = 'Sarajevo-x.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -57,7 +64,7 @@ class SarajevoX(BasicNewsRecipe):
|
||||
if sp:
|
||||
sp
|
||||
else:
|
||||
mtag = Tag(soup, 'div', [
|
||||
mtag = new_tag(soup, 'div', [
|
||||
("id", "opisslike"), ("class", "opscitech")])
|
||||
mopis = NavigableString("Opis")
|
||||
mtag.insert(0, mopis)
|
||||
|
@ -12,6 +12,13 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class SCMP(BasicNewsRecipe):
|
||||
title = 'South China Morning Post'
|
||||
__author__ = 'llam'
|
||||
@ -71,7 +78,7 @@ class SCMP(BasicNewsRecipe):
|
||||
wrapper = soup.find(**classes('image-wrapper__placeholder'))
|
||||
if wrapper is not None:
|
||||
p = wrapper.parent
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
img['src'] = meta['content']
|
||||
p.append(img)
|
||||
wrapper.extract()
|
||||
|
@ -14,6 +14,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class TheSouthernStar(BasicNewsRecipe):
|
||||
|
||||
title = 'The Southern Star'
|
||||
@ -117,10 +124,10 @@ class TheSouthernStar(BasicNewsRecipe):
|
||||
if img is not None:
|
||||
img.extract()
|
||||
caption = self.tag_to_string(table).strip()
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div['style'] = 'text-align:center'
|
||||
div.insert(0, img)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
div.insert(1, new_tag(soup, 'br'))
|
||||
if caption:
|
||||
div.insert(2, NavigableString(caption))
|
||||
table.replaceWith(div)
|
||||
|
@ -9,6 +9,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class DeTijd(BasicNewsRecipe):
|
||||
title = 'De Tijd'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -68,11 +75,11 @@ class DeTijd(BasicNewsRecipe):
|
||||
del item['style']
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(
|
||||
mlang = new_tag(
|
||||
soup, 'meta', [("http-equiv", "Content-Language"),
|
||||
("content", self.lang)]
|
||||
)
|
||||
mcharset = Tag(
|
||||
mcharset = new_tag(
|
||||
soup, 'meta', [("http-equiv", "Content-Type"),
|
||||
("content", "text/html; charset=utf-8")]
|
||||
)
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Uncrate(BasicNewsRecipe):
|
||||
title = 'Uncrate'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -47,9 +54,9 @@ class Uncrate(BasicNewsRecipe):
|
||||
feeds = [(u'Articles', u'http://feeds.feedburner.com/uncrate')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
postmedia_index_pages = [
|
||||
@ -231,21 +238,21 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
|
||||
allpics = Tag(soup, 'div')
|
||||
allpics = new_tag(soup, 'div')
|
||||
first_img = pgall.find('div', 'storyimage')
|
||||
if first_img is not None:
|
||||
first_img.extract()
|
||||
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
|
||||
if tlist is not None:
|
||||
for atag in tlist.findAll('a'):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
srcpre, sep, srcpost = atag.img[
|
||||
'src'].partition('?')
|
||||
img['src'] = srcpre
|
||||
pdesc = Tag(soup, 'p')
|
||||
pdesc = new_tag(soup, 'p')
|
||||
pdesc.insert(0, atag.img['alt'])
|
||||
pdesc['class'] = 'photocaption'
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.insert(0, pdesc)
|
||||
div.insert(0, img)
|
||||
allpics.append(div)
|
||||
|
@ -11,6 +11,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
compress_news_images = True
|
||||
@ -219,21 +226,21 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
if (soup.find('div', attrs={'id': 'storycontent'}) is None):
|
||||
allpics = Tag(soup, 'div')
|
||||
allpics = new_tag(soup, 'div')
|
||||
first_img = pgall.find('div', 'storyimage')
|
||||
if first_img is not None:
|
||||
first_img.extract()
|
||||
tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
|
||||
if tlist is not None:
|
||||
for atag in tlist.findAll('a'):
|
||||
img = Tag(soup, 'img')
|
||||
img = new_tag(soup, 'img')
|
||||
srcpre, sep, srcpost = atag.img[
|
||||
'src'].partition('?')
|
||||
img['src'] = srcpre
|
||||
pdesc = Tag(soup, 'p')
|
||||
pdesc = new_tag(soup, 'p')
|
||||
pdesc.insert(0, atag.img['alt'])
|
||||
pdesc['class'] = 'photocaption'
|
||||
div = Tag(soup, 'div')
|
||||
div = new_tag(soup, 'div')
|
||||
div.insert(0, pdesc)
|
||||
div.insert(0, img)
|
||||
allpics.append(div)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class VecernjiList(BasicNewsRecipe):
|
||||
title = 'Vecernji List'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -48,9 +55,9 @@ class VecernjiList(BasicNewsRecipe):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -9,6 +9,13 @@ from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class VedomostiRecipe(BasicNewsRecipe):
|
||||
title = u'Ведомости'
|
||||
__author__ = 'Nikolai Kotchetkov'
|
||||
@ -145,7 +152,7 @@ class VedomostiRecipe(BasicNewsRecipe):
|
||||
if newstop:
|
||||
img = newstop.find('img')
|
||||
if img:
|
||||
imgDiv = Tag(soup, 'div')
|
||||
imgDiv = new_tag(soup, 'div')
|
||||
imgDiv['class'] = 'article_img'
|
||||
|
||||
if img.get('width'):
|
||||
|
@ -10,6 +10,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    '''
    Create a tag called ``name`` for ``soup``.

    If ``soup`` has a ``new_tag`` factory it is used, otherwise ``Tag`` is
    instantiated directly.

    :param soup: The parsed document the tag is created for
    :param name: The tag name, for example ``'div'``
    :param attrs: Attributes as a mapping or an iterable of (key, value)
                  pairs. None or empty means no attributes
    :return: The newly created tag
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is not None:
        # ``attrs or ()``: dict(None) raises TypeError, while the fallback
        # below has always accepted None
        return factory(name, attrs=dict(attrs or ()))
    return Tag(soup, name, attrs=attrs or None)
|
||||
|
||||
|
||||
class Veintitres(BasicNewsRecipe):
|
||||
title = 'Veintitres'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -52,9 +59,9 @@ class Veintitres(BasicNewsRecipe):
|
||||
del item['style']
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
|
@ -12,6 +12,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Return a fresh *name* tag tied to *soup*.

    If *soup* exposes a ``new_tag`` attribute it is called to create the
    tag; if not, the tag is created with the ``Tag`` class imported by
    this module.

    :param soup: the soup (parsed document) that will own the tag.
    :param name: tag name such as ``'div'``.
    :param attrs: attribute ``(key, value)`` pairs, or any value accepted
        by ``dict()``; no attributes when omitted.
    :return: the new tag instance.
    """
    create = getattr(soup, 'new_tag', None)
    if create is None:
        # Fallback constructor path; falsy attrs are passed as None.
        return Tag(soup, name, attrs=attrs if attrs else None)
    return create(name, attrs=dict(attrs))
|
||||
|
||||
|
||||
class TimesColonist(BasicNewsRecipe):
|
||||
|
||||
# Customization -- remove sections you don't want.
|
||||
@ -179,7 +186,7 @@ class TimesColonist(BasicNewsRecipe):
|
||||
authstr = re.sub('/ *Times Colonist', '/',
|
||||
authstr, flags=re.IGNORECASE)
|
||||
authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE)
|
||||
newdiv = Tag(soup, 'div')
|
||||
newdiv = new_tag(soup, 'div')
|
||||
newdiv.insert(0, authstr)
|
||||
newdiv['class'] = 'byline'
|
||||
byline.replaceWith(newdiv)
|
||||
@ -187,7 +194,7 @@ class TimesColonist(BasicNewsRecipe):
|
||||
capstr = self.tag_to_string(caption, False)
|
||||
capstr = re.sub('Photograph by.*$', '',
|
||||
capstr, flags=re.IGNORECASE)
|
||||
newdiv = Tag(soup, 'div')
|
||||
newdiv = new_tag(soup, 'div')
|
||||
newdiv.insert(0, capstr)
|
||||
newdiv['class'] = 'caption'
|
||||
caption.replaceWith(newdiv)
|
||||
|
@ -2,6 +2,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Construct a tag called *name* for *soup*, independent of the
    BeautifulSoup version behind it.

    The soup's ``new_tag`` method is used when present; otherwise the
    ``Tag`` constructor is invoked directly.

    :param soup: parsed document object to create the tag for.
    :param name: the tag's name, e.g. ``'head'``.
    :param attrs: optional iterable of ``(key, value)`` attribute pairs.
    :return: the tag that was created.
    """
    soup_factory = getattr(soup, 'new_tag', None)
    has_factory = soup_factory is not None
    if has_factory:
        # Attributes are handed to the factory as a dictionary.
        return soup_factory(name, attrs=dict(attrs))
    # Without a factory, build the Tag manually; empty attrs become None.
    fallback_attrs = attrs or None
    return Tag(soup, name, attrs=fallback_attrs)
|
||||
|
||||
|
||||
class VrijNederlandRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -73,7 +80,7 @@ class VrijNederlandRecipe(BasicNewsRecipe):
|
||||
# altogether
|
||||
theirHead = soup.head
|
||||
theirHead.extract()
|
||||
myHead = Tag(soup, 'head')
|
||||
myHead = new_tag(soup, 'head')
|
||||
soup.insert(0, myHead)
|
||||
|
||||
return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user