Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Fix incorrect soup usage in various recipes
Also make SoupStrainer available in calibre.ebooks.BeautifulSoup
This commit is contained in:
Parent: de9d97d688
Commit: ba59ac679d
@ -1,5 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||||
|
@ -3,7 +3,6 @@ from __future__ import print_function
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
from calibre.utils.magick import Image
|
from calibre.utils.magick import Image
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
|
|
||||||
''' Version 1.2, updated cover image to match the changed website.
|
''' Version 1.2, updated cover image to match the changed website.
|
||||||
added info date on title
|
added info date on title
|
||||||
@ -163,7 +162,7 @@ class MerryExtract():
|
|||||||
return killingSoup
|
return killingSoup
|
||||||
|
|
||||||
|
|
||||||
class MerryProcess(BeautifulSoup):
|
class MerryProcess(object):
|
||||||
myKiller = MerryExtract()
|
myKiller = MerryExtract()
|
||||||
myPrepare = MerryPreProcess()
|
myPrepare = MerryPreProcess()
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
import urllib2
|
import urllib2
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
|
|
||||||
|
|
||||||
|
|
||||||
class Ebert(BasicNewsRecipe):
|
class Ebert(BasicNewsRecipe):
|
||||||
@ -78,8 +77,8 @@ class Ebert(BasicNewsRecipe):
|
|||||||
description = match.group(2)
|
description = match.group(2)
|
||||||
|
|
||||||
self.log(thislink)
|
self.log(thislink)
|
||||||
|
soup = self.index_to_soup(thislink)
|
||||||
for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
|
for link in soup.findAll('a', href=True):
|
||||||
thisurl = self.PREFIX + link['href']
|
thisurl = self.PREFIX + link['href']
|
||||||
thislinktext = self.tag_to_string(link)
|
thislinktext = self.tag_to_string(link)
|
||||||
|
|
||||||
@ -91,7 +90,7 @@ class Ebert(BasicNewsRecipe):
|
|||||||
if thistitle == '':
|
if thistitle == '':
|
||||||
thistitle = 'Ebert Journal Post'
|
thistitle = 'Ebert Journal Post'
|
||||||
|
|
||||||
"""
|
r"""
|
||||||
pattern2 = r'AID=\/(.*?)\/'
|
pattern2 = r'AID=\/(.*?)\/'
|
||||||
reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
|
reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
|
||||||
match2 = reg2.search(thisurl)
|
match2 = reg2.search(thisurl)
|
||||||
|
@ -2,7 +2,6 @@ import re
|
|||||||
import urllib2
|
import urllib2
|
||||||
import time
|
import time
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -94,8 +93,9 @@ class Ebert(BasicNewsRecipe):
|
|||||||
description = match.group(2)
|
description = match.group(2)
|
||||||
|
|
||||||
self.log(thislink)
|
self.log(thislink)
|
||||||
|
soup = self.index_to_soup(thislink)
|
||||||
|
|
||||||
for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
|
for link in soup.findAll('a', href=True):
|
||||||
thisurl = self.PREFIX + link['href']
|
thisurl = self.PREFIX + link['href']
|
||||||
thislinktext = self.tag_to_string(link)
|
thislinktext = self.tag_to_string(link)
|
||||||
|
|
||||||
|
@ -6,8 +6,8 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
from bs4 import ( # noqa
|
from bs4 import ( # noqa
|
||||||
CData, Comment, Declaration, NavigableString, ProcessingInstruction, Tag,
|
CData, Comment, Declaration, NavigableString, ProcessingInstruction,
|
||||||
__version__
|
SoupStrainer, Tag, __version__
|
||||||
)
|
)
|
||||||
|
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
Loading…
x
Reference in New Issue
Block a user