Fix incorrect BeautifulSoup usage in various recipes

Also make SoupStrainer importable from calibre.ebooks.BeautifulSoup by re-exporting it from bs4
2025-07-09 03:04:10 -04:00 · 2019-03-25 10:17:27 +05:30 · 2019-03-25 10:17:27 +05:30 · ba59ac679d
commit ba59ac679d
parent de9d97d688
6 changed files with 10 additions and 12 deletions
--- a/recipes/calcalist.recipe
+++ b/recipes/calcalist.recipe
@@ -1,5 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import re
+import re
 class AdvancedUserRecipe1283848012(BasicNewsRecipe):
--- a/recipes/globes_co_il.recipe
+++ b/recipes/globes_co_il.recipe
@@ -1,5 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import re
+import re
 class AdvancedUserRecipe1283848012(BasicNewsRecipe):
--- a/recipes/metro_news_nl.recipe
+++ b/recipes/metro_news_nl.recipe
@@ -3,7 +3,6 @@ from __future__ import print_function
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 from calibre.utils.magick import Image
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 ''' Version 1.2, updated cover image to match the changed website.
 added info date on title
@@ -163,7 +162,7 @@ class MerryExtract():
        return killingSoup
-class MerryProcess(BeautifulSoup):
+class MerryProcess(object):
    myKiller = MerryExtract()
    myPrepare = MerryPreProcess()
--- a/recipes/roger_ebert.recipe
+++ b/recipes/roger_ebert.recipe
@@ -1,7 +1,6 @@
 import re
 import urllib2
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
 class Ebert(BasicNewsRecipe):
@@ -78,8 +77,8 @@ class Ebert(BasicNewsRecipe):
                    description = match.group(2)
                self.log(thislink)
-
+                soup = self.index_to_soup(thislink)
-                for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
+                for link in soup.findAll('a', href=True):
                    thisurl = self.PREFIX + link['href']
                    thislinktext = self.tag_to_string(link)
@@ -91,7 +90,7 @@ class Ebert(BasicNewsRecipe):
                    if thistitle == '':
                        thistitle = 'Ebert Journal Post'
-                    """
+                    r"""
                    pattern2 = r'AID=\/(.*?)\/'
                    reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
                    match2 = reg2.search(thisurl)
--- a/recipes/roger_ebert_blog.recipe
+++ b/recipes/roger_ebert_blog.recipe
@@ -2,7 +2,6 @@ import re
 import urllib2
 import time
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
 from calibre import strftime
 '''
@@ -94,8 +93,9 @@ class Ebert(BasicNewsRecipe):
                    description = match.group(2)
                self.log(thislink)
                soup = self.index_to_soup(thislink)
-                for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
+                for link in soup.findAll('a', href=True):
                    thisurl = self.PREFIX + link['href']
                    thislinktext = self.tag_to_string(link)
--- a/src/calibre/ebooks/BeautifulSoup.py
+++ b/src/calibre/ebooks/BeautifulSoup.py
@@ -6,8 +6,8 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import bs4
 from bs4 import (  # noqa
-    CData, Comment, Declaration, NavigableString, ProcessingInstruction, Tag,
+    CData, Comment, Declaration, NavigableString, ProcessingInstruction,
-    __version__
+    SoupStrainer, Tag, __version__
 )
 from polyglot.builtins import unicode_type