Fix #7938 (Stanza shows some authors as "catalog is empty")

2025-07-09 03:04:10 -04:00 · 2010-12-17 20:59:48 -07:00 · 2010-12-17 20:59:48 -07:00 · b9f2346cba
commit b9f2346cba
parent 3cc953d621
2 changed files with 41 additions and 51 deletions
--- a/resources/recipes/johm.recipe
+++ b/resources/recipes/johm.recipe
@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
    language = 'en'

    no_stylesheets = True
-    #remove_tags_before = dict(name='div', attrs={'align':'center'})
-    #remove_tags_after  = dict(name='ol', attrs={'compact':'COMPACT'})
-    remove_tags = [
-       dict(name='iframe'),
-       dict(name='div', attrs={'class':'subContent'}),
-       dict(name='div', attrs={'id':['contentFrame']}),
-       #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
-       #dict(name='table', attrs={'align':'RIGHT'}),
-    ]
-


   # TO LOGIN
@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe):

    #TO GET ARTICLE TOC
    def johm_get_index(self):
-            return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
+        return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')

    # To parse article toc
    def parse_index(self):
-            parse_soup = self.johm_get_index()
+        parse_soup = self.johm_get_index()

-            div = parse_soup.find(id='contentCell')
+        div = parse_soup.find(id='contentCell')

-            current_section = None
-            current_articles = []
-            feeds = []
-            for x in div.findAll(True):
-                if x.name == 'h4':
-                    # Section heading found
-                    if current_articles and current_section:
-                        feeds.append((current_section, current_articles))
-                    current_section = self.tag_to_string(x)
-                    current_articles = []
-                    self.log('\tFound section:', current_section)
-                if current_section is not None and x.name == 'strong':
-                    title = self.tag_to_string(x)
-                    p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
-                    if p is None:
-                        continue
-                    url = p.get('href', False)
-                    if not url or not title:
-                        continue
-                    if url.startswith('/'):
-                         url = 'http://www3.interscience.wiley.com'+url
-                    url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
-                    self.log('\t\tFound article:', title)
-                    self.log('\t\t\t', url)
-                    #if url.startswith('/'):
-                        #url = 'http://online.wsj.com'+url
-                    current_articles.append({'title': title, 'url':url,
-                        'description':'', 'date':''})
+        current_section = None
+        current_articles = []
+        feeds = []
+        for x in div.findAll(True):
+            if x.name == 'h4':
+                # Section heading found
+                if current_articles and current_section:
+                    feeds.append((current_section, current_articles))
+                current_section = self.tag_to_string(x)
+                current_articles = []
+                self.log('\tFound section:', current_section)
+            if current_section is not None and x.name == 'strong':
+                title = self.tag_to_string(x)
+                p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
+                if p is None:
+                    continue
+                url = p.get('href', False)
+                if not url or not title:
+                    continue
+                if url.startswith('/'):
+                        url = 'http://www3.interscience.wiley.com'+url
+                url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                #if url.startswith('/'):
+                    #url = 'http://online.wsj.com'+url
+                current_articles.append({'title': title, 'url':url,
+                    'description':'', 'date':''})

-            if current_articles and current_section:
-                feeds.append((current_section, current_articles))
+        if current_articles and current_section:
+            feeds.append((current_section, current_articles))

-            return feeds
+        return feeds

    def preprocess_html(self, soup):
        for img in soup.findAll('img', src=True):
--- a/src/calibre/library/server/opds.py
+++ b/src/calibre/library/server/opds.py
@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
 from calibre.library.comments import comments_to_html
 from calibre.library.server import custom_fields_to_display
 from calibre.library.server.utils import format_tag_string, Offsets
-from calibre import guess_type
+from calibre import guess_type, prepare_string_for_xml as xml
 from calibre.utils.icu import sort_key
 from calibre.utils.ordered_dict import OrderedDict

@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
        extra.append(_('RATING: %s<br />')%rating)
    tags = item[FM['tags']]
    if tags:
-        extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',',
+        extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',',
                                                           ignore_max=True,
-                                                           no_tag_count=True))
+                                                           no_tag_count=True)))
    series = item[FM['series']]
    if series:
        extra.append(_('SERIES: %s [%s]<br />')%\
-                (series,
+                (xml(series),
                fmt_sidx(float(item[FM['series_index']]))))
    for key in CKEYS:
        mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
        if val:
            datatype = CFM[key]['datatype']
            if datatype == 'text' and CFM[key]['is_multiple']:
-                extra.append('%s: %s<br />'%(name, format_tag_string(val, ',',
+                extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',',
                                                           ignore_max=True,
-                                                           no_tag_count=True)))
+                                                           no_tag_count=True))))
            else:
-                extra.append('%s: %s<br />'%(name, val))
+                extra.append('%s: %s<br />'%(xml(name), xml(unicode(val))))
    comments = item[FM['comments']]
    if comments:
        comments = comments_to_html(comments)