mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #7938 (Stanza shows some authors as "catalog is empty")
This commit is contained in:
parent
3cc953d621
commit
b9f2346cba
@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
|
||||
no_stylesheets = True
|
||||
#remove_tags_before = dict(name='div', attrs={'align':'center'})
|
||||
#remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':'subContent'}),
|
||||
dict(name='div', attrs={'id':['contentFrame']}),
|
||||
#dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
|
||||
#dict(name='table', attrs={'align':'RIGHT'}),
|
||||
]
|
||||
|
||||
|
||||
|
||||
# TO LOGIN
|
||||
@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
|
||||
|
||||
#TO GET ARTICLE TOC
|
||||
def johm_get_index(self):
|
||||
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
|
||||
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
|
||||
|
||||
# To parse article toc
|
||||
def parse_index(self):
|
||||
parse_soup = self.johm_get_index()
|
||||
parse_soup = self.johm_get_index()
|
||||
|
||||
div = parse_soup.find(id='contentCell')
|
||||
div = parse_soup.find(id='contentCell')
|
||||
|
||||
current_section = None
|
||||
current_articles = []
|
||||
feeds = []
|
||||
for x in div.findAll(True):
|
||||
if x.name == 'h4':
|
||||
# Section heading found
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if current_section is not None and x.name == 'strong':
|
||||
title = self.tag_to_string(x)
|
||||
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
|
||||
if p is None:
|
||||
continue
|
||||
url = p.get('href', False)
|
||||
if not url or not title:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://www3.interscience.wiley.com'+url
|
||||
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
#if url.startswith('/'):
|
||||
#url = 'http://online.wsj.com'+url
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
current_section = None
|
||||
current_articles = []
|
||||
feeds = []
|
||||
for x in div.findAll(True):
|
||||
if x.name == 'h4':
|
||||
# Section heading found
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if current_section is not None and x.name == 'strong':
|
||||
title = self.tag_to_string(x)
|
||||
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
|
||||
if p is None:
|
||||
continue
|
||||
url = p.get('href', False)
|
||||
if not url or not title:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://www3.interscience.wiley.com'+url
|
||||
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
#if url.startswith('/'):
|
||||
#url = 'http://online.wsj.com'+url
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for img in soup.findAll('img', src=True):
|
||||
|
@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
|
||||
from calibre.library.comments import comments_to_html
|
||||
from calibre.library.server import custom_fields_to_display
|
||||
from calibre.library.server.utils import format_tag_string, Offsets
|
||||
from calibre import guess_type
|
||||
from calibre import guess_type, prepare_string_for_xml as xml
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.ordered_dict import OrderedDict
|
||||
|
||||
@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
|
||||
extra.append(_('RATING: %s<br />')%rating)
|
||||
tags = item[FM['tags']]
|
||||
if tags:
|
||||
extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',',
|
||||
extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',',
|
||||
ignore_max=True,
|
||||
no_tag_count=True))
|
||||
no_tag_count=True)))
|
||||
series = item[FM['series']]
|
||||
if series:
|
||||
extra.append(_('SERIES: %s [%s]<br />')%\
|
||||
(series,
|
||||
(xml(series),
|
||||
fmt_sidx(float(item[FM['series_index']]))))
|
||||
for key in CKEYS:
|
||||
mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
|
||||
@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
|
||||
if val:
|
||||
datatype = CFM[key]['datatype']
|
||||
if datatype == 'text' and CFM[key]['is_multiple']:
|
||||
extra.append('%s: %s<br />'%(name, format_tag_string(val, ',',
|
||||
extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',',
|
||||
ignore_max=True,
|
||||
no_tag_count=True)))
|
||||
no_tag_count=True))))
|
||||
else:
|
||||
extra.append('%s: %s<br />'%(name, val))
|
||||
extra.append('%s: %s<br />'%(xml(name), xml(unicode(val))))
|
||||
comments = item[FM['comments']]
|
||||
if comments:
|
||||
comments = comments_to_html(comments)
|
||||
|
Loading…
x
Reference in New Issue
Block a user