mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #7938 (Stanza shows some authors as "catalog is empty")
This commit is contained in:
parent
3cc953d621
commit
b9f2346cba
@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#remove_tags_before = dict(name='div', attrs={'align':'center'})
|
|
||||||
#remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='iframe'),
|
|
||||||
dict(name='div', attrs={'class':'subContent'}),
|
|
||||||
dict(name='div', attrs={'id':['contentFrame']}),
|
|
||||||
#dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
|
|
||||||
#dict(name='table', attrs={'align':'RIGHT'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# TO LOGIN
|
# TO LOGIN
|
||||||
@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
|
|||||||
|
|
||||||
#TO GET ARTICLE TOC
|
#TO GET ARTICLE TOC
|
||||||
def johm_get_index(self):
|
def johm_get_index(self):
|
||||||
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
|
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
|
||||||
|
|
||||||
# To parse article toc
|
# To parse article toc
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
parse_soup = self.johm_get_index()
|
parse_soup = self.johm_get_index()
|
||||||
|
|
||||||
div = parse_soup.find(id='contentCell')
|
div = parse_soup.find(id='contentCell')
|
||||||
|
|
||||||
current_section = None
|
current_section = None
|
||||||
current_articles = []
|
current_articles = []
|
||||||
feeds = []
|
feeds = []
|
||||||
for x in div.findAll(True):
|
for x in div.findAll(True):
|
||||||
if x.name == 'h4':
|
if x.name == 'h4':
|
||||||
# Section heading found
|
# Section heading found
|
||||||
if current_articles and current_section:
|
if current_articles and current_section:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
current_section = self.tag_to_string(x)
|
current_section = self.tag_to_string(x)
|
||||||
current_articles = []
|
current_articles = []
|
||||||
self.log('\tFound section:', current_section)
|
self.log('\tFound section:', current_section)
|
||||||
if current_section is not None and x.name == 'strong':
|
if current_section is not None and x.name == 'strong':
|
||||||
title = self.tag_to_string(x)
|
title = self.tag_to_string(x)
|
||||||
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
|
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
|
||||||
if p is None:
|
if p is None:
|
||||||
continue
|
continue
|
||||||
url = p.get('href', False)
|
url = p.get('href', False)
|
||||||
if not url or not title:
|
if not url or not title:
|
||||||
continue
|
continue
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www3.interscience.wiley.com'+url
|
url = 'http://www3.interscience.wiley.com'+url
|
||||||
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
|
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
|
||||||
self.log('\t\tFound article:', title)
|
self.log('\t\tFound article:', title)
|
||||||
self.log('\t\t\t', url)
|
self.log('\t\t\t', url)
|
||||||
#if url.startswith('/'):
|
#if url.startswith('/'):
|
||||||
#url = 'http://online.wsj.com'+url
|
#url = 'http://online.wsj.com'+url
|
||||||
current_articles.append({'title': title, 'url':url,
|
current_articles.append({'title': title, 'url':url,
|
||||||
'description':'', 'date':''})
|
'description':'', 'date':''})
|
||||||
|
|
||||||
if current_articles and current_section:
|
if current_articles and current_section:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', src=True):
|
for img in soup.findAll('img', src=True):
|
||||||
|
@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
|
|||||||
from calibre.library.comments import comments_to_html
|
from calibre.library.comments import comments_to_html
|
||||||
from calibre.library.server import custom_fields_to_display
|
from calibre.library.server import custom_fields_to_display
|
||||||
from calibre.library.server.utils import format_tag_string, Offsets
|
from calibre.library.server.utils import format_tag_string, Offsets
|
||||||
from calibre import guess_type
|
from calibre import guess_type, prepare_string_for_xml as xml
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
from calibre.utils.ordered_dict import OrderedDict
|
from calibre.utils.ordered_dict import OrderedDict
|
||||||
|
|
||||||
@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
|
|||||||
extra.append(_('RATING: %s<br />')%rating)
|
extra.append(_('RATING: %s<br />')%rating)
|
||||||
tags = item[FM['tags']]
|
tags = item[FM['tags']]
|
||||||
if tags:
|
if tags:
|
||||||
extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',',
|
extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',',
|
||||||
ignore_max=True,
|
ignore_max=True,
|
||||||
no_tag_count=True))
|
no_tag_count=True)))
|
||||||
series = item[FM['series']]
|
series = item[FM['series']]
|
||||||
if series:
|
if series:
|
||||||
extra.append(_('SERIES: %s [%s]<br />')%\
|
extra.append(_('SERIES: %s [%s]<br />')%\
|
||||||
(series,
|
(xml(series),
|
||||||
fmt_sidx(float(item[FM['series_index']]))))
|
fmt_sidx(float(item[FM['series_index']]))))
|
||||||
for key in CKEYS:
|
for key in CKEYS:
|
||||||
mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
|
mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
|
||||||
@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
|
|||||||
if val:
|
if val:
|
||||||
datatype = CFM[key]['datatype']
|
datatype = CFM[key]['datatype']
|
||||||
if datatype == 'text' and CFM[key]['is_multiple']:
|
if datatype == 'text' and CFM[key]['is_multiple']:
|
||||||
extra.append('%s: %s<br />'%(name, format_tag_string(val, ',',
|
extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',',
|
||||||
ignore_max=True,
|
ignore_max=True,
|
||||||
no_tag_count=True)))
|
no_tag_count=True))))
|
||||||
else:
|
else:
|
||||||
extra.append('%s: %s<br />'%(name, val))
|
extra.append('%s: %s<br />'%(xml(name), xml(unicode(val))))
|
||||||
comments = item[FM['comments']]
|
comments = item[FM['comments']]
|
||||||
if comments:
|
if comments:
|
||||||
comments = comments_to_html(comments)
|
comments = comments_to_html(comments)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user