diff --git a/resources/recipes/johm.recipe b/resources/recipes/johm.recipe index ee162b27c2..6930f05a50 100644 --- a/resources/recipes/johm.recipe +++ b/resources/recipes/johm.recipe @@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe): language = 'en' no_stylesheets = True - #remove_tags_before = dict(name='div', attrs={'align':'center'}) - #remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'}) - remove_tags = [ - dict(name='iframe'), - dict(name='div', attrs={'class':'subContent'}), - dict(name='div', attrs={'id':['contentFrame']}), - #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}), - #dict(name='table', attrs={'align':'RIGHT'}), - ] - # TO LOGIN @@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe): #TO GET ARTICLE TOC def johm_get_index(self): - return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home') + return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home') # To parse artice toc def parse_index(self): - parse_soup = self.johm_get_index() + parse_soup = self.johm_get_index() - div = parse_soup.find(id='contentCell') + div = parse_soup.find(id='contentCell') - current_section = None - current_articles = [] - feeds = [] - for x in div.findAll(True): - if x.name == 'h4': - # Section heading found - if current_articles and current_section: - feeds.append((current_section, current_articles)) - current_section = self.tag_to_string(x) - current_articles = [] - self.log('\tFound section:', current_section) - if current_section is not None and x.name == 'strong': - title = self.tag_to_string(x) - p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x) - if p is None: - continue - url = p.get('href', False) - if not url or not title: - continue - if url.startswith('/'): - url = 'http://www3.interscience.wiley.com'+url - url = url.replace('/HTMLSTART', '/main.html,ftx_abs') - self.log('\t\tFound article:', 
title) - self.log('\t\t\t', url) - #if url.startswith('/'): - #url = 'http://online.wsj.com'+url - current_articles.append({'title': title, 'url':url, - 'description':'', 'date':''}) + current_section = None + current_articles = [] + feeds = [] + for x in div.findAll(True): + if x.name == 'h4': + # Section heading found + if current_articles and current_section: + feeds.append((current_section, current_articles)) + current_section = self.tag_to_string(x) + current_articles = [] + self.log('\tFound section:', current_section) + if current_section is not None and x.name == 'strong': + title = self.tag_to_string(x) + p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x) + if p is None: + continue + url = p.get('href', False) + if not url or not title: + continue + if url.startswith('/'): + url = 'http://www3.interscience.wiley.com'+url + url = url.replace('/HTMLSTART', '/main.html,ftx_abs') + self.log('\t\tFound article:', title) + self.log('\t\t\t', url) + #if url.startswith('/'): + #url = 'http://online.wsj.com'+url + current_articles.append({'title': title, 'url':url, + 'description':'', 'date':''}) - if current_articles and current_section: - feeds.append((current_section, current_articles)) + if current_articles and current_section: + feeds.append((current_section, current_articles)) - return feeds + return feeds def preprocess_html(self, soup): for img in soup.findAll('img', src=True): diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py index af635ebf48..cd0b80d4f0 100644 --- a/src/calibre/library/server/opds.py +++ b/src/calibre/library/server/opds.py @@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx from calibre.library.comments import comments_to_html from calibre.library.server import custom_fields_to_display from calibre.library.server.utils import format_tag_string, Offsets -from calibre import guess_type +from calibre import guess_type, prepare_string_for_xml as xml from calibre.utils.icu 
import sort_key from calibre.utils.ordered_dict import OrderedDict @@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix): extra.append(_('RATING: %s<br />')%rating) tags = item[FM['tags']] if tags: - extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',', + extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',', ignore_max=True, - no_tag_count=True)) + no_tag_count=True))) series = item[FM['series']] if series: extra.append(_('SERIES: %s [%s]<br />')%\ - (series, + (xml(series), fmt_sidx(float(item[FM['series_index']])))) for key in CKEYS: mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True) @@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix): if val: datatype = CFM[key]['datatype'] if datatype == 'text' and CFM[key]['is_multiple']: - extra.append('%s: %s<br />'%(name, format_tag_string(val, ',', + extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',', ignore_max=True, - no_tag_count=True))) + no_tag_count=True)))) else: - extra.append('%s: %s<br />'%(name, val)) + extra.append('%s: %s<br />'%(xml(name), xml(unicode(val)))) comments = item[FM['comments']] if comments: comments = comments_to_html(comments)