diff --git a/resources/recipes/johm.recipe b/resources/recipes/johm.recipe
index ee162b27c2..6930f05a50 100644
--- a/resources/recipes/johm.recipe
+++ b/resources/recipes/johm.recipe
@@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
language = 'en'
no_stylesheets = True
- #remove_tags_before = dict(name='div', attrs={'align':'center'})
- #remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
- remove_tags = [
- dict(name='iframe'),
- dict(name='div', attrs={'class':'subContent'}),
- dict(name='div', attrs={'id':['contentFrame']}),
- #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
- #dict(name='table', attrs={'align':'RIGHT'}),
- ]
-
# TO LOGIN
@@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
#TO GET ARTICLE TOC
def johm_get_index(self):
- return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
+ return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
# To parse artice toc
def parse_index(self):
- parse_soup = self.johm_get_index()
+ parse_soup = self.johm_get_index()
- div = parse_soup.find(id='contentCell')
+ div = parse_soup.find(id='contentCell')
- current_section = None
- current_articles = []
- feeds = []
- for x in div.findAll(True):
- if x.name == 'h4':
- # Section heading found
- if current_articles and current_section:
- feeds.append((current_section, current_articles))
- current_section = self.tag_to_string(x)
- current_articles = []
- self.log('\tFound section:', current_section)
- if current_section is not None and x.name == 'strong':
- title = self.tag_to_string(x)
- p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
- if p is None:
- continue
- url = p.get('href', False)
- if not url or not title:
- continue
- if url.startswith('/'):
- url = 'http://www3.interscience.wiley.com'+url
- url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
- self.log('\t\tFound article:', title)
- self.log('\t\t\t', url)
- #if url.startswith('/'):
- #url = 'http://online.wsj.com'+url
- current_articles.append({'title': title, 'url':url,
- 'description':'', 'date':''})
+ current_section = None
+ current_articles = []
+ feeds = []
+ for x in div.findAll(True):
+ if x.name == 'h4':
+ # Section heading found
+ if current_articles and current_section:
+ feeds.append((current_section, current_articles))
+ current_section = self.tag_to_string(x)
+ current_articles = []
+ self.log('\tFound section:', current_section)
+ if current_section is not None and x.name == 'strong':
+ title = self.tag_to_string(x)
+ p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
+ if p is None:
+ continue
+ url = p.get('href', False)
+ if not url or not title:
+ continue
+ if url.startswith('/'):
+ url = 'http://www3.interscience.wiley.com'+url
+ url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
+ self.log('\t\tFound article:', title)
+ self.log('\t\t\t', url)
+ #if url.startswith('/'):
+ #url = 'http://online.wsj.com'+url
+ current_articles.append({'title': title, 'url':url,
+ 'description':'', 'date':''})
- if current_articles and current_section:
- feeds.append((current_section, current_articles))
+ if current_articles and current_section:
+ feeds.append((current_section, current_articles))
- return feeds
+ return feeds
def preprocess_html(self, soup):
for img in soup.findAll('img', src=True):
diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py
index af635ebf48..cd0b80d4f0 100644
--- a/src/calibre/library/server/opds.py
+++ b/src/calibre/library/server/opds.py
@@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
from calibre.library.comments import comments_to_html
from calibre.library.server import custom_fields_to_display
from calibre.library.server.utils import format_tag_string, Offsets
-from calibre import guess_type
+from calibre import guess_type, prepare_string_for_xml as xml
from calibre.utils.icu import sort_key
from calibre.utils.ordered_dict import OrderedDict
@@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
         extra.append(_('RATING: %s<br />')%rating)
tags = item[FM['tags']]
if tags:
-        extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',',
+        extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',',
                              ignore_max=True,
-                             no_tag_count=True))
+                             no_tag_count=True)))
series = item[FM['series']]
if series:
         extra.append(_('SERIES: %s [%s]<br />')%\
-                (series,
+                (xml(series),
                  fmt_sidx(float(item[FM['series_index']]))))
for key in CKEYS:
mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
@@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
if val:
datatype = CFM[key]['datatype']
if datatype == 'text' and CFM[key]['is_multiple']:
-            extra.append('%s: %s<br />'%(name, format_tag_string(val, ',',
+            extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',',
                                  ignore_max=True,
-                                 no_tag_count=True)))
+                                 no_tag_count=True))))
else:
-            extra.append('%s: %s<br />'%(name, val))
+            extra.append('%s: %s<br />'%(xml(name), xml(unicode(val))))
comments = item[FM['comments']]
if comments:
comments = comments_to_html(comments)