diff --git a/resources/recipes/wapo_cartoons.recipe b/resources/recipes/wapo_cartoons.recipe
index 78440aa140..09810dbc71 100644
--- a/resources/recipes/wapo_cartoons.recipe
+++ b/resources/recipes/wapo_cartoons.recipe
@@ -1,145 +1,145 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from datetime import date, timedelta
-
-class WaPoCartoonsRecipe(BasicNewsRecipe):
- __license__ = 'GPL v3'
- __author__ = 'kwetal'
- language = 'en'
- version = 2
-
- title = u'Washington Post Cartoons'
- publisher = u'Washington Post'
- category = u'News, Cartoons'
- description = u'Cartoons from the Washington Post'
-
- oldest_article = 7
- max_articles_per_feed = 100
- use_embedded_content = False
- no_stylesheets = True
-
- feeds = []
- feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
- feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
- feeds.append((u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
- feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
- feeds.append((u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
- feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
- feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
- feeds.append((u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
- feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))
-
- extra_css = '''
- body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
- h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
- #name {margin-bottom: 0.2em}
- #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
- '''
-
- def parse_index(self):
- index = []
- oldestDate = date.today() - timedelta(days = self.oldest_article)
- oldest = oldestDate.strftime('%Y%m%d')
- for feed in self.feeds:
- cartoons = []
- soup = self.index_to_soup(feed[1])
-
- cartoon = {'title': 'Current', 'date': None, 'url': feed[1], 'description' : ''}
- cartoons.append(cartoon)
-
- select = soup.find('select', attrs = {'name': ['url', 'dest']})
- if select:
- cartoonCandidates = []
- if select['name'] == 'url':
- cartoonCandidates = self.cartoonCandidatesWaPo(select, oldest)
- else:
- cartoonCandidates = self.cartoonCandidatesCreatorsCom(select, oldest)
-
- for cartoon in cartoonCandidates:
- cartoons.append(cartoon)
-
- index.append([feed[0], cartoons])
-
- return index
-
- def preprocess_html(self, soup):
- freshSoup = self.getFreshSoup(soup)
-
- div = soup.find('div', attrs = {'id': 'name'})
- if div:
- freshSoup.body.append(div)
- comic = soup.find('div', attrs = {'id': 'comic_full'})
-
- img = comic.find('img')
- if '&' in img['src']:
- img['src'], sep, bad = img['src'].rpartition('&')
-
- freshSoup.body.append(comic)
- freshSoup.body.append(soup.find('div', attrs = {'id': 'copyright'}))
- else:
- span = soup.find('span', attrs = {'class': 'title'})
- if span:
- del span['class']
- span['id'] = 'name'
- span.name = 'div'
- freshSoup.body.append(span)
-
- img = soup.find('img', attrs = {'class': 'pic_big'})
- if img:
- td = img.parent
- if td.has_key('style'):
- del td['style']
- td.name = 'div'
- td['id'] = 'comic_full'
- freshSoup.body.append(td)
-
- td = soup.find('td', attrs = {'class': 'copy'})
- if td:
- for a in td.find('a'):
- a.extract()
- del td['class']
- td['id'] = 'copyright'
- td.name = 'div'
- freshSoup.body.append(td)
-
- return freshSoup
-
- def getFreshSoup(self, oldSoup):
- freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
- if oldSoup.head.title:
- freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
- return freshSoup
-
- def cartoonCandidatesWaPo(self, select, oldest):
- opts = select.findAll('option')
- for i in range(1, len(opts)):
- url = opts[i]['value'].rstrip('/')
- dateparts = url.split('/')[-3:]
- datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
- if datenum >= oldest:
- yield {'title': self.tag_to_string(opts[i]), 'date': None, 'url': url, 'description': ''}
- else:
- return
-
- def cartoonCandidatesCreatorsCom(self, select, oldest):
- monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
- 'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
- 'November': '11', 'December': '12'}
-
- opts = select.findAll('option')
- for i in range(1, len(opts)):
- if opts[i].has_key('selected'):
- continue
-
- dateString = self.tag_to_string(opts[i])
- rest, sep, year = dateString.rpartition(', ')
- parts = rest.split(' ')
- day = parts[2].rjust(2, '0')
- month = monthNames[parts[1]]
- datenum = str(year) + month + str(day)
- if datenum >= oldest:
- yield {'title': dateString, 'date': None, 'url': opts[i]['value'], 'description': ''}
- else:
- return
-
-
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from datetime import date, timedelta
+
+class WaPoCartoonsRecipe(BasicNewsRecipe):
+ __license__ = 'GPL v3'
+ __author__ = 'kwetal'
+ language = 'en'
+ version = 2
+
+ title = u'Washington Post Cartoons'
+ publisher = u'Washington Post'
+ category = u'News, Cartoons'
+ description = u'Cartoons from the Washington Post'
+
+ oldest_article = 7
+ max_articles_per_feed = 100
+ use_embedded_content = False
+ no_stylesheets = True
+
+ feeds = []
+ feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
+ feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
+ feeds.append((u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
+ feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
+ feeds.append((u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
+ feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
+ feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
+ feeds.append((u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
+ feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))
+
+ extra_css = '''
+ body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
+ h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
+ #name {margin-bottom: 0.2em}
+ #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
+ '''
+
+ def parse_index(self):
+ index = []
+ oldestDate = date.today() - timedelta(days = self.oldest_article)
+ oldest = oldestDate.strftime('%Y%m%d')
+ for feed in self.feeds:
+ cartoons = []
+ soup = self.index_to_soup(feed[1])
+
+ cartoon = {'title': 'Current', 'date': None, 'url': feed[1], 'description' : ''}
+ cartoons.append(cartoon)
+
+ select = soup.find('select', attrs = {'name': ['url', 'dest']})
+ if select:
+ cartoonCandidates = []
+ if select['name'] == 'url':
+ cartoonCandidates = self.cartoonCandidatesWaPo(select, oldest)
+ else:
+ cartoonCandidates = self.cartoonCandidatesCreatorsCom(select, oldest)
+
+ for cartoon in cartoonCandidates:
+ cartoons.append(cartoon)
+
+ index.append([feed[0], cartoons])
+
+ return index
+
+ def preprocess_html(self, soup):
+ freshSoup = self.getFreshSoup(soup)
+
+ div = soup.find('div', attrs = {'id': 'name'})
+ if div:
+ freshSoup.body.append(div)
+ comic = soup.find('div', attrs = {'id': 'comic_full'})
+
+ img = comic.find('img')
+ if '&' in img['src']:
+ img['src'], sep, bad = img['src'].rpartition('&')
+
+ freshSoup.body.append(comic)
+ freshSoup.body.append(soup.find('div', attrs = {'id': 'copyright'}))
+ else:
+ span = soup.find('span', attrs = {'class': 'title'})
+ if span:
+ del span['class']
+ span['id'] = 'name'
+ span.name = 'div'
+ freshSoup.body.append(span)
+
+ img = soup.find('img', attrs = {'class': 'pic_big'})
+ if img:
+ td = img.parent
+ if td.has_key('style'):
+ del td['style']
+ td.name = 'div'
+ td['id'] = 'comic_full'
+ freshSoup.body.append(td)
+
+ td = soup.find('td', attrs = {'class': 'copy'})
+ if td:
+ for a in td.find('a'):
+ a.extract()
+ del td['class']
+ td['id'] = 'copyright'
+ td.name = 'div'
+ freshSoup.body.append(td)
+
+ return freshSoup
+
+ def getFreshSoup(self, oldSoup):
+ freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
+ if oldSoup.head.title:
+ freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
+ return freshSoup
+
+ def cartoonCandidatesWaPo(self, select, oldest):
+ opts = select.findAll('option')
+ for i in range(1, len(opts)):
+ url = opts[i]['value'].rstrip('/')
+ dateparts = url.split('/')[-3:]
+ datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
+ if datenum >= oldest:
+ yield {'title': self.tag_to_string(opts[i]), 'date': None, 'url': url, 'description': ''}
+ else:
+ return
+
+ def cartoonCandidatesCreatorsCom(self, select, oldest):
+ monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
+ 'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
+ 'November': '11', 'December': '12'}
+
+ opts = select.findAll('option')
+ for i in range(1, len(opts)):
+ if opts[i].has_key('selected'):
+ continue
+
+ dateString = self.tag_to_string(opts[i])
+ rest, sep, year = dateString.rpartition(', ')
+ parts = rest.split(' ')
+ day = parts[2].rjust(2, '0')
+ month = monthNames[parts[1]]
+ datenum = str(year) + month + str(day)
+ if datenum >= oldest:
+ yield {'title': dateString, 'date': None, 'url': opts[i]['value'], 'description': ''}
+ else:
+ return
+
+
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index 064a1d1bdd..9ed8bb6255 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -264,6 +264,11 @@ class EPUBOutput(OutputFormatPlugin):
if body:
body = body[0]
+ # Add id attribute to <a> tags that have name
+ for x in XPath('//h:a[@name]')(body):
+ if not x.get('id', False):
+ x.set('id', x.get('name'))
+
# Replace <br> that are children of <body> as ADE doesn't handle them
if hasattr(body, 'xpath'):
for br in XPath('./h:br')(body):