mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Port use of renderContents and BeautifulStoneSoup
This commit is contained in:
parent
c89b656df4
commit
256c7563b6
@ -28,10 +28,10 @@ class Adventure_zone(BasicNewsRecipe):
|
|||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
skip_tag = soup.body.find(attrs={'class':'subject'})
|
skip_tag = soup.body.find(attrs={'class':'subject'})
|
||||||
skip_tag = skip_tag.findAll(name='a', href=True)
|
skip_tag = skip_tag.findAll(name='a', href=True)
|
||||||
title = soup.title.renderContents().lower()
|
title = soup.title.renderContents().decode('utf-8').lower()
|
||||||
if self._is_linked_text(title):
|
if self._is_linked_text(title):
|
||||||
for r in skip_tag:
|
for r in skip_tag:
|
||||||
word = r.renderContents()
|
word = r.renderContents().decode('utf-8')
|
||||||
if not word:
|
if not word:
|
||||||
continue
|
continue
|
||||||
word = word.lower()
|
word = word.lower()
|
||||||
|
@ -104,7 +104,7 @@ class BerlinPolicyJournal(BasicNewsRecipe):
|
|||||||
div = soup.find('div', {'class': 'meta-info'})
|
div = soup.find('div', {'class': 'meta-info'})
|
||||||
authors = ''
|
authors = ''
|
||||||
for entry in div.findAll('span', {'class': 'entry-author'}):
|
for entry in div.findAll('span', {'class': 'entry-author'}):
|
||||||
authors = authors + entry.a.span.renderContents().strip() + ', '
|
authors = authors + entry.a.span.renderContents().decode('utf-8').strip() + ', '
|
||||||
date = div.find('time').renderContents().strip()
|
date = div.find('time').renderContents().decode('utf-8').strip()
|
||||||
div.replaceWith('<div>' + date + ' | ' + authors[:-2] + '<br/></div>')
|
div.replaceWith('<div>' + date + ' | ' + authors[:-2] + '<br/></div>')
|
||||||
return soup
|
return soup
|
||||||
|
@ -83,5 +83,5 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
br.replaceWith(' ')
|
br.replaceWith(' ')
|
||||||
# remove all links
|
# remove all links
|
||||||
for a in soup.findAll('a'):
|
for a in soup.findAll('a'):
|
||||||
a.replaceWith(a.renderContents())
|
a.replaceWith(a.renderContents().decode('utf-8'))
|
||||||
return soup
|
return soup
|
||||||
|
@ -8,7 +8,7 @@ www.canada.com
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -183,15 +183,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -8,7 +8,7 @@ www.canada.com
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -183,15 +183,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
|
||||||
class Ekathimerini(BasicNewsRecipe):
|
class Ekathimerini(BasicNewsRecipe):
|
||||||
@ -41,12 +42,10 @@ class Ekathimerini(BasicNewsRecipe):
|
|||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
idx_contents = self.browser.open(self.rss_url).read()
|
idx_contents = self.browser.open(self.rss_url).read()
|
||||||
idx = BeautifulStoneSoup(
|
idx = etree.fromstring(idx_contents)
|
||||||
idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
|
|
||||||
|
|
||||||
cats = list(set([self.tag_to_string(subcat)
|
cats = sorted({self.tag_to_string(subcat)
|
||||||
for subcat in idx.findAll('subcat')]))
|
for subcat in idx.xpath('//*[local-name()="subcat"]')})
|
||||||
cats.sort()
|
|
||||||
|
|
||||||
feeds = [(u'News', list(self.find_articles(idx, u'')))]
|
feeds = [(u'News', list(self.find_articles(idx, u'')))]
|
||||||
|
|
||||||
|
@ -52,29 +52,29 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe):
|
|||||||
# If there's only one, there is just a link with the dayname.
|
# If there's only one, there is just a link with the dayname.
|
||||||
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
|
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
|
||||||
# In that case we're interested in the last two.
|
# In that case we're interested in the last two.
|
||||||
if links[i].renderContents() in dayNames:
|
if links[i].renderContents().decode('utf-8') in dayNames:
|
||||||
# If the link is not in daynames, we processed it already, but if it is, let's see
|
# If the link is not in daynames, we processed it already, but if it is, let's see
|
||||||
# if the next one has '1' as content
|
# if the next one has '1' as content
|
||||||
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1'):
|
if (i + 1 <= maxIndex) and (links[i + 1].renderContents().decode('utf-8') == '1'):
|
||||||
# Got you! Add it to the list
|
# Got you! Add it to the list
|
||||||
article = {'title': links[i].renderContents(
|
article = {'title': links[i].renderContents().decode('utf-8'
|
||||||
) + ' 1', 'date': u'', 'url': self.INDEX + links[i + 1]['href'], 'description': ''}
|
)+ ' 1', 'date': u'', 'url': self.INDEX + links[i + 1]['href'], 'description': ''}
|
||||||
articles.append(article)
|
articles.append(article)
|
||||||
# If there is a '1', there should be a '2' as well, but
|
# If there is a '1', there should be a '2' as well, but
|
||||||
# better save than sorry
|
# better save than sorry
|
||||||
if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2'):
|
if (i + 2 <= maxIndex) and (links[i + 2].renderContents().decode('utf-8') == '2'):
|
||||||
# Got you! Add it to the list
|
# Got you! Add it to the list
|
||||||
article = {'title': links[i].renderContents(
|
article = {'title': links[i].renderContents(
|
||||||
) + ' 2', 'date': u'', 'url': self.INDEX + links[i + 2]['href'], 'description': ''}
|
).decode('utf-8') + ' 2', 'date': u'', 'url': self.INDEX + links[i + 2]['href'], 'description': ''}
|
||||||
articles.append(article)
|
articles.append(article)
|
||||||
else:
|
else:
|
||||||
# There is only one cartoon for this day. Add it to the
|
# There is only one cartoon for this day. Add it to the
|
||||||
# list.
|
# list.
|
||||||
article = {'title': links[i].renderContents(
|
article = {'title': links[i].renderContents(
|
||||||
), 'date': u'', 'url': self.INDEX + links[i]['href'], 'description': ''}
|
).decode('utf-8'), 'date': u'', 'url': self.INDEX + links[i]['href'], 'description': ''}
|
||||||
articles.append(article)
|
articles.append(article)
|
||||||
# Might as well use the weeknumber as title
|
# Might as well use the weeknumber as title
|
||||||
week = index.find('span', attrs={'class': 'week'}).renderContents()
|
week = index.find('span', attrs={'class': 'week'}).renderContents().decode('utf-8')
|
||||||
|
|
||||||
return [[week, articles]]
|
return [[week, articles]]
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ class Gildia(BasicNewsRecipe):
|
|||||||
|
|
||||||
words = ('recenzj', 'zapowied', 'fragmen',
|
words = ('recenzj', 'zapowied', 'fragmen',
|
||||||
'relacj', 'wywiad', 'nominacj')
|
'relacj', 'wywiad', 'nominacj')
|
||||||
document_title = soup.title.renderContents().lower()
|
document_title = soup.title.renderContents().decode('utf-8').lower()
|
||||||
for word in words:
|
for word in words:
|
||||||
if word in document_title:
|
if word in document_title:
|
||||||
for link in content.findAll(name='a'):
|
for link in content.findAll(name='a'):
|
||||||
@ -57,7 +57,7 @@ class Gildia(BasicNewsRecipe):
|
|||||||
return self.index_to_soup(tag['href'], raw=True)
|
return self.index_to_soup(tag['href'], raw=True)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
title = soup.title.renderContents().lower()
|
title = soup.title.renderContents().decode('utf-8').lower()
|
||||||
for a in soup('a', href=True):
|
for a in soup('a', href=True):
|
||||||
if not a['href'].startswith('http'):
|
if not a['href'].startswith('http'):
|
||||||
if '/gry/' in a['href']:
|
if '/gry/' in a['href']:
|
||||||
|
@ -129,7 +129,7 @@ class Handelsblatt(BasicNewsRecipe):
|
|||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
# convert lists of author(s) and date(s) into simple text
|
# convert lists of author(s) and date(s) into simple text
|
||||||
for cap in soup.findAll('div', {'class': re.compile('vhb-article-caption')}):
|
for cap in soup.findAll('div', {'class': re.compile('vhb-article-caption')}):
|
||||||
cap.replaceWith(cap.renderContents().strip() + ' ')
|
cap.replaceWith(cap.renderContents().decode('utf-8').strip() + ' ')
|
||||||
for row in soup.findAll('div', {'class': 'vhb-article-author-row'}):
|
for row in soup.findAll('div', {'class': 'vhb-article-author-row'}):
|
||||||
for ul in row.findAll('ul'):
|
for ul in row.findAll('ul'):
|
||||||
entry = ''
|
entry = ''
|
||||||
@ -141,7 +141,7 @@ class Handelsblatt(BasicNewsRecipe):
|
|||||||
# remove all local hyperlinks
|
# remove all local hyperlinks
|
||||||
for a in soup.findAll('a', {'href': True}):
|
for a in soup.findAll('a', {'href': True}):
|
||||||
if a['href'] and a['href'][0] in ['/', '#']:
|
if a['href'] and a['href'][0] in ['/', '#']:
|
||||||
a.replaceWith(a.renderContents())
|
a.replaceWith(a.renderContents().decode('utf-8'))
|
||||||
# make sure that all figure captions (including the source) are shown
|
# make sure that all figure captions (including the source) are shown
|
||||||
# without linebreaks by using the alternative text given within <img/>
|
# without linebreaks by using the alternative text given within <img/>
|
||||||
# instead of the original text (which is oddly formatted)
|
# instead of the original text (which is oddly formatted)
|
||||||
|
@ -63,7 +63,7 @@ class JoopRecipe(BasicNewsRecipe):
|
|||||||
for section in sections:
|
for section in sections:
|
||||||
articles = []
|
articles = []
|
||||||
h2 = div.find(lambda tag: tag.name ==
|
h2 = div.find(lambda tag: tag.name ==
|
||||||
'h2' and tag.renderContents() == section)
|
'h2' and tag.renderContents().decode('utf-8') == section)
|
||||||
if h2:
|
if h2:
|
||||||
ul = h2.findNextSibling('ul', 'linklist')
|
ul = h2.findNextSibling('ul', 'linklist')
|
||||||
if ul:
|
if ul:
|
||||||
|
@ -65,14 +65,14 @@ class Mediapart(BasicNewsRecipe):
|
|||||||
|
|
||||||
# print "found fil ",title
|
# print "found fil ",title
|
||||||
article_type = article.find('a', {'href': re.compile(
|
article_type = article.find('a', {'href': re.compile(
|
||||||
r'.*\/type-darticles\/.*')}).renderContents()
|
r'.*\/type-darticles\/.*')}).renderContents().decode('utf-8')
|
||||||
# print "kind: ",article_type
|
# print "kind: ",article_type
|
||||||
|
|
||||||
for s in title('span'):
|
for s in title('span'):
|
||||||
s.replaceWith(s.renderContents() + "\n")
|
s.replaceWith(s.renderContents().decode('utf-8') + "\n")
|
||||||
url = title.find('a', href=True)['href']
|
url = title.find('a', href=True)['href']
|
||||||
|
|
||||||
# article_date = self.parse_french_date(article.find("span", "article-date").renderContents())
|
# article_date = self.parse_french_date(article.find("span", "article-date").renderContents().decode('utf-8'))
|
||||||
# print("################################# 9")
|
# print("################################# 9")
|
||||||
# print(article_date)
|
# print(article_date)
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ www.canada.com
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -183,15 +183,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -76,7 +76,7 @@ class NrcNextRecipe(BasicNewsRecipe):
|
|||||||
# In this feed/page articles can be written by more than one author.
|
# In this feed/page articles can be written by more than one author.
|
||||||
# It is nice to see their names in the titles.
|
# It is nice to see their names in the titles.
|
||||||
flag = post.find('h2', attrs={'class': 'vlag'})
|
flag = post.find('h2', attrs={'class': 'vlag'})
|
||||||
author = flag.contents[0].renderContents()
|
author = flag.contents[0].renderContents().decode('utf-8')
|
||||||
completeTitle = u''.join([author, u': ', title])
|
completeTitle = u''.join([author, u': ', title])
|
||||||
else:
|
else:
|
||||||
completeTitle = title
|
completeTitle = title
|
||||||
|
@ -8,7 +8,7 @@ www.canada.com
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -183,15 +183,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -9,7 +9,6 @@ www.canada.com
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -144,15 +143,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -9,7 +9,6 @@ www.canada.com
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -144,15 +143,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -8,7 +8,7 @@ www.canada.com
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -196,15 +196,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -8,7 +8,7 @@ www.canada.com
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -184,15 +184,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -9,7 +9,7 @@ www.canada.com
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
def new_tag(soup, name, attrs=()):
|
||||||
@ -147,15 +147,7 @@ class TimesColonist(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -9,7 +9,6 @@ www.canada.com
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -144,15 +143,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
return fixed
|
return fixed
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
return description
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(
|
|
||||||
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&", "&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first:
|
if first:
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
import bs4
|
||||||
from bs4 import ( # noqa
|
from bs4 import ( # noqa
|
||||||
CData, Comment, Declaration, NavigableString, ProcessingInstruction, Tag,
|
CData, Comment, Declaration, NavigableString, ProcessingInstruction, Tag,
|
||||||
__version__
|
__version__
|
||||||
@ -27,3 +28,7 @@ def parse_html(markup):
|
|||||||
|
|
||||||
def BeautifulSoup(markup='', *a, **kw):
|
def BeautifulSoup(markup='', *a, **kw):
|
||||||
return parse_html(markup)
|
return parse_html(markup)
|
||||||
|
|
||||||
|
|
||||||
|
def BeautifulStoneSoup(markup='', *a, **kw):
|
||||||
|
return bs4.BeautifulSoup(markup, 'xml')
|
||||||
|
@ -39,7 +39,7 @@ def _metadata_from_table(soup, searchfor):
|
|||||||
td = td.parent
|
td = td.parent
|
||||||
# there appears to be multiple ways of structuring the metadata
|
# there appears to be multiple ways of structuring the metadata
|
||||||
# on the home page. cue some nasty special-case hacks...
|
# on the home page. cue some nasty special-case hacks...
|
||||||
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(None), flags=re.I):
|
if re.match(r'^\s*'+searchfor+r'\s*$', td.decode_contents(), flags=re.I):
|
||||||
meta = _detag(td.findNextSibling('td'))
|
meta = _detag(td.findNextSibling('td'))
|
||||||
return re.sub('^:', '', meta).strip()
|
return re.sub('^:', '', meta).strip()
|
||||||
else:
|
else:
|
||||||
@ -52,7 +52,7 @@ def _metadata_from_span(soup, searchfor):
|
|||||||
if span is None:
|
if span is None:
|
||||||
return None
|
return None
|
||||||
# this metadata might need some cleaning up still :/
|
# this metadata might need some cleaning up still :/
|
||||||
return _detag(span.renderContents(None).strip())
|
return _detag(span.decode_contents().strip())
|
||||||
|
|
||||||
|
|
||||||
def _get_authors(soup):
|
def _get_authors(soup):
|
||||||
|
@ -5,35 +5,31 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Compile a LRS file into a LRF file.
|
Compile a LRS file into a LRF file.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, os, logging
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
from calibre import setup_cli_handlers
|
from calibre import setup_cli_handlers
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.ebooks.BeautifulSoup import (
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, \
|
BeautifulStoneSoup, CData, NavigableString, Tag
|
||||||
CData, Tag
|
)
|
||||||
from calibre.ebooks.lrf.pylrs.pylrs import Book, PageStyle, TextStyle, \
|
|
||||||
BlockStyle, ImageStream, Font, StyleDefault, BookSetting, Header, \
|
|
||||||
Image, ImageBlock, Page, TextBlock, Canvas, Paragraph, CR, Span, \
|
|
||||||
Italic, Sup, Sub, Bold, EmpLine, JumpButton, CharButton, Plot, \
|
|
||||||
DropCaps, Footer, RuledLine
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from calibre.ebooks.lrf.pylrs.pylrs import (
|
||||||
|
CR, BlockStyle, Bold, Book, BookSetting, Canvas, CharButton, DropCaps, EmpLine,
|
||||||
|
Font, Footer, Header, Image, ImageBlock, ImageStream, Italic, JumpButton, Page,
|
||||||
|
PageStyle, Paragraph, Plot, RuledLine, Span, StyleDefault, Sub, Sup, TextBlock,
|
||||||
|
TextStyle
|
||||||
|
)
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
from polyglot.builtins import string_or_bytes
|
from polyglot.builtins import string_or_bytes
|
||||||
|
|
||||||
|
|
||||||
class LrsParser(object):
|
class LrsParser(object):
|
||||||
|
|
||||||
SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
|
|
||||||
'PutObj', 'RuledLine',
|
|
||||||
'Plot', 'SetDefault', 'BookSetting', 'RegistFont',
|
|
||||||
'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo',
|
|
||||||
'ImageStream', 'Image']]
|
|
||||||
|
|
||||||
def __init__(self, stream, logger):
|
def __init__(self, stream, logger):
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
src = stream.read()
|
src = stream.read()
|
||||||
self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
|
self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0])
|
||||||
convertEntities=BeautifulStoneSoup.XML_ENTITIES,
|
|
||||||
selfClosingTags=self.SELF_CLOSING_TAGS)
|
|
||||||
self.objects = {}
|
self.objects = {}
|
||||||
for obj in self.soup.findAll(objid=True):
|
for obj in self.soup.findAll(objid=True):
|
||||||
self.objects[obj['objid']] = obj
|
self.objects[obj['objid']] = obj
|
||||||
|
@ -1,22 +1,29 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
from __future__ import with_statement
|
from __future__ import print_function, with_statement
|
||||||
from __future__ import print_function
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
'''Read meta information from epub files'''
|
'''Read meta information from epub files'''
|
||||||
|
|
||||||
import io, os, re, posixpath
|
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import posixpath
|
||||||
|
import re
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
|
from lxml import etree
|
||||||
from calibre.utils.localunzip import LocalZipFile
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
|
||||||
from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf
|
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
from calibre import CurrentDir, walk
|
from calibre import CurrentDir, walk
|
||||||
from calibre.constants import isosx
|
from calibre.constants import isosx
|
||||||
|
from calibre.ebooks.metadata.opf import (
|
||||||
|
get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf
|
||||||
|
)
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.utils.localunzip import LocalZipFile
|
||||||
|
from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace
|
||||||
|
|
||||||
|
|
||||||
class EPubException(Exception):
|
class EPubException(Exception):
|
||||||
@ -36,20 +43,17 @@ class Container(dict):
|
|||||||
def __init__(self, stream=None):
|
def __init__(self, stream=None):
|
||||||
if not stream:
|
if not stream:
|
||||||
return
|
return
|
||||||
soup = BeautifulStoneSoup(stream.read())
|
container = etree.fromstring(stream.read())
|
||||||
container = soup.find(name=re.compile(r'container$', re.I))
|
|
||||||
if not container:
|
|
||||||
raise OCFException("<container> element missing")
|
|
||||||
if container.get('version', None) != '1.0':
|
if container.get('version', None) != '1.0':
|
||||||
raise EPubException("unsupported version of OCF")
|
raise EPubException("unsupported version of OCF")
|
||||||
rootfiles = container.find(re.compile(r'rootfiles$', re.I))
|
rootfiles = container.xpath('./*[local-name()="rootfiles"]')
|
||||||
if not rootfiles:
|
if not rootfiles:
|
||||||
raise EPubException("<rootfiles/> element missing")
|
raise EPubException("<rootfiles/> element missing")
|
||||||
for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
|
for rootfile in rootfiles[0].xpath('./*[local-name()="rootfile"]'):
|
||||||
try:
|
mt, fp = rootfile.get('media-type'), rootfile.get('full-path')
|
||||||
self[rootfile['media-type']] = rootfile['full-path']
|
if not mt or not fp:
|
||||||
except KeyError:
|
|
||||||
raise EPubException("<rootfile/> element malformed")
|
raise EPubException("<rootfile/> element malformed")
|
||||||
|
self[mt] = fp
|
||||||
|
|
||||||
|
|
||||||
class OCF(object):
|
class OCF(object):
|
||||||
|
@ -340,8 +340,7 @@ def render_jacket(mi, output_profile,
|
|||||||
if hr_tag is not None:
|
if hr_tag is not None:
|
||||||
hr_tag.extract()
|
hr_tag.extract()
|
||||||
|
|
||||||
return strip_encoding_declarations(
|
return strip_encoding_declarations(soup.decode_contents())
|
||||||
soup.renderContents('utf-8').decode('utf-8'))
|
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import RECOVER_PARSER
|
from calibre.ebooks.oeb.base import RECOVER_PARSER
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ from copy import deepcopy
|
|||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
|
|
||||||
from calibre import (
|
from calibre import (
|
||||||
prepare_string_for_xml, strftime, force_unicode, isbytestring, replace_entities, as_unicode)
|
prepare_string_for_xml, strftime, force_unicode, isbytestring, replace_entities, as_unicode, xml_replace_entities)
|
||||||
from calibre.constants import isosx, cache_dir
|
from calibre.constants import isosx, cache_dir
|
||||||
from calibre.customize.conversion import DummyReporter
|
from calibre.customize.conversion import DummyReporter
|
||||||
from calibre.customize.ui import output_profiles
|
from calibre.customize.ui import output_profiles
|
||||||
@ -29,6 +29,9 @@ from calibre.utils.localization import get_lang, lang_as_iso639_1
|
|||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
|
NBSP = u'\u00a0'
|
||||||
|
|
||||||
|
|
||||||
class Formatter(TemplateFormatter):
|
class Formatter(TemplateFormatter):
|
||||||
|
|
||||||
def get_value(self, key, args, kwargs):
|
def get_value(self, key, args, kwargs):
|
||||||
@ -112,7 +115,7 @@ class CatalogBuilder(object):
|
|||||||
if self.generate_for_kindle_mobi:
|
if self.generate_for_kindle_mobi:
|
||||||
return '▷'
|
return '▷'
|
||||||
else:
|
else:
|
||||||
return ' '
|
return NBSP
|
||||||
|
|
||||||
def __init__(self, db, _opts, plugin,
|
def __init__(self, db, _opts, plugin,
|
||||||
report_progress=DummyReporter(),
|
report_progress=DummyReporter(),
|
||||||
@ -1326,7 +1329,7 @@ class CatalogBuilder(object):
|
|||||||
"""
|
"""
|
||||||
# Kindle TOC descriptions won't render certain characters
|
# Kindle TOC descriptions won't render certain characters
|
||||||
# Fix up
|
# Fix up
|
||||||
massaged = unicode_type(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
massaged = xml_replace_entities(unicode_type(description))
|
||||||
|
|
||||||
# Replace '&' with '&'
|
# Replace '&' with '&'
|
||||||
massaged = re.sub("&", "&", massaged)
|
massaged = re.sub("&", "&", massaged)
|
||||||
@ -1354,7 +1357,7 @@ class CatalogBuilder(object):
|
|||||||
if self.opts.fmt == 'mobi':
|
if self.opts.fmt == 'mobi':
|
||||||
codeTag = soup.new_tag("code")
|
codeTag = soup.new_tag("code")
|
||||||
if prefix_char is None:
|
if prefix_char is None:
|
||||||
codeTag.insert(0, NavigableString(' '))
|
codeTag.insert(0, NavigableString(NBSP))
|
||||||
else:
|
else:
|
||||||
codeTag.insert(0, NavigableString(prefix_char))
|
codeTag.insert(0, NavigableString(prefix_char))
|
||||||
return codeTag
|
return codeTag
|
||||||
@ -1362,7 +1365,7 @@ class CatalogBuilder(object):
|
|||||||
spanTag = soup.new_tag("span")
|
spanTag = soup.new_tag("span")
|
||||||
spanTag['class'] = "prefix"
|
spanTag['class'] = "prefix"
|
||||||
if prefix_char is None:
|
if prefix_char is None:
|
||||||
prefix_char = " "
|
prefix_char = NBSP
|
||||||
spanTag.insert(0, NavigableString(prefix_char))
|
spanTag.insert(0, NavigableString(prefix_char))
|
||||||
return spanTag
|
return spanTag
|
||||||
|
|
||||||
@ -2711,7 +2714,7 @@ class CatalogBuilder(object):
|
|||||||
if i < len(book['genres']) - 1:
|
if i < len(book['genres']) - 1:
|
||||||
genresTag.insert(gtc, NavigableString(' · '))
|
genresTag.insert(gtc, NavigableString(' · '))
|
||||||
gtc += 1
|
gtc += 1
|
||||||
genres = genresTag.renderContents()
|
genres = genresTag.decode_contents()
|
||||||
|
|
||||||
# Formats
|
# Formats
|
||||||
formats = []
|
formats = []
|
||||||
@ -2793,7 +2796,7 @@ class CatalogBuilder(object):
|
|||||||
if publisher == ' ':
|
if publisher == ' ':
|
||||||
publisherTag = body.find('td', attrs={'class': 'publisher'})
|
publisherTag = body.find('td', attrs={'class': 'publisher'})
|
||||||
if publisherTag:
|
if publisherTag:
|
||||||
publisherTag.contents[0].replaceWith(' ')
|
publisherTag.contents[0].replaceWith(NBSP)
|
||||||
|
|
||||||
if not genres:
|
if not genres:
|
||||||
genresTag = body.find('p', attrs={'class': 'genres'})
|
genresTag = body.find('p', attrs={'class': 'genres'})
|
||||||
@ -2808,12 +2811,12 @@ class CatalogBuilder(object):
|
|||||||
if note_content == '':
|
if note_content == '':
|
||||||
tdTag = body.find('td', attrs={'class': 'notes'})
|
tdTag = body.find('td', attrs={'class': 'notes'})
|
||||||
if tdTag:
|
if tdTag:
|
||||||
tdTag.contents[0].replaceWith(' ')
|
tdTag.contents[0].replaceWith(NBSP)
|
||||||
|
|
||||||
emptyTags = body.findAll('td', attrs={'class': 'empty'})
|
emptyTags = body.findAll('td', attrs={'class': 'empty'})
|
||||||
for mt in emptyTags:
|
for mt in emptyTags:
|
||||||
newEmptyTag = soup.new_tag('td')
|
newEmptyTag = soup.new_tag('td')
|
||||||
newEmptyTag.insert(0, '\xa0')
|
newEmptyTag.insert(0, NBSP)
|
||||||
mt.replaceWith(newEmptyTag)
|
mt.replaceWith(newEmptyTag)
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
@ -2974,7 +2977,7 @@ class CatalogBuilder(object):
|
|||||||
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata" version="2005-1" xml:lang="en">
|
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata" version="2005-1" xml:lang="en">
|
||||||
</ncx>
|
</ncx>
|
||||||
'''
|
'''
|
||||||
soup = BeautifulStoneSoup(header, selfClosingTags=['content', 'calibre:meta-img'])
|
soup = BeautifulStoneSoup(header)
|
||||||
|
|
||||||
ncx = soup.find('ncx')
|
ncx = soup.find('ncx')
|
||||||
navMapTag = soup.new_tag('navMap')
|
navMapTag = soup.new_tag('navMap')
|
||||||
@ -4033,7 +4036,7 @@ class CatalogBuilder(object):
|
|||||||
</package>
|
</package>
|
||||||
'''.replace('LANG', lang)
|
'''.replace('LANG', lang)
|
||||||
# Add the supplied metadata tags
|
# Add the supplied metadata tags
|
||||||
soup = BeautifulStoneSoup(header, selfClosingTags=['item', 'itemref', 'meta', 'reference'])
|
soup = BeautifulStoneSoup(header)
|
||||||
metadata = soup.find('metadata')
|
metadata = soup.find('metadata')
|
||||||
mtc = 0
|
mtc = 0
|
||||||
|
|
||||||
@ -4171,8 +4174,11 @@ class CatalogBuilder(object):
|
|||||||
guide.insert(0, referenceTag)
|
guide.insert(0, referenceTag)
|
||||||
|
|
||||||
# Write the OPF file
|
# Write the OPF file
|
||||||
outfile = open("%s/%s.opf" % (self.catalog_path, self.opts.basename), 'w')
|
output = soup.prettify(encoding='utf-8')
|
||||||
outfile.write(soup.prettify())
|
if isinstance(output, unicode_type):
|
||||||
|
output = output.encode('utf-8')
|
||||||
|
with lopen("%s/%s.opf" % (self.catalog_path, self.opts.basename), 'wb') as outfile:
|
||||||
|
outfile.write(output)
|
||||||
|
|
||||||
def generate_rating_string(self, book):
|
def generate_rating_string(self, book):
|
||||||
""" Generate rating string for Descriptions.
|
""" Generate rating string for Descriptions.
|
||||||
@ -4657,7 +4663,7 @@ class CatalogBuilder(object):
|
|||||||
elem.extract()
|
elem.extract()
|
||||||
|
|
||||||
# Reconstruct comments w/o <div>s
|
# Reconstruct comments w/o <div>s
|
||||||
comments = soup.renderContents(None)
|
comments = soup.decode_contents()
|
||||||
|
|
||||||
# Convert \n\n to <p>s
|
# Convert \n\n to <p>s
|
||||||
if re.search('\n\n', comments):
|
if re.search('\n\n', comments):
|
||||||
@ -4669,7 +4675,7 @@ class CatalogBuilder(object):
|
|||||||
pTag.insert(0, p)
|
pTag.insert(0, p)
|
||||||
soup.insert(tsc, pTag)
|
soup.insert(tsc, pTag)
|
||||||
tsc += 1
|
tsc += 1
|
||||||
comments = soup.renderContents(None)
|
comments = soup.decode_contents()
|
||||||
|
|
||||||
# Convert solo returns to <br />
|
# Convert solo returns to <br />
|
||||||
comments = re.sub('[\r\n]', '<br />', comments)
|
comments = re.sub('[\r\n]', '<br />', comments)
|
||||||
@ -4726,7 +4732,7 @@ class CatalogBuilder(object):
|
|||||||
result.insert(rtc, elem)
|
result.insert(rtc, elem)
|
||||||
rtc += 1
|
rtc += 1
|
||||||
|
|
||||||
return result.renderContents(encoding=None)
|
return result.decode_contents()
|
||||||
|
|
||||||
def merge_comments(self, record):
|
def merge_comments(self, record):
|
||||||
""" Merge comments with custom column content.
|
""" Merge comments with custom column content.
|
||||||
@ -4954,6 +4960,9 @@ class CatalogBuilder(object):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
self.update_progress_full_step(_("Saving NCX"))
|
self.update_progress_full_step(_("Saving NCX"))
|
||||||
|
ncx = self.ncx_soup.prettify(encoding='utf-8')
|
||||||
|
if isinstance(ncx, unicode_type):
|
||||||
|
ncx = ncx.encode('utf-8')
|
||||||
|
|
||||||
outfile = open("%s/%s.ncx" % (self.catalog_path, self.opts.basename), 'w')
|
with lopen("%s/%s.ncx" % (self.catalog_path, self.opts.basename), 'wb') as outfile:
|
||||||
outfile.write(self.ncx_soup.prettify())
|
outfile.write(ncx)
|
||||||
|
@ -131,7 +131,7 @@ def comments_to_html(comments):
|
|||||||
for t in result.findAll(text=True):
|
for t in result.findAll(text=True):
|
||||||
t.replaceWith(prepare_string_for_xml(unicode_type(t)))
|
t.replaceWith(prepare_string_for_xml(unicode_type(t)))
|
||||||
|
|
||||||
return result.renderContents(encoding=None)
|
return result.decode_contents()
|
||||||
|
|
||||||
|
|
||||||
def markdown(val):
|
def markdown(val):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user