Fix #766

2025-07-09 03:04:10 -04:00 · 2008-06-11 15:23:14 -07:00 · 2008-06-11 15:23:14 -07:00 · 257bf6a2c9
commit 257bf6a2c9
parent 38a5eaf141
3 changed files with 22 additions and 12 deletions
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -100,12 +100,21 @@ class MetaInformation(object):
    def __str__(self):
        ans = u''
        ans += u'Title    : ' + unicode(self.title) + u'\n'
-        ans += u'Author   : ' + (', '.join(self.authors) if self.authors is not None else u'None')
+        if self.authors:
-        ans += ((' (' + self.author_sort + ')') if self.author_sort else '') + u'\n'
+            ans += u'Author   : ' + (', '.join(self.authors) if self.authors is not None else u'None')
-        ans += u'Publisher: '+ unicode(self.publisher) + u'\n' 
+            ans += ((' (' + self.author_sort + ')') if self.author_sort else '') + u'\n'
-        ans += u'Category : ' + unicode(self.category) + u'\n'
+        if self.publisher:
-        ans += u'Comments : ' + unicode(self.comments) + u'\n'
+            ans += u'Publisher: '+ unicode(self.publisher) + u'\n'
-        ans += u'ISBN     : '     + unicode(self.isbn) + u'\n'
+        if self.category: 
+            ans += u'Category : ' + unicode(self.category) + u'\n'
+        if self.comments:
+            ans += u'Comments : ' + unicode(self.comments) + u'\n'
+        if self.isbn:
+            ans += u'ISBN     : '     + unicode(self.isbn) + u'\n'
+        if self.tags:
+            ans += u'Tags     : ' +unicode(self.tags) + '\n'
+        if self.series:
+            ans += u'Series   : '+unicode(self.series) + '(%d)'%self.series_index  
        return ans.strip()
    def __nonzero__(self):
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -435,7 +435,7 @@ class BasicNewsRecipe(object, LoggingInterface):
            self.simultaneous_downloads = 1
        self.navbar = templates.NavBarTemplate()
-        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header'])
+        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
        if '--base-font-size' not in self.html2lrf_options:
            self.html2lrf_options.extend(['--base-font-size', '12'])
        self.failed_downloads = []
@@ -645,9 +645,9 @@ class BasicNewsRecipe(object, LoggingInterface):
            html = self.feed2index(feed)
            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
            open(os.path.join(feed_dir, 'index.html'), 'wb').write(html)
        self.create_opf(feeds)
        self.report_progress(1, _('Feeds downloaded to %s')%index)
        return index
    def download_cover(self):
@@ -704,7 +704,7 @@ class BasicNewsRecipe(object, LoggingInterface):
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp
-                    src = open(last, 'rb').read()
+                    src = open(last, 'rb').read().decode('utf-8')
                    soup = BeautifulSoup(src)
                    body = soup.find('body')
                    if body is not None:
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -7,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Fetch a webpage and its links recursively. The webpages are saved to disk in
 UTF-8 encoding with any charset declarations removed.
 '''
-import sys, socket, os, urlparse, codecs, logging, re, time, copy, urllib2, threading, traceback
+import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, traceback
 from urllib import url2pathname
 from httplib import responses
@@ -43,8 +43,9 @@ def save_soup(soup, target):
            if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path):
                tag[key] = relpath(path, selfdir).replace(os.sep, '/')
-    f = codecs.open(target, 'w', 'utf-8')
+    f = open(target, 'wb')
-    f.write(unicode(soup))
+    html = unicode(soup)
+    f.write(html.encode('utf-8'))
    f.close()