diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css index 458d1a9bf0..5fe7b2fd85 100644 --- a/resources/catalog/stylesheet.css +++ b/resources/catalog/stylesheet.css @@ -8,7 +8,7 @@ p.title { font-size:xx-large; border-bottom: solid black 4px; } - + p.author { margin-top:0em; margin-bottom:0em; @@ -31,25 +31,33 @@ p.description { margin-top: 0em; } +p.date_index { + font-size:x-large; + text-align:center; + font-style:bold; + margin-top:1em; + margin-bottom:0px; + } + p.letter_index { font-size:x-large; text-align:left; margin-top:0px; - margin-bottom:0px; + margin-bottom:0px; } p.author_index { font-size:large; text-align:left; margin-top:0px; - margin-bottom:0px; + margin-bottom:0px; text-indent: 0em; } p.read_book { text-align:left; margin-top:0px; - margin-bottom:0px; + margin-bottom:0px; margin-left:2em; text-indent:-2em; } @@ -57,8 +65,8 @@ p.read_book { p.unread_book { text-align:left; margin-top:0px; - margin-bottom:0px; + margin-bottom:0px; margin-left:2em; text-indent:-2em; } - + diff --git a/resources/viewer/images.js b/resources/viewer/images.js index ea68009254..7b10f6169a 100644 --- a/resources/viewer/images.js +++ b/resources/viewer/images.js @@ -20,4 +20,20 @@ function setup_image_scaling_handlers() { }); } +function extract_svged_images() { + $("svg").each(function() { + var children = $(this).children("img"); + if (children.length == 1) { + var img = $(children[0]); + var href = img.attr('xlink:href'); + if (href != undefined) { + $(this).replaceWith('
SVG Image
'); + } + } + }); +} + +$(document).ready(function() { + //extract_svged_images(); +}); diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index e32c03fe13..e5e284fb5b 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -132,9 +132,12 @@ def prints(*args, **kwargs): try: arg = arg.encode(enc) except UnicodeEncodeError: - if not safe_encode: - raise - arg = repr(arg) + try: + arg = arg.encode('utf-8') + except: + if not safe_encode: + raise + arg = repr(arg) file.write(arg) if i != len(args)-1: diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py index e911fe9cf7..3941904fdc 100644 --- a/src/calibre/ebooks/oeb/transforms/guide.py +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -34,7 +34,6 @@ class Clean(object): for x in list(self.oeb.guide): href = urldefrag(self.oeb.guide[x].href)[0] - print "ebooks.oeb.transforms.guide:Clean(): checking x.lower(): %s" % x.lower() if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc', 'title-page', 'copyright-page', 'start'): self.oeb.guide.remove(x) diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py index a379899af5..a6ee16db15 100644 --- a/src/calibre/ebooks/pdb/ereader/writer.py +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -42,7 +42,9 @@ class Writer(FormatWriter): pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace') text, text_sizes = self._text(pml) - chapter_index = self._index_item(r'(?s)\\C(?P\d)="(?P.+?)"', pml) + chapter_index = self._index_item(r'(?s)\\C(?P[0-4)="(?P.+?)"', pml) + chapter_index += self.index_item(r'(?s)\\X(?P[0-4])(?P.+?)\\X[0-4]', pml) + chapter_index += self.index_item(r'(?s)\\x(?P.+?)\\x', pml) link_index = self._index_item(r'(?s)\\Q="(?P.+?)"', pml) images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs) metadata = [self._metadata(metadata)] diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index bf2d921a10..42c16225d2 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -256,11 +256,16 @@ class Region(object): return len(self.columns) == 0 @property - def is_small(self): + def line_count(self): max_lines = 0 for c in self.columns: max_lines = max(max_lines, len(c)) - return max_lines > 2 + return max_lines + + + @property + def is_small(self): + return self.line_count < 3 def absorb(self, singleton): @@ -431,7 +436,7 @@ class Page(object): def coalesce_regions(self): # find contiguous sets of small regions # absorb into a neighboring region (prefer the one with number of cols - # closer to the avg number of cols in the set, if equal use large + # closer to the avg number of cols in the set, if equal use larger # region) # merge contiguous regions that can contain each other absorbed = set([]) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index aa2ff117a4..356e2679ee 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -171,6 +171,9 @@ class PML_HTMLizer(object): # &. It will display as & pml = pml.replace('&', '&') + pml = re.sub(r'(?<=\\x)(?P.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml) + pml = re.sub(r'(?<=\\X[0-4])(?P.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml) + pml = re.sub(r'\\a(?P\d{3})', lambda match: '&#%s;' % match.group('num'), pml) pml = re.sub(r'\\U(?P[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml) @@ -178,6 +181,19 @@ class PML_HTMLizer(object): return pml + def strip_pml(self, pml): + pml = re.sub(r'\\.\d=""', '', pml) + pml = re.sub(r'\\.=""', '', pml) + pml = re.sub(r'\\.\d', '', pml) + pml = re.sub(r'\\.', '', pml) + pml = re.sub(r'\\a\d\d\d', '', pml) + pml = re.sub(r'\\U\d\d\d\d', '', pml) + pml.replace('\r\n', ' ') + pml.replace('\n', ' ') + pml.replace('\r', ' ') + + return pml + def cleanup_html(self, html): old = html html = self.cleanup_html_remove_redundant(html) @@ -503,9 +519,9 @@ class PML_HTMLizer(object): if c == '\\': c = line.read(1) - if c in 'xqcrtTiIuobBlk': + if c in 'qcrtTiIuobBlk': text = self.process_code(c, line) - elif c in 'FSX': + elif c in 'FS': l = line.read(1) if '%s%s' % (c, l) == 'Fn': text = self.process_code('Fn', line, 'fn') @@ -515,8 +531,24 @@ class PML_HTMLizer(object): text = self.process_code('SB', line) elif '%s%s' % (c, l) == 'Sd': text = self.process_code('Sd', line, 'sb') + elif c in 'xXC': + # The PML was modified eariler so x and X put the text + # inside of ="" so we don't have do special processing + # for C. + t = '' + if c in 'XC': + level = line.read(1) + id = 'pml_toc-%s' % len(self.toc) + value = self.code_value(line) + if c == 'x': + t = self.process_code(c, line) + elif c == 'X': + t = self.process_code('%s%s' % (c, level), line) + if not value or value == '': + text = t else: - text = self.process_code('%s%s' % (c, l), line) + self.toc.add_item(os.path.basename(self.file_name), id, value) + text = '%s' % (id, t) elif c == 'm': empty = False src = self.code_value(line) @@ -528,11 +560,6 @@ class PML_HTMLizer(object): elif c == 'p': empty = False text = '

' - elif c == 'C': - line.read(1) - id = 'pml_toc-%s' % len(self.toc) - self.toc.add_item(os.path.basename(self.file_name), id, self.code_value(line)) - text = '' % id elif c == 'n': pass elif c == 'w': diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 7427a77c2f..d57ed136f6 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -233,7 +233,7 @@ class PMLMLizer(object): w += '="50%"' text.append(w) toc_id = elem.attrib.get('id', None) - if toc_id: + if toc_id and tag not in ('h1', 'h2','h3','h4','h5','h6',): if self.toc.get(page.href, None): toc_title = self.toc[page.href].get(toc_id, None) if toc_title: diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index f6fce62eac..6b95a4dcaa 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -10,7 +10,7 @@ from base64 import b64encode from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \ QPainter, QPalette, QBrush, QFontDatabase, QDialog, \ QColor, QPoint, QImage, QRegion, QVariant, QIcon, \ - QFont, pyqtSignature, QAction + QFont, pyqtSignature, QAction, QByteArray from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from calibre.utils.config import Config, StringConfig @@ -514,14 +514,18 @@ class DocumentView(QWebView): mt = guess_type(path)[0] html = open(path, 'rb').read().decode(path.encoding, 'replace') html = EntityDeclarationProcessor(html).processed_html + has_svg = re.search(r'<[:a-z]*svg', html) is not None + if 'xhtml' in mt: html = self.self_closing_pat.sub(self.self_closing_sub, html) if self.manager is not None: self.manager.load_started() self.loading_url = QUrl.fromLocalFile(path) - #self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path)) - #open('/tmp/t.html', 'wb').write(html.encode(path.encoding)) - self.setHtml(html, self.loading_url) + if has_svg: + prints('Rendering as XHTML...') + self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path)) + else: + self.setHtml(html, self.loading_url) self.turn_off_internal_scrollbars() def initialize_scrollbar(self): diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 529524371f..c859ff6e32 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1,6 +1,7 @@ import os, re, shutil, htmlentitydefs from collections import namedtuple +from datetime import date from xml.sax.saxutils import escape from calibre import filesystem_encoding, prints @@ -94,6 +95,10 @@ class CSV_XML(CatalogPlugin): item = ', '.join(fmt_list) elif field in ['authors','tags']: item = ', '.join(item) + elif field == 'isbn': + # Could be 9, 10 or 13 digits + field = '%s' % re.sub('[\D]','',field) + if x < len(fields) - 1: if item is not None: outstr += u'"%s",' % unicode(item).replace('"','""') @@ -484,7 +489,7 @@ class EPUB_MOBI(CatalogPlugin): total_steps = 13.0 # Used to xlate pubdate to friendly format - MONTHS = ['January', 'February','March','April','May','June', + MONTHS = ['','January', 'February','March','April','May','June', 'July','August','September','October','November','December'] THUMB_WIDTH = 75 THUMB_HEIGHT = 100 @@ -763,11 +768,14 @@ class EPUB_MOBI(CatalogPlugin): if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateHTMLDescriptions() + if getattr(self.reporter, 'cancel_requested', False): return 1 + self.generateHTMLByAuthor() + if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateHTMLByTitle() if getattr(self.reporter, 'cancel_requested', False): return 1 - self.generateHTMLByAuthor() + self.generateHTMLByDateAdded() if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateHTMLByTags() @@ -786,11 +794,14 @@ class EPUB_MOBI(CatalogPlugin): if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateNCXDescriptions("Descriptions") + if getattr(self.reporter, 'cancel_requested', False): return 1 + self.generateNCXByAuthor("Authors") + if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateNCXByTitle("Titles") if getattr(self.reporter, 'cancel_requested', False): return 1 - self.generateNCXByAuthor("Authors") + self.generateNCXByDateAdded("Recently Added") if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateNCXByTags("Genres") @@ -869,8 +880,8 @@ class EPUB_MOBI(CatalogPlugin): this_title['rating'] = record['rating'] if record['rating'] else 0 # 2009-11-05 09:29:37 date_strings = str(record['pubdate']).split("-") - this_title['date'] = '%s %s' % (self.MONTHS[int(date_strings[1])-1], date_strings[0]) - + this_title['date'] = '%s %s' % (self.MONTHS[int(date_strings[1])], date_strings[0]) + this_title['timestamp'] = record['timestamp'] if record['comments']: this_title['description'] = re.sub('&', '&', record['comments']) this_title['short_description'] = self.generateShortDescription(this_title['description']) @@ -1317,6 +1328,142 @@ class EPUB_MOBI(CatalogPlugin): outfile.close() self.htmlFileList.append("content/ByAlphaAuthor.html") + def generateHTMLByDateAdded(self): + + def add_books_to_HTML(this_months_list, dtc): + if len(this_months_list): + this_months_list = sorted(this_months_list, + key=lambda x:(x['title_sort'], x['title_sort'])) + this_months_list = sorted(this_months_list, + key=lambda x:(x['author_sort'], x['author_sort'])) + print "Books added in %s %s" % (self.MONTHS[current_date.month], current_date.year) + + # Create a new month anchor + pIndexTag = Tag(soup, "p") + pIndexTag['class'] = "date_index" + aTag = Tag(soup, "a") + aTag['name'] = "%s-%s" % (current_date.year, current_date.month) + pIndexTag.insert(0,aTag) + pIndexTag.insert(1,NavigableString('Books added in %s %s' % \ + (self.MONTHS[current_date.month],current_date.year))) + divTag.insert(dtc,pIndexTag) + dtc += 1 + current_author = None + + for purchase in this_months_list: + print " %-40s \t %-20s \t %s" % (purchase['title'], purchase['author'], purchase['timestamp']) + + + if purchase['author'] != current_author: + # Start a new author + current_author = purchase['author'] + pAuthorTag = Tag(soup, "p") + pAuthorTag['class'] = "author_index" + emTag = Tag(soup, "em") + aTag = Tag(soup, "a") + aTag['name'] = "%s" % self.generateAuthorAnchor(current_author) + aTag.insert(0,NavigableString(current_author)) + emTag.insert(0,aTag) + pAuthorTag.insert(0,emTag) + divTag.insert(dtc,pAuthorTag) + dtc += 1 + + # Add books + pBookTag = Tag(soup, "p") + ptc = 0 + + # Prefix book with read/unread symbol + if purchase['read']: + # check mark + pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL)) + pBookTag['class'] = "read_book" + ptc += 1 + else: + # hidden check mark + pBookTag['class'] = "unread_book" + pBookTag.insert(ptc,NavigableString(self.NOT_READ_SYMBOL)) + ptc += 1 + + aTag = Tag(soup, "a") + aTag['href'] = "book_%d.html" % (int(float(purchase['id']))) + aTag.insert(0,escape(purchase['title'])) + pBookTag.insert(ptc, aTag) + ptc += 1 + + divTag.insert(dtc, pBookTag) + dtc += 1 + return dtc + + # Write books by reverse chronological order + self.opts.log.info(self.updateProgressFullStep("generateHTMLByDateAdded()")) + + # Sort titles case-insensitive + self.booksByDate = sorted(self.booksByTitle, + key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) + + friendly_name = "Recently Added" + + soup = self.generateHTMLEmptyHeader(friendly_name) + body = soup.find('body') + + btc = 0 + + # Insert section tag + aTag = Tag(soup,'a') + aTag['name'] = 'section_start' + body.insert(btc, aTag) + btc += 1 + + # Insert the anchor + aTag = Tag(soup, "a") + anchor_name = friendly_name.lower() + aTag['name'] = anchor_name.replace(" ","") + body.insert(btc, aTag) + btc += 1 + ''' + # We don't need this because the kindle inserts section titles + #

By Author

+ h2Tag = Tag(soup, "h2") + aTag = Tag(soup, "a") + anchor_name = friendly_name.lower() + aTag['name'] = anchor_name.replace(" ","") + h2Tag.insert(0,aTag) + h2Tag.insert(1,NavigableString('%s' % friendly_name)) + body.insert(btc,h2Tag) + btc += 1 + ''' + + #

+ #

+ divTag = Tag(soup, "div") + dtc = 0 + + current_date = date.fromordinal(1) + current_author = None + + # Loop through books by date + this_months_list = [] + for book in self.booksByDate: + if book['timestamp'].month != current_date.month or \ + book['timestamp'].year != current_date.year: + dtc = add_books_to_HTML(this_months_list, dtc) + this_months_list = [] + current_date = book['timestamp'].date() + this_months_list.append(book) + + # Add the last month's list + add_books_to_HTML(this_months_list, dtc) + + # Add the divTag to the body + body.insert(btc, divTag) + + # Write the generated file to contentdir + outfile_spec = "%s/ByDateAdded.html" % (self.contentDir) + outfile = open(outfile_spec, 'w') + outfile.write(soup.prettify()) + outfile.close() + self.htmlFileList.append("content/ByDateAdded.html") + def generateHTMLByTags(self): # Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ... # Note that special tags - ~+*[] - have already been filtered from books[] @@ -1773,7 +1920,7 @@ class EPUB_MOBI(CatalogPlugin): self.playOrder += 1 navLabelTag = Tag(soup, 'navLabel') textTag = Tag(soup, 'text') - textTag.insert(0, NavigableString("Titles beginning with %s" % \ + textTag.insert(0, NavigableString(u"Titles beginning with %s" % \ (title_letters[i] if len(title_letters[i])>1 else "'" + title_letters[i] + "'"))) navLabelTag.insert(0, textTag) navPointByLetterTag.insert(0,navLabelTag) @@ -1894,6 +2041,102 @@ class EPUB_MOBI(CatalogPlugin): self.ncxSoup = soup + def generateNCXByDateAdded(self, tocTitle): + + self.opts.log.info(self.updateProgressFullStep("generateNCXByDateAdded()")) + + soup = self.ncxSoup + HTML_file = "content/ByDateAdded.html" + body = soup.find("navPoint") + btc = len(body.contents) + + # --- Construct the 'Recently Added' *section* --- + navPointTag = Tag(soup, 'navPoint') + navPointTag['class'] = "section" + file_ID = "%s" % tocTitle.lower() + file_ID = file_ID.replace(" ","") + navPointTag['id'] = "%s-ID" % file_ID + navPointTag['playOrder'] = self.playOrder + self.playOrder += 1 + navLabelTag = Tag(soup, 'navLabel') + textTag = Tag(soup, 'text') + textTag.insert(0, NavigableString('%s' % tocTitle)) + navLabelTag.insert(0, textTag) + nptc = 0 + navPointTag.insert(nptc, navLabelTag) + nptc += 1 + contentTag = Tag(soup,"content") + contentTag['src'] = "%s#section_start" % HTML_file + navPointTag.insert(nptc, contentTag) + nptc += 1 + + # Create an NCX article entry for each populated month + # Loop over the booksByDate list, find start of each month, + # add description_preview_count titles + # self.authors[0]:friendly [1]:author_sort [2]:book_count + current_titles_list = [] + current_author_list = [] + master_month_list = [] + current_date = self.booksByDate[0]['timestamp'] + + for book in self.booksByDate: + if book['timestamp'].month != current_date.month or \ + book['timestamp'].year != current_date.year: + # Save the old lists + current_titles_list = " • ".join(current_titles_list) + if len(current_titles_list) == self.descriptionClip: + title_list += " …" + + current_titles_list = self.formatNCXText(current_titles_list) + master_month_list.append((current_titles_list, current_date)) + + # Start the new list + current_date = book['timestamp'].date() + current_titles_list = [book['title']] + else: + if len(current_titles_list) < self.descriptionClip: + current_titles_list.append(book['title']) + + # Add the last author list + current_titles_list = " • ".join(current_titles_list) + master_month_list.append((current_titles_list, current_date)) + + # Add *article* entries for each populated author initial letter + # master_months_list{}: [0]:titles list [1]:date + for books_by_month in master_month_list: + print "titles:%s \ndate:%s" % books_by_month + navPointByLetterTag = Tag(soup, 'navPoint') + navPointByLetterTag['class'] = "article" + navPointByLetterTag['id'] = "%s-%s-ID" % (books_by_month[1].year,books_by_month[1].month ) + navPointTag['playOrder'] = self.playOrder + self.playOrder += 1 + navLabelTag = Tag(soup, 'navLabel') + textTag = Tag(soup, 'text') + textTag.insert(0, NavigableString("Books added in %s %s" % \ + (self.MONTHS[books_by_month[1].month], books_by_month[1].year))) + navLabelTag.insert(0, textTag) + navPointByLetterTag.insert(0,navLabelTag) + contentTag = Tag(soup, 'content') + contentTag['src'] = "%s#%s-%s" % (HTML_file, + books_by_month[1].year,books_by_month[1].month) + + navPointByLetterTag.insert(1,contentTag) + + if self.generateForKindle: + cmTag = Tag(soup, '%s' % 'calibre:meta') + cmTag['name'] = "description" + cmTag.insert(0, NavigableString(books_by_month[0])) + navPointByLetterTag.insert(2, cmTag) + + navPointTag.insert(nptc, navPointByLetterTag) + nptc += 1 + + # Add this section to the body + body.insert(btc, navPointTag) + btc += 1 + print soup.prettify() + self.ncxSoup = soup + def generateNCXByTags(self, tocTitle): # Create an NCX section for 'By Genre' # Add each genre as an article diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py index 458d5adb8a..a6c39ffc6b 100644 --- a/src/calibre/utils/ipc/job.py +++ b/src/calibre/utils/ipc/job.py @@ -52,10 +52,13 @@ class BaseJob(object): else: self._status_text = _('Error') if self.failed else _('Finished') if DEBUG: - prints('Job:', self.id, self.description, 'finished', + try: + prints('Job:', self.id, self.description, 'finished', safe_encode=True) - prints('\t'.join(self.details.splitlines(True)), + prints('\t'.join(self.details.splitlines(True)), safe_encode=True) + except: + pass if not self._done_called: self._done_called = True try: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 5feceb6faa..5cd90ec49a 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -111,7 +111,9 @@ class BasicNewsRecipe(Recipe): #: Specify an override encoding for sites that have an incorrect #: charset specification. The most common being specifying ``latin1`` and - #: using ``cp1252``. If None, try to detect the encoding. + #: using ``cp1252``. If None, try to detect the encoding. If it is a + #: callable, the callable is called with two arguments: The recipe object + #: and the source to be decoded. It must return the decoded source. encoding = None #: Normally we try to guess if a feed has full articles embedded in it @@ -436,7 +438,10 @@ class BasicNewsRecipe(Recipe): if raw: return _raw if not isinstance(_raw, unicode) and self.encoding: - _raw = _raw.decode(self.encoding, 'replace') + if callable(self.encoding): + _raw = self.encoding(_raw) + else: + _raw = _raw.decode(self.encoding, 'replace') massage = list(BeautifulSoup.MARKUP_MASSAGE) enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding massage.append((re.compile(r'&(\S+?);'), lambda match: @@ -756,11 +761,16 @@ class BasicNewsRecipe(Recipe): self.report_progress(0, _('Trying to download cover...')) self.download_cover() self.report_progress(0, _('Generating masthead...')) - if self.get_masthead_url(): - self.download_masthead() + try: + murl = self.get_masthead_url() + except: + self.log.exception('Failed to get masthead url') + murl = None + if murl is not None: + self.download_masthead(murl) else: - mpath = os.path.join(self.output_dir, 'mastheadImage.jpg') - self.default_masthead_image(mpath) + self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg') + self.default_masthead_image(self.masthead_path) if self.test: feeds = feeds[:2] @@ -878,55 +888,34 @@ class BasicNewsRecipe(Recipe): self.log.exception('Failed to download cover') self.cover_path = None - ''' - def convert_image(self, name): - image_ext = name.rpartition('.')[2].lower() - if image_ext in ['jpg','jpeg']: - return name - if image_ext not in ['gif']: - raise RuntimeError("web.feeds.news:BasicNewsRecipe.convert_image(): '%s' is not a supported mastheadImage format" % image_ext) - import calibre.utils.PythonMagickWand as p - img = p.NewMagickWand() - if img < 0: - raise RuntimeError('Cannot create wand.') - if not p.MagickReadImage(img, name): - self.log.warn('Failed to read image:', name) - name = name.replace('.%s' % image_ext, '.jpg') - p.MagickWriteImage(img, name) - p.DestroyMagickWand(img) - return name - ''' + def _download_masthead(self, mu): + ext = mu.rpartition('.')[-1] + if '?' in ext: + ext = '' + ext = ext.lower() if ext else 'jpg' + mpath = os.path.join(self.output_dir, 'masthead_source.'+ext) + self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg') + if os.access(mu, os.R_OK): + with open(mpath, 'wb') as mfile: + mfile.write(open(mu, 'rb').read()) + else: + with nested(open(mpath, 'wb'), closing(self.browser.open(mu))) as (mfile, r): + mfile.write(r.read()) + self.report_progress(1, _('Masthead image downloaded')) + self.prepare_masthead_image(mpath, self.masthead_path) + if os.path.exists(mpath): + os.remove(mpath) - def _download_masthead(self): + + def download_masthead(self, url): + br = BasicNewsRecipe.get_browser() try: - mu = self.get_masthead_url() - except Exception, err: - mu = None - self.log.error(_('Could not download masthead: %s')%str(err)) - self.log.debug(traceback.format_exc()) - if mu is not None: - ext = mu.rpartition('.')[-1] - if '?' in ext: - ext = '' - ext = ext.lower() if ext else 'jpg' - mpath = os.path.join(self.output_dir, 'mastheadImage.'+ext) - outfile = mpath.rpartition('.')[0] + '.jpg' - if os.access(mu, os.R_OK): - with open(mpath, 'wb') as mfile: - mfile.write(open(mu, 'rb').read()) - else: - self.report_progress(1, _('Downloading masthead from %s')%mu) - with nested(open(mpath, 'wb'), closing(self.browser.open(mu))) as (mfile, r): - mfile.write(r.read()) - self.masthead_path = self.prepare_masthead_image(mpath,outfile) - - - def download_masthead(self): - try: - self._download_masthead() + br.open(url) + self._download_masthead(url) except: - self.log.exception('Failed to download masthead') - + self.log.exception("Failed to download supplied masthead_url, synthesizing") + self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg') + self.default_masthead_image(self.masthead_path) def default_cover(self, cover_file): ''' @@ -995,6 +984,9 @@ class BasicNewsRecipe(Recipe): 'Override in subclass to use something other than the recipe title' return self.title + MI_WIDTH = 600 + MI_HEIGHT = 60 + def default_masthead_image(self, out_path): try: from PIL import Image, ImageDraw, ImageFont @@ -1002,14 +994,13 @@ class BasicNewsRecipe(Recipe): except ImportError: import Image, ImageDraw, ImageFont - - img = Image.new('RGB', (600, 100), 'white') + img = Image.new('RGB', (self.MI_WIDTH, self.MI_HEIGHT), 'white') draw = ImageDraw.Draw(img) font = ImageFont.truetype(P('fonts/liberation/LiberationSerif-Bold.ttf'), 48) text = self.get_masthead_title().encode('utf-8') width, height = draw.textsize(text, font=font) - left = max(int((600 - width)/2.), 0) - top = max(int((100 - height)/2.), 0) + left = max(int((self.MI_WIDTH - width)/2.), 0) + top = max(int((self.MI_HEIGHT - height)/2.), 0) draw.text((left, top), text, fill=(0,0,0), font=font) img.save(open(out_path, 'wb'), 'JPEG') @@ -1032,10 +1023,10 @@ class BasicNewsRecipe(Recipe): %(path_to_image, msg)) pw.PixelSetColor(p, 'white') width, height = pw.MagickGetImageWidth(img),pw.MagickGetImageHeight(img) - scaled, nwidth, nheight = fit_image(width, height, 600, 100) + scaled, nwidth, nheight = fit_image(width, height, self.MI_WIDTH, self.MI_HEIGHT) if not pw.MagickNewImage(img2, width, height, p): raise RuntimeError('Out of memory') - if not pw.MagickNewImage(frame, 600, 100, p): + if not pw.MagickNewImage(frame, self.MI_WIDTH, self.MI_HEIGHT, p): raise RuntimeError('Out of memory') if not pw.MagickCompositeImage(img2, img, pw.OverCompositeOp, 0, 0): raise RuntimeError('Out of memory') @@ -1043,8 +1034,8 @@ class BasicNewsRecipe(Recipe): if not pw.MagickResizeImage(img2, nwidth, nheight, pw.LanczosFilter, 0.5): raise RuntimeError('Out of memory') - left = int((600 - nwidth)/2.0) - top = int((100 - nheight)/2.0) + left = int((self.MI_WIDTH - nwidth)/2.0) + top = int((self.MI_HEIGHT - nheight)/2.0) if not pw.MagickCompositeImage(frame, img2, pw.OverCompositeOp, left, top): raise RuntimeError('Out of memory') @@ -1055,8 +1046,6 @@ class BasicNewsRecipe(Recipe): for x in (img, img2, frame): pw.DestroyMagickWand(x) - return out_path - def create_opf(self, feeds, dir=None): if dir is None: dir = self.output_dir @@ -1072,14 +1061,15 @@ class BasicNewsRecipe(Recipe): opf_path = os.path.join(dir, 'index.opf') ncx_path = os.path.join(dir, 'index.ncx') - # Add mastheadImage entry to section - from calibre.ebooks.metadata.opf2 import Guide - mi.guide = Guide() - ref = Guide.Reference('mastheadImage.jpg', os.getcwdu()) - ref.type = 'masthead' - ref.title = 'Masthead Image' - mi.guide.append(ref) opf = OPFCreator(dir, mi) + # Add mastheadImage entry to section + mp = getattr(self, 'masthead_path', None) + if mp is not None: + from calibre.ebooks.metadata.opf2 import Guide + ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + opf.guide.append(ref) manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] manifest.append(os.path.join(dir, 'index.html')) @@ -1099,7 +1089,6 @@ class BasicNewsRecipe(Recipe): mpath = getattr(self, 'masthead_path', None) if mpath is not None and os.access(mpath, os.R_OK): manifest.append(mpath) - opf.manifest = mpath opf.create_manifest_from_files_in(manifest) for mani in opf.manifest: