From 0168ce58667ce362d62bbc4a1f524e5882f39184 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 3 May 2011 19:13:57 -0400 Subject: [PATCH 01/15] GUI: OS X fix, show books in library when device connected as menu item so user can go to their library. --- src/calibre/gui2/layout.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index b5cc0163ed..b3c9bd3a02 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -44,18 +44,19 @@ class LocationManager(QObject): # {{{ receiver = partial(self._location_selected, name) ac.triggered.connect(receiver) self.tooltips[name] = tooltip + + m = QMenu(parent) + self._mem.append(m) + a = m.addAction(icon, tooltip) + a.triggered.connect(receiver) if name != 'library': - m = QMenu(parent) - self._mem.append(m) - a = m.addAction(icon, tooltip) - a.triggered.connect(receiver) self._mem.append(a) a = m.addAction(QIcon(I('eject.png')), _('Eject this device')) a.triggered.connect(self._eject_requested) - ac.setMenu(m) self._mem.append(a) else: ac.setToolTip(tooltip) + ac.setMenu(m) ac.calibre_name = name return ac @@ -71,7 +72,12 @@ class LocationManager(QObject): # {{{ def set_switch_actions(self, quick_actions, rename_actions, delete_actions, switch_actions, choose_action): - self.switch_menu = QMenu() + self.switch_menu = self.library_action.menu() + if self.switch_menu: + self.switch_menu.addSeparator() + else: + self.switch_menu = QMenu() + self.switch_menu.addAction(choose_action) self.cs_menus = [] for t, acs in [(_('Quick switch'), quick_actions), @@ -85,7 +91,9 @@ class LocationManager(QObject): # {{{ self.switch_menu.addSeparator() for ac in switch_actions: self.switch_menu.addAction(ac) - self.library_action.setMenu(self.switch_menu) + + if self.switch_menu != self.library_action.menu(): + self.library_action.setMenu(self.switch_menu) def _location_selected(self, location, *args): if location != self.current_location and hasattr(self, From ce28d66991ebd825ce41ecedb68503be2c243267 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 3 May 2011 19:37:27 -0400 Subject: [PATCH 02/15] Store: Open external quick check in search dialog. --- src/calibre/gui2/store/search/search.py | 5 ++++- src/calibre/gui2/store/search/search.ui | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py index 5654df8ffc..07d4afca54 100644 --- a/src/calibre/gui2/store/search/search.py +++ b/src/calibre/gui2/store/search/search.py @@ -155,6 +155,7 @@ class SearchDialog(QDialog, Ui_Dialog): self.config['results_view_column_width'] = [self.results_view.columnWidth(i) for i in range(self.results_view.model().columnCount())] self.config['sort_col'] = self.results_view.model().sort_col self.config['sort_order'] = self.results_view.model().sort_order + self.config['open_external'] = self.open_external.isChecked() store_check = {} for n in self.store_plugins: @@ -179,6 +180,8 @@ class SearchDialog(QDialog, Ui_Dialog): else: self.resize_columns() + self.open_external.setChecked(self.config.get('open_external', False)) + store_check = self.config.get('store_checked', None) if store_check: for n in store_check: @@ -212,7 +215,7 @@ class SearchDialog(QDialog, Ui_Dialog): def open_store(self, index): result = self.results_view.model().get_result(index) - self.store_plugins[result.store_name].open(self, result.detail_item) + self.store_plugins[result.store_name].open(self, result.detail_item, self.open_external.isChecked()) def check_progress(self): if not self.search_pool.threads_running() and not self.results_view.model().cover_pool.threads_running() and not self.results_view.model().details_pool.threads_running(): diff --git a/src/calibre/gui2/store/search/search.ui b/src/calibre/gui2/store/search/search.ui index 0d39a70a29..7e8dd36284 100644 --- a/src/calibre/gui2/store/search/search.ui +++ b/src/calibre/gui2/store/search/search.ui @@ -70,7 +70,7 @@ 0 0 215 - 116 + 93 @@ -101,6 +101,16 @@ + + + + Open a selected book in the system's web browser + + + Open external + + + From bb0e6a60e749ad5fe9768fcee11a45b3d4ded7de Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 10:24:23 -0600 Subject: [PATCH 03/15] ... --- recipes/telepolis.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/telepolis.recipe b/recipes/telepolis.recipe index 4ca57f8275..8109e3e39a 100644 --- a/recipes/telepolis.recipe +++ b/recipes/telepolis.recipe @@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe): recursion = 0 no_stylesheets = True encoding = "utf-8" - language = 'de_AT' + language = 'de' use_embedded_content =False remove_empty_feeds = True From bfbd42dd6d0bc5494162cea64c981e46ab8ab8be Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 10:39:20 -0600 Subject: [PATCH 04/15] Fix USA Today --- recipes/usatoday.recipe | 397 ++-------------------------------------- 1 file changed, 20 insertions(+), 377 deletions(-) diff --git a/recipes/usatoday.recipe b/recipes/usatoday.recipe index bd47262563..a4899b7187 100644 --- a/recipes/usatoday.recipe +++ b/recipes/usatoday.recipe @@ -7,13 +7,11 @@ usatoday.com ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag -import re class USAToday(BasicNewsRecipe): title = 'USA Today' - __author__ = 'GRiker' + __author__ = 'Kovid Goyal' oldest_article = 1 timefmt = '' max_articles_per_feed = 20 @@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe): margin-bottom: 0em; \ font-size: smaller;}\n \ .articleBody {text-align: left;}\n ' - conversion_options = { 'linearize_tables' : True } #simultaneous_downloads = 1 feeds = [ ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'), @@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe): ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'), ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'), ] - keep_only_tags = [dict(attrs={'class':[ - 'byLine', - 'inside-copy', - 'inside-head', - 'inside-head2', - 'item', - 'item-block', - 'photo-container', - ]}), - dict(id=[ - 'applyMainStoryPhoto', - 'permalink', - ])] + keep_only_tags = [dict(attrs={'class':'story'})] + remove_tags = [ + dict(attrs={'class':[ + 'share', + 'reprints', + 'inline-h3', + 'info-extras', + 'ppy-outer', + 'ppy-caption', + 'comments', + 'jump', + 'pagetools', + 'post-attributes', + 'tags', + 'bottom-tools', + 'sponsoredlinks', + ]}), + dict(id=['pluck']), + ] - remove_tags = [dict(attrs={'class':[ - 'comments', - 'jump', - 'pagetools', - 'post-attributes', - 'tags', - ]}), - dict(id=[])] - - #feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')] - - def dump_hex(self, src, length=16): - ''' Diagnostic ''' - FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) - N=0; result='' - while src: - s,src = src[:length],src[length:] - hexa = ' '.join(["%02X"%ord(x) for x in s]) - s = s.translate(FILTER) - result += "%04X %-*s %s\n" % (N, length*3, hexa, s) - N+=length - print result - - def fixChars(self,string): - # Replace lsquo (\x91) - fixed = re.sub("\x91","‘",string) - - # Replace rsquo (\x92) - fixed = re.sub("\x92","’",fixed) - - # Replace ldquo (\x93) - fixed = re.sub("\x93","“",fixed) - - # Replace rdquo (\x94) - fixed = re.sub("\x94","”",fixed) - - # Replace ndash (\x96) - fixed = re.sub("\x96","–",fixed) - - # Replace mdash (\x97) - fixed = re.sub("\x97","—",fixed) - - return fixed def get_masthead_url(self): masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif' @@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe): masthead = None return masthead - def massageNCXText(self, description): - # Kindle TOC descriptions won't render certain characters - if description: - massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&","&", massaged) - return self.fixChars(massaged) - else: - return description - def parse_feeds(self, *args, **kwargs): - parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs) - # Count articles for progress dialog - article_count = 0 - for feed in parsed_feeds: - article_count += len(feed) - self.log( "Queued %d articles" % article_count) - return parsed_feeds - - def preprocess_html(self, soup): - soup = self.strip_anchors(soup) - return soup - - def postprocess_html(self, soup, first_fetch): - - # Remove navLinks
- navLinks = soup.find(True,{'style':'padding-bottom:3px'}) - if navLinks: - navLinks.extract() - - # Remove
- gibberish = soup.find(True,{'style':'margin-bottom:10px'}) - if gibberish: - gibberish.extract() - - # Change to

- headline = soup.find(True, {'class':['inside-head','inside-head2']}) - if not headline: - headline = soup.find('h3') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, headline.contents[0]) - headline.replaceWith(tag) - else: - print "unable to find headline:\n%s\n" % soup - - # Change byLine to byline, change commas to middot - # Kindle renders commas in byline as '&' - byline = soup.find(True, {'class':'byLine'}) - if byline: - byline['class'] = 'byline' - # Replace comma with middot - byline.contents[0].replaceWith(re.sub(","," ·", byline.renderContents())) - - jumpout_punc_list = [':','?'] - # Remove the inline jumpouts in
- paras = soup.findAll(True, {'class':'inside-copy'}) - for para in paras: - if re.match("[\w\W]+ ",para.renderContents()): - p = para.find('b') - for punc in jumpout_punc_list: - punc_offset = p.contents[0].find(punc) - if punc_offset == -1: - continue - if punc_offset > 1: - if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper(): - #print "extracting \n%s\n" % para.prettify() - para.extract() - - # Reset class for remaining - paras = soup.findAll(True, {'class':'inside-copy'}) - for para in paras: - para['class'] = 'articleBody' - - # Remove inline jumpouts in

- paras = soup.findAll(['p']) - for p in paras: - if hasattr(p,'contents') and len(p.contents): - for punc in jumpout_punc_list: - punc_offset = p.contents[0].find(punc) - if punc_offset == -1: - continue - if punc_offset > 2 and hasattr(p,'a') and len(p.contents): - #print "evaluating %s\n" % p.contents[0][:punc_offset+1] - if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper(): - #print "extracting \n%s\n" % p.prettify() - p.extract() - - # Capture the first img, insert after headline - imgs = soup.findAll('img') - print "postprocess_html(): %d images" % len(imgs) - if imgs: - divTag = Tag(soup, 'div') - divTag['class'] = 'image' - body = soup.find('body') - img = imgs[0] - #print "img: \n%s\n" % img.prettify() - - # Table for photo and credit - tableTag = Tag(soup,'table') - - # Photo - trimgTag = Tag(soup, 'tr') - tdimgTag = Tag(soup, 'td') - tdimgTag.insert(0,img) - trimgTag.insert(0,tdimgTag) - tableTag.insert(0,trimgTag) - - # Credit - trcreditTag = Tag(soup, 'tr') - - tdcreditTag = Tag(soup, 'td') - tdcreditTag['class'] = 'credit' - credit = soup.find('td',{'class':'photoCredit'}) - if credit: - tdcreditTag.insert(0,NavigableString(credit.renderContents())) - else: - credit = img['credit'] - if credit: - tdcreditTag.insert(0,NavigableString(credit)) - else: - tdcreditTag.insert(0,NavigableString('')) - - trcreditTag.insert(0,tdcreditTag) - tableTag.insert(1,trcreditTag) - dtc = 0 - divTag.insert(dtc,tableTag) - dtc += 1 - - if False: - # Add the caption in the table - tableCaptionTag = Tag(soup,'caption') - tableCaptionTag.insert(0,soup.find('td',{'class':'photoCredit'}).renderContents()) - tableTag.insert(1,tableCaptionTag) - divTag.insert(dtc,tableTag) - dtc += 1 - body.insert(1,divTag) - else: - # Add the caption below the table - #print "Looking for caption in this soup:\n%s" % img.prettify() - captionTag = Tag(soup,'p') - captionTag['class'] = 'caption' - if hasattr(img,'alt') and img['alt']: - captionTag.insert(0,NavigableString('

%s
' % img['alt'])) - divTag.insert(dtc, captionTag) - dtc += 1 - else: - try: - captionTag.insert(0,NavigableString('
%s
' % img['cutline'])) - divTag.insert(dtc, captionTag) - dtc += 1 - except: - pass - - hrTag = Tag(soup, 'hr') - divTag.insert(dtc, hrTag) - dtc += 1 - - # Delete
- restructure - tag = body.find(True) - while True: - insertLoc += 1 - try: - if hasattr(tag,'class') and tag['class'] == 'headline': - headline_found = True - tag.insert(insertLoc,divTag) - break - except: - pass - tag = tag.next - if not tag: - break - - # Yank out headline, img and caption - headline = body.find('h2','headline') - img = body.find('div','image') - caption = body.find('p''class') - - # body(0) is calibre_navbar - # body(1) is
- - btc = 1 - headline.extract() - body.insert(1, headline) - btc += 1 - if img: - img.extract() - body.insert(btc, img) - btc += 1 - if caption: - caption.extract() - body.insert(btc, caption) - btc += 1 - - if len(imgs) > 1: - if True: - [img.extract() for img in imgs[1:]] - else: - # Format the remaining images - # This doesn't work yet - for img in imgs[1:]: - print "img:\n%s\n" % img.prettify() - divTag = Tag(soup, 'div') - divTag['class'] = 'image' - - # Table for photo and credit - tableTag = Tag(soup,'table') - - # Photo - trimgTag = Tag(soup, 'tr') - tdimgTag = Tag(soup, 'td') - tdimgTag.insert(0,img) - trimgTag.insert(0,tdimgTag) - tableTag.insert(0,trimgTag) - - # Credit - trcreditTag = Tag(soup, 'tr') - - tdcreditTag = Tag(soup, 'td') - tdcreditTag['class'] = 'credit' - try: - tdcreditTag.insert(0,NavigableString(img['credit'])) - except: - tdcreditTag.insert(0,NavigableString('')) - trcreditTag.insert(0,tdcreditTag) - tableTag.insert(1,trcreditTag) - divTag.insert(0,tableTag) - soup.img.replaceWith(divTag) - - return soup - - def postprocess_book(self, oeb, opts, log) : - - def extract_byline(href) : - # '' : - return self.massageNCXText(self.tag_to_string(p,use_alt=False)) - else: - print "Didn't find
in this soup:\n%s" % soup.prettify() - return None - - # Method entry point here - # Single section toc looks different than multi-section tocs - if oeb.toc.depth() == 2 : - for article in oeb.toc : - if article.author is None : - article.author = extract_byline(article.href) - if article.description is None : - article.description = extract_description(article.href) - elif oeb.toc.depth() == 3 : - for section in oeb.toc : - for article in section : - article.author = extract_byline(article.href) - ''' - if article.author is None : - article.author = self.massageNCXText(extract_byline(article.href)) - else: - article.author = self.massageNCXText(article.author) - ''' - if article.description is None : - article.description = extract_description(article.href) - - def strip_anchors(self,soup): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - a.replaceWith(a.renderContents().decode('cp1252','replace')) - return soup From 1400f54107185d80ea880bc786de78910a382e48 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 11:02:56 -0600 Subject: [PATCH 05/15] EPUB metadata: When extracting covers from epub files handle invalid epubs that specify their content as a raster cover. Apparently PG produces these. --- src/calibre/ebooks/metadata/opf2.py | 4 +++- src/calibre/gui2/actions/choose_library.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 58c887bfdb..1d91236757 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -966,7 +966,9 @@ class OPF(object): # {{{ cover_id = covers[0].get('content') for item in self.itermanifest(): if item.get('id', None) == cover_id: - return item.get('href', None) + mt = item.get('media-type', '') + if 'xml' not in mt: + return item.get('href', None) @dynamic_property def cover(self): diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index 4b262ad9dd..a663f288af 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -246,7 +246,7 @@ class ChooseLibraryAction(InterfaceAction): def delete_requested(self, name, location): loc = location.replace('/', os.sep) if not question_dialog(self.gui, _('Are you sure?'), '

'+ - _('All files from %s will be ' + _('All files from %s will be ' 'permanently deleted. Are you sure?') % loc, show_copy_button=False): return From fe90a1b04fa8fecd5abfacbc795ca8b526897b11 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 13:13:38 -0600 Subject: [PATCH 06/15] Implement #777001 (Add Preview ePub button to Tweak ePub dialog) --- src/calibre/gui2/dialogs/tweak_epub.py | 38 ++++++++++++++--- src/calibre/gui2/dialogs/tweak_epub.ui | 57 +++++++++++++++----------- 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/src/calibre/gui2/dialogs/tweak_epub.py b/src/calibre/gui2/dialogs/tweak_epub.py index edc274c9b2..732d74b77d 100755 --- a/src/calibre/gui2/dialogs/tweak_epub.py +++ b/src/calibre/gui2/dialogs/tweak_epub.py @@ -7,16 +7,16 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' import os, shutil -from contextlib import closing from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED from PyQt4.Qt import QDialog from calibre.constants import isosx -from calibre.gui2 import open_local_file +from calibre.gui2 import open_local_file, error_dialog from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog from calibre.libunzip import extract as zipextract -from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.ptempfile import (PersistentTemporaryDirectory, + PersistentTemporaryFile) class TweakEpub(QDialog, Ui_Dialog): ''' @@ -37,11 +37,15 @@ class TweakEpub(QDialog, Ui_Dialog): self.cancel_button.clicked.connect(self.reject) self.explode_button.clicked.connect(self.explode) self.rebuild_button.clicked.connect(self.rebuild) + self.preview_button.clicked.connect(self.preview) # Position update dialog overlaying top left of app window parent_loc = parent.pos() self.move(parent_loc.x(),parent_loc.y()) + self.gui = parent + self._preview_files = [] + def cleanup(self): if isosx: try: @@ -55,6 +59,11 @@ class TweakEpub(QDialog, Ui_Dialog): # Delete directory containing exploded ePub if self._exploded is not None: shutil.rmtree(self._exploded, ignore_errors=True) + for x in self._preview_files: + try: + os.remove(x) + except: + pass def display_exploded(self): ''' @@ -71,9 +80,8 @@ class TweakEpub(QDialog, Ui_Dialog): self.rebuild_button.setEnabled(True) self.explode_button.setEnabled(False) - def rebuild(self, *args): - self._output = os.path.join(self._exploded, 'rebuilt.epub') - with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf: + def do_rebuild(self, src): + with ZipFile(src, 'w', compression=ZIP_DEFLATED) as zf: # Write mimetype zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED) # Write everything else @@ -86,5 +94,23 @@ class TweakEpub(QDialog, Ui_Dialog): zfn = os.path.relpath(absfn, self._exploded).replace(os.sep, '/') zf.write(absfn, zfn) + + def preview(self): + if not self._exploded: + return error_dialog(self, _('Cannot preview'), + _('You must first explode the epub before previewing.'), + show=True) + + tf = PersistentTemporaryFile('.epub') + tf.close() + self._preview_files.append(tf.name) + + self.do_rebuild(tf.name) + + self.gui.iactions['View']._view_file(tf.name) + + def rebuild(self, *args): + self._output = os.path.join(self._exploded, 'rebuilt.epub') + self.do_rebuild(self._output) return QDialog.accept(self) diff --git a/src/calibre/gui2/dialogs/tweak_epub.ui b/src/calibre/gui2/dialogs/tweak_epub.ui index fc6f24675f..a59af4fde1 100644 --- a/src/calibre/gui2/dialogs/tweak_epub.ui +++ b/src/calibre/gui2/dialogs/tweak_epub.ui @@ -23,6 +23,16 @@ false + + + + <p>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window <b>and the editor windows you used to edit files in the epub</b>.</p><p>Rebuild the ePub, updating your calibre library.</p> + + + true + + + @@ -37,23 +47,6 @@ - - - - false - - - Rebuild ePub from exploded contents - - - &Rebuild ePub - - - - :/images/exec.png:/images/exec.png - - - @@ -68,13 +61,31 @@ - - - - <p>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window <b>and the editor windows you used to edit files in the epub</b>.</p><p>Rebuild the ePub, updating your calibre library.</p> + + + + false - - true + + Rebuild ePub from exploded contents + + + &Rebuild ePub + + + + :/images/exec.png:/images/exec.png + + + + + + + &Preview ePub + + + + :/images/view.png:/images/view.png From c84c5f297bcfcd30f99f503768acd86235f1da43 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 15:18:37 -0600 Subject: [PATCH 07/15] Add select all/none buttons to the metadata download prefs --- .../gui2/preferences/metadata_sources.py | 13 ++++++++++++- .../gui2/preferences/metadata_sources.ui | 18 ++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index f487051d07..05ff23987d 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ -190,7 +190,15 @@ class FieldsModel(QAbstractListModel): # {{{ return ans | Qt.ItemIsUserCheckable def restore_defaults(self): - self.overrides = dict([(f, self.state(f, True)) for f in self.fields]) + self.overrides = dict([(f, self.state(f, Qt.Checked)) for f in self.fields]) + self.reset() + + def select_all(self): + self.overrides = dict([(f, Qt.Checked) for f in self.fields]) + self.reset() + + def clear_all(self): + self.overrides = dict([(f, Qt.Unchecked) for f in self.fields]) self.reset() def setData(self, index, val, role): @@ -273,6 +281,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.fields_view.setModel(self.fields_model) self.fields_model.dataChanged.connect(self.changed_signal) + self.select_all_button.clicked.connect(self.fields_model.select_all) + self.clear_all_button.clicked.connect(self.fields_model.clear_all) + def configure_plugin(self): for index in self.sources_view.selectionModel().selectedRows(): plugin = self.sources_model.data(index, Qt.UserRole) diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui index e46069b036..ff161654dd 100644 --- a/src/calibre/gui2/preferences/metadata_sources.ui +++ b/src/calibre/gui2/preferences/metadata_sources.ui @@ -77,8 +77,8 @@ Downloaded metadata fields - - + + If you uncheck any fields, metadata for those fields will not be downloaded @@ -88,6 +88,20 @@ + + + + &Select all + + + + + + + &Clear all + + + From 45ec80f4bb580b88d6c957906f5201648c2b9edf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 15:49:34 -0600 Subject: [PATCH 08/15] For the lazy among us --- src/calibre/gui2/actions/choose_library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index a663f288af..f6b19fc4aa 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -246,7 +246,7 @@ class ChooseLibraryAction(InterfaceAction): def delete_requested(self, name, location): loc = location.replace('/', os.sep) if not question_dialog(self.gui, _('Are you sure?'), '

'+ - _('All files from %s will be ' + _('All files from

%s

will be ' 'permanently deleted. Are you sure?') % loc, show_copy_button=False): return From a34ea87a940feaf743ade8eddb893ce353aef6be Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 4 May 2011 18:56:30 -0400 Subject: [PATCH 09/15] Fix Bug #763105: RTF output displays some unicode characters incorrectly. --- src/calibre/ebooks/rtf/rtfml.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py index 97fa175d1a..cd877c63c2 100644 --- a/src/calibre/ebooks/rtf/rtfml.py +++ b/src/calibre/ebooks/rtf/rtfml.py @@ -79,8 +79,7 @@ def txt2rtf(text): elif val <= 127: buf.write(x) else: - repl = ascii_text(x) - c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl)) + c = r'\u{0:d}?'.format(val) buf.write(c) return buf.getvalue() From e15ee70a1ded03bf4b84f5a30f8fce89aeefa56e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 18:56:07 -0600 Subject: [PATCH 10/15] ODT Input: Speed up conversion of ODT files that define huge amounts of redundant style information. Fixes #777468 (Conversion from ODT to EPUB extremely slow) --- src/calibre/ebooks/odt/input.py | 55 ++++++++++++++++++++++++++++++--- src/odf/odf2xhtml.py | 14 +++++++-- 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py index 1184148e80..10553dac2b 100644 --- a/src/calibre/ebooks/odt/input.py +++ b/src/calibre/ebooks/odt/input.py @@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en' Convert an ODT file into a Open Ebook ''' import os + +from lxml import etree from odf.odf2xhtml import ODF2XHTML from calibre import CurrentDir, walk @@ -23,7 +25,48 @@ class Extract(ODF2XHTML): with open(name, 'wb') as f: f.write(data) - def __call__(self, stream, odir): + def filter_css(self, html, log): + root = etree.fromstring(html) + style = root.xpath('//*[local-name() = "style" and @type="text/css"]') + if style: + style = style[0] + css = style.text + if css: + style.text, sel_map = self.do_filter_css(css) + for x in root.xpath('//*[@class]'): + extra = [] + orig = x.get('class') + for cls in orig.split(): + extra.extend(sel_map.get(cls, [])) + if extra: + x.set('class', orig + ' ' + ' '.join(extra)) + html = etree.tostring(root, encoding='utf-8', + xml_declaration=True) + return html + + def do_filter_css(self, css): + from cssutils import parseString + from cssutils.css import CSSRule + sheet = parseString(css) + rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) + sel_map = {} + count = 0 + for r in rules: + # Check if we have only class selectors for this rule + nc = [x for x in r.selectorList if not + x.selectorText.startswith('.')] + if len(r.selectorList) > 1 and not nc: + replace_name = 'c_odt%d'%count + count += 1 + for sel in r.selectorList: + s = sel.selectorText[1:] + if s not in sel_map: + sel_map[s] = [] + sel_map[s].append(replace_name) + r.selectorText = '.'+replace_name + return sheet.cssText, sel_map + + def __call__(self, stream, odir, log): from calibre.utils.zipfile import ZipFile from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.opf2 import OPFCreator @@ -32,13 +75,17 @@ class Extract(ODF2XHTML): if not os.path.exists(odir): os.makedirs(odir) with CurrentDir(odir): - print 'Extracting ODT file...' + log('Extracting ODT file...') html = self.odf2xhtml(stream) # A blanket img specification like this causes problems - # with EPUB output as the contaiing element often has + # with EPUB output as the containing element often has # an absolute height and width set that is larger than # the available screen real estate html = html.replace('img { width: 100%; height: 100%; }', '') + try: + html = self.filter_css(html, log) + except: + log.exception('Failed to filter CSS, conversion may be slow') with open('index.xhtml', 'wb') as f: f.write(html.encode('utf-8')) zf = ZipFile(stream, 'r') @@ -67,7 +114,7 @@ class ODTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): - return Extract()(stream, '.') + return Extract()(stream, '.', log) def postprocess_book(self, oeb, opts, log): # Fix

constructs as the asinine epubchecker complains diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 26da9d9905..a04aa48bf7 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -841,11 +841,19 @@ ol, ul { padding-left: 2em; } self.styledict[name] = styles # Write the styles to HTML self.writeout(self.default_styles) + # Changed by Kovid to not write out endless copies of the same style + css_styles = {} for name in self.stylestack: styles = self.styledict.get(name) - css2 = self.cs.convert_styles(styles) - self.writeout("%s {\n" % name) - for style, val in css2.items(): + css2 = tuple(self.cs.convert_styles(styles).iteritems()) + if css2 in css_styles: + css_styles[css2].append(name) + else: + css_styles[css2] = [name] + + for css2, names in css_styles.iteritems(): + self.writeout("%s {\n" % ', '.join(names)) + for style, val in css2: self.writeout("\t%s: %s;\n" % (style, val) ) self.writeout("}\n") From 07da7b239c377a856fbc43f16aa2f01042964e3d Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 4 May 2011 21:48:44 -0400 Subject: [PATCH 11/15] Fix Bug #775669: HTMLZ OPF sets cover, adds cover properly when updating metadata, reads cover form guide section of opf in archive. --- src/calibre/ebooks/htmlz/input.py | 25 +++++++++++++++++-- src/calibre/ebooks/htmlz/output.py | 25 ++++++++++++++++++- src/calibre/ebooks/metadata/extz.py | 24 ++++++++++-------- src/calibre/ebooks/oeb/transforms/metadata.py | 2 +- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/htmlz/input.py b/src/calibre/ebooks/htmlz/input.py index dcf2ed0ed3..d083fcc4ab 100644 --- a/src/calibre/ebooks/htmlz/input.py +++ b/src/calibre/ebooks/htmlz/input.py @@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' import os +import posixpath -from calibre import walk +from calibre import guess_type, walk from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.chardet import xml_to_unicode +from calibre.ebooks.metadata.opf2 import OPF from calibre.utils.zipfile import ZipFile class HTMLZInput(InputFormatPlugin): @@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin): # Extract content from zip archive. zf = ZipFile(stream) - zf.extractall('.') + zf.extractall() for x in walk('.'): if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'): @@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin): from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata mi = get_file_type_metadata(stream, file_ext) meta_info_to_oeb_metadata(mi, oeb.metadata, log) + + # Get the cover path from the OPF. + cover_href = None + opf = None + for x in walk('.'): + if os.path.splitext(x)[1].lower() in ('.opf'): + opf = x + break + if opf: + opf = OPF(opf) + cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name)) + # Set the cover. + if cover_href: + cdata = None + with open(cover_href, 'rb') as cf: + cdata = cf.read() + id, href = oeb.manifest.generate('cover', cover_href) + oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata) + oeb.guide.add('cover', 'Cover', href) return oeb diff --git a/src/calibre/ebooks/htmlz/output.py b/src/calibre/ebooks/htmlz/output.py index 6d2ad54a12..a1ef57af2c 100644 --- a/src/calibre/ebooks/htmlz/output.py +++ b/src/calibre/ebooks/htmlz/output.py @@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' import os +from cStringIO import StringIO from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation +from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile @@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin): fname = os.path.join(tdir, 'images', images[item.href]) with open(fname, 'wb') as img: img.write(data) + + # Cover + cover_path = None + try: + cover_data = None + if oeb_book.metadata.cover: + term = oeb_book.metadata.cover[0].term + cover_data = oeb_book.guide[term].item.data + if cover_data: + from calibre.utils.magick.draw import save_cover_data_to + cover_path = os.path.join(tdir, 'cover.jpg') + with open(cover_path, 'w') as cf: + cf.write('') + save_cover_data_to(cover_data, cover_path) + except: + import traceback + traceback.print_exc() # Metadata with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: - mdataf.write(etree.tostring(oeb_book.metadata.to_opf1())) + opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1()))) + mi = opf.to_book_metadata() + if cover_path: + mi.cover = 'cover.jpg' + mdataf.write(metadata_to_opf(mi)) htmlz = ZipFile(output_path, 'w') htmlz.add_dir(tdir) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 18069b2a6a..21c10278e1 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -13,7 +13,7 @@ import posixpath from cStringIO import StringIO from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.opf2 import OPF +from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.zipfile import ZipFile, safe_replace @@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True): opf = OPF(opf_stream) mi = opf.to_book_metadata() if extract_cover: - cover_name = opf.raster_cover - if cover_name: - mi.cover_data = ('jpg', zf.read(cover_name)) + cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name)) + if cover_href: + mi.cover_data = ('jpg', zf.read(cover_href)) except: return mi return mi @@ -59,17 +59,19 @@ def set_metadata(stream, mi): except: pass if new_cdata: - raster_cover = opf.raster_cover - if not raster_cover: - raster_cover = 'cover.jpg' - cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover) + cover = opf.cover + if not cover: + cover = 'cover.jpg' + cpath = posixpath.join(posixpath.dirname(opf_path), cover) new_cover = _write_new_cover(new_cdata, cpath) replacements[cpath] = open(new_cover.name, 'rb') + mi.cover = cover # Update the metadata. - opf.smart_update(mi, replace_metadata=True) - newopf = StringIO(opf.render()) - safe_replace(stream, opf_path, newopf, extra_replacements=replacements) + old_mi = opf.to_book_metadata() + old_mi.smart_update(mi) + newopf = StringIO(metadata_to_opf(old_mi)) + safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True) # Cleanup temporary files. try: diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index 19c209b74d..f719ee3eb5 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False): m.clear('description') m.add('description', mi.comments) elif override_input_metadata: - m.clear('description') + m.clear('description') if not mi.is_null('publisher'): m.clear('publisher') m.add('publisher', mi.publisher) From 6fbb996d2a30afd36da0e3566cc8e22e23a912ab Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 4 May 2011 21:50:08 -0400 Subject: [PATCH 12/15] HTMLZ: Keep existing non-metadata sections when updating. --- src/calibre/ebooks/metadata/extz.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 21c10278e1..1bda263015 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -70,7 +70,8 @@ def set_metadata(stream, mi): # Update the metadata. old_mi = opf.to_book_metadata() old_mi.smart_update(mi) - newopf = StringIO(metadata_to_opf(old_mi)) + opf.smart_update(metadata_to_opf(old_mi)) + newopf = StringIO(opf.render()) safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True) # Cleanup temporary files. From b461f5bc26030f3f8ce43d02facc3914ade45c5e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2011 20:30:34 -0600 Subject: [PATCH 13/15] ... --- src/calibre/ebooks/odt/input.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py index 10553dac2b..e724acb981 100644 --- a/src/calibre/ebooks/odt/input.py +++ b/src/calibre/ebooks/odt/input.py @@ -56,6 +56,9 @@ class Extract(ODF2XHTML): nc = [x for x in r.selectorList if not x.selectorText.startswith('.')] if len(r.selectorList) > 1 and not nc: + # Replace all the class selectors with a single class selector + # This will be added to the class attribute of all elements + # that have one of these selectors. replace_name = 'c_odt%d'%count count += 1 for sel in r.selectorList: From e101896e5720e58685633c3a5828cf88263ea323 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 5 May 2011 06:49:36 -0400 Subject: [PATCH 14/15] HTMLZ: metadata replace metadata after merging. APNX don't use tag starting with d as paragraph tag as it breaks more than it fixes. --- src/calibre/devices/kindle/apnx.py | 2 +- src/calibre/ebooks/metadata/extz.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index ee519750e0..178c1091f3 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -164,7 +164,7 @@ class APNXBuilder(object): if c == '/': closing = True continue - elif c in ('d', 'p'): + elif c == 'p': if closing: in_p = False else: diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 1bda263015..021450fca5 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -70,7 +70,7 @@ def set_metadata(stream, mi): # Update the metadata. old_mi = opf.to_book_metadata() old_mi.smart_update(mi) - opf.smart_update(metadata_to_opf(old_mi)) + opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True) newopf = StringIO(opf.render()) safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True) From 6dc8803ac569d739f1b53a76faaf6a510c80eb3f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 5 May 2011 09:21:31 -0600 Subject: [PATCH 15/15] Add uri->url identifiers mapping --- src/calibre/ebooks/metadata/sources/identify.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 3d4807ac02..1bd071d6f9 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -13,6 +13,7 @@ from Queue import Queue, Empty from threading import Thread from io import BytesIO from operator import attrgetter +from urlparse import urlparse from calibre.customize.ui import metadata_plugins, all_metadata_plugins from calibre.ebooks.metadata.sources.base import create_log, msprefs @@ -458,6 +459,14 @@ def urls_from_identifiers(identifiers): # {{{ if oclc: ans.append(('OCLC', 'oclc', oclc, 'http://www.worldcat.org/oclc/'+oclc)) + url = identifiers.get('uri', None) + if url is None: + url = identifiers.get('url', None) + if url and url.startswith('http'): + url = url[:8].replace('|', ':') + url[8:].replace('|', ',') + parts = urlparse(url) + name = parts.netloc + ans.append((name, 'url', url, url)) return ans # }}}