diff --git a/recipes/telepolis.recipe b/recipes/telepolis.recipe index 4ca57f8275..8109e3e39a 100644 --- a/recipes/telepolis.recipe +++ b/recipes/telepolis.recipe @@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe): recursion = 0 no_stylesheets = True encoding = "utf-8" - language = 'de_AT' + language = 'de' use_embedded_content =False remove_empty_feeds = True diff --git a/recipes/usatoday.recipe b/recipes/usatoday.recipe index bd47262563..a4899b7187 100644 --- a/recipes/usatoday.recipe +++ b/recipes/usatoday.recipe @@ -7,13 +7,11 @@ usatoday.com ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag -import re class USAToday(BasicNewsRecipe): title = 'USA Today' - __author__ = 'GRiker' + __author__ = 'Kovid Goyal' oldest_article = 1 timefmt = '' max_articles_per_feed = 20 @@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe): margin-bottom: 0em; \ font-size: smaller;}\n \ .articleBody {text-align: left;}\n ' - conversion_options = { 'linearize_tables' : True } #simultaneous_downloads = 1 feeds = [ ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'), @@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe): ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'), ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'), ] - keep_only_tags = [dict(attrs={'class':[ - 'byLine', - 'inside-copy', - 'inside-head', - 'inside-head2', - 'item', - 'item-block', - 'photo-container', - ]}), - dict(id=[ - 'applyMainStoryPhoto', - 'permalink', - ])] + keep_only_tags = [dict(attrs={'class':'story'})] + remove_tags = [ + dict(attrs={'class':[ + 'share', + 'reprints', + 'inline-h3', + 'info-extras', + 'ppy-outer', + 'ppy-caption', + 'comments', + 'jump', + 'pagetools', + 'post-attributes', + 'tags', + 'bottom-tools', + 'sponsoredlinks', + ]}), + dict(id=['pluck']), + ] - remove_tags = [dict(attrs={'class':[ - 'comments', - 'jump', - 'pagetools', - 'post-attributes', - 'tags', - ]}), - dict(id=[])] - - #feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')] - - def dump_hex(self, src, length=16): - ''' Diagnostic ''' - FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) - N=0; result='' - while src: - s,src = src[:length],src[length:] - hexa = ' '.join(["%02X"%ord(x) for x in s]) - s = s.translate(FILTER) - result += "%04X %-*s %s\n" % (N, length*3, hexa, s) - N+=length - print result - - def fixChars(self,string): - # Replace lsquo (\x91) - fixed = re.sub("\x91","‘",string) - - # Replace rsquo (\x92) - fixed = re.sub("\x92","’",fixed) - - # Replace ldquo (\x93) - fixed = re.sub("\x93","“",fixed) - - # Replace rdquo (\x94) - fixed = re.sub("\x94","”",fixed) - - # Replace ndash (\x96) - fixed = re.sub("\x96","–",fixed) - - # Replace mdash (\x97) - fixed = re.sub("\x97","—",fixed) - - return fixed def get_masthead_url(self): masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif' @@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe): masthead = None return masthead - def massageNCXText(self, description): - # Kindle TOC descriptions won't render certain characters - if description: - massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&","&", massaged) - return self.fixChars(massaged) - else: - return description - def parse_feeds(self, *args, **kwargs): - parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs) - # Count articles for progress dialog - article_count = 0 - for feed in parsed_feeds: - article_count += len(feed) - self.log( "Queued %d articles" % article_count) - return parsed_feeds - - def preprocess_html(self, soup): - soup = self.strip_anchors(soup) - return soup - - def postprocess_html(self, soup, first_fetch): - - # Remove navLinks
- navLinks = soup.find(True,{'style':'padding-bottom:3px'}) - if navLinks: - navLinks.extract() - - # Remove
- gibberish = soup.find(True,{'style':'margin-bottom:10px'}) - if gibberish: - gibberish.extract() - - # Change to

- headline = soup.find(True, {'class':['inside-head','inside-head2']}) - if not headline: - headline = soup.find('h3') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, headline.contents[0]) - headline.replaceWith(tag) - else: - print "unable to find headline:\n%s\n" % soup - - # Change byLine to byline, change commas to middot - # Kindle renders commas in byline as '&' - byline = soup.find(True, {'class':'byLine'}) - if byline: - byline['class'] = 'byline' - # Replace comma with middot - byline.contents[0].replaceWith(re.sub(","," ·", byline.renderContents())) - - jumpout_punc_list = [':','?'] - # Remove the inline jumpouts in
- paras = soup.findAll(True, {'class':'inside-copy'}) - for para in paras: - if re.match("[\w\W]+ ",para.renderContents()): - p = para.find('b') - for punc in jumpout_punc_list: - punc_offset = p.contents[0].find(punc) - if punc_offset == -1: - continue - if punc_offset > 1: - if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper(): - #print "extracting \n%s\n" % para.prettify() - para.extract() - - # Reset class for remaining - paras = soup.findAll(True, {'class':'inside-copy'}) - for para in paras: - para['class'] = 'articleBody' - - # Remove inline jumpouts in

- paras = soup.findAll(['p']) - for p in paras: - if hasattr(p,'contents') and len(p.contents): - for punc in jumpout_punc_list: - punc_offset = p.contents[0].find(punc) - if punc_offset == -1: - continue - if punc_offset > 2 and hasattr(p,'a') and len(p.contents): - #print "evaluating %s\n" % p.contents[0][:punc_offset+1] - if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper(): - #print "extracting \n%s\n" % p.prettify() - p.extract() - - # Capture the first img, insert after headline - imgs = soup.findAll('img') - print "postprocess_html(): %d images" % len(imgs) - if imgs: - divTag = Tag(soup, 'div') - divTag['class'] = 'image' - body = soup.find('body') - img = imgs[0] - #print "img: \n%s\n" % img.prettify() - - # Table for photo and credit - tableTag = Tag(soup,'table') - - # Photo - trimgTag = Tag(soup, 'tr') - tdimgTag = Tag(soup, 'td') - tdimgTag.insert(0,img) - trimgTag.insert(0,tdimgTag) - tableTag.insert(0,trimgTag) - - # Credit - trcreditTag = Tag(soup, 'tr') - - tdcreditTag = Tag(soup, 'td') - tdcreditTag['class'] = 'credit' - credit = soup.find('td',{'class':'photoCredit'}) - if credit: - tdcreditTag.insert(0,NavigableString(credit.renderContents())) - else: - credit = img['credit'] - if credit: - tdcreditTag.insert(0,NavigableString(credit)) - else: - tdcreditTag.insert(0,NavigableString('')) - - trcreditTag.insert(0,tdcreditTag) - tableTag.insert(1,trcreditTag) - dtc = 0 - divTag.insert(dtc,tableTag) - dtc += 1 - - if False: - # Add the caption in the table - tableCaptionTag = Tag(soup,'caption') - tableCaptionTag.insert(0,soup.find('td',{'class':'photoCredit'}).renderContents()) - tableTag.insert(1,tableCaptionTag) - divTag.insert(dtc,tableTag) - dtc += 1 - body.insert(1,divTag) - else: - # Add the caption below the table - #print "Looking for caption in this soup:\n%s" % img.prettify() - captionTag = Tag(soup,'p') - captionTag['class'] = 'caption' - if hasattr(img,'alt') and img['alt']: - captionTag.insert(0,NavigableString('

%s
' % img['alt'])) - divTag.insert(dtc, captionTag) - dtc += 1 - else: - try: - captionTag.insert(0,NavigableString('
%s
' % img['cutline'])) - divTag.insert(dtc, captionTag) - dtc += 1 - except: - pass - - hrTag = Tag(soup, 'hr') - divTag.insert(dtc, hrTag) - dtc += 1 - - # Delete
- restructure - tag = body.find(True) - while True: - insertLoc += 1 - try: - if hasattr(tag,'class') and tag['class'] == 'headline': - headline_found = True - tag.insert(insertLoc,divTag) - break - except: - pass - tag = tag.next - if not tag: - break - - # Yank out headline, img and caption - headline = body.find('h2','headline') - img = body.find('div','image') - caption = body.find('p''class') - - # body(0) is calibre_navbar - # body(1) is
- - btc = 1 - headline.extract() - body.insert(1, headline) - btc += 1 - if img: - img.extract() - body.insert(btc, img) - btc += 1 - if caption: - caption.extract() - body.insert(btc, caption) - btc += 1 - - if len(imgs) > 1: - if True: - [img.extract() for img in imgs[1:]] - else: - # Format the remaining images - # This doesn't work yet - for img in imgs[1:]: - print "img:\n%s\n" % img.prettify() - divTag = Tag(soup, 'div') - divTag['class'] = 'image' - - # Table for photo and credit - tableTag = Tag(soup,'table') - - # Photo - trimgTag = Tag(soup, 'tr') - tdimgTag = Tag(soup, 'td') - tdimgTag.insert(0,img) - trimgTag.insert(0,tdimgTag) - tableTag.insert(0,trimgTag) - - # Credit - trcreditTag = Tag(soup, 'tr') - - tdcreditTag = Tag(soup, 'td') - tdcreditTag['class'] = 'credit' - try: - tdcreditTag.insert(0,NavigableString(img['credit'])) - except: - tdcreditTag.insert(0,NavigableString('')) - trcreditTag.insert(0,tdcreditTag) - tableTag.insert(1,trcreditTag) - divTag.insert(0,tableTag) - soup.img.replaceWith(divTag) - - return soup - - def postprocess_book(self, oeb, opts, log) : - - def extract_byline(href) : - # '' : - return self.massageNCXText(self.tag_to_string(p,use_alt=False)) - else: - print "Didn't find
in this soup:\n%s" % soup.prettify() - return None - - # Method entry point here - # Single section toc looks different than multi-section tocs - if oeb.toc.depth() == 2 : - for article in oeb.toc : - if article.author is None : - article.author = extract_byline(article.href) - if article.description is None : - article.description = extract_description(article.href) - elif oeb.toc.depth() == 3 : - for section in oeb.toc : - for article in section : - article.author = extract_byline(article.href) - ''' - if article.author is None : - article.author = self.massageNCXText(extract_byline(article.href)) - else: - article.author = self.massageNCXText(article.author) - ''' - if article.description is None : - article.description = extract_description(article.href) - - def strip_anchors(self,soup): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - a.replaceWith(a.renderContents().decode('cp1252','replace')) - return soup diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 58c887bfdb..1d91236757 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -966,7 +966,9 @@ class OPF(object): # {{{ cover_id = covers[0].get('content') for item in self.itermanifest(): if item.get('id', None) == cover_id: - return item.get('href', None) + mt = item.get('media-type', '') + if 'xml' not in mt: + return item.get('href', None) @dynamic_property def cover(self): diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index 4b262ad9dd..f6b19fc4aa 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -246,7 +246,7 @@ class ChooseLibraryAction(InterfaceAction): def delete_requested(self, name, location): loc = location.replace('/', os.sep) if not question_dialog(self.gui, _('Are you sure?'), '

'+ - _('All files from %s will be ' + _('All files from

%s

will be ' 'permanently deleted. Are you sure?') % loc, show_copy_button=False): return diff --git a/src/calibre/gui2/dialogs/tweak_epub.py b/src/calibre/gui2/dialogs/tweak_epub.py index edc274c9b2..732d74b77d 100755 --- a/src/calibre/gui2/dialogs/tweak_epub.py +++ b/src/calibre/gui2/dialogs/tweak_epub.py @@ -7,16 +7,16 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' import os, shutil -from contextlib import closing from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED from PyQt4.Qt import QDialog from calibre.constants import isosx -from calibre.gui2 import open_local_file +from calibre.gui2 import open_local_file, error_dialog from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog from calibre.libunzip import extract as zipextract -from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.ptempfile import (PersistentTemporaryDirectory, + PersistentTemporaryFile) class TweakEpub(QDialog, Ui_Dialog): ''' @@ -37,11 +37,15 @@ class TweakEpub(QDialog, Ui_Dialog): self.cancel_button.clicked.connect(self.reject) self.explode_button.clicked.connect(self.explode) self.rebuild_button.clicked.connect(self.rebuild) + self.preview_button.clicked.connect(self.preview) # Position update dialog overlaying top left of app window parent_loc = parent.pos() self.move(parent_loc.x(),parent_loc.y()) + self.gui = parent + self._preview_files = [] + def cleanup(self): if isosx: try: @@ -55,6 +59,11 @@ class TweakEpub(QDialog, Ui_Dialog): # Delete directory containing exploded ePub if self._exploded is not None: shutil.rmtree(self._exploded, ignore_errors=True) + for x in self._preview_files: + try: + os.remove(x) + except: + pass def display_exploded(self): ''' @@ -71,9 +80,8 @@ class TweakEpub(QDialog, Ui_Dialog): self.rebuild_button.setEnabled(True) self.explode_button.setEnabled(False) - def rebuild(self, *args): - self._output = os.path.join(self._exploded, 'rebuilt.epub') - with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf: + def do_rebuild(self, src): + with ZipFile(src, 'w', compression=ZIP_DEFLATED) as zf: # Write mimetype zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED) # Write everything else @@ -86,5 +94,23 @@ class TweakEpub(QDialog, Ui_Dialog): zfn = os.path.relpath(absfn, self._exploded).replace(os.sep, '/') zf.write(absfn, zfn) + + def preview(self): + if not self._exploded: + return error_dialog(self, _('Cannot preview'), + _('You must first explode the epub before previewing.'), + show=True) + + tf = PersistentTemporaryFile('.epub') + tf.close() + self._preview_files.append(tf.name) + + self.do_rebuild(tf.name) + + self.gui.iactions['View']._view_file(tf.name) + + def rebuild(self, *args): + self._output = os.path.join(self._exploded, 'rebuilt.epub') + self.do_rebuild(self._output) return QDialog.accept(self) diff --git a/src/calibre/gui2/dialogs/tweak_epub.ui b/src/calibre/gui2/dialogs/tweak_epub.ui index fc6f24675f..a59af4fde1 100644 --- a/src/calibre/gui2/dialogs/tweak_epub.ui +++ b/src/calibre/gui2/dialogs/tweak_epub.ui @@ -23,6 +23,16 @@ false + + + + <p>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window <b>and the editor windows you used to edit files in the epub</b>.</p><p>Rebuild the ePub, updating your calibre library.</p> + + + true + + + @@ -37,23 +47,6 @@ - - - - false - - - Rebuild ePub from exploded contents - - - &Rebuild ePub - - - - :/images/exec.png:/images/exec.png - - - @@ -68,13 +61,31 @@ - - - - <p>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window <b>and the editor windows you used to edit files in the epub</b>.</p><p>Rebuild the ePub, updating your calibre library.</p> + + + + false - - true + + Rebuild ePub from exploded contents + + + &Rebuild ePub + + + + :/images/exec.png:/images/exec.png + + + + + + + &Preview ePub + + + + :/images/view.png:/images/view.png diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index f487051d07..05ff23987d 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ -190,7 +190,15 @@ class FieldsModel(QAbstractListModel): # {{{ return ans | Qt.ItemIsUserCheckable def restore_defaults(self): - self.overrides = dict([(f, self.state(f, True)) for f in self.fields]) + self.overrides = dict([(f, self.state(f, Qt.Checked)) for f in self.fields]) + self.reset() + + def select_all(self): + self.overrides = dict([(f, Qt.Checked) for f in self.fields]) + self.reset() + + def clear_all(self): + self.overrides = dict([(f, Qt.Unchecked) for f in self.fields]) self.reset() def setData(self, index, val, role): @@ -273,6 +281,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.fields_view.setModel(self.fields_model) self.fields_model.dataChanged.connect(self.changed_signal) + self.select_all_button.clicked.connect(self.fields_model.select_all) + self.clear_all_button.clicked.connect(self.fields_model.clear_all) + def configure_plugin(self): for index in self.sources_view.selectionModel().selectedRows(): plugin = self.sources_model.data(index, Qt.UserRole) diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui index e46069b036..ff161654dd 100644 --- a/src/calibre/gui2/preferences/metadata_sources.ui +++ b/src/calibre/gui2/preferences/metadata_sources.ui @@ -77,8 +77,8 @@ Downloaded metadata fields - - + + If you uncheck any fields, metadata for those fields will not be downloaded @@ -88,6 +88,20 @@ + + + + &Select all + + + + + + + &Clear all + + + diff --git a/src/calibre/gui2/store/search/search.ui b/src/calibre/gui2/store/search/search.ui index 7e8dd36284..fe5aaceda4 100644 --- a/src/calibre/gui2/store/search/search.ui +++ b/src/calibre/gui2/store/search/search.ui @@ -107,7 +107,7 @@ Open a selected book in the system's web browser - Open external + Open in &external browser