From d7908b9f1fc49a5ccde8f12d14693a35a95719f9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 1 Apr 2012 17:26:58 +0530 Subject: [PATCH 01/27] Update Qt to 4.8.1 in windows binary build --- setup/installer/windows/freeze.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py index 3e251d2dcf..c5ea18e2e9 100644 --- a/setup/installer/windows/freeze.py +++ b/setup/installer/windows/freeze.py @@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC from setup.installer.windows.wix import WixMixIn OPENSSL_DIR = r'Q:\openssl' -QT_DIR = 'Q:\\Qt\\4.8.0' +QT_DIR = 'Q:\\Qt\\4.8.1' QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns'] LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' SW = r'C:\cygwin\home\kovid\sw' From e48a9d1e572734e6228c5f239e48837ea1e97f16 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 1 Apr 2012 21:58:58 +0530 Subject: [PATCH 02/27] Edit metadata dialog: Change the remove unused series button to a clear series button (as the remove unused series function is now automatic) --- src/calibre/gui2/actions/edit_metadata.py | 87 +++++++++++++++++------ 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 527beae0ab..15e47b49ff 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os +import os, shutil from functools import partial from PyQt4.Qt import QMenu, QModelIndex, QTimer @@ -16,6 +16,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.device_category_editor import DeviceCategoryEditor from calibre.gui2.actions import InterfaceAction from calibre.ebooks.metadata import authors_to_string +from calibre.ebooks.metadata.opf2 
import OPF from calibre.utils.icu import sort_key from calibre.db.errors import NoSuchFormat @@ -79,14 +80,27 @@ class EditMetadataAction(InterfaceAction): Dispatcher(self.metadata_downloaded), ensure_fields=ensure_fields) + def cleanup_bulk_download(self, tdir, log_file): + try: + shutil.rmtree(tdir, ignore_errors=True) + except: + pass + try: + os.remove(log_file) + except: + pass + def metadata_downloaded(self, job): if job.failed: self.gui.job_exception(job, dialog_title=_('Failed to download metadata')) return from calibre.gui2.metadata.bulk_download import get_job_details - id_map, failed_ids, failed_covers, all_failed, det_msg = \ - get_job_details(job) + (aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed, + det_msg, lm_map) = get_job_details(job) + if aborted: + return self.cleanup_bulk_download(tdir, log_file) if all_failed: + self.cleanup_bulk_download(tdir, log_file) return error_dialog(self.gui, _('Download failed'), _('Failed to download metadata or covers for any of the %d' ' book(s).') % len(id_map), det_msg=det_msg, show=True) @@ -103,28 +117,26 @@ class EditMetadataAction(InterfaceAction): msg += '

'+_('Could not download metadata and/or covers for %d of the books. Click' ' "Show details" to see which books.')%num - payload = (id_map, failed_ids, failed_covers) + payload = (id_map, tdir, log_file, lm_map) from calibre.gui2.dialogs.message_box import ProceedNotification p = ProceedNotification(self.apply_downloaded_metadata, - payload, job.html_details, + payload, open(log_file, 'rb').read().decode('utf-8'), _('Download log'), _('Download complete'), msg, det_msg=det_msg, show_copy_button=show_copy_button, parent=self.gui) p.show() def apply_downloaded_metadata(self, payload): - id_map, failed_ids, failed_covers = payload - id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in - failed_ids]) - if not id_map: + good_ids, tdir, log_file, lm_map = payload + if not good_ids: return modified = set() db = self.gui.current_db - for i, mi in id_map.iteritems(): + for i in good_ids: lm = db.metadata_last_modified(i, index_is_id=True) - if lm > mi.last_modified: + if lm > lm_map[i]: title = db.title(i, index_is_id=True) authors = db.authors(i, index_is_id=True) if authors: @@ -144,7 +156,18 @@ class EditMetadataAction(InterfaceAction): 'Do you want to proceed?'), det_msg='\n'.join(modified)): return - self.apply_metadata_changes(id_map) + id_map = {} + for bid in good_ids: + opf = os.path.join(tdir, '%d.mi') + if not os.path.exists(opf): + opf = None + cov = os.path.join(tdir, '%d.cover'%bid) + if not os.path.exists(cov): + cov = None + id_map[bid] = (opf, cov) + + self.apply_metadata_changes(id_map, callback=lambda x: + self.cleanup_bulk_download(tdir, log_file)) # }}} @@ -468,6 +491,11 @@ class EditMetadataAction(InterfaceAction): callback can be either None or a function accepting a single argument, in which case it is called after applying is complete with the list of changed ids. + + id_map can also be a mapping of ids to 2-tuple's where each 2-tuple + contains the absolute paths to an OPF and cover file respectively. 
If + either of the paths is None, then the corresponding metadata is not + updated. ''' if title is None: title = _('Applying changed metadata') @@ -492,28 +520,48 @@ class EditMetadataAction(InterfaceAction): return self.finalize_apply() i, mi = self.apply_id_map[self.apply_current_idx] + if isinstance(mi, tuple): + opf, cover = mi + if opf: + mi = OPF(open(opf, 'rb'), basedir=os.path.dirname(opf), + populate_spine=False).to_book_metadata() + self.apply_mi(i, mi) + if cover: + self.gui.current_db.set_cover(i, open(cover, 'rb'), + notify=False, commit=False) + else: + self.apply_mi(i, mi) + + self.apply_current_idx += 1 + if self.apply_pd is not None: + self.apply_pd.value += 1 + QTimer.singleShot(50, self.do_one_apply) + + + def apply_mi(self, book_id, mi): db = self.gui.current_db + try: set_title = not mi.is_null('title') set_authors = not mi.is_null('authors') - idents = db.get_identifiers(i, index_is_id=True) + idents = db.get_identifiers(book_id, index_is_id=True) if mi.identifiers: idents.update(mi.identifiers) mi.identifiers = idents if mi.is_null('series'): mi.series_index = None if self._am_merge_tags: - old_tags = db.tags(i, index_is_id=True) + old_tags = db.tags(book_id, index_is_id=True) if old_tags: tags = [x.strip() for x in old_tags.split(',')] + ( mi.tags if mi.tags else []) mi.tags = list(set(tags)) - db.set_metadata(i, mi, commit=False, set_title=set_title, + db.set_metadata(book_id, mi, commit=False, set_title=set_title, set_authors=set_authors, notify=False) - self.applied_ids.append(i) + self.applied_ids.append(book_id) except: import traceback - self.apply_failures.append((i, traceback.format_exc())) + self.apply_failures.append((book_id, traceback.format_exc())) try: if mi.cover: @@ -521,11 +569,6 @@ class EditMetadataAction(InterfaceAction): except: pass - self.apply_current_idx += 1 - if self.apply_pd is not None: - self.apply_pd.value += 1 - QTimer.singleShot(50, self.do_one_apply) - def finalize_apply(self): db = self.gui.current_db 
db.commit() From ee108790db88f52b6b73dd935baf97cbcf4c01f7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 08:46:41 +0530 Subject: [PATCH 03/27] Fix Soldier's Magazine --- recipes/soldiers.recipe | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/recipes/soldiers.recipe b/recipes/soldiers.recipe index fb96e5a2ed..a1e9e5ca23 100644 --- a/recipes/soldiers.recipe +++ b/recipes/soldiers.recipe @@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + auto_cleanup = True + auto_cleanup_keep = '//div[@id="mediaWrapper"]' simultaneous_downloads = 1 delay = 4 max_connections = 1 @@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})] + #keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})] - remove_tags = [ - dict(name='div', attrs={'id':['addThis','comment','articleFooter']}) - ,dict(name=['object','link']) - ] + #remove_tags = [ + #dict(name='div', attrs={'id':['addThis','comment','articleFooter']}) + #,dict(name=['object','link']) + #] - feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )] + feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )] def get_cover_url(self): From bb443d01f10d4640e96cd62f3bd23177dec34723 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 09:00:25 +0530 Subject: [PATCH 04/27] Updated OReilly Premium and Real Clear --- recipes/oreilly_premium.recipe | 325 +++++++++++++++++++++++---------- recipes/real_clear.recipe | 68 +++++-- 2 files changed, 284 insertions(+), 109 deletions(-) diff --git a/recipes/oreilly_premium.recipe b/recipes/oreilly_premium.recipe index 9dc11059c4..4a9b9e54c3 100644 --- a/recipes/oreilly_premium.recipe +++ b/recipes/oreilly_premium.recipe @@ -1,45 +1,73 @@ -# Talking Points is not grabbing everything. 
-# The look is right, but only the last one added? -import re +import string, re import time +import traceback +# above for debugging via stack from calibre.web.feeds.recipes import BasicNewsRecipe # Allows the Python soup converter, which makes parsing easier. from calibre.ebooks.BeautifulSoup import BeautifulSoup -# strip ads and graphics -# Current Column lacks a title. -# Talking Points Memo - shorten title - Remove year and Bill's name -# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries. -# Newsletters: Talking Points Memos covered by cat12 +import os, time, traceback, re, urlparse, sys, cStringIO +from collections import defaultdict +from functools import partial +from contextlib import nested, closing + + +from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed +from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending + + +# To Do: strip ads and graphics, Current Column lacks a title. +# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries. +# Newsletters: Talking Points Memos covered by cat12 +# ./ebook-convert --username xxx --password xxx + +# this is derived from BasicNewsRecipe, so it can only overload those. +# Soome of what we need is otherwise in article, so we have more copy to do than otherwise. class OReillyPremium(BasicNewsRecipe): title = u'OReilly Premium' __author__ = 'TMcN' - language = 'en' description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.' 
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png' + custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y') + title = 'Bill O\'Reilly Premium' auto_cleanup = True + conversion_options = {'linearize_tables': True} encoding = 'utf8' - needs_subscription = True + language = 'en' no_stylesheets = True - oldest_article = 20 + needs_subscription = True + oldest_article = 31 remove_javascript = True remove_tags = [dict(name='img', attrs={})] # Don't go down recursions = 0 - max_articles_per_feed = 2000 - + max_articles_per_feed = 20 + debugMessages = True - + # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []], - ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []], - ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []], - ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []], - ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []], + # ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []], + # ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []], + # ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []], + # ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []], ["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []] ] - + + feeds = [ + (u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'), + (u'Daily Briefing', 
u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'), + (u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'), + (u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'), + (u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5') + ] + # http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day. + + # Note: Talking Points is broken in the above model; the site changed to more Ajax-y. + # Now using RSS + def get_browser(self): + print("In get_browser") br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp') @@ -48,7 +76,7 @@ class OReillyPremium(BasicNewsRecipe): br['formPasswordField'] = self.password br.submit() return br - + # Returns the best-guess print url. # The second parameter (pageURL) is returned if nothing is found. def extractPrintURL(self, baseURL, pageURL, printString): @@ -62,17 +90,19 @@ class OReillyPremium(BasicNewsRecipe): tag = printText.parent tagURL = baseURL+tag['href'] return tagURL - + def stripBadChars(self, inString) : return inString.replace("\'", "") - + + def parseGeneric(self, baseURL): - # Does a generic parsing of the articles. There are six categories (0-5) + # Does a generic parsing of the articles. There are six categories (0-5) # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList # NoSpin and TV are generic fullReturn = [] - for i in range(len(self.catList)) : + for i in range(len(self.catList)) : articleList = [] + print("In "+self.catList[i][0]+", index: "+ str(i)) soup = self.index_to_soup(self.catList[i][1]) # Set defaults description = 'None' @@ -80,15 +110,13 @@ class OReillyPremium(BasicNewsRecipe): # Problem: 0-2 create many in an array # 3-5 create one. 
# So no for-div for 3-5 - - if i < 3 : + + if i == 0 : + print("Starting TV Archives") for div in soup.findAll(self.catList[i][2], self.catList[i][3]): + print("Next DIV:") print(div) - if i == 1: - a = div.find('a', href=True) - else : - a = div - print(a) + a = div summary = div.find(True, attrs={'class':'summary'}) if summary: description = self.tag_to_string(summary, use_alt=False) @@ -96,82 +124,63 @@ class OReillyPremium(BasicNewsRecipe): continue # url = baseURL+re.sub(r'\?.*', '', a['href']) url = baseURL+a['href'] - if i < 2 : - url = self.extractPrintURL(baseURL, url, "Print this entry") - title = self.tag_to_string(a, use_alt=True).strip() - elif i == 2 : - # Daily Briefs - url = self.extractPrintURL(baseURL, url, "Print this entry") - title = div.contents[0] - if self.debugMessages : - print(title+" @ "+url) + url = self.extractPrintURL(baseURL, url, "Print this entry") + title = self.tag_to_string(a, use_alt=True).strip() articleList.append(dict(title=title, url=url, date=pubdate, description=description, content='')) - elif i == 3 : # Stratfor - a = soup.find('a', self.catList[i][3]) - if a is None : - continue - url = baseURL+a['href'] - title = self.tag_to_string(a, use_alt=True).strip() - # Get Stratfor contents so we can get the real title. - stratSoup = self.index_to_soup(url) - title = stratSoup.html.head.title.string - stratIndex = title.find('Stratfor.com:', 0) - if (stratIndex > -1) : - title = title[stratIndex+14:-1] - # Look for first blogBody 2K, it is used as the article. + + # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles. # returns a list of tuple ('feed title', list of articles) # { @@ -182,16 +191,148 @@ class OReillyPremium(BasicNewsRecipe): # 'content' : The full article (can be an empty string). This is used by FullContentProfile # } # this is used instead of BasicNewsRecipe.parse_feeds(). 
+ # it is called by download def parse_index(self): # Parse the page into Python Soup + print("Entering recipe print_index from:") + traceback.print_stack() + print("web") baseURL = "https://www.billoreilly.com" - return self.parseGeneric(baseURL) - + masterList = self.parseGeneric(baseURL) + #print(masterList) + return masterList + def preprocess_html(self, soup): + print("In preprocess_html") refresh = soup.find('meta', {'http-equiv':'refresh'}) if refresh is None: return soup content = refresh.get('content').partition('=')[2] raw = self.browser.open('https://www.billoreilly.com'+content).read() return BeautifulSoup(raw.decode('cp1252', 'replace')) + + def build_index(self): + print("In OReilly build_index()\n\n") + feedsRSS = [] + self.report_progress(0, _('Fetching feeds...')) + #try: + feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article, + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) + self.report_progress(0, _('Got feeds from index page')) + #except NotImplementedError: + # feeds = self.parse_feeds() + # Now add regular feeds. 
+ feedsRSS = self.parse_feeds() + print ("feedsRSS is type "+feedsRSS.__class__.__name__) + + for articles in feedsRSS: + print("articles is type "+articles.__class__.__name__) + print("Title:" + articles.title) + feeds.append(articles) + if not feeds: + raise ValueError('No articles found, aborting') + + #feeds = FeedCollection(feeds) + + self.report_progress(0, _('Trying to download cover...')) + self.download_cover() + self.report_progress(0, _('Generating masthead...')) + self.masthead_path = None + + try: + murl = self.get_masthead_url() + except: + self.log.exception('Failed to get masthead url') + murl = None + + if murl is not None: + # Try downloading the user-supplied masthead_url + # Failure sets self.masthead_path to None + self.download_masthead(murl) + if self.masthead_path is None: + self.log.info("Synthesizing mastheadImage") + self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg') + try: + self.default_masthead_image(self.masthead_path) + except: + self.log.exception('Failed to generate default masthead image') + self.masthead_path = None + + if self.test: + feeds = feeds[:2] + self.has_single_feed = len(feeds) == 1 + + index = os.path.join(self.output_dir, 'index.html') + + html = self.feeds2index(feeds) + with open(index, 'wb') as fi: + fi.write(html) + + self.jobs = [] + + if self.reverse_article_order: + for feed in feeds: + if hasattr(feed, 'reverse'): + feed.reverse() + + self.feed_objects = feeds + for f, feed in enumerate(feeds): + feed_dir = os.path.join(self.output_dir, 'feed_%d'%f) + if not os.path.isdir(feed_dir): + os.makedirs(feed_dir) + + for a, article in enumerate(feed): + if a >= self.max_articles_per_feed: + break + art_dir = os.path.join(feed_dir, 'article_%d'%a) + if not os.path.isdir(art_dir): + os.makedirs(art_dir) + try: + url = self.print_version(article.url) + except NotImplementedError: + url = article.url + except: + self.log.exception('Failed to find print version for: '+article.url) + url = None + if 
not url: + continue + func, arg = (self.fetch_embedded_article, article) \ + if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \ + else \ + ((self.fetch_obfuscated_article if self.articles_are_obfuscated \ + else self.fetch_article), url) + req = WorkRequest(func, (arg, art_dir, f, a, len(feed)), + {}, (f, a), self.article_downloaded, + self.error_in_article_download) + req.feed = feed + req.article = article + req.feed_dir = feed_dir + self.jobs.append(req) + + + self.jobs_done = 0 + tp = ThreadPool(self.simultaneous_downloads) + for req in self.jobs: + tp.putRequest(req, block=True, timeout=0) + + + self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads) + while True: + try: + tp.poll() + time.sleep(0.1) + except NoResultsPending: + break + for f, feed in enumerate(feeds): + print("Writing feeds for "+feed.title) + html = self.feed2index(f,feeds) + feed_dir = os.path.join(self.output_dir, 'feed_%d'%f) + with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi: + fi.write(html) + self.create_opf(feeds) + self.report_progress(1, _('Feeds downloaded to %s')%index) + + return index + diff --git a/recipes/real_clear.recipe b/recipes/real_clear.recipe index 19add74fcd..2dfe56d207 100644 --- a/recipes/real_clear.recipe +++ b/recipes/real_clear.recipe @@ -1,7 +1,9 @@ # Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug +import string, re import time +from urlparse import urlparse from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import NavigableString +from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString class RealClear(BasicNewsRecipe): title = u'Real Clear' @@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe): # Don't go down recursions = 0 max_articles_per_feed = 400 - debugMessages = False - - # Numeric parameter is type, controls whether we look for + 
debugMessages = True + + # Numeric parameter is type, controls whether we look for feedsets = [ - ["Politics", "http://www.realclearpolitics.com/index.xml", 0], - ["Science", "http://www.realclearscience.com/index.xml", 0], + ["Politics", "http://www.realclearpolitics.com/index.xml", 0], + ["Policy", "http://www.realclearpolicy.com/index.xml", 0], + ["Science", "http://www.realclearscience.com/index.xml", 0], ["Tech", "http://www.realcleartechnology.com/index.xml", 0], # The feedburner is essentially the same as the top feed, politics. # ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1], @@ -37,22 +40,37 @@ class RealClear(BasicNewsRecipe): ] # Hints to extractPrintURL. # First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down. - printhints = [ + phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4) + + printhints = [ ["realclear", "", '' , 'printpage'], ["billoreilly.com", "Print this entry", 'a', ''], ["billoreilly.com", "Print This Article", 'a', ''], - ["politico.com", "Print", 'a', 'share-print'], + ["politico.com", "Print", 'a', 'share-print'], ["nationalreview.com", ">Print<", 'a', ''], ["reason.com", "", 'a', 'printer'] # The following are not supported due to JavaScripting, and would require obfuscated_article to handle - # forbes, + # forbes, # usatoday - just prints with all current crap anyhow - + ] - + # RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html + # The print link isn't obvious, and only the end is needed (the -full append.) SO maybe try that first?s + # http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html + # Single page articles don't have a _full; e.g. 
http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html + # Use the FULL PRINTPAGE URL; it formats it better too! + # + # NYT - try single page... + # Need special code - is it one page or several? Which URL? + # from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1 + # to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all + # which is at link rel="canonical" and at 0 and len(self.printhints[x][1]) == 0: + if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0: + # e.g. RealClear if self.debugMessages == True : - print("search1") + print("Search by href: "+self.printhints[x][self.phHrefSearch]) + printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch])) + elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0: + if self.debugMessages == True : + print("Search 1: "+self.printhints[x][2]+" Attributes: ") + print(self.printhints[x][3]) printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3]) elif len(self.printhints[x][3])>0 : if self.debugMessages == True : print("search2") printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1]) else : + if self.debugMessages == True: + print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1]) printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1]) if printFind is None: if self.debugMessages == True : print("Not Found") + # print(soup) + print("end soup\n\n"); continue + print(printFind) if isinstance(printFind, NavigableString)==False: if printFind['href'] is not None: + print("Check "+printFind['href']+" for base of "+baseURL) + if printFind['href'].find("http")!=0 : + return baseURL+printFind['href'] return printFind['href'] tag = printFind.parent print(tag) @@ -98,7 +130,7 @@ class RealClear(BasicNewsRecipe): 
print("In get_browser") br = BasicNewsRecipe.get_browser() return br - + def parseRSS(self, index) : if self.debugMessages == True : print("\n\nStarting "+self.feedsets[index][0]) @@ -128,7 +160,7 @@ class RealClear(BasicNewsRecipe): pubDateEl = div.find("pubDate") if pubDateEl is None : pubDateEl = div.find("pubdate") - if pubDateEl is None : + if pubDateEl is None : pubDate = time.strftime('%a, %d %b') else : pubDate = pubDateEl.contents[0] @@ -144,7 +176,7 @@ class RealClear(BasicNewsRecipe): pubdate = time.strftime('%a, %d %b') articleList.append(dict(title=title, url=url, date=pubdate, description=description, content='')) return articleList - + # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles. # returns a list of tuple ('feed title', list of articles) # { @@ -157,7 +189,8 @@ class RealClear(BasicNewsRecipe): # this is used instead of BasicNewsRecipe.parse_feeds(). def parse_index(self): # Parse the page into Python Soup - + + articleList = [] ans = [] feedsCount = len(self.feedsets) for x in range(0,feedsCount): # should be ,4 @@ -167,4 +200,5 @@ class RealClear(BasicNewsRecipe): if self.debugMessages == True : print(ans) return ans + From edd99987fad9c6cfd2e374bafd45a2ad72458969 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 09:43:12 +0530 Subject: [PATCH 05/27] ... --- src/calibre/web/feeds/news.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 77428e4c07..75e9d03d6e 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -648,7 +648,10 @@ class BasicNewsRecipe(Recipe): 'url' : URL of print version, 'date' : The publication date of the article as a string, 'description' : A summary of the article - 'content' : The full article (can be an empty string). This is used by FullContentProfile + 'content' : The full article (can be an empty string). 
Obsolete + do not use, instead save the content to a temporary + file and pass a file:///path/to/temp/file.html as + the URL. } For an example, see the recipe for downloading `The Atlantic`. From 310c5c17d205edfc80c0a36b4b979c404c80c94a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 10:24:26 +0530 Subject: [PATCH 06/27] Edit metadata dialog: Change the remove unused series button to a clear series button (as the remove unused series function is now automatic) --- src/calibre/gui2/metadata/single.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index 840753c706..23728b5901 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -161,10 +161,10 @@ class MetadataSingleDialogBase(ResizableDialog): self.manage_authors_button.clicked.connect(self.authors.manage_authors) self.series = SeriesEdit(self) - self.remove_unused_series_button = QToolButton(self) - self.remove_unused_series_button.setToolTip( - _('Remove unused series (Series that have no books)') ) - self.remove_unused_series_button.clicked.connect(self.remove_unused_series) + self.clear_series_button = QToolButton(self) + self.clear_series_button.setToolTip( + _('Clear series') ) + self.clear_series_button.clicked.connect(self.series.clear) self.series_index = SeriesIndexEdit(self, self.series) self.basic_metadata_widgets.extend([self.series, self.series_index]) @@ -198,6 +198,7 @@ class MetadataSingleDialogBase(ResizableDialog): self.basic_metadata_widgets.append(self.identifiers) self.clear_identifiers_button = QToolButton(self) self.clear_identifiers_button.setIcon(QIcon(I('trash.png'))) + self.clear_identifiers_button.setToolTip(_('Clear Ids')) self.clear_identifiers_button.clicked.connect(self.identifiers.clear) self.paste_isbn_button = QToolButton(self) self.paste_isbn_button.setToolTip('

' + @@ -303,17 +304,6 @@ class MetadataSingleDialogBase(ResizableDialog): self.title_sort.auto_generate() self.author_sort.auto_generate() - def remove_unused_series(self, *args): - self.db.remove_unused_series() - idx = self.series.current_val - self.series.clear() - self.series.initialize(self.db, self.book_id) - if idx: - for i in range(self.series.count()): - if unicode(self.series.itemText(i)) == idx: - self.series.setCurrentIndex(i) - break - def tags_editor(self, *args): self.tags.edit(self.db, self.book_id) @@ -591,7 +581,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{ sto(self.title_sort, self.authors) create_row(1, self.authors, self.deduce_author_sort_button, self.author_sort) sto(self.author_sort, self.series) - create_row(2, self.series, self.remove_unused_series_button, + create_row(2, self.series, self.clear_series_button, self.series_index, icon='trash.png') sto(self.series_index, self.swap_title_author_button) sto(self.swap_title_author_button, self.manage_authors_button) @@ -756,7 +746,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{ span=2, icon='auto_author_sort.png') create_row(3, self.author_sort, self.series) create_row(4, self.series, self.series_index, - button=self.remove_unused_series_button, icon='trash.png') + button=self.clear_series_button, icon='trash.png') create_row(5, self.series_index, self.tags) create_row(6, self.tags, self.rating, button=self.tags_editor_button) create_row(7, self.rating, self.pubdate) @@ -892,7 +882,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{ span=2, icon='auto_author_sort.png') create_row(3, self.author_sort, self.series) create_row(4, self.series, self.series_index, - button=self.remove_unused_series_button, icon='trash.png') + button=self.clear_series_button, icon='trash.png') create_row(5, self.series_index, self.tags) create_row(6, self.tags, self.rating, button=self.tags_editor_button) create_row(7, self.rating, self.pubdate) From 
927b7471b7c262833bc8aaba467aab56f201e1e1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 10:31:40 +0530 Subject: [PATCH 07/27] Run bulk metadata downloads in a separate process to workaround the problem of third party metadata download plugins with memory leaks. Also removes the need to batch metadata downloads into groups of 100 books at a time. --- src/calibre/ebooks/metadata/sources/worker.py | 95 ++++++++++ src/calibre/gui2/metadata/bulk_download.py | 171 +++++++++--------- 2 files changed, 178 insertions(+), 88 deletions(-) create mode 100644 src/calibre/ebooks/metadata/sources/worker.py diff --git a/src/calibre/ebooks/metadata/sources/worker.py b/src/calibre/ebooks/metadata/sources/worker.py new file mode 100644 index 0000000000..91ca31d2b8 --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/worker.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os +from threading import Event +from io import BytesIO + +from calibre.utils.date import as_utc +from calibre.ebooks.metadata.sources.identify import identify, msprefs +from calibre.ebooks.metadata.book.base import Metadata +from calibre.customize.ui import metadata_plugins +from calibre.ebooks.metadata.sources.covers import download_cover +from calibre.utils.logging import GUILog +from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF + +def merge_result(oldmi, newmi, ensure_fields=None): + dummy = Metadata(_('Unknown')) + for f in msprefs['ignore_fields']: + if ':' in f or (ensure_fields and f in ensure_fields): + continue + setattr(newmi, f, getattr(dummy, f)) + fields = set() + for plugin in metadata_plugins(['identify']): + fields |= plugin.touched_fields + + def is_equal(x, y): + if hasattr(x, 'tzinfo'): + x = as_utc(x) + if hasattr(y, 
'tzinfo'): + y = as_utc(y) + return x == y + + for f in fields: + # Optimize so that set_metadata does not have to do extra work later + if not f.startswith('identifier:'): + if (not newmi.is_null(f) and is_equal(getattr(newmi, f), + getattr(oldmi, f))): + setattr(newmi, f, getattr(dummy, f)) + + return newmi + +def main(do_identify, covers, metadata, ensure_fields): + failed_ids = set() + failed_covers = set() + all_failed = True + log = GUILog() + + for book_id, mi in metadata.iteritems(): + mi = OPF(BytesIO(mi), basedir=os.getcwdu(), + populate_spine=False).to_book_metadata() + title, authors, identifiers = mi.title, mi.authors, mi.identifiers + cdata = None + log.clear() + + if do_identify: + results = [] + try: + results = identify(log, Event(), title=title, authors=authors, + identifiers=identifiers) + except: + pass + if results: + all_failed = False + mi = merge_result(mi, results[0], ensure_fields=ensure_fields) + identifiers = mi.identifiers + if not mi.is_null('rating'): + # set_metadata expects a rating out of 10 + mi.rating *= 2 + with open('%d.mi'%book_id, 'wb') as f: + f.write(metadata_to_opf(mi, default_lang='und')) + else: + log.error('Failed to download metadata for', title) + failed_ids.add(book_id) + + if covers: + cdata = download_cover(log, title=title, authors=authors, + identifiers=identifiers) + if cdata is None: + failed_covers.add(book_id) + else: + with open('%d.cover'%book_id, 'wb') as f: + f.write(cdata[-1]) + all_failed = False + + with open('%d.log'%book_id, 'wb') as f: + f.write(log.html.encode('utf-8')) + + return failed_ids, failed_covers, all_failed + diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 976dfad2bb..24d081dc77 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -7,20 +7,17 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import os, time, shutil from 
functools import partial -from itertools import izip -from threading import Event from PyQt4.Qt import (QIcon, QDialog, QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt) from calibre.gui2.threaded_jobs import ThreadedJob -from calibre.ebooks.metadata.sources.identify import identify, msprefs -from calibre.ebooks.metadata.sources.covers import download_cover -from calibre.ebooks.metadata.book.base import Metadata -from calibre.customize.ui import metadata_plugins -from calibre.ptempfile import PersistentTemporaryFile -from calibre.utils.date import as_utc +from calibre.ebooks.metadata.opf2 import metadata_to_opf +from calibre.utils.ipc.simple_worker import fork_job, WorkerError +from calibre.ptempfile import (PersistentTemporaryDirectory, + PersistentTemporaryFile) # Start download {{{ def show_config(gui, parent): @@ -105,18 +102,18 @@ def start_download(gui, ids, callback, ensure_fields=None): if ret != d.Accepted: return - for batch in split_jobs(ids): - job = ThreadedJob('metadata bulk download', - _('Download metadata for %d books')%len(batch), - download, (batch, gui.current_db, d.identify, d.covers, - ensure_fields), {}, callback) - gui.job_manager.run_threaded_job(job) + job = ThreadedJob('metadata bulk download', + _('Download metadata for %d books')%len(ids), + download, (ids, gui.current_db, d.identify, d.covers, + ensure_fields), {}, callback) + gui.job_manager.run_threaded_job(job) gui.status_bar.show_message(_('Metadata download started'), 3000) # }}} def get_job_details(job): - id_map, failed_ids, failed_covers, title_map, all_failed = job.result + (aborted, good_ids, tdir, log_file, failed_ids, failed_covers, title_map, + lm_map, all_failed) = job.result det_msg = [] for i in failed_ids | failed_covers: title = title_map[i] @@ -126,92 +123,90 @@ def get_job_details(job): title += (' ' + _('(Failed cover)')) det_msg.append(title) det_msg = '\n'.join(det_msg) - return id_map, failed_ids, failed_covers, all_failed, det_msg + return (aborted, good_ids, 
tdir, log_file, failed_ids, failed_covers, + all_failed, det_msg, lm_map) -def merge_result(oldmi, newmi, ensure_fields=None): - dummy = Metadata(_('Unknown')) - for f in msprefs['ignore_fields']: - if ':' in f or (ensure_fields and f in ensure_fields): - continue - setattr(newmi, f, getattr(dummy, f)) - fields = set() - for plugin in metadata_plugins(['identify']): - fields |= plugin.touched_fields +class HeartBeat(object): + CHECK_INTERVAL = 300 # seconds + ''' Check that the file count in tdir changes every five minutes ''' - def is_equal(x, y): - if hasattr(x, 'tzinfo'): - x = as_utc(x) - if hasattr(y, 'tzinfo'): - y = as_utc(y) - return x == y + def __init__(self, tdir): + self.tdir = tdir + self.last_count = len(os.listdir(self.tdir)) + self.last_time = time.time() - for f in fields: - # Optimize so that set_metadata does not have to do extra work later - if not f.startswith('identifier:'): - if (not newmi.is_null(f) and is_equal(getattr(newmi, f), - getattr(oldmi, f))): - setattr(newmi, f, getattr(dummy, f)) + def __call__(self): + if time.time() - self.last_time > self.CHECK_INTERVAL: + c = len(os.listdir(self.tdir)) + if c == self.last_count: + return False + self.last_count = c + self.last_time = time.time() + return True - newmi.last_modified = oldmi.last_modified +# Fix log viewer, get_job_details, database update code +# Test: abort, covers only, metadata only, both, 200 entry download, memory +# consumption, all errors and on and on - return newmi - -def download(ids, db, do_identify, covers, ensure_fields, +def download(all_ids, db, do_identify, covers, ensure_fields, log=None, abort=None, notifications=None): - ids = list(ids) - metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False) - for i in ids] + batch_size = 10 + batches = split_jobs(all_ids, batch_size=batch_size) + tdir = PersistentTemporaryDirectory('_metadata_bulk_') + tf = PersistentTemporaryFile('_metadata_bulk_log_') + tf.close() + tf = tf.name + heartbeat = 
HeartBeat(tdir) + failed_ids = set() failed_covers = set() title_map = {} - ans = {} - count = 0 + lm_map = {} + ans = set() all_failed = True - ''' - # Test apply dialog - all_failed = do_identify = covers = False - ''' - for i, mi in izip(ids, metadata): + aborted = False + count = 0 + + for ids in batches: if abort.is_set(): log.error('Aborting...') break - title, authors, identifiers = mi.title, mi.authors, mi.identifiers - title_map[i] = title - if do_identify: - results = [] - try: - results = identify(log, Event(), title=title, authors=authors, - identifiers=identifiers) - except: - pass - if results: - all_failed = False - mi = merge_result(mi, results[0], ensure_fields=ensure_fields) - identifiers = mi.identifiers - if not mi.is_null('rating'): - # set_metadata expects a rating out of 10 - mi.rating *= 2 - else: - log.error('Failed to download metadata for', title) - failed_ids.add(i) - # We don't want set_metadata operating on anything but covers - mi = merge_result(mi, mi, ensure_fields=ensure_fields) - if covers: - cdata = download_cover(log, title=title, authors=authors, - identifiers=identifiers) - if cdata is not None: - with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f: - f.write(cdata[-1]) - mi.cover = f.name - all_failed = False - else: - failed_covers.add(i) - ans[i] = mi - count += 1 + metadata = {i:db.get_metadata(i, index_is_id=True, + get_user_categories=False) for i in ids} + for i in ids: + title_map[i] = metadata[i].title + lm_map[i] = metadata[i].last_modified + metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in + metadata.iteritems()} + try: + ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main', + (do_identify, covers, metadata, ensure_fields), + cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True) + except WorkerError as e: + if e.orig_tb: + raise Exception('Failed to download metadata. 
Original ' + 'traceback: \n\n'+e.orig_tb) + raise + count += batch_size notifications.put((count/len(ids), - _('Downloaded %(num)d of %(tot)d')%dict(num=count, tot=len(ids)))) + _('Downloaded %(num)d of %(tot)d')%dict( + num=count, tot=len(all_ids)))) + + fids, fcovs, allf = ret['result'] + if not allf: + all_failed = False + failed_ids = failed_ids.union(fids) + failed_covers = failed_covers.union(fcovs) + ans = ans.union(set(ids) - fids) + for book_id in ids: + lp = os.path.join(tdir, '%d.log'%book_id) + if os.path.exists(lp): + with open(lp, 'rb') as f, open(tf, 'ab') as d: + shutil.copyfileobj(f, d) + + if abort.is_set(): + aborted = True log('Download complete, with %d failures'%len(failed_ids)) - return (ans, failed_ids, failed_covers, title_map, all_failed) - - + return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map, + lm_map, all_failed) From bf06cebc2dc95a1f44f157356795d3811d76a0b6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 10:39:59 +0530 Subject: [PATCH 08/27] ... 
--- src/calibre/gui2/actions/edit_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 15e47b49ff..4e6a4723f7 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -158,7 +158,7 @@ class EditMetadataAction(InterfaceAction): id_map = {} for bid in good_ids: - opf = os.path.join(tdir, '%d.mi') + opf = os.path.join(tdir, '%d.mi'%bid) if not os.path.exists(opf): opf = None cov = os.path.join(tdir, '%d.cover'%bid) From d24e8e842e39def140f0e5f770e58444f86b8741 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 15:25:46 +0530 Subject: [PATCH 09/27] Fix logging for the new bulk metadata download path --- src/calibre/ebooks/metadata/sources/worker.py | 2 +- src/calibre/gui2/actions/edit_metadata.py | 16 +++---- src/calibre/gui2/dialogs/message_box.py | 11 ++++- src/calibre/gui2/jobs.py | 3 +- src/calibre/gui2/metadata/bulk_download.py | 42 +++++++++++++++---- 5 files changed, 51 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/worker.py b/src/calibre/ebooks/metadata/sources/worker.py index 91ca31d2b8..f2db60e01f 100644 --- a/src/calibre/ebooks/metadata/sources/worker.py +++ b/src/calibre/ebooks/metadata/sources/worker.py @@ -89,7 +89,7 @@ def main(do_identify, covers, metadata, ensure_fields): all_failed = False with open('%d.log'%book_id, 'wb') as f: - f.write(log.html.encode('utf-8')) + f.write(log.plain_text.encode('utf-8')) return failed_ids, failed_covers, all_failed diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 4e6a4723f7..4a0d12e3d3 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -80,15 +80,11 @@ class EditMetadataAction(InterfaceAction): Dispatcher(self.metadata_downloaded), ensure_fields=ensure_fields) - def cleanup_bulk_download(self, 
tdir, log_file): + def cleanup_bulk_download(self, tdir): try: shutil.rmtree(tdir, ignore_errors=True) except: pass - try: - os.remove(log_file) - except: - pass def metadata_downloaded(self, job): if job.failed: @@ -98,9 +94,9 @@ class EditMetadataAction(InterfaceAction): (aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed, det_msg, lm_map) = get_job_details(job) if aborted: - return self.cleanup_bulk_download(tdir, log_file) + return self.cleanup_bulk_download(tdir) if all_failed: - self.cleanup_bulk_download(tdir, log_file) + self.cleanup_bulk_download(tdir) return error_dialog(self.gui, _('Download failed'), _('Failed to download metadata or covers for any of the %d' ' book(s).') % len(id_map), det_msg=det_msg, show=True) @@ -120,10 +116,10 @@ class EditMetadataAction(InterfaceAction): payload = (id_map, tdir, log_file, lm_map) from calibre.gui2.dialogs.message_box import ProceedNotification p = ProceedNotification(self.apply_downloaded_metadata, - payload, open(log_file, 'rb').read().decode('utf-8'), + payload, log_file, _('Download log'), _('Download complete'), msg, det_msg=det_msg, show_copy_button=show_copy_button, - parent=self.gui) + parent=self.gui, log_is_file=True) p.show() def apply_downloaded_metadata(self, payload): @@ -167,7 +163,7 @@ class EditMetadataAction(InterfaceAction): id_map[bid] = (opf, cov) self.apply_metadata_changes(id_map, callback=lambda x: - self.cleanup_bulk_download(tdir, log_file)) + self.cleanup_bulk_download(tdir)) # }}} diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py index fb0725651b..64c8bf75ba 100644 --- a/src/calibre/gui2/dialogs/message_box.py +++ b/src/calibre/gui2/dialogs/message_box.py @@ -160,7 +160,7 @@ class ProceedNotification(MessageBox): # {{{ def __init__(self, callback, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, parent=None, - cancel_callback=None): + cancel_callback=None, log_is_file=False): ''' A non 
modal popup that notifies the user that a background task has been completed. @@ -175,12 +175,15 @@ class ProceedNotification(MessageBox): # {{{ :param title: The title for this popup :param msg: The msg to display :param det_msg: Detailed message + :param log_is_file: If True the html_log parameter is interpreted as + the path to a file on disk containing the log encoded with utf-8 ''' MessageBox.__init__(self, MessageBox.QUESTION, title, msg, det_msg=det_msg, show_copy_button=show_copy_button, parent=parent) self.payload = payload self.html_log = html_log + self.log_is_file = log_is_file self.log_viewer_title = log_viewer_title self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole) @@ -192,7 +195,11 @@ class ProceedNotification(MessageBox): # {{{ _proceed_memory.append(self) def show_log(self): - self.log_viewer = ViewLog(self.log_viewer_title, self.html_log, + log = self.html_log + if self.log_is_file: + with open(log, 'rb') as f: + log = f.read().decode('utf-8') + self.log_viewer = ViewLog(self.log_viewer_title, log, parent=self) def do_proceed(self, result): diff --git a/src/calibre/gui2/jobs.py b/src/calibre/gui2/jobs.py index 8c1b5388d7..c0d61332ab 100644 --- a/src/calibre/gui2/jobs.py +++ b/src/calibre/gui2/jobs.py @@ -402,7 +402,8 @@ class DetailView(QDialog, Ui_Dialog): # {{{ self.setupUi(self) self.setWindowTitle(job.description) self.job = job - self.html_view = hasattr(job, 'html_details') + self.html_view = (hasattr(job, 'html_details') and not getattr(job, + 'ignore_html_details', False)) if self.html_view: self.log.setVisible(False) else: diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 24d081dc77..3487ffd8f2 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -20,6 +20,28 @@ from calibre.ptempfile import (PersistentTemporaryDirectory, PersistentTemporaryFile) # Start download {{{ + +class Job(ThreadedJob): + + 
ignore_html_details = True + + def consolidate_log(self): + self.consolidated_log = self.log.plain_text + self.log = None + + def read_consolidated_log(self): + return self.consolidated_log + + @property + def details(self): + if self.consolidated_log is None: + return self.log.plain_text + return self.read_consolidated_log() + + @property + def log_file(self): + return open(self.download_debug_log, 'rb') + def show_config(gui, parent): from calibre.gui2.preferences import show_config_widget show_config_widget('Sharing', 'Metadata download', parent=parent, @@ -101,11 +123,14 @@ def start_download(gui, ids, callback, ensure_fields=None): d.b.clicked.disconnect() if ret != d.Accepted: return + tf = PersistentTemporaryFile('_metadata_bulk_log_') + tf.close() - job = ThreadedJob('metadata bulk download', + job = Job('metadata bulk download', _('Download metadata for %d books')%len(ids), - download, (ids, gui.current_db, d.identify, d.covers, + download, (ids, tf.name, gui.current_db, d.identify, d.covers, ensure_fields), {}, callback) + job.download_debug_log = tf.name gui.job_manager.run_threaded_job(job) gui.status_bar.show_message(_('Metadata download started'), 3000) @@ -144,18 +169,15 @@ class HeartBeat(object): self.last_time = time.time() return True -# Fix log viewer, get_job_details, database update code +# Fix log viewer, ratings # Test: abort, covers only, metadata only, both, 200 entry download, memory # consumption, all errors and on and on -def download(all_ids, db, do_identify, covers, ensure_fields, +def download(all_ids, tf, db, do_identify, covers, ensure_fields, log=None, abort=None, notifications=None): batch_size = 10 batches = split_jobs(all_ids, batch_size=batch_size) tdir = PersistentTemporaryDirectory('_metadata_bulk_') - tf = PersistentTemporaryFile('_metadata_bulk_log_') - tf.close() - tf = tf.name heartbeat = HeartBeat(tdir) failed_ids = set() @@ -201,8 +223,10 @@ def download(all_ids, db, do_identify, covers, ensure_fields, for book_id in 
ids: lp = os.path.join(tdir, '%d.log'%book_id) if os.path.exists(lp): - with open(lp, 'rb') as f, open(tf, 'ab') as d: - shutil.copyfileobj(f, d) + with open(tf, 'ab') as dest, open(lp, 'rb') as src: + dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] + + '#'*20+'\n').encode('utf-8')) + shutil.copyfileobj(src, dest) if abort.is_set(): aborted = True From 4eaf86efcc5a42bd333d768347c03f7fa434a438 Mon Sep 17 00:00:00 2001 From: GRiker Date: Mon, 2 Apr 2012 04:10:39 -0600 Subject: [PATCH 10/27] Revised connect/share description for 'Connect to iTunes' --- src/calibre/manual/gui.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index f048d99d1b..d82db2772a 100755 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -73,7 +73,7 @@ Edit metadata |emii| The :guilabel:`Edit metadata` action has four variations which can be accessed by doing a right-click on the button. - 1. **Edit metadata individually**: Allows you to edit the metadata of books one-by-one with the option of fetching metadata, including covers, from the Internet. It also allows you to add or remove particular ebook formats from a book. + 1. **Edit metadata individually**: Allows you to edit the metadata of books one-by-one with the option of fetching metadata, including covers, from the Internet. It also allows you to add or remove particular ebook formats from a book. 2. **Edit metadata in bulk**: Allows you to edit common metadata fields for large numbers of books simulataneously. It operates on all the books you have selected in the :ref:`Library view `. 3. **Download metadata and covers**: Downloads metadata and covers (if available) for the books that are selected in the book list. 4. **Merge book records**: Gives you the capability of merging the metadata and formats of two or more book records. You can choose to either delete or keep the records that were not clicked first. 
@@ -117,7 +117,7 @@ View |vi| The :guilabel:`View` action displays the book in an ebook viewer program. |app| has a built-in viewer for many ebook formats. For other formats it uses the default operating system application. You can configure which formats should open with the internal viewer via -Preferences->Behavior. If a book has more than one format, you can view a particular format by doing a right-click on the button. +Preferences->Behavior. If a book has more than one format, you can view a particular format by doing a right-click on the button. .. _send_to_device: @@ -175,7 +175,7 @@ Library 5. ****: Actions 5, 6 etc... give you immediate switch access between multiple libraries that you have created or attached to. This list contains only the 5 most frequently used libraries. For the complete list, use the Quick Switch menu. 6. **Library maintenance**: Allows you to check the current library for data consistency issues and restore the current library's database from backups. -.. note:: Metadata about your ebooks, e.g. title, author, and tags, is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an OPF file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual OPF files for you. +.. note:: Metadata about your ebooks, e.g. title, author, and tags, is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an OPF file. 
By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual OPF files for you. You can copy or move books between different libraries (once you have more than one library setup) by right clicking on the book and selecting the action :guilabel:`Copy to library`. @@ -235,7 +235,7 @@ Connect/Share 1. **Connect to folder**: Allows you to connect to any folder on your computer as though it were a device and use all the facilities |app| has for devices with that folder. Useful if your device cannot be supported by |app| but is available as a USB disk. - 2. **Connect to iTunes**: Allows you to connect to your iTunes books database as though it were a device. Once the books are sent to iTunes, you can use iTunes to make them available to your various iDevices. This is useful if you would rather not have |app| send books to your iDevice directly. + 2. **Connect to iTunes**: Allows you to connect to your iTunes books database as though it were a device. Once the books are sent to iTunes, you can use iTunes to make them available to your various iDevices. 3. **Start Content Server**: Starts |app|'s built-in web server. When started, your |app| library will be accessible via a web browser from the Internet (if you choose). You can configure how the web server is accessed by setting preferences at :guilabel:`Preferences->Sharing->Sharing over the net` @@ -338,9 +338,9 @@ Two other kinds of searches are available: equality search and search using `reg Equality searches are indicated by prefixing the search string with an equals sign (=). For example, the query ``tag:"=science"`` will match "science", but not "science fiction" or "hard science". Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `python-compatible regular expression `_ can -be used. 
Note that backslashes used to escape special characters in reqular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression contains anchors. +be used. Note that backslashes used to escape special characters in reqular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression contains anchors. -Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash. +Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash. Enclose search strings with quotes (") if the string contains parenthesis or spaces. For example, to search for the tag ``Science Fiction`` you would need to search for ``tag:"=science fiction"``. If you search for @@ -362,7 +362,7 @@ The syntax for searching for dates is:: If the date is ambiguous, the current locale is used for date comparison. For example, in an mm/dd/yyyy locale 2/1/2009 is interpreted as 1 Feb 2009. In a dd/mm/yyyy locale it is interpreted as 2 Jan 2009. Some special date strings are available. The string ``today`` translates to today's date, whatever it is. The -strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work. +strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work. In addition, the string ``daysago`` (also translated) can be used to compare to a date some number of days ago. 
For example:: From ee808594ca3df4dbbe0eb134da37eec6ad424a38 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 16:25:58 +0530 Subject: [PATCH 11/27] EPUB metadata: Strip pointless urn:isbn: prefix from ISBN declaration when reading metadata --- src/calibre/ebooks/metadata/opf2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index c30545e6e1..d21727f3c3 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -883,6 +883,8 @@ class OPF(object): # {{{ val = etree.tostring(x, with_tail=False, encoding=unicode, method='text').strip() if val and typ not in ('calibre', 'uuid'): + if typ == 'isbn' and val.lower().startswith('urn:isbn:'): + val = val[len('urn:isbn:'):] identifiers[typ] = val found_scheme = True break From d5ae96dc4c590b0a91bae47fe15a4e73b940f61c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 16:30:11 +0530 Subject: [PATCH 12/27] Fix non integer rating not read from OPF files --- src/calibre/ebooks/metadata/opf2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index d21727f3c3..92aa960be6 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -535,7 +535,7 @@ class OPF(object): # {{{ series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1) title_sort = TitleSortField('title_sort', is_dc=False) - rating = MetadataField('rating', is_dc=False, formatter=int) + rating = MetadataField('rating', is_dc=False, formatter=float) pubdate = MetadataField('date', formatter=parse_date, renderer=isoformat) publication_type = MetadataField('publication_type', is_dc=False) From 09122265fde6b4b276a92c3f06deb9521e26c2ac Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 17:31:45 +0530 Subject: [PATCH 13/27] ... 
--- src/calibre/gui2/actions/edit_metadata.py | 12 +++++++----- src/calibre/gui2/metadata/bulk_download.py | 4 ---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 4a0d12e3d3..3eb9a79122 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -96,10 +96,11 @@ class EditMetadataAction(InterfaceAction): if aborted: return self.cleanup_bulk_download(tdir) if all_failed: + num = len(failed_ids | failed_covers) self.cleanup_bulk_download(tdir) return error_dialog(self.gui, _('Download failed'), _('Failed to download metadata or covers for any of the %d' - ' book(s).') % len(id_map), det_msg=det_msg, show=True) + ' book(s).') % num, det_msg=det_msg, show=True) self.gui.status_bar.show_message(_('Metadata download completed'), 3000) @@ -498,7 +499,7 @@ class EditMetadataAction(InterfaceAction): self.apply_id_map = list(id_map.iteritems()) self.apply_current_idx = 0 self.apply_failures = [] - self.applied_ids = [] + self.applied_ids = set() self.apply_pd = None self.apply_callback = callback if len(self.apply_id_map) > 1: @@ -525,6 +526,7 @@ class EditMetadataAction(InterfaceAction): if cover: self.gui.current_db.set_cover(i, open(cover, 'rb'), notify=False, commit=False) + self.applied_ids.add(i) else: self.apply_mi(i, mi) @@ -554,7 +556,7 @@ class EditMetadataAction(InterfaceAction): mi.tags = list(set(tags)) db.set_metadata(book_id, mi, commit=False, set_title=set_title, set_authors=set_authors, notify=False) - self.applied_ids.append(book_id) + self.applied_ids.add(book_id) except: import traceback self.apply_failures.append((book_id, traceback.format_exc())) @@ -589,7 +591,7 @@ class EditMetadataAction(InterfaceAction): if self.applied_ids: cr = self.gui.library_view.currentIndex().row() self.gui.library_view.model().refresh_ids( - self.applied_ids, cr) + list(self.applied_ids), cr) if self.gui.cover_flow: 
self.gui.cover_flow.dataChanged() self.gui.tags_view.recount() @@ -598,7 +600,7 @@ class EditMetadataAction(InterfaceAction): self.apply_pd = None try: if callable(self.apply_callback): - self.apply_callback(self.applied_ids) + self.apply_callback(list(self.applied_ids)) finally: self.apply_callback = None diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 3487ffd8f2..0f7097a4e6 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -169,10 +169,6 @@ class HeartBeat(object): self.last_time = time.time() return True -# Fix log viewer, ratings -# Test: abort, covers only, metadata only, both, 200 entry download, memory -# consumption, all errors and on and on - def download(all_ids, tf, db, do_identify, covers, ensure_fields, log=None, abort=None, notifications=None): batch_size = 10 From e3873e425440e812c47297723f4eed05a4b76c9a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 17:59:01 +0530 Subject: [PATCH 14/27] ... 
--- src/calibre/utils/ipc/launch.py | 3 ++- src/calibre/utils/pyconsole/interpreter.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/utils/ipc/launch.py b/src/calibre/utils/ipc/launch.py index 508e302708..3a6344132d 100644 --- a/src/calibre/utils/ipc/launch.py +++ b/src/calibre/utils/ipc/launch.py @@ -167,7 +167,8 @@ class Worker(object): ''' exe = self.gui_executable if self.gui else self.executable env = self.env - env['ORIGWD'] = cwd or os.path.abspath(os.getcwd()) + env[b'ORIGWD'] = binascii.hexlify(cPickle.dumps(cwd or + os.path.abspath(os.getcwdu()))) _cwd = cwd if priority is None: priority = prefs['worker_process_priority'] diff --git a/src/calibre/utils/pyconsole/interpreter.py b/src/calibre/utils/pyconsole/interpreter.py index 3cd0d94711..1c06f8102e 100644 --- a/src/calibre/utils/pyconsole/interpreter.py +++ b/src/calibre/utils/pyconsole/interpreter.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys, cPickle, os +import sys, cPickle, os, binascii from code import InteractiveInterpreter from Queue import Queue, Empty from threading import Thread @@ -130,7 +130,7 @@ class Interpreter(InteractiveInterpreter): # {{{ # }}} def connect(): - os.chdir(os.environ['ORIGWD']) + os.chdir(cPickle.loads(binascii.unhexlify(os.environ['ORIGWD']))) address = cPickle.loads(unhexlify(os.environ['CALIBRE_WORKER_ADDRESS'])) key = unhexlify(os.environ['CALIBRE_WORKER_KEY']) return Client(address, authkey=key) From 945a8e3ae8f40ca2e33d9f09b6637605ec4ac9eb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 18:24:15 +0530 Subject: [PATCH 15/27] More granular progress reporting during bulk metadata download --- src/calibre/gui2/metadata/bulk_download.py | 118 +++++++++++++-------- 1 file changed, 76 insertions(+), 42 deletions(-) diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 
0f7097a4e6..0f2f5ae9be 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en' import os, time, shutil from functools import partial +from threading import Thread from PyQt4.Qt import (QIcon, QDialog, QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt) @@ -169,6 +170,36 @@ class HeartBeat(object): self.last_time = time.time() return True +class Notifier(Thread): + + def __init__(self, notifications, title_map, tdir, total): + Thread.__init__(self) + self.daemon = True + self.notifications, self.title_map = notifications, title_map + self.tdir, self.total = tdir, total + self.seen = set() + self.keep_going = True + + def run(self): + while self.keep_going: + try: + names = os.listdir(self.tdir) + except: + pass + else: + for x in names: + if x.endswith('.log'): + try: + book_id = int(x.partition('.')[0]) + except: + continue + if book_id not in self.seen and book_id in self.title_map: + self.seen.add(book_id) + self.notifications.put(( + float(len(self.seen))/self.total, + _('Processed %s')%self.title_map[book_id])) + time.sleep(1) + def download(all_ids, tf, db, do_identify, covers, ensure_fields, log=None, abort=None, notifications=None): batch_size = 10 @@ -184,49 +215,52 @@ def download(all_ids, tf, db, do_identify, covers, ensure_fields, all_failed = True aborted = False count = 0 + notifier = Notifier(notifications, title_map, tdir, len(all_ids)) + notifier.start() + + try: + for ids in batches: + if abort.is_set(): + log.error('Aborting...') + break + metadata = {i:db.get_metadata(i, index_is_id=True, + get_user_categories=False) for i in ids} + for i in ids: + title_map[i] = metadata[i].title + lm_map[i] = metadata[i].last_modified + metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in + metadata.iteritems()} + try: + ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main', + (do_identify, covers, metadata, ensure_fields), + cwd=tdir, 
abort=abort, heartbeat=heartbeat, no_output=True) + except WorkerError as e: + if e.orig_tb: + raise Exception('Failed to download metadata. Original ' + 'traceback: \n\n'+e.orig_tb) + raise + count += batch_size + + fids, fcovs, allf = ret['result'] + if not allf: + all_failed = False + failed_ids = failed_ids.union(fids) + failed_covers = failed_covers.union(fcovs) + ans = ans.union(set(ids) - fids) + for book_id in ids: + lp = os.path.join(tdir, '%d.log'%book_id) + if os.path.exists(lp): + with open(tf, 'ab') as dest, open(lp, 'rb') as src: + dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] + + '#'*20+'\n').encode('utf-8')) + shutil.copyfileobj(src, dest) - for ids in batches: if abort.is_set(): - log.error('Aborting...') - break - metadata = {i:db.get_metadata(i, index_is_id=True, - get_user_categories=False) for i in ids} - for i in ids: - title_map[i] = metadata[i].title - lm_map[i] = metadata[i].last_modified - metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in - metadata.iteritems()} - try: - ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main', - (do_identify, covers, metadata, ensure_fields), - cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True) - except WorkerError as e: - if e.orig_tb: - raise Exception('Failed to download metadata. 
Original ' - 'traceback: \n\n'+e.orig_tb) - raise - count += batch_size - notifications.put((count/len(ids), - _('Downloaded %(num)d of %(tot)d')%dict( - num=count, tot=len(all_ids)))) + aborted = True + log('Download complete, with %d failures'%len(failed_ids)) + return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map, + lm_map, all_failed) + finally: + notifier.keep_going = False - fids, fcovs, allf = ret['result'] - if not allf: - all_failed = False - failed_ids = failed_ids.union(fids) - failed_covers = failed_covers.union(fcovs) - ans = ans.union(set(ids) - fids) - for book_id in ids: - lp = os.path.join(tdir, '%d.log'%book_id) - if os.path.exists(lp): - with open(tf, 'ab') as dest, open(lp, 'rb') as src: - dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] + - '#'*20+'\n').encode('utf-8')) - shutil.copyfileobj(src, dest) - - if abort.is_set(): - aborted = True - log('Download complete, with %d failures'%len(failed_ids)) - return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map, - lm_map, all_failed) From c27bebd6faf9cf91e934f124439ff7389e4dd307 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 18:32:57 +0530 Subject: [PATCH 16/27] ... 
--- src/calibre/gui2/actions/edit_metadata.py | 1 + src/calibre/gui2/metadata/bulk_download.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 3eb9a79122..a58bae25fd 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -120,6 +120,7 @@ class EditMetadataAction(InterfaceAction): payload, log_file, _('Download log'), _('Download complete'), msg, det_msg=det_msg, show_copy_button=show_copy_button, + cancel_callback=lambda x:self.cleanup_bulk_download(tdir), parent=self.gui, log_is_file=True) p.show() diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 0f2f5ae9be..b199468309 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -124,7 +124,7 @@ def start_download(gui, ids, callback, ensure_fields=None): d.b.clicked.disconnect() if ret != d.Accepted: return - tf = PersistentTemporaryFile('_metadata_bulk_log_') + tf = PersistentTemporaryFile('_metadata_bulk.log') tf.close() job = Job('metadata bulk download', @@ -204,7 +204,7 @@ def download(all_ids, tf, db, do_identify, covers, ensure_fields, log=None, abort=None, notifications=None): batch_size = 10 batches = split_jobs(all_ids, batch_size=batch_size) - tdir = PersistentTemporaryDirectory('_metadata_bulk_') + tdir = PersistentTemporaryDirectory('_metadata_bulk') heartbeat = HeartBeat(tdir) failed_ids = set() From efc200f0b1cbc8ccac277f4cbe66bd7aa9a5e84f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 18:35:09 +0530 Subject: [PATCH 17/27] Buenos Aires Herald by Darko Miletic. 
Fixes #971517 (New recipe for Buenos Aires Herald) --- recipes/ba_herald.recipe | 82 ++++++++++++++++++++++++++++++++++++ recipes/icons/ba_herald.png | Bin 0 -> 978 bytes 2 files changed, 82 insertions(+) create mode 100644 recipes/ba_herald.recipe create mode 100644 recipes/icons/ba_herald.png diff --git a/recipes/ba_herald.recipe b/recipes/ba_herald.recipe new file mode 100644 index 0000000000..e966fd5676 --- /dev/null +++ b/recipes/ba_herald.recipe @@ -0,0 +1,82 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Darko Miletic ' +''' +www.buenosairesherald.com +''' + +import re +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +class BuenosAiresHerald(BasicNewsRecipe): + title = 'Buenos Aires Herald' + __author__ = 'Darko Miletic' + description = 'A world of information in a few words' + publisher = 'Editorial Nefir S.A.' + category = 'news, politics, Argentina' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en_AR' + remove_empty_feeds = True + publication_type = 'newspaper' + masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg' + INDEX = 'http://www.buenosairesherald.com' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif } + img{margin-bottom: 0.4em; display:block} + h1{font-family: Georgia,serif} + #fecha{text-align: right; font-size: small} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['meta','link','iframe'])] + keep_only_tags = [dict(attrs={'class':'nota_texto p'})] + + + feeds = [ + (u'Argentina' , u'http://www.buenosairesherald.com/argentina' ) + ,(u'World' , u'http://www.buenosairesherald.com/world' ) + ,(u'Latin America' , u'http://www.buenosairesherald.com/latin-america' ) + ,(u'Entertainment' , 
u'http://www.buenosairesherald.com/entertainment' ) + ,(u'Sports' , u'http://www.buenosairesherald.com/sports' ) + ] + + def print_version(self, url): + artidraw = url.rpartition('/article/')[2] + artid = artidraw.partition('/')[0] + return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid + + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}): + description = self.tag_to_string(item.h2) + atag = item.h2.find('a') + if atag and atag.has_key('href'): + url = self.INDEX + atag['href'] + title = description + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds diff --git a/recipes/icons/ba_herald.png b/recipes/icons/ba_herald.png new file mode 100644 index 0000000000000000000000000000000000000000..2b02a4ae93c16e98fbe93c904fa557ea36a64075 GIT binary patch literal 978 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GXl4m>B|mLR_VdCv%%jkkGBOjHqKZ z?%+1);4<$JHtXdv>k_e?^5@rI4#Q?{LL9gM@-~T_p{^K!f<}~l& zH|gdwZs##-`}hCv&wszTj5~qqI81uE%_d11P2w|YU8F0iuro|9;3D)v*~i za_KjG`}&o~w3kc2iN&au+qCiDzYm6i6Mz5w#$(vXW7zWV-#;#+&ObmazyJRSH2U`+ zezR^q<2GjPMs|Z9PNSB;|Ne7oH*uS^v6!^*8n>|-1Idm*|Na7fHws2WfcOw#@>YHZ zRNd?8;uumfC;5SJk-+AZ?GG*qi<@70^7wHwLsE4eqd?~Q_1vNXPq(viZumIciS@}3 zQAZ}DzHJ4J(ssqg)sgO;D;SvfTP<&F+$%JRp`C-j-&T7PgYx|D&b@ONo5VCor)Y48 zval><^7&(>z{r{`x}jm>%)L`)sjQ#4ck&dw2nXkA?;DX*`XeLbZg6QZFc<_$OqX~# QEf?esPgg&ebxsLQ0LyWs=Kufz literal 0 HcmV?d00001 From 42429dd12bf0c05383f80a02a9e40b357b9d302c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Apr 2012 22:07:42 +0530 Subject: [PATCH 18/27] The Southern Star by watou --- recipes/southernstar.recipe 
| 136 ++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 recipes/southernstar.recipe diff --git a/recipes/southernstar.recipe b/recipes/southernstar.recipe new file mode 100644 index 0000000000..69a81e2fb6 --- /dev/null +++ b/recipes/southernstar.recipe @@ -0,0 +1,136 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2012, watou' +''' +southernstar.ie +''' +import re +import tempfile +import os +import codecs + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag, NavigableString + +class TheSouthernStar(BasicNewsRecipe): + + title = 'The Southern Star' + __author__ = 'watou' + description = 'West Cork\'s leading news and information provider since 1889' + NEWS_INDEX = 'http://www.southernstar.ie/news.php' + LOCAL_NOTES = 'http://www.southernstar.ie/localnotes.php' + SPORT_INDEX = 'http://www.southernstar.ie/sport.php' + CLASSIFIEDS = 'http://www.southernstar.ie/classifieds.php' + language = 'en_IE' + encoding = 'cp1252' + + publication_type = 'newspaper' + masthead_url = 'http://www.southernstar.ie/images/logo.gif' + remove_tags_before = dict(name='div', attrs={'class':'article'}) + remove_tags_after = dict(name='div', attrs={'class':'article'}) + remove_tags = [dict(name='div', attrs={'style':'width:300px; position:relative'}), + dict(name='form'), + dict(name='div', attrs={'class':'endpanel'})] + no_stylesheets = True + tempfiles = [] + pubdate = '' + + preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] + + def parse_index(self): + feeds = [] + seen_titles = set([]) + + articles = self.fetch_ss_articles(self.NEWS_INDEX, seen_titles) + if articles: + feeds.append(('News', articles)) + + articles = self.fetch_ss_notes(self.LOCAL_NOTES) + if articles: + feeds.append(('Local Notes', articles)) + + articles = self.fetch_ss_articles(self.SPORT_INDEX, seen_titles) + if articles: + feeds.append(('Sport', articles)) + + articles = 
self.fetch_ss_notes(self.CLASSIFIEDS) + if articles: + feeds.append(('Classifieds', articles)) + + return feeds + + def fetch_ss_articles(self, index, seen_titles): + articles = [] + soup = self.index_to_soup(index) + ts = soup.find('div', {'class':'article'}) + ds = self.tag_to_string(ts.find('strong')) + self.pubdate = ' ['+ds+']' + self.timefmt = ' [%s]'%ds + + for post in ts.findAll('h1'): + a = post.find('a', href=True) + title = self.tag_to_string(a) + if title in seen_titles: + continue + seen_titles.add(title) + url = a['href'] + if url.startswith('article'): + url = 'http://www.southernstar.ie/'+url + self.log('\tFound article:', title, 'at', url) + p = post.findNextSibling('p') + desc = None + if p is not None: + desc = str(p) + articles.append({'title':title, 'url':url, 'description':desc, + 'date':self.pubdate}) + + return articles + + def fetch_ss_notes(self, page): + articles = [] + + soup = self.index_to_soup(page) + ts = soup.find('div', {'class':'content'}) + for post in ts.findAll('h1'): + title = self.tag_to_string(post) + self.log('\tFound note:', title) + f = tempfile.NamedTemporaryFile(suffix='.html',delete=False) + f.close() + f = codecs.open(f.name, 'w+b', self.encoding, 'replace') + url = "file://" + f.name + f.write(u'

'+title+'

') + f.write(str(post.findNextSibling('p'))) + f.write(u'') + self.log('\tWrote note to', f.name) + f.close() + self.tempfiles.append(f) + articles.append({'title':title, 'url':url, 'date':self.pubdate}) + + return articles + + def postprocess_html(self, soup, first): + for table in soup.findAll('table', align='right'): + img = table.find('img') + if img is not None: + img.extract() + caption = self.tag_to_string(table).strip() + div = Tag(soup, 'div') + div['style'] = 'text-align:center' + div.insert(0, img) + div.insert(1, Tag(soup, 'br')) + if caption: + div.insert(2, NavigableString(caption)) + table.replaceWith(div) + + return soup + + def image_url_processor(self, baseurl, url): + return url.replace(' ','%20') + + def cleanup(self): + self.log('cleaning up') + for f in self.tempfiles: + os.unlink(f.name) + self.tempfiles = [] From e278da92199b483cd364388494a0c4cd4635d830 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 12:07:38 +0530 Subject: [PATCH 19/27] ... --- src/calibre/library/server/utils.py | 4 ++-- src/calibre/manual/faq.rst | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/server/utils.py b/src/calibre/library/server/utils.py index 9b8ec98d87..111f535686 100644 --- a/src/calibre/library/server/utils.py +++ b/src/calibre/library/server/utils.py @@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en' import time, sys from urllib import quote as quote_, unquote as unquote_ +from functools import wraps import cherrypy @@ -40,6 +41,7 @@ class Offsets(object): def expose(func): + @wraps(func) def do(*args, **kwargs): self = func.im_self if self.opts.develop: @@ -54,8 +56,6 @@ def expose(func): prints('\tTime:', func.__name__, time.time()-start) return ans - do.__name__ = func.__name__ - return do diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index a6d1467cab..a248962abd 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -381,6 +381,18 @@ that 
allows you to create collections on your Kindle from the |app| metadata. It .. note:: Amazon have removed the ability to manipulate collections completely in their newer models, like the Kindle Touch and Kindle Fire, making even the above plugin useless. If you really want the ability to manage collections on your Kindle via a USB connection, we encourage you to complain to Amazon about it, or get a reader where this is supported, like the SONY Readers. +I am getting an error when I try to use |app| with my Kobo Touch? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Kobo Touch has very buggy firmware. Connecting to it has been known to fail at random. Certain combinations of motherboard, USB ports/cables/hubs can exacerbate this tendency to fail. If you are getting an error when connecting to your touch with |app| try the following, each of which has solved the problem for *some* |app| users. + + * Connect the Kobo directly to your computer, not via USB Hub + * Try a different USB cable and a different USB port on your computer + * Try a different computer (preferably an older model) + * Try upgrading the firmware on your Kobo Touch to the latest + * Try resetting the Kobo (sometimes this cures the problem for a little while, but then it re-appears, in which case you have to reset again and again) + * Try only putting one or two books onto the Kobo at a time and do not keep large collections on the Kobo + Library Management ------------------ From 103854e2421ee3c3a9b8597349a0d284929395fe Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 15:39:19 +0530 Subject: [PATCH 20/27] Fix bugs in cherrypy auth_digest --- src/cherrypy/lib/auth_digest.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/cherrypy/lib/auth_digest.py b/src/cherrypy/lib/auth_digest.py index 67578e0015..490b431577 100644 --- a/src/cherrypy/lib/auth_digest.py +++ b/src/cherrypy/lib/auth_digest.py @@ 
-33,7 +33,8 @@ qop_auth = 'auth' qop_auth_int = 'auth-int' valid_qops = (qop_auth, qop_auth_int) -valid_algorithms = ('MD5', 'MD5-sess') +valid_algorithms = ('MD5', 'MD5-sess', 'md5', 'md5-sess') # Changed by Kovid to + # add lowercase def TRACE(msg): @@ -67,7 +68,7 @@ def get_ha1_dict(user_ha1_dict): argument to digest_auth(). """ def get_ha1(realm, username): - return user_ha1_dict.get(user) + return user_ha1_dict.get(username) # Changed by Kovid to fix typo return get_ha1 @@ -107,10 +108,10 @@ def synthesize_nonce(s, key, timestamp=None): key A secret string known only to the server. - + timestamp An integer seconds-since-the-epoch timestamp - + """ if timestamp is None: timestamp = int(time.time()) @@ -190,10 +191,10 @@ class HttpDigestAuthorization (object): s A string related to the resource, such as the hostname of the server. - + key A secret string known only to the server. - + Both s and key must be the same values which were used to synthesize the nonce we are trying to validate. """ @@ -256,7 +257,7 @@ class HttpDigestAuthorization (object): 4.3. This refers to the entity the user agent sent in the request which has the Authorization header. Typically GET requests don't have an entity, and POST requests do. - + """ ha2 = self.HA2(entity_body) # Request-Digest -- RFC 2617 3.2.2.1 @@ -302,16 +303,16 @@ def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, stal def digest_auth(realm, get_ha1, key, debug=False): """A CherryPy tool which hooks at before_handler to perform HTTP Digest Access Authentication, as specified in :rfc:`2617`. - + If the request has an 'authorization' header with a 'Digest' scheme, this tool authenticates the credentials supplied in that header. If the request has no 'authorization' header, or if it does but the scheme is not "Digest", or if authentication fails, the tool sends a 401 response with a 'WWW-Authenticate' Digest header. - + realm A string containing the authentication realm. 
- + get_ha1 A callable which looks up a username in a credentials store and returns the HA1 string, which is defined in the RFC to be @@ -320,13 +321,13 @@ def digest_auth(realm, get_ha1, key, debug=False): where username is obtained from the request's 'authorization' header. If username is not found in the credentials store, get_ha1() returns None. - + key A secret string known only to the server, used in the synthesis of nonces. - + """ request = cherrypy.serving.request - + auth_header = request.headers.get('authorization') nonce_is_stale = False if auth_header is not None: @@ -334,10 +335,10 @@ def digest_auth(realm, get_ha1, key, debug=False): auth = HttpDigestAuthorization(auth_header, request.method, debug=debug) except ValueError: raise cherrypy.HTTPError(400, "The Authorization header could not be parsed.") - + if debug: TRACE(str(auth)) - + if auth.validate_nonce(realm, key): ha1 = get_ha1(realm, auth.username) if ha1 is not None: @@ -355,7 +356,7 @@ def digest_auth(realm, get_ha1, key, debug=False): if debug: TRACE("authentication of %s successful" % auth.username) return - + # Respond with 401 status and a WWW-Authenticate header header = www_authenticate(realm, key, stale=nonce_is_stale) if debug: From 1ed30fdcb5265a19a15eeb5d341969629255bd9a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 15:46:22 +0530 Subject: [PATCH 21/27] Content server: Workaround for android stock browser not support HTTP AUTH. 
--- src/calibre/library/server/base.py | 23 ++++++------ src/calibre/library/server/content.py | 3 +- src/calibre/library/server/utils.py | 50 ++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 12 deletions(-) diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index 46db62a299..0b5fead634 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -15,7 +15,7 @@ from cherrypy.process.plugins import SimplePlugin from calibre.constants import __appname__, __version__ from calibre.utils.date import fromtimestamp from calibre.library.server import listen_on, log_access_file, log_error_file -from calibre.library.server.utils import expose +from calibre.library.server.utils import expose, AuthController from calibre.utils.mdns import publish as publish_zeroconf, \ stop_server as stop_zeroconf, get_external_ip from calibre.library.server.content import ContentServer @@ -31,10 +31,11 @@ from calibre import prints, as_unicode class DispatchController(object): # {{{ - def __init__(self, prefix, wsgi=False): + def __init__(self, prefix, wsgi=False, auth_controller=None): self.dispatcher = cherrypy.dispatch.RoutesDispatcher() self.funcs = [] self.seen = set() + self.auth_controller = auth_controller self.prefix = prefix if prefix else '' if wsgi: self.prefix = '' @@ -44,6 +45,7 @@ class DispatchController(object): # {{{ raise NameError('Route name: '+ repr(name) + ' already used') self.seen.add(name) kwargs['action'] = 'f_%d'%len(self.funcs) + aw = kwargs.pop('android_workaround', False) if route != '/': route = self.prefix + route elif self.prefix: @@ -52,6 +54,8 @@ class DispatchController(object): # {{{ self.dispatcher.connect(name+'prefix_extra_trailing', self.prefix+'/', self, **kwargs) self.dispatcher.connect(name, route, self, **kwargs) + if self.auth_controller is not None: + func = self.auth_controller(func, aw) self.funcs.append(expose(func)) def __getattr__(self, attr): @@ -156,6 +160,8 @@ 
class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, self.config = {} self.is_running = False self.exception = None + auth_controller = None + self.users_dict = {} #self.config['/'] = { # 'tools.sessions.on' : True, # 'tools.sessions.timeout': 60, # Session times out after 60 minutes @@ -171,15 +177,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, } if opts.password: - self.config['/'] = { - 'tools.digest_auth.on' : True, - 'tools.digest_auth.realm' : ( - 'Your calibre library. Username: ' - + opts.username.strip()), - 'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()}, - } + self.users_dict[opts.username.strip()] = opts.password.strip() + auth_controller = AuthController('Your calibre library', + self.users_dict) - self.__dispatcher__ = DispatchController(self.opts.url_prefix, wsgi) + self.__dispatcher__ = DispatchController(self.opts.url_prefix, + wsgi=wsgi, auth_controller=auth_controller) for x in self.__class__.__bases__: if hasattr(x, 'add_routes'): x.__init__(self) diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 8ab44d27f3..5b723d078e 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -41,7 +41,8 @@ class ContentServer(object): connect('root', '/', self.index) connect('old', '/old', self.old) connect('get', '/get/{what}/{id}', self.get, - conditions=dict(method=["GET", "HEAD"])) + conditions=dict(method=["GET", "HEAD"]), + android_workaround=True) connect('static', '/static/{name:.*?}', self.static, conditions=dict(method=["GET", "HEAD"])) connect('favicon', '/favicon.png', self.favicon, diff --git a/src/calibre/library/server/utils.py b/src/calibre/library/server/utils.py index 111f535686..1c58e4fa8e 100644 --- a/src/calibre/library/server/utils.py +++ b/src/calibre/library/server/utils.py @@ -5,11 +5,12 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ 
= 'restructuredtext en' -import time, sys +import time, sys, uuid, hashlib from urllib import quote as quote_, unquote as unquote_ from functools import wraps import cherrypy +from cherrypy.lib.auth_digest import digest_auth, get_ha1_dict_plain from calibre import strftime as _strftime, prints, isbytestring from calibre.utils.date import now as nowf @@ -58,6 +59,53 @@ def expose(func): return do +class AuthController(object): + + MAX_AGE = 3600 # Number of seconds after a successful digest auth for which + # the cookie auth will be allowed + + def __init__(self, realm, users_dict): + self.realm = realm + self.users_dict = users_dict + self.secret = bytes(uuid.uuid4().hex) + self.cookie_name = 'android_workaround' + + def hashit(self, raw): + return hashlib.sha1(raw).hexdigest() + + def __call__(self, func, allow_cookie_auth): + + @wraps(func) + def authenticate(*args, **kwargs): + cookie = cherrypy.request.cookie.get(self.cookie_name, None) + if not (allow_cookie_auth and self.is_valid(cookie)): + digest_auth(self.realm, get_ha1_dict_plain(self.users_dict), + self.secret) + + cookie = cherrypy.response.cookie + cookie[self.cookie_name] = self.generate_cookie() + cookie[self.cookie_name]['path'] = '/' + cookie[self.cookie_name]['version'] = '1' + + return func(*args, **kwargs) + + authenticate.im_self = func.im_self + return authenticate + + def generate_cookie(self, timestamp=None): + timestamp = int(time.time()) if timestamp is None else timestamp + key = self.hashit('%d:%s'%(timestamp, self.secret)) + return '%d:%s'%(timestamp, key) + + def is_valid(self, cookie): + try: + timestamp, hashpart = cookie.value.split(':', 1) + timestamp = int(timestamp) + except: + return False + s_timestamp, s_hashpart = self.generate_cookie(timestamp).split(':', 1) + is_valid = s_hashpart == hashpart + return (is_valid and (time.time() - timestamp) < self.MAX_AGE) def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None): if not hasattr(dt, 'timetuple'): From 
fcbbd51cd5b65adcb6c8b1f57147a295048d270c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 16:55:58 +0530 Subject: [PATCH 22/27] ... --- src/calibre/library/server/utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/calibre/library/server/utils.py b/src/calibre/library/server/utils.py index 1c58e4fa8e..f9ff6f5b17 100644 --- a/src/calibre/library/server/utils.py +++ b/src/calibre/library/server/utils.py @@ -61,6 +61,14 @@ def expose(func): class AuthController(object): + ''' + Implement Digest authentication for the content server. Android browsers + cannot handle HTTP AUTH when downloading files, as the download is handed + off to a separate process. So we use a cookie based authentication scheme + for some endpoints (/get) to allow downloads to work on android. Apparently, + cookies are passed to the download process. + ''' + MAX_AGE = 3600 # Number of seconds after a successful digest auth for which # the cookie auth will be allowed @@ -93,11 +101,21 @@ class AuthController(object): return authenticate def generate_cookie(self, timestamp=None): + ''' + Generate a cookie. The cookie contains a plain text timestamp and a + hash of the timestamp and the server secret. + ''' timestamp = int(time.time()) if timestamp is None else timestamp key = self.hashit('%d:%s'%(timestamp, self.secret)) return '%d:%s'%(timestamp, key) def is_valid(self, cookie): + ''' + Check that cookie has not been spoofed (i.e. verify the declared + timestamp against the hashed timestamp). If the timestamps match, check + that the cookie has not expired. Return True iff the cookie has not + been spoofed and has not expired. + ''' try: timestamp, hashpart = cookie.value.split(':', 1) timestamp = int(timestamp) From 3a986cfd43a0211b9f25dce16fa3d0472f6f8336 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 17:11:15 +0530 Subject: [PATCH 23/27] ... 
--- src/calibre/library/server/utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/server/utils.py b/src/calibre/library/server/utils.py index f9ff6f5b17..39b4ffff54 100644 --- a/src/calibre/library/server/utils.py +++ b/src/calibre/library/server/utils.py @@ -66,7 +66,14 @@ class AuthController(object): cannot handle HTTP AUTH when downloading files, as the download is handed off to a separate process. So we use a cookie based authentication scheme for some endpoints (/get) to allow downloads to work on android. Apparently, - cookies are passed to the download process. + cookies are passed to the download process. The cookie expires after + MAX_AGE seconds. + + Note that this makes the server vulnerable to session-hijacking (i.e. some + one can sniff the traffic and create their own requests to /get with the + appropriate cookie, for an hour). The fix is to use https, but since this + is usually run as a private server, that cannot be done. If you care about + this vulnerability, run the server behind a reverse proxy that uses HTTPS. 
''' MAX_AGE = 3600 # Number of seconds after a successful digest auth for which From 9bb9ad53f1f7492cc69860064df80095fdf61740 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 17:40:03 +0530 Subject: [PATCH 24/27] E-book viewer: When reading a MOBI file that is actually a KF8 book, show the format as being KF8 --- src/calibre/ebooks/conversion/plugins/mobi_input.py | 2 ++ src/calibre/ebooks/oeb/iterator.py | 4 ++++ src/calibre/gui2/viewer/main.py | 5 +++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/mobi_input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py index 49a57cbde1..0e12dd5db7 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_input.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py @@ -32,6 +32,7 @@ class MOBIInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + self.is_kf8 = False if os.environ.get('USE_MOBIUNPACK', None) is not None: pos = stream.tell() @@ -62,6 +63,7 @@ class MOBIInput(InputFormatPlugin): mr = Mobi8Reader(mr, log) opf = os.path.abspath(mr()) self.encrypted_fonts = mr.encrypted_fonts + self.is_kf8 = True return opf raw = parse_cache.pop('calibre_raw_mobi_markup', False) diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 3f2f7584c0..03e96f425f 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -217,6 +217,10 @@ class EbookIterator(object): if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) + self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper() + if getattr(plumber.input_plugin, 'is_kf8', False): + self.book_format = 'KF8' + self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None) if self.opf is None: self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf)) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 
a0ea6ed914..c22603cee5 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -822,7 +822,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): as_unicode(r), det_msg=worker.traceback, show=True) self.close_progress_indicator() else: - self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:]) + self.metadata.show_opf(self.iterator.opf, + self.iterator.book_format) self.view.current_language = self.iterator.language title = self.iterator.opf.title if not title: @@ -849,7 +850,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.current_book_has_toc = bool(self.iterator.toc) self.current_title = title self.setWindowTitle(self.base_window_title+' - '+title + - ' [%s]'%os.path.splitext(pathtoebook)[1][1:].upper()) + ' [%s]'%self.iterator.book_format) self.pos.setMaximum(sum(self.iterator.pages)) self.pos.setSuffix(' / %d'%sum(self.iterator.pages)) self.vertical_scrollbar.setMinimum(100) From 734a0ba2e29cf4c199d2c5d4cd633cc706fa1ea3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 18:40:17 +0530 Subject: [PATCH 25/27] ... --- src/calibre/gui2/preferences/server.ui | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/calibre/gui2/preferences/server.ui b/src/calibre/gui2/preferences/server.ui index 64af212265..a229da594f 100644 --- a/src/calibre/gui2/preferences/server.ui +++ b/src/calibre/gui2/preferences/server.ui @@ -35,9 +35,7 @@ <p>If you leave the password blank, anyone will be able to access your book collection using the web interface. <br> -<p>Note that passwords do not work with Android devices. -Leave this blank if you intend to use the server with an - Android phone or tablet. +<p>Some devices have browsers that do not support authentication. If you are having trouble downloading files from the content server, try removing the password. 
@@ -167,17 +165,13 @@ Leave this blank if you intend to use the server with an - <p>Because of a bug in Google's Android, setting a password - will prevent the server from working with Android devices. -<br> -<p>Do not set a password if you plan to use the server with an - Android phone or tablet. + <p>Some devices have browsers that do not support authentication. If you are having trouble downloading files from the content server, try removing the password. QLabel {color:red} - Password incompatible with Android devices + Password incompatible with some devices From 5955aa775f616f448f14e9779186dbb89bfaf7a6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Apr 2012 18:45:35 +0530 Subject: [PATCH 26/27] ... --- src/calibre/library/server/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/calibre/library/server/utils.py b/src/calibre/library/server/utils.py index 39b4ffff54..db90e42fe7 100644 --- a/src/calibre/library/server/utils.py +++ b/src/calibre/library/server/utils.py @@ -69,6 +69,13 @@ class AuthController(object): cookies are passed to the download process. The cookie expires after MAX_AGE seconds. + The android browser appears to send a GET request to the server and only if + that request succeeds is the download handed off to the download process. + Therefore, even if the user clicks Get after MAX_AGE, it should still work. + In fact, we could reduce MAX_AGE, but we leave it high as the download + process might have downloads queued and therefore not start the download + immediately. + Note that this makes the server vulnerable to session-hijacking (i.e. some one can sniff the traffic and create their own requests to /get with the appropriate cookie, for an hour). The fix is to use https, but since this From 60edd8c7b192193ba084de829893f3b1120fb90f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Apr 2012 11:22:08 +0530 Subject: [PATCH 27/27] ...
--- src/calibre/gui2/tools.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 242cac5d79..36c1a6f30f 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -241,12 +241,6 @@ def fetch_scheduled_recipe(arg): # {{{ if 'output_profile' in ps: recs.append(('output_profile', ps['output_profile'], OptionRecommendation.HIGH)) - # Disabled since apparently some people use - # K4PC and, surprise, surprise, it doesn't support - # indexed MOBIs. - #if ps['output_profile'] == 'kindle': - # recs.append(('no_inline_toc', True, - # OptionRecommendation.HIGH)) lf = load_defaults('look_and_feel') if lf.get('base_font_size', 0.0) != 0.0: