diff --git a/recipes/high_country_news.recipe b/recipes/high_country_news.recipe
index 15db60a957..91602d950b 100644
--- a/recipes/high_country_news.recipe
+++ b/recipes/high_country_news.recipe
@@ -13,7 +13,7 @@ class HighCountryNews(BasicNewsRecipe):
__author__ = 'Armin Geller' # 2012-01-31
publisher = 'High Country News'
timefmt = ' [%a, %d %b %Y]'
- language = 'en-Us'
+ language = 'en'
encoding = 'UTF-8'
publication_type = 'newspaper'
oldest_article = 7
diff --git a/recipes/oreilly_premium.recipe b/recipes/oreilly_premium.recipe
index 9dc11059c4..4a9b9e54c3 100644
--- a/recipes/oreilly_premium.recipe
+++ b/recipes/oreilly_premium.recipe
@@ -1,45 +1,73 @@
-# Talking Points is not grabbing everything.
-# The look is right, but only the last one added?
-import re
+import string, re
import time
+import traceback
+# above for debugging via stack
from calibre.web.feeds.recipes import BasicNewsRecipe
# Allows the Python soup converter, which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
-# strip ads and graphics
-# Current Column lacks a title.
-# Talking Points Memo - shorten title - Remove year and Bill's name
-# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
-# Newsletters: Talking Points Memos covered by cat12
+import os, time, traceback, re, urlparse, sys, cStringIO
+from collections import defaultdict
+from functools import partial
+from contextlib import nested, closing
+
+
+from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
+from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
+
+
+# TODO: strip ads and graphics; Current Column lacks a title.
+# The newsletter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
+# Newsletters: Talking Points Memos covered by cat12
+# ./ebook-convert --username xxx --password xxx
+
+# this is derived from BasicNewsRecipe, so it can only overload those.
+# Some of what we need is otherwise in article, so we have more copy to do than otherwise.
class OReillyPremium(BasicNewsRecipe):
title = u'OReilly Premium'
__author__ = 'TMcN'
- language = 'en'
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
+ custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y')
+ title = 'Bill O\'Reilly Premium'
auto_cleanup = True
+ conversion_options = {'linearize_tables': True}
encoding = 'utf8'
- needs_subscription = True
+ language = 'en'
no_stylesheets = True
- oldest_article = 20
+ needs_subscription = True
+ oldest_article = 31
remove_javascript = True
remove_tags = [dict(name='img', attrs={})]
# Don't go down
recursions = 0
- max_articles_per_feed = 2000
-
+ max_articles_per_feed = 20
+
debugMessages = True
-
+
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
- ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
- ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
- ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
- ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
+ # ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
+ # ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
+ # ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
+ # ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
]
-
+
+ feeds = [
+ (u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'),
+ (u'Daily Briefing', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'),
+ (u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'),
+ (u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'),
+ (u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5')
+ ]
+ # http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day.
+
+ # Note: Talking Points is broken in the above model; the site changed to more Ajax-y.
+ # Now using RSS
+
def get_browser(self):
+ print("In get_browser")
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
@@ -48,7 +76,7 @@ class OReillyPremium(BasicNewsRecipe):
br['formPasswordField'] = self.password
br.submit()
return br
-
+
# Returns the best-guess print url.
# The second parameter (pageURL) is returned if nothing is found.
def extractPrintURL(self, baseURL, pageURL, printString):
@@ -62,17 +90,19 @@ class OReillyPremium(BasicNewsRecipe):
tag = printText.parent
tagURL = baseURL+tag['href']
return tagURL
-
+
def stripBadChars(self, inString) :
return inString.replace("\'", "")
-
+
+
def parseGeneric(self, baseURL):
- # Does a generic parsing of the articles. There are six categories (0-5)
+ # Does a generic parsing of the articles. There are six categories (0-5)
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
# NoSpin and TV are generic
fullReturn = []
- for i in range(len(self.catList)) :
+ for i in range(len(self.catList)) :
articleList = []
+ print("In "+self.catList[i][0]+", index: "+ str(i))
soup = self.index_to_soup(self.catList[i][1])
# Set defaults
description = 'None'
@@ -80,15 +110,13 @@ class OReillyPremium(BasicNewsRecipe):
# Problem: 0-2 create many in an array
# 3-5 create one.
# So no for-div for 3-5
-
- if i < 3 :
+
+ if i == 0 :
+ print("Starting TV Archives")
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
+ print("Next DIV:")
print(div)
- if i == 1:
- a = div.find('a', href=True)
- else :
- a = div
- print(a)
+ a = div
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
@@ -96,82 +124,63 @@ class OReillyPremium(BasicNewsRecipe):
continue
# url = baseURL+re.sub(r'\?.*', '', a['href'])
url = baseURL+a['href']
- if i < 2 :
- url = self.extractPrintURL(baseURL, url, "Print this entry")
- title = self.tag_to_string(a, use_alt=True).strip()
- elif i == 2 :
- # Daily Briefs
- url = self.extractPrintURL(baseURL, url, "Print this entry")
- title = div.contents[0]
- if self.debugMessages :
- print(title+" @ "+url)
+ url = self.extractPrintURL(baseURL, url, "Print this entry")
+ title = self.tag_to_string(a, use_alt=True).strip()
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
- elif i == 3 : # Stratfor
- a = soup.find('a', self.catList[i][3])
- if a is None :
- continue
- url = baseURL+a['href']
- title = self.tag_to_string(a, use_alt=True).strip()
- # Get Stratfor contents so we can get the real title.
- stratSoup = self.index_to_soup(url)
- title = stratSoup.html.head.title.string
- stratIndex = title.find('Stratfor.com:', 0)
- if (stratIndex > -1) :
- title = title[stratIndex+14:-1]
- # Look for first blogBody
2K, it is used as the article.
+
+
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# returns a list of tuple ('feed title', list of articles)
# {
@@ -182,16 +191,148 @@ class OReillyPremium(BasicNewsRecipe):
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
# }
# this is used instead of BasicNewsRecipe.parse_feeds().
+ # it is called by download
def parse_index(self):
# Parse the page into Python Soup
+ print("Entering recipe print_index from:")
+ traceback.print_stack()
+ print("web")
baseURL = "https://www.billoreilly.com"
- return self.parseGeneric(baseURL)
-
+ masterList = self.parseGeneric(baseURL)
+ #print(masterList)
+ return masterList
+
def preprocess_html(self, soup):
+ print("In preprocess_html")
refresh = soup.find('meta', {'http-equiv':'refresh'})
if refresh is None:
return soup
content = refresh.get('content').partition('=')[2]
raw = self.browser.open('https://www.billoreilly.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))
+
+ def build_index(self):
+ print("In OReilly build_index()\n\n")
+ feedsRSS = []
+ self.report_progress(0, _('Fetching feeds...'))
+ #try:
+ feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
+ max_articles_per_feed=self.max_articles_per_feed,
+ log=self.log)
+ self.report_progress(0, _('Got feeds from index page'))
+ #except NotImplementedError:
+ # feeds = self.parse_feeds()
+ # Now add regular feeds.
+ feedsRSS = self.parse_feeds()
+ print ("feedsRSS is type "+feedsRSS.__class__.__name__)
+
+ for articles in feedsRSS:
+ print("articles is type "+articles.__class__.__name__)
+ print("Title:" + articles.title)
+ feeds.append(articles)
+ if not feeds:
+ raise ValueError('No articles found, aborting')
+
+ #feeds = FeedCollection(feeds)
+
+ self.report_progress(0, _('Trying to download cover...'))
+ self.download_cover()
+ self.report_progress(0, _('Generating masthead...'))
+ self.masthead_path = None
+
+ try:
+ murl = self.get_masthead_url()
+ except:
+ self.log.exception('Failed to get masthead url')
+ murl = None
+
+ if murl is not None:
+ # Try downloading the user-supplied masthead_url
+ # Failure sets self.masthead_path to None
+ self.download_masthead(murl)
+ if self.masthead_path is None:
+ self.log.info("Synthesizing mastheadImage")
+ self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
+ try:
+ self.default_masthead_image(self.masthead_path)
+ except:
+ self.log.exception('Failed to generate default masthead image')
+ self.masthead_path = None
+
+ if self.test:
+ feeds = feeds[:2]
+ self.has_single_feed = len(feeds) == 1
+
+ index = os.path.join(self.output_dir, 'index.html')
+
+ html = self.feeds2index(feeds)
+ with open(index, 'wb') as fi:
+ fi.write(html)
+
+ self.jobs = []
+
+ if self.reverse_article_order:
+ for feed in feeds:
+ if hasattr(feed, 'reverse'):
+ feed.reverse()
+
+ self.feed_objects = feeds
+ for f, feed in enumerate(feeds):
+ feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
+ if not os.path.isdir(feed_dir):
+ os.makedirs(feed_dir)
+
+ for a, article in enumerate(feed):
+ if a >= self.max_articles_per_feed:
+ break
+ art_dir = os.path.join(feed_dir, 'article_%d'%a)
+ if not os.path.isdir(art_dir):
+ os.makedirs(art_dir)
+ try:
+ url = self.print_version(article.url)
+ except NotImplementedError:
+ url = article.url
+ except:
+ self.log.exception('Failed to find print version for: '+article.url)
+ url = None
+ if not url:
+ continue
+ func, arg = (self.fetch_embedded_article, article) \
+ if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
+ else \
+ ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
+ else self.fetch_article), url)
+ req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
+ {}, (f, a), self.article_downloaded,
+ self.error_in_article_download)
+ req.feed = feed
+ req.article = article
+ req.feed_dir = feed_dir
+ self.jobs.append(req)
+
+
+ self.jobs_done = 0
+ tp = ThreadPool(self.simultaneous_downloads)
+ for req in self.jobs:
+ tp.putRequest(req, block=True, timeout=0)
+
+
+ self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
+ while True:
+ try:
+ tp.poll()
+ time.sleep(0.1)
+ except NoResultsPending:
+ break
+ for f, feed in enumerate(feeds):
+ print("Writing feeds for "+feed.title)
+ html = self.feed2index(f,feeds)
+ feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
+ with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
+ fi.write(html)
+ self.create_opf(feeds)
+ self.report_progress(1, _('Feeds downloaded to %s')%index)
+
+ return index
+
diff --git a/recipes/real_clear.recipe b/recipes/real_clear.recipe
index 19add74fcd..2dfe56d207 100644
--- a/recipes/real_clear.recipe
+++ b/recipes/real_clear.recipe
@@ -1,7 +1,9 @@
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
+import string, re
import time
+from urlparse import urlparse
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import NavigableString
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
class RealClear(BasicNewsRecipe):
title = u'Real Clear'
@@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe):
# Don't go down
recursions = 0
max_articles_per_feed = 400
- debugMessages = False
-
- # Numeric parameter is type, controls whether we look for
+ debugMessages = True
+
+ # Numeric parameter is type, controls whether we look for
feedsets = [
- ["Politics", "http://www.realclearpolitics.com/index.xml", 0],
- ["Science", "http://www.realclearscience.com/index.xml", 0],
+ ["Politics", "http://www.realclearpolitics.com/index.xml", 0],
+ ["Policy", "http://www.realclearpolicy.com/index.xml", 0],
+ ["Science", "http://www.realclearscience.com/index.xml", 0],
["Tech", "http://www.realcleartechnology.com/index.xml", 0],
# The feedburner is essentially the same as the top feed, politics.
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
@@ -37,22 +40,37 @@ class RealClear(BasicNewsRecipe):
]
# Hints to extractPrintURL.
# First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
- printhints = [
+ phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
+
+ printhints = [ ["realclear", "", '' , 'printpage'],
["billoreilly.com", "Print this entry", 'a', ''],
["billoreilly.com", "Print This Article", 'a', ''],
- ["politico.com", "Print", 'a', 'share-print'],
+ ["politico.com", "Print", 'a', 'share-print'],
["nationalreview.com", ">Print<", 'a', ''],
["reason.com", "", 'a', 'printer']
# The following are not supported due to JavaScripting, and would require obfuscated_article to handle
- # forbes,
+ # forbes,
# usatoday - just prints with all current crap anyhow
-
+
]
-
+ # RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html
+    # The print link isn't obvious, and only the end is needed (the "-full" append), so maybe try that first?
+ # http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html
+ # Single page articles don't have a _full; e.g. http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html
+ # Use the FULL PRINTPAGE URL; it formats it better too!
+ #
+ # NYT - try single page...
+ # Need special code - is it one page or several? Which URL?
+ # from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1
+ # to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all
+ # which is at link rel="canonical" and at 0 and len(self.printhints[x][1]) == 0:
+ if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
+ # e.g. RealClear
if self.debugMessages == True :
- print("search1")
+ print("Search by href: "+self.printhints[x][self.phHrefSearch])
+ printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
+ elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
+ if self.debugMessages == True :
+ print("Search 1: "+self.printhints[x][2]+" Attributes: ")
+ print(self.printhints[x][3])
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
elif len(self.printhints[x][3])>0 :
if self.debugMessages == True :
print("search2")
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
else :
+ if self.debugMessages == True:
+ print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
if printFind is None:
if self.debugMessages == True :
print("Not Found")
+ # print(soup)
+ print("end soup\n\n");
continue
+
print(printFind)
if isinstance(printFind, NavigableString)==False:
if printFind['href'] is not None:
+ print("Check "+printFind['href']+" for base of "+baseURL)
+ if printFind['href'].find("http")!=0 :
+ return baseURL+printFind['href']
return printFind['href']
tag = printFind.parent
print(tag)
@@ -98,7 +130,7 @@ class RealClear(BasicNewsRecipe):
print("In get_browser")
br = BasicNewsRecipe.get_browser()
return br
-
+
def parseRSS(self, index) :
if self.debugMessages == True :
print("\n\nStarting "+self.feedsets[index][0])
@@ -128,7 +160,7 @@ class RealClear(BasicNewsRecipe):
pubDateEl = div.find("pubDate")
if pubDateEl is None :
pubDateEl = div.find("pubdate")
- if pubDateEl is None :
+ if pubDateEl is None :
pubDate = time.strftime('%a, %d %b')
else :
pubDate = pubDateEl.contents[0]
@@ -144,7 +176,7 @@ class RealClear(BasicNewsRecipe):
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
return articleList
-
+
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# returns a list of tuple ('feed title', list of articles)
# {
@@ -157,7 +189,8 @@ class RealClear(BasicNewsRecipe):
# this is used instead of BasicNewsRecipe.parse_feeds().
def parse_index(self):
# Parse the page into Python Soup
-
+
+ articleList = []
ans = []
feedsCount = len(self.feedsets)
for x in range(0,feedsCount): # should be ,4
@@ -167,4 +200,5 @@ class RealClear(BasicNewsRecipe):
if self.debugMessages == True :
print(ans)
return ans
+
diff --git a/recipes/soldiers.recipe b/recipes/soldiers.recipe
index fb96e5a2ed..a1e9e5ca23 100644
--- a/recipes/soldiers.recipe
+++ b/recipes/soldiers.recipe
@@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
+ auto_cleanup = True
+ auto_cleanup_keep = '//div[@id="mediaWrapper"]'
simultaneous_downloads = 1
delay = 4
max_connections = 1
@@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe):
, 'language' : language
}
- keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
+ #keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
- remove_tags = [
- dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
- ,dict(name=['object','link'])
- ]
+ #remove_tags = [
+ #dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
+ #,dict(name=['object','link'])
+ #]
- feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
+ feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )]
def get_cover_url(self):
diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py
index 3e251d2dcf..c5ea18e2e9 100644
--- a/setup/installer/windows/freeze.py
+++ b/setup/installer/windows/freeze.py
@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl'
-QT_DIR = 'Q:\\Qt\\4.8.0'
+QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index ce5a076fdf..e213b50bd2 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -107,6 +107,7 @@ class ANDROID(USBMS):
0xc004 : [0x0226],
0x8801 : [0x0226, 0x0227],
0xe115 : [0x0216], # PocketBook A10
+ 0xe107 : [0x326], # PocketBook 622
},
# Acer
diff --git a/src/calibre/ebooks/metadata/sources/worker.py b/src/calibre/ebooks/metadata/sources/worker.py
new file mode 100644
index 0000000000..f2db60e01f
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/worker.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import os
+from threading import Event
+from io import BytesIO
+
+from calibre.utils.date import as_utc
+from calibre.ebooks.metadata.sources.identify import identify, msprefs
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.customize.ui import metadata_plugins
+from calibre.ebooks.metadata.sources.covers import download_cover
+from calibre.utils.logging import GUILog
+from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
+
+def merge_result(oldmi, newmi, ensure_fields=None):
+ dummy = Metadata(_('Unknown'))
+ for f in msprefs['ignore_fields']:
+ if ':' in f or (ensure_fields and f in ensure_fields):
+ continue
+ setattr(newmi, f, getattr(dummy, f))
+ fields = set()
+ for plugin in metadata_plugins(['identify']):
+ fields |= plugin.touched_fields
+
+ def is_equal(x, y):
+ if hasattr(x, 'tzinfo'):
+ x = as_utc(x)
+ if hasattr(y, 'tzinfo'):
+ y = as_utc(y)
+ return x == y
+
+ for f in fields:
+ # Optimize so that set_metadata does not have to do extra work later
+ if not f.startswith('identifier:'):
+ if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
+ getattr(oldmi, f))):
+ setattr(newmi, f, getattr(dummy, f))
+
+ return newmi
+
+def main(do_identify, covers, metadata, ensure_fields):
+ failed_ids = set()
+ failed_covers = set()
+ all_failed = True
+ log = GUILog()
+
+ for book_id, mi in metadata.iteritems():
+ mi = OPF(BytesIO(mi), basedir=os.getcwdu(),
+ populate_spine=False).to_book_metadata()
+ title, authors, identifiers = mi.title, mi.authors, mi.identifiers
+ cdata = None
+ log.clear()
+
+ if do_identify:
+ results = []
+ try:
+ results = identify(log, Event(), title=title, authors=authors,
+ identifiers=identifiers)
+ except:
+ pass
+ if results:
+ all_failed = False
+ mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
+ identifiers = mi.identifiers
+ if not mi.is_null('rating'):
+ # set_metadata expects a rating out of 10
+ mi.rating *= 2
+ with open('%d.mi'%book_id, 'wb') as f:
+ f.write(metadata_to_opf(mi, default_lang='und'))
+ else:
+ log.error('Failed to download metadata for', title)
+ failed_ids.add(book_id)
+
+ if covers:
+ cdata = download_cover(log, title=title, authors=authors,
+ identifiers=identifiers)
+ if cdata is None:
+ failed_covers.add(book_id)
+ else:
+ with open('%d.cover'%book_id, 'wb') as f:
+ f.write(cdata[-1])
+ all_failed = False
+
+ with open('%d.log'%book_id, 'wb') as f:
+ f.write(log.plain_text.encode('utf-8'))
+
+ return failed_ids, failed_covers, all_failed
+
diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py
index d2254e00d8..0ca5341780 100644
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -10,13 +10,19 @@ __docformat__ = 'restructuredtext en'
import struct, re, os, imghdr
from collections import namedtuple
from itertools import repeat
+from urlparse import urldefrag
+
+from lxml import etree
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import read_index
from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
+from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.utils import read_font_record
+from calibre.ebooks.oeb.parse_utils import parse_html
+from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
Part = namedtuple('Part',
'num type filename start end aid')
@@ -383,6 +389,19 @@ class Mobi8Reader(object):
len(resource_map)):
mi.cover = resource_map[self.cover_offset]
+ if len(list(toc)) < 2:
+ self.log.warn('KF8 has no metadata Table of Contents')
+
+ for ref in guide:
+ if ref.type == 'toc':
+ href = ref.href()
+ href, frag = urldefrag(href)
+ if os.path.exists(href.replace('/', os.sep)):
+ try:
+ toc = self.read_inline_toc(href, frag)
+ except:
+ self.log.exception('Failed to read inline ToC')
+
opf = OPFCreator(os.getcwdu(), mi)
opf.guide = guide
@@ -397,4 +416,70 @@ class Mobi8Reader(object):
opf.render(of, ncx, 'toc.ncx')
return 'metadata.opf'
+ def read_inline_toc(self, href, frag):
+ ans = TOC()
+ base_href = '/'.join(href.split('/')[:-1])
+ with open(href.replace('/', os.sep), 'rb') as f:
+ raw = f.read().decode(self.header.codec)
+ root = parse_html(raw, log=self.log)
+ body = XPath('//h:body')(root)
+ reached = False
+ if body:
+ start = body[0]
+ else:
+ start = None
+ reached = True
+ if frag:
+ elems = XPath('//*[@id="%s"]'%frag)
+ if elems:
+ start = elems[0]
+
+ def node_depth(elem):
+ ans = 0
+ parent = elem.getparent()
+ while parent is not None:
+ parent = parent.getparent()
+ ans += 1
+ return ans
+
+ # Layer the ToC based on nesting order in the source HTML
+ current_depth = None
+ parent = ans
+ seen = set()
+ links = []
+ for elem in root.iterdescendants(etree.Element):
+ if reached and elem.tag == XHTML('a') and elem.get('href',
+ False):
+ href = elem.get('href')
+ href, frag = urldefrag(href)
+ href = base_href + '/' + href
+ text = xml2text(elem).strip()
+ if (text, href, frag) in seen:
+ continue
+ seen.add((text, href, frag))
+ links.append((text, href, frag, node_depth(elem)))
+ elif elem is start:
+ reached = True
+
+ depths = sorted(set(x[-1] for x in links))
+ depth_map = {x:i for i, x in enumerate(depths)}
+ for text, href, frag, depth in links:
+ depth = depth_map[depth]
+ if current_depth is None:
+ current_depth = 0
+ parent.add_item(href, frag, text)
+ elif current_depth == depth:
+ parent.add_item(href, frag, text)
+ elif current_depth < depth:
+ parent = parent[-1] if len(parent) > 0 else parent
+ parent.add_item(href, frag, text)
+ current_depth += 1
+ else:
+ delta = current_depth - depth
+ while delta > 0 and parent.parent is not None:
+ parent = parent.parent
+ delta -= 1
+ parent.add_item(href, frag, text)
+ current_depth = depth
+ return ans
diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py
index beeb31f3c5..a680d61188 100644
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@@ -40,27 +40,34 @@ def get_custom_size(opts):
custom_size = None
return custom_size
-def get_pdf_printer(opts, for_comic=False):
+def get_pdf_printer(opts, for_comic=False, output_file_name=None):
from calibre.gui2 import is_ok_to_use_qt
if not is_ok_to_use_qt():
raise Exception('Not OK to use Qt')
printer = QPrinter(QPrinter.HighResolution)
custom_size = get_custom_size(opts)
-
- if opts.output_profile.short_name == 'default' or \
- opts.output_profile.width > 9999:
- if custom_size is None:
- printer.setPaperSize(paper_size(opts.paper_size))
- else:
- printer.setPaperSize(QSizeF(custom_size[0], custom_size[1]), unit(opts.unit))
+ if isosx and not for_comic:
+ # On OSX, the native engine can only produce a single page size
+ # (usually A4). The Qt engine on the other hand produces image based
+ # PDFs. If we set a custom page size using QSizeF the native engine
+ # produces unreadable output, so we just ignore the custom size
+ # settings.
+ printer.setPaperSize(paper_size(opts.paper_size))
else:
- w = opts.output_profile.comic_screen_size[0] if for_comic else \
- opts.output_profile.width
- h = opts.output_profile.comic_screen_size[1] if for_comic else \
- opts.output_profile.height
- dpi = opts.output_profile.dpi
- printer.setPaperSize(QSizeF(float(w) / dpi, float(h) / dpi), QPrinter.Inch)
+ if opts.output_profile.short_name == 'default' or \
+ opts.output_profile.width > 9999:
+ if custom_size is None:
+ printer.setPaperSize(paper_size(opts.paper_size))
+ else:
+ printer.setPaperSize(QSizeF(custom_size[0], custom_size[1]), unit(opts.unit))
+ else:
+ w = opts.output_profile.comic_screen_size[0] if for_comic else \
+ opts.output_profile.width
+ h = opts.output_profile.comic_screen_size[1] if for_comic else \
+ opts.output_profile.height
+ dpi = opts.output_profile.dpi
+ printer.setPaperSize(QSizeF(float(w) / dpi, float(h) / dpi), QPrinter.Inch)
if for_comic:
# Comic pages typically have their own margins, or their background
@@ -72,6 +79,12 @@ def get_pdf_printer(opts, for_comic=False):
printer.setOrientation(orientation(opts.orientation))
printer.setOutputFormat(QPrinter.PdfFormat)
printer.setFullPage(for_comic)
+ if output_file_name:
+ printer.setOutputFileName(output_file_name)
+ if isosx and not for_comic:
+ # Ensure we are not generating enormous image based PDFs
+ printer.setOutputFormat(QPrinter.NativeFormat)
+
return printer
def get_printer_page_size(opts, for_comic=False):
@@ -163,15 +176,7 @@ class PDFWriter(QObject): # {{{
if ok:
item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue))
self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
- printer = get_pdf_printer(self.opts)
- printer.setOutputFileName(item_path)
- # We have to set the engine to Native on OS X after the call to set
- # filename. Setting a filename with .pdf as the extension causes
- # Qt to set the format to use Qt's PDF engine even if native was
- # previously set on the printer. Qt's PDF engine produces image
- # based PDFs on OS X, so we cannot use it.
- if isosx:
- printer.setOutputFormat(QPrinter.NativeFormat)
+ printer = get_pdf_printer(self.opts, output_file_name=item_path)
self.view.page().mainFrame().evaluateJavaScript('''
document.body.style.backgroundColor = "white";
@@ -193,10 +198,7 @@ class PDFWriter(QObject): # {{{
if self.cover_data is None:
return
item_path = os.path.join(self.tmp_path, 'cover.pdf')
- printer = get_pdf_printer(self.opts)
- printer.setOutputFileName(item_path)
- if isosx:
- printer.setOutputFormat(QPrinter.NativeFormat)
+ printer = get_pdf_printer(self.opts, output_file_name=item_path)
self.combine_queue.insert(0, item_path)
p = QPixmap()
p.loadFromData(self.cover_data)
@@ -248,10 +250,8 @@ class ImagePDFWriter(object):
os.remove(f.name)
def render_images(self, outpath, mi, items):
- printer = get_pdf_printer(self.opts, for_comic=True)
- printer.setOutputFileName(outpath)
- if isosx:
- printer.setOutputFormat(QPrinter.NativeFormat)
+ printer = get_pdf_printer(self.opts, for_comic=True,
+ output_file_name=outpath)
printer.setDocName(mi.title)
printer.setCreator(u'%s [%s]'%(__appname__, __version__))
# Seems to be no way to set author
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index e6d4ccaac0..d334816985 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -105,6 +105,7 @@ gprefs.defaults['show_files_after_save'] = True
gprefs.defaults['auto_add_path'] = None
gprefs.defaults['auto_add_check_for_duplicates'] = False
gprefs.defaults['blocked_auto_formats'] = []
+gprefs.defaults['auto_add_auto_convert'] = True
# }}}
NONE = QVariant() #: Null value to return from the data function of item models
diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py
index bbdef5b1b5..cf47684063 100644
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@@ -71,7 +71,7 @@ class AddAction(InterfaceAction):
ma('add-formats', _('Add files to selected book records'),
triggered=self.add_formats, shortcut=_('Shift+A'))
self.add_menu.addSeparator()
- ma('add-config', _('Configure the adding of books'),
+ ma('add-config', _('Control the adding of books'),
triggered=self.add_config)
self.qaction.triggered.connect(self.add_books)
diff --git a/src/calibre/gui2/actions/convert.py b/src/calibre/gui2/actions/convert.py
index fc1d166685..34e03dc275 100644
--- a/src/calibre/gui2/actions/convert.py
+++ b/src/calibre/gui2/actions/convert.py
@@ -53,6 +53,24 @@ class ConvertAction(InterfaceAction):
self.queue_convert_jobs(jobs, changed, bad, rows, previous,
self.book_auto_converted, extra_job_args=[on_card])
+ def auto_convert_auto_add(self, book_ids):
+ previous = self.gui.library_view.currentIndex()
+ db = self.gui.current_db
+ needed = set()
+ of = prefs['output_format'].lower()
+ for book_id in book_ids:
+ fmts = db.formats(book_id, index_is_id=True)
+ fmts = set(x.lower() for x in fmts.split(',')) if fmts else set()
+ if of not in fmts:
+ needed.add(book_id)
+ if needed:
+ jobs, changed, bad = convert_single_ebook(self.gui,
+ self.gui.library_view.model().db, needed, True, of,
+ show_no_format_warning=False)
+ if not jobs: return
+ self.queue_convert_jobs(jobs, changed, bad, list(needed), previous,
+ self.book_converted, rows_are_ids=True)
+
def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format, subject):
previous = self.gui.library_view.currentIndex()
rows = [x.row() for x in \
@@ -118,7 +136,7 @@ class ConvertAction(InterfaceAction):
num, 2000)
def queue_convert_jobs(self, jobs, changed, bad, rows, previous,
- converted_func, extra_job_args=[]):
+ converted_func, extra_job_args=[], rows_are_ids=False):
for func, args, desc, fmt, id, temp_files in jobs:
func, _, same_fmt = func.partition(':')
same_fmt = same_fmt == 'same_fmt'
@@ -140,7 +158,11 @@ class ConvertAction(InterfaceAction):
self.conversion_jobs[job] = tuple(args)
if changed:
- self.gui.library_view.model().refresh_rows(rows)
+ m = self.gui.library_view.model()
+ if rows_are_ids:
+ m.refresh_ids(rows)
+ else:
+ m.refresh_rows(rows)
current = self.gui.library_view.currentIndex()
self.gui.library_view.model().current_changed(current, previous)
diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index 527beae0ab..4a0d12e3d3 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os
+import os, shutil
from functools import partial
from PyQt4.Qt import QMenu, QModelIndex, QTimer
@@ -16,6 +16,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.device_category_editor import DeviceCategoryEditor
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks.metadata import authors_to_string
+from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.icu import sort_key
from calibre.db.errors import NoSuchFormat
@@ -79,14 +80,23 @@ class EditMetadataAction(InterfaceAction):
Dispatcher(self.metadata_downloaded),
ensure_fields=ensure_fields)
+ def cleanup_bulk_download(self, tdir):
+ try:
+ shutil.rmtree(tdir, ignore_errors=True)
+ except:
+ pass
+
def metadata_downloaded(self, job):
if job.failed:
self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
return
from calibre.gui2.metadata.bulk_download import get_job_details
- id_map, failed_ids, failed_covers, all_failed, det_msg = \
- get_job_details(job)
+ (aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed,
+ det_msg, lm_map) = get_job_details(job)
+ if aborted:
+ return self.cleanup_bulk_download(tdir)
if all_failed:
+ self.cleanup_bulk_download(tdir)
return error_dialog(self.gui, _('Download failed'),
_('Failed to download metadata or covers for any of the %d'
' book(s).') % len(id_map), det_msg=det_msg, show=True)
@@ -103,28 +113,26 @@ class EditMetadataAction(InterfaceAction):
msg += ' '+_('Could not download metadata and/or covers for %d of the books. Click'
' "Show details" to see which books.')%num
- payload = (id_map, failed_ids, failed_covers)
+ payload = (id_map, tdir, log_file, lm_map)
from calibre.gui2.dialogs.message_box import ProceedNotification
p = ProceedNotification(self.apply_downloaded_metadata,
- payload, job.html_details,
+ payload, log_file,
_('Download log'), _('Download complete'), msg,
det_msg=det_msg, show_copy_button=show_copy_button,
- parent=self.gui)
+ parent=self.gui, log_is_file=True)
p.show()
def apply_downloaded_metadata(self, payload):
- id_map, failed_ids, failed_covers = payload
- id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in
- failed_ids])
- if not id_map:
+ good_ids, tdir, log_file, lm_map = payload
+ if not good_ids:
return
modified = set()
db = self.gui.current_db
- for i, mi in id_map.iteritems():
+ for i in good_ids:
lm = db.metadata_last_modified(i, index_is_id=True)
- if lm > mi.last_modified:
+ if lm > lm_map[i]:
title = db.title(i, index_is_id=True)
authors = db.authors(i, index_is_id=True)
if authors:
@@ -144,7 +152,18 @@ class EditMetadataAction(InterfaceAction):
'Do you want to proceed?'), det_msg='\n'.join(modified)):
return
- self.apply_metadata_changes(id_map)
+ id_map = {}
+ for bid in good_ids:
+ opf = os.path.join(tdir, '%d.mi'%bid)
+ if not os.path.exists(opf):
+ opf = None
+ cov = os.path.join(tdir, '%d.cover'%bid)
+ if not os.path.exists(cov):
+ cov = None
+ id_map[bid] = (opf, cov)
+
+ self.apply_metadata_changes(id_map, callback=lambda x:
+ self.cleanup_bulk_download(tdir))
# }}}
@@ -468,6 +487,11 @@ class EditMetadataAction(InterfaceAction):
callback can be either None or a function accepting a single argument,
in which case it is called after applying is complete with the list of
changed ids.
+
+ id_map can also be a mapping of ids to 2-tuple's where each 2-tuple
+ contains the absolute paths to an OPF and cover file respectively. If
+ either of the paths is None, then the corresponding metadata is not
+ updated.
'''
if title is None:
title = _('Applying changed metadata')
@@ -492,28 +516,48 @@ class EditMetadataAction(InterfaceAction):
return self.finalize_apply()
i, mi = self.apply_id_map[self.apply_current_idx]
+ if isinstance(mi, tuple):
+ opf, cover = mi
+ if opf:
+ mi = OPF(open(opf, 'rb'), basedir=os.path.dirname(opf),
+ populate_spine=False).to_book_metadata()
+ self.apply_mi(i, mi)
+ if cover:
+ self.gui.current_db.set_cover(i, open(cover, 'rb'),
+ notify=False, commit=False)
+ else:
+ self.apply_mi(i, mi)
+
+ self.apply_current_idx += 1
+ if self.apply_pd is not None:
+ self.apply_pd.value += 1
+ QTimer.singleShot(50, self.do_one_apply)
+
+
+ def apply_mi(self, book_id, mi):
db = self.gui.current_db
+
try:
set_title = not mi.is_null('title')
set_authors = not mi.is_null('authors')
- idents = db.get_identifiers(i, index_is_id=True)
+ idents = db.get_identifiers(book_id, index_is_id=True)
if mi.identifiers:
idents.update(mi.identifiers)
mi.identifiers = idents
if mi.is_null('series'):
mi.series_index = None
if self._am_merge_tags:
- old_tags = db.tags(i, index_is_id=True)
+ old_tags = db.tags(book_id, index_is_id=True)
if old_tags:
tags = [x.strip() for x in old_tags.split(',')] + (
mi.tags if mi.tags else [])
mi.tags = list(set(tags))
- db.set_metadata(i, mi, commit=False, set_title=set_title,
+ db.set_metadata(book_id, mi, commit=False, set_title=set_title,
set_authors=set_authors, notify=False)
- self.applied_ids.append(i)
+ self.applied_ids.append(book_id)
except:
import traceback
- self.apply_failures.append((i, traceback.format_exc()))
+ self.apply_failures.append((book_id, traceback.format_exc()))
try:
if mi.cover:
@@ -521,11 +565,6 @@ class EditMetadataAction(InterfaceAction):
except:
pass
- self.apply_current_idx += 1
- if self.apply_pd is not None:
- self.apply_pd.value += 1
- QTimer.singleShot(50, self.do_one_apply)
-
def finalize_apply(self):
db = self.gui.current_db
db.commit()
diff --git a/src/calibre/gui2/auto_add.py b/src/calibre/gui2/auto_add.py
index a0be1b72fb..033d7124d5 100644
--- a/src/calibre/gui2/auto_add.py
+++ b/src/calibre/gui2/auto_add.py
@@ -113,6 +113,7 @@ class Worker(Thread):
class AutoAdder(QObject):
metadata_read = pyqtSignal(object)
+ auto_convert = pyqtSignal(object)
def __init__(self, path, parent):
QObject.__init__(self, parent)
@@ -124,6 +125,8 @@ class AutoAdder(QObject):
self.metadata_read.connect(self.add_to_db,
type=Qt.QueuedConnection)
QTimer.singleShot(2000, self.initialize)
+ self.auto_convert.connect(self.do_auto_convert,
+ type=Qt.QueuedConnection)
elif path:
prints(path,
'is not a valid directory to watch for new ebooks, ignoring')
@@ -163,6 +166,7 @@ class AutoAdder(QObject):
needs_rescan = False
duplicates = []
+ added_ids = set()
for fname, tdir in data.iteritems():
paths = [os.path.join(self.worker.path, fname)]
@@ -187,9 +191,12 @@ class AutoAdder(QObject):
continue
mi = [OPF(open(mi, 'rb'), tdir,
populate_spine=False).to_book_metadata()]
- dups, num = m.add_books(paths,
+ dups, ids = m.add_books(paths,
[os.path.splitext(fname)[1][1:].upper()], mi,
- add_duplicates=not gprefs['auto_add_check_for_duplicates'])
+ add_duplicates=not gprefs['auto_add_check_for_duplicates'],
+ return_ids=True)
+ added_ids |= set(ids)
+ num = len(ids)
if dups:
path = dups[0][0]
with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
@@ -217,8 +224,10 @@ class AutoAdder(QObject):
_('Books with the same title as the following already '
'exist in the database. Add them anyway?'),
'\n'.join(files)):
- dups, num = m.add_books(paths, formats, metadata,
- add_duplicates=True)
+ dups, ids = m.add_books(paths, formats, metadata,
+ add_duplicates=True, return_ids=True)
+ added_ids |= set(ids)
+ num = len(ids)
count += num
for tdir in data.itervalues():
@@ -227,6 +236,9 @@ class AutoAdder(QObject):
except:
pass
+ if added_ids and gprefs['auto_add_auto_convert']:
+ self.auto_convert.emit(added_ids)
+
if count > 0:
m.books_added(count)
gui.status_bar.show_message(_(
@@ -238,4 +250,7 @@ class AutoAdder(QObject):
if needs_rescan:
QTimer.singleShot(2000, self.dir_changed)
+ def do_auto_convert(self, added_ids):
+ gui = self.parent()
+ gui.iactions['Convert Books'].auto_convert_auto_add(added_ids)
diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py
index cd1e38682e..64c8bf75ba 100644
--- a/src/calibre/gui2/dialogs/message_box.py
+++ b/src/calibre/gui2/dialogs/message_box.py
@@ -160,7 +160,7 @@ class ProceedNotification(MessageBox): # {{{
def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
det_msg='', show_copy_button=False, parent=None,
- cancel_callback=None):
+ cancel_callback=None, log_is_file=False):
'''
A non modal popup that notifies the user that a background task has
been completed.
@@ -175,12 +175,15 @@ class ProceedNotification(MessageBox): # {{{
:param title: The title for this popup
:param msg: The msg to display
:param det_msg: Detailed message
+ :param log_is_file: If True the html_log parameter is interpreted as
+ the path to a file on disk containing the log encoded with utf-8
'''
MessageBox.__init__(self, MessageBox.QUESTION, title, msg,
det_msg=det_msg, show_copy_button=show_copy_button,
parent=parent)
self.payload = payload
self.html_log = html_log
+ self.log_is_file = log_is_file
self.log_viewer_title = log_viewer_title
self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
@@ -192,7 +195,11 @@ class ProceedNotification(MessageBox): # {{{
_proceed_memory.append(self)
def show_log(self):
- self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
+ log = self.html_log
+ if self.log_is_file:
+ with open(log, 'rb') as f:
+ log = f.read().decode('utf-8')
+ self.log_viewer = ViewLog(self.log_viewer_title, log,
parent=self)
def do_proceed(self, result):
@@ -202,9 +209,9 @@ class ProceedNotification(MessageBox): # {{{
gui = get_gui()
gui.proceed_requested.emit(func, self.payload)
# Ensure this notification is garbage collected
+ self.vlb.clicked.disconnect()
self.callback = self.cancel_callback = self.payload = None
self.setParent(None)
- self.vlb.clicked.disconnect()
_proceed_memory.remove(self)
def done(self, r):
diff --git a/src/calibre/gui2/dialogs/search.ui b/src/calibre/gui2/dialogs/search.ui
index f3f96547bd..0a536010ef 100644
--- a/src/calibre/gui2/dialogs/search.ui
+++ b/src/calibre/gui2/dialogs/search.ui
@@ -140,34 +140,6 @@
- -
-
-
-
- 16777215
- 60
-
-
-
-
-
-
-
-
- 40
- 0
-
-
-
-
-
-
- matchkind
-
-
-
-
-
-
-
diff --git a/src/calibre/gui2/jobs.py b/src/calibre/gui2/jobs.py
index 8c1b5388d7..c0d61332ab 100644
--- a/src/calibre/gui2/jobs.py
+++ b/src/calibre/gui2/jobs.py
@@ -402,7 +402,8 @@ class DetailView(QDialog, Ui_Dialog): # {{{
self.setupUi(self)
self.setWindowTitle(job.description)
self.job = job
- self.html_view = hasattr(job, 'html_details')
+ self.html_view = (hasattr(job, 'html_details') and not getattr(job,
+ 'ignore_html_details', False))
if self.html_view:
self.log.setVisible(False)
else:
diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index 0b3c048a2e..e0047c2a70 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -187,9 +187,10 @@ class BooksModel(QAbstractTableModel): # {{{
self.db = None
self.reset()
- def add_books(self, paths, formats, metadata, add_duplicates=False):
+ def add_books(self, paths, formats, metadata, add_duplicates=False,
+ return_ids=False):
ret = self.db.add_books(paths, formats, metadata,
- add_duplicates=add_duplicates)
+ add_duplicates=add_duplicates, return_ids=return_ids)
self.count_changed()
return ret
diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py
index 976dfad2bb..3487ffd8f2 100644
--- a/src/calibre/gui2/metadata/bulk_download.py
+++ b/src/calibre/gui2/metadata/bulk_download.py
@@ -7,22 +7,41 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+import os, time, shutil
from functools import partial
-from itertools import izip
-from threading import Event
from PyQt4.Qt import (QIcon, QDialog,
QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)
from calibre.gui2.threaded_jobs import ThreadedJob
-from calibre.ebooks.metadata.sources.identify import identify, msprefs
-from calibre.ebooks.metadata.sources.covers import download_cover
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.customize.ui import metadata_plugins
-from calibre.ptempfile import PersistentTemporaryFile
-from calibre.utils.date import as_utc
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
+from calibre.utils.ipc.simple_worker import fork_job, WorkerError
+from calibre.ptempfile import (PersistentTemporaryDirectory,
+ PersistentTemporaryFile)
# Start download {{{
+
+class Job(ThreadedJob):
+
+ ignore_html_details = True
+
+ def consolidate_log(self):
+ self.consolidated_log = self.log.plain_text
+ self.log = None
+
+ def read_consolidated_log(self):
+ return self.consolidated_log
+
+ @property
+ def details(self):
+ if self.consolidated_log is None:
+ return self.log.plain_text
+ return self.read_consolidated_log()
+
+ @property
+ def log_file(self):
+ return open(self.download_debug_log, 'rb')
+
def show_config(gui, parent):
from calibre.gui2.preferences import show_config_widget
show_config_widget('Sharing', 'Metadata download', parent=parent,
@@ -104,19 +123,22 @@ def start_download(gui, ids, callback, ensure_fields=None):
d.b.clicked.disconnect()
if ret != d.Accepted:
return
+ tf = PersistentTemporaryFile('_metadata_bulk_log_')
+ tf.close()
- for batch in split_jobs(ids):
- job = ThreadedJob('metadata bulk download',
- _('Download metadata for %d books')%len(batch),
- download, (batch, gui.current_db, d.identify, d.covers,
- ensure_fields), {}, callback)
- gui.job_manager.run_threaded_job(job)
+ job = Job('metadata bulk download',
+ _('Download metadata for %d books')%len(ids),
+ download, (ids, tf.name, gui.current_db, d.identify, d.covers,
+ ensure_fields), {}, callback)
+ job.download_debug_log = tf.name
+ gui.job_manager.run_threaded_job(job)
gui.status_bar.show_message(_('Metadata download started'), 3000)
# }}}
def get_job_details(job):
- id_map, failed_ids, failed_covers, title_map, all_failed = job.result
+ (aborted, good_ids, tdir, log_file, failed_ids, failed_covers, title_map,
+ lm_map, all_failed) = job.result
det_msg = []
for i in failed_ids | failed_covers:
title = title_map[i]
@@ -126,92 +148,89 @@ def get_job_details(job):
title += (' ' + _('(Failed cover)'))
det_msg.append(title)
det_msg = '\n'.join(det_msg)
- return id_map, failed_ids, failed_covers, all_failed, det_msg
+ return (aborted, good_ids, tdir, log_file, failed_ids, failed_covers,
+ all_failed, det_msg, lm_map)
-def merge_result(oldmi, newmi, ensure_fields=None):
- dummy = Metadata(_('Unknown'))
- for f in msprefs['ignore_fields']:
- if ':' in f or (ensure_fields and f in ensure_fields):
- continue
- setattr(newmi, f, getattr(dummy, f))
- fields = set()
- for plugin in metadata_plugins(['identify']):
- fields |= plugin.touched_fields
+class HeartBeat(object):
+ CHECK_INTERVAL = 300 # seconds
+ ''' Check that the file count in tdir changes every five minutes '''
- def is_equal(x, y):
- if hasattr(x, 'tzinfo'):
- x = as_utc(x)
- if hasattr(y, 'tzinfo'):
- y = as_utc(y)
- return x == y
+ def __init__(self, tdir):
+ self.tdir = tdir
+ self.last_count = len(os.listdir(self.tdir))
+ self.last_time = time.time()
- for f in fields:
- # Optimize so that set_metadata does not have to do extra work later
- if not f.startswith('identifier:'):
- if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
- getattr(oldmi, f))):
- setattr(newmi, f, getattr(dummy, f))
+ def __call__(self):
+ if time.time() - self.last_time > self.CHECK_INTERVAL:
+ c = len(os.listdir(self.tdir))
+ if c == self.last_count:
+ return False
+ self.last_count = c
+ self.last_time = time.time()
+ return True
- newmi.last_modified = oldmi.last_modified
+# Fix log viewer, ratings
+# Test: abort, covers only, metadata only, both, 200 entry download, memory
+# consumption, all errors and on and on
- return newmi
-
-def download(ids, db, do_identify, covers, ensure_fields,
+def download(all_ids, tf, db, do_identify, covers, ensure_fields,
log=None, abort=None, notifications=None):
- ids = list(ids)
- metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
- for i in ids]
+ batch_size = 10
+ batches = split_jobs(all_ids, batch_size=batch_size)
+ tdir = PersistentTemporaryDirectory('_metadata_bulk_')
+ heartbeat = HeartBeat(tdir)
+
failed_ids = set()
failed_covers = set()
title_map = {}
- ans = {}
- count = 0
+ lm_map = {}
+ ans = set()
all_failed = True
- '''
- # Test apply dialog
- all_failed = do_identify = covers = False
- '''
- for i, mi in izip(ids, metadata):
+ aborted = False
+ count = 0
+
+ for ids in batches:
if abort.is_set():
log.error('Aborting...')
break
- title, authors, identifiers = mi.title, mi.authors, mi.identifiers
- title_map[i] = title
- if do_identify:
- results = []
- try:
- results = identify(log, Event(), title=title, authors=authors,
- identifiers=identifiers)
- except:
- pass
- if results:
- all_failed = False
- mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
- identifiers = mi.identifiers
- if not mi.is_null('rating'):
- # set_metadata expects a rating out of 10
- mi.rating *= 2
- else:
- log.error('Failed to download metadata for', title)
- failed_ids.add(i)
- # We don't want set_metadata operating on anything but covers
- mi = merge_result(mi, mi, ensure_fields=ensure_fields)
- if covers:
- cdata = download_cover(log, title=title, authors=authors,
- identifiers=identifiers)
- if cdata is not None:
- with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
- f.write(cdata[-1])
- mi.cover = f.name
- all_failed = False
- else:
- failed_covers.add(i)
- ans[i] = mi
- count += 1
+ metadata = {i:db.get_metadata(i, index_is_id=True,
+ get_user_categories=False) for i in ids}
+ for i in ids:
+ title_map[i] = metadata[i].title
+ lm_map[i] = metadata[i].last_modified
+ metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
+ metadata.iteritems()}
+ try:
+ ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
+ (do_identify, covers, metadata, ensure_fields),
+ cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
+ except WorkerError as e:
+ if e.orig_tb:
+ raise Exception('Failed to download metadata. Original '
+ 'traceback: \n\n'+e.orig_tb)
+ raise
+ count += batch_size
notifications.put((count/len(ids),
- _('Downloaded %(num)d of %(tot)d')%dict(num=count, tot=len(ids))))
+ _('Downloaded %(num)d of %(tot)d')%dict(
+ num=count, tot=len(all_ids))))
+
+ fids, fcovs, allf = ret['result']
+ if not allf:
+ all_failed = False
+ failed_ids = failed_ids.union(fids)
+ failed_covers = failed_covers.union(fcovs)
+ ans = ans.union(set(ids) - fids)
+ for book_id in ids:
+ lp = os.path.join(tdir, '%d.log'%book_id)
+ if os.path.exists(lp):
+ with open(tf, 'ab') as dest, open(lp, 'rb') as src:
+ dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
+ '#'*20+'\n').encode('utf-8'))
+ shutil.copyfileobj(src, dest)
+
+ if abort.is_set():
+ aborted = True
log('Download complete, with %d failures'%len(failed_ids))
- return (ans, failed_ids, failed_covers, title_map, all_failed)
-
-
+ return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
+ lm_map, all_failed)
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index 840753c706..23728b5901 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -161,10 +161,10 @@ class MetadataSingleDialogBase(ResizableDialog):
self.manage_authors_button.clicked.connect(self.authors.manage_authors)
self.series = SeriesEdit(self)
- self.remove_unused_series_button = QToolButton(self)
- self.remove_unused_series_button.setToolTip(
- _('Remove unused series (Series that have no books)') )
- self.remove_unused_series_button.clicked.connect(self.remove_unused_series)
+ self.clear_series_button = QToolButton(self)
+ self.clear_series_button.setToolTip(
+ _('Clear series') )
+ self.clear_series_button.clicked.connect(self.series.clear)
self.series_index = SeriesIndexEdit(self, self.series)
self.basic_metadata_widgets.extend([self.series, self.series_index])
@@ -198,6 +198,7 @@ class MetadataSingleDialogBase(ResizableDialog):
self.basic_metadata_widgets.append(self.identifiers)
self.clear_identifiers_button = QToolButton(self)
self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
+ self.clear_identifiers_button.setToolTip(_('Clear Ids'))
self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
self.paste_isbn_button = QToolButton(self)
self.paste_isbn_button.setToolTip('
' +
@@ -303,17 +304,6 @@ class MetadataSingleDialogBase(ResizableDialog):
self.title_sort.auto_generate()
self.author_sort.auto_generate()
- def remove_unused_series(self, *args):
- self.db.remove_unused_series()
- idx = self.series.current_val
- self.series.clear()
- self.series.initialize(self.db, self.book_id)
- if idx:
- for i in range(self.series.count()):
- if unicode(self.series.itemText(i)) == idx:
- self.series.setCurrentIndex(i)
- break
-
def tags_editor(self, *args):
self.tags.edit(self.db, self.book_id)
@@ -591,7 +581,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
sto(self.title_sort, self.authors)
create_row(1, self.authors, self.deduce_author_sort_button, self.author_sort)
sto(self.author_sort, self.series)
- create_row(2, self.series, self.remove_unused_series_button,
+ create_row(2, self.series, self.clear_series_button,
self.series_index, icon='trash.png')
sto(self.series_index, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
@@ -756,7 +746,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
- button=self.remove_unused_series_button, icon='trash.png')
+ button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)
@@ -892,7 +882,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
- button=self.remove_unused_series_button, icon='trash.png')
+ button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)
diff --git a/src/calibre/gui2/preferences/adding.py b/src/calibre/gui2/preferences/adding.py
index 1e8395b4f3..fafc5b5a1c 100644
--- a/src/calibre/gui2/preferences/adding.py
+++ b/src/calibre/gui2/preferences/adding.py
@@ -36,6 +36,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('new_book_tags', prefs, setting=CommaSeparatedList)
r('auto_add_path', gprefs, restart_required=True)
r('auto_add_check_for_duplicates', gprefs)
+ r('auto_add_auto_convert', gprefs)
self.filename_pattern = FilenamePattern(self)
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
diff --git a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui
index 900ed62103..f04d55ff28 100644
--- a/src/calibre/gui2/preferences/adding.ui
+++ b/src/calibre/gui2/preferences/adding.ui
@@ -151,6 +151,19 @@ Author matching is exact.
&Automatic Adding
+ -
+
+
+ If set, this option will cause calibre to check if a file
+ being auto-added is already in the calibre library.
+ If it is, a message will pop up asking you whether
+ you want to add it anyway.
+
+
+ Check for &duplicates when auto-adding files
+
+
+
-
@@ -168,7 +181,7 @@ Author matching is exact.
- -
+
-
Ignore files with the following extensions when automatically adding
@@ -187,7 +200,7 @@ Author matching is exact.
- -
+
-
Qt::Horizontal
@@ -225,16 +238,10 @@ Author matching is exact.
- -
-
-
- If set, this option will causes calibre to check if a file
- being auto-added is already in the calibre library.
- If it is, a meesage will pop up asking you whether
- you want to add it anyway.
-
+
-
+
- Check for &duplicates when auto-adding files
+ Automatically &convert added files to the current output format
diff --git a/src/calibre/gui2/store/opensearch_store.py b/src/calibre/gui2/store/opensearch_store.py
index bcc92b25f1..a66418aa77 100644
--- a/src/calibre/gui2/store/opensearch_store.py
+++ b/src/calibre/gui2/store/opensearch_store.py
@@ -73,11 +73,13 @@ class OpenSearchOPDSStore(StorePlugin):
type = link.get('type')
if rel and href and type:
- if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
+ if 'http://opds-spec.org/thumbnail' in rel:
s.cover_url = href
- elif rel == u'http://opds-spec.org/acquisition/buy':
+ elif 'http://opds-spec.org/image/thumbnail' in rel:
+ s.cover_url = href
+ elif 'http://opds-spec.org/acquisition/buy' in rel:
s.detail_item = href
- elif rel == u'http://opds-spec.org/acquisition':
+ elif 'http://opds-spec.org/acquisition' in rel:
if type:
ext = mimetypes.guess_extension(type)
if ext:
diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py
index f1df707ad4..242cac5d79 100644
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@@ -25,7 +25,7 @@ from calibre.ebooks.conversion.config import GuiRecommendations, \
from calibre.gui2.convert import bulk_defaults_for_input_format
def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
- out_format=None):
+ out_format=None, show_no_format_warning=True):
changed = False
jobs = []
bad = []
@@ -91,7 +91,7 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
except NoSupportedInputFormats:
bad.append(book_id)
- if bad != []:
+ if bad and show_no_format_warning:
res = []
for id in bad:
title = db.title(id, True)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 1b4e8390f1..72ff9cd08d 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -3243,7 +3243,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return id
- def add_books(self, paths, formats, metadata, add_duplicates=True):
+ def add_books(self, paths, formats, metadata, add_duplicates=True,
+ return_ids=False):
'''
Add a book to the database. The result cache is not updated.
:param:`paths` List of paths to book files or file-like objects
@@ -3289,7 +3290,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = list(duplicate[1] for duplicate in duplicates)
metadata = list(duplicate[2] for duplicate in duplicates)
return (paths, formats, metadata), len(ids)
- return None, len(ids)
+ return None, (ids if return_ids else len(ids))
def import_book(self, mi, formats, notify=True, import_hooks=True,
apply_import_tags=True, preserve_uuid=False):
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 77428e4c07..75e9d03d6e 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -648,7 +648,10 @@ class BasicNewsRecipe(Recipe):
'url' : URL of print version,
'date' : The publication date of the article as a string,
'description' : A summary of the article
- 'content' : The full article (can be an empty string). This is used by FullContentProfile
+ 'content' : The full article (can be an empty string). Obsolete
+ do not use, instead save the content to a temporary
+ file and pass a file:///path/to/temp/file.html as
+ the URL.
}
For an example, see the recipe for downloading `The Atlantic`.
|