mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit bf2850019e: 0.8.45+
@@ -13,7 +13,7 @@ class HighCountryNews(BasicNewsRecipe):
__author__ = 'Armin Geller' # 2012-01-31
publisher = 'High Country News'
timefmt = ' [%a, %d %b %Y]'
language = 'en-Us'
language = 'en'
encoding = 'UTF-8'
publication_type = 'newspaper'
oldest_article = 7
@@ -1,45 +1,73 @@
# Talking Points is not grabbing everything.
# The look is right, but only the last one added?
import re
import string, re
import time
import traceback
# above for debugging via stack
from calibre.web.feeds.recipes import BasicNewsRecipe
# Allows the Python soup converter, which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
# strip ads and graphics
# Current Column lacks a title.
# Talking Points Memo - shorten title - Remove year and Bill's name

import os, time, traceback, re, urlparse, sys, cStringIO
from collections import defaultdict
from functools import partial
from contextlib import nested, closing

from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending

# To Do: strip ads and graphics, Current Column lacks a title.
# The newsletter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12
# ./ebook-convert --username xxx --password xxx

# this is derived from BasicNewsRecipe, so it can only overload those.
# Some of what we need is otherwise in article, so we have more copy to do than otherwise.
class OReillyPremium(BasicNewsRecipe):
title = u'OReilly Premium'
__author__ = 'TMcN'
language = 'en'
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y')
title = 'Bill O\'Reilly Premium'
auto_cleanup = True
conversion_options = {'linearize_tables': True}
encoding = 'utf8'
needs_subscription = True
language = 'en'
no_stylesheets = True
oldest_article = 20
needs_subscription = True
oldest_article = 31
remove_javascript = True
remove_tags = [dict(name='img', attrs={})]
# Don't go down
recursions = 0
max_articles_per_feed = 2000
max_articles_per_feed = 20

debugMessages = True

# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
# ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
# ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
# ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
# ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
]

feeds = [
(u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'),
(u'Daily Briefing', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'),
(u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'),
(u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'),
(u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5')
]
# http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day.

# Note: Talking Points is broken in the above model; the site changed to more Ajax-y.
# Now using RSS

def get_browser(self):
print("In get_browser")
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
@@ -66,6 +94,7 @@ class OReillyPremium(BasicNewsRecipe):
def stripBadChars(self, inString) :
return inString.replace("\'", "")

def parseGeneric(self, baseURL):
# Does a generic parsing of the articles. There are six categories (0-5)
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList

@@ -73,6 +102,7 @@ class OReillyPremium(BasicNewsRecipe):
fullReturn = []
for i in range(len(self.catList)) :
articleList = []
print("In "+self.catList[i][0]+", index: "+ str(i))
soup = self.index_to_soup(self.catList[i][1])
# Set defaults
description = 'None'
@@ -81,14 +111,12 @@ class OReillyPremium(BasicNewsRecipe):
# 3-5 create one.
# So no for-div for 3-5

if i < 3 :
if i == 0 :
print("Starting TV Archives")
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
print("Next DIV:")
print(div)
if i == 1:
a = div.find('a', href=True)
else :
a = div
print(a)
a = div
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
@@ -96,82 +124,63 @@ class OReillyPremium(BasicNewsRecipe):
continue
# url = baseURL+re.sub(r'\?.*', '', a['href'])
url = baseURL+a['href']
if i < 2 :
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = self.tag_to_string(a, use_alt=True).strip()
elif i == 2 :
# Daily Briefs
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = div.contents[0]
if self.debugMessages :
print(title+" @ "+url)
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = self.tag_to_string(a, use_alt=True).strip()
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))

elif i == 3 : # Stratfor
a = soup.find('a', self.catList[i][3])
if a is None :
continue
url = baseURL+a['href']
title = self.tag_to_string(a, use_alt=True).strip()
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
# Changed 12 Jan 2012 - new page format
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
#stratBody = stratSoup.find('td', {'class':['blogBody']})
elif i == 4 : # Talking Points
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
myTable = topDate.findParents('table')[0]
if myTable is not None:
upOneTable = myTable.findParents('table')[0]
if upOneTable is not None:
upTwo = upOneTable.findParents('table')[0]
if upTwo is None:
continue
# Now navigate rows of upTwo
if self.debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
# This looks wrong.
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
url = baseURL+re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if self.debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
else : # Current Column
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
if titleSpan is None :
print("No Current Column Title Span")
print(soup)
continue
title = titleSpan.contents[0]
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
if i == 3 or i == 5 :
if i == 1 :
if self.debugMessages :
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
summary = div.find(True, attrs={'class':'summary'})
if summary:
print("At Summary")
print(summary)
if summary is not None:
description = self.tag_to_string(summary, use_alt=False)
print("At append")
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
self.catList[i][3] = articleList
fullReturn.append((self.catList[i][0], articleList))
print("Returning")
# print fullReturn
return fullReturn
# build_index() starts with:
# try:
# feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
# max_articles_per_feed=self.max_articles_per_feed,
# log=self.log)
# self.report_progress(0, _('Got feeds from index page'))
# except NotImplementedError:
# feeds = self.parse_feeds()

# which in turn is from __init__.py
#def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
# log=default_log):
#'''
#@param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
#@return: A list of L{Feed} objects.
#@rtype: list
#'''
#feeds = []
#for title, articles in index:
# pfeed = Feed(log=log)
# pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
# max_articles_per_feed=max_articles_per_feed)
# feeds.append(pfeed)
# return feeds

# use_embedded_content defaults to None, at which point if the content is > 2K, it is used as the article.

# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# returns a list of tuple ('feed title', list of articles)
# {
@@ -182,12 +191,19 @@ class OReillyPremium(BasicNewsRecipe):
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
# }
# this is used instead of BasicNewsRecipe.parse_feeds().
# it is called by download
def parse_index(self):
# Parse the page into Python Soup
print("Entering recipe print_index from:")
traceback.print_stack()
print("web")
baseURL = "https://www.billoreilly.com"
return self.parseGeneric(baseURL)
masterList = self.parseGeneric(baseURL)
#print(masterList)
return masterList

def preprocess_html(self, soup):
print("In preprocess_html")
refresh = soup.find('meta', {'http-equiv':'refresh'})
if refresh is None:
return soup
@@ -195,3 +211,128 @@ class OReillyPremium(BasicNewsRecipe):
raw = self.browser.open('https://www.billoreilly.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))

def build_index(self):
print("In OReilly build_index()\n\n")
feedsRSS = []
self.report_progress(0, _('Fetching feeds...'))
#try:
feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
log=self.log)
self.report_progress(0, _('Got feeds from index page'))
#except NotImplementedError:
# feeds = self.parse_feeds()
# Now add regular feeds.
feedsRSS = self.parse_feeds()
print ("feedsRSS is type "+feedsRSS.__class__.__name__)

for articles in feedsRSS:
print("articles is type "+articles.__class__.__name__)
print("Title:" + articles.title)
feeds.append(articles)
if not feeds:
raise ValueError('No articles found, aborting')

#feeds = FeedCollection(feeds)

self.report_progress(0, _('Trying to download cover...'))
self.download_cover()
self.report_progress(0, _('Generating masthead...'))
self.masthead_path = None

try:
murl = self.get_masthead_url()
except:
self.log.exception('Failed to get masthead url')
murl = None

if murl is not None:
# Try downloading the user-supplied masthead_url
# Failure sets self.masthead_path to None
self.download_masthead(murl)
if self.masthead_path is None:
self.log.info("Synthesizing mastheadImage")
self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
try:
self.default_masthead_image(self.masthead_path)
except:
self.log.exception('Failed to generate default masthead image')
self.masthead_path = None

if self.test:
feeds = feeds[:2]
self.has_single_feed = len(feeds) == 1

index = os.path.join(self.output_dir, 'index.html')

html = self.feeds2index(feeds)
with open(index, 'wb') as fi:
fi.write(html)
self.jobs = []

if self.reverse_article_order:
for feed in feeds:
if hasattr(feed, 'reverse'):
feed.reverse()

self.feed_objects = feeds
for f, feed in enumerate(feeds):
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
if not os.path.isdir(feed_dir):
os.makedirs(feed_dir)

for a, article in enumerate(feed):
if a >= self.max_articles_per_feed:
break
art_dir = os.path.join(feed_dir, 'article_%d'%a)
if not os.path.isdir(art_dir):
os.makedirs(art_dir)
try:
url = self.print_version(article.url)
except NotImplementedError:
url = article.url
except:
self.log.exception('Failed to find print version for: '+article.url)
url = None
if not url:
continue
func, arg = (self.fetch_embedded_article, article) \
if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
else \
((self.fetch_obfuscated_article if self.articles_are_obfuscated \
else self.fetch_article), url)
req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
{}, (f, a), self.article_downloaded,
self.error_in_article_download)
req.feed = feed
req.article = article
req.feed_dir = feed_dir
self.jobs.append(req)

self.jobs_done = 0
tp = ThreadPool(self.simultaneous_downloads)
for req in self.jobs:
tp.putRequest(req, block=True, timeout=0)

self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
while True:
try:
tp.poll()
time.sleep(0.1)
except NoResultsPending:
break
for f, feed in enumerate(feeds):
print("Writing feeds for "+feed.title)
html = self.feed2index(f,feeds)
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html)
self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index)

return index
@@ -1,7 +1,9 @@
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
import string, re
import time
from urlparse import urlparse
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString

class RealClear(BasicNewsRecipe):
title = u'Real Clear'

@@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe):
# Don't go down
recursions = 0
max_articles_per_feed = 400
debugMessages = False
debugMessages = True

# Numeric parameter is type, controls whether we look for
feedsets = [
["Politics", "http://www.realclearpolitics.com/index.xml", 0],
["Science", "http://www.realclearscience.com/index.xml", 0],
["Politics", "http://www.realclearpolitics.com/index.xml", 0],
["Policy", "http://www.realclearpolicy.com/index.xml", 0],
["Science", "http://www.realclearscience.com/index.xml", 0],
["Tech", "http://www.realcleartechnology.com/index.xml", 0],
# The feedburner is essentially the same as the top feed, politics.
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],

@@ -37,7 +40,9 @@ class RealClear(BasicNewsRecipe):
]
# Hints to extractPrintURL.
# First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
printhints = [
phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)

printhints = [ ["realclear", "", '' , 'printpage'],
["billoreilly.com", "Print this entry", 'a', ''],
["billoreilly.com", "Print This Article", 'a', ''],
["politico.com", "Print", 'a', 'share-print'],
@@ -48,11 +53,24 @@ class RealClear(BasicNewsRecipe):
# usatoday - just prints with all current crap anyhow

]
# RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html
# The print link isn't obvious, and only the end is needed (the -full append.) So maybe try that first?
# http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html
# Single page articles don't have a _full; e.g. http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html
# Use the FULL PRINTPAGE URL; it formats it better too!
#
# NYT - try single page...
# Need special code - is it one page or several? Which URL?
# from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1
# to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all
# which is at link rel="canonical" and at <meta property="og:url" or look for "Single Page"

# Returns the best-guess print url.
# The second parameter (pageURL) is returned if nothing is found.
def extractPrintURL(self, pageURL):
tagURL = pageURL
baseParse = urlparse(pageURL)
baseURL = baseParse[0]+"://"+baseParse[1]
hintsCount =len(self.printhints)
for x in range(0,hintsCount):
if pageURL.find(self.printhints[x][0])== -1 :
@@ -62,23 +80,37 @@ class RealClear(BasicNewsRecipe):
soup = self.index_to_soup(pageURL)
if soup is None:
return pageURL
if len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
# e.g. RealClear
if self.debugMessages == True :
print("search1")
print("Search by href: "+self.printhints[x][self.phHrefSearch])
printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
if self.debugMessages == True :
print("Search 1: "+self.printhints[x][2]+" Attributes: ")
print(self.printhints[x][3])
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
elif len(self.printhints[x][3])>0 :
if self.debugMessages == True :
print("search2")
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
else :
if self.debugMessages == True:
print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
if printFind is None:
if self.debugMessages == True :
print("Not Found")
# print(soup)
print("end soup\n\n");
continue

print(printFind)
if isinstance(printFind, NavigableString)==False:
if printFind['href'] is not None:
print("Check "+printFind['href']+" for base of "+baseURL)
if printFind['href'].find("http")!=0 :
return baseURL+printFind['href']
return printFind['href']
tag = printFind.parent
print(tag)
@@ -158,6 +190,7 @@ class RealClear(BasicNewsRecipe):
def parse_index(self):
# Parse the page into Python Soup

articleList = []
ans = []
feedsCount = len(self.feedsets)
for x in range(0,feedsCount): # should be ,4

@@ -168,3 +201,4 @@ class RealClear(BasicNewsRecipe):
print(ans)
return ans
@@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
auto_cleanup_keep = '//div[@id="mediaWrapper"]'
simultaneous_downloads = 1
delay = 4
max_connections = 1

@@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe):
, 'language' : language
}

keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
#keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]

remove_tags = [
dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
,dict(name=['object','link'])
]
#remove_tags = [
#dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
#,dict(name=['object','link'])
#]

feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )]

def get_cover_url(self):
@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.8.0'
QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
@@ -107,6 +107,7 @@ class ANDROID(USBMS):
0xc004 : [0x0226],
0x8801 : [0x0226, 0x0227],
0xe115 : [0x0216], # PocketBook A10
0xe107 : [0x326], # PocketBook 622
},

# Acer
src/calibre/ebooks/metadata/sources/worker.py (new file, 95 lines)
@@ -0,0 +1,95 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
from threading import Event
from io import BytesIO

from calibre.utils.date import as_utc
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.utils.logging import GUILog
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF

def merge_result(oldmi, newmi, ensure_fields=None):
dummy = Metadata(_('Unknown'))
for f in msprefs['ignore_fields']:
if ':' in f or (ensure_fields and f in ensure_fields):
continue
setattr(newmi, f, getattr(dummy, f))
fields = set()
for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields

def is_equal(x, y):
if hasattr(x, 'tzinfo'):
x = as_utc(x)
if hasattr(y, 'tzinfo'):
y = as_utc(y)
return x == y

for f in fields:
# Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'):
if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f))

return newmi

def main(do_identify, covers, metadata, ensure_fields):
failed_ids = set()
failed_covers = set()
all_failed = True
log = GUILog()

for book_id, mi in metadata.iteritems():
mi = OPF(BytesIO(mi), basedir=os.getcwdu(),
populate_spine=False).to_book_metadata()
title, authors, identifiers = mi.title, mi.authors, mi.identifiers
cdata = None
log.clear()

if do_identify:
results = []
try:
results = identify(log, Event(), title=title, authors=authors,
identifiers=identifiers)
except:
pass
if results:
all_failed = False
mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
identifiers = mi.identifiers
if not mi.is_null('rating'):
# set_metadata expects a rating out of 10
mi.rating *= 2
with open('%d.mi'%book_id, 'wb') as f:
f.write(metadata_to_opf(mi, default_lang='und'))
else:
log.error('Failed to download metadata for', title)
failed_ids.add(book_id)

if covers:
cdata = download_cover(log, title=title, authors=authors,
identifiers=identifiers)
if cdata is None:
failed_covers.add(book_id)
else:
with open('%d.cover'%book_id, 'wb') as f:
f.write(cdata[-1])
all_failed = False

with open('%d.log'%book_id, 'wb') as f:
f.write(log.plain_text.encode('utf-8'))

return failed_ids, failed_covers, all_failed
@@ -10,13 +10,19 @@ __docformat__ = 'restructuredtext en'
import struct, re, os, imghdr
from collections import namedtuple
from itertools import repeat
from urlparse import urldefrag

from lxml import etree

from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import read_index
from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.oeb.parse_utils import parse_html
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text

Part = namedtuple('Part',
'num type filename start end aid')

@@ -383,6 +389,19 @@ class Mobi8Reader(object):
len(resource_map)):
mi.cover = resource_map[self.cover_offset]

if len(list(toc)) < 2:
self.log.warn('KF8 has no metadata Table of Contents')

for ref in guide:
if ref.type == 'toc':
href = ref.href()
href, frag = urldefrag(href)
if os.path.exists(href.replace('/', os.sep)):
try:
toc = self.read_inline_toc(href, frag)
except:
self.log.exception('Failed to read inline ToC')

opf = OPFCreator(os.getcwdu(), mi)
opf.guide = guide

@@ -397,4 +416,70 @@ class Mobi8Reader(object):
opf.render(of, ncx, 'toc.ncx')
return 'metadata.opf'
def read_inline_toc(self, href, frag):
ans = TOC()
base_href = '/'.join(href.split('/')[:-1])
with open(href.replace('/', os.sep), 'rb') as f:
raw = f.read().decode(self.header.codec)
root = parse_html(raw, log=self.log)
body = XPath('//h:body')(root)
reached = False
if body:
start = body[0]
else:
start = None
reached = True
if frag:
elems = XPath('//*[@id="%s"]'%frag)
if elems:
start = elems[0]

def node_depth(elem):
ans = 0
parent = elem.getparent()
while parent is not None:
parent = parent.getparent()
ans += 1
return ans

# Layer the ToC based on nesting order in the source HTML
current_depth = None
parent = ans
seen = set()
links = []
for elem in root.iterdescendants(etree.Element):
if reached and elem.tag == XHTML('a') and elem.get('href',
False):
href = elem.get('href')
href, frag = urldefrag(href)
href = base_href + '/' + href
text = xml2text(elem).strip()
if (text, href, frag) in seen:
continue
seen.add((text, href, frag))
links.append((text, href, frag, node_depth(elem)))
elif elem is start:
reached = True

depths = sorted(set(x[-1] for x in links))
depth_map = {x:i for i, x in enumerate(depths)}
for text, href, frag, depth in links:
depth = depth_map[depth]
if current_depth is None:
current_depth = 0
parent.add_item(href, frag, text)
elif current_depth == depth:
parent.add_item(href, frag, text)
elif current_depth < depth:
parent = parent[-1] if len(parent) > 0 else parent
parent.add_item(href, frag, text)
current_depth += 1
else:
delta = current_depth - depth
while delta > 0 and parent.parent is not None:
parent = parent.parent
delta -= 1
parent.add_item(href, frag, text)
current_depth = depth
return ans
@@ -40,27 +40,34 @@ def get_custom_size(opts):
custom_size = None
return custom_size

def get_pdf_printer(opts, for_comic=False):
def get_pdf_printer(opts, for_comic=False, output_file_name=None):
from calibre.gui2 import is_ok_to_use_qt
if not is_ok_to_use_qt():
raise Exception('Not OK to use Qt')

printer = QPrinter(QPrinter.HighResolution)
custom_size = get_custom_size(opts)

if opts.output_profile.short_name == 'default' or \
opts.output_profile.width > 9999:
if custom_size is None:
printer.setPaperSize(paper_size(opts.paper_size))
else:
printer.setPaperSize(QSizeF(custom_size[0], custom_size[1]), unit(opts.unit))
if isosx and not for_comic:
# On OSX, the native engine can only produce a single page size
# (usually A4). The Qt engine on the other hand produces image based
# PDFs. If we set a custom page size using QSizeF the native engine
# produces unreadable output, so we just ignore the custom size
# settings.
printer.setPaperSize(paper_size(opts.paper_size))
else:
w = opts.output_profile.comic_screen_size[0] if for_comic else \
opts.output_profile.width
h = opts.output_profile.comic_screen_size[1] if for_comic else \
opts.output_profile.height
dpi = opts.output_profile.dpi
printer.setPaperSize(QSizeF(float(w) / dpi, float(h) / dpi), QPrinter.Inch)
if opts.output_profile.short_name == 'default' or \
opts.output_profile.width > 9999:
if custom_size is None:
printer.setPaperSize(paper_size(opts.paper_size))
else:
printer.setPaperSize(QSizeF(custom_size[0], custom_size[1]), unit(opts.unit))
else:
w = opts.output_profile.comic_screen_size[0] if for_comic else \
opts.output_profile.width
h = opts.output_profile.comic_screen_size[1] if for_comic else \
opts.output_profile.height
dpi = opts.output_profile.dpi
printer.setPaperSize(QSizeF(float(w) / dpi, float(h) / dpi), QPrinter.Inch)

if for_comic:
# Comic pages typically have their own margins, or their background

@@ -72,6 +79,12 @@ def get_pdf_printer(opts, for_comic=False):
printer.setOrientation(orientation(opts.orientation))
printer.setOutputFormat(QPrinter.PdfFormat)
printer.setFullPage(for_comic)
if output_file_name:
printer.setOutputFileName(output_file_name)
if isosx and not for_comic:
# Ensure we are not generating enormous image based PDFs
printer.setOutputFormat(QPrinter.NativeFormat)

return printer
def get_printer_page_size(opts, for_comic=False):

@@ -163,15 +176,7 @@ class PDFWriter(QObject): # {{{
if ok:
item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue))
self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
printer = get_pdf_printer(self.opts)
printer.setOutputFileName(item_path)
# We have to set the engine to Native on OS X after the call to set
# filename. Setting a filename with .pdf as the extension causes
# Qt to set the format to use Qt's PDF engine even if native was
# previously set on the printer. Qt's PDF engine produces image
# based PDFs on OS X, so we cannot use it.
if isosx:
printer.setOutputFormat(QPrinter.NativeFormat)
printer = get_pdf_printer(self.opts, output_file_name=item_path)
self.view.page().mainFrame().evaluateJavaScript('''
document.body.style.backgroundColor = "white";

@@ -193,10 +198,7 @@ class PDFWriter(QObject): # {{{
if self.cover_data is None:
return
item_path = os.path.join(self.tmp_path, 'cover.pdf')
printer = get_pdf_printer(self.opts)
printer.setOutputFileName(item_path)
if isosx:
printer.setOutputFormat(QPrinter.NativeFormat)
printer = get_pdf_printer(self.opts, output_file_name=item_path)
self.combine_queue.insert(0, item_path)
p = QPixmap()
p.loadFromData(self.cover_data)

@@ -248,10 +250,8 @@ class ImagePDFWriter(object):
os.remove(f.name)

def render_images(self, outpath, mi, items):
printer = get_pdf_printer(self.opts, for_comic=True)
printer.setOutputFileName(outpath)
if isosx:
printer.setOutputFormat(QPrinter.NativeFormat)
printer = get_pdf_printer(self.opts, for_comic=True,
output_file_name=outpath)
printer.setDocName(mi.title)
printer.setCreator(u'%s [%s]'%(__appname__, __version__))
# Seems to be no way to set author
@@ -105,6 +105,7 @@ gprefs.defaults['show_files_after_save'] = True
gprefs.defaults['auto_add_path'] = None
gprefs.defaults['auto_add_check_for_duplicates'] = False
gprefs.defaults['blocked_auto_formats'] = []
gprefs.defaults['auto_add_auto_convert'] = True
# }}}

NONE = QVariant() #: Null value to return from the data function of item models
@@ -71,7 +71,7 @@ class AddAction(InterfaceAction):
ma('add-formats', _('Add files to selected book records'),
triggered=self.add_formats, shortcut=_('Shift+A'))
self.add_menu.addSeparator()
ma('add-config', _('Configure the adding of books'),
ma('add-config', _('Control the adding of books'),
triggered=self.add_config)

self.qaction.triggered.connect(self.add_books)
@@ -53,6 +53,24 @@ class ConvertAction(InterfaceAction):
self.queue_convert_jobs(jobs, changed, bad, rows, previous,
self.book_auto_converted, extra_job_args=[on_card])

def auto_convert_auto_add(self, book_ids):
previous = self.gui.library_view.currentIndex()
db = self.gui.current_db
needed = set()
of = prefs['output_format'].lower()
for book_id in book_ids:
fmts = db.formats(book_id, index_is_id=True)
fmts = set(x.lower() for x in fmts.split(',')) if fmts else set()
if of not in fmts:
needed.add(book_id)
if needed:
jobs, changed, bad = convert_single_ebook(self.gui,
self.gui.library_view.model().db, needed, True, of,
show_no_format_warning=False)
if not jobs: return
self.queue_convert_jobs(jobs, changed, bad, list(needed), previous,
self.book_converted, rows_are_ids=True)

def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format, subject):
previous = self.gui.library_view.currentIndex()
rows = [x.row() for x in \

@@ -118,7 +136,7 @@ class ConvertAction(InterfaceAction):
num, 2000)

def queue_convert_jobs(self, jobs, changed, bad, rows, previous,
converted_func, extra_job_args=[]):
converted_func, extra_job_args=[], rows_are_ids=False):
for func, args, desc, fmt, id, temp_files in jobs:
func, _, same_fmt = func.partition(':')
same_fmt = same_fmt == 'same_fmt'

@@ -140,7 +158,11 @@ class ConvertAction(InterfaceAction):
self.conversion_jobs[job] = tuple(args)

if changed:
self.gui.library_view.model().refresh_rows(rows)
m = self.gui.library_view.model()
if rows_are_ids:
m.refresh_ids(rows)
else:
m.refresh_rows(rows)
current = self.gui.library_view.currentIndex()
self.gui.library_view.model().current_changed(current, previous)
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
import os, shutil
from functools import partial

from PyQt4.Qt import QMenu, QModelIndex, QTimer

@@ -16,6 +16,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.device_category_editor import DeviceCategoryEditor
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.icu import sort_key
from calibre.db.errors import NoSuchFormat

@@ -79,14 +80,23 @@ class EditMetadataAction(InterfaceAction):
Dispatcher(self.metadata_downloaded),
ensure_fields=ensure_fields)

def cleanup_bulk_download(self, tdir):
try:
shutil.rmtree(tdir, ignore_errors=True)
except:
pass

def metadata_downloaded(self, job):
if job.failed:
self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
return
from calibre.gui2.metadata.bulk_download import get_job_details
id_map, failed_ids, failed_covers, all_failed, det_msg = \
get_job_details(job)
(aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed,
det_msg, lm_map) = get_job_details(job)
if aborted:
return self.cleanup_bulk_download(tdir)
if all_failed:
self.cleanup_bulk_download(tdir)
return error_dialog(self.gui, _('Download failed'),
_('Failed to download metadata or covers for any of the %d'
' book(s).') % len(id_map), det_msg=det_msg, show=True)

@@ -103,28 +113,26 @@ class EditMetadataAction(InterfaceAction):
msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
' "Show details" to see which books.')%num

payload = (id_map, failed_ids, failed_covers)
payload = (id_map, tdir, log_file, lm_map)
from calibre.gui2.dialogs.message_box import ProceedNotification
p = ProceedNotification(self.apply_downloaded_metadata,
payload, job.html_details,
payload, log_file,
_('Download log'), _('Download complete'), msg,
det_msg=det_msg, show_copy_button=show_copy_button,
parent=self.gui)
parent=self.gui, log_is_file=True)
p.show()
def apply_downloaded_metadata(self, payload):
id_map, failed_ids, failed_covers = payload
id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in
failed_ids])
if not id_map:
good_ids, tdir, log_file, lm_map = payload
if not good_ids:
return

modified = set()
db = self.gui.current_db

for i, mi in id_map.iteritems():
for i in good_ids:
lm = db.metadata_last_modified(i, index_is_id=True)
if lm > mi.last_modified:
if lm > lm_map[i]:
title = db.title(i, index_is_id=True)
authors = db.authors(i, index_is_id=True)
if authors:

@@ -144,7 +152,18 @@ class EditMetadataAction(InterfaceAction):
'Do you want to proceed?'), det_msg='\n'.join(modified)):
return

self.apply_metadata_changes(id_map)
id_map = {}
for bid in good_ids:
opf = os.path.join(tdir, '%d.mi'%bid)
if not os.path.exists(opf):
opf = None
cov = os.path.join(tdir, '%d.cover'%bid)
if not os.path.exists(cov):
cov = None
id_map[bid] = (opf, cov)

self.apply_metadata_changes(id_map, callback=lambda x:
self.cleanup_bulk_download(tdir))

# }}}

@@ -468,6 +487,11 @@ class EditMetadataAction(InterfaceAction):
callback can be either None or a function accepting a single argument,
in which case it is called after applying is complete with the list of
changed ids.

id_map can also be a mapping of ids to 2-tuple's where each 2-tuple
contains the absolute paths to an OPF and cover file respectively. If
either of the paths is None, then the corresponding metadata is not
updated.
'''
if title is None:
title = _('Applying changed metadata')
@@ -492,28 +516,48 @@ class EditMetadataAction(InterfaceAction):
return self.finalize_apply()

i, mi = self.apply_id_map[self.apply_current_idx]
if isinstance(mi, tuple):
opf, cover = mi
if opf:
mi = OPF(open(opf, 'rb'), basedir=os.path.dirname(opf),
populate_spine=False).to_book_metadata()
self.apply_mi(i, mi)
if cover:
self.gui.current_db.set_cover(i, open(cover, 'rb'),
notify=False, commit=False)
else:
self.apply_mi(i, mi)

self.apply_current_idx += 1
if self.apply_pd is not None:
self.apply_pd.value += 1
QTimer.singleShot(50, self.do_one_apply)

def apply_mi(self, book_id, mi):
db = self.gui.current_db

try:
set_title = not mi.is_null('title')
set_authors = not mi.is_null('authors')
idents = db.get_identifiers(i, index_is_id=True)
idents = db.get_identifiers(book_id, index_is_id=True)
if mi.identifiers:
idents.update(mi.identifiers)
mi.identifiers = idents
if mi.is_null('series'):
mi.series_index = None
if self._am_merge_tags:
old_tags = db.tags(i, index_is_id=True)
old_tags = db.tags(book_id, index_is_id=True)
if old_tags:
tags = [x.strip() for x in old_tags.split(',')] + (
mi.tags if mi.tags else [])
mi.tags = list(set(tags))
db.set_metadata(i, mi, commit=False, set_title=set_title,
db.set_metadata(book_id, mi, commit=False, set_title=set_title,
set_authors=set_authors, notify=False)
self.applied_ids.append(i)
self.applied_ids.append(book_id)
except:
import traceback
self.apply_failures.append((i, traceback.format_exc()))
self.apply_failures.append((book_id, traceback.format_exc()))

try:
if mi.cover:

@@ -521,11 +565,6 @@ class EditMetadataAction(InterfaceAction):
except:
pass

self.apply_current_idx += 1
if self.apply_pd is not None:
self.apply_pd.value += 1
QTimer.singleShot(50, self.do_one_apply)

def finalize_apply(self):
db = self.gui.current_db
db.commit()
@@ -113,6 +113,7 @@ class Worker(Thread):
class AutoAdder(QObject):

metadata_read = pyqtSignal(object)
auto_convert = pyqtSignal(object)

def __init__(self, path, parent):
QObject.__init__(self, parent)

@@ -124,6 +125,8 @@ class AutoAdder(QObject):
self.metadata_read.connect(self.add_to_db,
type=Qt.QueuedConnection)
QTimer.singleShot(2000, self.initialize)
self.auto_convert.connect(self.do_auto_convert,
type=Qt.QueuedConnection)
elif path:
prints(path,
'is not a valid directory to watch for new ebooks, ignoring')

@@ -163,6 +166,7 @@ class AutoAdder(QObject):
needs_rescan = False
duplicates = []
added_ids = set()

for fname, tdir in data.iteritems():
paths = [os.path.join(self.worker.path, fname)]

@@ -187,9 +191,12 @@ class AutoAdder(QObject):
continue
mi = [OPF(open(mi, 'rb'), tdir,
populate_spine=False).to_book_metadata()]
dups, num = m.add_books(paths,
dups, ids = m.add_books(paths,
[os.path.splitext(fname)[1][1:].upper()], mi,
add_duplicates=not gprefs['auto_add_check_for_duplicates'])
add_duplicates=not gprefs['auto_add_check_for_duplicates'],
return_ids=True)
added_ids |= set(ids)
num = len(ids)
if dups:
path = dups[0][0]
with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
@@ -217,8 +224,10 @@ class AutoAdder(QObject):
_('Books with the same title as the following already '
'exist in the database. Add them anyway?'),
'\n'.join(files)):
dups, num = m.add_books(paths, formats, metadata,
add_duplicates=True)
dups, ids = m.add_books(paths, formats, metadata,
add_duplicates=True, return_ids=True)
added_ids |= set(ids)
num = len(ids)
count += num

for tdir in data.itervalues():

@@ -227,6 +236,9 @@ class AutoAdder(QObject):
except:
pass

if added_ids and gprefs['auto_add_auto_convert']:
self.auto_convert.emit(added_ids)

if count > 0:
m.books_added(count)
gui.status_bar.show_message(_(

@@ -238,4 +250,7 @@ class AutoAdder(QObject):
if needs_rescan:
QTimer.singleShot(2000, self.dir_changed)

def do_auto_convert(self, added_ids):
gui = self.parent()
gui.iactions['Convert Books'].auto_convert_auto_add(added_ids)
@@ -160,7 +160,7 @@ class ProceedNotification(MessageBox): # {{{

def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
det_msg='', show_copy_button=False, parent=None,
cancel_callback=None):
cancel_callback=None, log_is_file=False):
'''
A non modal popup that notifies the user that a background task has
been completed.

@@ -175,12 +175,15 @@ class ProceedNotification(MessageBox): # {{{
:param title: The title for this popup
:param msg: The msg to display
:param det_msg: Detailed message
:param log_is_file: If True the html_log parameter is interpreted as
the path to a file on disk containing the log encoded with utf-8
'''
MessageBox.__init__(self, MessageBox.QUESTION, title, msg,
det_msg=det_msg, show_copy_button=show_copy_button,
parent=parent)
self.payload = payload
self.html_log = html_log
self.log_is_file = log_is_file
self.log_viewer_title = log_viewer_title

self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)

@@ -192,7 +195,11 @@ class ProceedNotification(MessageBox): # {{{
_proceed_memory.append(self)

def show_log(self):
self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
log = self.html_log
if self.log_is_file:
with open(log, 'rb') as f:
log = f.read().decode('utf-8')
self.log_viewer = ViewLog(self.log_viewer_title, log,
parent=self)

def do_proceed(self, result):

@@ -202,9 +209,9 @@ class ProceedNotification(MessageBox): # {{{
gui = get_gui()
gui.proceed_requested.emit(func, self.payload)
# Ensure this notification is garbage collected
self.vlb.clicked.disconnect()
self.callback = self.cancel_callback = self.payload = None
self.setParent(None)
self.vlb.clicked.disconnect()
_proceed_memory.remove(self)

def done(self, r):
@@ -140,34 +140,6 @@
</item>
</layout>
</item>
<item>
<widget class="QGroupBox" name="groupBox">
<property name="maximumSize">
<size>
<width>16777215</width>
<height>60</height>
</size>
</property>
<layout class="QHBoxLayout" name="horizontalLayout_5">
<item>
<widget class="QLabel" name="label_51">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>40</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string/>
</property>
<property name="buddy">
<cstring>matchkind</cstring>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QLabel" name="label_6">
<property name="maximumSize">
@@ -402,7 +402,8 @@ class DetailView(QDialog, Ui_Dialog): # {{{
self.setupUi(self)
self.setWindowTitle(job.description)
self.job = job
self.html_view = hasattr(job, 'html_details')
self.html_view = (hasattr(job, 'html_details') and not getattr(job,
'ignore_html_details', False))
if self.html_view:
self.log.setVisible(False)
else:
@@ -187,9 +187,10 @@ class BooksModel(QAbstractTableModel): # {{{
self.db = None
self.reset()

def add_books(self, paths, formats, metadata, add_duplicates=False):
def add_books(self, paths, formats, metadata, add_duplicates=False,
return_ids=False):
ret = self.db.add_books(paths, formats, metadata,
add_duplicates=add_duplicates)
add_duplicates=add_duplicates, return_ids=return_ids)
self.count_changed()
return ret
@@ -7,22 +7,41 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, time, shutil
from functools import partial
from itertools import izip
from threading import Event

from PyQt4.Qt import (QIcon, QDialog,
QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)

from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import as_utc
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ptempfile import (PersistentTemporaryDirectory,
PersistentTemporaryFile)

# Start download {{{

class Job(ThreadedJob):

ignore_html_details = True

def consolidate_log(self):
self.consolidated_log = self.log.plain_text
self.log = None

def read_consolidated_log(self):
return self.consolidated_log

@property
def details(self):
if self.consolidated_log is None:
return self.log.plain_text
return self.read_consolidated_log()

@property
def log_file(self):
return open(self.download_debug_log, 'rb')

def show_config(gui, parent):
from calibre.gui2.preferences import show_config_widget
show_config_widget('Sharing', 'Metadata download', parent=parent,
@@ -104,19 +123,22 @@ def start_download(gui, ids, callback, ensure_fields=None):
d.b.clicked.disconnect()
if ret != d.Accepted:
return
tf = PersistentTemporaryFile('_metadata_bulk_log_')
tf.close()

for batch in split_jobs(ids):
job = ThreadedJob('metadata bulk download',
_('Download metadata for %d books')%len(batch),
download, (batch, gui.current_db, d.identify, d.covers,
ensure_fields), {}, callback)
gui.job_manager.run_threaded_job(job)
job = Job('metadata bulk download',
_('Download metadata for %d books')%len(ids),
download, (ids, tf.name, gui.current_db, d.identify, d.covers,
ensure_fields), {}, callback)
job.download_debug_log = tf.name
gui.job_manager.run_threaded_job(job)
gui.status_bar.show_message(_('Metadata download started'), 3000)

# }}}

def get_job_details(job):
id_map, failed_ids, failed_covers, title_map, all_failed = job.result
(aborted, good_ids, tdir, log_file, failed_ids, failed_covers, title_map,
lm_map, all_failed) = job.result
det_msg = []
for i in failed_ids | failed_covers:
title = title_map[i]

@@ -126,92 +148,89 @@ def get_job_details(job):
title += (' ' + _('(Failed cover)'))
det_msg.append(title)
det_msg = '\n'.join(det_msg)
return id_map, failed_ids, failed_covers, all_failed, det_msg
return (aborted, good_ids, tdir, log_file, failed_ids, failed_covers,
all_failed, det_msg, lm_map)
def merge_result(oldmi, newmi, ensure_fields=None):
dummy = Metadata(_('Unknown'))
for f in msprefs['ignore_fields']:
if ':' in f or (ensure_fields and f in ensure_fields):
continue
setattr(newmi, f, getattr(dummy, f))
fields = set()
for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields
class HeartBeat(object):
CHECK_INTERVAL = 300 # seconds
''' Check that the file count in tdir changes every five minutes '''

def is_equal(x, y):
if hasattr(x, 'tzinfo'):
x = as_utc(x)
if hasattr(y, 'tzinfo'):
y = as_utc(y)
return x == y
def __init__(self, tdir):
self.tdir = tdir
self.last_count = len(os.listdir(self.tdir))
self.last_time = time.time()

for f in fields:
# Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'):
if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f))
def __call__(self):
if time.time() - self.last_time > self.CHECK_INTERVAL:
c = len(os.listdir(self.tdir))
if c == self.last_count:
return False
self.last_count = c
self.last_time = time.time()
return True

newmi.last_modified = oldmi.last_modified
# Fix log viewer, ratings
# Test: abort, covers only, metadata only, both, 200 entry download, memory
# consumption, all errors and on and on

return newmi
def download(ids, db, do_identify, covers, ensure_fields,
def download(all_ids, tf, db, do_identify, covers, ensure_fields,
log=None, abort=None, notifications=None):
ids = list(ids)
metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
for i in ids]
batch_size = 10
batches = split_jobs(all_ids, batch_size=batch_size)
tdir = PersistentTemporaryDirectory('_metadata_bulk_')
heartbeat = HeartBeat(tdir)

failed_ids = set()
failed_covers = set()
title_map = {}
ans = {}
count = 0
lm_map = {}
ans = set()
all_failed = True
'''
# Test apply dialog
all_failed = do_identify = covers = False
'''
for i, mi in izip(ids, metadata):
aborted = False
count = 0

for ids in batches:
if abort.is_set():
log.error('Aborting...')
break
title, authors, identifiers = mi.title, mi.authors, mi.identifiers
title_map[i] = title
if do_identify:
results = []
try:
results = identify(log, Event(), title=title, authors=authors,
identifiers=identifiers)
except:
pass
if results:
all_failed = False
mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
identifiers = mi.identifiers
if not mi.is_null('rating'):
# set_metadata expects a rating out of 10
mi.rating *= 2
else:
log.error('Failed to download metadata for', title)
failed_ids.add(i)
# We don't want set_metadata operating on anything but covers
mi = merge_result(mi, mi, ensure_fields=ensure_fields)
if covers:
cdata = download_cover(log, title=title, authors=authors,
identifiers=identifiers)
if cdata is not None:
with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
f.write(cdata[-1])
mi.cover = f.name
all_failed = False
else:
failed_covers.add(i)
ans[i] = mi
count += 1
metadata = {i:db.get_metadata(i, index_is_id=True,
get_user_categories=False) for i in ids}
for i in ids:
title_map[i] = metadata[i].title
lm_map[i] = metadata[i].last_modified
metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
metadata.iteritems()}
try:
ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
(do_identify, covers, metadata, ensure_fields),
cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
except WorkerError as e:
if e.orig_tb:
raise Exception('Failed to download metadata. Original '
'traceback: \n\n'+e.orig_tb)
raise
count += batch_size
notifications.put((count/len(ids),
_('Downloaded %(num)d of %(tot)d')%dict(num=count, tot=len(ids))))
_('Downloaded %(num)d of %(tot)d')%dict(
num=count, tot=len(all_ids))))

fids, fcovs, allf = ret['result']
if not allf:
all_failed = False
failed_ids = failed_ids.union(fids)
failed_covers = failed_covers.union(fcovs)
ans = ans.union(set(ids) - fids)
for book_id in ids:
lp = os.path.join(tdir, '%d.log'%book_id)
if os.path.exists(lp):
with open(tf, 'ab') as dest, open(lp, 'rb') as src:
dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
'#'*20+'\n').encode('utf-8'))
shutil.copyfileobj(src, dest)

if abort.is_set():
aborted = True
log('Download complete, with %d failures'%len(failed_ids))
return (ans, failed_ids, failed_covers, title_map, all_failed)


return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
lm_map, all_failed)

@ -161,10 +161,10 @@ class MetadataSingleDialogBase(ResizableDialog):
self.manage_authors_button.clicked.connect(self.authors.manage_authors)

self.series = SeriesEdit(self)
self.remove_unused_series_button = QToolButton(self)
self.remove_unused_series_button.setToolTip(
_('Remove unused series (Series that have no books)') )
self.remove_unused_series_button.clicked.connect(self.remove_unused_series)
self.clear_series_button = QToolButton(self)
self.clear_series_button.setToolTip(
_('Clear series') )
self.clear_series_button.clicked.connect(self.series.clear)
self.series_index = SeriesIndexEdit(self, self.series)
self.basic_metadata_widgets.extend([self.series, self.series_index])

@ -198,6 +198,7 @@ class MetadataSingleDialogBase(ResizableDialog):
self.basic_metadata_widgets.append(self.identifiers)
self.clear_identifiers_button = QToolButton(self)
self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
self.clear_identifiers_button.setToolTip(_('Clear Ids'))
self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
self.paste_isbn_button = QToolButton(self)
self.paste_isbn_button.setToolTip('<p>' +
@ -303,17 +304,6 @@ class MetadataSingleDialogBase(ResizableDialog):
self.title_sort.auto_generate()
self.author_sort.auto_generate()

def remove_unused_series(self, *args):
self.db.remove_unused_series()
idx = self.series.current_val
self.series.clear()
self.series.initialize(self.db, self.book_id)
if idx:
for i in range(self.series.count()):
if unicode(self.series.itemText(i)) == idx:
self.series.setCurrentIndex(i)
break

def tags_editor(self, *args):
self.tags.edit(self.db, self.book_id)

@ -591,7 +581,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
sto(self.title_sort, self.authors)
create_row(1, self.authors, self.deduce_author_sort_button, self.author_sort)
sto(self.author_sort, self.series)
create_row(2, self.series, self.remove_unused_series_button,
create_row(2, self.series, self.clear_series_button,
self.series_index, icon='trash.png')
sto(self.series_index, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
@ -756,7 +746,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
button=self.remove_unused_series_button, icon='trash.png')
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)
@ -892,7 +882,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
button=self.remove_unused_series_button, icon='trash.png')
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)

@ -36,6 +36,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('new_book_tags', prefs, setting=CommaSeparatedList)
r('auto_add_path', gprefs, restart_required=True)
r('auto_add_check_for_duplicates', gprefs)
r('auto_add_auto_convert', gprefs)

self.filename_pattern = FilenamePattern(self)
self.metadata_box.layout().insertWidget(0, self.filename_pattern)

@ -151,6 +151,19 @@ Author matching is exact.</string>
<string>&Automatic Adding</string>
</attribute>
<layout class="QGridLayout" name="gridLayout_3">
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
<property name="toolTip">
<string>If set, this option will cause calibre to check if a file
being auto-added is already in the calibre library.
If it is, a message will pop up asking you whether
you want to add it anyway.</string>
</property>
<property name="text">
<string>Check for &duplicates when auto-adding files</string>
</property>
</widget>
</item>
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label">
<property name="text">
@ -168,7 +181,7 @@ Author matching is exact.</string>
</property>
</widget>
</item>
<item row="4" column="0">
<item row="5" column="0">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Ignore files with the following extensions when automatically adding </string>
@ -187,7 +200,7 @@ Author matching is exact.</string>
</layout>
</widget>
</item>
<item row="4" column="1">
<item row="5" column="1">
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
@ -225,16 +238,10 @@ Author matching is exact.</string>
</item>
</layout>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
<property name="toolTip">
<string>If set, this option will cause calibre to check if a file
being auto-added is already in the calibre library.
If it is, a message will pop up asking you whether
you want to add it anyway.</string>
</property>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_auto_add_auto_convert">
<property name="text">
<string>Check for &duplicates when auto-adding files</string>
<string>Automatically &convert added files to the current output format</string>
</property>
</widget>
</item>

@ -73,11 +73,13 @@ class OpenSearchOPDSStore(StorePlugin):
type = link.get('type')

if rel and href and type:
if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
if 'http://opds-spec.org/thumbnail' in rel:
s.cover_url = href
elif rel == u'http://opds-spec.org/acquisition/buy':
elif 'http://opds-spec.org/image/thumbnail' in rel:
s.cover_url = href
elif 'http://opds-spec.org/acquisition/buy' in rel:
s.detail_item = href
elif rel == u'http://opds-spec.org/acquisition':
elif 'http://opds-spec.org/acquisition' in rel:
if type:
ext = mimetypes.guess_extension(type)
if ext:

@ -25,7 +25,7 @@ from calibre.ebooks.conversion.config import GuiRecommendations, \
from calibre.gui2.convert import bulk_defaults_for_input_format

def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
out_format=None):
out_format=None, show_no_format_warning=True):
changed = False
jobs = []
bad = []
@ -91,7 +91,7 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
except NoSupportedInputFormats:
bad.append(book_id)

if bad != []:
if bad and show_no_format_warning:
res = []
for id in bad:
title = db.title(id, True)

@ -3243,7 +3243,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return id


def add_books(self, paths, formats, metadata, add_duplicates=True):
def add_books(self, paths, formats, metadata, add_duplicates=True,
return_ids=False):
'''
Add a book to the database. The result cache is not updated.
:param:`paths` List of paths to book files or file-like objects
@ -3289,7 +3290,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = list(duplicate[1] for duplicate in duplicates)
metadata = list(duplicate[2] for duplicate in duplicates)
return (paths, formats, metadata), len(ids)
return None, len(ids)
return None, (ids if return_ids else len(ids))

def import_book(self, mi, formats, notify=True, import_hooks=True,
apply_import_tags=True, preserve_uuid=False):
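The new return_ids flag above only changes the second element of add_books()'s return value. A minimal usage sketch, not part of this commit (the variable names paths, formats and metadata stand for the parallel lists documented in the docstring):

    # db is a LibraryDatabase2 instance.
    duplicates, ids = db.add_books(paths, formats, metadata,
            add_duplicates=False, return_ids=True)
    # When nothing is skipped as a duplicate, the first value is None and,
    # with return_ids=True, the second value is the list of newly added book
    # ids rather than just their count.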
@ -648,7 +648,10 @@ class BasicNewsRecipe(Recipe):
'url' : URL of print version,
'date' : The publication date of the article as a string,
'description' : A summary of the article
'content' : The full article (can be an empty string). This is used by FullContentProfile
'content' : The full article (can be an empty string). Obsolete,
do not use; instead save the content to a temporary
file and pass a file:///path/to/temp/file.html as
the URL.
}

For an example, see the recipe for downloading `The Atlantic`.
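The docstring change above tells recipe authors to stop filling 'content' and instead point the article at a saved file. A minimal sketch of that pattern (a hypothetical recipe with placeholder HTML, not taken from this commit):

    from calibre.ptempfile import PersistentTemporaryFile
    from calibre.web.feeds.recipes import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'

        def parse_index(self):
            html = '<html><body><h1>Placeholder article</h1></body></html>'
            # Save the full article to a temporary file that outlives this call
            tf = PersistentTemporaryFile('_article.html')
            tf.write(html)
            tf.close()
            # Point the article at the saved file and leave 'content' empty
            articles = [{'title': 'Placeholder', 'url': 'file://' + tf.name,
                'date': '', 'description': '', 'content': ''}]
            return [('Feed', articles)]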