mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09)

commit bf2850019e
0.8.45+
@@ -13,7 +13,7 @@ class HighCountryNews(BasicNewsRecipe):
     __author__ = 'Armin Geller' # 2012-01-31
     publisher = 'High Country News'
     timefmt = ' [%a, %d %b %Y]'
-    language = 'en-Us'
+    language = 'en'
     encoding = 'UTF-8'
     publication_type = 'newspaper'
     oldest_article = 7

@@ -1,45 +1,73 @@
-# Talking Points is not grabbing everything.
-# The look is right, but only the last one added?
-import re
+import string, re
 import time
+import traceback
+# above for debugging via stack
 from calibre.web.feeds.recipes import BasicNewsRecipe
 # Allows the Python soup converter, which makes parsing easier.
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-# strip ads and graphics
-# Current Column lacks a title.
-# Talking Points Memo - shorten title - Remove year and Bill's name
+import os, time, traceback, re, urlparse, sys, cStringIO
+from collections import defaultdict
+from functools import partial
+from contextlib import nested, closing
+
+from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
+from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
+
+# To Do: strip ads and graphics, Current Column lacks a title.
 # The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
 # Newsletters: Talking Points Memos covered by cat12
+# ./ebook-convert --username xxx --password xxx
+
+# this is derived from BasicNewsRecipe, so it can only overload those.
+# Soome of what we need is otherwise in article, so we have more copy to do than otherwise.
 class OReillyPremium(BasicNewsRecipe):
     title = u'OReilly Premium'
     __author__ = 'TMcN'
-    language = 'en'
     description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
     cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
+    custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y')
+    title = 'Bill O\'Reilly Premium'
     auto_cleanup = True
+    conversion_options = {'linearize_tables': True}
     encoding = 'utf8'
-    needs_subscription = True
+    language = 'en'
     no_stylesheets = True
-    oldest_article = 20
+    needs_subscription = True
+    oldest_article = 31
     remove_javascript = True
     remove_tags = [dict(name='img', attrs={})]
     # Don't go down
     recursions = 0
-    max_articles_per_feed = 2000
+    max_articles_per_feed = 20

     debugMessages = True

     # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
     catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
-        ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
-        ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
-        ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
-        ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
+        # ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
+        # ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
+        # ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
+        # ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
         ["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
     ]

+    feeds = [
+        (u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'),
+        (u'Daily Briefing', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'),
+        (u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'),
+        (u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'),
+        (u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5')
+    ]
+    # http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day.
+
+    # Note: Talking Points is broken in the above model; the site changed to more Ajax-y.
+    # Now using RSS
+
     def get_browser(self):
+        print("In get_browser")
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
             br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
@@ -66,6 +94,7 @@ class OReillyPremium(BasicNewsRecipe):
     def stripBadChars(self, inString) :
         return inString.replace("\'", "")

+
     def parseGeneric(self, baseURL):
         # Does a generic parsing of the articles. There are six categories (0-5)
         # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
@@ -73,6 +102,7 @@ class OReillyPremium(BasicNewsRecipe):
         fullReturn = []
         for i in range(len(self.catList)) :
             articleList = []
+            print("In "+self.catList[i][0]+", index: "+ str(i))
             soup = self.index_to_soup(self.catList[i][1])
             # Set defaults
             description = 'None'
@@ -81,14 +111,12 @@ class OReillyPremium(BasicNewsRecipe):
             # 3-5 create one.
             # So no for-div for 3-5

-            if i < 3 :
+            if i == 0 :
+                print("Starting TV Archives")
                 for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
+                    print("Next DIV:")
                     print(div)
-                    if i == 1:
-                        a = div.find('a', href=True)
-                    else :
-                        a = div
-                    print(a)
+                    a = div
                     summary = div.find(True, attrs={'class':'summary'})
                     if summary:
                         description = self.tag_to_string(summary, use_alt=False)
@@ -96,82 +124,63 @@ class OReillyPremium(BasicNewsRecipe):
                         continue
                     # url = baseURL+re.sub(r'\?.*', '', a['href'])
                     url = baseURL+a['href']
-                    if i < 2 :
-                        url = self.extractPrintURL(baseURL, url, "Print this entry")
-                        title = self.tag_to_string(a, use_alt=True).strip()
-                    elif i == 2 :
-                        # Daily Briefs
-                        url = self.extractPrintURL(baseURL, url, "Print this entry")
-                        title = div.contents[0]
-                    if self.debugMessages :
-                        print(title+" @ "+url)
+                    url = self.extractPrintURL(baseURL, url, "Print this entry")
+                    title = self.tag_to_string(a, use_alt=True).strip()
                     articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))

-            elif i == 3 : # Stratfor
-                a = soup.find('a', self.catList[i][3])
-                if a is None :
-                    continue
-                url = baseURL+a['href']
-                title = self.tag_to_string(a, use_alt=True).strip()
-                # Get Stratfor contents so we can get the real title.
-                stratSoup = self.index_to_soup(url)
-                title = stratSoup.html.head.title.string
-                stratIndex = title.find('Stratfor.com:', 0)
-                if (stratIndex > -1) :
-                    title = title[stratIndex+14:-1]
-                # Look for first blogBody <td class="blogBody"
-                # Changed 12 Jan 2012 - new page format
-                #stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
-                #stratBody = stratSoup.find('td', {'class':['blogBody']})
-            elif i == 4 : # Talking Points
-                topDate = soup.find("td", "blogBody")
-                if not topDate :
-                    print("Failed to find date in Talking Points")
-                # This page has the contents in double-wrapped tables!
-                myTable = topDate.findParents('table')[0]
-                if myTable is not None:
-                    upOneTable = myTable.findParents('table')[0]
-                    if upOneTable is not None:
-                        upTwo = upOneTable.findParents('table')[0]
-                        if upTwo is None:
-                            continue
-                        # Now navigate rows of upTwo
-                        if self.debugMessages :
-                            print("Entering rows")
-                        for rows in upTwo.findChildren("tr", recursive=False):
-                            # Inside top level table, each row is an article
-                            rowTable = rows.find("table")
-                            articleTable = rowTable.find("table")
-                            # This looks wrong.
-                            articleTable = rows.find("tr")
-                            # The middle table is just for formatting the article buffer... but this means we can skip the inner table.
-                            blogDate = articleTable.find("a","blogDate").contents[0]
-                            # Skip to second blogBody for this.
-                            blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
-                            blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
-                            url = baseURL+re.sub(r'\?.*', '', blogURL)
-                            title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
-                            if self.debugMessages :
-                                print("Talking Points Memo title "+title+" at url: "+url)
-                            pubdate = time.strftime('%a, %d %b')
-                            articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
             else : # Current Column
                 titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
                 if titleSpan is None :
+                    print("No Current Column Title Span")
+                    print(soup)
                     continue
                 title = titleSpan.contents[0]
                 url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
-            if i == 3 or i == 5 :
+            if i == 1 :
                 if self.debugMessages :
                     print(self.catList[i][0]+" Title:"+title+" at url: "+url)
                 summary = div.find(True, attrs={'class':'summary'})
-                if summary:
+                print("At Summary")
+                print(summary)
+                if summary is not None:
                     description = self.tag_to_string(summary, use_alt=False)
+                print("At append")
                 articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
             self.catList[i][3] = articleList
             fullReturn.append((self.catList[i][0], articleList))
+        print("Returning")
+        # print fullReturn
         return fullReturn

+    # build_index() starts with:
+    # try:
+    #     feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
+    #                              max_articles_per_feed=self.max_articles_per_feed,
+    #                              log=self.log)
+    #     self.report_progress(0, _('Got feeds from index page'))
+    # except NotImplementedError:
+    #     feeds = self.parse_feeds()
+
+    # which in turn is from __init__.py
+    #def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
+    #        log=default_log):
+    #'''
+    #@param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
+    #@return: A list of L{Feed} objects.
+    #@rtype: list
+    #'''
+    #feeds = []
+    #for title, articles in index:
+    #    pfeed = Feed(log=log)
+    #    pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
+    #            max_articles_per_feed=max_articles_per_feed)
+    #    feeds.append(pfeed)
+    #return feeds
+
+    # use_embedded_content defaults to None, at which point if the content is > 2K, it is used as the article.
+
 # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
 # returns a list of tuple ('feed title', list of articles)
 # {
@@ -182,12 +191,19 @@ class OReillyPremium(BasicNewsRecipe):
     # 'content' : The full article (can be an empty string). This is used by FullContentProfile
     # }
     # this is used instead of BasicNewsRecipe.parse_feeds().
+    # it is called by download
     def parse_index(self):
         # Parse the page into Python Soup
+        print("Entering recipe print_index from:")
+        traceback.print_stack()
+        print("web")
         baseURL = "https://www.billoreilly.com"
-        return self.parseGeneric(baseURL)
+        masterList = self.parseGeneric(baseURL)
+        #print(masterList)
+        return masterList

     def preprocess_html(self, soup):
+        print("In preprocess_html")
         refresh = soup.find('meta', {'http-equiv':'refresh'})
         if refresh is None:
             return soup
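Concretely, the contract described in the comments above means parse_index() must return a list of ('feed title', article-dict list) tuples. An illustrative (not actual) return value, with made-up titles and a truncated URL:

    # Illustrative shape of parse_index()'s return value, per the comments above.
    [('TV Archives', [
        {'title': 'Show recap',                      # article title
         'url': 'https://www.billoreilly.com/...',   # print-friendly URL
         'date': 'Mon, 30 Jan',                      # pubdate string
         'description': 'None',
         'content': ''},                             # empty; fetched during download
    ])]
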
@@ -195,3 +211,128 @@ class OReillyPremium(BasicNewsRecipe):
         raw = self.browser.open('https://www.billoreilly.com'+content).read()
         return BeautifulSoup(raw.decode('cp1252', 'replace'))

+    def build_index(self):
+        print("In OReilly build_index()\n\n")
+        feedsRSS = []
+        self.report_progress(0, _('Fetching feeds...'))
+        #try:
+        feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
+                                 max_articles_per_feed=self.max_articles_per_feed,
+                                 log=self.log)
+        self.report_progress(0, _('Got feeds from index page'))
+        #except NotImplementedError:
+        #    feeds = self.parse_feeds()
+        # Now add regular feeds.
+        feedsRSS = self.parse_feeds()
+        print ("feedsRSS is type "+feedsRSS.__class__.__name__)
+
+        for articles in feedsRSS:
+            print("articles is type "+articles.__class__.__name__)
+            print("Title:" + articles.title)
+            feeds.append(articles)
+        if not feeds:
+            raise ValueError('No articles found, aborting')
+
+        #feeds = FeedCollection(feeds)
+
+        self.report_progress(0, _('Trying to download cover...'))
+        self.download_cover()
+        self.report_progress(0, _('Generating masthead...'))
+        self.masthead_path = None
+
+        try:
+            murl = self.get_masthead_url()
+        except:
+            self.log.exception('Failed to get masthead url')
+            murl = None
+
+        if murl is not None:
+            # Try downloading the user-supplied masthead_url
+            # Failure sets self.masthead_path to None
+            self.download_masthead(murl)
+        if self.masthead_path is None:
+            self.log.info("Synthesizing mastheadImage")
+            self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
+            try:
+                self.default_masthead_image(self.masthead_path)
+            except:
+                self.log.exception('Failed to generate default masthead image')
+                self.masthead_path = None
+
+        if self.test:
+            feeds = feeds[:2]
+        self.has_single_feed = len(feeds) == 1
+
+        index = os.path.join(self.output_dir, 'index.html')
+
+        html = self.feeds2index(feeds)
+        with open(index, 'wb') as fi:
+            fi.write(html)
+
+        self.jobs = []
+
+        if self.reverse_article_order:
+            for feed in feeds:
+                if hasattr(feed, 'reverse'):
+                    feed.reverse()
+
+        self.feed_objects = feeds
+        for f, feed in enumerate(feeds):
+            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
+            if not os.path.isdir(feed_dir):
+                os.makedirs(feed_dir)
+
+            for a, article in enumerate(feed):
+                if a >= self.max_articles_per_feed:
+                    break
+                art_dir = os.path.join(feed_dir, 'article_%d'%a)
+                if not os.path.isdir(art_dir):
+                    os.makedirs(art_dir)
+                try:
+                    url = self.print_version(article.url)
+                except NotImplementedError:
+                    url = article.url
+                except:
+                    self.log.exception('Failed to find print version for: '+article.url)
+                    url = None
+                if not url:
+                    continue
+                func, arg = (self.fetch_embedded_article, article) \
+                    if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
+                    else \
+                    ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
+                        else self.fetch_article), url)
+                req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
+                        {}, (f, a), self.article_downloaded,
+                        self.error_in_article_download)
+                req.feed = feed
+                req.article = article
+                req.feed_dir = feed_dir
+                self.jobs.append(req)
+
+        self.jobs_done = 0
+        tp = ThreadPool(self.simultaneous_downloads)
+        for req in self.jobs:
+            tp.putRequest(req, block=True, timeout=0)
+
+        self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
+        while True:
+            try:
+                tp.poll()
+                time.sleep(0.1)
+            except NoResultsPending:
+                break
+        for f, feed in enumerate(feeds):
+            print("Writing feeds for "+feed.title)
+            html = self.feed2index(f,feeds)
+            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
+            with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
+                fi.write(html)
+        self.create_opf(feeds)
+        self.report_progress(1, _('Feeds downloaded to %s')%index)
+
+        return index

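The essence of the build_index() override above is the merge it performs before the download loop: feeds built from the scraped parse_index() categories, plus the regular RSS feeds declared in self.feeds. A minimal sketch of that merge step, using the calibre-0.8-era helpers the diff itself names (feeds_from_index, parse_feeds):

    # Sketch only: the merge at the top of the new build_index().
    feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
                             max_articles_per_feed=self.max_articles_per_feed,
                             log=self.log)
    for feed in self.parse_feeds():   # regular RSS feeds from self.feeds
        feeds.append(feed)
    if not feeds:
        raise ValueError('No articles found, aborting')

The remainder of the method then follows the stock BasicNewsRecipe.build_index() it was copied from: cover, masthead, per-feed directories, and a ThreadPool of WorkRequests.
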
@@ -1,7 +1,9 @@
 # Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
+import string, re
 import time
+from urlparse import urlparse
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import NavigableString
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString

 class RealClear(BasicNewsRecipe):
     title = u'Real Clear'
@@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe):
     # Don't go down
     recursions = 0
     max_articles_per_feed = 400
-    debugMessages = False
+    debugMessages = True

     # Numeric parameter is type, controls whether we look for
     feedsets = [
         ["Politics", "http://www.realclearpolitics.com/index.xml", 0],
-        ["Science", "http://www.realclearscience.com/index.xml", 0],
+        ["Policy", "http://www.realclearpolicy.com/index.xml", 0],
+        ["Science", "http://www.realclearscience.com/index.xml", 0],
         ["Tech", "http://www.realcleartechnology.com/index.xml", 0],
         # The feedburner is essentially the same as the top feed, politics.
         # ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
@@ -37,7 +40,9 @@ class RealClear(BasicNewsRecipe):
     ]
     # Hints to extractPrintURL.
     # First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
-    printhints = [
+    phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
+
+    printhints = [ ["realclear", "", '' , 'printpage'],
         ["billoreilly.com", "Print this entry", 'a', ''],
         ["billoreilly.com", "Print This Article", 'a', ''],
         ["politico.com", "Print", 'a', 'share-print'],
@@ -48,11 +53,24 @@ class RealClear(BasicNewsRecipe):
         # usatoday - just prints with all current crap anyhow

     ]
+    # RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html
+    # The print link isn't obvious, and only the end is needed (the -full append.) SO maybe try that first?s
+    # http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html
+    # Single page articles don't have a _full; e.g. http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html
+    # Use the FULL PRINTPAGE URL; it formats it better too!
+    #
+    # NYT - try single page...
+    # Need special code - is it one page or several? Which URL?
+    # from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1
+    # to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all
+    # which is at link rel="canonical" and at <meta property="og:url" or look for "Single Page"
+
     # Returns the best-guess print url.
     # The second parameter (pageURL) is returned if nothing is found.
     def extractPrintURL(self, pageURL):
         tagURL = pageURL
+        baseParse = urlparse(pageURL)
+        baseURL = baseParse[0]+"://"+baseParse[1]
         hintsCount =len(self.printhints)
         for x in range(0,hintsCount):
             if pageURL.find(self.printhints[x][0])== -1 :
@@ -62,23 +80,37 @@ class RealClear(BasicNewsRecipe):
             soup = self.index_to_soup(pageURL)
             if soup is None:
                 return pageURL
-            if len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
+            if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
+                # e.g. RealClear
                 if self.debugMessages == True :
-                    print("search1")
+                    print("Search by href: "+self.printhints[x][self.phHrefSearch])
+                printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
+            elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
+                if self.debugMessages == True :
+                    print("Search 1: "+self.printhints[x][2]+" Attributes: ")
+                    print(self.printhints[x][3])
                 printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
             elif len(self.printhints[x][3])>0 :
                 if self.debugMessages == True :
                     print("search2")
                 printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
             else :
+                if self.debugMessages == True:
+                    print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
                 printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
             if printFind is None:
                 if self.debugMessages == True :
                     print("Not Found")
+                    # print(soup)
+                    print("end soup\n\n");
                 continue

             print(printFind)
             if isinstance(printFind, NavigableString)==False:
                 if printFind['href'] is not None:
+                    print("Check "+printFind['href']+" for base of "+baseURL)
+                    if printFind['href'].find("http")!=0 :
+                        return baseURL+printFind['href']
                     return printFind['href']
             tag = printFind.parent
             print(tag)
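The named indices introduced above (phUrlSnip, phLinkText, phMainSearch, phHrefSearch) document the four columns of each printhints row. A simplified sketch of the strategy selection inside extractPrintURL(), for illustration only, not the recipe's exact code:

    # Simplified dispatch over one printhints row (illustrative).
    import re

    def find_print_link(soup, hint):
        phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
        if hint[phHrefSearch] and not hint[phLinkText]:
            # e.g. the new "realclear" row: match 'printpage' in the href itself
            return soup.find(href=re.compile(hint[phHrefSearch]))
        if hint[phMainSearch]:
            # e.g. the billoreilly rows: a tag whose text is the link text
            return soup.find(hint[phMainSearch], text=hint[phLinkText])
        return None
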
@@ -158,6 +190,7 @@ class RealClear(BasicNewsRecipe):
     def parse_index(self):
         # Parse the page into Python Soup

+        articleList = []
         ans = []
         feedsCount = len(self.feedsets)
         for x in range(0,feedsCount): # should be ,4
@@ -168,3 +201,4 @@ class RealClear(BasicNewsRecipe):
         print(ans)
         return ans

+
@@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@id="mediaWrapper"]'
     simultaneous_downloads = 1
     delay = 4
     max_connections = 1
@@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe):
         , 'language' : language
     }

-    keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
+    #keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]

-    remove_tags = [
-        dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
-        ,dict(name=['object','link'])
-    ]
+    #remove_tags = [
+        #dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
+        #,dict(name=['object','link'])
+    #]

-    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
+    feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )]


     def get_cover_url(self):

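The Soldiers change above trades hand-maintained tag lists for calibre's heuristic extractor: auto_cleanup = True enables readability-style content detection, and auto_cleanup_keep supplies an XPath for elements the cleaner must never discard, here the story's media block. Annotated, with the same values as the hunk:

    auto_cleanup = True                              # heuristic article extraction
    auto_cleanup_keep = '//div[@id="mediaWrapper"]'  # XPath of elements to preserve
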
@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
 from setup.installer.windows.wix import WixMixIn

 OPENSSL_DIR = r'Q:\openssl'
-QT_DIR = 'Q:\\Qt\\4.8.0'
+QT_DIR = 'Q:\\Qt\\4.8.1'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
 SW = r'C:\cygwin\home\kovid\sw'
@@ -107,6 +107,7 @@ class ANDROID(USBMS):
             0xc004 : [0x0226],
             0x8801 : [0x0226, 0x0227],
             0xe115 : [0x0216], # PocketBook A10
+            0xe107 : [0x326], # PocketBook 622
             },

         # Acer

src/calibre/ebooks/metadata/sources/worker.py (new file, 95 lines)

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+from threading import Event
+from io import BytesIO
+
+from calibre.utils.date import as_utc
+from calibre.ebooks.metadata.sources.identify import identify, msprefs
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.customize.ui import metadata_plugins
+from calibre.ebooks.metadata.sources.covers import download_cover
+from calibre.utils.logging import GUILog
+from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
+
+def merge_result(oldmi, newmi, ensure_fields=None):
+    dummy = Metadata(_('Unknown'))
+    for f in msprefs['ignore_fields']:
+        if ':' in f or (ensure_fields and f in ensure_fields):
+            continue
+        setattr(newmi, f, getattr(dummy, f))
+    fields = set()
+    for plugin in metadata_plugins(['identify']):
+        fields |= plugin.touched_fields
+
+    def is_equal(x, y):
+        if hasattr(x, 'tzinfo'):
+            x = as_utc(x)
+        if hasattr(y, 'tzinfo'):
+            y = as_utc(y)
+        return x == y
+
+    for f in fields:
+        # Optimize so that set_metadata does not have to do extra work later
+        if not f.startswith('identifier:'):
+            if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
+                    getattr(oldmi, f))):
+                setattr(newmi, f, getattr(dummy, f))
+
+    return newmi
+
+def main(do_identify, covers, metadata, ensure_fields):
+    failed_ids = set()
+    failed_covers = set()
+    all_failed = True
+    log = GUILog()
+
+    for book_id, mi in metadata.iteritems():
+        mi = OPF(BytesIO(mi), basedir=os.getcwdu(),
+                populate_spine=False).to_book_metadata()
+        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
+        cdata = None
+        log.clear()
+
+        if do_identify:
+            results = []
+            try:
+                results = identify(log, Event(), title=title, authors=authors,
+                        identifiers=identifiers)
+            except:
+                pass
+            if results:
+                all_failed = False
+                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
+                identifiers = mi.identifiers
+                if not mi.is_null('rating'):
+                    # set_metadata expects a rating out of 10
+                    mi.rating *= 2
+                with open('%d.mi'%book_id, 'wb') as f:
+                    f.write(metadata_to_opf(mi, default_lang='und'))
+            else:
+                log.error('Failed to download metadata for', title)
+                failed_ids.add(book_id)
+
+        if covers:
+            cdata = download_cover(log, title=title, authors=authors,
+                    identifiers=identifiers)
+            if cdata is None:
+                failed_covers.add(book_id)
+            else:
+                with open('%d.cover'%book_id, 'wb') as f:
+                    f.write(cdata[-1])
+                all_failed = False
+
+        with open('%d.log'%book_id, 'wb') as f:
+            f.write(log.plain_text.encode('utf-8'))
+
+    return failed_ids, failed_covers, all_failed

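worker.py is built to run out-of-process: main() takes a {book_id: OPF-bytes} mapping, optionally identifies metadata and downloads covers, and reports back through files in its working directory (<id>.mi, <id>.cover, <id>.log) plus the returned failure sets. A hypothetical caller, to illustrate that protocol; opf_bytes is assumed to come from metadata_to_opf(mi) in the GUI process:

    # Hypothetical driver for main(), illustrating the file-based protocol.
    from calibre.ebooks.metadata.sources.worker import main

    failed_ids, failed_covers, all_failed = main(
            do_identify=True, covers=True,
            metadata={42: opf_bytes}, ensure_fields=None)
    # On success the cwd now holds 42.mi (merged OPF), 42.cover and 42.log.
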
@@ -10,13 +10,19 @@ __docformat__ = 'restructuredtext en'
 import struct, re, os, imghdr
 from collections import namedtuple
 from itertools import repeat
+from urlparse import urldefrag
+
+from lxml import etree

 from calibre.ebooks.mobi.reader.headers import NULL_INDEX
 from calibre.ebooks.mobi.reader.index import read_index
 from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
 from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
 from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
+from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.mobi.utils import read_font_record
+from calibre.ebooks.oeb.parse_utils import parse_html
+from calibre.ebooks.oeb.base import XPath, XHTML, xml2text

 Part = namedtuple('Part',
     'num type filename start end aid')
@@ -383,6 +389,19 @@ class Mobi8Reader(object):
                 len(resource_map)):
             mi.cover = resource_map[self.cover_offset]

+        if len(list(toc)) < 2:
+            self.log.warn('KF8 has no metadata Table of Contents')
+
+            for ref in guide:
+                if ref.type == 'toc':
+                    href = ref.href()
+                    href, frag = urldefrag(href)
+                    if os.path.exists(href.replace('/', os.sep)):
+                        try:
+                            toc = self.read_inline_toc(href, frag)
+                        except:
+                            self.log.exception('Failed to read inline ToC')
+
         opf = OPFCreator(os.getcwdu(), mi)
         opf.guide = guide

@@ -397,4 +416,70 @@ class Mobi8Reader(object):
         opf.render(of, ncx, 'toc.ncx')
         return 'metadata.opf'

+    def read_inline_toc(self, href, frag):
+        ans = TOC()
+        base_href = '/'.join(href.split('/')[:-1])
+        with open(href.replace('/', os.sep), 'rb') as f:
+            raw = f.read().decode(self.header.codec)
+        root = parse_html(raw, log=self.log)
+        body = XPath('//h:body')(root)
+        reached = False
+        if body:
+            start = body[0]
+        else:
+            start = None
+            reached = True
+        if frag:
+            elems = XPath('//*[@id="%s"]'%frag)
+            if elems:
+                start = elems[0]
+
+        def node_depth(elem):
+            ans = 0
+            parent = elem.getparent()
+            while parent is not None:
+                parent = parent.getparent()
+                ans += 1
+            return ans
+
+        # Layer the ToC based on nesting order in the source HTML
+        current_depth = None
+        parent = ans
+        seen = set()
+        links = []
+        for elem in root.iterdescendants(etree.Element):
+            if reached and elem.tag == XHTML('a') and elem.get('href',
+                    False):
+                href = elem.get('href')
+                href, frag = urldefrag(href)
+                href = base_href + '/' + href
+                text = xml2text(elem).strip()
+                if (text, href, frag) in seen:
+                    continue
+                seen.add((text, href, frag))
+                links.append((text, href, frag, node_depth(elem)))
+            elif elem is start:
+                reached = True
+
+        depths = sorted(set(x[-1] for x in links))
+        depth_map = {x:i for i, x in enumerate(depths)}
+        for text, href, frag, depth in links:
+            depth = depth_map[depth]
+            if current_depth is None:
+                current_depth = 0
+                parent.add_item(href, frag, text)
+            elif current_depth == depth:
+                parent.add_item(href, frag, text)
+            elif current_depth < depth:
+                parent = parent[-1] if len(parent) > 0 else parent
+                parent.add_item(href, frag, text)
+                current_depth += 1
+            else:
+                delta = current_depth - depth
+                while delta > 0 and parent.parent is not None:
+                    parent = parent.parent
+                    delta -= 1
+                parent.add_item(href, frag, text)
+                current_depth = depth
+        return ans

|
|||||||
custom_size = None
|
custom_size = None
|
||||||
return custom_size
|
return custom_size
|
||||||
|
|
||||||
def get_pdf_printer(opts, for_comic=False):
|
def get_pdf_printer(opts, for_comic=False, output_file_name=None):
|
||||||
from calibre.gui2 import is_ok_to_use_qt
|
from calibre.gui2 import is_ok_to_use_qt
|
||||||
if not is_ok_to_use_qt():
|
if not is_ok_to_use_qt():
|
||||||
raise Exception('Not OK to use Qt')
|
raise Exception('Not OK to use Qt')
|
||||||
|
|
||||||
printer = QPrinter(QPrinter.HighResolution)
|
printer = QPrinter(QPrinter.HighResolution)
|
||||||
custom_size = get_custom_size(opts)
|
custom_size = get_custom_size(opts)
|
||||||
|
if isosx and not for_comic:
|
||||||
if opts.output_profile.short_name == 'default' or \
|
# On OSX, the native engine can only produce a single page size
|
||||||
opts.output_profile.width > 9999:
|
# (usually A4). The Qt engine on the other hand produces image based
|
||||||
if custom_size is None:
|
# PDFs. If we set a custom page size using QSizeF the native engine
|
||||||
printer.setPaperSize(paper_size(opts.paper_size))
|
# produces unreadable output, so we just ignore the custom size
|
||||||
else:
|
# settings.
|
||||||
printer.setPaperSize(QSizeF(custom_size[0], custom_size[1]), unit(opts.unit))
|
printer.setPaperSize(paper_size(opts.paper_size))
|
||||||
else:
|
else:
|
||||||
w = opts.output_profile.comic_screen_size[0] if for_comic else \
|
if opts.output_profile.short_name == 'default' or \
|
||||||
opts.output_profile.width
|
opts.output_profile.width > 9999:
|
||||||
h = opts.output_profile.comic_screen_size[1] if for_comic else \
|
if custom_size is None:
|
||||||
opts.output_profile.height
|
printer.setPaperSize(paper_size(opts.paper_size))
|
||||||
dpi = opts.output_profile.dpi
|
else:
|
||||||
printer.setPaperSize(QSizeF(float(w) / dpi, float(h) / dpi), QPrinter.Inch)
|
printer.setPaperSize(QSizeF(custom_size[0], custom_size[1]), unit(opts.unit))
|
||||||
|
else:
|
||||||
|
w = opts.output_profile.comic_screen_size[0] if for_comic else \
|
||||||
|
opts.output_profile.width
|
||||||
|
h = opts.output_profile.comic_screen_size[1] if for_comic else \
|
||||||
|
opts.output_profile.height
|
||||||
|
dpi = opts.output_profile.dpi
|
||||||
|
printer.setPaperSize(QSizeF(float(w) / dpi, float(h) / dpi), QPrinter.Inch)
|
||||||
|
|
||||||
if for_comic:
|
if for_comic:
|
||||||
# Comic pages typically have their own margins, or their background
|
# Comic pages typically have their own margins, or their background
|
||||||
@@ -72,6 +79,12 @@ def get_pdf_printer(opts, for_comic=False):
     printer.setOrientation(orientation(opts.orientation))
     printer.setOutputFormat(QPrinter.PdfFormat)
     printer.setFullPage(for_comic)
+    if output_file_name:
+        printer.setOutputFileName(output_file_name)
+    if isosx and not for_comic:
+        # Ensure we are not generating enormous image based PDFs
+        printer.setOutputFormat(QPrinter.NativeFormat)

     return printer

 def get_printer_page_size(opts, for_comic=False):
@@ -163,15 +176,7 @@ class PDFWriter(QObject): # {{{
         if ok:
             item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue))
             self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
-            printer = get_pdf_printer(self.opts)
-            printer.setOutputFileName(item_path)
-            # We have to set the engine to Native on OS X after the call to set
-            # filename. Setting a filename with .pdf as the extension causes
-            # Qt to set the format to use Qt's PDF engine even if native was
-            # previously set on the printer. Qt's PDF engine produces image
-            # based PDFs on OS X, so we cannot use it.
-            if isosx:
-                printer.setOutputFormat(QPrinter.NativeFormat)
+            printer = get_pdf_printer(self.opts, output_file_name=item_path)
             self.view.page().mainFrame().evaluateJavaScript('''
                 document.body.style.backgroundColor = "white";

@@ -193,10 +198,7 @@ class PDFWriter(QObject): # {{{
         if self.cover_data is None:
             return
         item_path = os.path.join(self.tmp_path, 'cover.pdf')
-        printer = get_pdf_printer(self.opts)
-        printer.setOutputFileName(item_path)
-        if isosx:
-            printer.setOutputFormat(QPrinter.NativeFormat)
+        printer = get_pdf_printer(self.opts, output_file_name=item_path)
         self.combine_queue.insert(0, item_path)
         p = QPixmap()
         p.loadFromData(self.cover_data)
@@ -248,10 +250,8 @@ class ImagePDFWriter(object):
             os.remove(f.name)

     def render_images(self, outpath, mi, items):
-        printer = get_pdf_printer(self.opts, for_comic=True)
-        printer.setOutputFileName(outpath)
-        if isosx:
-            printer.setOutputFormat(QPrinter.NativeFormat)
+        printer = get_pdf_printer(self.opts, for_comic=True,
+                output_file_name=outpath)
         printer.setDocName(mi.title)
         printer.setCreator(u'%s [%s]'%(__appname__, __version__))
         # Seems to be no way to set author

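The thread running through these PDF hunks is a Qt quirk that each old call site repeated by hand: per the comment removed above, setting an output file name ending in .pdf flips QPrinter back to Qt's own PDF engine, which produces image-based PDFs on OS X. get_pdf_printer() now accepts output_file_name and re-asserts NativeFormat itself after naming the file, so every caller collapses to a single line:

    # The pattern all three call sites now share (the path varies per site):
    printer = get_pdf_printer(self.opts, output_file_name=item_path)
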
@@ -105,6 +105,7 @@ gprefs.defaults['show_files_after_save'] = True
 gprefs.defaults['auto_add_path'] = None
 gprefs.defaults['auto_add_check_for_duplicates'] = False
 gprefs.defaults['blocked_auto_formats'] = []
+gprefs.defaults['auto_add_auto_convert'] = True
 # }}}

 NONE = QVariant() #: Null value to return from the data function of item models

@@ -71,7 +71,7 @@ class AddAction(InterfaceAction):
         ma('add-formats', _('Add files to selected book records'),
                 triggered=self.add_formats, shortcut=_('Shift+A'))
         self.add_menu.addSeparator()
-        ma('add-config', _('Configure the adding of books'),
+        ma('add-config', _('Control the adding of books'),
                 triggered=self.add_config)

         self.qaction.triggered.connect(self.add_books)

@@ -53,6 +53,24 @@ class ConvertAction(InterfaceAction):
         self.queue_convert_jobs(jobs, changed, bad, rows, previous,
                 self.book_auto_converted, extra_job_args=[on_card])

+    def auto_convert_auto_add(self, book_ids):
+        previous = self.gui.library_view.currentIndex()
+        db = self.gui.current_db
+        needed = set()
+        of = prefs['output_format'].lower()
+        for book_id in book_ids:
+            fmts = db.formats(book_id, index_is_id=True)
+            fmts = set(x.lower() for x in fmts.split(',')) if fmts else set()
+            if of not in fmts:
+                needed.add(book_id)
+        if needed:
+            jobs, changed, bad = convert_single_ebook(self.gui,
+                    self.gui.library_view.model().db, needed, True, of,
+                    show_no_format_warning=False)
+            if not jobs: return
+            self.queue_convert_jobs(jobs, changed, bad, list(needed), previous,
+                    self.book_converted, rows_are_ids=True)
+
     def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format, subject):
         previous = self.gui.library_view.currentIndex()
         rows = [x.row() for x in \
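auto_convert_auto_add() pairs with the auto_add_auto_convert preference added elsewhere in this commit: it only queues a conversion for books that lack the preferred output format. Spelled out, since db.formats() returns a comma-separated string or None:

    # The "needs conversion" membership test used above:
    of = prefs['output_format'].lower()            # e.g. 'epub'
    fmts = db.formats(book_id, index_is_id=True)   # e.g. 'MOBI,AZW3' or None
    have = set(x.lower() for x in fmts.split(',')) if fmts else set()
    if of not in have:
        needed.add(book_id)
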
@@ -118,7 +136,7 @@ class ConvertAction(InterfaceAction):
                 num, 2000)

     def queue_convert_jobs(self, jobs, changed, bad, rows, previous,
-            converted_func, extra_job_args=[]):
+            converted_func, extra_job_args=[], rows_are_ids=False):
         for func, args, desc, fmt, id, temp_files in jobs:
             func, _, same_fmt = func.partition(':')
             same_fmt = same_fmt == 'same_fmt'
@@ -140,7 +158,11 @@ class ConvertAction(InterfaceAction):
             self.conversion_jobs[job] = tuple(args)

         if changed:
-            self.gui.library_view.model().refresh_rows(rows)
+            m = self.gui.library_view.model()
+            if rows_are_ids:
+                m.refresh_ids(rows)
+            else:
+                m.refresh_rows(rows)
             current = self.gui.library_view.currentIndex()
             self.gui.library_view.model().current_changed(current, previous)

@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os
+import os, shutil
 from functools import partial

 from PyQt4.Qt import QMenu, QModelIndex, QTimer
@@ -16,6 +16,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.device_category_editor import DeviceCategoryEditor
 from calibre.gui2.actions import InterfaceAction
 from calibre.ebooks.metadata import authors_to_string
+from calibre.ebooks.metadata.opf2 import OPF
 from calibre.utils.icu import sort_key
 from calibre.db.errors import NoSuchFormat

@@ -79,14 +80,23 @@ class EditMetadataAction(InterfaceAction):
                 Dispatcher(self.metadata_downloaded),
                 ensure_fields=ensure_fields)

+    def cleanup_bulk_download(self, tdir):
+        try:
+            shutil.rmtree(tdir, ignore_errors=True)
+        except:
+            pass
+
     def metadata_downloaded(self, job):
         if job.failed:
             self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
             return
         from calibre.gui2.metadata.bulk_download import get_job_details
-        id_map, failed_ids, failed_covers, all_failed, det_msg = \
-                get_job_details(job)
+        (aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed,
+                det_msg, lm_map) = get_job_details(job)
+        if aborted:
+            return self.cleanup_bulk_download(tdir)
         if all_failed:
+            self.cleanup_bulk_download(tdir)
             return error_dialog(self.gui, _('Download failed'),
                 _('Failed to download metadata or covers for any of the %d'
                 ' book(s).') % len(id_map), det_msg=det_msg, show=True)
@@ -103,28 +113,26 @@ class EditMetadataAction(InterfaceAction):
             msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
                     ' "Show details" to see which books.')%num

-        payload = (id_map, failed_ids, failed_covers)
+        payload = (id_map, tdir, log_file, lm_map)
         from calibre.gui2.dialogs.message_box import ProceedNotification
         p = ProceedNotification(self.apply_downloaded_metadata,
-                payload, job.html_details,
+                payload, log_file,
                 _('Download log'), _('Download complete'), msg,
                 det_msg=det_msg, show_copy_button=show_copy_button,
-                parent=self.gui)
+                parent=self.gui, log_is_file=True)
         p.show()

     def apply_downloaded_metadata(self, payload):
-        id_map, failed_ids, failed_covers = payload
-        id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in
-            failed_ids])
-        if not id_map:
+        good_ids, tdir, log_file, lm_map = payload
+        if not good_ids:
             return

         modified = set()
         db = self.gui.current_db

-        for i, mi in id_map.iteritems():
+        for i in good_ids:
             lm = db.metadata_last_modified(i, index_is_id=True)
-            if lm > mi.last_modified:
+            if lm > lm_map[i]:
                 title = db.title(i, index_is_id=True)
                 authors = db.authors(i, index_is_id=True)
                 if authors:
@@ -144,7 +152,18 @@ class EditMetadataAction(InterfaceAction):
                 'Do you want to proceed?'), det_msg='\n'.join(modified)):
             return

-        self.apply_metadata_changes(id_map)
+        id_map = {}
+        for bid in good_ids:
+            opf = os.path.join(tdir, '%d.mi'%bid)
+            if not os.path.exists(opf):
+                opf = None
+            cov = os.path.join(tdir, '%d.cover'%bid)
+            if not os.path.exists(cov):
+                cov = None
+            id_map[bid] = (opf, cov)
+
+        self.apply_metadata_changes(id_map, callback=lambda x:
+                self.cleanup_bulk_download(tdir))

     # }}}

@@ -468,6 +487,11 @@ class EditMetadataAction(InterfaceAction):
         callback can be either None or a function accepting a single argument,
         in which case it is called after applying is complete with the list of
         changed ids.
+
+        id_map can also be a mapping of ids to 2-tuples, where each 2-tuple
+        contains the absolute paths to an OPF and cover file respectively. If
+        either of the paths is None, then the corresponding metadata is not
+        updated.
         '''
         if title is None:
             title = _('Applying changed metadata')
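
Note: this docstring paragraph describes the same mapping the loop two hunks above builds. A self-contained sketch of that contract, reusing the '%d.mi'/'%d.cover' naming from this commit:

    import os

    def build_id_map(good_ids, tdir):
        # book id -> (opf_path_or_None, cover_path_or_None), as documented
        id_map = {}
        for bid in good_ids:
            opf = os.path.join(tdir, '%d.mi' % bid)
            cov = os.path.join(tdir, '%d.cover' % bid)
            id_map[bid] = (opf if os.path.exists(opf) else None,
                           cov if os.path.exists(cov) else None)
        return id_map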
@@ -492,28 +516,48 @@ class EditMetadataAction(InterfaceAction):
             return self.finalize_apply()
 
         i, mi = self.apply_id_map[self.apply_current_idx]
+        if isinstance(mi, tuple):
+            opf, cover = mi
+            if opf:
+                mi = OPF(open(opf, 'rb'), basedir=os.path.dirname(opf),
+                        populate_spine=False).to_book_metadata()
+                self.apply_mi(i, mi)
+            if cover:
+                self.gui.current_db.set_cover(i, open(cover, 'rb'),
+                        notify=False, commit=False)
+        else:
+            self.apply_mi(i, mi)
+
+        self.apply_current_idx += 1
+        if self.apply_pd is not None:
+            self.apply_pd.value += 1
+        QTimer.singleShot(50, self.do_one_apply)
+
+    def apply_mi(self, book_id, mi):
         db = self.gui.current_db
 
         try:
             set_title = not mi.is_null('title')
             set_authors = not mi.is_null('authors')
-            idents = db.get_identifiers(i, index_is_id=True)
+            idents = db.get_identifiers(book_id, index_is_id=True)
             if mi.identifiers:
                 idents.update(mi.identifiers)
             mi.identifiers = idents
             if mi.is_null('series'):
                 mi.series_index = None
             if self._am_merge_tags:
-                old_tags = db.tags(i, index_is_id=True)
+                old_tags = db.tags(book_id, index_is_id=True)
                 if old_tags:
                     tags = [x.strip() for x in old_tags.split(',')] + (
                             mi.tags if mi.tags else [])
                     mi.tags = list(set(tags))
-            db.set_metadata(i, mi, commit=False, set_title=set_title,
+            db.set_metadata(book_id, mi, commit=False, set_title=set_title,
                     set_authors=set_authors, notify=False)
-            self.applied_ids.append(i)
+            self.applied_ids.append(book_id)
         except:
             import traceback
-            self.apply_failures.append((i, traceback.format_exc()))
+            self.apply_failures.append((book_id, traceback.format_exc()))
 
         try:
             if mi.cover:
@@ -521,11 +565,6 @@ class EditMetadataAction(InterfaceAction):
         except:
             pass
 
-        self.apply_current_idx += 1
-        if self.apply_pd is not None:
-            self.apply_pd.value += 1
-        QTimer.singleShot(50, self.do_one_apply)
-
     def finalize_apply(self):
         db = self.gui.current_db
         db.commit()
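
Note: the tuple branch above reads metadata back from OPF files that the forked worker wrote to disk. A sketch of that round trip, using only APIs that already appear in this commit (Metadata, metadata_to_opf, OPF); it assumes a calibre environment:

    from io import BytesIO
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf

    mi = Metadata('A Title', ['An Author'])
    raw = metadata_to_opf(mi, default_lang='und')  # OPF XML, as the worker writes it
    mi2 = OPF(BytesIO(raw), populate_spine=False).to_book_metadata()
    assert mi2.title == mi.title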
@@ -113,6 +113,7 @@ class Worker(Thread):
 class AutoAdder(QObject):
 
     metadata_read = pyqtSignal(object)
+    auto_convert = pyqtSignal(object)
 
     def __init__(self, path, parent):
         QObject.__init__(self, parent)
@@ -124,6 +125,8 @@ class AutoAdder(QObject):
             self.metadata_read.connect(self.add_to_db,
                     type=Qt.QueuedConnection)
             QTimer.singleShot(2000, self.initialize)
+            self.auto_convert.connect(self.do_auto_convert,
+                    type=Qt.QueuedConnection)
         elif path:
             prints(path,
                 'is not a valid directory to watch for new ebooks, ignoring')
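
Note: the new auto_convert signal follows the same queued-connection pattern as metadata_read, so the slot runs from the receiver's event loop rather than the emitter's call stack. A minimal PyQt4 sketch of that pattern (names illustrative, not calibre's):

    from PyQt4.Qt import QObject, Qt, pyqtSignal

    class Adder(QObject):
        auto_convert = pyqtSignal(object)

        def __init__(self, parent=None):
            QObject.__init__(self, parent)
            self.auto_convert.connect(self.do_auto_convert,
                    type=Qt.QueuedConnection)

        def do_auto_convert(self, added_ids):
            # Queued delivery requires a running Qt event loop
            print(sorted(added_ids))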
@@ -163,6 +166,7 @@ class AutoAdder(QObject):
 
         needs_rescan = False
         duplicates = []
+        added_ids = set()
 
         for fname, tdir in data.iteritems():
             paths = [os.path.join(self.worker.path, fname)]
@@ -187,9 +191,12 @@ class AutoAdder(QObject):
                     continue
                 mi = [OPF(open(mi, 'rb'), tdir,
                     populate_spine=False).to_book_metadata()]
-                dups, num = m.add_books(paths,
+                dups, ids = m.add_books(paths,
                         [os.path.splitext(fname)[1][1:].upper()], mi,
-                        add_duplicates=not gprefs['auto_add_check_for_duplicates'])
+                        add_duplicates=not gprefs['auto_add_check_for_duplicates'],
+                        return_ids=True)
+                added_ids |= set(ids)
+                num = len(ids)
                 if dups:
                     path = dups[0][0]
                     with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
@@ -217,8 +224,10 @@ class AutoAdder(QObject):
                     _('Books with the same title as the following already '
                     'exist in the database. Add them anyway?'),
                     '\n'.join(files)):
-                dups, num = m.add_books(paths, formats, metadata,
-                        add_duplicates=True)
+                dups, ids = m.add_books(paths, formats, metadata,
+                        add_duplicates=True, return_ids=True)
+                added_ids |= set(ids)
+                num = len(ids)
             count += num
 
         for tdir in data.itervalues():
@@ -227,6 +236,9 @@ class AutoAdder(QObject):
             except:
                 pass
 
+        if added_ids and gprefs['auto_add_auto_convert']:
+            self.auto_convert.emit(added_ids)
+
         if count > 0:
             m.books_added(count)
             gui.status_bar.show_message(_(
@@ -238,4 +250,7 @@ class AutoAdder(QObject):
         if needs_rescan:
             QTimer.singleShot(2000, self.dir_changed)
 
+    def do_auto_convert(self, added_ids):
+        gui = self.parent()
+        gui.iactions['Convert Books'].auto_convert_auto_add(added_ids)
 
@@ -160,7 +160,7 @@ class ProceedNotification(MessageBox): # {{{
 
     def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
             det_msg='', show_copy_button=False, parent=None,
-            cancel_callback=None):
+            cancel_callback=None, log_is_file=False):
         '''
         A non modal popup that notifies the user that a background task has
         been completed.
@@ -175,12 +175,15 @@ class ProceedNotification(MessageBox): # {{{
         :param title: The title for this popup
         :param msg: The msg to display
         :param det_msg: Detailed message
+        :param log_is_file: If True the html_log parameter is interpreted as
+            the path to a file on disk containing the log encoded with utf-8
         '''
         MessageBox.__init__(self, MessageBox.QUESTION, title, msg,
                 det_msg=det_msg, show_copy_button=show_copy_button,
                 parent=parent)
         self.payload = payload
         self.html_log = html_log
+        self.log_is_file = log_is_file
         self.log_viewer_title = log_viewer_title
 
         self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
@@ -192,7 +195,11 @@ class ProceedNotification(MessageBox): # {{{
         _proceed_memory.append(self)
 
     def show_log(self):
-        self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
+        log = self.html_log
+        if self.log_is_file:
+            with open(log, 'rb') as f:
+                log = f.read().decode('utf-8')
+        self.log_viewer = ViewLog(self.log_viewer_title, log,
                 parent=self)
 
     def do_proceed(self, result):
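
Note: log_is_file changes only how show_log() interprets html_log. The new contract, as a minimal sketch:

    def read_log(html_log, log_is_file=False):
        # When log_is_file is True, html_log is the path to a utf-8 encoded
        # file on disk; otherwise it is already the log text itself.
        if log_is_file:
            with open(html_log, 'rb') as f:
                return f.read().decode('utf-8')
        return html_log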
@@ -202,9 +209,9 @@ class ProceedNotification(MessageBox): # {{{
         gui = get_gui()
         gui.proceed_requested.emit(func, self.payload)
         # Ensure this notification is garbage collected
+        self.vlb.clicked.disconnect()
         self.callback = self.cancel_callback = self.payload = None
         self.setParent(None)
-        self.vlb.clicked.disconnect()
         _proceed_memory.remove(self)
 
     def done(self, r):
@@ -140,34 +140,6 @@
      </item>
     </layout>
    </item>
-   <item>
-    <widget class="QGroupBox" name="groupBox">
-     <property name="maximumSize">
-      <size>
-       <width>16777215</width>
-       <height>60</height>
-      </size>
-     </property>
-     <layout class="QHBoxLayout" name="horizontalLayout_5">
-      <item>
-       <widget class="QLabel" name="label_51">
-        <property name="sizePolicy">
-         <sizepolicy hsizetype="Preferred" vsizetype="Preferred">
-          <horstretch>40</horstretch>
-          <verstretch>0</verstretch>
-         </sizepolicy>
-        </property>
-        <property name="text">
-         <string/>
-        </property>
-        <property name="buddy">
-         <cstring>matchkind</cstring>
-        </property>
-       </widget>
-      </item>
-     </layout>
-    </widget>
-   </item>
    <item>
    <widget class="QLabel" name="label_6">
     <property name="maximumSize">
@@ -402,7 +402,8 @@ class DetailView(QDialog, Ui_Dialog): # {{{
         self.setupUi(self)
         self.setWindowTitle(job.description)
         self.job = job
-        self.html_view = hasattr(job, 'html_details')
+        self.html_view = (hasattr(job, 'html_details') and not getattr(job,
+            'ignore_html_details', False))
         if self.html_view:
             self.log.setVisible(False)
         else:
@@ -187,9 +187,10 @@ class BooksModel(QAbstractTableModel): # {{{
         self.db = None
         self.reset()
 
-    def add_books(self, paths, formats, metadata, add_duplicates=False):
+    def add_books(self, paths, formats, metadata, add_duplicates=False,
+            return_ids=False):
         ret = self.db.add_books(paths, formats, metadata,
-                add_duplicates=add_duplicates)
+                add_duplicates=add_duplicates, return_ids=return_ids)
         self.count_changed()
         return ret
 
@@ -7,22 +7,41 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import os, time, shutil
 from functools import partial
-from itertools import izip
-from threading import Event
 
 from PyQt4.Qt import (QIcon, QDialog,
         QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)
 
 from calibre.gui2.threaded_jobs import ThreadedJob
-from calibre.ebooks.metadata.sources.identify import identify, msprefs
-from calibre.ebooks.metadata.sources.covers import download_cover
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.customize.ui import metadata_plugins
-from calibre.ptempfile import PersistentTemporaryFile
-from calibre.utils.date import as_utc
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
+from calibre.utils.ipc.simple_worker import fork_job, WorkerError
+from calibre.ptempfile import (PersistentTemporaryDirectory,
+        PersistentTemporaryFile)
 
 # Start download {{{
+
+class Job(ThreadedJob):
+
+    ignore_html_details = True
+
+    def consolidate_log(self):
+        self.consolidated_log = self.log.plain_text
+        self.log = None
+
+    def read_consolidated_log(self):
+        return self.consolidated_log
+
+    @property
+    def details(self):
+        if self.consolidated_log is None:
+            return self.log.plain_text
+        return self.read_consolidated_log()
+
+    @property
+    def log_file(self):
+        return open(self.download_debug_log, 'rb')
+
 def show_config(gui, parent):
     from calibre.gui2.preferences import show_config_widget
     show_config_widget('Sharing', 'Metadata download', parent=parent,
@@ -104,19 +123,22 @@ def start_download(gui, ids, callback, ensure_fields=None):
     d.b.clicked.disconnect()
     if ret != d.Accepted:
         return
+    tf = PersistentTemporaryFile('_metadata_bulk_log_')
+    tf.close()
 
-    for batch in split_jobs(ids):
-        job = ThreadedJob('metadata bulk download',
-            _('Download metadata for %d books')%len(batch),
-            download, (batch, gui.current_db, d.identify, d.covers,
-                ensure_fields), {}, callback)
-        gui.job_manager.run_threaded_job(job)
+    job = Job('metadata bulk download',
+            _('Download metadata for %d books')%len(ids),
+            download, (ids, tf.name, gui.current_db, d.identify, d.covers,
+                ensure_fields), {}, callback)
+    job.download_debug_log = tf.name
+    gui.job_manager.run_threaded_job(job)
     gui.status_bar.show_message(_('Metadata download started'), 3000)
 
 # }}}
 
 def get_job_details(job):
-    id_map, failed_ids, failed_covers, title_map, all_failed = job.result
+    (aborted, good_ids, tdir, log_file, failed_ids, failed_covers, title_map,
+            lm_map, all_failed) = job.result
     det_msg = []
     for i in failed_ids | failed_covers:
         title = title_map[i]
@@ -126,92 +148,89 @@ def get_job_details(job):
             title += (' ' + _('(Failed cover)'))
         det_msg.append(title)
     det_msg = '\n'.join(det_msg)
-    return id_map, failed_ids, failed_covers, all_failed, det_msg
+    return (aborted, good_ids, tdir, log_file, failed_ids, failed_covers,
+            all_failed, det_msg, lm_map)
 
-def merge_result(oldmi, newmi, ensure_fields=None):
-    dummy = Metadata(_('Unknown'))
-    for f in msprefs['ignore_fields']:
-        if ':' in f or (ensure_fields and f in ensure_fields):
-            continue
-        setattr(newmi, f, getattr(dummy, f))
-    fields = set()
-    for plugin in metadata_plugins(['identify']):
-        fields |= plugin.touched_fields
+class HeartBeat(object):
+    CHECK_INTERVAL = 300 # seconds
+    ''' Check that the file count in tdir changes every five minutes '''
 
-    def is_equal(x, y):
-        if hasattr(x, 'tzinfo'):
-            x = as_utc(x)
-        if hasattr(y, 'tzinfo'):
-            y = as_utc(y)
-        return x == y
+    def __init__(self, tdir):
+        self.tdir = tdir
+        self.last_count = len(os.listdir(self.tdir))
+        self.last_time = time.time()
 
-    for f in fields:
-        # Optimize so that set_metadata does not have to do extra work later
-        if not f.startswith('identifier:'):
-            if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
-                    getattr(oldmi, f))):
-                setattr(newmi, f, getattr(dummy, f))
+    def __call__(self):
+        if time.time() - self.last_time > self.CHECK_INTERVAL:
+            c = len(os.listdir(self.tdir))
+            if c == self.last_count:
+                return False
+            self.last_count = c
+            self.last_time = time.time()
+        return True
 
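Note: assuming the HeartBeat class above is in scope, its contract can be exercised directly; the fork_job machinery it is passed to treats a False return as a hung worker:

    import tempfile

    watch_dir = tempfile.mkdtemp()
    beat = HeartBeat(watch_dir)
    beat.CHECK_INTERVAL = -1  # force the staleness check on the next call
    print(beat())             # False: no new files have appeared in watch_dir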
-    newmi.last_modified = oldmi.last_modified
+# Fix log viewer, ratings
+# Test: abort, covers only, metadata only, both, 200 entry download, memory
+# consumption, all errors and on and on
 
-    return newmi
-
-def download(ids, db, do_identify, covers, ensure_fields,
+def download(all_ids, tf, db, do_identify, covers, ensure_fields,
         log=None, abort=None, notifications=None):
-    ids = list(ids)
-    metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
-            for i in ids]
+    batch_size = 10
+    batches = split_jobs(all_ids, batch_size=batch_size)
+    tdir = PersistentTemporaryDirectory('_metadata_bulk_')
+    heartbeat = HeartBeat(tdir)
 
     failed_ids = set()
     failed_covers = set()
     title_map = {}
-    ans = {}
-    count = 0
+    lm_map = {}
+    ans = set()
     all_failed = True
-    '''
-    # Test apply dialog
-    all_failed = do_identify = covers = False
-    '''
-    for i, mi in izip(ids, metadata):
+    aborted = False
+    count = 0
+    for ids in batches:
         if abort.is_set():
             log.error('Aborting...')
             break
-        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
-        title_map[i] = title
-        if do_identify:
-            results = []
-            try:
-                results = identify(log, Event(), title=title, authors=authors,
-                        identifiers=identifiers)
-            except:
-                pass
-            if results:
-                all_failed = False
-                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
-                identifiers = mi.identifiers
-                if not mi.is_null('rating'):
-                    # set_metadata expects a rating out of 10
-                    mi.rating *= 2
-            else:
-                log.error('Failed to download metadata for', title)
-                failed_ids.add(i)
-                # We don't want set_metadata operating on anything but covers
-                mi = merge_result(mi, mi, ensure_fields=ensure_fields)
-        if covers:
-            cdata = download_cover(log, title=title, authors=authors,
-                    identifiers=identifiers)
-            if cdata is not None:
-                with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
-                    f.write(cdata[-1])
-                mi.cover = f.name
-                all_failed = False
-            else:
-                failed_covers.add(i)
-        ans[i] = mi
-        count += 1
+        metadata = {i:db.get_metadata(i, index_is_id=True,
+            get_user_categories=False) for i in ids}
+        for i in ids:
+            title_map[i] = metadata[i].title
+            lm_map[i] = metadata[i].last_modified
+        metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
+            metadata.iteritems()}
+        try:
+            ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
+                    (do_identify, covers, metadata, ensure_fields),
+                    cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
+        except WorkerError as e:
+            if e.orig_tb:
+                raise Exception('Failed to download metadata. Original '
+                        'traceback: \n\n'+e.orig_tb)
+            raise
+        count += batch_size
         notifications.put((count/len(ids),
-            _('Downloaded %(num)d of %(tot)d')%dict(num=count, tot=len(ids))))
+            _('Downloaded %(num)d of %(tot)d')%dict(
+                num=count, tot=len(all_ids))))
+
+        fids, fcovs, allf = ret['result']
+        if not allf:
+            all_failed = False
+        failed_ids = failed_ids.union(fids)
+        failed_covers = failed_covers.union(fcovs)
+        ans = ans.union(set(ids) - fids)
+        for book_id in ids:
+            lp = os.path.join(tdir, '%d.log'%book_id)
+            if os.path.exists(lp):
+                with open(tf, 'ab') as dest, open(lp, 'rb') as src:
+                    dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
+                        '#'*20+'\n').encode('utf-8'))
+                    shutil.copyfileobj(src, dest)
+
+    if abort.is_set():
+        aborted = True
     log('Download complete, with %d failures'%len(failed_ids))
-    return (ans, failed_ids, failed_covers, title_map, all_failed)
+    return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
+            lm_map, all_failed)
 
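Note: split_jobs() is called above with a batch_size keyword but is not shown in this commit. A stand-in with the behaviour download() appears to rely on (fixed-size, order-preserving chunks):

    def split_jobs(ids, batch_size=10):
        # Assumed behaviour only: chunk ids into lists of at most batch_size.
        ids = list(ids)
        return [ids[i:i + batch_size] for i in range(0, len(ids), batch_size)]

    print(split_jobs(range(25)))  # three batches: 10 + 10 + 5 ids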
@@ -161,10 +161,10 @@ class MetadataSingleDialogBase(ResizableDialog):
         self.manage_authors_button.clicked.connect(self.authors.manage_authors)
 
         self.series = SeriesEdit(self)
-        self.remove_unused_series_button = QToolButton(self)
-        self.remove_unused_series_button.setToolTip(
-               _('Remove unused series (Series that have no books)') )
-        self.remove_unused_series_button.clicked.connect(self.remove_unused_series)
+        self.clear_series_button = QToolButton(self)
+        self.clear_series_button.setToolTip(
+               _('Clear series') )
+        self.clear_series_button.clicked.connect(self.series.clear)
         self.series_index = SeriesIndexEdit(self, self.series)
         self.basic_metadata_widgets.extend([self.series, self.series_index])
 
@@ -198,6 +198,7 @@ class MetadataSingleDialogBase(ResizableDialog):
         self.basic_metadata_widgets.append(self.identifiers)
         self.clear_identifiers_button = QToolButton(self)
         self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
+        self.clear_identifiers_button.setToolTip(_('Clear Ids'))
         self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
         self.paste_isbn_button = QToolButton(self)
         self.paste_isbn_button.setToolTip('<p>' +
@@ -303,17 +304,6 @@ class MetadataSingleDialogBase(ResizableDialog):
         self.title_sort.auto_generate()
         self.author_sort.auto_generate()
 
-    def remove_unused_series(self, *args):
-        self.db.remove_unused_series()
-        idx = self.series.current_val
-        self.series.clear()
-        self.series.initialize(self.db, self.book_id)
-        if idx:
-            for i in range(self.series.count()):
-                if unicode(self.series.itemText(i)) == idx:
-                    self.series.setCurrentIndex(i)
-                    break
-
     def tags_editor(self, *args):
         self.tags.edit(self.db, self.book_id)
 
@@ -591,7 +581,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
         sto(self.title_sort, self.authors)
         create_row(1, self.authors, self.deduce_author_sort_button, self.author_sort)
         sto(self.author_sort, self.series)
-        create_row(2, self.series, self.remove_unused_series_button,
+        create_row(2, self.series, self.clear_series_button,
                 self.series_index, icon='trash.png')
         sto(self.series_index, self.swap_title_author_button)
         sto(self.swap_title_author_button, self.manage_authors_button)
@@ -756,7 +746,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
                 span=2, icon='auto_author_sort.png')
         create_row(3, self.author_sort, self.series)
         create_row(4, self.series, self.series_index,
-                button=self.remove_unused_series_button, icon='trash.png')
+                button=self.clear_series_button, icon='trash.png')
         create_row(5, self.series_index, self.tags)
         create_row(6, self.tags, self.rating, button=self.tags_editor_button)
         create_row(7, self.rating, self.pubdate)
@@ -892,7 +882,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
                 span=2, icon='auto_author_sort.png')
         create_row(3, self.author_sort, self.series)
         create_row(4, self.series, self.series_index,
-                button=self.remove_unused_series_button, icon='trash.png')
+                button=self.clear_series_button, icon='trash.png')
         create_row(5, self.series_index, self.tags)
         create_row(6, self.tags, self.rating, button=self.tags_editor_button)
         create_row(7, self.rating, self.pubdate)
@@ -36,6 +36,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         r('new_book_tags', prefs, setting=CommaSeparatedList)
         r('auto_add_path', gprefs, restart_required=True)
         r('auto_add_check_for_duplicates', gprefs)
+        r('auto_add_auto_convert', gprefs)
 
         self.filename_pattern = FilenamePattern(self)
         self.metadata_box.layout().insertWidget(0, self.filename_pattern)
@@ -151,6 +151,19 @@ Author matching is exact.</string>
     <string>&amp;Automatic Adding</string>
    </attribute>
    <layout class="QGridLayout" name="gridLayout_3">
+    <item row="3" column="0" colspan="2">
+     <widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
+      <property name="toolTip">
+       <string>If set, this option will cause calibre to check if a file
+being auto-added is already in the calibre library.
+If it is, a message will pop up asking you whether
+you want to add it anyway.</string>
+      </property>
+      <property name="text">
+       <string>Check for &amp;duplicates when auto-adding files</string>
+      </property>
+     </widget>
+    </item>
     <item row="0" column="0" colspan="2">
      <widget class="QLabel" name="label">
      <property name="text">
@@ -168,7 +181,7 @@ Author matching is exact.</string>
      </property>
     </widget>
    </item>
-    <item row="4" column="0">
+    <item row="5" column="0">
     <widget class="QGroupBox" name="groupBox">
      <property name="title">
       <string>Ignore files with the following extensions when automatically adding </string>
@@ -187,7 +200,7 @@ Author matching is exact.</string>
     </layout>
    </widget>
   </item>
-    <item row="4" column="1">
+    <item row="5" column="1">
     <spacer name="horizontalSpacer_2">
      <property name="orientation">
       <enum>Qt::Horizontal</enum>
@@ -225,16 +238,10 @@ Author matching is exact.</string>
      </item>
     </layout>
    </item>
-    <item row="3" column="0" colspan="2">
-     <widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
-      <property name="toolTip">
-       <string>If set, this option will cause calibre to check if a file
-being auto-added is already in the calibre library.
-If it is, a message will pop up asking you whether
-you want to add it anyway.</string>
-      </property>
+    <item row="4" column="0">
+     <widget class="QCheckBox" name="opt_auto_add_auto_convert">
      <property name="text">
-       <string>Check for &amp;duplicates when auto-adding files</string>
+       <string>Automatically &amp;convert added files to the current output format</string>
      </property>
     </widget>
    </item>
@@ -73,11 +73,13 @@ class OpenSearchOPDSStore(StorePlugin):
                 type = link.get('type')
 
                 if rel and href and type:
-                    if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
+                    if 'http://opds-spec.org/thumbnail' in rel:
                         s.cover_url = href
-                    elif rel == u'http://opds-spec.org/acquisition/buy':
+                    elif 'http://opds-spec.org/image/thumbnail' in rel:
+                        s.cover_url = href
+                    elif 'http://opds-spec.org/acquisition/buy' in rel:
                         s.detail_item = href
-                    elif rel == u'http://opds-spec.org/acquisition':
+                    elif 'http://opds-spec.org/acquisition' in rel:
                         if type:
                             ext = mimetypes.guess_extension(type)
                             if ext:
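
Note: the equality tests become containment tests because an Atom link's rel attribute may carry several whitespace-separated relation values. A sketch of the difference:

    rel = 'http://opds-spec.org/acquisition http://opds-spec.org/acquisition/buy'
    assert 'http://opds-spec.org/acquisition/buy' in rel   # containment matches
    assert rel != u'http://opds-spec.org/acquisition/buy'  # equality does not

Since containment also lets 'http://opds-spec.org/acquisition' match the buy variant, the more specific buy branch has to stay first, as it does above.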
@@ -25,7 +25,7 @@ from calibre.ebooks.conversion.config import GuiRecommendations, \
 from calibre.gui2.convert import bulk_defaults_for_input_format
 
 def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
-        out_format=None):
+        out_format=None, show_no_format_warning=True):
     changed = False
     jobs = []
     bad = []
@@ -91,7 +91,7 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
         except NoSupportedInputFormats:
             bad.append(book_id)
 
-    if bad != []:
+    if bad and show_no_format_warning:
         res = []
         for id in bad:
             title = db.title(id, True)
@@ -3243,7 +3243,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         return id
 
 
-    def add_books(self, paths, formats, metadata, add_duplicates=True):
+    def add_books(self, paths, formats, metadata, add_duplicates=True,
+            return_ids=False):
         '''
         Add a book to the database. The result cache is not updated.
         :param:`paths` List of paths to book files or file-like objects
@@ -3289,7 +3290,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             formats = list(duplicate[1] for duplicate in duplicates)
             metadata = list(duplicate[2] for duplicate in duplicates)
             return (paths, formats, metadata), len(ids)
-        return None, len(ids)
+        return None, (ids if return_ids else len(ids))
 
     def import_book(self, mi, formats, notify=True, import_hooks=True,
             apply_import_tags=True, preserve_uuid=False):
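
Note: return_ids changes only the second element of add_books()'s return value. A pure-Python stand-in showing the shape change:

    def add_books_result(ids, return_ids=False):
        # Mirrors the changed return statement above; the real method is
        # LibraryDatabase2.add_books.
        return None, (list(ids) if return_ids else len(ids))

    print(add_books_result([5, 9, 12]))                   # (None, 3)
    print(add_books_result([5, 9, 12], return_ids=True))  # (None, [5, 9, 12])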
@@ -648,7 +648,10 @@ class BasicNewsRecipe(Recipe):
          'url'         : URL of print version,
          'date'        : The publication date of the article as a string,
          'description' : A summary of the article
-         'content'     : The full article (can be an empty string). This is used by FullContentProfile
+         'content'     : The full article (can be an empty string). Obsolete,
+                         do not use; instead save the content to a temporary
+                         file and pass a file:///path/to/temp/file.html as
+                         the URL.
         }
 
         For an example, see the recipe for downloading `The Atlantic`.
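
Note: a sketch of the temporary-file pattern the docstring now recommends, using calibre's PersistentTemporaryFile (the helper name article_url is illustrative, not a calibre API):

    from calibre.ptempfile import PersistentTemporaryFile

    def article_url(content):
        # Persist the article body, then hand its file:// URL to the
        # article dict returned by parse_index().
        f = PersistentTemporaryFile('.html')
        f.write(content.encode('utf-8'))
        f.close()
        return 'file://' + f.name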