Merge from trunk

commit 20ec5de3f4
82  recipes/ba_herald.recipe  (new file)
@@ -0,0 +1,82 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.buenosairesherald.com
'''

import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class BuenosAiresHerald(BasicNewsRecipe):
    title = 'Buenos Aires Herald'
    __author__ = 'Darko Miletic'
    description = 'A world of information in a few words'
    publisher = 'Editorial Nefir S.A.'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en_AR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
    INDEX = 'http://www.buenosairesherald.com'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        h1{font-family: Georgia,serif}
        #fecha{text-align: right; font-size: small}
    """

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [dict(name=['meta','link','iframe'])]
    keep_only_tags = [dict(attrs={'class':'nota_texto p'})]

    feeds = [
          (u'Argentina'    , u'http://www.buenosairesherald.com/argentina'    )
        , (u'World'        , u'http://www.buenosairesherald.com/world'        )
        , (u'Latin America', u'http://www.buenosairesherald.com/latin-america')
        , (u'Entertainment', u'http://www.buenosairesherald.com/entertainment')
        , (u'Sports'       , u'http://www.buenosairesherald.com/sports'       )
    ]

    def print_version(self, url):
        artidraw = url.rpartition('/article/')[2]
        artid = artidraw.partition('/')[0]
        return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}):
                description = self.tag_to_string(item.h2)
                atag = item.h2.find('a')
                if atag and atag.has_key('href'):
                    url = self.INDEX + atag['href']
                    title = description
                    date = strftime(self.timefmt)
                    articles.append({
                          'title'      : title
                        , 'date'       : date
                        , 'url'        : url
                        , 'description': description
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
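A quick standalone check of the URL rewrite that print_version() above performs; the sample article URL below is made up purely for illustration.

    url = 'http://www.buenosairesherald.com/article/123456/some-headline'  # hypothetical article URL
    artid = url.rpartition('/article/')[2].partition('/')[0]
    print('http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid)
    # -> http://www.buenosairesherald.com/articles/print.aspx?ix=123456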
BIN  recipes/icons/ba_herald.png  (new file, 978 B)
@@ -1,45 +1,73 @@
# Talking Points is not grabbing everything.
# The look is right, but only the last one added?
import re
import string, re
import time
import traceback
# above for debugging via stack
from calibre.web.feeds.recipes import BasicNewsRecipe
# Allows the Python soup converter, which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
# strip ads and graphics
# Current Column lacks a title.
# Talking Points Memo - shorten title - Remove year and Bill's name
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12

import os, time, traceback, re, urlparse, sys, cStringIO
from collections import defaultdict
from functools import partial
from contextlib import nested, closing

from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending

# To Do: strip ads and graphics, Current Column lacks a title.
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12
# ./ebook-convert --username xxx --password xxx

# this is derived from BasicNewsRecipe, so it can only overload those.
# Soome of what we need is otherwise in article, so we have more copy to do than otherwise.
class OReillyPremium(BasicNewsRecipe):
    title = u'OReilly Premium'
    __author__ = 'TMcN'
    language = 'en'
    description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
    cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
    custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y')
    title = 'Bill O\'Reilly Premium'
    auto_cleanup = True
    conversion_options = {'linearize_tables': True}
    encoding = 'utf8'
    needs_subscription = True
    language = 'en'
    no_stylesheets = True
    oldest_article = 20
    needs_subscription = True
    oldest_article = 31
    remove_javascript = True
    remove_tags = [dict(name='img', attrs={})]
    # Don't go down
    recursions = 0
    max_articles_per_feed = 2000

    max_articles_per_feed = 20

    debugMessages = True

    # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
    catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
        ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
        ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
        ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
        ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
        # ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
        # ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
        # ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
        # ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
        ["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
    ]

    feeds = [
        (u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'),
        (u'Daily Briefing', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'),
        (u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'),
        (u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'),
        (u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5')
    ]
    # http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day.

    # Note: Talking Points is broken in the above model; the site changed to more Ajax-y.
    # Now using RSS

    def get_browser(self):
        print("In get_browser")
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
@@ -48,7 +76,7 @@ class OReillyPremium(BasicNewsRecipe):
            br['formPasswordField'] = self.password
            br.submit()
        return br

    # Returns the best-guess print url.
    # The second parameter (pageURL) is returned if nothing is found.
    def extractPrintURL(self, baseURL, pageURL, printString):
@@ -62,17 +90,19 @@ class OReillyPremium(BasicNewsRecipe):
            tag = printText.parent
            tagURL = baseURL+tag['href']
        return tagURL

    def stripBadChars(self, inString) :
        return inString.replace("\'", "")

    def parseGeneric(self, baseURL):
        # Does a generic parsing of the articles. There are six categories (0-5)
        # Does a generic parsing of the articles. There are six categories (0-5)
        # Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
        # NoSpin and TV are generic
        fullReturn = []
        for i in range(len(self.catList)) :
        for i in range(len(self.catList)) :
            articleList = []
            print("In "+self.catList[i][0]+", index: "+ str(i))
            soup = self.index_to_soup(self.catList[i][1])
            # Set defaults
            description = 'None'
@@ -80,15 +110,13 @@ class OReillyPremium(BasicNewsRecipe):
            # Problem: 0-2 create many in an array
            # 3-5 create one.
            # So no for-div for 3-5

            if i < 3 :

                if i == 0 :
                    print("Starting TV Archives")
                for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
                    print("Next DIV:")
                    print(div)
                    if i == 1:
                        a = div.find('a', href=True)
                    else :
                        a = div
                    print(a)
                    a = div
                    summary = div.find(True, attrs={'class':'summary'})
                    if summary:
                        description = self.tag_to_string(summary, use_alt=False)
@@ -96,82 +124,63 @@ class OReillyPremium(BasicNewsRecipe):
                        continue
                    # url = baseURL+re.sub(r'\?.*', '', a['href'])
                    url = baseURL+a['href']
                    if i < 2 :
                        url = self.extractPrintURL(baseURL, url, "Print this entry")
                        title = self.tag_to_string(a, use_alt=True).strip()
                    elif i == 2 :
                        # Daily Briefs
                        url = self.extractPrintURL(baseURL, url, "Print this entry")
                        title = div.contents[0]
                    if self.debugMessages :
                        print(title+" @ "+url)
                    url = self.extractPrintURL(baseURL, url, "Print this entry")
                    title = self.tag_to_string(a, use_alt=True).strip()
                    articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))

            elif i == 3 : # Stratfor
                a = soup.find('a', self.catList[i][3])
                if a is None :
                    continue
                url = baseURL+a['href']
                title = self.tag_to_string(a, use_alt=True).strip()
                # Get Stratfor contents so we can get the real title.
                stratSoup = self.index_to_soup(url)
                title = stratSoup.html.head.title.string
                stratIndex = title.find('Stratfor.com:', 0)
                if (stratIndex > -1) :
                    title = title[stratIndex+14:-1]
                # Look for first blogBody <td class="blogBody"
                # Changed 12 Jan 2012 - new page format
                #stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
                #stratBody = stratSoup.find('td', {'class':['blogBody']})
            elif i == 4 : # Talking Points
                topDate = soup.find("td", "blogBody")
                if not topDate :
                    print("Failed to find date in Talking Points")
                # This page has the contents in double-wrapped tables!
                myTable = topDate.findParents('table')[0]
                if myTable is not None:
                    upOneTable = myTable.findParents('table')[0]
                    if upOneTable is not None:
                        upTwo = upOneTable.findParents('table')[0]
                        if upTwo is None:
                            continue
                        # Now navigate rows of upTwo
                        if self.debugMessages :
                            print("Entering rows")
                        for rows in upTwo.findChildren("tr", recursive=False):
                            # Inside top level table, each row is an article
                            rowTable = rows.find("table")
                            articleTable = rowTable.find("table")
                            # This looks wrong.
                            articleTable = rows.find("tr")
                            # The middle table is just for formatting the article buffer... but this means we can skip the inner table.
                            blogDate = articleTable.find("a","blogDate").contents[0]
                            # Skip to second blogBody for this.
                            blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
                            blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
                            url = baseURL+re.sub(r'\?.*', '', blogURL)
                            title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
                            if self.debugMessages :
                                print("Talking Points Memo title "+title+" at url: "+url)
                            pubdate = time.strftime('%a, %d %b')
                            articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
            else : # Current Column
                titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
                if titleSpan is None :
                    print("No Current Column Title Span")
                    print(soup)
                    continue
                title = titleSpan.contents[0]
                url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
            if i == 3 or i == 5 :
            if i == 1 :
                if self.debugMessages :
                    print(self.catList[i][0]+" Title:"+title+" at url: "+url)
                summary = div.find(True, attrs={'class':'summary'})
                if summary:
                    print("At Summary")
                    print(summary)
                if summary is not None:
                    description = self.tag_to_string(summary, use_alt=False)
                print("At append")
                articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
            self.catList[i][3] = articleList
            fullReturn.append((self.catList[i][0], articleList))
        print("Returning")
        # print fullReturn
        return fullReturn

    # build_index() starts with:
    # try:
    #     feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
    #         max_articles_per_feed=self.max_articles_per_feed,
    #         log=self.log)
    #     self.report_progress(0, _('Got feeds from index page'))
    # except NotImplementedError:
    #     feeds = self.parse_feeds()

    # which in turn is from __init__.py
    #def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
    #                     log=default_log):
    #'''
    #@param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
    #@return: A list of L{Feed} objects.
    #@rtype: list
    #'''
    #feeds = []
    #for title, articles in index:
    #    pfeed = Feed(log=log)
    #    pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
    #        max_articles_per_feed=max_articles_per_feed)
    #    feeds.append(pfeed)
    # return feeds

    # use_embedded_content defaults to None, at which point if the content is > 2K, it is used as the article.

    # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
    # returns a list of tuple ('feed title', list of articles)
    # {
@@ -182,16 +191,148 @@ class OReillyPremium(BasicNewsRecipe):
    # 'content' : The full article (can be an empty string). This is used by FullContentProfile
    # }
    # this is used instead of BasicNewsRecipe.parse_feeds().
    # it is called by download
    def parse_index(self):
        # Parse the page into Python Soup
        print("Entering recipe print_index from:")
        traceback.print_stack()
        print("web")
        baseURL = "https://www.billoreilly.com"
        return self.parseGeneric(baseURL)

        masterList = self.parseGeneric(baseURL)
        #print(masterList)
        return masterList

    def preprocess_html(self, soup):
        print("In preprocess_html")
        refresh = soup.find('meta', {'http-equiv':'refresh'})
        if refresh is None:
            return soup
        content = refresh.get('content').partition('=')[2]
        raw = self.browser.open('https://www.billoreilly.com'+content).read()
        return BeautifulSoup(raw.decode('cp1252', 'replace'))

    def build_index(self):
        print("In OReilly build_index()\n\n")
        feedsRSS = []
        self.report_progress(0, _('Fetching feeds...'))
        #try:
        feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
            max_articles_per_feed=self.max_articles_per_feed,
            log=self.log)
        self.report_progress(0, _('Got feeds from index page'))
        #except NotImplementedError:
        #    feeds = self.parse_feeds()
        # Now add regular feeds.
        feedsRSS = self.parse_feeds()
        print ("feedsRSS is type "+feedsRSS.__class__.__name__)

        for articles in feedsRSS:
            print("articles is type "+articles.__class__.__name__)
            print("Title:" + articles.title)
            feeds.append(articles)
        if not feeds:
            raise ValueError('No articles found, aborting')

        #feeds = FeedCollection(feeds)

        self.report_progress(0, _('Trying to download cover...'))
        self.download_cover()
        self.report_progress(0, _('Generating masthead...'))
        self.masthead_path = None

        try:
            murl = self.get_masthead_url()
        except:
            self.log.exception('Failed to get masthead url')
            murl = None

        if murl is not None:
            # Try downloading the user-supplied masthead_url
            # Failure sets self.masthead_path to None
            self.download_masthead(murl)
        if self.masthead_path is None:
            self.log.info("Synthesizing mastheadImage")
            self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
            try:
                self.default_masthead_image(self.masthead_path)
            except:
                self.log.exception('Failed to generate default masthead image')
                self.masthead_path = None

        if self.test:
            feeds = feeds[:2]
        self.has_single_feed = len(feeds) == 1

        index = os.path.join(self.output_dir, 'index.html')

        html = self.feeds2index(feeds)
        with open(index, 'wb') as fi:
            fi.write(html)

        self.jobs = []

        if self.reverse_article_order:
            for feed in feeds:
                if hasattr(feed, 'reverse'):
                    feed.reverse()

        self.feed_objects = feeds
        for f, feed in enumerate(feeds):
            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
            if not os.path.isdir(feed_dir):
                os.makedirs(feed_dir)

            for a, article in enumerate(feed):
                if a >= self.max_articles_per_feed:
                    break
                art_dir = os.path.join(feed_dir, 'article_%d'%a)
                if not os.path.isdir(art_dir):
                    os.makedirs(art_dir)
                try:
                    url = self.print_version(article.url)
                except NotImplementedError:
                    url = article.url
                except:
                    self.log.exception('Failed to find print version for: '+article.url)
                    url = None
                if not url:
                    continue
                func, arg = (self.fetch_embedded_article, article) \
                    if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
                    else \
                    ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
                    else self.fetch_article), url)
                req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
                    {}, (f, a), self.article_downloaded,
                    self.error_in_article_download)
                req.feed = feed
                req.article = article
                req.feed_dir = feed_dir
                self.jobs.append(req)

        self.jobs_done = 0
        tp = ThreadPool(self.simultaneous_downloads)
        for req in self.jobs:
            tp.putRequest(req, block=True, timeout=0)

        self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
        while True:
            try:
                tp.poll()
                time.sleep(0.1)
            except NoResultsPending:
                break
        for f, feed in enumerate(feeds):
            print("Writing feeds for "+feed.title)
            html = self.feed2index(f,feeds)
            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
            with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
                fi.write(html)
        self.create_opf(feeds)
        self.report_progress(1, _('Feeds downloaded to %s')%index)

        return index
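For reference, the comments in the hunk above describe what parse_index()/parseGeneric() must hand back to build_index(): a list of (feed title, list of article dicts). A minimal sketch of that shape, using the recipe's own dict keys; the titles, URL and date below are placeholders, not real data.

    example_index = [
        ('TV Archives', [
            {'title': 'Example segment',                    # placeholder title
             'url': 'https://www.billoreilly.com/example',  # placeholder URL
             'date': 'Mon, 30 Jan',                         # time.strftime('%a, %d %b') style
             'description': 'None',
             'content': ''},
        ]),
        ('Current Column', []),   # an empty feed is allowed
    ]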
@@ -1,7 +1,9 @@
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
import string, re
import time
from urlparse import urlparse
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString

class RealClear(BasicNewsRecipe):
    title = u'Real Clear'
@@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe):
    # Don't go down
    recursions = 0
    max_articles_per_feed = 400
    debugMessages = False

    # Numeric parameter is type, controls whether we look for
    debugMessages = True

    # Numeric parameter is type, controls whether we look for
    feedsets = [
        ["Politics", "http://www.realclearpolitics.com/index.xml", 0],
        ["Science", "http://www.realclearscience.com/index.xml", 0],
        ["Politics", "http://www.realclearpolitics.com/index.xml", 0],
        ["Policy", "http://www.realclearpolicy.com/index.xml", 0],
        ["Science", "http://www.realclearscience.com/index.xml", 0],
        ["Tech", "http://www.realcleartechnology.com/index.xml", 0],
        # The feedburner is essentially the same as the top feed, politics.
        # ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
@@ -37,22 +40,37 @@ class RealClear(BasicNewsRecipe):
    ]
    # Hints to extractPrintURL.
    # First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
    printhints = [
    phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)

    printhints = [ ["realclear", "", '' , 'printpage'],
        ["billoreilly.com", "Print this entry", 'a', ''],
        ["billoreilly.com", "Print This Article", 'a', ''],
        ["politico.com", "Print", 'a', 'share-print'],
        ["politico.com", "Print", 'a', 'share-print'],
        ["nationalreview.com", ">Print<", 'a', ''],
        ["reason.com", "", 'a', 'printer']
        # The following are not supported due to JavaScripting, and would require obfuscated_article to handle
        # forbes,
        # forbes,
        # usatoday - just prints with all current crap anyhow

    ]

    # RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html
    # The print link isn't obvious, and only the end is needed (the -full append.) SO maybe try that first?s
    # http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html
    # Single page articles don't have a _full; e.g. http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html
    # Use the FULL PRINTPAGE URL; it formats it better too!
    #
    # NYT - try single page...
    # Need special code - is it one page or several? Which URL?
    # from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1
    # to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all
    # which is at link rel="canonical" and at <meta property="og:url" or look for "Single Page"

    # Returns the best-guess print url.
    # The second parameter (pageURL) is returned if nothing is found.
    def extractPrintURL(self, pageURL):
        tagURL = pageURL
        baseParse = urlparse(pageURL)
        baseURL = baseParse[0]+"://"+baseParse[1]
        hintsCount =len(self.printhints)
        for x in range(0,hintsCount):
            if pageURL.find(self.printhints[x][0])== -1 :
@@ -62,23 +80,37 @@ class RealClear(BasicNewsRecipe):
            soup = self.index_to_soup(pageURL)
            if soup is None:
                return pageURL
            if len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
            if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
                # e.g. RealClear
                if self.debugMessages == True :
                    print("search1")
                    print("Search by href: "+self.printhints[x][self.phHrefSearch])
                printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
            elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
                if self.debugMessages == True :
                    print("Search 1: "+self.printhints[x][2]+" Attributes: ")
                    print(self.printhints[x][3])
                printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
            elif len(self.printhints[x][3])>0 :
                if self.debugMessages == True :
                    print("search2")
                printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
            else :
                if self.debugMessages == True:
                    print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
                printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
            if printFind is None:
                if self.debugMessages == True :
                    print("Not Found")
                    # print(soup)
                    print("end soup\n\n");
                continue

            print(printFind)
            if isinstance(printFind, NavigableString)==False:
                if printFind['href'] is not None:
                    print("Check "+printFind['href']+" for base of "+baseURL)
                    if printFind['href'].find("http")!=0 :
                        return baseURL+printFind['href']
                    return printFind['href']
            tag = printFind.parent
            print(tag)
@@ -98,7 +130,7 @@ class RealClear(BasicNewsRecipe):
        print("In get_browser")
        br = BasicNewsRecipe.get_browser()
        return br

    def parseRSS(self, index) :
        if self.debugMessages == True :
            print("\n\nStarting "+self.feedsets[index][0])
@@ -128,7 +160,7 @@ class RealClear(BasicNewsRecipe):
            pubDateEl = div.find("pubDate")
            if pubDateEl is None :
                pubDateEl = div.find("pubdate")
            if pubDateEl is None :
            if pubDateEl is None :
                pubDate = time.strftime('%a, %d %b')
            else :
                pubDate = pubDateEl.contents[0]
@@ -144,7 +176,7 @@ class RealClear(BasicNewsRecipe):
            pubdate = time.strftime('%a, %d %b')
            articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
        return articleList

    # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
    # returns a list of tuple ('feed title', list of articles)
    # {
@@ -157,7 +189,8 @@ class RealClear(BasicNewsRecipe):
    # this is used instead of BasicNewsRecipe.parse_feeds().
    def parse_index(self):
        # Parse the page into Python Soup

        articleList = []
        ans = []
        feedsCount = len(self.feedsets)
        for x in range(0,feedsCount): # should be ,4
@@ -167,4 +200,5 @@ class RealClear(BasicNewsRecipe):
        if self.debugMessages == True :
            print(ans)
        return ans
@@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = True
    auto_cleanup_keep = '//div[@id="mediaWrapper"]'
    simultaneous_downloads = 1
    delay = 4
    max_connections = 1
@@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe):
        , 'language' : language
    }

    keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
    #keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]

    remove_tags = [
        dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
        ,dict(name=['object','link'])
    ]
    #remove_tags = [
    #dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
    #,dict(name=['object','link'])
    #]

    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
    feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )]

    def get_cover_url(self):
136  recipes/southernstar.recipe  (new file)
@@ -0,0 +1,136 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2012, watou'
'''
southernstar.ie
'''
import re
import tempfile
import os
import codecs

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString

class TheSouthernStar(BasicNewsRecipe):

    title = 'The Southern Star'
    __author__ = 'watou'
    description = 'West Cork\'s leading news and information provider since 1889'
    NEWS_INDEX = 'http://www.southernstar.ie/news.php'
    LOCAL_NOTES = 'http://www.southernstar.ie/localnotes.php'
    SPORT_INDEX = 'http://www.southernstar.ie/sport.php'
    CLASSIFIEDS = 'http://www.southernstar.ie/classifieds.php'
    language = 'en_IE'
    encoding = 'cp1252'

    publication_type = 'newspaper'
    masthead_url = 'http://www.southernstar.ie/images/logo.gif'
    remove_tags_before = dict(name='div', attrs={'class':'article'})
    remove_tags_after = dict(name='div', attrs={'class':'article'})
    remove_tags = [dict(name='div', attrs={'style':'width:300px; position:relative'}),
                   dict(name='form'),
                   dict(name='div', attrs={'class':'endpanel'})]
    no_stylesheets = True
    tempfiles = []
    pubdate = ''

    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]

    def parse_index(self):
        feeds = []
        seen_titles = set([])

        articles = self.fetch_ss_articles(self.NEWS_INDEX, seen_titles)
        if articles:
            feeds.append(('News', articles))

        articles = self.fetch_ss_notes(self.LOCAL_NOTES)
        if articles:
            feeds.append(('Local Notes', articles))

        articles = self.fetch_ss_articles(self.SPORT_INDEX, seen_titles)
        if articles:
            feeds.append(('Sport', articles))

        articles = self.fetch_ss_notes(self.CLASSIFIEDS)
        if articles:
            feeds.append(('Classifieds', articles))

        return feeds

    def fetch_ss_articles(self, index, seen_titles):
        articles = []
        soup = self.index_to_soup(index)
        ts = soup.find('div', {'class':'article'})
        ds = self.tag_to_string(ts.find('strong'))
        self.pubdate = ' ['+ds+']'
        self.timefmt = ' [%s]'%ds

        for post in ts.findAll('h1'):
            a = post.find('a', href=True)
            title = self.tag_to_string(a)
            if title in seen_titles:
                continue
            seen_titles.add(title)
            url = a['href']
            if url.startswith('article'):
                url = 'http://www.southernstar.ie/'+url
            self.log('\tFound article:', title, 'at', url)
            p = post.findNextSibling('p')
            desc = None
            if p is not None:
                desc = str(p)
            articles.append({'title':title, 'url':url, 'description':desc,
                             'date':self.pubdate})

        return articles

    def fetch_ss_notes(self, page):
        articles = []

        soup = self.index_to_soup(page)
        ts = soup.find('div', {'class':'content'})
        for post in ts.findAll('h1'):
            title = self.tag_to_string(post)
            self.log('\tFound note:', title)
            f = tempfile.NamedTemporaryFile(suffix='.html',delete=False)
            f.close()
            f = codecs.open(f.name, 'w+b', self.encoding, 'replace')
            url = "file://" + f.name
            f.write(u'<html><head><meta http-equiv="Content-Type" content="text/html; charset='+
                    self.encoding+'"></head><body><h1>'+title+'</h1>')
            f.write(str(post.findNextSibling('p')))
            f.write(u'</body></html>')
            self.log('\tWrote note to', f.name)
            f.close()
            self.tempfiles.append(f)
            articles.append({'title':title, 'url':url, 'date':self.pubdate})

        return articles

    def postprocess_html(self, soup, first):
        for table in soup.findAll('table', align='right'):
            img = table.find('img')
            if img is not None:
                img.extract()
                caption = self.tag_to_string(table).strip()
                div = Tag(soup, 'div')
                div['style'] = 'text-align:center'
                div.insert(0, img)
                div.insert(1, Tag(soup, 'br'))
                if caption:
                    div.insert(2, NavigableString(caption))
                table.replaceWith(div)

        return soup

    def image_url_processor(self, baseurl, url):
        return url.replace(' ','%20')

    def cleanup(self):
        self.log('cleaning up')
        for f in self.tempfiles:
            os.unlink(f.name)
        self.tempfiles = []
@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.8.0'
QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
@@ -32,6 +32,7 @@ class MOBIInput(InputFormatPlugin):

    def convert(self, stream, options, file_ext, log,
            accelerators):
        self.is_kf8 = False

        if os.environ.get('USE_MOBIUNPACK', None) is not None:
            pos = stream.tell()
@@ -62,6 +63,7 @@ class MOBIInput(InputFormatPlugin):
                mr = Mobi8Reader(mr, log)
                opf = os.path.abspath(mr())
                self.encrypted_fonts = mr.encrypted_fonts
                self.is_kf8 = True
                return opf

        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
@@ -535,7 +535,7 @@ class OPF(object): # {{{
    series_index = MetadataField('series_index', is_dc=False,
                                 formatter=float, none_is=1)
    title_sort = TitleSortField('title_sort', is_dc=False)
    rating = MetadataField('rating', is_dc=False, formatter=int)
    rating = MetadataField('rating', is_dc=False, formatter=float)
    pubdate = MetadataField('date', formatter=parse_date,
                            renderer=isoformat)
    publication_type = MetadataField('publication_type', is_dc=False)
@@ -883,6 +883,8 @@ class OPF(object): # {{{
                val = etree.tostring(x, with_tail=False, encoding=unicode,
                        method='text').strip()
                if val and typ not in ('calibre', 'uuid'):
                    if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
                        val = val[len('urn:isbn:'):]
                    identifiers[typ] = val
                    found_scheme = True
                    break
95  src/calibre/ebooks/metadata/sources/worker.py  (new file)
@@ -0,0 +1,95 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
from threading import Event
from io import BytesIO

from calibre.utils.date import as_utc
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.utils.logging import GUILog
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF

def merge_result(oldmi, newmi, ensure_fields=None):
    dummy = Metadata(_('Unknown'))
    for f in msprefs['ignore_fields']:
        if ':' in f or (ensure_fields and f in ensure_fields):
            continue
        setattr(newmi, f, getattr(dummy, f))
    fields = set()
    for plugin in metadata_plugins(['identify']):
        fields |= plugin.touched_fields

    def is_equal(x, y):
        if hasattr(x, 'tzinfo'):
            x = as_utc(x)
        if hasattr(y, 'tzinfo'):
            y = as_utc(y)
        return x == y

    for f in fields:
        # Optimize so that set_metadata does not have to do extra work later
        if not f.startswith('identifier:'):
            if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
                    getattr(oldmi, f))):
                setattr(newmi, f, getattr(dummy, f))

    return newmi

def main(do_identify, covers, metadata, ensure_fields):
    failed_ids = set()
    failed_covers = set()
    all_failed = True
    log = GUILog()

    for book_id, mi in metadata.iteritems():
        mi = OPF(BytesIO(mi), basedir=os.getcwdu(),
                populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        cdata = None
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log, Event(), title=title, authors=authors,
                        identifiers=identifiers)
            except:
                pass
            if results:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open('%d.mi'%book_id, 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))
            else:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)

        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                    identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open('%d.cover'%book_id, 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open('%d.log'%book_id, 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
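main() above leaves one '%d.mi' OPF, an optional '%d.cover' and a '%d.log' per book in its working directory; a minimal sketch of how a caller can pair them up afterwards, mirroring the GUI-side code later in this commit (directory and book id below are made up).

    import os

    tdir = '/tmp/metadata_bulk_example'   # hypothetical working directory
    book_id = 42                          # hypothetical book id
    opf = os.path.join(tdir, '%d.mi' % book_id)
    cover = os.path.join(tdir, '%d.cover' % book_id)
    opf = opf if os.path.exists(opf) else None        # missing file -> metadata download failed
    cover = cover if os.path.exists(cover) else None  # missing file -> cover download failed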
@@ -217,6 +217,10 @@ class EbookIterator(object):
        if hasattr(self.pathtoopf, 'manifest'):
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
import os, shutil
from functools import partial

from PyQt4.Qt import QMenu, QModelIndex, QTimer
@@ -16,6 +16,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.device_category_editor import DeviceCategoryEditor
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.icu import sort_key
from calibre.db.errors import NoSuchFormat

@@ -79,17 +80,27 @@ class EditMetadataAction(InterfaceAction):
                Dispatcher(self.metadata_downloaded),
                ensure_fields=ensure_fields)

    def cleanup_bulk_download(self, tdir):
        try:
            shutil.rmtree(tdir, ignore_errors=True)
        except:
            pass

    def metadata_downloaded(self, job):
        if job.failed:
            self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
            return
        from calibre.gui2.metadata.bulk_download import get_job_details
        id_map, failed_ids, failed_covers, all_failed, det_msg = \
            get_job_details(job)
        (aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed,
                det_msg, lm_map) = get_job_details(job)
        if aborted:
            return self.cleanup_bulk_download(tdir)
        if all_failed:
            num = len(failed_ids | failed_covers)
            self.cleanup_bulk_download(tdir)
            return error_dialog(self.gui, _('Download failed'),
                _('Failed to download metadata or covers for any of the %d'
                ' book(s).') % len(id_map), det_msg=det_msg, show=True)
                ' book(s).') % num, det_msg=det_msg, show=True)

        self.gui.status_bar.show_message(_('Metadata download completed'), 3000)

@@ -103,28 +114,27 @@ class EditMetadataAction(InterfaceAction):
        msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
            ' "Show details" to see which books.')%num

        payload = (id_map, failed_ids, failed_covers)
        payload = (id_map, tdir, log_file, lm_map)
        from calibre.gui2.dialogs.message_box import ProceedNotification
        p = ProceedNotification(self.apply_downloaded_metadata,
                payload, job.html_details,
                payload, log_file,
                _('Download log'), _('Download complete'), msg,
                det_msg=det_msg, show_copy_button=show_copy_button,
                parent=self.gui)
                cancel_callback=lambda x:self.cleanup_bulk_download(tdir),
                parent=self.gui, log_is_file=True)
        p.show()

    def apply_downloaded_metadata(self, payload):
        id_map, failed_ids, failed_covers = payload
        id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in
            failed_ids])
        if not id_map:
        good_ids, tdir, log_file, lm_map = payload
        if not good_ids:
            return

        modified = set()
        db = self.gui.current_db

        for i, mi in id_map.iteritems():
        for i in good_ids:
            lm = db.metadata_last_modified(i, index_is_id=True)
            if lm > mi.last_modified:
            if lm > lm_map[i]:
                title = db.title(i, index_is_id=True)
                authors = db.authors(i, index_is_id=True)
                if authors:
@@ -144,7 +154,18 @@ class EditMetadataAction(InterfaceAction):
                'Do you want to proceed?'), det_msg='\n'.join(modified)):
                return

        self.apply_metadata_changes(id_map)
        id_map = {}
        for bid in good_ids:
            opf = os.path.join(tdir, '%d.mi'%bid)
            if not os.path.exists(opf):
                opf = None
            cov = os.path.join(tdir, '%d.cover'%bid)
            if not os.path.exists(cov):
                cov = None
            id_map[bid] = (opf, cov)

        self.apply_metadata_changes(id_map, callback=lambda x:
                self.cleanup_bulk_download(tdir))

    # }}}

@@ -468,13 +489,18 @@ class EditMetadataAction(InterfaceAction):
        callback can be either None or a function accepting a single argument,
        in which case it is called after applying is complete with the list of
        changed ids.

        id_map can also be a mapping of ids to 2-tuple's where each 2-tuple
        contains the absolute paths to an OPF and cover file respectively. If
        either of the paths is None, then the corresponding metadata is not
        updated.
        '''
        if title is None:
            title = _('Applying changed metadata')
        self.apply_id_map = list(id_map.iteritems())
        self.apply_current_idx = 0
        self.apply_failures = []
        self.applied_ids = []
        self.applied_ids = set()
        self.apply_pd = None
        self.apply_callback = callback
        if len(self.apply_id_map) > 1:
@@ -492,28 +518,49 @@ class EditMetadataAction(InterfaceAction):
            return self.finalize_apply()

        i, mi = self.apply_id_map[self.apply_current_idx]
        if isinstance(mi, tuple):
            opf, cover = mi
            if opf:
                mi = OPF(open(opf, 'rb'), basedir=os.path.dirname(opf),
                        populate_spine=False).to_book_metadata()
                self.apply_mi(i, mi)
            if cover:
                self.gui.current_db.set_cover(i, open(cover, 'rb'),
                        notify=False, commit=False)
                self.applied_ids.add(i)
        else:
            self.apply_mi(i, mi)

        self.apply_current_idx += 1
        if self.apply_pd is not None:
            self.apply_pd.value += 1
        QTimer.singleShot(50, self.do_one_apply)

    def apply_mi(self, book_id, mi):
        db = self.gui.current_db

        try:
            set_title = not mi.is_null('title')
            set_authors = not mi.is_null('authors')
            idents = db.get_identifiers(i, index_is_id=True)
            idents = db.get_identifiers(book_id, index_is_id=True)
            if mi.identifiers:
                idents.update(mi.identifiers)
            mi.identifiers = idents
            if mi.is_null('series'):
                mi.series_index = None
            if self._am_merge_tags:
                old_tags = db.tags(i, index_is_id=True)
                old_tags = db.tags(book_id, index_is_id=True)
                if old_tags:
                    tags = [x.strip() for x in old_tags.split(',')] + (
                        mi.tags if mi.tags else [])
                    mi.tags = list(set(tags))
            db.set_metadata(i, mi, commit=False, set_title=set_title,
            db.set_metadata(book_id, mi, commit=False, set_title=set_title,
                    set_authors=set_authors, notify=False)
            self.applied_ids.append(i)
            self.applied_ids.add(book_id)
        except:
            import traceback
            self.apply_failures.append((i, traceback.format_exc()))
            self.apply_failures.append((book_id, traceback.format_exc()))

        try:
            if mi.cover:
@@ -521,11 +568,6 @@ class EditMetadataAction(InterfaceAction):
        except:
            pass

        self.apply_current_idx += 1
        if self.apply_pd is not None:
            self.apply_pd.value += 1
        QTimer.singleShot(50, self.do_one_apply)

    def finalize_apply(self):
        db = self.gui.current_db
        db.commit()
@@ -550,7 +592,7 @@ class EditMetadataAction(InterfaceAction):
        if self.applied_ids:
            cr = self.gui.library_view.currentIndex().row()
            self.gui.library_view.model().refresh_ids(
                self.applied_ids, cr)
                list(self.applied_ids), cr)
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
            self.gui.tags_view.recount()
@@ -559,7 +601,7 @@ class EditMetadataAction(InterfaceAction):
            self.apply_pd = None
        try:
            if callable(self.apply_callback):
                self.apply_callback(self.applied_ids)
                self.apply_callback(list(self.applied_ids))
        finally:
            self.apply_callback = None
@@ -160,7 +160,7 @@ class ProceedNotification(MessageBox): # {{{

    def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
            det_msg='', show_copy_button=False, parent=None,
            cancel_callback=None):
            cancel_callback=None, log_is_file=False):
        '''
        A non modal popup that notifies the user that a background task has
        been completed.
@@ -175,12 +175,15 @@ class ProceedNotification(MessageBox): # {{{
        :param title: The title for this popup
        :param msg: The msg to display
        :param det_msg: Detailed message
        :param log_is_file: If True the html_log parameter is interpreted as
            the path to a file on disk containing the log encoded with utf-8
        '''
        MessageBox.__init__(self, MessageBox.QUESTION, title, msg,
                det_msg=det_msg, show_copy_button=show_copy_button,
                parent=parent)
        self.payload = payload
        self.html_log = html_log
        self.log_is_file = log_is_file
        self.log_viewer_title = log_viewer_title

        self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
@@ -192,7 +195,11 @@ class ProceedNotification(MessageBox): # {{{
        _proceed_memory.append(self)

    def show_log(self):
        self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
        log = self.html_log
        if self.log_is_file:
            with open(log, 'rb') as f:
                log = f.read().decode('utf-8')
        self.log_viewer = ViewLog(self.log_viewer_title, log,
                parent=self)

    def do_proceed(self, result):
@@ -402,7 +402,8 @@ class DetailView(QDialog, Ui_Dialog): # {{{
        self.setupUi(self)
        self.setWindowTitle(job.description)
        self.job = job
        self.html_view = hasattr(job, 'html_details')
        self.html_view = (hasattr(job, 'html_details') and not getattr(job,
            'ignore_html_details', False))
        if self.html_view:
            self.log.setVisible(False)
        else:
@ -7,22 +7,42 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, time, shutil
|
||||
from functools import partial
|
||||
from itertools import izip
|
||||
from threading import Event
|
||||
from threading import Thread
|
||||
|
||||
from PyQt4.Qt import (QIcon, QDialog,
|
||||
QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)
|
||||
|
||||
from calibre.gui2.threaded_jobs import ThreadedJob
|
||||
from calibre.ebooks.metadata.sources.identify import identify, msprefs
|
||||
from calibre.ebooks.metadata.sources.covers import download_cover
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.date import as_utc
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from calibre.ptempfile import (PersistentTemporaryDirectory,
|
||||
PersistentTemporaryFile)
|
||||
|
||||
# Start download {{{
|
||||
|
||||
class Job(ThreadedJob):
|
||||
|
||||
ignore_html_details = True
|
||||
|
||||
def consolidate_log(self):
|
||||
self.consolidated_log = self.log.plain_text
|
||||
self.log = None
|
||||
|
||||
def read_consolidated_log(self):
|
||||
return self.consolidated_log
|
||||
|
||||
@property
|
||||
def details(self):
|
||||
if self.consolidated_log is None:
|
||||
return self.log.plain_text
|
||||
return self.read_consolidated_log()
|
||||
|
||||
@property
|
||||
def log_file(self):
|
||||
return open(self.download_debug_log, 'rb')
|
||||
|
||||
def show_config(gui, parent):
|
||||
from calibre.gui2.preferences import show_config_widget
|
||||
show_config_widget('Sharing', 'Metadata download', parent=parent,
|
||||
@ -104,19 +124,22 @@ def start_download(gui, ids, callback, ensure_fields=None):
|
||||
d.b.clicked.disconnect()
|
||||
if ret != d.Accepted:
|
||||
return
|
||||
tf = PersistentTemporaryFile('_metadata_bulk.log')
|
||||
tf.close()
|
||||
|
||||
for batch in split_jobs(ids):
|
||||
job = ThreadedJob('metadata bulk download',
|
||||
_('Download metadata for %d books')%len(batch),
|
||||
download, (batch, gui.current_db, d.identify, d.covers,
|
||||
ensure_fields), {}, callback)
|
||||
gui.job_manager.run_threaded_job(job)
|
||||
job = Job('metadata bulk download',
|
||||
_('Download metadata for %d books')%len(ids),
|
||||
download, (ids, tf.name, gui.current_db, d.identify, d.covers,
|
||||
ensure_fields), {}, callback)
|
||||
job.download_debug_log = tf.name
|
||||
gui.job_manager.run_threaded_job(job)
|
||||
gui.status_bar.show_message(_('Metadata download started'), 3000)
|
||||
|
||||
# }}}
|
||||
|
||||
def get_job_details(job):
|
||||
id_map, failed_ids, failed_covers, title_map, all_failed = job.result
|
||||
(aborted, good_ids, tdir, log_file, failed_ids, failed_covers, title_map,
|
||||
lm_map, all_failed) = job.result
|
||||
det_msg = []
|
||||
for i in failed_ids | failed_covers:
|
||||
title = title_map[i]
|
||||
@ -126,92 +149,118 @@ def get_job_details(job):
|
||||
title += (' ' + _('(Failed cover)'))
|
||||
det_msg.append(title)
|
||||
det_msg = '\n'.join(det_msg)
|
||||
return id_map, failed_ids, failed_covers, all_failed, det_msg
|
||||
return (aborted, good_ids, tdir, log_file, failed_ids, failed_covers,
|
||||
all_failed, det_msg, lm_map)
|
||||
|
||||
def merge_result(oldmi, newmi, ensure_fields=None):
|
||||
dummy = Metadata(_('Unknown'))
|
||||
for f in msprefs['ignore_fields']:
|
||||
if ':' in f or (ensure_fields and f in ensure_fields):
|
continue
setattr(newmi, f, getattr(dummy, f))
fields = set()
for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields
class HeartBeat(object):
CHECK_INTERVAL = 300 # seconds
''' Check that the file count in tdir changes every five minutes '''

def is_equal(x, y):
if hasattr(x, 'tzinfo'):
x = as_utc(x)
if hasattr(y, 'tzinfo'):
y = as_utc(y)
return x == y
def __init__(self, tdir):
self.tdir = tdir
self.last_count = len(os.listdir(self.tdir))
self.last_time = time.time()

for f in fields:
# Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'):
if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f))
def __call__(self):
if time.time() - self.last_time > self.CHECK_INTERVAL:
c = len(os.listdir(self.tdir))
if c == self.last_count:
return False
self.last_count = c
self.last_time = time.time()
return True

newmi.last_modified = oldmi.last_modified
class Notifier(Thread):

return newmi
def __init__(self, notifications, title_map, tdir, total):
Thread.__init__(self)
self.daemon = True
self.notifications, self.title_map = notifications, title_map
self.tdir, self.total = tdir, total
self.seen = set()
self.keep_going = True

def download(ids, db, do_identify, covers, ensure_fields,
def run(self):
while self.keep_going:
try:
names = os.listdir(self.tdir)
except:
pass
else:
for x in names:
if x.endswith('.log'):
try:
book_id = int(x.partition('.')[0])
except:
continue
if book_id not in self.seen and book_id in self.title_map:
self.seen.add(book_id)
self.notifications.put((
float(len(self.seen))/self.total,
_('Processed %s')%self.title_map[book_id]))
time.sleep(1)

def download(all_ids, tf, db, do_identify, covers, ensure_fields,
log=None, abort=None, notifications=None):
ids = list(ids)
metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
for i in ids]
batch_size = 10
batches = split_jobs(all_ids, batch_size=batch_size)
tdir = PersistentTemporaryDirectory('_metadata_bulk')
heartbeat = HeartBeat(tdir)

failed_ids = set()
failed_covers = set()
title_map = {}
ans = {}
count = 0
lm_map = {}
ans = set()
all_failed = True
'''
# Test apply dialog
all_failed = do_identify = covers = False
'''
for i, mi in izip(ids, metadata):
if abort.is_set():
log.error('Aborting...')
break
title, authors, identifiers = mi.title, mi.authors, mi.identifiers
title_map[i] = title
if do_identify:
results = []
aborted = False
count = 0
notifier = Notifier(notifications, title_map, tdir, len(all_ids))
notifier.start()

try:
for ids in batches:
if abort.is_set():
log.error('Aborting...')
break
metadata = {i:db.get_metadata(i, index_is_id=True,
get_user_categories=False) for i in ids}
for i in ids:
title_map[i] = metadata[i].title
lm_map[i] = metadata[i].last_modified
metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
metadata.iteritems()}
try:
results = identify(log, Event(), title=title, authors=authors,
identifiers=identifiers)
except:
pass
if results:
ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
(do_identify, covers, metadata, ensure_fields),
cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
except WorkerError as e:
if e.orig_tb:
raise Exception('Failed to download metadata. Original '
'traceback: \n\n'+e.orig_tb)
raise
count += batch_size

fids, fcovs, allf = ret['result']
if not allf:
all_failed = False
mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
identifiers = mi.identifiers
if not mi.is_null('rating'):
# set_metadata expects a rating out of 10
mi.rating *= 2
else:
log.error('Failed to download metadata for', title)
failed_ids.add(i)
# We don't want set_metadata operating on anything but covers
mi = merge_result(mi, mi, ensure_fields=ensure_fields)
if covers:
cdata = download_cover(log, title=title, authors=authors,
identifiers=identifiers)
if cdata is not None:
with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
f.write(cdata[-1])
mi.cover = f.name
all_failed = False
else:
failed_covers.add(i)
ans[i] = mi
count += 1
notifications.put((count/len(ids),
_('Downloaded %(num)d of %(tot)d')%dict(num=count, tot=len(ids))))
log('Download complete, with %d failures'%len(failed_ids))
return (ans, failed_ids, failed_covers, title_map, all_failed)

failed_ids = failed_ids.union(fids)
failed_covers = failed_covers.union(fcovs)
ans = ans.union(set(ids) - fids)
for book_id in ids:
lp = os.path.join(tdir, '%d.log'%book_id)
if os.path.exists(lp):
with open(tf, 'ab') as dest, open(lp, 'rb') as src:
dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
'#'*20+'\n').encode('utf-8'))
shutil.copyfileobj(src, dest)

if abort.is_set():
aborted = True
log('Download complete, with %d failures'%len(failed_ids))
return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
lm_map, all_failed)
finally:
notifier.keep_going = False
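The new download() above splits all_ids into batches with split_jobs(), which is not shown in this hunk. A minimal sketch of what such a batching helper might look like; the name and call signature are taken from the call site above, the body is an assumption:

# Minimal sketch, not the actual calibre implementation: split a list of
# book ids into fixed-size batches, matching the
# split_jobs(all_ids, batch_size=batch_size) call used by download() above.
def split_jobs(ids, batch_size=10):
    ans, ids = [], list(ids)
    while ids:
        ans.append(ids[:batch_size])  # one batch of at most batch_size ids
        ids = ids[batch_size:]
    return ans

For example, split_jobs(list(range(25)), batch_size=10) would yield three batches of 10, 10 and 5 ids, each of which is then handed to a single fork_job() worker.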
@ -161,10 +161,10 @@ class MetadataSingleDialogBase(ResizableDialog):
self.manage_authors_button.clicked.connect(self.authors.manage_authors)

self.series = SeriesEdit(self)
self.remove_unused_series_button = QToolButton(self)
self.remove_unused_series_button.setToolTip(
_('Remove unused series (Series that have no books)') )
self.remove_unused_series_button.clicked.connect(self.remove_unused_series)
self.clear_series_button = QToolButton(self)
self.clear_series_button.setToolTip(
_('Clear series') )
self.clear_series_button.clicked.connect(self.series.clear)
self.series_index = SeriesIndexEdit(self, self.series)
self.basic_metadata_widgets.extend([self.series, self.series_index])

@ -198,6 +198,7 @@ class MetadataSingleDialogBase(ResizableDialog):
self.basic_metadata_widgets.append(self.identifiers)
self.clear_identifiers_button = QToolButton(self)
self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
self.clear_identifiers_button.setToolTip(_('Clear Ids'))
self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
self.paste_isbn_button = QToolButton(self)
self.paste_isbn_button.setToolTip('<p>' +
@ -303,17 +304,6 @@ class MetadataSingleDialogBase(ResizableDialog):
self.title_sort.auto_generate()
self.author_sort.auto_generate()

def remove_unused_series(self, *args):
self.db.remove_unused_series()
idx = self.series.current_val
self.series.clear()
self.series.initialize(self.db, self.book_id)
if idx:
for i in range(self.series.count()):
if unicode(self.series.itemText(i)) == idx:
self.series.setCurrentIndex(i)
break

def tags_editor(self, *args):
self.tags.edit(self.db, self.book_id)

@ -591,7 +581,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
sto(self.title_sort, self.authors)
create_row(1, self.authors, self.deduce_author_sort_button, self.author_sort)
sto(self.author_sort, self.series)
create_row(2, self.series, self.remove_unused_series_button,
create_row(2, self.series, self.clear_series_button,
self.series_index, icon='trash.png')
sto(self.series_index, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
@ -756,7 +746,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
button=self.remove_unused_series_button, icon='trash.png')
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)
@ -892,7 +882,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
button=self.remove_unused_series_button, icon='trash.png')
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)
@ -35,9 +35,7 @@
<string><p>If you leave the password blank, anyone will be able to
access your book collection using the web interface.
<br>
<p>Note that passwords do not work with Android devices.
Leave this blank if you intend to use the server with an
Android phone or tablet.</string>
<string><p>Some devices have browsers that do not support authentication. If you are having trouble downloading files from the content server, try removing the password.</string>
</property>
</widget>
</item>
@ -167,17 +165,13 @@ Leave this blank if you intend to use the server with an
</font>
</property>
<property name="toolTip">
<string><p>Because of a bug in Google's Android, setting a password
will prevent the server from working with Android devices.
<br>
<p>Do not set a password if you plan to use the server with an
Android phone or tablet.</string>
<string><p>Some devices have browsers that do not support authentication. If you are having trouble downloading files from the content server, try removing the password.</string>
</property>
<property name="styleSheet">
<string notr="true">QLabel {color:red}</string>
</property>
<property name="text">
<string>Password incompatible with Android devices</string>
<string>Password incompatible with some devices</string>
</property>
</widget>
</item>
@ -241,12 +241,6 @@ def fetch_scheduled_recipe(arg): # {{{
if 'output_profile' in ps:
recs.append(('output_profile', ps['output_profile'],
OptionRecommendation.HIGH))
# Disabled since apparently some people use
# K4PC and, surprise, surprise, it doesn't support
# indexed MOBIs.
#if ps['output_profile'] == 'kindle':
# recs.append(('no_inline_toc', True,
# OptionRecommendation.HIGH))

lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0:
@ -822,7 +822,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator()
else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])
self.metadata.show_opf(self.iterator.opf,
self.iterator.book_format)
self.view.current_language = self.iterator.language
title = self.iterator.opf.title
if not title:
@ -849,7 +850,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.current_book_has_toc = bool(self.iterator.toc)
self.current_title = title
self.setWindowTitle(self.base_window_title+' - '+title +
' [%s]'%os.path.splitext(pathtoebook)[1][1:].upper())
' [%s]'%self.iterator.book_format)
self.pos.setMaximum(sum(self.iterator.pages))
self.pos.setSuffix(' / %d'%sum(self.iterator.pages))
self.vertical_scrollbar.setMinimum(100)
@ -15,7 +15,7 @@ from cherrypy.process.plugins import SimplePlugin
from calibre.constants import __appname__, __version__
from calibre.utils.date import fromtimestamp
from calibre.library.server import listen_on, log_access_file, log_error_file
from calibre.library.server.utils import expose
from calibre.library.server.utils import expose, AuthController
from calibre.utils.mdns import publish as publish_zeroconf, \
stop_server as stop_zeroconf, get_external_ip
from calibre.library.server.content import ContentServer
@ -31,10 +31,11 @@ from calibre import prints, as_unicode

class DispatchController(object): # {{{

def __init__(self, prefix, wsgi=False):
def __init__(self, prefix, wsgi=False, auth_controller=None):
self.dispatcher = cherrypy.dispatch.RoutesDispatcher()
self.funcs = []
self.seen = set()
self.auth_controller = auth_controller
self.prefix = prefix if prefix else ''
if wsgi:
self.prefix = ''
@ -44,6 +45,7 @@ class DispatchController(object): # {{{
raise NameError('Route name: '+ repr(name) + ' already used')
self.seen.add(name)
kwargs['action'] = 'f_%d'%len(self.funcs)
aw = kwargs.pop('android_workaround', False)
if route != '/':
route = self.prefix + route
elif self.prefix:
@ -52,6 +54,8 @@ class DispatchController(object): # {{{
self.dispatcher.connect(name+'prefix_extra_trailing',
self.prefix+'/', self, **kwargs)
self.dispatcher.connect(name, route, self, **kwargs)
if self.auth_controller is not None:
func = self.auth_controller(func, aw)
self.funcs.append(expose(func))

def __getattr__(self, attr):
@ -156,6 +160,8 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
self.config = {}
self.is_running = False
self.exception = None
auth_controller = None
self.users_dict = {}
#self.config['/'] = {
# 'tools.sessions.on' : True,
# 'tools.sessions.timeout': 60, # Session times out after 60 minutes
@ -171,15 +177,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
}

if opts.password:
self.config['/'] = {
'tools.digest_auth.on' : True,
'tools.digest_auth.realm' : (
'Your calibre library. Username: '
+ opts.username.strip()),
'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()},
}
self.users_dict[opts.username.strip()] = opts.password.strip()
auth_controller = AuthController('Your calibre library',
self.users_dict)

self.__dispatcher__ = DispatchController(self.opts.url_prefix, wsgi)
self.__dispatcher__ = DispatchController(self.opts.url_prefix,
wsgi=wsgi, auth_controller=auth_controller)
for x in self.__class__.__bases__:
if hasattr(x, 'add_routes'):
x.__init__(self)

@ -41,7 +41,8 @@ class ContentServer(object):
connect('root', '/', self.index)
connect('old', '/old', self.old)
connect('get', '/get/{what}/{id}', self.get,
conditions=dict(method=["GET", "HEAD"]))
conditions=dict(method=["GET", "HEAD"]),
android_workaround=True)
connect('static', '/static/{name:.*?}', self.static,
conditions=dict(method=["GET", "HEAD"]))
connect('favicon', '/favicon.png', self.favicon,
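A simplified sketch of the dispatch wiring shown above; this TinyDispatcher is illustrative only, not calibre's DispatchController. connect() pops the android_workaround flag and, when an auth controller is configured, wraps the route handler with it before the handler is exposed:

# Illustrative only: mimic the connect()/auth_controller interaction above.
class TinyDispatcher(object):

    def __init__(self, auth_controller=None):
        self.auth_controller = auth_controller
        self.funcs = []

    def connect(self, name, route, func, **kwargs):
        aw = kwargs.pop('android_workaround', False)
        if self.auth_controller is not None:
            # cookie auth is only allowed for routes that ask for it
            func = self.auth_controller(func, aw)
        self.funcs.append((name, route, func))

With this wiring, only routes registered with android_workaround=True (such as /get above) may fall back to cookie authentication; every other route always goes through digest authentication.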
@ -5,10 +5,12 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import time, sys
import time, sys, uuid, hashlib
from urllib import quote as quote_, unquote as unquote_
from functools import wraps

import cherrypy
from cherrypy.lib.auth_digest import digest_auth, get_ha1_dict_plain

from calibre import strftime as _strftime, prints, isbytestring
from calibre.utils.date import now as nowf
@ -40,6 +42,7 @@ class Offsets(object):

def expose(func):

@wraps(func)
def do(*args, **kwargs):
self = func.im_self
if self.opts.develop:
@ -54,10 +57,87 @@ def expose(func):
prints('\tTime:', func.__name__, time.time()-start)
return ans

do.__name__ = func.__name__

return do

class AuthController(object):

'''
Implement Digest authentication for the content server. Android browsers
cannot handle HTTP AUTH when downloading files, as the download is handed
off to a separate process. So we use a cookie based authentication scheme
for some endpoints (/get) to allow downloads to work on Android. Apparently,
cookies are passed to the download process. The cookie expires after
MAX_AGE seconds.

The android browser appears to send a GET request to the server and only if
that request succeeds is the download handed off to the download process.
Therefore, even if the user clicks Get after MAX_AGE, it should still work.
In fact, we could reduce MAX_AGE, but we leave it high as the download
process might have downloads queued and therefore not start the download
immediately.

Note that this makes the server vulnerable to session-hijacking (i.e.
someone can sniff the traffic and create their own requests to /get with the
appropriate cookie, for an hour). The fix is to use https, but since this
is usually run as a private server, that cannot be done. If you care about
this vulnerability, run the server behind a reverse proxy that uses HTTPS.
'''

MAX_AGE = 3600 # Number of seconds after a successful digest auth for which
# the cookie auth will be allowed

def __init__(self, realm, users_dict):
self.realm = realm
self.users_dict = users_dict
self.secret = bytes(uuid.uuid4().hex)
self.cookie_name = 'android_workaround'

def hashit(self, raw):
return hashlib.sha1(raw).hexdigest()

def __call__(self, func, allow_cookie_auth):

@wraps(func)
def authenticate(*args, **kwargs):
cookie = cherrypy.request.cookie.get(self.cookie_name, None)
if not (allow_cookie_auth and self.is_valid(cookie)):
digest_auth(self.realm, get_ha1_dict_plain(self.users_dict),
self.secret)

cookie = cherrypy.response.cookie
cookie[self.cookie_name] = self.generate_cookie()
cookie[self.cookie_name]['path'] = '/'
cookie[self.cookie_name]['version'] = '1'

return func(*args, **kwargs)

authenticate.im_self = func.im_self
return authenticate

def generate_cookie(self, timestamp=None):
'''
Generate a cookie. The cookie contains a plain text timestamp and a
hash of the timestamp and the server secret.
'''
timestamp = int(time.time()) if timestamp is None else timestamp
key = self.hashit('%d:%s'%(timestamp, self.secret))
return '%d:%s'%(timestamp, key)

def is_valid(self, cookie):
'''
Check that cookie has not been spoofed (i.e. verify the declared
timestamp against the hashed timestamp). If the timestamps match, check
that the cookie has not expired. Return True iff the cookie has not
been spoofed and has not expired.
'''
try:
timestamp, hashpart = cookie.value.split(':', 1)
timestamp = int(timestamp)
except:
return False
s_timestamp, s_hashpart = self.generate_cookie(timestamp).split(':', 1)
is_valid = s_hashpart == hashpart
return (is_valid and (time.time() - timestamp) < self.MAX_AGE)

def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None):
if not hasattr(dt, 'timetuple'):
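The timestamp:hash cookie scheme used by AuthController above can be exercised on its own. A minimal sketch of the generate/validate round trip, using a stand-in secret instead of a live server (the function bodies mirror the methods above but are standalone):

# Minimal sketch of the cookie scheme; 'secret' is a stand-in for the
# per-server uuid-derived secret used by AuthController.
import time, hashlib

secret = 'stand-in-secret'

def generate_cookie(timestamp=None):
    timestamp = int(time.time()) if timestamp is None else timestamp
    h = hashlib.sha1(('%d:%s' % (timestamp, secret)).encode('utf-8')).hexdigest()
    return '%d:%s' % (timestamp, h)

def is_valid(value, max_age=3600):
    try:
        timestamp, hashpart = value.split(':', 1)
        timestamp = int(timestamp)
    except Exception:
        return False
    # Recompute the hash for the declared timestamp, then check expiry
    expected = generate_cookie(timestamp).split(':', 1)[1]
    return expected == hashpart and (time.time() - timestamp) < max_age

assert is_valid(generate_cookie())       # fresh, untampered cookie
assert not is_valid('123:deadbeef')      # spoofed hash is rejected
assert not is_valid(generate_cookie(0))  # expired cookie is rejected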
@ -381,6 +381,18 @@ that allows you to create collections on your Kindle from the |app| metadata. It

.. note:: Amazon have removed the ability to manipulate collections completely in their newer models, like the Kindle Touch and Kindle Fire, making even the above plugin useless. If you really want the ability to manage collections on your Kindle via a USB connection, we encourage you to complain to Amazon about it, or get a reader where this is supported, like the SONY Readers.

I am getting an error when I try to use |app| with my Kobo Touch?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The Kobo Touch has very buggy firmware. Connecting to it has been known to fail at random. Certain combinations of motherboard, USB ports/cables/hubs can exacerbate this tendency to fail. If you are getting an error when connecting to your touch with |app|, try the following, each of which has solved the problem for *some* |app| users.

* Connect the Kobo directly to your computer, not via USB Hub
* Try a different USB cable and a different USB port on your computer
* Try a different computer (preferably an older model)
* Try upgrading the firmware on your Kobo Touch to the latest
* Try resetting the Kobo (sometimes this cures the problem for a little while, but then it re-appears, in which case you have to reset again and again)
* Try only putting one or two books onto the Kobo at a time and do not keep large collections on the Kobo

Library Management
------------------
@ -73,7 +73,7 @@ Edit metadata

|emii| The :guilabel:`Edit metadata` action has four variations which can be accessed by doing a right-click on the button.

1. **Edit metadata individually**: Allows you to edit the metadata of books one-by-one with the option of fetching metadata, including covers, from the Internet. It also allows you to add or remove particular ebook formats from a book.
1. **Edit metadata individually**: Allows you to edit the metadata of books one-by-one with the option of fetching metadata, including covers, from the Internet. It also allows you to add or remove particular ebook formats from a book.
2. **Edit metadata in bulk**: Allows you to edit common metadata fields for large numbers of books simultaneously. It operates on all the books you have selected in the :ref:`Library view <search_sort>`.
3. **Download metadata and covers**: Downloads metadata and covers (if available) for the books that are selected in the book list.
4. **Merge book records**: Gives you the capability of merging the metadata and formats of two or more book records. You can choose to either delete or keep the records that were not clicked first.
@ -117,7 +117,7 @@ View

|vi| The :guilabel:`View` action displays the book in an ebook viewer program. |app| has a built-in viewer for many ebook formats.
For other formats it uses the default operating system application. You can configure which formats should open with the internal viewer via
Preferences->Behavior. If a book has more than one format, you can view a particular format by doing a right-click on the button.
Preferences->Behavior. If a book has more than one format, you can view a particular format by doing a right-click on the button.

.. _send_to_device:

@ -175,7 +175,7 @@ Library
5. **<library name>**: Actions 5, 6 etc... give you immediate switch access between multiple libraries that you have created or attached to. This list contains only the 5 most frequently used libraries. For the complete list, use the Quick Switch menu.
6. **Library maintenance**: Allows you to check the current library for data consistency issues and restore the current library's database from backups.

.. note:: Metadata about your ebooks, e.g. title, author, and tags, is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an OPF file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual OPF files for you.
.. note:: Metadata about your ebooks, e.g. title, author, and tags, is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an OPF file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual OPF files for you.

You can copy or move books between different libraries (once you have more than one library setup) by right clicking on the book and selecting the action :guilabel:`Copy to library`.

@ -235,7 +235,7 @@ Connect/Share

1. **Connect to folder**: Allows you to connect to any folder on your computer as though it were a device and use all the facilities |app| has for devices with that folder. Useful if your device cannot be supported by |app| but is available as a USB disk.

2. **Connect to iTunes**: Allows you to connect to your iTunes books database as though it were a device. Once the books are sent to iTunes, you can use iTunes to make them available to your various iDevices. This is useful if you would rather not have |app| send books to your iDevice directly.
2. **Connect to iTunes**: Allows you to connect to your iTunes books database as though it were a device. Once the books are sent to iTunes, you can use iTunes to make them available to your various iDevices.

3. **Start Content Server**: Starts |app|'s built-in web server. When started, your |app| library will be accessible via a web browser from the Internet (if you choose). You can configure how the web server is accessed by setting preferences at :guilabel:`Preferences->Sharing->Sharing over the net`

@ -338,9 +338,9 @@ Two other kinds of searches are available: equality search and search using `reg
Equality searches are indicated by prefixing the search string with an equals sign (=). For example, the query
``tag:"=science"`` will match "science", but not "science fiction" or "hard science". Regular expression searches are
indicated by prefixing the search string with a tilde (~). Any `python-compatible regular expression <http://docs.python.org/library/re.html>`_ can
be used. Note that backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression contains anchors.
be used. Note that backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression contains anchors.

Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash.
Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash.

Enclose search strings with quotes (") if the string contains parenthesis or spaces. For example, to search
for the tag ``Science Fiction`` you would need to search for ``tag:"=science fiction"``. If you search for
@ -362,7 +362,7 @@ The syntax for searching for dates is::
If the date is ambiguous, the current locale is used for date comparison. For example, in an mm/dd/yyyy
locale 2/1/2009 is interpreted as 1 Feb 2009. In a dd/mm/yyyy locale it is interpreted as 2 Jan 2009. Some
special date strings are available. The string ``today`` translates to today's date, whatever it is. The
strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work.
strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work.
In addition, the string ``daysago`` (also translated) can be used to compare to a date some number of days ago.
For example::
@ -167,7 +167,8 @@ class Worker(object):
'''
exe = self.gui_executable if self.gui else self.executable
env = self.env
env['ORIGWD'] = cwd or os.path.abspath(os.getcwd())
env[b'ORIGWD'] = binascii.hexlify(cPickle.dumps(cwd or
os.path.abspath(os.getcwdu())))
_cwd = cwd
if priority is None:
priority = prefs['worker_process_priority']

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, cPickle, os
import sys, cPickle, os, binascii
from code import InteractiveInterpreter
from Queue import Queue, Empty
from threading import Thread
@ -130,7 +130,7 @@ class Interpreter(InteractiveInterpreter): # {{{
# }}}

def connect():
os.chdir(os.environ['ORIGWD'])
os.chdir(cPickle.loads(binascii.unhexlify(os.environ['ORIGWD'])))
address = cPickle.loads(unhexlify(os.environ['CALIBRE_WORKER_ADDRESS']))
key = unhexlify(os.environ['CALIBRE_WORKER_KEY'])
return Client(address, authkey=key)

@ -648,7 +648,10 @@ class BasicNewsRecipe(Recipe):
'url' : URL of print version,
'date' : The publication date of the article as a string,
'description' : A summary of the article
'content' : The full article (can be an empty string). This is used by FullContentProfile
'content' : The full article (can be an empty string). Obsolete,
do not use; instead save the content to a temporary
file and pass a file:///path/to/temp/file.html as
the URL.
}

For an example, see the recipe for downloading `The Atlantic`.

@ -33,7 +33,8 @@ qop_auth = 'auth'
qop_auth_int = 'auth-int'
valid_qops = (qop_auth, qop_auth_int)

valid_algorithms = ('MD5', 'MD5-sess')
valid_algorithms = ('MD5', 'MD5-sess', 'md5', 'md5-sess') # Changed by Kovid to
# add lowercase

def TRACE(msg):
@ -67,7 +68,7 @@ def get_ha1_dict(user_ha1_dict):
argument to digest_auth().
"""
def get_ha1(realm, username):
return user_ha1_dict.get(user)
return user_ha1_dict.get(username) # Changed by Kovid to fix typo

return get_ha1

@ -107,10 +108,10 @@ def synthesize_nonce(s, key, timestamp=None):

key
A secret string known only to the server.

timestamp
An integer seconds-since-the-epoch timestamp

"""
if timestamp is None:
timestamp = int(time.time())
@ -190,10 +191,10 @@ class HttpDigestAuthorization (object):

s
A string related to the resource, such as the hostname of the server.

key
A secret string known only to the server.

Both s and key must be the same values which were used to synthesize the nonce
we are trying to validate.
"""
@ -256,7 +257,7 @@ class HttpDigestAuthorization (object):
4.3. This refers to the entity the user agent sent in the request which
has the Authorization header. Typically GET requests don't have an entity,
and POST requests do.

"""
ha2 = self.HA2(entity_body)
# Request-Digest -- RFC 2617 3.2.2.1
@ -302,16 +303,16 @@ def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, stal
def digest_auth(realm, get_ha1, key, debug=False):
"""A CherryPy tool which hooks at before_handler to perform
HTTP Digest Access Authentication, as specified in :rfc:`2617`.

If the request has an 'authorization' header with a 'Digest' scheme, this
tool authenticates the credentials supplied in that header. If
the request has no 'authorization' header, or if it does but the scheme is
not "Digest", or if authentication fails, the tool sends a 401 response with
a 'WWW-Authenticate' Digest header.

realm
A string containing the authentication realm.

get_ha1
A callable which looks up a username in a credentials store
and returns the HA1 string, which is defined in the RFC to be
@ -320,13 +321,13 @@ def digest_auth(realm, get_ha1, key, debug=False):
where username is obtained from the request's 'authorization' header.
If username is not found in the credentials store, get_ha1() returns
None.

key
A secret string known only to the server, used in the synthesis of nonces.

"""
request = cherrypy.serving.request

auth_header = request.headers.get('authorization')
nonce_is_stale = False
if auth_header is not None:
@ -334,10 +335,10 @@ def digest_auth(realm, get_ha1, key, debug=False):
auth = HttpDigestAuthorization(auth_header, request.method, debug=debug)
except ValueError:
raise cherrypy.HTTPError(400, "The Authorization header could not be parsed.")

if debug:
TRACE(str(auth))

if auth.validate_nonce(realm, key):
ha1 = get_ha1(realm, auth.username)
if ha1 is not None:
@ -355,7 +356,7 @@ def digest_auth(realm, get_ha1, key, debug=False):
if debug:
TRACE("authentication of %s successful" % auth.username)
return

# Respond with 401 status and a WWW-Authenticate header
header = www_authenticate(realm, key, stale=nonce_is_stale)
if debug:
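The synthesize_nonce and validate_nonce docstrings above describe a timestamp-based nonce tied to a resource string s and a server-only key. The following is a generic, hedged illustration of that idea, not CherryPy's actual implementation; the function names and the max_age parameter are invented for the sketch:

# Generic illustration of a timestamp-based nonce: the nonce embeds a plain
# timestamp plus a hash over (timestamp, s, key), so the server can verify it
# was issued by itself and decide when it has gone stale.
import time
from hashlib import md5

def synthesize_nonce_sketch(s, key, timestamp=None):
    if timestamp is None:
        timestamp = int(time.time())
    h = md5(('%d:%s:%s' % (timestamp, s, key)).encode('utf-8')).hexdigest()
    return '%d:%s' % (timestamp, h)

def validate_nonce_sketch(nonce, s, key, max_age=600):
    try:
        timestamp, _ = nonce.split(':', 1)
        timestamp = int(timestamp)
    except Exception:
        return False, False
    ok = (nonce == synthesize_nonce_sketch(s, key, timestamp))
    stale = ok and (time.time() - timestamp) > max_age
    return ok, stale

nonce = synthesize_nonce_sketch('example.com', 'server-secret')
print(validate_nonce_sketch(nonce, 'example.com', 'server-secret'))  # (True, False)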