Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit 0dac0ef3a0 — Merge from trunk
@@ -4,6 +4,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 www.mainichi.jp
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class MainichiDailyNews(BasicNewsRecipe):
@@ -22,3 +23,18 @@ class MainichiDailyNews(BasicNewsRecipe):
     remove_tags = [{'class':"RelatedArticle"}]
     remove_tags_after = {'class':"Credit"}
 
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
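
The parse_feeds override above strips advertising entries whose URLs go through the pheedo.jp redirector. A minimal standalone sketch of the same filtering step (the feed and article objects stand in for calibre's Feed/Article classes); it is equivalent in effect to the delList loop in the hunk:

    import re

    def remove_pheedo_articles(feeds):
        # Drop any article whose URL points at the pheedo.jp ad redirector.
        for feed in feeds:
            feed.articles = [a for a in feed.articles
                             if not re.search(r'pheedo\.jp', a.url)]
        return feeds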
@@ -14,5 +14,19 @@ class MainichiDailyITNews(BasicNewsRecipe):
 
     remove_tags_before = {'class':"NewsTitle"}
     remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
+    remove_tags_after = {'class':"Credit"}
@@ -32,12 +32,9 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
     remove_tags_after = {'class':"cmn-pr_list"}
 
     feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
-              (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
-              (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
               (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
               (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
-              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
-              (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
+              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special')
             ]
 
     def get_browser(self):
resources/recipes/nikkei_sub_shakai.recipe (new file, 102 lines)
@@ -0,0 +1,102 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.nikkei.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NikkeiNet_sub_life(BasicNewsRecipe):
+    title          = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+    __author__     = 'Hiroshi Miura'
+    description    = 'News and current market affairs from Japan'
+    cover_url      = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url   = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language       = 'ja'
+    remove_javascript = False
+    temp_files     = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [
+        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
+    ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        #br.set_debug_http(True)
+        #br.set_debug_redirects(True)
+        #br.set_debug_responses(True)
+
+        if self.username is not None and self.password is not None:
+            #print "----------------------------get login form--------------------------------------------"
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            #print "----------------------------get login form---------------------------------------------"
+            #print "----------------------------set login form---------------------------------------------"
+            # remove disabled input which brings error on mechanize
+            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
+            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email']    = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            #print "----------------------------send login form---------------------------------------------"
+            #print "----------------------------open news main page-----------------------------------------"
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            #print "----------------------------www.nikkei.com BODY --------------------------------------"
+            #print response2.get_data()
+            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            #print response3.geturl()
+            raw = response3.get_data()
+            #print "---------------------------response to form --------------------------------------------"
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        #br.set_debug_http(False)
+        #br.set_debug_redirects(False)
+        #br.set_debug_responses(False)
+        return br
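
The login dance in get_browser ends by writing an LWP-Cookies-2.0 file by hand and loading it into the jar, because the redirectFlag cookie is normally set by JavaScript that mechanize never executes. A minimal sketch of just that trick; set_js_cookie is an illustrative helper name, not part of the recipe:

    import mechanize
    from calibre.ptempfile import PersistentTemporaryFile

    def set_js_cookie(cj, name, value, domain=".nikkei.com"):
        # Persist one cookie in LWP-Cookies-2.0 format, then load it into
        # the jar, emulating a cookie the site would set via Javascript.
        f = PersistentTemporaryFile('_cookie.txt')
        f.write("#LWP-Cookies-2.0\n")
        f.write('Set-Cookie3: %s=%s; domain="%s"; path="/"; path_spec; secure; '
                'expires="2029-12-21 05:07:59Z"; version=0\n' % (name, value, domain))
        f.close()
        cj.load(f.name)

    cj = mechanize.LWPCookieJar()
    set_js_cookie(cj, 'redirectFlag', '1')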
@@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
     remove_javascript = True
     masthead_title = u'YOMIURI ONLINE'
 
-    remove_tags_before = {'class':"article-def"}
+    keep_only_tags = [{'class':"article-def"}]
     remove_tags = [{'class':"RelatedArticle"},
                    {'class':"sbtns"}
                   ]
@@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
     remove_javascript = True
     masthead_title = u"YOMIURI ONLINE"
 
-    remove_tags_before = {'class':"article-def"}
+    keep_only_tags = [{'class':"article-def"}]
     remove_tags = [{'class':"RelatedArticle"},
                    {'class':"sbtns"}
                   ]
@@ -21,7 +21,7 @@ class ANDROID(USBMS):
             # HTC
             0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
                 : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
-                0xc92 : [0x100]},
+                0xc92 : [0x100], 0xc97: [0x226]},
 
             # Eken
             0x040d : { 0x8510 : [0x0001] },

@@ -63,7 +63,7 @@ class ANDROID(USBMS):
     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
             'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
-            'SCH-I500_CARD']
+            'SCH-I500_CARD', 'SPH-D700_CARD']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
 
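
Both hunks widen the Android driver's USB matching tables. The nested dict maps USB vendor id to product id to a list of accepted BCD device revisions; the Windows lists match by device name string instead. A rough sketch of how such a table is consulted (matches_device is a hypothetical name, not the driver's actual method):

    def matches_device(table, vendor_id, product_id, bcd):
        # table: {vendor_id: {product_id: [accepted BCD revisions]}}
        products = table.get(vendor_id, {})
        return bcd in products.get(product_id, [])

    HTC = {0x0bb4: {0xc92: [0x100], 0xc97: [0x226]}}
    assert matches_device(HTC, 0x0bb4, 0xc97, 0x226)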
@@ -11,9 +11,9 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.devices.mime import mime_type_ext
 from calibre.devices.interface import BookList as _BookList
 from calibre.constants import preferred_encoding
-from calibre import isbytestring
+from calibre import isbytestring, force_unicode
 from calibre.utils.config import prefs, tweaks
-from calibre.utils.icu import sort_key, strcmp as icu_strcmp
+from calibre.utils.icu import strcmp
 
 class Book(Metadata):
     def __init__(self, prefix, lpath, size=None, other=None):

@@ -241,7 +241,7 @@ class CollectionsBookList(BookList):
         if y is None:
             return -1
         if isinstance(x, (unicode, str)):
-            c = strcmp(x, y)
+            c = strcmp(force_unicode(x), force_unicode(y))
         else:
             c = cmp(x, y)
         if c != 0:
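
The strcmp change matters because the isinstance check still admits bytestrings via str, while the ICU-backed comparison wants unicode; normalizing both operands first avoids miscompares or crashes on non-ASCII collection names. A small sketch of the guarded comparison, assuming force_unicode decodes bytestrings with a sensible fallback encoding (safe_strcmp is an illustrative name):

    from calibre import force_unicode
    from calibre.utils.icu import strcmp

    def safe_strcmp(x, y):
        # Normalize both operands to unicode before the locale-aware comparison.
        return strcmp(force_unicode(x), force_unicode(y))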
src/calibre/ebooks/metadata/amazonfr.py (new file, 516 lines)
@@ -0,0 +1,516 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian <sengian1@gmail.com>'
+
+import sys, textwrap, re, traceback
+from urllib import urlencode
+from math import ceil
+
+from lxml import html
+from lxml.html import soupparser
+
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+    authors_to_sort_string
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
+from calibre.library.comments import sanitize_comments_html
+
+
+class AmazonFr(MetadataSource):
+
+    name = 'Amazon French'
+    description = _('Downloads metadata from amazon.fr')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='fr')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonEs(MetadataSource):
+
+    name = 'Amazon Spanish'
+    description = _('Downloads metadata from amazon.com in spanish')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='es')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonEn(MetadataSource):
+
+    name = 'Amazon English'
+    description = _('Downloads metadata from amazon.com in english')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='en')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonDe(MetadataSource):
+
+    name = 'Amazon German'
+    description = _('Downloads metadata from amazon.de')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='de')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class Amazon(MetadataSource):
+
+    name = 'Amazon'
+    description = _('Downloads metadata from amazon.com')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Kovid Goyal & Sengian'
+    version = (1, 1, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # if not self.site_customization:
+        #     return
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='all')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # @property
+    # def string_customization_help(self):
+    #     return _('You can select here the language for metadata search with amazon.com')
+
+
+def report(verbose):
+    if verbose:
+        traceback.print_exc()
+
+
+class Query(object):
+
+    BASE_URL_ALL = 'http://www.amazon.com'
+    BASE_URL_FR = 'http://www.amazon.fr'
+    BASE_URL_DE = 'http://www.amazon.de'
+
+    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
+            max_results=20, rlang='all'):
+        assert not(title is None and author is None and publisher is None \
+            and isbn is None and keywords is None)
+        assert (max_results < 21)
+
+        self.max_results = int(max_results)
+        self.renbres = re.compile(u'\s*(\d+)\s*')
+
+        q = {   'search-alias' : 'stripbooks' ,
+                'unfiltered' : '1',
+                'field-keywords' : '',
+                'field-author' : '',
+                'field-title' : '',
+                'field-isbn' : '',
+                'field-publisher' : ''
+                #get to amazon detailed search page to get all options
+                # 'node' : '',
+                # 'field-binding' : '',
+                #before, during, after
+                # 'field-dateop' : '',
+                #month as number
+                # 'field-datemod' : '',
+                # 'field-dateyear' : '',
+                #french only
+                # 'field-collection' : '',
+                #many options available
+            }
+
+        if rlang =='all':
+            q['sort'] = 'relevanceexprank'
+            self.urldata = self.BASE_URL_ALL
+        elif rlang =='es':
+            q['sort'] = 'relevanceexprank'
+            q['field-language'] = 'Spanish'
+            self.urldata = self.BASE_URL_ALL
+        elif rlang =='en':
+            q['sort'] = 'relevanceexprank'
+            q['field-language'] = 'English'
+            self.urldata = self.BASE_URL_ALL
+        elif rlang =='fr':
+            q['sort'] = 'relevancerank'
+            self.urldata = self.BASE_URL_FR
+        elif rlang =='de':
+            q['sort'] = 'relevancerank'
+            self.urldata = self.BASE_URL_DE
+        self.baseurl = self.urldata
+
+        if isbn is not None:
+            q['field-isbn'] = isbn.replace('-', '')
+        else:
+            if title is not None:
+                q['field-title'] = title
+            if author is not None:
+                q['field-author'] = author
+            if publisher is not None:
+                q['field-publisher'] = publisher
+            if keywords is not None:
+                q['field-keywords'] = keywords
+
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        if verbose:
+            print 'Query:', self.urldata
+
+        try:
+            raw = browser.open_novisit(self.urldata, timeout=timeout).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            raise
+        if '<title>404 - ' in raw:
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+
+        try:
+            feed = soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None, self.urldata
+
+        #nb of page
+        try:
+            nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
+        except:
+            return None, self.urldata
+
+        pages =[feed]
+        if len(nbresults) > 1:
+            nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
+            for i in xrange(2, nbpagetoquery + 1):
+                try:
+                    urldata = self.urldata + '&page=' + str(i)
+                    raw = browser.open_novisit(urldata, timeout=timeout).read()
+                except Exception, e:
+                    continue
+                if '<title>404 - ' in raw:
+                    continue
+                raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                        resolve_entities=True)[0]
+                try:
+                    feed = soupparser.fromstring(raw)
+                except:
+                    try:
+                        #remove ASCII invalid chars
+                        feed = soupparser.fromstring(clean_ascii_chars(raw))
+                    except:
+                        continue
+                pages.append(feed)
+
+        results = []
+        for x in pages:
+            results.extend([i.getparent().get('href') \
+                for i in x.xpath("//a/span[@class='srTitle']")])
+        return results[:self.max_results], self.baseurl
+
+
+class ResultList(list):
+
+    def __init__(self, baseurl, lang = 'all'):
+        self.baseurl = baseurl
+        self.lang = lang
+        self.repub = re.compile(u'\((.*)\)')
+        self.rerat = re.compile(u'([0-9.]+)')
+        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
+        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
+        self.recom = re.compile(r'(?s)<!--.*?-->')
+        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
+        self.reisbn = re.compile(u'(ISBN-10|ISBN-10|ASIN)', re.I)
+        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
+        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
+        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
+
+    def strip_tags_etree(self, etreeobj, invalid_tags):
+        for (itag, rmv) in invalid_tags.iteritems():
+            if rmv:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tree()
+            else:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tag()
+
+    def clean_entry(self, entry, invalid_tags = {'script': True},
+            invalid_id = (), invalid_class=()):
+        #invalid_tags: remove tag and keep content if False else remove
+        #remove tags
+        if invalid_tags:
+            self.strip_tags_etree(entry, invalid_tags)
+        #remove id
+        if invalid_id:
+            for eltid in invalid_id:
+                elt = entry.get_element_by_id(eltid)
+                if elt is not None:
+                    elt.drop_tree()
+        #remove class
+        if invalid_class:
+            for eltclass in invalid_class:
+                elts = entry.find_class(eltclass)
+                if elts is not None:
+                    for elt in elts:
+                        elt.drop_tree()
+
+    def get_title(self, entry):
+        title = entry.get_element_by_id('btAsinTitle')
+        if title is not None:
+            title = title.text
+        return unicode(title.replace('\n', '').strip())
+
+    def get_authors(self, entry):
+        author = entry.get_element_by_id('btAsinTitle')
+        while author.getparent().tag != 'div':
+            author = author.getparent()
+        author = author.getparent()
+        authortext = []
+        for x in author.getiterator('a'):
+            authortext.append(unicode(x.text_content().strip()))
+        return authortext
+
+    def get_description(self, entry, verbose):
+        try:
+            description = entry.get_element_by_id("productDescription").find("div[@class='content']")
+            inv_class = ('seeAll', 'emptyClear')
+            inv_tags ={'img': True, 'a': False}
+            self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
+            description = html.tostring(description, method='html', encoding=unicode).strip()
+            # remove all attributes from tags
+            description = self.reattr.sub(r'<\1>', description)
+            # Remove the notice about text referring to out of print editions
+            description = self.reoutp.sub('', description)
+            # Remove comments
+            description = self.recom.sub('', description)
+            return unicode(sanitize_comments_html(description))
+        except:
+            report(verbose)
+            return None
+
+    def get_tags(self, entry, browser, verbose):
+        try:
+            tags = entry.get_element_by_id('tagContentHolder')
+            testptag = tags.find_class('see-all')
+            if testptag:
+                for x in testptag:
+                    alink = x.xpath('descendant-or-self::a')
+                    if alink:
+                        if alink[0].get('class') == 'tgJsActive':
+                            continue
+                        link = self.baseurl + alink[0].get('href')
+                        entry = self.get_individual_metadata(browser, link, verbose)
+                        tags = entry.get_element_by_id('tagContentHolder')
+                        break
+            tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
+        except:
+            report(verbose)
+            tags = []
+        return tags
+
+    def get_book_info(self, entry, mi, verbose):
+        try:
+            entry = entry.get_element_by_id('SalesRank').getparent()
+        except:
+            try:
+                for z in entry.getiterator('h2'):
+                    if self.reprod.search(z.text_content()):
+                        entry = z.getparent().find("div[@class='content']/ul")
+                        break
+            except:
+                report(verbose)
+                return mi
+        elts = entry.findall('li')
+        #pub & date
+        elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
+        if elt:
+            pub = elt[0].find('b').tail
+            mi.publisher = unicode(self.repub.sub('', pub).strip())
+            d = self.repub.search(pub)
+            if d is not None:
+                d = d.group(1)
+                try:
+                    default = utcnow().replace(day=15)
+                    if self.lang != 'all':
+                        d = replace_months(d, self.lang)
+                    d = parse_date(d, assume_utc=True, default=default)
+                    mi.pubdate = d
+                except:
+                    report(verbose)
+        #ISBN
+        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
+        if elt:
+            isbn = elt[0].find('b').tail.replace('-', '').strip()
+            if check_isbn(isbn):
+                mi.isbn = unicode(isbn)
+            elif len(elt) > 1:
+                isbn = elt[1].find('b').tail.replace('-', '').strip()
+                if check_isbn(isbn):
+                    mi.isbn = unicode(isbn)
+        #Langue
+        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
+        if elt:
+            langue = elt[0].find('b').tail.strip()
+            if langue:
+                mi.language = unicode(langue)
+        #ratings
+        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
+        if elt:
+            ratings = elt[0].find_class('swSprite')
+            if ratings:
+                ratings = self.rerat.findall(ratings[0].get('title'))
+                if len(ratings) == 2:
+                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
+        return mi
+
+    def fill_MI(self, entry, title, authors, browser, verbose):
+        mi = MetaInformation(title, authors)
+        mi.author_sort = authors_to_sort_string(authors)
+        mi.comments = self.get_description(entry, verbose)
+        mi = self.get_book_info(entry, mi, verbose)
+        mi.tags = self.get_tags(entry, browser, verbose)
+        return mi
+
+    def get_individual_metadata(self, browser, linkdata, verbose):
+        try:
+            raw = browser.open_novisit(linkdata).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            raise
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                report(verbose)
+                return
+
+    def populate(self, entries, browser, verbose=False):
+        for x in entries:
+            try:
+                entry = self.get_individual_metadata(browser, x, verbose)
+                # clean results
+                # inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
+                # inv_class = ('buyingDetailsGrid', 'productImageGrid')
+                # inv_tags ={'script': True, 'style': True, 'form': False}
+                # self.clean_entry(entry, invalid_id=inv_ids)
+                title = self.get_title(entry)
+                authors = self.get_authors(entry)
+            except Exception, e:
+                if verbose:
+                    print 'Failed to get all details for an entry'
+                    print e
+                    print 'URL who failed:', x
+                    report(verbose)
+                continue
+            self.append(self.fill_MI(entry, title, authors, browser, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+        max_results=5, verbose=False, keywords=None, lang='all'):
+    br = browser()
+    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+        keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
+
+    if entries is None or len(entries) == 0:
+        return
+
+    #List of entry
+    ans = ResultList(baseurl, lang)
+    ans.populate(entries, br, verbose)
+    return ans
+
+
+def option_parser():
+    parser = OptionParser(textwrap.dedent(\
+    _('''\
+        %prog [options]
+
+        Fetch book metadata from Amazon. You must specify one of title, author,
+        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
+        so you should make your query as specific as possible.
+        You can chose the language for metadata search with amazon.com:
+        All & english & french & german & spanish
+    '''
+    )))
+    parser.add_option('-t', '--title', help='Book title')
+    parser.add_option('-a', '--author', help='Book author(s)')
+    parser.add_option('-p', '--publisher', help='Book publisher')
+    parser.add_option('-i', '--isbn', help='Book ISBN')
+    parser.add_option('-k', '--keywords', help='Keywords')
+    parser.add_option('-m', '--max-results', default=10,
+        help='Maximum number of results to fetch')
+    parser.add_option('-l', '--lang', default='all',
+        help='Chosen language for metadata search (all, en, fr, es, de)')
+    parser.add_option('-v', '--verbose', default=0, action='count',
+        help='Be more verbose about errors')
+    return parser
+
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    try:
+        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
+            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
+            lang=opts.lang)
+    except AssertionError:
+        report(True)
+        parser.print_help()
+        return 1
+    if results is None or len(results) == 0:
+        print 'No result found for this search!'
+        return 0
+    for result in results:
+        print unicode(result).encode(preferred_encoding, 'replace')
+        print
+
+if __name__ == '__main__':
+    sys.exit(main())
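
Besides the per-locale MetadataSource plugins, the module is runnable on its own through main(); programmatically the entry point is its search() function, which returns a ResultList of MetaInformation objects. A short usage sketch (the title string is just an example):

    from calibre.ebooks.metadata.amazonfr import search

    results = search(title='Le Petit Prince', max_results=5, verbose=True, lang='fr')
    if results:
        for mi in results:
            # Each result is a MetaInformation object.
            print mi.title, mi.authors, mi.isbn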
src/calibre/ebooks/metadata/fictionwise.py (new file, 390 lines)
@@ -0,0 +1,390 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian <sengian1@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap, re, traceback, socket
+from urllib import urlencode
+
+from lxml.html import soupparser, tostring
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+    authors_to_sort_string
+from calibre.library.comments import sanitize_comments_html
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
+from calibre.utils.date import parse_date, utcnow
+from calibre.utils.cleantext import clean_ascii_chars
+
+class Fictionwise(MetadataSource): # {{{
+
+    author = 'Sengian'
+    name = 'Fictionwise'
+    description = _('Downloads metadata from Fictionwise')
+
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # }}}
+
+class FictionwiseError(Exception):
+    pass
+
+def report(verbose):
+    if verbose:
+        traceback.print_exc()
+
+class Query(object):
+
+    BASE_URL = 'http://www.fictionwise.com/servlet/mw'
+
+    def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
+        assert not(title is None and author is None and publisher is None and keywords is None)
+        assert (max_results < 21)
+
+        self.max_results = int(max_results)
+        q = {   'template' : 'searchresults_adv.htm' ,
+                'searchtitle' : '',
+                'searchauthor' : '',
+                'searchpublisher' : '',
+                'searchkeyword' : '',
+                #possibilities startoflast, fullname, lastfirst
+                'searchauthortype' : 'startoflast',
+                'searchcategory' : '',
+                'searchcategory2' : '',
+                'searchprice_s' : '0',
+                'searchprice_e' : 'ANY',
+                'searchformat' : '',
+                'searchgeo' : 'US',
+                'searchfwdatetype' : '',
+                #maybe use dates fields if needed?
+                #'sortorder' : 'DESC',
+                #many options available: b.SortTitle, a.SortName,
+                #b.DateFirstPublished, b.FWPublishDate
+                'sortby' : 'b.SortTitle'
+            }
+        if title is not None:
+            q['searchtitle'] = title
+        if author is not None:
+            q['searchauthor'] = author
+        if publisher is not None:
+            q['searchpublisher'] = publisher
+        if keywords is not None:
+            q['searchkeyword'] = keywords
+
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        self.urldata = urlencode(q)
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        if verbose:
+            print _('Query: %s') % self.BASE_URL+self.urldata
+
+        try:
+            raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '<title>404 - ' in raw:
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            feed = soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
+
+        # get list of results as links
+        results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
+        results = results[:self.max_results]
+        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
+        #return feed if no links ie normally a single book or nothing
+        if not results:
+            results = [feed]
+        return results
+
+class ResultList(list):
+
+    BASE_URL = 'http://www.fictionwise.com'
+    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
+
+    def __init__(self):
+        self.retitle = re.compile(r'\[[^\[\]]+\]')
+        self.rechkauth = re.compile(r'.*book\s*by', re.I)
+        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
+        self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
+        self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
+        self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
+        self.resplitbr = re.compile(r'<br[^>]*>', re.I)
+        self.recomment = re.compile(r'(?s)<!--.*?-->')
+        self.reimg = re.compile(r'<img[^>]*>', re.I)
+        self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
+        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
+        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
+        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
+        self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
+
+    def strip_tags_etree(self, etreeobj, invalid_tags):
+        for (itag, rmv) in invalid_tags.iteritems():
+            if rmv:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tree()
+            else:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tag()
+
+    def clean_entry(self, entry, invalid_tags = {'script': True},
+            invalid_id = (), invalid_class=(), invalid_xpath = ()):
+        #invalid_tags: remove tag and keep content if False else remove
+        #remove tags
+        if invalid_tags:
+            self.strip_tags_etree(entry, invalid_tags)
+        #remove xpath
+        if invalid_xpath:
+            for eltid in invalid_xpath:
+                elt = entry.xpath(eltid)
+                for el in elt:
+                    el.drop_tree()
+        #remove id
+        if invalid_id:
+            for eltid in invalid_id:
+                elt = entry.get_element_by_id(eltid)
+                if elt is not None:
+                    elt.drop_tree()
+        #remove class
+        if invalid_class:
+            for eltclass in invalid_class:
+                elts = entry.find_class(eltclass)
+                if elts is not None:
+                    for elt in elts:
+                        elt.drop_tree()
+
+    def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
+        out = tostring(entry, pretty_print=prettyout)
+        #try to work around tostring to remove this encoding for exemle
+        reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
+        return reclean.sub('', out)
+
+    def get_title(self, entry):
+        title = entry.findtext('./')
+        return self.retitle.sub('', title).strip()
+
+    def get_authors(self, entry):
+        authortext = entry.find('./br').tail
+        if not self.rechkauth.search(authortext):
+            return []
+        authortext = self.rechkauth.sub('', authortext)
+        return [a.strip() for a in authortext.split('&')]
+
+    def get_rating(self, entrytable, verbose):
+        nbcomment = tostring(entrytable.getprevious())
+        try:
+            nbcomment = self.renbcom.search(nbcomment).group("nbcom")
+        except:
+            report(verbose)
+            return None
+        hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
+            float(image.get('height', default=0))) \
+                for image in entrytable.getiterator('img'))
+        #ratings as x/5
+        return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
+
+    def get_description(self, entry):
+        description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
+        description = self.redesc.search(description)
+        if not description or not description.group("desc"):
+            return None
+        #remove invalid tags
+        description = self.reimg.sub('', description.group("desc"))
+        description = self.recomment.sub('', description)
+        description = self.resanitize.sub('', sanitize_comments_html(description))
+        return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)
+
+    def get_publisher(self, entry):
+        publisher = self.output_entry(entry.xpath('./p')[1])
+        publisher = filter(lambda x: self.repub.search(x) is not None,
+            self.resplitbr.split(publisher))
+        if not len(publisher):
+            return None
+        publisher = self.repub.sub('', publisher[0])
+        return publisher.split(',')[0].strip()
+
+    def get_tags(self, entry):
+        tag = self.output_entry(entry.xpath('./p')[1])
+        tag = filter(lambda x: self.retag.search(x) is not None,
+            self.resplitbr.split(tag))
+        if not len(tag):
+            return []
+        return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
+
+    def get_date(self, entry, verbose):
+        date = self.output_entry(entry.xpath('./p')[1])
+        date = filter(lambda x: self.redate.search(x) is not None,
+            self.resplitbr.split(date))
+        if not len(date):
+            return None
+        try:
+            d = self.redate.sub('', date[0])
+            if d:
+                default = utcnow().replace(day=15)
+                d = parse_date(d, assume_utc=True, default=default)
+            else:
+                d = None
+        except:
+            report(verbose)
+            d = None
+        return d
+
+    def get_ISBN(self, entry):
+        isbns = self.output_entry(entry.xpath('./p')[2])
+        isbns = filter(lambda x: self.reisbn.search(x) is not None,
+            self.resplitbrdiv.split(isbns))
+        if not len(isbns):
+            return None
+        isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
+        return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
+
+    def fill_MI(self, entry, title, authors, ratings, verbose):
+        mi = MetaInformation(title, authors)
+        mi.rating = ratings
+        mi.comments = self.get_description(entry)
+        mi.publisher = self.get_publisher(entry)
+        mi.tags = self.get_tags(entry)
+        mi.pubdate = self.get_date(entry, verbose)
+        mi.isbn = self.get_ISBN(entry)
+        mi.author_sort = authors_to_sort_string(authors)
+        return mi
+
+    def get_individual_metadata(self, browser, linkdata, verbose):
+        try:
+            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
+
+    def populate(self, entries, browser, verbose=False):
+        inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
+            'ul': False, 'span': False}
+        inv_xpath =('./table',)
+        #single entry
+        if len(entries) == 1 and not isinstance(entries[0], str):
+            try:
+                entry = entries[0].xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
+                self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+                title = self.get_title(entry)
+                #maybe strenghten the search
+                ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
+                authors = self.get_authors(entry)
+            except Exception, e:
+                if verbose:
+                    print _('Failed to get all details for an entry')
+                    print e
+                return
+            self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+        else:
+            #multiple entries
+            for x in entries:
+                try:
+                    entry = self.get_individual_metadata(browser, x, verbose)
+                    entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
+                    self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+                    title = self.get_title(entry)
+                    #maybe strenghten the search
+                    ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
+                    authors = self.get_authors(entry)
+                except Exception, e:
+                    if verbose:
+                        print _('Failed to get all details for an entry')
+                        print e
+                    continue
+                self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+        min_viewability='none', verbose=False, max_results=5,
+        keywords=None):
+    br = browser()
+    entries = Query(title=title, author=author, publisher=publisher,
+        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
+
+    #List of entry
+    ans = ResultList()
+    ans.populate(entries, br, verbose)
+    return ans
+
+
+def option_parser():
+    parser = OptionParser(textwrap.dedent(\
+    _('''\
+        %prog [options]
+
+        Fetch book metadata from Fictionwise. You must specify one of title, author,
+        or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
+        so you should make your query as specific as possible.
+    ''')
+    ))
+    parser.add_option('-t', '--title', help=_('Book title'))
+    parser.add_option('-a', '--author', help=_('Book author(s)'))
+    parser.add_option('-p', '--publisher', help=_('Book publisher'))
+    parser.add_option('-k', '--keywords', help=_('Keywords'))
+    parser.add_option('-m', '--max-results', default=20,
+        help=_('Maximum number of results to fetch'))
+    parser.add_option('-v', '--verbose', default=0, action='count',
+        help=_('Be more verbose about errors'))
+    return parser
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    try:
+        results = search(opts.title, opts.author, publisher=opts.publisher,
+            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
+    except AssertionError:
+        report(True)
+        parser.print_help()
+        return 1
+    if results is None or len(results) == 0:
+        print _('No result found for this search!')
+        return 0
+    for result in results:
+        print unicode(result).encode(preferred_encoding, 'replace')
+        print
+
+if __name__ == '__main__':
+    sys.exit(main())
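
The least obvious part of the new file is get_rating: Fictionwise renders reader ratings as colored bar images, so the code buckets each image by color (COLOR_VALUES, 0-4) weighted by the image height, then rescales the weighted average to a 5-point scale with the 1.25 factor. A worked example under those assumptions:

    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

    # Say the page shows a 40px blue bar and a 10px green bar:
    hval = {4: 40.0, 3: 10.0}
    rating = 1.25 * sum(k * v for k, v in hval.items()) / sum(hval.values())
    # (4*40 + 3*10) / 50 = 3.8 on the 0-4 color scale; * 1.25 -> 4.75 out of 5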
@@ -10,7 +10,8 @@ from copy import deepcopy
 
 from lxml.html import soupparser
 
-from calibre.utils.date import parse_date, utcnow
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
 from calibre import browser, preferred_encoding
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import MetaInformation, check_isbn, \
@@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload):
                 traceback.format_exc(), self.name))
 
 
+class NiceBooksError(Exception):
+    pass
+
+class ISBNNotFound(NiceBooksError):
+    pass
+
 def report(verbose):
     if verbose:
-        import traceback
         traceback.print_exc()
 
-def replace_monthsfr(datefr):
-    # Replace french months by english equivalent for parse_date
-    frtoen = {
-        u'[jJ]anvier': u'jan',
-        u'[fF].vrier': u'feb',
-        u'[mM]ars': u'mar',
-        u'[aA]vril': u'apr',
-        u'[mM]ai': u'may',
-        u'[jJ]uin': u'jun',
-        u'[jJ]uillet': u'jul',
-        u'[aA]o.t': u'aug',
-        u'[sS]eptembre': u'sep',
-        u'[Oo]ctobre': u'oct',
-        u'[nN]ovembre': u'nov',
-        u'[dD].cembre': u'dec' }
-    for k in frtoen.iterkeys():
-        tmp = re.sub(k, frtoen[k], datefr)
-        if tmp <> datefr: break
-    return tmp
-
 class Query(object):
 
     BASE_URL = 'http://fr.nicebooks.com/'
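
This hunk drops the module-private replace_monthsfr in favour of the shared replace_months helper now imported from calibre.utils.date (amazonfr.py uses it too), which maps localized month names to English so parse_date can handle them. An illustrative call, assuming the signature shown in these diffs; the exact output string is an expectation, not verified:

    from calibre.utils.date import parse_date, replace_months

    d = replace_months(u'15 janvier 2010', 'fr')   # expected: u'15 jan 2010'
    pubdate = parse_date(d, assume_utc=True)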
@@ -119,7 +105,7 @@ class Query(object):
 
     def __call__(self, browser, verbose, timeout = 5.):
         if verbose:
-            print 'Query:', self.BASE_URL+self.urldata
+            print _('Query: %s') % self.BASE_URL+self.urldata
 
         try:
             raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
@@ -128,7 +114,9 @@ class Query(object):
             if callable(getattr(e, 'getcode', None)) and \
                     e.getcode() == 404:
                 return
-            raise
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+            raise NiceBooksError(_('Nicebooks encountered an error.'))
         if '<title>404 - ' in raw:
             return
         raw = xml_to_unicode(raw, strip_encoding_pats=True,
@@ -136,7 +124,11 @@ class Query(object):
         try:
             feed = soupparser.fromstring(raw)
         except:
-            return
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
 
         #nb of page to call
         try:
@@ -161,7 +153,11 @@ class Query(object):
             try:
                 feed = soupparser.fromstring(raw)
             except:
-                continue
+                try:
+                    #remove ASCII invalid chars
+                    feed = soupparser.fromstring(clean_ascii_chars(raw))
+                except:
+                    continue
             pages.append(feed)
 
         results = []
@@ -180,14 +176,12 @@ class ResultList(list):
         self.reautclean = re.compile(u'\s*\(.*\)\s*')
 
     def get_title(self, entry):
-        # title = deepcopy(entry.find("div[@id='book-info']"))
         title = deepcopy(entry)
         title.remove(title.find("dl[@title='Informations sur le livre']"))
         title = ' '.join([i.text_content() for i in title.iterchildren()])
         return unicode(title.replace('\n', ''))
 
     def get_authors(self, entry):
-        # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
         author = entry.find("dl[@title='Informations sur le livre']")
         authortext = []
         for x in author.getiterator('dt'):
@@ -223,7 +217,7 @@ class ResultList(list):
             d = x.getnext().text_content()
             try:
                 default = utcnow().replace(day=15)
-                d = replace_monthsfr(d)
+                d = replace_months(d, 'fr')
                 d = parse_date(d, assume_utc=True, default=default)
                 mi.pubdate = d
             except:
@@ -234,11 +228,6 @@ class ResultList(list):
         mi = MetaInformation(title, authors)
         mi.author_sort = authors_to_sort_string(authors)
         mi.comments = self.get_description(entry, verbose)
-        # entry = entry.find("dl[@title='Informations sur le livre']")
-        # mi.publisher = self.get_publisher(entry)
-        # mi.pubdate = self.get_date(entry, verbose)
-        # mi.isbn = self.get_ISBN(entry)
-        # mi.language = self.get_language(entry)
         return self.get_book_info(entry, mi, verbose)
 
     def get_individual_metadata(self, browser, linkdata, verbose):
@@ -249,7 +238,9 @@ class ResultList(list):
             if callable(getattr(e, 'getcode', None)) and \
                     e.getcode() == 404:
                 return
-            raise
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+            raise NiceBooksError(_('Nicebooks encountered an error.'))
         if '<title>404 - ' in raw:
             report(verbose)
             return
@@ -258,7 +249,11 @@ class ResultList(list):
         try:
             feed = soupparser.fromstring(raw)
         except:
-            return
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
 
         # get results
         return feed.xpath("//div[@id='container']")[0]
@@ -292,13 +287,6 @@ class ResultList(list):
                 continue
             self.append(self.fill_MI(entry, title, authors, verbose))
 
-
-class NiceBooksError(Exception):
-    pass
-
-class ISBNNotFound(NiceBooksError):
-    pass
-
 class Covers(object):
 
     def __init__(self, isbn = None):
@ -329,11 +317,10 @@ class Covers(object):
|
|||||||
return cover, ext if ext else 'jpg'
|
return cover, ext if ext else 'jpg'
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
||||||
err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
||||||
raise err
|
|
||||||
if not len(self.urlimg):
|
if not len(self.urlimg):
|
||||||
if not self.isbnf:
|
if not self.isbnf:
|
||||||
raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
|
raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
|
||||||
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
|
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
|
||||||
|
|
||||||
|
|
||||||
@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
|
|||||||
max_results=5, verbose=False, keywords=None):
|
max_results=5, verbose=False, keywords=None):
|
||||||
br = browser()
|
br = browser()
|
||||||
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
|
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
|
||||||
keywords=keywords, max_results=max_results)(br, verbose)
|
keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
|
||||||
|
|
||||||
if entries is None or len(entries) == 0:
|
if entries is None or len(entries) == 0:
|
||||||
return
|
return None
|
||||||
|
|
||||||
#List of entry
|
#List of entry
|
||||||
ans = ResultList()
|
ans = ResultList()
|
||||||
@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.):
|
|||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = OptionParser(textwrap.dedent(\
|
parser = OptionParser(textwrap.dedent(\
|
||||||
'''\
|
_('''\
|
||||||
%prog [options]
|
%prog [options]
|
||||||
|
|
||||||
Fetch book metadata from Nicebooks. You must specify one of title, author,
|
Fetch book metadata from Nicebooks. You must specify one of title, author,
|
||||||
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
||||||
so you should make your query as specific as possible.
|
so you should make your query as specific as possible.
|
||||||
It can also get covers if the option is activated.
|
It can also get covers if the option is activated.
|
||||||
'''
|
''')
|
||||||
))
|
))
|
||||||
parser.add_option('-t', '--title', help='Book title')
|
parser.add_option('-t', '--title', help=_('Book title'))
|
||||||
parser.add_option('-a', '--author', help='Book author(s)')
|
parser.add_option('-a', '--author', help=_('Book author(s)'))
|
||||||
parser.add_option('-p', '--publisher', help='Book publisher')
|
parser.add_option('-p', '--publisher', help=_('Book publisher'))
|
||||||
parser.add_option('-i', '--isbn', help='Book ISBN')
|
parser.add_option('-i', '--isbn', help=_('Book ISBN'))
|
||||||
parser.add_option('-k', '--keywords', help='Keywords')
|
parser.add_option('-k', '--keywords', help=_('Keywords'))
|
||||||
parser.add_option('-c', '--covers', default=0,
|
parser.add_option('-c', '--covers', default=0,
|
||||||
help='Covers: 1-Check/ 2-Download')
|
help=_('Covers: 1-Check/ 2-Download'))
|
||||||
parser.add_option('-p', '--coverspath', default='',
|
parser.add_option('-p', '--coverspath', default='',
|
||||||
help='Covers files path')
|
help=_('Covers files path'))
|
||||||
parser.add_option('-m', '--max-results', default=20,
|
parser.add_option('-m', '--max-results', default=20,
|
||||||
help='Maximum number of results to fetch')
|
help=_('Maximum number of results to fetch'))
|
||||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||||
help='Be more verbose about errors')
|
help=_('Be more verbose about errors'))
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
@ -400,15 +387,15 @@ def main(args=sys.argv):
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
return 1
|
return 1
|
||||||
if results is None or len(results) == 0:
|
if results is None or len(results) == 0:
|
||||||
print 'No result found for this search!'
|
print _('No result found for this search!')
|
||||||
return 0
|
return 0
|
||||||
for result in results:
|
for result in results:
|
||||||
print unicode(result).encode(preferred_encoding, 'replace')
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
covact = int(opts.covers)
|
covact = int(opts.covers)
|
||||||
if covact == 1:
|
if covact == 1:
|
||||||
textcover = 'No cover found!'
|
textcover = _('No cover found!')
|
||||||
if check_for_cover(result.isbn):
|
if check_for_cover(result.isbn):
|
||||||
textcover = 'A cover was found for this book'
|
textcover = _('A cover was found for this book')
|
||||||
print textcover
|
print textcover
|
||||||
elif covact == 2:
|
elif covact == 2:
|
||||||
cover_data, ext = cover_from_isbn(result.isbn)
|
cover_data, ext = cover_from_isbn(result.isbn)
|
||||||
@ -417,7 +404,7 @@ def main(args=sys.argv):
|
|||||||
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
||||||
oname = os.path.abspath(cpath+'.'+ext)
|
oname = os.path.abspath(cpath+'.'+ext)
|
||||||
open(oname, 'wb').write(cover_data)
|
open(oname, 'wb').write(cover_data)
|
||||||
print 'Cover saved to file ', oname
|
print _('Cover saved to file '), oname
|
||||||
print
|
print
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
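The bulk of the hunks above are mechanical: every user-visible string gains the gettext `_()` wrapper so the CLI output can be translated. For readers unfamiliar with the convention, a minimal sketch (the domain name here is made up; calibre installs `_` globally through its own startup code):

    import gettext

    # gettext.install() puts _() into builtins; translations are looked
    # up by domain, and untranslated strings pass through unchanged.
    gettext.install('example')

    print _('No result found for this search!')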
@@ -8,12 +8,12 @@ __docformat__ = 'restructuredtext en'

 from threading import Thread
 from Queue import Empty
-import os, time, sys, shutil
+import os, time, sys, shutil, json

 from calibre.utils.ipc.job import ParallelJob
 from calibre.utils.ipc.server import Server
 from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
-from calibre import prints
+from calibre import prints, isbytestring
 from calibre.constants import filesystem_encoding


@@ -194,14 +194,42 @@ class SaveWorker(Thread):
         self.daemon = True
         self.path, self.opts = path, opts
         self.ids = ids
-        self.library_path = db.library_path
+        self.db = db
         self.canceled = False
         self.result_queue = result_queue
         self.error = None
         self.spare_server = spare_server
         self.start()

+    def collect_data(self, ids):
+        from calibre.ebooks.metadata.opf2 import metadata_to_opf
+        data = {}
+        for i in set(ids):
+            mi = self.db.get_metadata(i, index_is_id=True, get_cover=True)
+            opf = metadata_to_opf(mi)
+            if isbytestring(opf):
+                opf = opf.decode('utf-8')
+            cpath = None
+            if mi.cover:
+                cpath = mi.cover
+                if isbytestring(cpath):
+                    cpath = cpath.decode(filesystem_encoding)
+            formats = {}
+            if mi.formats:
+                for fmt in mi.formats:
+                    fpath = self.db.format_abspath(i, fmt, index_is_id=True)
+                    if fpath is not None:
+                        if isbytestring(fpath):
+                            fpath = fpath.decode(filesystem_encoding)
+                        formats[fmt.lower()] = fpath
+            data[i] = [opf, cpath, formats]
+        return data
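collect_data runs in the GUI process, where the database is available, and reduces each book to plain serializable values: the OPF metadata as a unicode XML string, the cover path (or None), and a format-to-file-path map. A sketch of the shape of one entry, with hypothetical paths:

    # What collect_data produces for a single book id (paths are made up):
    data = {
        42: [
            u'<?xml version="1.0"?>...OPF XML...',         # metadata_to_opf(mi)
            u'/home/user/library/Author/Title/cover.jpg',  # cover path, or None
            {u'epub': u'/home/user/library/Author/Title/Title.epub'},
        ],
    }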
     def run(self):
+        with TemporaryDirectory('save_to_disk_data') as tdir:
+            self._run(tdir)

+    def _run(self, tdir):
         from calibre.library.save_to_disk import config
         server = Server() if self.spare_server is None else self.spare_server
         ids = set(self.ids)
@@ -212,12 +240,19 @@ class SaveWorker(Thread):
         for pref in c.preferences:
             recs[pref.name] = getattr(self.opts, pref.name)

+        plugboards = self.db.prefs.get('plugboards', {})

         for i, task in enumerate(tasks):
             tids = [x[-1] for x in task]
+            data = self.collect_data(tids)
+            dpath = os.path.join(tdir, '%d.json'%i)
+            with open(dpath, 'wb') as f:
+                f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

             job = ParallelJob('save_book',
                     'Save books (%d of %d)'%(i, len(tasks)),
                     lambda x,y:x,
-                    args=[tids, self.library_path, self.path, recs])
+                    args=[tids, dpath, plugboards, self.path, recs])
             jobs.add(job)
             server.add_job(job)
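One subtlety in the dump above: with ensure_ascii=False, json.dumps returns a unicode object in Python 2 whenever the input contains non-ASCII text, so the result has to be encoded by hand before being written to a binary file. A minimal sketch (the path is illustrative):

    import json

    data = {1: [u'caf\xe9', None, {}]}
    # ensure_ascii=False keeps non-ASCII characters as-is, but then the
    # result is a unicode object and must be encoded explicitly.
    serialized = json.dumps(data, ensure_ascii=False)
    with open('/tmp/0.json', 'wb') as f:
        f.write(serialized.encode('utf-8'))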
@@ -226,21 +261,21 @@ class SaveWorker(Thread):
             time.sleep(0.2)
             running = False
             for job in jobs:
-                job.update(consume_notifications=False)
-                while True:
-                    try:
-                        id, title, ok, tb = job.notifications.get_nowait()[0]
-                        if id in ids:
-                            self.result_queue.put((id, title, ok, tb))
-                            ids.remove(id)
-                    except Empty:
-                        break
+                self.get_notifications(job, ids)
                 if not job.is_finished:
                     running = True

             if not running:
                 break

+        for job in jobs:
+            if not job.result:
+                continue
+            for id_, title, ok, tb in job.result:
+                if id_ in ids:
+                    self.result_queue.put((id_, title, ok, tb))
+                    ids.remove(id_)

         server.close()
         time.sleep(1)
@@ -257,21 +292,39 @@ class SaveWorker(Thread):
             except:
                 pass

+    def get_notifications(self, job, ids):
+        job.update(consume_notifications=False)
+        while True:
+            try:
+                id, title, ok, tb = job.notifications.get_nowait()[0]
+                if id in ids:
+                    self.result_queue.put((id, title, ok, tb))
+                    ids.remove(id)
+            except Empty:
+                break

-def save_book(task, library_path, path, recs, notification=lambda x,y:x):
-    from calibre.library.database2 import LibraryDatabase2
-    db = LibraryDatabase2(library_path)
-    from calibre.library.save_to_disk import config, save_to_disk
+def save_book(ids, dpath, plugboards, path, recs, notification=lambda x,y:x):
+    from calibre.library.save_to_disk import config, save_serialized_to_disk
     from calibre.customize.ui import apply_null_metadata
     opts = config().parse()
     for name in recs:
         setattr(opts, name, recs[name])

+    results = []

     def callback(id, title, failed, tb):
+        results.append((id, title, not failed, tb))
         notification((id, title, not failed, tb))
         return True

-    with apply_null_metadata:
-        save_to_disk(db, task, path, opts, callback)
+    data_ = json.loads(open(dpath, 'rb').read().decode('utf-8'))
+    data = {}
+    for k, v in data_.iteritems():
+        data[int(k)] = v

+    with apply_null_metadata:
+        save_serialized_to_disk(ids, data, plugboards, path, opts, callback)

+    return results
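The worker reloads the JSON file before using it; because JSON object keys are always strings, the integer book ids have to be restored by hand, which is what the `data[int(k)] = v` loop does. A minimal sketch of the round trip:

    import json

    payload = {42: [u'<opf/>', None, {u'epub': u'/tmp/book.epub'}]}
    raw = json.dumps(payload, ensure_ascii=False).encode('utf-8')

    decoded = json.loads(raw.decode('utf-8'))
    assert decoded.keys() == [u'42']    # keys came back as strings
    data = dict((int(k), v) for k, v in decoded.iteritems())
    assert 42 in data                   # integer ids restored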
@@ -123,6 +123,8 @@ def _config():
               help=_('Download social metadata (tags/rating/etc.)'))
     c.add_opt('overwrite_author_title_metadata', default=True,
               help=_('Overwrite author and title with new metadata'))
+    c.add_opt('auto_download_cover', default=False,
+              help=_('Automatically download the cover, if available'))
     c.add_opt('enforce_cpu_limit', default=True,
               help=_('Limit max simultaneous jobs to number of CPUs'))
     c.add_opt('tag_browser_hidden_categories', default=set(),

@@ -427,11 +427,27 @@ class Saver(QObject): # {{{
         if not self.ids or not self.worker.is_alive():
             self.timer.stop()
             self.pd.hide()
+            while self.ids:
+                before = len(self.ids)
+                self.get_result()
+                if before == len(self.ids):
+                    for i in list(self.ids):
+                        self.failures.add(('id:%d'%i, 'Unknown error'))
+                        self.ids.remove(i)
+                    break
             if not self.callback_called:
+                try:
+                    self.worker.join(1.5)
+                except:
+                    pass # The worker was not yet started
                 self.callback(self.worker.path, self.failures, self.worker.error)
                 self.callback_called = True
             return

+        self.get_result()

+    def get_result(self):
         try:
             id, title, ok, tb = self.rq.get_nowait()
         except Empty:
@@ -441,6 +457,7 @@ class Saver(QObject): # {{{
         if not isinstance(title, unicode):
             title = str(title).decode(preferred_encoding, 'replace')
         self.pd.set_msg(_('Saved')+' '+title)

         if not ok:
             self.failures.add((title, tb))
 # }}}
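The `while self.ids` loop above is a drain-with-stall-detection pattern: keep pulling results off the queue, and if a full pass removes nothing, mark whatever is left as failed rather than spin forever. The same idea in isolation (the queue and id set here are illustrative):

    from Queue import Queue, Empty

    rq = Queue()
    pending = set([1, 2, 3])
    failures = set()

    while pending:
        before = len(pending)
        try:
            id_, title, ok, tb = rq.get_nowait()
            pending.discard(id_)
        except Empty:
            pass
        if before == len(pending):
            # No progress in a full pass: record the stragglers as
            # unknown failures and stop instead of looping forever.
            for i in list(pending):
                failures.add(('id:%d' % i, 'Unknown error'))
                pending.remove(i)
            break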
@@ -9,7 +9,7 @@ from threading import Thread

 from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, pyqtSignal, \
                          QAbstractTableModel, QCoreApplication, QTimer
-from PyQt4.QtGui import QDialog, QItemSelectionModel
+from PyQt4.QtGui import QDialog, QItemSelectionModel, QIcon

 from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
 from calibre.gui2 import error_dialog, NONE, info_dialog, config
@@ -42,13 +42,14 @@ class Matches(QAbstractTableModel):

     def __init__(self, matches):
         self.matches = matches
+        self.yes_icon = QVariant(QIcon(I('ok.png')))
         QAbstractTableModel.__init__(self)

     def rowCount(self, *args):
         return len(self.matches)

     def columnCount(self, *args):
-        return 6
+        return 8

     def headerData(self, section, orientation, role):
         if role != Qt.DisplayRole:
@@ -61,6 +62,8 @@ class Matches(QAbstractTableModel):
             elif section == 3: text = _("Publisher")
             elif section == 4: text = _("ISBN")
             elif section == 5: text = _("Published")
+            elif section == 6: text = _("Has Cover")
+            elif section == 7: text = _("Has Summary")

             return QVariant(text)
         else:
@@ -71,8 +74,8 @@ class Matches(QAbstractTableModel):

     def data(self, index, role):
         row, col = index.row(), index.column()
+        book = self.matches[row]
         if role == Qt.DisplayRole:
-            book = self.matches[row]
             res = None
             if col == 0:
                 res = book.title
@@ -90,6 +93,11 @@ class Matches(QAbstractTableModel):
             if not res:
                 return NONE
             return QVariant(res)
+        elif role == Qt.DecorationRole:
+            if col == 6 and book.has_cover:
+                return self.yes_icon
+            if col == 7 and book.comments:
+                return self.yes_icon
         return NONE
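The two new columns carry no text at all: the model answers Qt.DecorationRole with an icon, and the view draws it in the cell. A stripped-down sketch of the same idiom (PyQt4; the model fields are illustrative):

    from PyQt4.QtCore import Qt, QVariant, QAbstractTableModel
    from PyQt4.QtGui import QIcon

    class FlagModel(QAbstractTableModel):

        def __init__(self, flags, icon_path):
            QAbstractTableModel.__init__(self)
            self.flags = flags                        # list of booleans
            self.yes_icon = QVariant(QIcon(icon_path))

        def rowCount(self, *args):
            return len(self.flags)

        def columnCount(self, *args):
            return 1

        def data(self, index, role):
            # No DisplayRole text: the cell is either decorated or empty.
            if role == Qt.DecorationRole and self.flags[index.row()]:
                return self.yes_icon
            return QVariant()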
 class FetchMetadata(QDialog, Ui_FetchMetadata):
@@ -131,7 +139,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
         self.fetch_metadata()
         self.opt_get_social_metadata.setChecked(config['get_social_metadata'])
         self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata'])
+        self.opt_auto_download_cover.setChecked(config['auto_download_cover'])

     def show_summary(self, current, *args):
         row = current.row()
@@ -213,6 +221,12 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
         _hung_fetchers.add(self.fetcher)
         if hasattr(self, '_hangcheck') and self._hangcheck.isActive():
             self._hangcheck.stop()
+        # Save value of auto_download_cover, since this is the only place it can
+        # be set. The values of the other options can be set in
+        # Preferences->Behavior and should not be set here as they affect bulk
+        # downloading as well.
+        if self.opt_auto_download_cover.isChecked() != config['auto_download_cover']:
+            config.set('auto_download_cover', self.opt_auto_download_cover.isChecked())

     def __enter__(self, *args):
         return self

@@ -1,172 +1,179 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
  <class>FetchMetadata</class>
  <widget class="QDialog" name="FetchMetadata">
   <property name="windowModality">
    <enum>Qt::WindowModal</enum>
   </property>
   <property name="geometry">
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>830</width>
+    <width>890</width>
     <height>642</height>
    </rect>
   </property>
   <property name="windowTitle">
    <string>Fetch metadata</string>
   </property>
   <property name="windowIcon">
    <iconset resource="../../../../resources/images.qrc">
     <normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset>
   </property>
   <layout class="QVBoxLayout">
    <item>
     <widget class="QLabel" name="tlabel">
      <property name="text">
       <string>&lt;p&gt;calibre can find metadata for your books from two locations: &lt;b&gt;Google Books&lt;/b&gt; and &lt;b&gt;isbndb.com&lt;/b&gt;. &lt;p&gt;To use isbndb.com you must sign up for a &lt;a href="http://www.isbndb.com"&gt;free account&lt;/a&gt; and enter your access key below.</string>
      </property>
      <property name="alignment">
       <set>Qt::AlignCenter</set>
      </property>
      <property name="wordWrap">
       <bool>true</bool>
      </property>
      <property name="openExternalLinks">
       <bool>true</bool>
      </property>
     </widget>
    </item>
    <item>
     <layout class="QHBoxLayout">
      <item>
       <widget class="QLabel" name="label_2">
        <property name="text">
         <string>&amp;Access Key:</string>
        </property>
        <property name="buddy">
         <cstring>key</cstring>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QLineEdit" name="key"/>
      </item>
      <item>
       <widget class="QPushButton" name="fetch">
        <property name="text">
         <string>Fetch</string>
        </property>
       </widget>
      </item>
     </layout>
    </item>
    <item>
     <widget class="QLabel" name="warning">
      <property name="text">
       <string/>
      </property>
      <property name="wordWrap">
       <bool>true</bool>
      </property>
     </widget>
    </item>
    <item>
     <widget class="QGroupBox" name="groupBox">
      <property name="title">
       <string>Matches</string>
      </property>
      <layout class="QVBoxLayout">
       <item>
        <widget class="QLabel" name="label_3">
         <property name="text">
          <string>Select the book that most closely matches your copy from the list below</string>
         </property>
        </widget>
       </item>
       <item>
        <widget class="QTableView" name="matches">
         <property name="sizePolicy">
          <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
           <horstretch>0</horstretch>
           <verstretch>1</verstretch>
          </sizepolicy>
         </property>
         <property name="alternatingRowColors">
          <bool>true</bool>
         </property>
         <property name="selectionMode">
          <enum>QAbstractItemView::SingleSelection</enum>
         </property>
         <property name="selectionBehavior">
          <enum>QAbstractItemView::SelectRows</enum>
         </property>
        </widget>
       </item>
       <item>
        <widget class="QTextBrowser" name="summary"/>
       </item>
      </layout>
     </widget>
    </item>
    <item>
-    <widget class="QCheckBox" name="opt_get_social_metadata">
+    <widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
      <property name="text">
-      <string>Download &amp;social metadata (tags/rating/etc.) for the selected book</string>
+      <string>Overwrite author and title with author and title of selected book</string>
      </property>
     </widget>
    </item>
    <item>
-    <widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
+    <widget class="QCheckBox" name="opt_get_social_metadata">
      <property name="text">
-      <string>Overwrite author and title with author and title of selected book</string>
+      <string>Download &amp;social metadata (tags/rating/etc.) for the selected book</string>
      </property>
     </widget>
    </item>
    <item>
-    <widget class="QDialogButtonBox" name="buttonBox">
-     <property name="standardButtons">
-      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+    <widget class="QCheckBox" name="opt_auto_download_cover">
+     <property name="text">
+      <string>Automatically download the cover, if available</string>
      </property>
     </widget>
    </item>
+   <item>
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources>
  <include location="../../../../resources/images.qrc"/>
 </resources>
 <connections>
  <connection>
   <sender>buttonBox</sender>
   <signal>accepted()</signal>
   <receiver>FetchMetadata</receiver>
   <slot>accept()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>460</x>
     <y>599</y>
    </hint>
    <hint type="destinationlabel">
     <x>657</x>
     <y>530</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>buttonBox</sender>
   <signal>rejected()</signal>
   <receiver>FetchMetadata</receiver>
   <slot>reject()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>417</x>
     <y>599</y>
    </hint>
    <hint type="destinationlabel">
     <x>0</x>
     <y>491</y>
    </hint>
   </hints>
  </connection>
 </connections>
</ui>
@@ -760,8 +760,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
             if book.publisher: self.publisher.setEditText(book.publisher)
             if book.isbn: self.isbn.setText(book.isbn)
             if book.pubdate:
-                d = book.pubdate
-                self.pubdate.setDate(QDate(d.year, d.month, d.day))
+                dt = book.pubdate
+                self.pubdate.setDate(QDate(dt.year, dt.month, dt.day))
             summ = book.comments
             if summ:
                 prefix = unicode(self.comments.toPlainText())
@@ -777,8 +777,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 self.series.setText(book.series)
             if book.series_index is not None:
                 self.series_index.setValue(book.series_index)
-            # Needed because of Qt focus bug on OS X
-            self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
+            if book.has_cover:
+                if d.opt_auto_download_cover.isChecked() and book.has_cover:
+                    self.fetch_cover()
+                else:
+                    self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
         else:
             error_dialog(self, _('Cannot fetch metadata'),
                     _('You must specify at least one of ISBN, Title, '
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, traceback, cStringIO, re
+import os, traceback, cStringIO, re, shutil

 from calibre.constants import DEBUG
 from calibre.utils.config import Config, StringConfig, tweaks
@@ -203,31 +203,49 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
     return shorten_components_to(length, components)


-def save_book_to_disk(id, db, root, opts, length):
-    mi = db.get_metadata(id, index_is_id=True)
+def save_book_to_disk(id_, db, root, opts, length):
+    mi = db.get_metadata(id_, index_is_id=True)
+    cover = db.cover(id_, index_is_id=True, as_path=True)
+    plugboards = db.prefs.get('plugboards', {})

-    available_formats = db.formats(id, index_is_id=True)
+    available_formats = db.formats(id_, index_is_id=True)
     if not available_formats:
         available_formats = []
     else:
         available_formats = [x.lower().strip() for x in
                 available_formats.split(',')]
+    formats = {}
+    fmts = db.formats(id_, index_is_id=True, verify_formats=False)
+    if fmts:
+        fmts = fmts.split(',')
+        for fmt in fmts:
+            fpath = db.format_abspath(id_, fmt, index_is_id=True)
+            if fpath is not None:
+                formats[fmt.lower()] = fpath

+    return do_save_book_to_disk(id_, mi, cover, plugboards,
+            formats, root, opts, length)


+def do_save_book_to_disk(id_, mi, cover, plugboards,
+        format_map, root, opts, length):
+    available_formats = [x.lower().strip() for x in format_map.keys()]
     if opts.formats == 'all':
         asked_formats = available_formats
     else:
         asked_formats = [x.lower().strip() for x in opts.formats.split(',')]
     formats = set(available_formats).intersection(set(asked_formats))
     if not formats:
-        return True, id, mi.title
+        return True, id_, mi.title

-    components = get_components(opts.template, mi, id, opts.timefmt, length,
+    components = get_components(opts.template, mi, id_, opts.timefmt, length,
             ascii_filename if opts.asciiize else sanitize_file_name,
             to_lowercase=opts.to_lowercase,
             replace_whitespace=opts.replace_whitespace)
     base_path = os.path.join(root, *components)
     base_name = os.path.basename(base_path)
     dirpath = os.path.dirname(base_path)
-    # Don't test for existence first are the test could fail but
+    # Don't test for existence first as the test could fail but
     # another worker process could create the directory before
     # the call to makedirs
     try:
@@ -236,29 +254,23 @@ def save_book_to_disk(id, db, root, opts, length):
         if not os.path.exists(dirpath):
             raise

-    cdata = db.cover(id, index_is_id=True)
-    if opts.save_cover:
-        if cdata is not None:
-            with open(base_path+'.jpg', 'wb') as f:
-                f.write(cdata)
-            mi.cover = base_name+'.jpg'
-        else:
-            mi.cover = None
+    if opts.save_cover and cover and os.access(cover, os.R_OK):
+        with open(base_path+'.jpg', 'wb') as f:
+            with open(cover, 'rb') as s:
+                shutil.copyfileobj(s, f)
+        mi.cover = base_name+'.jpg'
+    else:
+        mi.cover = None

     if opts.write_opf:
         opf = metadata_to_opf(mi)
         with open(base_path+'.opf', 'wb') as f:
             f.write(opf)

-    if cdata is not None:
-        mi.cover_data = ('jpg', cdata)
-        mi.cover = None

     written = False
     for fmt in formats:
         global plugboard_save_to_disk_value, plugboard_any_format_value
         dev_name = plugboard_save_to_disk_value
-        plugboards = db.prefs.get('plugboards', {})
         cpb = None
         if fmt in plugboards:
             cpb = plugboards[fmt]
@@ -275,11 +287,12 @@ def save_book_to_disk(id, db, root, opts, length):
         # Leave this here for a while, in case problems arise.
         if cpb is not None:
             prints('Save-to-disk using plugboard:', fmt, cpb)
-        data = db.format(id, fmt, index_is_id=True)
-        if data is None:
+        fp = format_map.get(fmt, None)
+        if fp is None:
             continue
-        else:
-            written = True
+        with open(fp, 'rb') as f:
+            data = f.read()
+        written = True
         if opts.update_metadata:
             stream = cStringIO.StringIO()
             stream.write(data)
@@ -300,9 +313,21 @@ def save_book_to_disk(id, db, root, opts, length):
         with open(fmt_path, 'wb') as f:
             f.write(data)

-    return not written, id, mi.title
+    return not written, id_, mi.title

+def _sanitize_args(root, opts):
+    if opts is None:
+        opts = config().parse()
+    if isinstance(root, unicode):
+        root = root.encode(filesystem_encoding)
+    root = os.path.abspath(root)

+    opts.template = preprocess_template(opts.template)
+    length = 1000 if supports_long_names(root) else 250
+    length -= len(root)
+    if length < 5:
+        raise ValueError('%r is too long.'%root)
+    return root, opts, length

 def save_to_disk(db, ids, root, opts=None, callback=None):
     '''
@@ -316,17 +341,7 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
     :return: A list of failures. Each element of the list is a tuple
     (id, title, traceback)
     '''
-    if opts is None:
-        opts = config().parse()
-    if isinstance(root, unicode):
-        root = root.encode(filesystem_encoding)
-    root = os.path.abspath(root)

-    opts.template = preprocess_template(opts.template)
-    length = 1000 if supports_long_names(root) else 250
-    length -= len(root)
-    if length < 5:
-        raise ValueError('%r is too long.'%root)
+    root, opts, length = _sanitize_args(root, opts)
     failures = []
     for x in ids:
         tb = ''
@@ -343,4 +358,28 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
             break
     return failures

+def save_serialized_to_disk(ids, data, plugboards, root, opts, callback):
+    from calibre.ebooks.metadata.opf2 import OPF
+    root, opts, length = _sanitize_args(root, opts)
+    failures = []
+    for x in ids:
+        opf, cover, format_map = data[x]
+        if isinstance(opf, unicode):
+            opf = opf.encode('utf-8')
+        mi = OPF(cStringIO.StringIO(opf)).to_book_metadata()
+        tb = ''
+        try:
+            failed, id, title = do_save_book_to_disk(x, mi, cover, plugboards,
+                    format_map, root, opts, length)
+            tb = _('Requested formats not available')
+        except:
+            failed, id, title = True, x, mi.title
+            tb = traceback.format_exc()
+        if failed:
+            failures.append((id, title, tb))
+        if callable(callback):
+            if not callback(int(id), title, failed, tb):
+                break

+    return failures
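save_serialized_to_disk reports progress through the callback: it is invoked once per book with (id, title, failed, tb), and returning False aborts the rest of the batch. A minimal consumer sketch:

    def callback(id_, title, failed, tb):
        # Called once per saved book; tb carries the traceback (or a
        # 'formats not available' message) when failed is True.
        if failed:
            print 'FAILED', id_, title
            print tb
        else:
            print 'saved', id_, title
        return True   # return False to abort the remaining books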
src/calibre/utils/cleantext.py (new file, 23 lines)
@@ -0,0 +1,23 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian <sengian1@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+_ascii_pat = None
+
+def clean_ascii_chars(txt, charlist=None):
+    'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
+    global _ascii_pat
+    if _ascii_pat is None:
+        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
+                + [0x1A, 0x1B]
+        _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
+
+    if charlist is None:
+        pat = _ascii_pat
+    else:
+        pat = re.compile(u'|'.join(map(unichr, charlist)))
+    return pat.sub('', txt)
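A quick usage sketch of the new helper: the default pattern strips most C0 control characters (which XML and lxml reject) while leaving tab, newline and carriage return alone, and charlist lets callers supply their own codepoints:

    from calibre.utils.cleantext import clean_ascii_chars

    raw = u'bro\x00ken \x1bfeed'
    print clean_ascii_chars(raw)              # -> u'broken feed'
    print clean_ascii_chars(u'a\tb', [0x09])  # custom list: strips the tab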
@@ -151,3 +151,45 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
     format = re.sub('d{1,4}', format_day, format)
     format = re.sub('M{1,4}', format_month, format)
     return re.sub('yyyy|yy', format_year, format)

+def replace_months(datestr, clang):
+    # Replace months by english equivalent for parse_date
+    frtoen = {
+        u'[jJ]anvier': u'jan',
+        u'[fF].vrier': u'feb',
+        u'[mM]ars': u'mar',
+        u'[aA]vril': u'apr',
+        u'[mM]ai': u'may',
+        u'[jJ]uin': u'jun',
+        u'[jJ]uillet': u'jul',
+        u'[aA]o.t': u'aug',
+        u'[sS]eptembre': u'sep',
+        u'[Oo]ctobre': u'oct',
+        u'[nN]ovembre': u'nov',
+        u'[dD].cembre': u'dec' }
+    detoen = {
+        u'[jJ]anuar': u'jan',
+        u'[fF]ebruar': u'feb',
+        u'[mM].rz': u'mar',
+        u'[aA]pril': u'apr',
+        u'[mM]ai': u'may',
+        u'[jJ]uni': u'jun',
+        u'[jJ]uli': u'jul',
+        u'[aA]ugust': u'aug',
+        u'[sS]eptember': u'sep',
+        u'[Oo]ktober': u'oct',
+        u'[nN]ovember': u'nov',
+        u'[dD]ezember': u'dec' }
+
+    if clang == 'fr':
+        dictoen = frtoen
+    elif clang == 'de':
+        dictoen = detoen
+    else:
+        return datestr
+
+    for k in dictoen.iterkeys():
+        tmp = re.sub(k, dictoen[k], datestr)
+        if tmp != datestr: break
+    return tmp
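Note that the month patterns use `.` wildcards (f.vrier, ao.t, m.rz) so they match the accented character regardless of how it was encoded. Typical use, as in the Nicebooks fetcher above, is to normalize the string just before handing it to parse_date:

    from calibre.utils.date import parse_date, replace_months

    d = replace_months(u'15 aout 2010', 'fr')   # -> u'15 aug 2010'
    pub = parse_date(d, assume_utc=True)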
@@ -237,8 +237,6 @@ static PyTypeObject icu_CollatorType = { // {{{
 // }}


-// }}}
-
 // }}}

 // Module initialization {{{
@@ -286,7 +284,7 @@ icu_upper(PyObject *self, PyObject *args) {
     PyMem_Free(input);

     return ret;
-}
+} // }}}

 // lower {{{
 static PyObject *
@@ -56,7 +56,7 @@ def py_sort_key(obj):
 def icu_sort_key(collator, obj):
     if not obj:
         return _none2
-    return collator.sort_key(obj.lower())
+    return collator.sort_key(lower(obj))

 def py_case_sensitive_sort_key(obj):
     if not obj:
@@ -1227,7 +1227,7 @@ class ZipFile:
             self.fp.flush()
         if zinfo.flag_bits & 0x08:
             # Write CRC and file sizes after the file data
-            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
+            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                   zinfo.file_size))
         self.filelist.append(zinfo)
         self.NameToInfo[zinfo.filename] = zinfo
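The one-character fix matters: `l` packs a signed 32-bit value, so any CRC with the high bit set (roughly half of all CRC-32 values) raises struct.error, while `L` packs the same four bytes unsigned. A quick demonstration:

    import struct

    crc = 0xDEADBEEF  # a CRC-32 value with the high bit set

    struct.pack("<LLL", crc, 100, 200)   # fine: unsigned 32-bit fields
    try:
        struct.pack("<lLL", crc, 100, 200)
    except struct.error, e:
        print 'signed pack failed:', e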