Merge from trunk

This commit is contained in:
Charles Haley 2012-07-08 16:34:52 +02:00
commit 20f28df944
20 changed files with 438 additions and 184 deletions

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.adventuregamers.com www.adventuregamers.com
''' '''
@ -14,24 +14,24 @@ class AdventureGamers(BasicNewsRecipe):
publisher = 'Adventure Gamers' publisher = 'Adventure Gamers'
category = 'news, games, adventure, technology' category = 'news, games, adventure, technology'
oldest_article = 10 oldest_article = 10
delay = 10 #delay = 10
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
INDEX = u'http://www.adventuregamers.com' INDEX = u'http://www.adventuregamers.com'
extra_css = """ extra_css = """
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74} .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
.pageheader_title{font-size: xx-large; color: #394128} .pageheader_title,.page_title{font-size: xx-large; color: #394128}
.pageheader_byline{font-size: small; font-weight: bold; color: #394128} .pageheader_byline{font-size: small; font-weight: bold; color: #394128}
.score_bg {display: inline; width: 100%; margin-bottom: 2em} .score_bg {display: inline; width: 100%; margin-bottom: 2em}
.score_column_1{ padding-left: 10px; font-size: small; width: 50%} .score_column_1{ padding-left: 10px; font-size: small; width: 50%}
.score_column_2{ padding-left: 10px; font-size: small; width: 50%} .score_column_2{ padding-left: 10px; font-size: small; width: 50%}
.score_column_3{ padding-left: 10px; font-size: small; width: 50%} .score_column_3{ padding-left: 10px; font-size: small; width: 50%}
.score_header{font-size: large; color: #50544A} .score_header{font-size: large; color: #50544A}
.bodytext{display: block} img{margin-bottom: 1em;}
body{font-family: Helvetica,Arial,sans-serif} body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
""" """
conversion_options = { conversion_options = {
@ -41,35 +41,38 @@ class AdventureGamers(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [ keep_only_tags = [dict(name='div', attrs={'class':'cleft_inn'})]
dict(name='div', attrs={'class':'content_middle'})
]
remove_tags = [ remove_tags = [
dict(name=['object','link','embed','form']) dict(name=['object','link','embed','form','iframe','meta'])
,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']}) ,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/scoring'})
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/policies'})
] ]
remove_tags_after = [dict(name='div', attrs={'class':'bodytext'})]
remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
remove_attributes = ['width','height'] remove_attributes = ['width','height']
feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')] feeds = [(u'Articles', u'http://www.adventuregamers.com/rss/')]
def get_article_url(self, article): def get_article_url(self, article):
return article.get('guid', None) url = BasicNewsRecipe.get_article_url(self, article)
if '/videos/' in url or '/hypeometer/' in url:
return None
return url
def append_page(self, soup, appendtag, position): def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'class':'toolbar_fat_next'}) pager = soup.find('div', attrs={'class':'pagination_big'})
if pager: if pager:
nexturl = self.INDEX + pager.a['href'] nextpage = soup.find('a', attrs={'class':'next-page'})
soup2 = self.index_to_soup(nexturl) if nextpage:
texttag = soup2.find('div', attrs={'class':'bodytext'}) nexturl = nextpage['href']
for it in texttag.findAll(style=True): soup2 = self.index_to_soup(nexturl)
del it['style'] texttag = soup2.find('div', attrs={'class':'bodytext'})
newpos = len(texttag.contents) for it in texttag.findAll(style=True):
self.append_page(soup2,texttag,newpos) del it['style']
texttag.extract() newpos = len(texttag.contents)
appendtag.insert(position,texttag) self.append_page(soup2,texttag,newpos)
texttag.extract()
pager.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -78,7 +81,7 @@ class AdventureGamers(BasicNewsRecipe):
for item in soup.findAll('div', attrs={'class':'floatright'}): for item in soup.findAll('div', attrs={'class':'floatright'}):
item.extract() item.extract()
self.append_page(soup, soup.body, 3) self.append_page(soup, soup.body, 3)
pager = soup.find('div',attrs={'class':'toolbar_fat'}) pager = soup.find('div',attrs={'class':'pagination_big'})
if pager: if pager:
pager.extract() pager.extract()
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -1,13 +1,13 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
#from calibre import __appname__ import re
from calibre.utils.magick import Image from calibre import browser
class AdvancedUserRecipe1306097511(BasicNewsRecipe): class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Cosmopolitan UK' title = u'Cosmopolitan UK'
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK' description = 'Author : D.Asbury : Womens Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
#last update 21/12/11 #last update 7/7/12 hopefully get current cover from itunes
# greyscale code by Starson # greyscale code by Starson
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg' cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
no_stylesheets = True no_stylesheets = True
@ -39,14 +39,19 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
feeds = [ feeds = [
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')] (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
def postprocess_html(self, soup, first): def get_cover_url(self):
#process all the images soup = self.index_to_soup('http://itunes.apple.com/gb/app/cosmopolitan-uk/id461363572?mt=8')
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): # look for the block containing the sun button and url
iurl = tag['src'] cov = soup.find(attrs={'alt' : 'iPhone Screenshot 1'})
img = Image() cov2 = str(cov['src'])
img.open(iurl) br = browser()
if img < 0: br.set_handle_redirect(False)
raise RuntimeError('Out of memory') try:
img.type = "GrayscaleType" br.open_novisit(cov2)
img.save(iurl) cover_url = cov2
return soup except:
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
return cover_url

View File

@ -0,0 +1,51 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1341650280(BasicNewsRecipe):
    """Recipe that collects film articles from Empire Magazine.

    The class is purely declarative: it only sets recipe attributes that
    the BasicNewsRecipe base class reads.  Last updated 7/7/12.
    """

    # --- Identity -------------------------------------------------------
    __author__ = 'Dave Asbury'
    title = u'Empire Magazine'
    description = 'Author D.Asbury. Film articles from Empire Mag. '
    cover_url = 'http://www.empireonline.com/images/magazine/cover.jpg'

    # --- Download / clean-up switches -----------------------------------
    # oldest_article is intentionally not set here (a value of 7 was
    # tried and left disabled), and auto_cleanup is likewise left off.
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True

    conversion_options = dict(linearize_tables=True)

    # --- Raw-HTML rewrites ----------------------------------------------
    # Each entry is (compiled pattern, replacement callable).  The order
    # of the entries is kept exactly as written, since later patterns
    # rely on marker comments that earlier replacements leave in place.
    preprocess_regexps = [
        # Twitter share link.
        (
            re.compile(r'<a href="http://twitter.com/share.*?</a>',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '',
        ),
        # Everything between <head> and the start-of-content marker.
        (
            re.compile(r'<head>.*?<!-- CONTENT: START -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<head></head><!-- CONTENT: START -->',
        ),
        # Marker-delimited page sections: the content is dropped and the
        # surrounding marker comments are written back.
        (
            re.compile(r'<!-- LATEST NEWS HEADLINES: START -->.*?<!-- LATEST NEWS HEADLINES: END -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- LATEST NEWS HEADLINES: START --><!-- LATEST NEWS HEADLINES: END -->',
        ),
        (
            re.compile(r'<!-- RELATED FUTURE FILMS: START -->.*?<!-- RELATED FUTURE FILMS: END -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- RELATED FUTURE FILMS: START --><!-- RELATED FUTURE FILMS: END -->',
        ),
        (
            re.compile(r'<!-- CURRENT HIGHLIGHTS: START-->.*?<!-- CURRENT HIGHLIGHTS: END -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- CURRENT HIGHLIGHTS: START--><!-- CURRENT HIGHLIGHTS: END -->',
        ),
        (
            re.compile(r'<!-- RELATED REVIEWS: START -->.*?<!-- RELATED REVIEWS: END -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- RELATED REVIEWS: START --><!-- RELATED REVIEWS: END -->',
        ),
        # NOTE(review): the start and end markers of this pattern name
        # different sections (INTERVIEWS vs REVIEWS) -- confirm against
        # the site markup that this is intended.
        (
            re.compile(r'<!-- RELATED INTERVIEWS -->.*?<!-- RELATED REVIEWS: END -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- RELATED INTERVIEWS --><!-- RELATED REVIEWS: END -->',
        ),
        # Everything after the end-of-content / end-of-story markers.
        (
            re.compile(r'<!-- CONTENT: END -->.*?</body>',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- CONTENT: END --></body>',
        ),
        (
            re.compile(r'<!-- STORY: END -->.*?</body>',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- STORY: END --></body>',
        ),
        (
            re.compile(r'<!-- RATINGS GUIDE: START-->.*?<!-- RATINGS GUIDE: END-->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- RATINGS GUIDE: START--><!-- RATINGS GUIDE: END-->',
        ),
        # Subscription block, up to the end of its table body.
        (
            re.compile(r'<strong>SUBSCRIBE TO EMPIRE</strong>.*?</tbody>',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '</tbody>',
        ),
        (
            re.compile(r'<!-- USER REVIEWS: START -->.*?<!-- USER REVIEWS: END -->',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '<!-- USER REVIEWS: START --><!-- USER REVIEWS: END -->',
        ),
        # Any occurrence of the word "Advertisement", in any case.
        (
            re.compile(r'Advertisement', re.IGNORECASE | re.DOTALL),
            lambda m: '',
        ),
        # The "have your say" anchor and the text that follows it.
        (
            re.compile(r'<a name="haveyoursay".*?now to have your say.',
                       re.IGNORECASE | re.DOTALL),
            lambda m: '',
        ),
    ]

    # --- Tag filters ----------------------------------------------------
    # No keep-only filter is active; restricting to <h1> and the
    # 'mediumblack' class was tried and left disabled.
    keep_only_tags = []

    remove_tags = [
        {'name': 'td', 'attrs': {'width': '200', 'valign': 'top'}},
        {'name': 'b'},
        {'name': 'a', 'attrs': {'name': 'haveyoursay'}},
        {'attrs': {'class': 'newslink'}},
    ]

    # --- Feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'News', u'http://feed43.com/7338478755673147.xml'),
        (u'Recent Features', u'http://feed43.com/4346347750304760.xml'),
        (u'Interviews', u'http://feed43.com/3418350077724081.xml'),
        (u'Film Reviews', u'http://feed43.com/2643703076510627.xml'),
    ]

61
recipes/nzz_folio.recipe Normal file
View File

@ -0,0 +1,61 @@
__license__ = 'GPL v3'
__copyright__ = '2012 Bernd Leinfelder <skoll1975@gmail.com>'
'''
www.nzzfolio.ch
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Nzzfolio(BasicNewsRecipe):
    """Recipe that collects the current articles of NZZ Folio.

    The class is purely declarative: it only sets recipe attributes that
    the BasicNewsRecipe base class reads.
    """

    # --- Identity / metadata --------------------------------------------
    __author__ = 'Bernd Leinfelder'
    title = 'NZZ Folio'
    description = 'Aktuelle Artikel des NZZ Folio'
    publisher = 'NZZ AG'
    category = 'news, politics, nachrichten, Switzerland'
    language = 'de'

    # --- Download switches ----------------------------------------------
    encoding = 'utf-8'
    oldest_article = 35
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # --- Styling --------------------------------------------------------
    # The stylesheet text is kept exactly as authored, so its lines are
    # not indented to match the class body.
    extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif }
.artikel h3,.artikel h4,.bildLegende,.question,.autor{font-family: Arial,Verdana,Helvetica,sans-serif}
.bildLegende{font-size: small}
.autor{font-size: 0.9375em; color: #666666}
.quote{font-size: large !important;
font-style: italic;
font-weight: normal !important;
border-bottom: 1px dotted #BFBFBF;
border-top: 1px dotted #BFBFBF;
line-height: 1.25em}
.quelle{color: #666666; font-style: italic; white-space: nowrap}
"""

    # --- Conversion -----------------------------------------------------
    # Reuses the metadata attributes defined above, so this must stay
    # below them in the class body.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # --- Tag / attribute filters ----------------------------------------
    remove_attributes = ['width', 'height', 'lang']

    # Both boundaries point at the element with id 'content'.
    remove_tags_before = {'id': 'content'}
    remove_tags_after = {'id': 'content'}

    remove_tags = [
        {'name': ['h2', 'object', 'link', 'base', 'meta', 'iframe']},
        {'id': 'artikelBar'},
        {'id': 'foot'},
        {'id': 'bildLegende'},
        {'name': 'div', 'attrs': {'class': ['box']}},
    ]

    # --- Feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'NZZ Folio', u'http://rss.nzzfolio.ch/'),
    ]

View File

@ -364,6 +364,7 @@ class Py2App(object):
'application. Visit http://calibre-ebook.com for details.'), 'application. Visit http://calibre-ebook.com for details.'),
CFBundleIconFile='library.icns', CFBundleIconFile='library.icns',
LSMultipleInstancesProhibited=True, LSMultipleInstancesProhibited=True,
NSHighResolutionCapable=True,
LSEnvironment=env LSEnvironment=env
) )
plistlib.writePlist(pl, join(self.contents_dir, 'Info.plist')) plistlib.writePlist(pl, join(self.contents_dir, 'Info.plist'))

View File

@ -1262,7 +1262,6 @@ class StoreBNStore(StoreBase):
headquarters = 'US' headquarters = 'US'
formats = ['NOOK'] formats = ['NOOK']
affiliate = True
class StoreBeamEBooksDEStore(StoreBase): class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE' name = 'Beam EBooks DE'

View File

@ -10,7 +10,7 @@ import cStringIO
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
HTC_BCDS = [0x100, 0x0222, 0x0226, 0x227, 0x228] HTC_BCDS = [0x100, 0x0222, 0x0226, 0x227, 0x228, 0x229]
class ANDROID(USBMS): class ANDROID(USBMS):

View File

@ -413,7 +413,8 @@ class KINDLE2(KINDLE):
if not opts.extra_customization[self.OPT_APNX]: if not opts.extra_customization[self.OPT_APNX]:
return return
if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'): if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi',
'.prc', '.azw3'):
return return
# Create the sidecar folder if necessary # Create the sidecar folder if necessary

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt, from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt,
QApplication, QCompleter) QApplication, QCompleter)
from calibre.utils.icu import sort_key, lower from calibre.utils.icu import sort_key
from calibre.gui2 import NONE from calibre.gui2 import NONE
from calibre.gui2.widgets import EnComboBox, LineEditECM from calibre.gui2.widgets import EnComboBox, LineEditECM
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
@ -24,12 +24,11 @@ class CompleteModel(QAbstractListModel):
def set_items(self, items): def set_items(self, items):
items = [unicode(x.strip()) for x in items] items = [unicode(x.strip()) for x in items]
if len(items) < tweaks['completion_change_to_ascii_sorting']: if len(items) < tweaks['completion_change_to_ascii_sorting']:
self.items = sorted(items, key=lambda x: sort_key(x)) self.items = sorted(items, key=sort_key)
self.sorting = QCompleter.UnsortedModel self.sorting = QCompleter.UnsortedModel
else: else:
self.items = sorted(items, key=lambda x:x.lower()) self.items = sorted(items, key=lambda x:x.lower())
self.sorting = QCompleter.CaseInsensitivelySortedModel self.sorting = QCompleter.CaseInsensitivelySortedModel
self.lowered_items = [lower(x) for x in self.items]
self.reset() self.reset()
def rowCount(self, *args): def rowCount(self, *args):

View File

@ -97,6 +97,9 @@ class MetadataWidget(Widget, Ui_Form):
else: else:
self.cover.setPixmap(QPixmap(I('default_cover.png'))) self.cover.setPixmap(QPixmap(I('default_cover.png')))
self.cover.setToolTip(_('This book has no cover')) self.cover.setToolTip(_('This book has no cover'))
for x in ('author', 'series', 'publisher'):
x = getattr(self, x)
x.lineEdit().deselect()
def set_cover_tooltip(self, pm): def set_cover_tooltip(self, pm):
tt = _('Cover size: %(width)d x %(height)d pixels') % dict( tt = _('Cover size: %(width)d x %(height)d pixels') % dict(

View File

@ -125,7 +125,7 @@ class BooksView(QTableView): # {{{
self.last_modified_delegate = DateDelegate(self, self.last_modified_delegate = DateDelegate(self,
tweak_name='gui_last_modified_display_format') tweak_name='gui_last_modified_display_format')
self.languages_delegate = LanguagesDelegate(self) self.languages_delegate = LanguagesDelegate(self)
self.tags_delegate = CompleteDelegate(self, ',', 'all_tags') self.tags_delegate = CompleteDelegate(self, ',', 'all_tag_names')
self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True) self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True)
self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True) self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True)
self.series_delegate = TextDelegate(self) self.series_delegate = TextDelegate(self)

View File

@ -1114,7 +1114,7 @@ class TagsEdit(MultiCompleteLineEdit): # {{{
tags = db.tags(id_, index_is_id=True) tags = db.tags(id_, index_is_id=True)
tags = tags.split(',') if tags else [] tags = tags.split(',') if tags else []
self.current_val = tags self.current_val = tags
self.all_items = db.all_tags() self.all_items = db.all_tag_names()
self.original_val = self.current_val self.original_val = self.current_val
@property @property

View File

@ -24,26 +24,12 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class BNStore(BasicStoreConfig, StorePlugin): class BNStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
pub_id = 'sHa5EXvYOwA' url = "http://bn.com"
# Use Kovid's affiliate id 30% of the time.
if random.randint(1, 10) in (1, 2, 3):
pub_id = '0dsO3kDu/AU'
murl = 'http://click.linksynergy.com/fs-bin/click?id=%s&offerid=239662.13&type=3&subid=0' % pub_id
if detail_item:
purl = 'http://click.linksynergy.com/fs-bin/click?id=%s&subid=&offerid=239662.%s&type=2&subid=0' % (pub_id, detail_item)
url = purl
else:
purl = None
url = murl
#print(url)
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(url))) open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
else: else:
d = WebStoreDialog(self.gui, murl, parent, purl) d = WebStoreDialog(self.gui, url, parent, detail_item)
d.setWindowTitle(self.name) d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', '')) d.set_tags(self.config.get('tags', ''))
d.exec_() d.exec_()
@ -60,7 +46,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
if counter <= 0: if counter <= 0:
break break
id = ''.join(data.xpath('.//div[contains(@class, "display-tile-item")]/@data-bn-ean')) id = ''.join(data.xpath('.//div[contains(@class, "image-bounding-box")]/a/@href'))
if not id: if not id:
continue continue

View File

@ -11,6 +11,7 @@ import cPickle, os
from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer
from calibre.constants import DEBUG
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.gui2 import warning_dialog, question_dialog from calibre.gui2 import warning_dialog, question_dialog
from calibre.gui2.convert.single import NoSupportedInputFormats from calibre.gui2.convert.single import NoSupportedInputFormats

View File

@ -11,7 +11,6 @@ import os
from collections import namedtuple from collections import namedtuple
from calibre import strftime from calibre import strftime
from calibre.constants import DEBUG
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
@ -277,6 +276,7 @@ class EPUB_MOBI(CatalogPlugin):
log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width,self.THUMB_SMALLEST)) log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width,self.THUMB_SMALLEST))
opts.thumb_width = "1.0" opts.thumb_width = "1.0"
# Display opts # Display opts
keys = opts_dict.keys() keys = opts_dict.keys()
keys.sort() keys.sort()
@ -315,11 +315,10 @@ class EPUB_MOBI(CatalogPlugin):
recommendations = [] recommendations = []
recommendations.append(('remove_fake_margins', False, recommendations.append(('remove_fake_margins', False,
OptionRecommendation.HIGH)) OptionRecommendation.HIGH))
if DEBUG: recommendations.append(('comments', '', OptionRecommendation.HIGH))
recommendations.append(('comments', '\n'.join(line for line in build_log),
OptionRecommendation.HIGH)) # Use to debug generated catalog code before conversion
else: #setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug"))
recommendations.append(('comments', '', OptionRecommendation.HIGH))
dp = getattr(opts, 'debug_pipeline', None) dp = getattr(opts, 'debug_pipeline', None)
if dp is not None: if dp is not None:

View File

@ -3,7 +3,7 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Greg Riker' __copyright__ = '2010, Greg Riker'
import datetime, htmlentitydefs, os, re, shutil, zlib import datetime, htmlentitydefs, os, re, shutil, unicodedata, zlib
from copy import deepcopy from copy import deepcopy
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
@ -15,12 +15,11 @@ from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir from calibre.utils.config import config_dir
from calibre.utils.date import format_date, is_date_undefined, now as nowf from calibre.utils.date import format_date, is_date_undefined, now as nowf
from calibre.utils.filenames import ascii_text from calibre.utils.filenames import ascii_text
from calibre.utils.icu import capitalize from calibre.utils.icu import capitalize, sort_key
from calibre.utils.magick.draw import thumbnail from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class CatalogBuilder(object): class CatalogBuilder(object):
''' '''
Generates catalog source files from calibre database Generates catalog source files from calibre database
@ -42,6 +41,9 @@ class CatalogBuilder(object):
# [] = No date ranges added # [] = No date ranges added
DATE_RANGE=[30] DATE_RANGE=[30]
# Text used in generated catalog for title section with other-than-ASCII leading letter
SYMBOLS = _('Symbols')
# basename output file basename # basename output file basename
# creator dc:creator in OPF metadata # creator dc:creator in OPF metadata
# descriptionClip limits size of NCX descriptions (Kindle only) # descriptionClip limits size of NCX descriptions (Kindle only)
@ -565,10 +567,9 @@ class CatalogBuilder(object):
self.updateProgressFullStep("Sorting database") self.updateProgressFullStep("Sorting database")
self.booksByAuthor = list(self.booksByTitle) self.booksByAuthor = list(self.booksByTitle)
# Test for author_sort mismatches
self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author) self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author)
# Build the unique_authors set from existing data
# Build the unique_authors set from existing data, test for author_sort mismatches
authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor] authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
current_author = authors[0] current_author = authors[0]
for (i,author) in enumerate(authors): for (i,author) in enumerate(authors):
@ -603,7 +604,8 @@ Author '{0}':
current_author = author current_author = author
self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort) self.booksByAuthor = sorted(self.booksByAuthor,
key=lambda x: sort_key(self.booksByAuthorSorter_author_sort(x)))
# Build the unique_authors set from existing data # Build the unique_authors set from existing data
authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor] authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor]
@ -690,7 +692,7 @@ Author '{0}':
this_title['series'] = None this_title['series'] = None
this_title['series_index'] = 0.0 this_title['series_index'] = 0.0
this_title['title_sort'] = self.generateSortTitle(ascii_text(this_title['title'])) this_title['title_sort'] = self.generateSortTitle(this_title['title'])
if 'authors' in record: if 'authors' in record:
# from calibre.ebooks.metadata import authors_to_string # from calibre.ebooks.metadata import authors_to_string
# return authors_to_string(self.authors) # return authors_to_string(self.authors)
@ -705,7 +707,6 @@ Author '{0}':
this_title['author_sort'] = record['author_sort'] this_title['author_sort'] = record['author_sort']
else: else:
this_title['author_sort'] = self.author_to_author_sort(this_title['author']) this_title['author_sort'] = self.author_to_author_sort(this_title['author'])
this_title['author_sort'] = ascii_text(this_title['author_sort'])
if record['publisher']: if record['publisher']:
this_title['publisher'] = re.sub('&', '&amp;', record['publisher']) this_title['publisher'] = re.sub('&', '&amp;', record['publisher'])
@ -780,8 +781,11 @@ Author '{0}':
# Re-sort based on title_sort # Re-sort based on title_sort
if len(titles): if len(titles):
self.booksByTitle = sorted(titles, #self.booksByTitle = sorted(titles,
key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper())) # key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper()))
self.booksByTitle = sorted(titles, key=lambda x: sort_key(x['title_sort'].upper()))
if False and self.verbose: if False and self.verbose:
self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle)) self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle))
self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort')) self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
@ -923,29 +927,24 @@ Author '{0}':
body = soup.find('body') body = soup.find('body')
btc = 0 btc = 0
# Insert section tag pTag = Tag(soup, "p")
pTag['class'] = 'title'
ptc = 0
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
aTag['name'] = 'section_start' aTag['id'] = 'section_start'
body.insert(btc, aTag) pTag.insert(ptc, aTag)
btc += 1 ptc += 1
# Insert the anchor
aTag = Tag(soup, "a")
aTag['name'] = "bytitle"
body.insert(btc, aTag)
btc += 1
if not self.__generateForKindle: if not self.__generateForKindle:
# We don't need this because the Kindle shows section titles # Kindle don't need this because it shows section titles in Periodical format
#<h2><a name="byalphatitle" id="byalphatitle"></a>By Title</h2>
pTag = Tag(soup, "p")
pTag['class'] = 'title'
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "bytitle" aTag['id'] = "bytitle"
pTag.insert(0,aTag) pTag.insert(ptc,aTag)
pTag.insert(1,NavigableString('Titles')) ptc += 1
body.insert(btc,pTag) pTag.insert(ptc,NavigableString('Titles'))
btc += 1
body.insert(btc,pTag)
btc += 1
divTag = Tag(soup, "div") divTag = Tag(soup, "div")
dtc = 0 dtc = 0
@ -955,7 +954,7 @@ Author '{0}':
# Incoming title <series> <series_index>: <title> # Incoming title <series> <series_index>: <title>
if not self.useSeriesPrefixInTitlesSection: if not self.useSeriesPrefixInTitlesSection:
nspt = deepcopy(self.booksByTitle) nspt = deepcopy(self.booksByTitle)
nspt = sorted(nspt, key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper())) nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper()))
self.booksByTitle_noSeriesPrefix = nspt self.booksByTitle_noSeriesPrefix = nspt
# Loop through the books by title # Loop through the books by title
@ -977,11 +976,14 @@ Author '{0}':
if dtc > 0: if dtc > 0:
divRunningTag['class'] = "initial_letter" divRunningTag['class'] = "initial_letter"
drtc = 0 drtc = 0
current_letter = self.letter_or_symbol(book['title_sort'][0])
pIndexTag = Tag(soup, "p") pIndexTag = Tag(soup, "p")
pIndexTag['class'] = "author_title_letter_index" pIndexTag['class'] = "author_title_letter_index"
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "%s" % self.letter_or_symbol(book['title_sort'][0]) current_letter = self.letter_or_symbol(book['title_sort'][0])
if current_letter == self.SYMBOLS:
aTag['id'] = self.SYMBOLS
else:
aTag['id'] = "%s" % self.generateUnicodeName(current_letter)
pIndexTag.insert(0,aTag) pIndexTag.insert(0,aTag)
pIndexTag.insert(1,NavigableString(self.letter_or_symbol(book['title_sort'][0]))) pIndexTag.insert(1,NavigableString(self.letter_or_symbol(book['title_sort'][0])))
divRunningTag.insert(dtc,pIndexTag) divRunningTag.insert(dtc,pIndexTag)
@ -1074,19 +1076,6 @@ Author '{0}':
btc = 0 btc = 0
# Insert section tag
aTag = Tag(soup,'a')
aTag['name'] = 'section_start'
body.insert(btc, aTag)
btc += 1
# Insert the anchor
aTag = Tag(soup, "a")
anchor_name = friendly_name.lower()
aTag['name'] = anchor_name.replace(" ","")
body.insert(btc, aTag)
btc += 1
divTag = Tag(soup, "div") divTag = Tag(soup, "div")
dtc = 0 dtc = 0
divOpeningTag = None divOpeningTag = None
@ -1117,7 +1106,6 @@ Author '{0}':
drtc = 0 drtc = 0
divRunningTag = None divRunningTag = None
current_letter = self.letter_or_symbol(book['author_sort'][0].upper())
author_count = 0 author_count = 0
divOpeningTag = Tag(soup, 'div') divOpeningTag = Tag(soup, 'div')
if dtc > 0: if dtc > 0:
@ -1126,7 +1114,11 @@ Author '{0}':
pIndexTag = Tag(soup, "p") pIndexTag = Tag(soup, "p")
pIndexTag['class'] = "author_title_letter_index" pIndexTag['class'] = "author_title_letter_index"
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "%sauthors" % self.letter_or_symbol(current_letter) current_letter = self.letter_or_symbol(book['author_sort'][0].upper())
if current_letter == self.SYMBOLS:
aTag['id'] = self.SYMBOLS
else:
aTag['id'] = "%s_authors" % self.generateUnicodeName(current_letter)
pIndexTag.insert(0,aTag) pIndexTag.insert(0,aTag)
pIndexTag.insert(1,NavigableString(self.letter_or_symbol(book['author_sort'][0].upper()))) pIndexTag.insert(1,NavigableString(self.letter_or_symbol(book['author_sort'][0].upper())))
divOpeningTag.insert(dotc,pIndexTag) divOpeningTag.insert(dotc,pIndexTag)
@ -1158,7 +1150,7 @@ Author '{0}':
pAuthorTag = Tag(soup, "p") pAuthorTag = Tag(soup, "p")
pAuthorTag['class'] = "author_index" pAuthorTag['class'] = "author_index"
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "%s" % self.generateAuthorAnchor(current_author) aTag['id'] = "%s" % self.generateAuthorAnchor(current_author)
aTag.insert(0,NavigableString(current_author)) aTag.insert(0,NavigableString(current_author))
pAuthorTag.insert(0,aTag) pAuthorTag.insert(0,aTag)
if author_count == 1: if author_count == 1:
@ -1247,19 +1239,25 @@ Author '{0}':
# Loop ends here # Loop ends here
pTag = Tag(soup, "p")
pTag['class'] = 'title'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
if not self.__generateForKindle: if not self.__generateForKindle:
# Insert the <h2> tag with book_count at the head # Kindle don't need this because it shows section titles in Periodical format
#<h2><a name="byalphaauthor" id="byalphaauthor"></a>By Author</h2>
pTag = Tag(soup, "p")
pTag['class'] = 'title'
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
anchor_name = friendly_name.lower() anchor_name = friendly_name.lower()
aTag['name'] = anchor_name.replace(" ","") aTag['id'] = anchor_name.replace(" ","")
pTag.insert(0,aTag) pTag.insert(ptc,aTag)
#h2Tag.insert(1,NavigableString('%s (%d)' % (friendly_name, book_count))) ptc += 1
pTag.insert(1,NavigableString('%s' % (friendly_name))) pTag.insert(ptc,NavigableString('%s' % (friendly_name)))
body.insert(btc,pTag)
btc += 1 body.insert(btc,pTag)
btc += 1
if author_count == 1: if author_count == 1:
divTag.insert(dtc, divOpeningTag) divTag.insert(dtc, divOpeningTag)
@ -1294,7 +1292,7 @@ Author '{0}':
pIndexTag = Tag(soup, "p") pIndexTag = Tag(soup, "p")
pIndexTag['class'] = "date_index" pIndexTag['class'] = "date_index"
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "bda_%s-%s" % (current_date.year, current_date.month) aTag['id'] = "bda_%s-%s" % (current_date.year, current_date.month)
pIndexTag.insert(0,aTag) pIndexTag.insert(0,aTag)
pIndexTag.insert(1,NavigableString(date_string)) pIndexTag.insert(1,NavigableString(date_string))
divTag.insert(dtc,pIndexTag) divTag.insert(dtc,pIndexTag)
@ -1312,7 +1310,7 @@ Author '{0}':
pAuthorTag['class'] = "author_index" pAuthorTag['class'] = "author_index"
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
if self.opts.generate_authors: if self.opts.generate_authors:
aTag['name'] = "%s" % self.generateAuthorAnchor(current_author) aTag['id'] = "%s" % self.generateAuthorAnchor(current_author)
aTag.insert(0,NavigableString(current_author)) aTag.insert(0,NavigableString(current_author))
pAuthorTag.insert(0,aTag) pAuthorTag.insert(0,aTag)
divTag.insert(dtc,pAuthorTag) divTag.insert(dtc,pAuthorTag)
@ -1386,7 +1384,7 @@ Author '{0}':
pIndexTag = Tag(soup, "p") pIndexTag = Tag(soup, "p")
pIndexTag['class'] = "date_index" pIndexTag['class'] = "date_index"
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "bda_%s" % date_range.replace(' ','') aTag['id'] = "bda_%s" % date_range.replace(' ','')
pIndexTag.insert(0,aTag) pIndexTag.insert(0,aTag)
pIndexTag.insert(1,NavigableString(date_range)) pIndexTag.insert(1,NavigableString(date_range))
divTag.insert(dtc,pIndexTag) divTag.insert(dtc,pIndexTag)
@ -1457,30 +1455,27 @@ Author '{0}':
btc = 0 btc = 0
# Insert section tag pTag = Tag(soup, "p")
aTag = Tag(soup,'a') pTag['class'] = 'title'
aTag['name'] = 'section_start' ptc = 0
body.insert(btc, aTag)
btc += 1
# Insert the anchor aTag = Tag(soup,'a')
aTag = Tag(soup, "a") aTag['id'] = 'section_start'
anchor_name = friendly_name.lower() pTag.insert(ptc, aTag)
aTag['name'] = anchor_name.replace(" ","") ptc += 1
body.insert(btc, aTag)
btc += 1
if not self.__generateForKindle: if not self.__generateForKindle:
#<h2><a name="byalphaauthor" id="byalphaauthor"></a>By Author</h2> # Kindle don't need this because it shows section titles in Periodical format
pTag = Tag(soup, "p")
pTag['class'] = 'title'
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
anchor_name = friendly_name.lower() anchor_name = friendly_name.lower()
aTag['name'] = anchor_name.replace(" ","") aTag['id'] = anchor_name.replace(" ","")
pTag.insert(0,aTag)
pTag.insert(1,NavigableString('%s' % friendly_name)) pTag.insert(ptc,aTag)
body.insert(btc,pTag) ptc += 1
btc += 1 pTag.insert(ptc, NavigableString('%s' % friendly_name))
body.insert(btc,pTag)
btc += 1
divTag = Tag(soup, "div") divTag = Tag(soup, "div")
dtc = 0 dtc = 0
@ -1895,11 +1890,10 @@ Author '{0}':
self.updateProgressFullStep("'Genres'") self.updateProgressFullStep("'Genres'")
self.genre_tags_dict = self.filterDbTags(self.db.all_tags()) self.genre_tags_dict = self.filterDbTags(self.db.all_tags())
# Extract books matching filtered_tags # Extract books matching filtered_tags
genre_list = [] genre_list = []
for friendly_tag in sorted(self.genre_tags_dict): for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):
#print "\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag #print("\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag)
# tag_list => { normalized_genre_tag : [{book},{},{}], # tag_list => { normalized_genre_tag : [{book},{},{}],
# normalized_genre_tag : [{book},{},{}] } # normalized_genre_tag : [{book},{},{}] }
@ -2268,7 +2262,7 @@ Author '{0}':
navPointTag.insert(1, contentTag) navPointTag.insert(1, contentTag)
cmiTag = Tag(soup, '%s' % 'calibre:meta-img') cmiTag = Tag(soup, '%s' % 'calibre:meta-img')
cmiTag['name'] = "mastheadImage" cmiTag['id'] = "mastheadImage"
cmiTag['src'] = "images/mastheadImage.gif" cmiTag['src'] = "images/mastheadImage.gif"
navPointTag.insert(2,cmiTag) navPointTag.insert(2,cmiTag)
navMapTag.insert(0,navPointTag) navMapTag.insert(0,navPointTag)
@ -2552,7 +2546,10 @@ Author '{0}':
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
navPointByLetterTag.insert(0,navLabelTag) navPointByLetterTag.insert(0,navLabelTag)
contentTag = Tag(soup, 'content') contentTag = Tag(soup, 'content')
contentTag['src'] = "content/%s.html#%s" % (output, title_letters[i]) if title_letters[i] == self.SYMBOLS:
contentTag['src'] = "content/%s.html#%s" % (output, title_letters[i])
else:
contentTag['src'] = "content/%s.html#%s" % (output, self.generateUnicodeName(title_letters[i]))
navPointByLetterTag.insert(1,contentTag) navPointByLetterTag.insert(1,contentTag)
if self.generateForKindle: if self.generateForKindle:
@ -2640,7 +2637,7 @@ Author '{0}':
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
navPointByLetterTag.insert(0,navLabelTag) navPointByLetterTag.insert(0,navLabelTag)
contentTag = Tag(soup, 'content') contentTag = Tag(soup, 'content')
contentTag['src'] = "%s#%sauthors" % (HTML_file, authors_by_letter[1]) contentTag['src'] = "%s#%s_authors" % (HTML_file, self.generateUnicodeName(authors_by_letter[1]))
navPointByLetterTag.insert(1,contentTag) navPointByLetterTag.insert(1,contentTag)
@ -3213,7 +3210,7 @@ Author '{0}':
ans = '%s%d %s:\n' % (' ' * indent, len(tags), header) ans = '%s%d %s:\n' % (' ' * indent, len(tags), header)
ans += ' ' * (indent + 1) ans += ' ' * (indent + 1)
out_str = '' out_str = ''
sorted_tags = sorted(tags) sorted_tags = sorted(tags, key=sort_key)
for tag in next_tag(sorted_tags): for tag in next_tag(sorted_tags):
out_str += tag out_str += tag
if len(out_str) >= line_break: if len(out_str) >= line_break:
@ -3234,7 +3231,7 @@ Author '{0}':
if tag == ' ': if tag == ' ':
continue continue
normalized_tags.append(re.sub('\W','',tag).lower()) normalized_tags.append(re.sub('\W','',ascii_text(tag)).lower())
friendly_tags.append(tag) friendly_tags.append(tag)
genre_tags_dict = dict(zip(friendly_tags,normalized_tags)) genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
@ -3293,18 +3290,24 @@ Author '{0}':
body = soup.find('body') body = soup.find('body')
btc = 0 btc = 0
divTag = Tag(soup, 'div')
dtc = 0
# Insert section tag if this is the section start - first article only # Insert section tag if this is the section start - first article only
if section_head: if section_head:
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
aTag['name'] = 'section_start' aTag['id'] = 'section_start'
body.insert(btc, aTag) divTag.insert(dtc, aTag)
btc += 1 dtc += 1
#body.insert(btc, aTag)
#btc += 1
# Create an anchor from the tag # Create an anchor from the tag
aTag = Tag(soup, 'a') aTag = Tag(soup, 'a')
aTag['name'] = "Genre_%s" % genre aTag['id'] = "Genre_%s" % genre
body.insert(btc,aTag) divTag.insert(dtc, aTag)
body.insert(btc,divTag)
btc += 1 btc += 1
titleTag = body.find(attrs={'class':'title'}) titleTag = body.find(attrs={'class':'title'})
@ -3477,7 +3480,7 @@ Author '{0}':
for (i, tag) in enumerate(sorted(book.get('tags', []))): for (i, tag) in enumerate(sorted(book.get('tags', []))):
aTag = Tag(_soup,'a') aTag = Tag(_soup,'a')
if self.opts.generate_genres: if self.opts.generate_genres:
aTag['href'] = "Genre_%s.html" % re.sub("\W","",tag.lower()) aTag['href'] = "Genre_%s.html" % re.sub("\W","",ascii_text(tag).lower())
aTag.insert(0,escape(NavigableString(tag))) aTag.insert(0,escape(NavigableString(tag)))
genresTag.insert(gtc, aTag) genresTag.insert(gtc, aTag)
gtc += 1 gtc += 1
@ -3544,8 +3547,10 @@ Author '{0}':
btc = 0 btc = 0
# Insert the title anchor for inbound links # Insert the title anchor for inbound links
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
aTag['name'] = "book%d" % int(book['id']) aTag['id'] = "book%d" % int(book['id'])
body.insert(btc, aTag) divTag = Tag(soup, 'div')
divTag.insert(0, aTag)
body.insert(btc, divTag)
btc += 1 btc += 1
# Insert the link to the series or remove <a class="series"> # Insert the link to the series or remove <a class="series">
@ -3770,7 +3775,7 @@ Author '{0}':
else: else:
word = '%10.0f' % (float(word)) word = '%10.0f' % (float(word))
translated.append(word) translated.append(word)
return ascii_text(' '.join(translated)) return ' '.join(translated)
def generateThumbnail(self, title, image_dir, thumb_file): def generateThumbnail(self, title, image_dir, thumb_file):
''' '''
@ -3824,6 +3829,14 @@ Author '{0}':
with zf: with zf:
zf.writestr(title['uuid']+cover_crc, thumb_data) zf.writestr(title['uuid']+cover_crc, thumb_data)
def generateUnicodeName(self, c):
    '''
    Generate an anchor name string for the single character c.

    The result is the character's Unicode name with whitespace replaced by
    underscores (for example u'A' -> 'LATIN_CAPITAL_LETTER_A').

    Code points that have no Unicode name (such as control characters) make
    unicodedata.name() raise ValueError; for those a token built from the
    hexadecimal code point is returned instead (for example 'U_0000'), so
    one unnamed character cannot abort the whole run.

    A multi-character argument still raises TypeError, as before.
    '''
    char = unicode(c)
    try:
        fullname = unicodedata.name(char)
    except ValueError:
        # No name in the Unicode database: fall back to the code point.
        fullname = u'U %04X' % ord(char)
    return "_".join(fullname.split())
def getFriendlyGenreTag(self, genre): def getFriendlyGenreTag(self, genre):
# Find the first instance of friendly_tag matching genre # Find the first instance of friendly_tag matching genre
for friendly_tag in self.genre_tags_dict: for friendly_tag in self.genre_tags_dict:
@ -3837,8 +3850,8 @@ Author '{0}':
return markerTags return markerTags
def letter_or_symbol(self,char): def letter_or_symbol(self,char):
if not re.search('[a-zA-Z]',char): if not re.search('[a-zA-Z]', ascii_text(char)):
return 'Symbols' return self.SYMBOLS
else: else:
return char return char

View File

@ -3737,4 +3737,42 @@ books_series_link feeds
'SELECT {0}, count(*) FROM books_{1}_link GROUP BY {0}'.format( 'SELECT {0}, count(*) FROM books_{1}_link GROUP BY {0}'.format(
fm['link_column'], fm['table'])) fm['link_column'], fm['table']))
def all_author_names(self):
    '''
    Return the set of all author names found in the cached records.

    Each record's authors field is split on ',' and any '|' inside an
    individual name is turned back into ','. Records whose authors field
    is empty or None are skipped.
    '''
    idx = self.FIELD_MAP['authors']
    names = set()
    for row in self.data.iterall():
        raw = row[idx]
        if not raw:
            continue
        names.update(name.replace('|', ',') for name in raw.split(','))
    return names
def all_tag_names(self):
    '''
    Return the set of all tag names found in the cached records.

    Each record's tags field is split on ','. Records whose tags field is
    empty or None are skipped.
    '''
    idx = self.FIELD_MAP['tags']
    names = set()
    for row in self.data.iterall():
        raw = row[idx]
        if not raw:
            continue
        names.update(raw.split(','))
    return names
def all_publisher_names(self):
    '''
    Return the set of all non-empty publisher values found in the cached
    records. The publisher field is taken whole; it is not split.
    '''
    idx = self.FIELD_MAP['publisher']
    values = (row[idx] for row in self.data.iterall())
    return set(value for value in values if value)
def all_series_names(self):
    '''
    Return the set of all non-empty series values found in the cached
    records. The series field is taken whole; it is not split.
    '''
    idx = self.FIELD_MAP['series']
    values = (row[idx] for row in self.data.iterall())
    return set(value for value in values if value)

View File

@ -427,7 +427,9 @@ def _prefs():
'accented versions, based on the language you have chosen ' 'accented versions, based on the language you have chosen '
'for the calibre interface. For example, in ' 'for the calibre interface. For example, in '
u' English, searching for n will match ñ and n, but if ' u' English, searching for n will match ñ and n, but if '
'your language is Spanish it will only match n.')) 'your language is Spanish it will only match n. Note that '
'this is much slower than a simple search on very large '
'libraries.'))
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.') c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
return c return c

View File

@ -272,6 +272,44 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs)
return Py_BuildValue("O", ans); return Py_BuildValue("O", ans);
} // }}} } // }}}
// Collator.startswith {{{
/*
 * Collator.startswith(a, b) -> bool
 *
 * Returns True when the first len(b) UTF-16 code units of a compare equal
 * to b under this collator, False otherwise. An empty b always matches;
 * an a with fewer Python code units than b never matches.
 *
 * Both arguments must be unicode objects ("UU"); anything else makes
 * PyArg_ParseTuple fail and NULL is returned with the exception set.
 * Returns NULL with MemoryError set if a working buffer cannot be allocated.
 *
 * NOTE(review): the prefix is taken by code-unit count, so collation
 * expansions/contractions whose lengths differ between a and b may not be
 * matched -- confirm this is acceptable for callers.
 */
static PyObject *
icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *a_, *b_;
    size_t asz, bsz;
    int32_t actual_a, actual_b;
    UChar *a = NULL, *b = NULL;
    wchar_t *aw = NULL, *bw = NULL;
    UErrorCode status = U_ZERO_ERROR;
    int ans = 0;

    if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;
    asz = PyUnicode_GetSize(a_); bsz = PyUnicode_GetSize(b_);
    // Cheap answers that need no conversion or allocation
    if (asz < bsz) Py_RETURN_FALSE;
    if (bsz == 0) Py_RETURN_TRUE;

    a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
    b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar));
    aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
    bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t));

    if (a == NULL || b == NULL || aw == NULL || bw == NULL) {
        // Some allocations may have succeeded; free(NULL) is a no-op, so
        // release everything to avoid leaking the successful ones.
        free(a); free(b); free(aw); free(bw);
        return PyErr_NoMemory();
    }

    actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
    actual_b = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1);
    if (actual_a > -1 && actual_b > -1) {
        // On return actual_a/actual_b hold the converted lengths in UChars
        u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status);
        u_strFromWCS(b, bsz*4 + 1, &actual_b, bw, -1, &status);

        // Compare only the leading actual_b units of a against all of b.
        // The length guard keeps the comparison inside the converted part
        // of a rather than running into the zero padding.
        if (U_SUCCESS(status) && actual_a >= actual_b &&
                ucol_equal(self->collator, a, actual_b, b, actual_b))
            ans = 1;
    }

    free(a); free(b); free(aw); free(bw);
    if (ans) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
} // }}}
static PyObject* static PyObject*
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs); icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
@ -296,6 +334,10 @@ static PyMethodDef icu_Collator_methods[] = {
"clone() -> returns a clone of this collator." "clone() -> returns a clone of this collator."
}, },
{"startswith", (PyCFunction)icu_Collator_startswith, METH_VARARGS,
"startswith(a, b) -> returns True iff a startswith b, following the current collation rules."
},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };

View File

@ -12,7 +12,7 @@ from functools import partial
from calibre.constants import plugins from calibre.constants import plugins
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
_icu = _collator = _primary_collator = None _icu = _collator = _primary_collator = _secondary_collator = None
_locale = None _locale = None
_none = u'' _none = u''
@ -55,6 +55,13 @@ def primary_collator():
_primary_collator.strength = _icu.UCOL_PRIMARY _primary_collator.strength = _icu.UCOL_PRIMARY
return _primary_collator return _primary_collator
def secondary_collator():
    '''
    Return the module-wide secondary-strength collator, creating it on
    first use as a clone of _collator.
    '''
    global _secondary_collator
    if _secondary_collator is not None:
        return _secondary_collator
    # Publish the clone first, then adjust its strength (same order as before)
    _secondary_collator = _collator.clone()
    _secondary_collator.strength = _icu.UCOL_SECONDARY
    return _secondary_collator
def py_sort_key(obj): def py_sort_key(obj):
if not obj: if not obj:
return _none return _none
@ -63,7 +70,10 @@ def py_sort_key(obj):
def icu_sort_key(collator, obj): def icu_sort_key(collator, obj):
if not obj: if not obj:
return _none2 return _none2
return collator.sort_key(lower(obj)) try:
return _secondary_collator.sort_key(obj)
except AttributeError:
return secondary_collator().sort_key(obj)
def py_find(pattern, source): def py_find(pattern, source):
pos = source.find(pattern) pos = source.find(pattern)
@ -77,6 +87,12 @@ def icu_find(collator, pattern, source):
except TypeError: except TypeError:
return collator.find(unicode(pattern), unicode(source)) return collator.find(unicode(pattern), unicode(source))
def icu_startswith(collator, a, b):
    '''
    Return collator.startswith(a, b). If that call raises TypeError, retry
    once with both arguments coerced through unicode().
    '''
    try:
        result = collator.startswith(a, b)
    except TypeError:
        # The collator rejected the argument types; coerce and retry once
        result = collator.startswith(unicode(a), unicode(b))
    return result
def py_case_sensitive_sort_key(obj): def py_case_sensitive_sort_key(obj):
if not obj: if not obj:
return _none return _none
@ -129,7 +145,7 @@ sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator)
strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator) strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator)
case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \ case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \
icu_case_sensitive_sort_key partial(icu_case_sensitive_sort_key, _collator)
case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp
@ -155,14 +171,39 @@ def primary_strcmp(a, b):
if _icu_not_ok: if _icu_not_ok:
from calibre.utils.filenames import ascii_text from calibre.utils.filenames import ascii_text
return py_strcmp(ascii_text(a), ascii_text(b)) return py_strcmp(ascii_text(a), ascii_text(b))
return primary_collator().strcmp(a, b) try:
return _primary_collator.strcmp(a, b)
except AttributeError:
return primary_collator().strcmp(a, b)
def primary_find(pat, src): def primary_find(pat, src):
'find that ignores case and accents on letters' 'find that ignores case and accents on letters'
if _icu_not_ok: if _icu_not_ok:
from calibre.utils.filenames import ascii_text from calibre.utils.filenames import ascii_text
return py_find(ascii_text(pat), ascii_text(src)) return py_find(ascii_text(pat), ascii_text(src))
return icu_find(primary_collator(), pat, src) try:
return icu_find(_primary_collator, pat, src)
except AttributeError:
return icu_find(primary_collator(), pat, src)
def primary_sort_key(val):
    'A sort key that ignores case and diacritics'
    if not _icu_not_ok:
        try:
            # Fast path: use the cached primary collator directly
            key = _primary_collator.sort_key(val)
        except AttributeError:
            # Cache not usable yet (e.g. still None): build it via the accessor
            key = primary_collator().sort_key(val)
        return key
    # ICU unavailable: approximate with lowercased ASCII text
    from calibre.utils.filenames import ascii_text
    return ascii_text(val).lower()
def primary_startswith(a, b):
    '''
    Return True if a starts with b, compared with the primary collator.
    When ICU is unavailable, compare the lowercased ascii_text() forms.
    '''
    if not _icu_not_ok:
        try:
            # Fast path: use the cached primary collator directly
            matched = icu_startswith(_primary_collator, a, b)
        except AttributeError:
            # Cache not usable yet (e.g. still None): build it via the accessor
            matched = icu_startswith(primary_collator(), a, b)
        return matched
    from calibre.utils.filenames import ascii_text
    haystack = ascii_text(a).lower()
    prefix = ascii_text(b).lower()
    return haystack.startswith(prefix)
################################################################################ ################################################################################
@ -283,8 +324,8 @@ pêché'''
print print
print '\nTesting primary collation' print '\nTesting primary collation'
for k, v in {u'pèché': u'peche', u'flüße':u'flusse', for k, v in {u'pèché': u'peche', u'flüße':u'Flusse',
u'Štepánek':u'Štepanek'}.iteritems(): u'Štepánek':u'ŠtepaneK'}.iteritems():
if primary_strcmp(k, v) != 0: if primary_strcmp(k, v) != 0:
prints('primary_strcmp() failed with %s != %s'%(k, v)) prints('primary_strcmp() failed with %s != %s'%(k, v))
return return
@ -293,10 +334,12 @@ pêché'''
return return
global _primary_collator global _primary_collator
orig = _primary_collator
_primary_collator = _icu.Collator('es') _primary_collator = _icu.Collator('es')
if primary_strcmp(u'peña', u'pena') == 0: if primary_strcmp(u'peña', u'pena') == 0:
print 'Primary collation in Spanish locale failed' print 'Primary collation in Spanish locale failed'
return return
_primary_collator = orig
print '\nTesting contractions' print '\nTesting contractions'
c = _icu.Collator('cs') c = _icu.Collator('cs')
@ -306,6 +349,13 @@ pêché'''
print 'Contractions for the Czech language failed' print 'Contractions for the Czech language failed'
return return
print '\nTesting startswith'
p = primary_startswith
if (not p('asd', 'asd') or not p('asd', 'A') or
not p('x', '')):
print 'startswith() failed'
return
# }}} # }}}
if __name__ == '__main__': if __name__ == '__main__':