Modify BibTeX catalog generation: create a class for the BibTeX functions and use the new MReplace function, since the replacement dictionary is very large.

This divides the total execution time by 10.
This commit is contained in:
Sengian 2010-07-28 00:49:37 +02:00
parent 3cf9f7986a
commit 7ebf416513
2 changed files with 85 additions and 81 deletions

View File

@ -270,10 +270,10 @@ class BIBTEX(CatalogPlugin):
from calibre.library.save_to_disk import preprocess_template from calibre.library.save_to_disk import preprocess_template
#Bibtex functions #Bibtex functions
from calibre.utils.bibtex import bibtex_author_format, utf8ToBibtex, ValidateCitationKey from calibre.utils.bibtex import BibTeX
def create_bibtex_entry(entry, fields, mode, template_citation, def create_bibtex_entry(entry, fields, mode, template_citation,
asccii_bibtex = True, citation_bibtex = True): bibtexdict, citation_bibtex = True):
#Bibtex doesn't like UTF-8 but keep unicode until writing #Bibtex doesn't like UTF-8 but keep unicode until writing
#Define starting chain or if book valid strict and not book return a Fail string #Define starting chain or if book valid strict and not book return a Fail string
@ -289,7 +289,8 @@ class BIBTEX(CatalogPlugin):
if citation_bibtex : if citation_bibtex :
# Citation tag # Citation tag
bibtex_entry.append(make_bibtex_citation(entry, template_citation, asccii_bibtex)) bibtex_entry.append(make_bibtex_citation(entry, template_citation,
bibtexdict))
bibtex_entry = [u' '.join(bibtex_entry)] bibtex_entry = [u' '.join(bibtex_entry)]
for field in fields: for field in fields:
@ -304,11 +305,11 @@ class BIBTEX(CatalogPlugin):
pass pass
if field == 'authors' : if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item)) bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid', elif field in ['title', 'publisher', 'cover', 'uuid',
'author_sort', 'series'] : 'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item, asccii_bibtex))) bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
elif field == 'id' : elif field == 'id' :
bibtex_entry.append(u'calibreid = "%s"' % int(item)) bibtex_entry.append(u'calibreid = "%s"' % int(item))
@ -321,13 +322,13 @@ class BIBTEX(CatalogPlugin):
elif field == 'tags' : elif field == 'tags' :
#A list to flatten #A list to flatten
bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item), asccii_bibtex)) bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item)))
elif field == 'comments' : elif field == 'comments' :
#\n removal #\n removal
item = item.replace(u'\r\n',u' ') item = item.replace(u'\r\n',u' ')
item = item.replace(u'\n',u' ') item = item.replace(u'\n',u' ')
bibtex_entry.append(u'note = "%s"' % utf8ToBibtex(item, asccii_bibtex)) bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))
elif field == 'isbn' : elif field == 'isbn' :
# Could be 9, 10 or 13 digits # Could be 9, 10 or 13 digits
@ -345,8 +346,7 @@ class BIBTEX(CatalogPlugin):
elif field == 'pubdate' : elif field == 'pubdate' :
bibtex_entry.append(u'year = "%s"' % item.year) bibtex_entry.append(u'year = "%s"' % item.year)
bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(strftime("%b", item), bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
asccii_bibtex))
bibtex_entry = u',\n '.join(bibtex_entry) bibtex_entry = u',\n '.join(bibtex_entry)
bibtex_entry += u' }\n\n' bibtex_entry += u' }\n\n'
@ -363,7 +363,7 @@ class BIBTEX(CatalogPlugin):
else : else :
return True return True
def make_bibtex_citation(entry, template_citation, asccii_bibtex): def make_bibtex_citation(entry, template_citation, bibtexclass):
#define a function to replace the template entry by its value #define a function to replace the template entry by its value
def tpl_replace(objtplname) : def tpl_replace(objtplname) :
@ -384,8 +384,9 @@ class BIBTEX(CatalogPlugin):
return u'' return u''
if len(template_citation) >0 : if len(template_citation) >0 :
tpl_citation = utf8ToBibtex(ValidateCitationKey(re.sub(u'\{[^{}]*\}', tpl_citation = bibtexclass.utf8ToBibtex(
tpl_replace, template_citation)), asccii_bibtex) bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}',
tpl_replace, template_citation)))
if len(tpl_citation) >0 : if len(tpl_citation) >0 :
return tpl_citation return tpl_citation
@ -397,9 +398,9 @@ class BIBTEX(CatalogPlugin):
template_citation = u'%s' % str(entry["id"]) template_citation = u'%s' % str(entry["id"])
if asccii_bibtex : if asccii_bibtex :
return ValidateCitationKey(template_citation.encode('ascii', 'replace')) return bibtexclass.ValidateCitationKey(template_citation.encode('ascii', 'replace'))
else : else :
return ValidateCitationKey(template_citation) return bibtexclass.ValidateCitationKey(template_citation)
self.fmt = path_to_output.rpartition('.')[2] self.fmt = path_to_output.rpartition('.')[2]
self.notification = notification self.notification = notification
@ -467,13 +468,16 @@ class BIBTEX(CatalogPlugin):
if not len(data): if not len(data):
log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
#Initialize BibTeX class
bibtexc = BibTeX()
#Entries writing after Bibtex formating (or not) #Entries writing after Bibtex formating (or not)
if bibfile_enc != 'ascii' : if bibfile_enc != 'ascii' :
asccii_bibtex = False bibtexc.ascii_bibtex = False
else : else :
asccii_bibtex = True bibtexc.ascii_bibtex = True
#Check and go to default in case of bad CLI #Check citation choice and go to default in case of bad CLI
if isinstance(opts.impcit, (StringType, UnicodeType)) : if isinstance(opts.impcit, (StringType, UnicodeType)) :
if opts.impcit == 'False' : if opts.impcit == 'False' :
citation_bibtex= False citation_bibtex= False
@ -485,6 +489,7 @@ class BIBTEX(CatalogPlugin):
else : else :
citation_bibtex= opts.impcit citation_bibtex= opts.impcit
#Preprocess for error and light correction
template_citation = preprocess_template(opts.bib_cit) template_citation = preprocess_template(opts.bib_cit)
#Open output and write entries #Open output and write entries
@ -506,7 +511,7 @@ class BIBTEX(CatalogPlugin):
for entry in data: for entry in data:
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation, outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
asccii_bibtex, citation_bibtex)) bibtexc, citation_bibtex))
outfile.close() outfile.close()

View File

@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
""" Collection of python utility-methodes commonly used by other """ Collection of python utility-methodes commonly used by other
bibliograph packages. bibliograph packages.
From http://pypi.python.org/pypi/bibliograph.core/ From http://pypi.python.org/pypi/bibliograph.core/
@ -62,10 +60,14 @@
DAMAGE. DAMAGE.
""" """
__docformat__ = 'reStructuredText'
__author__ = 'sengian <sengian1 at gmail.com>' __author__ = 'sengian <sengian1 at gmail.com>'
__docformat__ = 'restructuredtext en'
import re, string import re, string
from UserDict import UserDict
from calibre.constants import preferred_encoding
from calibre.utils.mreplace import MReplace
utf8enc2latex_mapping = { utf8enc2latex_mapping = {
# This is a mapping of Unicode characters to LaTeX equivalents. # This is a mapping of Unicode characters to LaTeX equivalents.
@ -2842,69 +2844,66 @@ entity_mapping = {
'"':'{"}', '"':'{"}',
} }
def ValidateCitationKey(text): class BibTeX:
""" def __init__(self):
removes characters not allowed in BibTeX keys self.rep_utf8 = MReplace(utf8enc2latex_mapping)
self.rep_ent = MReplace(entity_mapping)
#Set default conversion to ASCII BibTeX
self.ascii_bibtex = True
# This substitution is based on the description of cite key restrictions at
# http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]')
self.upper = re.compile(u'[' +
string.uppercase.decode(preferred_encoding) + u']')
self.escape = re.compile(u'[~#&%_]')
def ValidateCitationKey(self, text):
"""
removes characters not allowed in BibTeX keys
>>> ValidateCitationKey(DummyEntry('my@id'))
'myid'
"""
return self.invalid_cit.sub(u'', text)
>>> from bibliograph.core.utils import _validKey def braceUppercase(self, text):
>>> _validKey(DummyEntry('Foo Bar')) """ Convert uppercase letters to bibtex encoded uppercase
'FooBar' >>> braceUppercase('Foo Bar')
'{F}oo {B}ar'
"""
return self.upper.sub(lambda m: u'{%s}' % m.group(), text)
>>> _validKey(DummyEntry('my@id')) def resolveEntities(self, text):
'myid' #for entity, entity_map in entity_mapping.iteritems():
# text = text.replace(entity, entity_map)
#return text
return self.rep_ent.mreplace(text)
""" def resolveUnicode(self, text):
# This substitution is based on the description of cite key restrictions at #UTF-8 text as entry
# http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html #for unichar, latexenc in utf8enc2latex_mapping.iteritems() :
return re.sub(u'[ "@\',\\#}{~%&$^]', u'', text) # text = text.replace(unichar, latexenc)
text = self.rep_utf8.mreplace(text)
return text.replace(u'$}{$', u'')
def BraceUppercase(text): def escapeSpecialCharacters(self, text):
""" Convert uppercase letters to bibtex encoded uppercase """
latex escaping some (not all) special characters
"""
text.replace('\\', '\\\\')
return self.escape.sub(lambda m: u'\\%s' % m.group(), text)
>>> from bibliograph.core.utils import _braceUppercase #Calibre functions
>>> _braceUppercase('foo bar') #Option to go to official ASCII Bibtex or unofficial UTF-8
'foo bar' #Go from an unicode entry to ASCII Bibtex format without encoding
def utf8ToBibtex(self, text):
if len(text) == 0:
return ''
text.replace('\\', '\\\\')
text = self.resolveEntities(text)
if self.ascii_bibtex :
text = self.resolveUnicode(text)
return self.escapeSpecialCharacters(text)
>>> _braceUppercase('Foo Bar') def bibtex_author_format(self, item):
'{F}oo {B}ar' #Format authors for Bibtex compliance (get a list as input)
""" return self.utf8ToBibtex(u' and'.join([author for author in item]))
for uc in string.uppercase:
text = text.replace(uc, u'{%s}' % uc)
return text
def resolveEntities(text):
for entity, entity_map in entity_mapping.iteritems():
text = text.replace(entity, entity_map)
return text
def resolveUnicode(text):
#UTF-8 text as entry
for unichar, latexenc in utf8enc2latex_mapping.iteritems() :
text = text.replace(unichar, latexenc)
return text.replace(u'$}{$', u'')
def escapeSpecialCharacters(text):
"""
latex escaping some (not all) special characters
"""
text.replace('\\', '\\\\')
escape = ['~', '#', '&', '%', '_']
for c in escape:
text = text.replace(c, '\\' + c )
return text
#Calibre functions
#Go from an unicode entry to ASCII Bibtex format without encoding
#Option to go to official ASCII Bibtex or unofficial UTF-8
def utf8ToBibtex(text, asccii_bibtex = True):
if len(text) == 0:
return ''
text.replace('\\', '\\\\')
text = resolveEntities(text)
if asccii_bibtex :
text = resolveUnicode(text)
return escapeSpecialCharacters(text)
def bibtex_author_format(item):
#Format authors for Bibtex compliance (get a list as input)
return utf8ToBibtex(u' and'.join([author for author in item]))