mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Switch the BibTeX catalog to a class-based implementation and use MReplace for faster, more efficient replacements
This commit is contained in:
parent
ded56f11dd
commit
f6f96ae97c
@ -278,10 +278,10 @@ class BIBTEX(CatalogPlugin):
|
|||||||
|
|
||||||
from calibre.library.save_to_disk import preprocess_template
|
from calibre.library.save_to_disk import preprocess_template
|
||||||
#Bibtex functions
|
#Bibtex functions
|
||||||
from calibre.utils.bibtex import bibtex_author_format, utf8ToBibtex, ValidateCitationKey
|
from calibre.utils.bibtex import BibTeX
|
||||||
|
|
||||||
def create_bibtex_entry(entry, fields, mode, template_citation,
|
def create_bibtex_entry(entry, fields, mode, template_citation,
|
||||||
asccii_bibtex = True, citation_bibtex = True):
|
bibtexdict, citation_bibtex = True):
|
||||||
|
|
||||||
#Bibtex doesn't like UTF-8 but keep unicode until writing
|
#Bibtex doesn't like UTF-8 but keep unicode until writing
|
||||||
#Define starting chain or if book valid strict and not book return a Fail string
|
#Define starting chain or if book valid strict and not book return a Fail string
|
||||||
@ -297,7 +297,8 @@ class BIBTEX(CatalogPlugin):
|
|||||||
|
|
||||||
if citation_bibtex :
|
if citation_bibtex :
|
||||||
# Citation tag
|
# Citation tag
|
||||||
bibtex_entry.append(make_bibtex_citation(entry, template_citation, asccii_bibtex))
|
bibtex_entry.append(make_bibtex_citation(entry, template_citation,
|
||||||
|
bibtexdict))
|
||||||
bibtex_entry = [u' '.join(bibtex_entry)]
|
bibtex_entry = [u' '.join(bibtex_entry)]
|
||||||
|
|
||||||
for field in fields:
|
for field in fields:
|
||||||
@ -312,11 +313,11 @@ class BIBTEX(CatalogPlugin):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
if field == 'authors' :
|
if field == 'authors' :
|
||||||
bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item))
|
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
|
||||||
|
|
||||||
elif field in ['title', 'publisher', 'cover', 'uuid',
|
elif field in ['title', 'publisher', 'cover', 'uuid',
|
||||||
'author_sort', 'series'] :
|
'author_sort', 'series'] :
|
||||||
bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item, asccii_bibtex)))
|
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
|
||||||
|
|
||||||
elif field == 'id' :
|
elif field == 'id' :
|
||||||
bibtex_entry.append(u'calibreid = "%s"' % int(item))
|
bibtex_entry.append(u'calibreid = "%s"' % int(item))
|
||||||
@ -329,13 +330,13 @@ class BIBTEX(CatalogPlugin):
|
|||||||
|
|
||||||
elif field == 'tags' :
|
elif field == 'tags' :
|
||||||
#A list to flatten
|
#A list to flatten
|
||||||
bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item), asccii_bibtex))
|
bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item)))
|
||||||
|
|
||||||
elif field == 'comments' :
|
elif field == 'comments' :
|
||||||
#\n removal
|
#\n removal
|
||||||
item = item.replace(u'\r\n',u' ')
|
item = item.replace(u'\r\n',u' ')
|
||||||
item = item.replace(u'\n',u' ')
|
item = item.replace(u'\n',u' ')
|
||||||
bibtex_entry.append(u'note = "%s"' % utf8ToBibtex(item, asccii_bibtex))
|
bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))
|
||||||
|
|
||||||
elif field == 'isbn' :
|
elif field == 'isbn' :
|
||||||
# Could be 9, 10 or 13 digits
|
# Could be 9, 10 or 13 digits
|
||||||
@ -353,8 +354,7 @@ class BIBTEX(CatalogPlugin):
|
|||||||
|
|
||||||
elif field == 'pubdate' :
|
elif field == 'pubdate' :
|
||||||
bibtex_entry.append(u'year = "%s"' % item.year)
|
bibtex_entry.append(u'year = "%s"' % item.year)
|
||||||
bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(strftime("%b", item),
|
bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
|
||||||
asccii_bibtex))
|
|
||||||
|
|
||||||
bibtex_entry = u',\n '.join(bibtex_entry)
|
bibtex_entry = u',\n '.join(bibtex_entry)
|
||||||
bibtex_entry += u' }\n\n'
|
bibtex_entry += u' }\n\n'
|
||||||
@ -371,7 +371,7 @@ class BIBTEX(CatalogPlugin):
|
|||||||
else :
|
else :
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def make_bibtex_citation(entry, template_citation, asccii_bibtex):
|
def make_bibtex_citation(entry, template_citation, bibtexclass):
|
||||||
|
|
||||||
#define a function to replace the template entry by its value
|
#define a function to replace the template entry by its value
|
||||||
def tpl_replace(objtplname) :
|
def tpl_replace(objtplname) :
|
||||||
@ -392,8 +392,9 @@ class BIBTEX(CatalogPlugin):
|
|||||||
return u''
|
return u''
|
||||||
|
|
||||||
if len(template_citation) >0 :
|
if len(template_citation) >0 :
|
||||||
tpl_citation = utf8ToBibtex(ValidateCitationKey(re.sub(u'\{[^{}]*\}',
|
tpl_citation = bibtexclass.utf8ToBibtex(
|
||||||
tpl_replace, template_citation)), asccii_bibtex)
|
bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}',
|
||||||
|
tpl_replace, template_citation)))
|
||||||
|
|
||||||
if len(tpl_citation) >0 :
|
if len(tpl_citation) >0 :
|
||||||
return tpl_citation
|
return tpl_citation
|
||||||
@ -405,9 +406,9 @@ class BIBTEX(CatalogPlugin):
|
|||||||
template_citation = u'%s' % str(entry["id"])
|
template_citation = u'%s' % str(entry["id"])
|
||||||
|
|
||||||
if asccii_bibtex :
|
if asccii_bibtex :
|
||||||
return ValidateCitationKey(template_citation.encode('ascii', 'replace'))
|
return bibtexclass.ValidateCitationKey(template_citation.encode('ascii', 'replace'))
|
||||||
else :
|
else :
|
||||||
return ValidateCitationKey(template_citation)
|
return bibtexclass.ValidateCitationKey(template_citation)
|
||||||
|
|
||||||
self.fmt = path_to_output.rpartition('.')[2]
|
self.fmt = path_to_output.rpartition('.')[2]
|
||||||
self.notification = notification
|
self.notification = notification
|
||||||
@ -475,13 +476,16 @@ class BIBTEX(CatalogPlugin):
|
|||||||
if not len(data):
|
if not len(data):
|
||||||
log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
|
log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
|
||||||
|
|
||||||
|
#Initialize BibTeX class
|
||||||
|
bibtexc = BibTeX()
|
||||||
|
|
||||||
#Entries writing after Bibtex formating (or not)
|
#Entries writing after Bibtex formating (or not)
|
||||||
if bibfile_enc != 'ascii' :
|
if bibfile_enc != 'ascii' :
|
||||||
asccii_bibtex = False
|
bibtexc.ascii_bibtex = False
|
||||||
else :
|
else :
|
||||||
asccii_bibtex = True
|
bibtexc.ascii_bibtex = True
|
||||||
|
|
||||||
#Check and go to default in case of bad CLI
|
#Check citation choice and go to default in case of bad CLI
|
||||||
if isinstance(opts.impcit, (StringType, UnicodeType)) :
|
if isinstance(opts.impcit, (StringType, UnicodeType)) :
|
||||||
if opts.impcit == 'False' :
|
if opts.impcit == 'False' :
|
||||||
citation_bibtex= False
|
citation_bibtex= False
|
||||||
@ -493,6 +497,7 @@ class BIBTEX(CatalogPlugin):
|
|||||||
else :
|
else :
|
||||||
citation_bibtex= opts.impcit
|
citation_bibtex= opts.impcit
|
||||||
|
|
||||||
|
#Preprocess for error and light correction
|
||||||
template_citation = preprocess_template(opts.bib_cit)
|
template_citation = preprocess_template(opts.bib_cit)
|
||||||
|
|
||||||
#Open output and write entries
|
#Open output and write entries
|
||||||
@ -514,7 +519,7 @@ class BIBTEX(CatalogPlugin):
|
|||||||
|
|
||||||
for entry in data:
|
for entry in data:
|
||||||
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
|
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
|
||||||
asccii_bibtex, citation_bibtex))
|
bibtexc, citation_bibtex))
|
||||||
|
|
||||||
outfile.close()
|
outfile.close()
|
||||||
|
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
""" Collection of python utility-methodes commonly used by other
|
""" Collection of python utility-methodes commonly used by other
|
||||||
bibliograph packages.
|
bibliograph packages.
|
||||||
From http://pypi.python.org/pypi/bibliograph.core/
|
From http://pypi.python.org/pypi/bibliograph.core/
|
||||||
@ -62,10 +60,14 @@
|
|||||||
DAMAGE.
|
DAMAGE.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__docformat__ = 'reStructuredText'
|
|
||||||
__author__ = 'sengian <sengian1 at gmail.com>'
|
__author__ = 'sengian <sengian1 at gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, string
|
import re, string
|
||||||
|
from UserDict import UserDict
|
||||||
|
|
||||||
|
from calibre.constants import preferred_encoding
|
||||||
|
from calibre.utils.mreplace import MReplace
|
||||||
|
|
||||||
utf8enc2latex_mapping = {
|
utf8enc2latex_mapping = {
|
||||||
# This is a mapping of Unicode characters to LaTeX equivalents.
|
# This is a mapping of Unicode characters to LaTeX equivalents.
|
||||||
@ -2842,69 +2844,66 @@ entity_mapping = {
|
|||||||
'"':'{"}',
|
'"':'{"}',
|
||||||
}
|
}
|
||||||
|
|
||||||
class BibTeX:
    """Convert unicode metadata strings into text that is safe in a BibTeX file.

    The replacement tables and regular expressions are built once per
    instance so that each conversion is a single regex pass instead of one
    ``str.replace()`` call per table entry.

    Set ``ascii_bibtex`` to False to keep non-ASCII characters as they are
    (unofficial UTF-8 BibTeX) instead of converting them to LaTeX markup.
    """

    def __init__(self):
        self.rep_utf8 = MReplace(utf8enc2latex_mapping)
        self.rep_ent = MReplace(entity_mapping)
        #Set default conversion to ASCII BibTeX
        self.ascii_bibtex = True
        # This substitution is based on the description of cite key restrictions at
        # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
        # NOTE(review): '\\#' reaches the regex as an escaped '#', so the
        # backslash character itself is not part of this set - confirm that
        # this is intended.
        self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]')
        # Uppercase letters from string.uppercase, decoded to unicode with the
        # preferred encoding
        self.upper = re.compile(u'[' +
            string.uppercase.decode(preferred_encoding) + u']')
        self.escape = re.compile(u'[~#&%_]')

    def ValidateCitationKey(self, text):
        """Return text without the characters that are not allowed in a
        BibTeX citation key.

        Example: u'my@id' gives u'myid' and u'Foo Bar' gives u'FooBar'.
        """
        return self.invalid_cit.sub(u'', text)

    def braceUppercase(self, text):
        """Wrap every uppercase letter in braces.

        Example: u'Foo Bar' gives u'{F}oo {B}ar'.
        """
        return self.upper.sub(lambda m: u'{%s}' % m.group(), text)

    def resolveEntities(self, text):
        """Replace every key of entity_mapping found in text by its value."""
        return self.rep_ent.mreplace(text)

    def resolveUnicode(self, text):
        """Replace every key of utf8enc2latex_mapping found in text by its
        LaTeX equivalent (expects unicode text).
        """
        text = self.rep_utf8.mreplace(text)
        # Merge adjacent replacements by dropping the '$}{$' left between them
        return text.replace(u'$}{$', u'')

    def escapeSpecialCharacters(self, text):
        """LaTeX-escape some (not all) special characters.

        Backslashes are doubled first, then each of ~ # & % _ is prefixed
        with a backslash.
        """
        # str.replace() returns a new string: the result has to be kept,
        # otherwise backslashes are silently left unescaped.
        text = text.replace('\\', '\\\\')
        return self.escape.sub(lambda m: u'\\%s' % m.group(), text)

    #Calibre functions
    def utf8ToBibtex(self, text):
        """Go from a unicode entry to BibTeX format without encoding it.

        Depending on ``ascii_bibtex`` the result is official ASCII BibTeX
        (non-ASCII characters converted to LaTeX markup) or unofficial UTF-8
        BibTeX (non-ASCII characters kept).  Returns '' for empty input.
        """
        if len(text) == 0:
            return ''
        text = self.resolveEntities(text)
        # Escape before resolveUnicode() so that the backslashes and other
        # markup it inserts are not escaped a second time.
        text = self.escapeSpecialCharacters(text)
        if self.ascii_bibtex :
            text = self.resolveUnicode(text)
        return text

    def bibtex_author_format(self, item):
        """Format an iterable of author names as a BibTeX author field value.

        Names are separated by ' and ' (with a space on both sides), then
        the result goes through utf8ToBibtex().
        """
        return self.utf8ToBibtex(u' and '.join(item))
|
|
||||||
|
32
src/calibre/utils/mreplace.py
Normal file
32
src/calibre/utils/mreplace.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#multiple replace from dictionary: http://code.activestate.com/recipes/81330/
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, sengian <sengian1 @ gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
from UserDict import UserDict
|
||||||
|
|
||||||
|
class MReplace(UserDict):
    """Dictionary that replaces all of its keys found in a text in one pass.

    The keys are joined into a single alternation regex and every match is
    replaced by the value stored under the matched key.  Longer keys come
    first in the alternation so that a key which is a prefix of another key
    cannot shadow it.

    The regex is built when the instance is created.  It is NOT rebuilt
    automatically when the mapping is modified afterwards: call
    compile_regex() after adding or removing keys.
    """

    def __init__(self, dict = None):
        # The parameter keeps its original name (even though it shadows the
        # builtin) so that keyword callers continue to work.
        UserDict.__init__(self, dict)
        # Source of the compiled pattern, or None when nothing is compiled
        self.re = None
        # Compiled pattern matching any key, or None
        self.regex = None
        self.compile_regex()

    def compile_regex(self):
        """(Re)build the regex from the current keys.

        The pattern is only recompiled when its source text changed.  When
        the mapping is empty any previously compiled pattern is dropped.
        """
        if not self.data:
            # Do not keep a pattern that refers to keys which no longer exist
            self.re = None
            self.regex = None
            return
        keys = sorted(self.data, key=len, reverse=True)
        tmp = "(%s)" % "|".join(map(re.escape, keys))
        if self.re != tmp:
            self.re = tmp
            self.regex = re.compile(self.re)

    def __call__(self, mo):
        """Return the replacement for a match object (callback for sub())."""
        return self[mo.group(0)]

    def mreplace(self, text):
        """Return text with every key replaced by its value.

        Uses the already compiled regex; text is returned unchanged when the
        mapping is empty or nothing has been compiled.
        """
        if not self.data or self.regex is None:
            return text
        return self.regex.sub(self, text)
|
Loading…
x
Reference in New Issue
Block a user