def create_bibtex_entry(entry, fields, mode="mixed"):
    """Render one catalog record as the text of a BibTeX entry.

    entry  -- mapping of field name to value for a single record. It is
              indexed with "isbn", "id" and every name in ``fields``; unless
              mode is "misc" it is also read by check_entry_book_valid.
    fields -- iterable of field names to emit, in output order. Names that
              are not handled below are silently ignored.
    mode   -- "misc": always emit an @misc entry;
              "book": emit an @book entry, or '' if the record does not
              qualify as a book;
              any other value ("mixed"): @book when the record qualifies,
              @misc otherwise.

    Returns the entry text (kept as unicode; conversion to ASCII happens
    when the caller writes the file), or '' in strict "book" mode for a
    record that is not a valid book.
    """
    if mode != "misc" and check_entry_book_valid(entry):
        header = u'@book{'
    elif mode != "book":
        header = u'@misc{'
    else:
        # Strict book mode and the record is not a valid book.
        return ''

    # Citation key: the digits of the ISBN when there are any, otherwise the
    # calibre id. A missing (None) or digit-free ISBN falls back to the id
    # instead of raising or producing an empty key.
    isbn_digits = re.sub(u'[\\D]', u'', entry["isbn"] or u'')
    citation_key = isbn_digits if isbn_digits else str(entry["id"])
    bibtex_entry = [u' '.join(
        (header, u'%s' % utf8ToBibtex(ValidateCitationKey(citation_key))))]

    for field in fields:
        item = entry[field]
        # Skip fields that are missing or empty.
        if item is None:
            continue
        try:
            if len(item) == 0:
                continue
        except TypeError:
            # Values without a length (numbers, dates) are never "empty".
            pass

        if field == 'authors':
            bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item))

        elif field in ('title', 'publisher', 'cover', 'uuid',
                       'author_sort', 'series'):
            bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item)))

        elif field == 'id':
            bibtex_entry.append(u'calibreid = "%s"' % int(item))

        elif field == 'rating':
            bibtex_entry.append(u'rating = "%s"' % int(item))

        elif field == 'size':
            bibtex_entry.append(u'%s = "%s octets"' % (field, int(item)))

        elif field == 'tags':
            # A list to flatten
            bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item)))

        elif field == 'comments':
            # Newlines are replaced by spaces
            bibtex_entry.append(
                u'note = "%s"' % utf8ToBibtex(item.replace(u'\n', u' ')))

        elif field == 'isbn':
            # Could be 9, 10 or 13 digits
            bibtex_entry.append(u'isbn = "%s"' % re.sub(u'[\\D]', u'', item))

        elif field == 'formats':
            # Keep only the lower-cased extension of each format path.
            extensions = [fmt_path.rpartition('.')[2].lower()
                          for fmt_path in item]
            bibtex_entry.append(u'formats = "%s"' % u', '.join(extensions))

        elif field == 'series_index':
            bibtex_entry.append(u'volume = "%s"' % int(item))

        elif field == 'timestamp':
            # Date part only of the ISO timestamp.
            bibtex_entry.append(
                u'timestamp = "%s"' % isoformat(item).partition('T')[0])

        elif field == 'pubdate':
            bibtex_entry.append(u'year = "%s"' % item.year)
            # strftime output follows the locale, so decode it with the
            # preferred encoding before converting to BibTeX text.
            bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(
                item.strftime("%b").decode(preferred_encoding)))

    bibtex_entry = u',\n '.join(bibtex_entry)
    bibtex_entry += u' }\n\n'

    return bibtex_entry


def check_entry_book_valid(entry):
    """Return True when entry has the fields required for an @book entry.

    'title', 'authors' and 'publisher' must be neither None nor empty, and
    'pubdate' must not be None.
    """
    for field in ('title', 'authors', 'publisher'):
        if entry[field] is None or len(entry[field]) == 0:
            return False
    return entry['pubdate'] is not None
+ self.fmt = path_to_output.rpartition('.')[2] self.notification = notification @@ -238,7 +333,6 @@ class BIBTEX(CatalogPlugin): else: log(" Fields: %s" % opts_dict['fields']) - # If a list of ids are provided, don't use search_text if opts.ids: opts.search_text = None @@ -255,15 +349,16 @@ class BIBTEX(CatalogPlugin): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) #Open output and write entries - outfile = codecs.open(path_to_output, 'w', 'ascii') + #replace should be an option and not the default to generate errors for improving + outfile = codecs.open(path_to_output, 'w', 'ascii','replace') #File header - nb_entries = len(vars(opts)['ids']) + nb_entries = len(opts.ids) outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries)) outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n' % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding))) - #Entries + #Entries wrintng after Bibtex formating for entry in data: outfile.write(create_bibtex_entry(entry, fields)) diff --git a/src/calibre/library/bibtex.py b/src/calibre/utils/bibtex.py similarity index 95% rename from src/calibre/library/bibtex.py rename to src/calibre/utils/bibtex.py index c3586f5e2b..1452c7f5e1 100644 --- a/src/calibre/library/bibtex.py +++ b/src/calibre/utils/bibtex.py @@ -3,6 +3,9 @@ """ Collection of python utility-methodes commonly used by other bibliograph packages. 
From http://pypi.python.org/pypi/bibliograph.core/ + from Tom Gross + + Adapted for calibre use Zope Public License (ZPL) Version 2.1 @@ -60,15 +63,10 @@ """ __docformat__ = 'reStructuredText' -__author__ = 'Tom Gross ' +__author__ = 'sengian ' import os, re, string -#from locale import getlocale -from calibre.constants import preferred_encoding -from calibre import strftime -from calibre.utils.date import isoformat - utf8enc2latex_mapping = { # This is a mapping of Unicode characters to LaTeX equivalents. # The information has been extracted from @@ -77,6 +75,37 @@ utf8enc2latex_mapping = { # # The extraction has been done by the "create_unimap.py" script # located at . + + #Fix some encoding problem between cp1252 and latin1 + # from http://www.microsoft.com/typography/unicode/1252.htm + u'\x80': '{\\mbox{\\texteuro}}', # EURO SIGN + u'\x82': '{,}', # SINGLE LOW-9 QUOTATION MARK + u'\x83': '$f$', # LATIN SMALL LETTER F WITH HOOK + u'\x84': '{,,}', # DOUBLE LOW-9 QUOTATION MARK + u'\x85': '{\\ldots}', # HORIZONTAL ELLIPSIS + u'\x86': '{\\textdagger}', # DAGGER + u'\x87': '{\\textdaggerdbl}', # DOUBLE DAGGER + u'\x88': '{\textasciicircum}', # MODIFIER LETTER CIRCUMFLEX ACCENT + u'\x89': '{\\textperthousand}', # PER MILLE SIGN + u'\x8A': '{\\v{S}}', # LATIN CAPITAL LETTER S WITH CARON + u'\x8B': '{\\guilsinglleft}', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\x8C': '{\\OE}', # LATIN CAPITAL LIGATURE OE + u'\x8E': '{\\v{Z}}', # LATIN CAPITAL LETTER Z WITH CARON + u'\x91': '{`}', # LEFT SINGLE QUOTATION MARK + u'\x92': "{'}", # RIGHT SINGLE QUOTATION MARK + u'\x93': '{\\textquotedblleft}', # LEFT DOUBLE QUOTATION MARK + u'\x94': '{\\textquotedblright}', # RIGHT DOUBLE QUOTATION MARK + u'\x95': '{\\textbullet}', # BULLET + u'\x96': '{\\textendash}', # EN DASH + u'\x97': '{\\textemdash}', # EM DASH + u'\x98': '{\\texttildelow}', # SMALL TILDE + u'\x99': '{\\texttrademark}', # TRADE MARK SIGN + u'\x9A': '{\\v{s}}', # LATIN SMALL LETTER S WITH CARON + u'\x9B': 
def utf8ToBibtex(text, asccii_bibtex=True):
    """Convert a unicode string to BibTeX-safe text without encoding it.

    text          -- the string to convert; None or an empty string gives ''.
    asccii_bibtex -- when true (the default) the text is also passed through
                     resolveUnicode. The misspelt name is kept because it is
                     part of the keyword interface.

    The text goes through resolveEntities, optionally resolveUnicode, and
    finally escapeSpecialCharacters (presumably HTML entity resolution,
    Unicode-to-LaTeX mapping and LaTeX escaping -- confirm against those
    helpers).
    """
    if not text:
        return ''
    # str.replace returns a new string: the result has to be assigned,
    # otherwise the backslashes are never doubled.
    text = text.replace('\\', '\\\\')
    text = resolveEntities(text)
    if asccii_bibtex:
        text = resolveUnicode(text)
    return escapeSpecialCharacters(text)
def bibtex_author_format(item):
    """Format a list of author names as a BibTeX author field value.

    item -- iterable of author name strings.

    BibTeX separates the names of a name list with the word "and"
    surrounded by whitespace, so the names are joined with u' and ' (the
    previous u' and' glued "and" onto the following name). The joined
    string is then passed through utf8ToBibtex.
    """
    return utf8ToBibtex(u' and '.join(item))