def stripUnmatchedSyntax(self, text, open_character, close_character):
    """
    Strip unmatched BibTeX delimiters from ``text``.

    This is a method of the ``BibTeX`` class (indent it one level when
    placing it in the class body). It takes ``self`` like its sibling
    methods so that instance calls such as
    ``bibtexdict.stripUnmatchedSyntax(item, u'{', u'}')`` bind correctly;
    without ``self`` that call raises ``TypeError``.

    ``text`` is scanned left to right. Every ``close_character`` that has
    no preceding unclosed ``open_character`` is dropped, and every
    ``open_character`` that is never closed is dropped. Properly nested
    pairs and all other characters are kept in their original order.
    A backslash before a delimiter is not treated specially.

    :param text: string to clean
    :param open_character: single-character opening delimiter, e.g. ``u'{'``
    :param close_character: single-character closing delimiter, e.g. ``u'}'``
    :return: ``text`` itself when nothing had to be removed, otherwise a
        new string without the unmatched delimiters
    :raises ValueError: if either delimiter is not exactly one character
    """
    # Explicit check instead of ``assert``: asserts vanish under ``python -O``.
    if len(open_character) != 1 or len(close_character) != 1:
        raise ValueError(
            'open_character and close_character must each be a single character')

    pending_open = []  # indices of opening delimiters not yet closed
    remove = set()     # indices of characters to drop from the result
    for i, ch in enumerate(text):
        if ch == open_character:
            pending_open.append(i)
        elif ch == close_character:
            if pending_open:
                # Closes the most recent still-open delimiter.
                pending_open.pop()
            else:
                # Closing delimiter with nothing to close.
                remove.add(i)
    # Whatever is still open at the end of the text was never closed.
    remove.update(pending_open)

    if not remove:
        return text
    # Single pass rebuild; avoids repeated O(n) list.pop(i) calls.
    return ''.join([ch for i, ch in enumerate(text) if i not in remove])