def stripUnmatchedSyntax(self, text, open_character, close_character):
    """
    Remove all delimiters from ``text`` when they are not balanced.

    ``text`` is scanned left to right while tracking the nesting depth of
    ``open_character`` / ``close_character``. If a closing delimiter shows
    up with nothing open, or an opening delimiter is never closed, the
    delimiters are considered unbalanced and *every* occurrence of both
    characters is removed from the text (not only the offending one).
    Balanced text is returned unchanged.

    Note: when ``open_character`` and ``close_character`` are the same
    character, each occurrence counts as an opener, so any text containing
    it is treated as unbalanced and has that character removed.

    :param text: string to check
    :param open_character: single-character opening delimiter, e.g. u'{'
    :param close_character: single-character closing delimiter, e.g. u'}'
    :return: ``text`` itself when balanced, otherwise ``text`` with all
        opening and closing delimiters removed
    :raises ValueError: if either delimiter is not exactly one character
    """
    # Reject empty delimiters too: an empty string can never equal a
    # character of the text, so the call would silently do nothing.
    if len(open_character) != 1 or len(close_character) != 1:
        raise ValueError("Only single characters accepted")

    # Only the nesting depth matters, so a counter replaces a stack.
    depth = 0
    balanced = True
    for char in text:
        if char == open_character:
            depth += 1
        elif char == close_character:
            if depth == 0:
                # Orphan closing delimiter: no need to scan any further.
                balanced = False
                break
            depth -= 1

    # A positive depth at the end means some opener was never closed.
    if balanced and depth == 0:
        return text
    return text.replace(open_character, u'').replace(close_character, u'')