From 76c0aeb57c5b863de091a0bc0b55cfd78895fd70 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 12 May 2007 00:22:53 +0000 Subject: [PATCH] Fix handling of filenames that have non ascii characters on systems with encodings other than utf8 --- src/libprs500/__init__.py | 5 +++++ src/libprs500/lrf/html/convert_from.py | 4 ++-- src/libprs500/lrf/txt/convert_from.py | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index 50154a71a4..a06dc9626a 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -41,6 +41,11 @@ import sys iswindows = 'win32' in sys.platform.lower() isosx = 'darwin' in sys.platform.lower() +def filename_to_utf8(name): + '''Return C{name} encoded in utf8. Unhandled characters are replaced. ''' + codec = 'cp1252' if iswindows else 'utf8' + return name.decode(codec, 'replace').encode('utf8') + def extract(path, dir): import os ext = os.path.splitext(path)[1][1:].lower() diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py index 24f0e30314..0ab43a2bd1 100644 --- a/src/libprs500/lrf/html/convert_from.py +++ b/src/libprs500/lrf/html/convert_from.py @@ -40,7 +40,7 @@ from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBl RuledLine, BookSetting from libprs500.lrf.pylrs.pylrs import Span as _Span from libprs500.lrf import ConversionError, option_parser, Book -from libprs500 import extract +from libprs500 import extract, filename_to_utf8 from libprs500.ptempfile import PersistentTemporaryFile class Span(_Span): @@ -1063,7 +1063,7 @@ def main(): sys.exit(1) src = args[0] if options.title == None: - options.title = os.path.splitext(os.path.basename(src))[0] + options.title = filename_to_utf8(os.path.splitext(os.path.basename(src))[0]) process_file(src, options) def console_query(dirpath, candidate, docs): diff --git a/src/libprs500/lrf/txt/convert_from.py b/src/libprs500/lrf/txt/convert_from.py index 82899c8f10..7c3c6aace7 100644 --- a/src/libprs500/lrf/txt/convert_from.py +++ b/src/libprs500/lrf/txt/convert_from.py @@ -20,7 +20,7 @@ import os, sys from libprs500.lrf import ConversionError, option_parser from libprs500.lrf import Book from libprs500.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting - +from libprs500 import filename_to_utf8 def main(): """ CLI for txt -> lrf conversions """ @@ -42,7 +42,7 @@ def main(): sys.exit(1) src = os.path.abspath(os.path.expanduser(args[0])) if options.title == None: - options.title = os.path.splitext(os.path.basename(src))[0] + options.title = filename_to_utf8(os.path.splitext(os.path.basename(src))[0]) try: convert_txt(src, options) except ConversionError, err: