diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 807ce1def5..6299bb8782 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -246,6 +246,23 @@ class CurrentDir(object): os.chdir(self.cwd) +class FileWrapper(object): + ''' + Used primarily with pyPdf to ensure the stream is properly closed. + ''' + + def __init__(self, stream): + for x in ('read', 'seek', 'tell'): + setattr(self, x, getattr(stream, x)) + + def __exit__(self, *args): + for x in ('read', 'seek', 'tell'): + setattr(self, x, None) + + def __enter__(self): + return self + + def detect_ncpus(): """Detects the number of effective CPUs in the system""" try: diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py index c3a4fa94b0..1cdf9863b4 100644 --- a/src/calibre/devices/cybookg3/driver.py +++ b/src/calibre/devices/cybookg3/driver.py @@ -7,7 +7,6 @@ Device driver for Bookeen's Cybook Gen 3 import os, shutil from itertools import cycle -from calibre.ebooks.metadata import authors_to_string from calibre.devices.errors import DeviceError, FreeSpaceError from calibre.devices.usbms.driver import USBMS import calibre.devices.cybookg3.t2b as t2b @@ -78,22 +77,21 @@ class CYBOOKG3(USBMS): newpath = path mdata = metadata.next() - if self.SUPPORTS_SUB_DIRS: - if 'tags' in mdata.keys(): - for tag in mdata['tags']: - if tag.startswith(_('News')): - newpath = os.path.join(newpath, 'news') - newpath = os.path.join(newpath, mdata.get('title', '')) - newpath = os.path.join(newpath, mdata.get('timestamp', '')) - elif tag.startswith('/'): - newpath = path - newpath += tag - newpath = os.path.normpath(newpath) - break + if 'tags' in mdata.keys(): + for tag in mdata['tags']: + if tag.startswith(_('News')): + newpath = os.path.join(newpath, 'news') + newpath = os.path.join(newpath, mdata.get('title', '')) + newpath = os.path.join(newpath, mdata.get('timestamp', '')) + elif tag.startswith('/'): + newpath = path + newpath += tag + newpath = os.path.normpath(newpath) + break - if newpath == path: - newpath = os.path.join(newpath, authors_to_string(mdata.get('authors', ''))) - newpath = os.path.join(newpath, mdata.get('title', '')) + if newpath == path: + newpath = os.path.join(newpath, mdata.get('authors', _('Unknown'))) + newpath = os.path.join(newpath, mdata.get('title', _('Unknown'))) if not os.path.exists(newpath): os.makedirs(newpath) diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index efc48a2dff..a704eb1ec3 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -119,19 +119,44 @@ class PRS505(CLI, Device): paths, ctimes = [], [] names = iter(names) + metadata = iter(metadata) for infile in files: close = False if not hasattr(infile, 'read'): infile, close = open(infile, 'rb'), True infile.seek(0) - name = names.next() - paths.append(os.path.join(path, name)) - if not os.path.exists(os.path.dirname(paths[-1])): - os.makedirs(os.path.dirname(paths[-1])) + + newpath = path + mdata = metadata.next() + + if 'tags' in mdata.keys(): + for tag in mdata['tags']: + if tag.startswith(_('News')): + newpath = os.path.join(newpath, 'news') + newpath = os.path.join(newpath, mdata.get('title', '')) + newpath = os.path.join(newpath, mdata.get('timestamp', '')) + elif tag.startswith('/'): + newpath = path + newpath += tag + newpath = os.path.normpath(newpath) + break + + if newpath == path: + newpath = os.path.join(newpath, mdata.get('authors', _('Unknown'))) + newpath = os.path.join(newpath, mdata.get('title', _('Unknown'))) + + if not os.path.exists(newpath): + os.makedirs(newpath) + + filepath = os.path.join(newpath, names.next()) + paths.append(filepath) + self.put_file(infile, paths[-1], replace_file=True) + if close: infile.close() ctimes.append(os.path.getctime(paths[-1])) + return zip(paths, sizes, ctimes, cycle([on_card])) @classmethod diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index bb7a104fa4..aa40f90c25 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -124,8 +124,8 @@ class USBMS(CLI, Device): break if newpath == path: - newpath = os.path.join(newpath, authors_to_string(mdata.get('authors', ''))) - newpath = os.path.join(newpath, mdata.get('title', '')) + newpath = os.path.join(newpath, mdata.get('authors', _('Unknown'))) + newpath = os.path.join(newpath, mdata.get('title', _('Unknown'))) if not os.path.exists(newpath): os.makedirs(newpath) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 6b58d2d18d..632a7a3291 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -73,7 +73,7 @@ class HTMLPreProcessor(object): (re.compile(r''), lambda match : '

'), # Un wrap lines - (re.compile(r'(?<=\w)\s*\s*\s*\s*(?=\w)'), lambda match: ' '), + (re.compile(r'(?<=\w)\s*\s*\s*<(i|b|u)>\s*(?=\w)'), lambda match: ' '), (re.compile(r'(?<=\w)\s*\s*(?=\w)', re.UNICODE), lambda match: ' '), # Clean up spaces (re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '), diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index a2b18e21ac..e99025057f 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -7,6 +7,7 @@ __copyright__ = '2008, Kovid Goyal ' import sys, os, cStringIO from threading import Thread +from calibre import FileWrapper from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ptempfile import TemporaryDirectory from pyPdf import PdfFileReader, PdfFileWriter @@ -34,18 +35,19 @@ def get_metadata(stream, extract_cover=True): traceback.print_exc() try: - info = PdfFileReader(stream).getDocumentInfo() - if info.title: - mi.title = info.title - if info.author: - src = info.author.split('&') - authors = [] - for au in src: - authors += au.split(',') - mi.authors = authors - mi.author = info.author - if info.subject: - mi.category = info.subject + with FileWrapper(stream) as stream: + info = PdfFileReader(stream).getDocumentInfo() + if info.title: + mi.title = info.title + if info.author: + src = info.author.split('&') + authors = [] + for au in src: + authors += au.split(',') + mi.authors = authors + mi.author = info.author + if info.subject: + mi.category = info.subject except Exception, err: msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err)) print >>sys.stderr, msg.encode('utf8') diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 6733d3aadc..e8c3889e41 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -9,7 +9,7 @@ import os from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.pdf.pdftohtml import pdftohtml -from calibre.ebooks.metadata.opf import OPFCreator +from calibre.ebooks.metadata.opf2 import OPFCreator class PDFInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index aafc36989e..34fafc91fc 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -9,7 +9,7 @@ import os from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.markdown import markdown -from calibre.ebooks.metadata.opf import OPFCreator +from calibre.ebooks.metadata.opf2 import OPFCreator class TXTInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py index 423e668a56..dd87394507 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/txt/output.py @@ -18,14 +18,14 @@ class TXTOutput(OutputFormatPlugin): options = set([ OptionRecommendation(name='newline', recommended_value='system', - level=OptionRecommendation.LOW, long_switch='newline', + level=OptionRecommendation.LOW, short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), help=_('Type of newline to use. Options are %s. Default is \'system\'. ' 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' 'For Mac OS X use \'unix\'. \'system\' will default to the newline ' 'type used by this OS.' % sorted(TxtNewlines.NEWLINE_TYPES.keys()))), OptionRecommendation(name='prepend_metadata', recommended_value='false', - level=OptionRecommendation.LOW, long_switch='prepend_metadata', + level=OptionRecommendation.LOW, choices=['true', 'false'], help=_('Write the title and author to the beginning of the file. ' 'Default is \'true\'. Use \'false\' to disable.')), diff --git a/src/calibre/ebooks/txt/writer.py b/src/calibre/ebooks/txt/writer.py index 0f84c32804..ea613010ef 100644 --- a/src/calibre/ebooks/txt/writer.py +++ b/src/calibre/ebooks/txt/writer.py @@ -76,7 +76,7 @@ class TxtWriter(object): text = re.sub('(?imu)' % tag, '\n\n', text) for tag in ['hr', 'br']: - text = re.sub('(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text) + text = re.sub('(?imu)<[ ]*%s.*?>' % tag, '\n\n', text) # Remove any tags that do not need special processing. text = re.sub('<.*?>', '', text)