From 5770808fcf903d1caaa30fdf64873a02f83e194c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 23 Oct 2009 19:29:13 -0600 Subject: [PATCH 1/5] PML Output: Add .png to image names. Fix removing excessive newlines from PML output. PMLZ Output: Name images correctly. --- src/calibre/ebooks/pdb/ereader/__init__.py | 6 +-- src/calibre/ebooks/pml/output.py | 16 +++---- src/calibre/ebooks/pml/pmlml.py | 50 ++++++++++------------ 3 files changed, 32 insertions(+), 40 deletions(-) diff --git a/src/calibre/ebooks/pdb/ereader/__init__.py b/src/calibre/ebooks/pdb/ereader/__init__.py index 3f08b068cb..89560c9448 100644 --- a/src/calibre/ebooks/pdb/ereader/__init__.py +++ b/src/calibre/ebooks/pdb/ereader/__init__.py @@ -16,11 +16,11 @@ def image_name(name, taken_names=[]): cut = len(name) - 32 names = name[:10] namee = name[10+cut:] - name = names + namee + name = '%s%s.png' % (names, namee) while name in taken_names: - for i in xrange(9999999999999999999999999999999): - name = '%s%s' % (name[:-len('%s' % i)], i) + for i in xrange(999999999999999999999999999): + name = '%s%s.png' % (name[:-len('%s' % i)], i) name = name.ljust(32, '\x00')[:32] diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py index 360e63c98e..774fc4c8d1 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/pml/output.py @@ -18,7 +18,7 @@ from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OptionRecommendation from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile -from calibre.ebooks.oeb.base import OEB_IMAGES +from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES from calibre.ebooks.pml.pmlml import PMLMLizer class PMLOutput(OutputFormatPlugin): @@ -40,28 +40,26 @@ class PMLOutput(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): with TemporaryDirectory('_pmlz_output') as tdir: pmlmlizer = PMLMLizer(log) - content = pmlmlizer.extract_content(oeb_book, opts) + pml = unicode(pmlmlizer.extract_content(oeb_book, opts)) with open(os.path.join(tdir, 'index.pml'), 'wb') as out: - out.write(content.encode(opts.output_encoding, 'replace')) + out.write(pml.encode(opts.output_encoding, 'replace')) - self.write_images(oeb_book.manifest, tdir) + self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir) log.debug('Compressing output...') pmlz = ZipFile(output_path, 'w') pmlz.add_dir(tdir) - def write_images(self, manifest, out_dir): + def write_images(self, manifest, image_hrefs, out_dir): for item in manifest: - if item.media_type in OEB_IMAGES: + if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys(): im = Image.open(cStringIO.StringIO(item.data)) data = cStringIO.StringIO() im.save(data, 'PNG') data = data.getvalue() - name = os.path.splitext(os.path.basename(item.href))[0] + '.png' - path = os.path.join(out_dir, name) + path = os.path.join(out_dir, image_hrefs[item.href]) with open(path, 'wb') as out: out.write(data) - diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 7b1813256e..862f0ea0ae 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en' Transform OEB content into PML markup ''' -import os import re from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace @@ -138,16 +137,13 @@ class PMLMLizer(object): aid = self.link_hrefs[aid] return u'\\Q="%s"' % aid + def remove_newlines(self, text): + text = text.replace('\r\n', ' ') + text = text.replace('\n', ' ') + text = text.replace('\r', ' ') + return text + def clean_text(self, text): - # Remove excess spaces at beginning and end of lines - text = re.sub('(?m)^[ ]+', '', text) - text = re.sub('(?m)[ ]+$', '', text) - - # Remove excessive newlines - text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text) - text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text) - text = re.sub('[ ]{2,}', ' ', text) - # Remove excessive \p tags text = re.sub(r'\\p\s*\\p', '', text) @@ -166,6 +162,17 @@ class PMLMLizer(object): # Turn all unicode characters into their PML hex equivelent text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text) + # Remove excess spaces at beginning and end of lines + text = re.sub('(?m)^[ ]+', '', text) + text = re.sub('(?m)[ ]+$', '', text) + + # Remove excessive spaces + text = re.sub('[ ]{2,}', ' ', text) + + # Remove excessive newlines + text = re.sub('\n[ ]+\n', '\n\n', text) + text = re.sub('\n\n\n+', '\n\n', text) + return text def dump_text(self, elem, stylizer, page, tag_stack=[]): @@ -197,7 +204,7 @@ class PMLMLizer(object): if len(self.image_hrefs.keys()) == 0: self.image_hrefs[page.abshref(elem.attrib['src'])] = 'cover.png' else: - self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00') + self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00') text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])]) if tag == 'hr': w = '\\w' @@ -251,7 +258,7 @@ class PMLMLizer(object): # Proccess tags that contain text. if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '': - text.append(self.elem_text(elem, tag_stack)) + text.append(self.remove_newlines(elem.text)) for item in elem: text += self.dump_text(item, stylizer, page, tag_stack) @@ -261,32 +268,19 @@ class PMLMLizer(object): close_tag_list.insert(0, tag_stack.pop()) text += self.close_tags(close_tag_list) if tag in SEPARATE_TAGS: - text.append(os.linesep + os.linesep) + text.append('\n\n') if 'block' not in tag_stack: - text.append(os.linesep + os.linesep) + text.append('\n\n') #if style['page-break-after'] == 'always': # text.append('\\p') if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '': - text.append(self.elem_tail(elem, tag_stack)) + text.append(self.remove_newlines(elem.tail)) return text - def elem_text(self, elem, tag_stack): - return self.block_text(elem.text, 'block' in tag_stack) - - def elem_tail(self, elem, tag_stack): - return self.block_text(elem.tail, 'block' in tag_stack) - - def block_text(self, text, in_block): - if in_block: - text = text.replace('\n\r', ' ') - text = text.replace('\n', ' ') - text = text.replace('\r', ' ') - return text - def close_tags(self, tags): text = [u''] for i in range(0, len(tags)): From 20372ec444ef9a14963080041a09b3d8ef963528 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 24 Oct 2009 09:18:35 -0600 Subject: [PATCH 2/5] New recipe for Irish Independant by Neil Grogan --- resources/recipes/irish_independent.recipe | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 resources/recipes/irish_independent.recipe diff --git a/resources/recipes/irish_independent.recipe b/resources/recipes/irish_independent.recipe new file mode 100644 index 0000000000..268b25b381 --- /dev/null +++ b/resources/recipes/irish_independent.recipe @@ -0,0 +1,44 @@ +__license__ = 'GPL v3' +__copyright__ = '2009 Neil Grogan' +# +# Irish Independent Recipe +# + +from calibre.web.feeds.news import BasicNewsRecipe + +class IrishIndependent(BasicNewsRecipe): + title = u'Irish Independent' + description = 'Irish and World news from Irelands Bestselling Daily Broadsheet' + __author__ = 'Neil Grogan' + language = 'en_UK' + oldest_article = 7 + max_articles_per_feed = 100 + remove_tags_before = dict(id='article') + remove_tags_after = [dict(name='div', attrs={'class':'toolsBottom'})] + remove_tags = [ + dict(name='div',attrs={'class':'toolsBottom'}), + dict(name='div',attrs={'class':'toolsTop'}), + dict(name='div',attrs={'class':'boxRelated'}), + dict(name='div',attrs={'class':'section first'}), + dict(name='div',attrs={'class':'tabIt'}), + dict(name='div',attrs={'class':'inner'}) + ] + + feeds = [ + (u'Frontpage News', u'http://www.independent.ie/rss'), + (u'World News', u'http://www.independent.ie/world-news/rss'), + (u'Technology', u'http://www.independent.ie/business/technology/rss'), + (u'Sport', u'http://www.independent.ie/sport/rss'), + (u'Entertainment', u'http://www.independent.ie/entertainment/rss'), + (u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'), + (u'Education',u'http://www.independent.ie/education/rss'), + (u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'), + (u'Travel', u'http://www.independent.ie/travel/rss'), + (u'Letters', u'http://www.independent.ie/opinion/letters/rss'), + (u'Weather', u'http://www.independent.ie/weather/rss') + ] + +# If text only articles are desired +#def print_version(self, url): +# return '%s?service=Print' % url + From 614522802feac42cd23af2011fdde176cd81e90a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 24 Oct 2009 09:48:01 -0600 Subject: [PATCH 3/5] IGN:Correct mobi.writer merge --- src/calibre/ebooks/mobi/writer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index dc501ad0f6..69aecba212 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -247,11 +247,15 @@ class Serializer(object): if not item.linear: self.breaks.append(buffer.tell() - 1) self.id_offsets[item.href] = buffer.tell() + # Kindle periodical articles are contained in a
tag + buffer.write('
') for elem in item.data.find(XHTML('body')): - buffer.write('') self.serialize_elem(elem, item) + # Kindle periodical article end marker + buffer.write('
') if self.write_page_breaks_after_item: buffer.write('') + buffer.write('
') def serialize_elem(self, elem, item, nsrmap=NSRMAP): buffer = self.buffer From 72dd492d8dd862e36a94e9626d6d30df974e96dc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 24 Oct 2009 10:39:30 -0600 Subject: [PATCH 4/5] IGN:OSX Freeze: turn off argv emulation --- setup/installer/osx/freeze.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/installer/osx/freeze.py b/setup/installer/osx/freeze.py index 281432fcf3..f30a037703 100644 --- a/setup/installer/osx/freeze.py +++ b/setup/installer/osx/freeze.py @@ -386,7 +386,7 @@ def main(): { 'optimize' : 2, 'dist_dir' : 'build/py2app', - 'argv_emulation' : True, + 'argv_emulation' : False, 'iconfile' : icon, 'frameworks': ['libusb.dylib', 'libunrar.dylib'], 'includes' : ['sip', 'pkg_resources', 'PyQt4.QtXml', From 712e082dc442fb1ad365dc19fafb52fb524a288c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 24 Oct 2009 14:42:02 -0600 Subject: [PATCH 5/5] Fix #3856 (Error message when Kindle 2 is attached) --- src/calibre/gui2/device.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index ec1f8984e2..b03101b23e 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -680,11 +680,13 @@ class DeviceGUI(object): auto = [] if do_auto_convert and _auto_ids: for id in _auto_ids: - formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')] - formats = formats if formats != None else [] - if list(set(formats).intersection(available_input_formats())) != [] and list(set(self.device_manager.device_class.settings().format_map).intersection(available_output_formats())) != []: + dbfmts = self.library_view.model().db.formats(id, index_is_id=True) + formats = [] if dbfmts is None else \ + [f.lower() for f in dbfmts.split(',')] + if set(formats).intersection(available_input_formats()) \ + and set(self.device_manager.device_class.settings().format_map).intersection(available_output_formats()): auto.append(id) - if auto != []: + if auto: format = None for fmt in self.device_manager.device_class.settings().format_map: if fmt in list(set(self.device_manager.device_class.settings().format_map).intersection(set(available_output_formats()))):