Sync to pluginize

John Schember 2009-04-16 18:27:04 -04:00
commit e773e0e13e
43 changed files with 1015 additions and 711 deletions

View File

@ -571,9 +571,6 @@ Condition 08195201-0797-932C-4B51-E5EF9D1D41BD -active Yes -parent 710F2507-2557
Condition 2E18F4AE-F1BB-5C62-2900-73A576A49261 -active Yes -parent 710F2507-2557-652D-EA55-440D710EFDFA -title {String Is Condition} -component StringIsCondition -TreeObject::id 2E18F4AE-F1BB-5C62-2900-73A576A49261
InstallComponent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -setup Install -type action -conditions 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -title {Message Box} -component MessageBox -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708
Condition 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -active Yes -parent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -title {String Is Condition} -component StringIsCondition -TreeObject::id 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5
InstallComponent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -setup Install -type action -conditions {E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C A8856922-E6C1-160B-E55C-5C1806A89136} -title {Launch Application Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708
Condition E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {File Exists Condition} -component FileExistsCondition -TreeObject::id E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C
Condition A8856922-E6C1-160B-E55C-5C1806A89136 -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {String Is Condition} -component StringIsCondition -TreeObject::id A8856922-E6C1-160B-E55C-5C1806A89136
InstallComponent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -setup Install -type action -conditions {96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 FBA33088-C809-DD6B-D337-EADBF1CEE966} -title {Desktop Shortcut Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708
Condition 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0
Condition FBA33088-C809-DD6B-D337-EADBF1CEE966 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {String Is Condition} -component StringIsCondition -TreeObject::id FBA33088-C809-DD6B-D337-EADBF1CEE966
@ -630,7 +627,7 @@ Condition 03FA7EEF-F626-B69A-09C6-0AA7A54EE9E7 -active Yes -parent E32519F3-A540
InstallComponent D86BBA5C-4903-33BA-59F8-4266A3D45896 -setup Install -type action -conditions {C4C0A903-CF2A-D25A-27AB-A64219FB7E70 5EC7056B-6F90-311E-2C6F-76E96164CFFD} -title {Install Quick Launch Shortcut} -component InstallWindowsShortcut -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC
Condition C4C0A903-CF2A-D25A-27AB-A64219FB7E70 -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {String Is Condition} -component StringIsCondition -TreeObject::id C4C0A903-CF2A-D25A-27AB-A64219FB7E70
Condition 5EC7056B-6F90-311E-2C6F-76E96164CFFD -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 5EC7056B-6F90-311E-2C6F-76E96164CFFD
InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC
InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active No -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC
Condition 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E
Condition E560F3A1-208D-2B4F-2C87-E08595F8E1CD -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id E560F3A1-208D-2B4F-2C87-E08595F8E1CD
Condition 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475 -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475
@ -802,6 +799,9 @@ CreateQuickLaunchShortcut
28FDA3F4-B799-901F-8A27-AA04F0C022AB,Title,subst
1
2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Active
No
2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Conditions
{3 conditions}
@ -976,27 +976,6 @@ disabled
5C66451D-6042-DBDE-0D8C-31156EE244AD,Widget
{Back Button;Next Button}
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Background
white
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Conditions
{2 conditions}
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text,subst
1
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Type
checkbutton
5D20DD8D-064A-9922-29E1-A7FABEF3666A,VirtualText
LaunchApplication
5D20DD8D-064A-9922-29E1-A7FABEF3666A,X
185
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Y
130
5EC7056B-6F90-311E-2C6F-76E96164CFFD,CheckCondition
{Before Action is Executed}
@ -1408,15 +1387,6 @@ disabled
A75C97CC-01AC-C12A-D663-A54E3257F11B,Widget
{Back Button;Next Button}
A8856922-E6C1-160B-E55C-5C1806A89136,CheckCondition
{Before Action is Executed}
A8856922-E6C1-160B-E55C-5C1806A89136,Operator
false
A8856922-E6C1-160B-E55C-5C1806A89136,String
<%InstallStopped%>
AAEC34E6-7F02-18F2-30BB-744738192A3B,Conditions
{2 conditions}
@ -1730,12 +1700,6 @@ disabled
E5CBB018-A89D-3145-CFF5-CFC3B62BEA97,Widget
{NextButton; CancelButton}
E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,CheckCondition
{Before Action is Executed}
E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,Filename
<%ProgramExecutable%>
E611105F-DC85-9E20-4F7B-E63C54E5DF06,Message,subst
1
@ -2340,9 +2304,6 @@ Please make sure that calibre is not running, as this will cause the install to
48E8A9D6-B57E-C506-680D-898C65DD2A1B,Title
<%InstallApplicationText%>
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text
<%LaunchApplicationText%>
64B8D0F3-4B11-DA22-D6E7-7248872D5FA7,Message
<%UninstallStartupText%>
@ -2356,7 +2317,7 @@ Please make sure that calibre is not running, as this will cause the install to
{<%AppName%> Installation complete}
8A7FD0C2-F053-8764-F204-4BAE71E05708,Message
{Installation of <%AppName%> was successful. Click Finish to quit the installer.}
{Installation of <%AppName%> was successful. Click Finish to quit the installer. <%AppName%> can be launched from the start menu.}
940F7FED-7D20-7264-3BF9-ED78205A76B3,Text
<%CreateDesktopShortcutText%>

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.5.6'
__version__ = '0.5.7'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -143,7 +143,7 @@ class OutputProfile(Plugin):
# ADE dies an agonizing, long drawn out death if HTML files have more
# bytes than this.
flow_size = sys.maxint
flow_size = -1
# ADE runs screaming when it sees these characters
remove_special_chars = re.compile(u'[\u200b\u00ad]')
# ADE falls to the ground in a dead faint when it sees an <object>

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, shutil, traceback, functools, sys
import os, shutil, traceback, functools, sys, re
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin
@ -55,7 +55,14 @@ def load_plugin(path_to_zip_file):
for name in zf.namelist():
if name.lower().endswith('plugin.py'):
locals = {}
exec zf.read(name) in locals
raw = zf.read(name)
match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
encoding = 'utf-8'
if match is not None:
encoding = match.group(1)
raw = raw.decode(encoding)
raw = re.sub('\r\n', '\n', raw)
exec raw in locals
for x in locals.values():
if isinstance(x, type) and issubclass(x, Plugin):
if x.minimum_calibre_version > version or \
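The loader no longer exec's the raw zip bytes directly: it looks for a PEP 263 coding cookie in the first 300 bytes, falls back to UTF-8, decodes the source, and normalizes CRLF line endings so exec receives clean newline-separated text. A minimal standalone sketch of the same decoding idea (not calibre's helper; the sample bytes are invented):

    import re

    def decode_plugin_source(raw):
        # raw: the bytes of plugin.py as read from the plugin zip
        match = re.search(br'coding[:=]\s*([-\w.]+)', raw[:300])
        encoding = match.group(1).decode('ascii') if match else 'utf-8'
        # decode, then normalize Windows line endings before handing to exec
        return raw.decode(encoding).replace('\r\n', '\n')

    print(decode_plugin_source(b"# -*- coding: utf-8 -*-\r\nprint('hello from plugin')\r\n"))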

View File

@ -31,6 +31,11 @@ Run an embedded python interpreter.
parser.add_option('--migrate', action='store_true', default=False,
help='Migrate old database. Needs two arguments. Path '
'to library1.db and path to new library folder.')
parser.add_option('--add-simple-plugin', default=None,
help='Add a simple plugin (i.e. a plugin that consists of only a '
'.py file), by specifying the path to the py file containing the '
'plugin code.')
return parser
def update_zipfile(zipfile, mod, path):
@ -115,6 +120,22 @@ def debug_device_driver():
print 'Total space:', d.total_space()
break
def add_simple_plugin(path_to_plugin):
import tempfile, zipfile, shutil
tdir = tempfile.mkdtemp()
open(os.path.join(tdir, 'custom_plugin.py'),
'wb').write(open(path_to_plugin, 'rb').read())
odir = os.getcwd()
os.chdir(tdir)
zf = zipfile.ZipFile('plugin.zip', 'w')
zf.write('custom_plugin.py')
zf.close()
from calibre.customize.ui import main
main(['calibre-customize', '-a', 'plugin.zip'])
os.chdir(odir)
shutil.rmtree(tdir)
def main(args=sys.argv):
opts, args = option_parser().parse_args(args)
@ -137,6 +158,8 @@ def main(args=sys.argv):
print 'You must specify the path to library1.db and the path to the new library folder'
return 1
migrate(args[1], args[2])
elif opts.add_simple_plugin is not None:
add_simple_plugin(opts.add_simple_plugin)
else:
from IPython.Shell import IPShellEmbed
ipshell = IPShellEmbed()
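The new switch is invoked as calibre-debug --add-simple-plugin /path/to/plugin.py: it stages the single .py file as custom_plugin.py in a temporary directory, zips it up as plugin.zip and registers the zip through calibre-customize -a. A rough hand-rolled equivalent of just the packaging step, as a sketch (the helper name and everything except the custom_plugin.py/plugin.zip conventions from the code above are illustrative):

    import os, shutil, tempfile, zipfile

    def wrap_single_file_plugin(path_to_py, out_zip='plugin.zip'):
        # Stage the one-file plugin as custom_plugin.py, then zip that file.
        tdir = tempfile.mkdtemp()
        try:
            staged = os.path.join(tdir, 'custom_plugin.py')
            shutil.copyfile(path_to_py, staged)
            zf = zipfile.ZipFile(out_zip, 'w')
            zf.write(staged, 'custom_plugin.py')
            zf.close()
        finally:
            shutil.rmtree(tdir)
        return out_zip

    # The resulting zip is then installed with: calibre-customize -a plugin.zip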

View File

@ -209,7 +209,7 @@ class Device(_Device):
time.sleep(6)
drives = {}
wmi = __import__('wmi', globals(), locals(), [], -1)
c = wmi.WMI()
c = wmi.WMI(find_classes=False)
for drive in c.Win32_DiskDrive():
if self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_MAIN_MEM):
drives['main'] = self.windows_get_drive_prefix(drive)

View File

@ -94,7 +94,8 @@ OptionRecommendation(name='font_size_mapping',
OptionRecommendation(name='line_height',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('The line height in pts. Controls spacing between consecutive '
'lines of text. By default ??'
'lines of text. By default no line height manipulation is '
'performed.'
)
),
@ -102,12 +103,25 @@ OptionRecommendation(name='linearize_tables',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Some badly designed documents use tables to control the '
'layout of text on the page. When converted these documents '
'often have text that runs of the page and other artifacts. '
'often have text that runs off the page and other artifacts. '
'This option will extract the content from the tables and '
'present it in a linear fashion.'
)
),
OptionRecommendation(name='dont_split_on_page_breaks',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Turn off splitting at page breaks. Normally, input '
'files are automatically split at every page break into '
'two files. This gives an output ebook that can be '
'parsed faster and with less resources. However, '
'splitting is slow and if your source file contains a '
'very large number of page breaks, you should turn off '
'splitting on page breaks.'
)
),
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
short_switch='m',
@ -330,6 +344,17 @@ OptionRecommendation(name='language',
untable=self.opts.linearize_tables)
flattener(self.oeb, self.opts)
if self.opts.linearize_tables:
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
LinearizeTables()(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.split import Split
pbx = accelerators.get('pagebreaks', None)
split = Split(not self.opts.dont_split_on_page_breaks,
max_flow_size=self.opts.output_profile.flow_size,
page_breaks_xpath=pbx)
split(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
self.log.info('Cleaning up manifest...')
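The pipeline now wires in the generic Split transform right after CSS flattening: page-break splitting is on unless dont_split_on_page_breaks is set, the size cap comes from the output profile's flow_size, and an input plugin can supply a ready-made page-break XPath through the 'pagebreaks' accelerator. Because the base OutputProfile's flow_size is now -1 and Split only enforces a cap when max_flow_size > 0, size-based splitting effectively happens only for profiles that override flow_size. A sketch of the same wiring as a small helper (only Split and its keyword arguments come from this commit; the helper name is illustrative):

    from calibre.ebooks.oeb.transforms.split import Split

    def build_split_transform(opts, accelerators):
        # pbx is None when the input plugin did not provide a 'pagebreaks'
        # accelerator; Split then derives break points from the stylesheets'
        # page-break-before/after rules instead.
        pbx = accelerators.get('pagebreaks', None)
        return Split(not opts.dont_split_on_page_breaks,
                     max_flow_size=opts.output_profile.flow_size,
                     page_breaks_xpath=pbx)

    # Used inside a conversion pipeline as: build_split_transform(opts, accel)(oeb, opts)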

View File

@ -25,7 +25,7 @@ def get_metadata(stream):
for item in litfile.manifest.values():
if item.path in candidates:
try:
covers.append((litfile.get_file('/data/'+item.internal),
ctype))
except:
pass
@ -33,7 +33,7 @@ def get_metadata(stream):
covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True)
idx = 0
if len(covers) > 1:
if covers[1][1] == covers[1][0]+'-standard':
if covers[1][1] == covers[0][1]+'-standard':
idx = 1
mi.cover_data = ('jpg', covers[idx][0])
return mi

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, re, collections import os, re, collections
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
@ -37,18 +37,18 @@ def metadata_from_formats(formats):
mi2 = opf_metadata(opf) mi2 = opf_metadata(opf)
if mi2 is not None and mi2.title: if mi2 is not None and mi2.title:
return mi2 return mi2
for path, ext in zip(formats, extensions): for path, ext in zip(formats, extensions):
with open(path, 'rb') as stream: with open(path, 'rb') as stream:
try: try:
newmi = get_metadata(stream, stream_type=ext, newmi = get_metadata(stream, stream_type=ext,
use_libprs_metadata=True) use_libprs_metadata=True)
mi.smart_update(newmi) mi.smart_update(newmi)
except: except:
continue continue
if getattr(mi, 'application_id', None) is not None: if getattr(mi, 'application_id', None) is not None:
return mi return mi
if not mi.title: if not mi.title:
mi.title = _('Unknown') mi.title = _('Unknown')
if not mi.authors: if not mi.authors:
@ -64,20 +64,20 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
stream_type = 'mobi' stream_type = 'mobi'
if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'): if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'):
stream_type = 'odt' stream_type = 'odt'
opf = None opf = None
if hasattr(stream, 'name'): if hasattr(stream, 'name'):
c = os.path.splitext(stream.name)[0]+'.opf' c = os.path.splitext(stream.name)[0]+'.opf'
if os.access(c, os.R_OK): if os.access(c, os.R_OK):
opf = opf_metadata(os.path.abspath(c)) opf = opf_metadata(os.path.abspath(c))
if use_libprs_metadata and getattr(opf, 'application_id', None) is not None: if use_libprs_metadata and getattr(opf, 'application_id', None) is not None:
return opf return opf
mi = MetaInformation(None, None) mi = MetaInformation(None, None)
if prefs['read_file_metadata']: if prefs['read_file_metadata']:
mi = get_file_type_metadata(stream, stream_type) mi = get_file_type_metadata(stream, stream_type)
name = os.path.basename(getattr(stream, 'name', '')) name = os.path.basename(getattr(stream, 'name', ''))
base = metadata_from_filename(name) base = metadata_from_filename(name)
if base.title == os.path.splitext(name)[0] and base.authors is None: if base.title == os.path.splitext(name)[0] and base.authors is None:
@ -98,17 +98,17 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
base.smart_update(mi) base.smart_update(mi)
if opf is not None: if opf is not None:
base.smart_update(opf) base.smart_update(opf)
return base return base
def set_metadata(stream, mi, stream_type='lrf'): def set_metadata(stream, mi, stream_type='lrf'):
if stream_type: if stream_type:
stream_type = stream_type.lower() stream_type = stream_type.lower()
set_file_type_metadata(stream, mi, stream_type) set_file_type_metadata(stream, mi, stream_type)
def metadata_from_filename(name, pat=None):
name = os.path.splitext(name)[0]
name = name.rpartition('.')[0]
mi = MetaInformation(None, None)
if pat is None:
pat = re.compile(prefs.get('filename_pattern'))
@ -161,7 +161,7 @@ def opf_metadata(opfpath):
mi = MetaInformation(opf)
if hasattr(opf, 'cover') and opf.cover:
cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
if os.access(cpath, os.R_OK):
fmt = cpath.rpartition('.')[-1]
data = open(cpath, 'rb').read()
mi.cover_data = (fmt, data)
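One behavioural detail of the metadata_from_filename change above: name.rpartition('.')[0] and os.path.splitext(name)[0] agree for ordinary 'Title.ext' names, but a name without a dot now yields an empty string instead of the name itself. A quick comparison, plain Python with nothing calibre-specific:

    import os.path

    for name in ('Author - Title.epub', 'archive.tar.gz', 'README'):
        print('%-22s rpartition: %-18r splitext: %r' % (
            name, name.rpartition('.')[0], os.path.splitext(name)[0]))
    # 'README' gives '' with rpartition but 'README' with splitext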

View File

@ -1,9 +1,8 @@
'''Read meta information from PDF files'''
from __future__ import with_statement from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files'''
import sys, os, StringIO import sys, os, StringIO
@ -31,7 +30,7 @@ def get_metadata(stream, extract_cover=True):
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
try: try:
info = PdfFileReader(stream).getDocumentInfo() info = PdfFileReader(stream).getDocumentInfo()
if info.title: if info.title:
@ -52,23 +51,18 @@ def get_metadata(stream, extract_cover=True):
def set_metadata(stream, mi): def set_metadata(stream, mi):
stream.seek(0) stream.seek(0)
# Use a StringIO object for the pdf because we will want to over # Use a StringIO object for the pdf because we will want to over
# write it later and if we are working on the stream directly it # write it later and if we are working on the stream directly it
# could cause some issues. # could cause some issues.
raw = StringIO.StringIO(stream.read()) raw = StringIO.StringIO(stream.read())
orig_pdf = PdfFileReader(raw) orig_pdf = PdfFileReader(raw)
title = mi.title if mi.title else orig_pdf.documentInfo.title title = mi.title if mi.title else orig_pdf.documentInfo.title
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
out_pdf = PdfFileWriter(title=title, author=author) out_pdf = PdfFileWriter(title=title, author=author)
for page in orig_pdf.pages: for page in orig_pdf.pages:
out_pdf.addPage(page) out_pdf.addPage(page)
out_str = StringIO.StringIO() out_str = StringIO.StringIO()
out_pdf.write(out_str) out_pdf.write(out_str)
stream.seek(0) stream.seek(0)
stream.truncate() stream.truncate()
out_str.seek(0) out_str.seek(0)
@ -77,31 +71,31 @@ def set_metadata(stream, mi):
def get_cover(stream): def get_cover(stream):
data = StringIO.StringIO() data = StringIO.StringIO()
try: try:
pdf = PdfFileReader(stream) pdf = PdfFileReader(stream)
output = PdfFileWriter() output = PdfFileWriter()
if len(pdf.pages) >= 1: if len(pdf.pages) >= 1:
output.addPage(pdf.getPage(0)) output.addPage(pdf.getPage(0))
with TemporaryDirectory('_pdfmeta') as tdir: with TemporaryDirectory('_pdfmeta') as tdir:
cover_path = os.path.join(tdir, 'cover.pdf') cover_path = os.path.join(tdir, 'cover.pdf')
outputStream = file(cover_path, "wb") outputStream = file(cover_path, "wb")
output.write(outputStream) output.write(outputStream)
outputStream.close() outputStream.close()
wand = NewMagickWand() wand = NewMagickWand()
MagickReadImage(wand, cover_path) MagickReadImage(wand, cover_path)
MagickSetImageFormat(wand, 'JPEG') MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path) MagickWriteImage(wand, '%s.jpg' % cover_path)
img = Image.open('%s.jpg' % cover_path) img = Image.open('%s.jpg' % cover_path)
img.save(data, 'JPEG') img.save(data, 'JPEG')
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
return data.getvalue() return data.getvalue()

View File

@ -29,5 +29,5 @@ class MOBIInput(InputFormatPlugin):
with open(f, 'wb') as q:
q.write(html.tostring(root, encoding='utf-8', method='xml',
include_meta_content_type=False))
accelerators['pagebreaks'] = {f: '//*[@class="mbp_pagebreak"]'}
accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
return mr.created_opf_path
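The 'pagebreaks' accelerator is now a single namespaced XPath expression rather than a per-file dict; the Split transform compiles it with the h: prefix bound to the XHTML namespace (plus EXSLT regular expressions). A small illustration of evaluating that expression with lxml on a made-up document:

    from lxml import etree

    XHTML_NS = 'http://www.w3.org/1999/xhtml'
    find_breaks = etree.XPath('//h:div[@class="mbp_pagebreak"]',
                              namespaces={'h': XHTML_NS})

    root = etree.fromstring(
        '<html xmlns="%s"><body>'
        '<p>one</p><div class="mbp_pagebreak"/><p>two</p>'
        '</body></html>' % XHTML_NS)
    print(len(find_breaks(root)))  # 1 page-break element found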

View File

@ -160,35 +160,31 @@ class BookHeader(object):
class MetadataHeader(BookHeader): class MetadataHeader(BookHeader):
def __init__(self, stream, log): def __init__(self, stream, log):
self.stream = stream self.stream = stream
self.ident = self.identity() self.ident = self.identity()
self.num_sections = self.section_count() self.num_sections = self.section_count()
if self.num_sections >= 2: if self.num_sections >= 2:
header = self.header() header = self.header()
BookHeader.__init__(self, header, self.ident, None, log) BookHeader.__init__(self, header, self.ident, None, log)
else: else:
self.exth = None self.exth = None
def identity(self): def identity(self):
self.stream.seek(60) self.stream.seek(60)
ident = self.stream.read(8).upper() ident = self.stream.read(8).upper()
if ident not in ['BOOKMOBI', 'TEXTREAD']: if ident not in ['BOOKMOBI', 'TEXTREAD']:
raise MobiError('Unknown book type: %s' % ident) raise MobiError('Unknown book type: %s' % ident)
return ident return ident
def section_count(self): def section_count(self):
self.stream.seek(76) self.stream.seek(76)
return struct.unpack('>H', self.stream.read(2))[0] return struct.unpack('>H', self.stream.read(2))[0]
def section_offset(self, number): def section_offset(self, number):
self.stream.seek(78+number*8) self.stream.seek(78+number*8)
return struct.unpack('>LBBBB', self.stream.read(8))[0] return struct.unpack('>LBBBB', self.stream.read(8))[0]
def header(self): def header(self):
section_headers = [] section_headers = []
# First section with the metadata # First section with the metadata
section_headers.append(self.section_offset(0)) section_headers.append(self.section_offset(0))
# Second section used to get the lengh of the first # Second section used to get the lengh of the first
@ -196,20 +192,16 @@ class MetadataHeader(BookHeader):
end_off = section_headers[1] end_off = section_headers[1]
off = section_headers[0] off = section_headers[0]
self.stream.seek(off) self.stream.seek(off)
return self.stream.read(end_off - off) return self.stream.read(end_off - off)
def section_data(self, number): def section_data(self, number):
start = self.section_offset(number) start = self.section_offset(number)
if number == self.num_sections -1: if number == self.num_sections -1:
end = os.stat(self.stream.name).st_size end = os.stat(self.stream.name).st_size
else: else:
end = self.section_offset(number + 1) end = self.section_offset(number + 1)
self.stream.seek(start) self.stream.seek(start)
return self.stream.read(end - start) return self.stream.read(end - start)
@ -651,7 +643,7 @@ class MobiReader(object):
def get_metadata(stream): def get_metadata(stream):
from calibre.utils.logging import Log from calibre.utils.logging import Log
log = Log() log = Log()
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
try: try:
mh = MetadataHeader(stream, log) mh = MetadataHeader(stream, log)
@ -666,7 +658,6 @@ def get_metadata(stream):
mr.extract_content(tdir, parse_cache) mr.extract_content(tdir, parse_cache)
if mr.embedded_mi is not None: if mr.embedded_mi is not None:
mi = mr.embedded_mi mi = mr.embedded_mi
if hasattr(mh.exth, 'cover_offset'): if hasattr(mh.exth, 'cover_offset'):
cover_index = mh.first_image_index + mh.exth.cover_offset cover_index = mh.first_image_index + mh.exth.cover_offset
data = mh.section_data(int(cover_index)) data = mh.section_data(int(cover_index))
@ -679,5 +670,4 @@ def get_metadata(stream):
mi.cover_data = ('jpg', obuf.getvalue()) mi.cover_data = ('jpg', obuf.getvalue())
except: except:
log.exception() log.exception()
return mi return mi

View File

@ -218,7 +218,7 @@ class Serializer(object):
for elem in item.data.find(XHTML('body')):
self.serialize_elem(elem, item)
#buffer.write('</mbp:section>')
buffer.write('</mbp:pagebreak>')
buffer.write('<mbp:pagebreak/>')
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
buffer = self.buffer

View File

@ -272,11 +272,26 @@ def XPath(expr):
def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
def xml2str(root, pretty_print=False):
return etree.tostring(root, encoding='utf-8', xml_declaration=True,
def _prepare_xml_for_serialization(root):
root.set('xmlns', XHTML_NS)
root.set('{%s}xlink'%XHTML_NS, XLINK_NS)
for x in root.iter():
if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
x.set('xmlns', SVG_NS)
def xml2str(root, pretty_print=False, strip_comments=False):
_prepare_xml_for_serialization(root)
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
pretty_print=pretty_print)
if strip_comments:
ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
return ans
def xml2unicode(root, pretty_print=False):
_prepare_xml_for_serialization(root)
return etree.tostring(root, pretty_print=pretty_print)
ASCII_CHARS = set(chr(x) for x in xrange(128)) ASCII_CHARS = set(chr(x) for x in xrange(128))
@ -826,6 +841,11 @@ class Manifest(object):
return xml2str(data, pretty_print=self.oeb.pretty_print)
if isinstance(data, unicode):
return data.encode('utf-8')
if hasattr(data, 'cssText'):
data = data.cssText
if isinstance(data, unicode):
data = data.encode('utf-8')
return data
return str(data)
def __unicode__(self):
@ -834,6 +854,8 @@ class Manifest(object):
return xml2unicode(data, pretty_print=self.oeb.pretty_print)
if isinstance(data, unicode):
return data
if hasattr(data, 'cssText'):
return data.cssText
return unicode(data)
def __eq__(self, other):
@ -1044,6 +1066,12 @@ class Spine(object):
self.items[i].spine_position = i
item.spine_position = None
def index(self, item):
for i, x in enumerate(self):
if item == x:
return i
return -1
def __iter__(self):
for item in self.items:
yield item
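xml2str's new strip_comments flag is implemented as a DOTALL regex applied to the serialized byte string. The same idea in isolation, as a standalone illustration rather than calibre's helper itself:

    import re
    from lxml import etree

    root = etree.fromstring('<root><!-- editor note --><p>kept</p></root>')
    raw = etree.tostring(root, encoding='utf-8', xml_declaration=True)
    raw = re.compile(br'<!--.*?-->', re.DOTALL).sub(b'', raw)
    print(raw)  # the comment is gone from the serialized output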

View File

@ -163,7 +163,6 @@ class EbookIterator(object):
s.pages = p
start = 1
for s in self.spine:
s.start_page = start
start += s.pages

View File

@ -22,7 +22,6 @@ class OEBOutput(OutputFormatPlugin):
if not os.path.exists(output_path):
os.makedirs(output_path)
from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME
from calibre.ebooks.html import tostring as html_tostring
with CurrentDir(output_path):
results = oeb_book.to_opf2(page_map=True)
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
@ -38,16 +37,7 @@ class OEBOutput(OutputFormatPlugin):
dir = os.path.dirname(path)
if not os.path.exists(dir):
os.makedirs(dir)
raw = item.data
if not isinstance(raw, basestring):
if hasattr(raw, 'cssText'):
raw = raw.cssText
else:
raw = html_tostring(raw,
pretty_print=opts.pretty_print)
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
with open(path, 'wb') as f:
f.write(raw)
f.write(str(item))

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import OEB_DOCS, XPNSMAP
class LinearizeTables(object):
def linearize(self, root):
for x in root.xpath('//h:table|//h:td|//h:tr|//h:th',
namespaces=XPNSMAP):
x.tag = 'div'
def __call__(self, oeb, context):
for x in oeb.manifest.items:
if x.media_type in OEB_DOCS:
self.linearize(x.data)
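LinearizeTables flattens table markup simply by retagging table, tr, td and th elements as div (matched via namespaced XPath, since OEB documents are XHTML); in the pipeline it runs after the CSS flattener. The same retagging trick in isolation, on a throwaway non-namespaced fragment:

    from lxml import etree

    root = etree.fromstring('<table><tr><td>one</td><td>two</td></tr></table>')
    for el in root.iter():
        if el.tag in ('table', 'tr', 'td', 'th'):
            el.tag = 'div'  # keep content and attributes, drop table semantics
    print(etree.tostring(root))
    # <div><div><div>one</div><div>two</div></div></div>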

View File

@ -4,21 +4,25 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Split the flows in an epub file to conform to size limitations.
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
forces at "likely" locations to conform to size limitations. This transform
assumes a prior call to the flatcss transform.
'''
import os, math, functools, collections, re, copy, sys
import os, math, functools, collections, re, copy
from lxml.etree import XPath as _XPath
from lxml import etree, html
from lxml.cssselect import CSSSelector
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
rewrite_links
from calibre.ebooks.epub import tostring, rules
from calibre import CurrentDir
XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
content = functools.partial(os.path.join, 'content')
NAMESPACES = dict(XPNSMAP)
NAMESPACES['re'] = 'http://exslt.org/regular-expressions'
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
SPLIT_ATTR = 'cs'
SPLIT_POINT_ATTR = 'csp' SPLIT_POINT_ATTR = 'csp'
@ -27,149 +31,166 @@ class SplitError(ValueError):
def __init__(self, path, root):
size = len(tostring(root))/1024.
ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
(os.path.basename(path), size))
ValueError.__init__(self,
_('Could not find reasonable point at which to split: '
'%s Sub-tree size: %d KB')%
(path, size))
class Split(object):
def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
max_flow_size=0):
self.split_on_page_breaks = split_on_page_breaks
self.page_breaks_xpath = page_breaks_xpath
self.max_flow_size = max_flow_size
if self.page_breaks_xpath is not None:
self.page_breaks_xpath = XPath(self.page_breaks_xpath)
def __call__(self, oeb, context):
self.oeb = oeb
self.log = oeb.log
self.map = {}
self.page_break_selectors = None
for item in self.oeb.manifest.items:
if etree.iselement(item.data):
self.split_item(item)
self.fix_links()
def split_item(self, item):
if self.split_on_page_breaks:
if self.page_breaks_xpath is None:
page_breaks, page_break_ids = self.find_page_breaks(item)
else:
page_breaks, page_break_ids = self.page_breaks_xpath(item.data)
splitter = FlowSplitter(item, page_breaks, page_break_ids,
self.max_flow_size, self.oeb)
if splitter.was_split:
self.map[item.href] = dict(splitter.anchor_map)
def find_page_breaks(self, item):
if self.page_break_selectors is None:
self.page_break_selectors = set([])
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
OEB_STYLES]
page_break_selectors = set([])
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText),
True))
except:
pass
try:
if after and after != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText),
False))
except:
pass
page_breaks = set([])
for selector, before in page_break_selectors:
for elem in selector(item.data):
elem.pb_before = before
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
elem.pb_order = i
page_breaks = list(page_breaks)
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
page_break_ids, page_breaks_ = [], []
for i, x in enumerate(page_breaks):
x.set('id', x.get('id', 'calibre_pb_%d'%i))
id = x.get('id')
page_breaks_.append((XPath('//*[@id="%s"]'%id), x.pb_before))
page_break_ids.append(id)
return page_breaks_, page_break_ids
def fix_links(self, opf):
'''
Fix references to the split files in other content files.
'''
for item in self.oeb.manifest:
if etree.iselement(item.data):
self.current_item = item
rewrite_links(item.data, self.rewrite_links)
def rewrite_links(self, url):
href, frag = urldefrag(url)
href = self.current_item.abshref(href)
if href in self.map:
anchor_map = self.map[href]
nhref = anchor_map[frag if frag else None]
if frag:
nhref = '#'.joinn(href, frag)
return nhref
return url
class Splitter(object): class FlowSplitter(object):
def __init__(self, path, opts, stylesheet_map, opf): def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb):
self.setup_cli_handler(opts.verbose) self.item = item
self.path = path self.oeb = oeb
self.always_remove = not opts.preserve_tag_structure or \ self.log = oeb.log
os.stat(content(path)).st_size > 5*opts.profile.flow_size self.page_breaks = page_breaks
self.base = (os.path.splitext(path)[0].replace('%', '%%') + '_split_%d.html') self.page_break_ids = page_break_ids
self.opts = opts self.max_flow_size = max_flow_size
self.orig_size = os.stat(content(path)).st_size self.base = item.abshref(item.href)
self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.)
root = html.fromstring(open(content(path)).read())
self.page_breaks, self.trees = [], [] base, ext = os.path.splitext(self.base)
self.split_size = 0 self.base = base.replace('%', '%%')+'_split_%d'+ext
# Split on page breaks self.trees = [self.item.data]
self.splitting_on_page_breaks = True self.splitting_on_page_breaks = True
if not opts.dont_split_on_page_breaks: if self.page_breaks:
self.log_info('\tSplitting on page breaks...') self.split_on_page_breaks(self.item.data)
if self.path in stylesheet_map:
self.find_page_breaks(stylesheet_map[self.path], root)
self.split_on_page_breaks(root.getroottree())
trees = list(self.trees)
else:
self.trees = [root.getroottree()]
trees = list(self.trees)
# Split any remaining over-sized trees
self.splitting_on_page_breaks = False self.splitting_on_page_breaks = False
if self.opts.profile.flow_size < sys.maxint:
if self.max_flow_size > 0:
lt_found = False lt_found = False
self.log_info('\tLooking for large trees...') self.log('\tLooking for large trees...')
for i, tree in enumerate(list(trees)): trees = list(self.trees)
for i, tree in enumerate(list(self.trees)):
self.trees = [] self.trees = []
size = len(tostring(tree.getroot())) size = len(tostring(tree.getroot()))
if size > self.opts.profile.flow_size: if size > self.opts.profile.flow_size:
lt_found = True lt_found = True
try: self.split_to_size(tree)
self.split_to_size(tree)
except (SplitError, RuntimeError): # Splitting fails
if not self.always_remove:
self.always_remove = True
self.split_to_size(tree)
else:
raise
trees[i:i+1] = list(self.trees) trees[i:i+1] = list(self.trees)
if not lt_found: if not lt_found:
self.log_info('\tNo large trees found') self.log_info('\tNo large trees found')
self.trees = trees
self.trees = trees
self.was_split = len(self.trees) > 1 self.was_split = len(self.trees) > 1
if self.was_split: self.commit()
self.commit()
self.log_info('\t\tSplit into %d parts.', len(self.trees))
if self.opts.verbose:
for f in self.files:
self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
self.fix_opf(opf)
self.trees = None def split_on_page_breaks(self, orig_tree):
ordered_ids = []
for elem in orig_tree.xpath('//*[@id]'):
id = elem.get('id')
if id in self.page_break_ids:
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
self.trees = []
def split_text(self, text, root, size): tree = orig_tree
self.log_debug('\t\t\tSplitting text of length: %d'%len(text)) for pattern, before in ordered_ids:
rest = text.replace('\r', '') self.log.debug('\t\tSplitting on page-break')
parts = re.split('\n\n', rest) elem = pattern(tree)
self.log_debug('\t\t\t\tFound %d parts'%len(parts)) if elem:
if max(map(len, parts)) > size: before, after = self.do_split(tree, elem[0], before)
raise SplitError('Cannot split as file contains a <pre> tag with a very large paragraph', root) self.trees.append(before)
ans = [] tree = after
buf = '' self.trees.append(tree)
for part in parts: self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
if len(buf) + len(part) < size:
buf += '\n\n'+part
else:
ans.append(buf)
buf = part
return ans
def split_to_size(self, tree):
self.log_debug('\t\tSplitting...')
root = tree.getroot()
# Split large <pre> tags
for pre in list(root.xpath('//pre')):
text = u''.join(pre.xpath('descendant::text()'))
pre.text = text
for child in list(pre.iterchildren()):
pre.remove(child)
if len(pre.text) > self.opts.profile.flow_size*0.5:
frags = self.split_text(pre.text, root, int(0.2*self.opts.profile.flow_size))
new_pres = []
for frag in frags:
pre2 = copy.copy(pre)
pre2.text = frag
pre2.tail = u''
new_pres.append(pre2)
new_pres[-1].tail = pre.tail
p = pre.getparent()
i = p.index(pre)
p[i:i+1] = new_pres
split_point, before = self.find_split_point(root)
if split_point is None or self.split_size > 6*self.orig_size:
if not self.always_remove:
self.log_warn(_('\t\tToo much markup. Re-splitting without '
'structure preservation. This may cause '
'incorrect rendering.'))
raise SplitError(self.path, root)
for t in self.do_split(tree, split_point, before):
r = t.getroot()
if self.is_page_empty(r):
continue
size = len(tostring(r))
if size <= self.opts.profile.flow_size:
self.trees.append(t)
#print tostring(t.getroot(), pretty_print=True)
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
len(self.trees), size/1024.)
self.split_size += size
else:
self.split_to_size(t)
def is_page_empty(self, root):
body = root.find('body')
if body is None:
return False
txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
if len(txt) > 4:
#if len(txt) < 100:
# print 1111111, html.tostring(body, method='html', encoding=unicode)
return False
for img in root.xpath('//img'):
if img.get('style', '') != 'display:none':
return False
return True
def do_split(self, tree, split_point, before): def do_split(self, tree, split_point, before):
''' '''
@ -190,7 +211,7 @@ class Splitter(object):
split_point2 = root2.xpath(path)[0] split_point2 = root2.xpath(path)[0]
def nix_element(elem, top=True): def nix_element(elem, top=True):
if self.always_remove: if True:
parent = elem.getparent() parent = elem.getparent()
index = parent.index(elem) index = parent.index(elem)
if top: if top:
@ -198,7 +219,6 @@ class Splitter(object):
else: else:
index = parent.index(elem) index = parent.index(elem)
parent[index:index+1] = list(elem.iterchildren()) parent[index:index+1] = list(elem.iterchildren())
else: else:
elem.text = u'' elem.text = u''
elem.tail = u'' elem.tail = u''
@ -241,67 +261,76 @@ class Splitter(object):
return tree, tree2 return tree, tree2
def is_page_empty(self, root):
body = root.find('body')
if body is None:
return False
txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
if len(txt) > 4:
return False
for img in root.xpath('//img'):
if img.get('style', '') != 'display:none':
return False
return True
def split_on_page_breaks(self, orig_tree): def split_text(self, text, root, size):
ordered_ids = [] self.log.debug('\t\t\tSplitting text of length: %d'%len(text))
for elem in orig_tree.xpath('//*[@id]'): rest = text.replace('\r', '')
id = elem.get('id') parts = re.split('\n\n', rest)
if id in self.page_break_ids: self.log.debug('\t\t\t\tFound %d parts'%len(parts))
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)]) if max(map(len, parts)) > size:
raise SplitError('Cannot split as file contains a <pre> tag '
self.trees = [] 'with a very large paragraph', root)
tree = orig_tree ans = []
for pattern, before in ordered_ids: buf = ''
self.log_info('\t\tSplitting on page-break') for part in parts:
elem = pattern(tree) if len(buf) + len(part) < size:
if elem: buf += '\n\n'+part
before, after = self.do_split(tree, elem[0], before) else:
self.trees.append(before) ans.append(buf)
tree = after buf = part
self.trees.append(tree) return ans
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
def split_to_size(self, tree):
self.log.debug('\t\tSplitting...')
root = tree.getroot()
# Split large <pre> tags
for pre in list(root.xpath('//pre')):
text = u''.join(pre.xpath('descendant::text()'))
pre.text = text
for child in list(pre.iterchildren()):
pre.remove(child)
if len(pre.text) > self.max_flow_size*0.5:
frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
new_pres = []
for frag in frags:
pre2 = copy.copy(pre)
pre2.text = frag
pre2.tail = u''
new_pres.append(pre2)
new_pres[-1].tail = pre.tail
p = pre.getparent()
i = p.index(pre)
p[i:i+1] = new_pres
def find_page_breaks(self, stylesheets, root): split_point, before = self.find_split_point(root)
''' if split_point is None:
Find all elements that have either page-break-before or page-break-after set. raise SplitError(self.item.href, root)
Populates `self.page_breaks` with id based XPath selectors (for elements that don't
have ids, an id is created).
'''
page_break_selectors = set([])
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText), True))
except:
pass
try:
if after and after != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText), False))
except:
pass
page_breaks = set([])
for selector, before in page_break_selectors:
for elem in selector(root):
elem.pb_before = before
page_breaks.add(elem)
for i, elem in enumerate(root.iter()):
elem.pb_order = i
page_breaks = list(page_breaks)
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
self.page_break_ids = []
for i, x in enumerate(page_breaks):
x.set('id', x.get('id', 'calibre_pb_%d'%i))
id = x.get('id')
self.page_breaks.append((XPath('//*[@id="%s"]'%id), x.pb_before))
self.page_break_ids.append(id)
for t in self.do_split(tree, split_point, before):
r = t.getroot()
if self.is_page_empty(r):
continue
size = len(tostring(r))
if size <= self.max_flow_size:
self.trees.append(t)
#print tostring(t.getroot(), pretty_print=True)
self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)',
len(self.trees), size/1024.)
self.split_size += size
else:
self.split_to_size(t)
def find_split_point(self, root): def find_split_point(self, root):
''' '''
@ -336,8 +365,7 @@ class Splitter(object):
'//br', '//br',
'//li', '//li',
): ):
elems = root.xpath(path, elems = root.xpath(path, namespaces=NAMESPACES)
namespaces={'re':'http://exslt.org/regular-expressions'})
elem = pick_elem(elems) elem = pick_elem(elems)
if elem is not None: if elem is not None:
try: try:
@ -355,6 +383,8 @@ class Splitter(object):
all anchors in the original tree. Internal links are re-directed. The all anchors in the original tree. Internal links are re-directed. The
original file is deleted and the split files are saved. original file is deleted and the split files are saved.
''' '''
if not self.was_split:
return
self.anchor_map = collections.defaultdict(lambda :self.base%0) self.anchor_map = collections.defaultdict(lambda :self.base%0)
self.files = [] self.files = []
@ -368,134 +398,46 @@ class Splitter(object):
elem.attrib.pop(SPLIT_ATTR, None) elem.attrib.pop(SPLIT_ATTR, None)
elem.attrib.pop(SPLIT_POINT_ATTR, '0') elem.attrib.pop(SPLIT_POINT_ATTR, '0')
for current, tree in zip(self.files, self.trees): spine_pos = self.item.spine_pos
for a in tree.getroot().xpath('//a[@href]'): for current, tree in zip(map(reversed, (self.files, self.trees))):
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
href = a.get('href').strip() href = a.get('href').strip()
if href.startswith('#'): if href.startswith('#'):
anchor = href[1:] anchor = href[1:]
file = self.anchor_map[anchor] file = self.anchor_map[anchor]
if file != current: if file != current:
a.set('href', file+href) a.set('href', file+href)
open(content(current), 'wb').\
write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
os.remove(content(self.path)) new_id = self.oeb.manifest.generate(id=self.item.id)[0]
new_item = self.oeb.manifest.add(new_id, current,
self.item.media_type, data=tree.getroot())
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
if self.oeb.guide:
for ref in self.oeb.guide:
href, frag = urldefrag(ref.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
nhref = '#'.join(nhref, frag)
ref.href = nhref
def fix_toc_entry(toc):
if toc.href:
href, frag = urldefrag(toc.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
nhref = '#'.join(nhref, frag)
toc.href = nhref
for x in toc:
fix_toc_entry(x)
def fix_opf(self, opf): if self.oeb.toc:
''' fix_toc_entry(self.oeb.toc)
Fix references to the split file in the OPF.
'''
items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
new_items = [('content/'+f, None) for f in self.files]
id_map = {}
for item in items:
id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
for id in id_map.keys(): self.oeb.manifest.remove(self.item)
opf.replace_spine_items_by_idref(id, id_map[id])
for ref in opf.iterguide():
href = ref.get('href', '')
if href.startswith('content/'+self.path):
href = href.split('#')
frag = None
if len(href) > 1:
frag = href[1]
if frag not in self.anchor_map:
self.log_warning('\t\tUnable to re-map OPF link', href)
continue
new_file = self.anchor_map[frag]
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
def fix_content_links(html_files, changes, opts):
split_files = [f.path for f in changes]
anchor_maps = [f.anchor_map for f in changes]
files = list(html_files)
for j, f in enumerate(split_files):
try:
i = files.index(f)
files[i:i+1] = changes[j].files
except ValueError:
continue
for htmlfile in files:
changed = False
root = html.fromstring(open(content(htmlfile), 'rb').read())
for a in root.xpath('//a[@href]'):
href = a.get('href')
if not href.startswith('#'):
href = href.split('#')
anchor = href[1] if len(href) > 1 else None
href = href[0]
if href in split_files:
try:
newf = anchor_maps[split_files.index(href)][anchor]
except:
print '\t\tUnable to remap HTML link:', href, anchor
continue
frag = ('#'+anchor) if anchor else ''
a.set('href', newf+frag)
changed = True
if changed:
open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
def fix_ncx(path, changes):
split_files = [f.path for f in changes]
anchor_maps = [f.anchor_map for f in changes]
tree = etree.parse(path)
changed = False
for content in tree.getroot().xpath('//x:content[@src]',
namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
href = content.get('src')
if not href.startswith('#'):
href = href.split('#')
anchor = href[1] if len(href) > 1 else None
href = href[0].split('/')[-1]
if href in split_files:
try:
newf = anchor_maps[split_files.index(href)][anchor]
except:
print 'Unable to remap NCX link:', href, anchor
frag = ('#'+anchor) if anchor else ''
content.set('src', 'content/'+newf+frag)
changed = True
if changed:
open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
def find_html_files(opf):
'''
Find all HTML files referenced by `opf`.
'''
html_files = []
for item in opf.itermanifest():
if 'html' in item.get('media-type', '').lower():
f = item.get('href').split('/')[-1]
f2 = f.replace('&', '%26')
if not os.path.exists(content(f)) and os.path.exists(content(f2)):
f = f2
item.set('href', item.get('href').replace('&', '%26'))
if os.path.exists(content(f)):
html_files.append(f)
return html_files
def split(pathtoopf, opts, stylesheet_map):
pathtoopf = os.path.abspath(pathtoopf)
opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
with CurrentDir(os.path.dirname(pathtoopf)):
html_files = find_html_files(opf)
changes = [Splitter(f, opts, stylesheet_map, opf) for f in html_files]
changes = [c for c in changes if c.was_split]
fix_content_links(html_files, changes, opts)
for item in opf.itermanifest():
if item.get('media-type', '') == 'application/x-dtbncx+xml':
fix_ncx(item.get('href'), changes)
break
open(pathtoopf, 'wb').write(opf.render())

View File

@ -67,6 +67,10 @@ def _config():
c.add_opt('default_send_to_device_action', default=None,
help=_('Default action to perform when send to device button is '
'clicked'))
c.add_opt('show_donate_button', default=True,
help='Show donation button')
c.add_opt('asked_library_thing_password', default=False,
help='Asked library thing password at least once.')
return ConfigProxy(c)
config = _config()

View File

@ -12,6 +12,7 @@ from PyQt4.Qt import QMenu, QAction, QActionGroup, QIcon, SIGNAL, QPixmap, \
from calibre.customize.ui import available_input_formats, available_output_formats
from calibre.devices import devices
from calibre.constants import iswindows
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.parallel import Job
from calibre.devices.scanner import DeviceScanner
@ -71,7 +72,14 @@ class DeviceManager(Thread):
if connected and not device[1]:
try:
dev = device[0]()
dev.open()
if iswindows:
import pythoncom
pythoncom.CoInitialize()
try:
dev.open()
finally:
if iswindows:
pythoncom.CoUninitialize()
self.device = dev self.device = dev
self.device_class = dev.__class__ self.device_class = dev.__class__
self.connected_slot(True) self.connected_slot(True)
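
The pattern introduced above, pulled out of the surrounding diff: on Windows, COM has to be initialised on the thread that opens the device, and released again even if open() raises. A hedged sketch, where dev stands for any device driver instance with an open() method:

from calibre.constants import iswindows

def open_device(dev):
    if iswindows:
        import pythoncom
        pythoncom.CoInitialize()           # per-thread COM initialisation
    try:
        dev.open()
    finally:
        if iswindows:
            pythoncom.CoUninitialize()     # always balance the CoInitialize above
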
@ -670,7 +678,9 @@ class DeviceGUI(object):
bad = '\n'.join('<li>%s</li>'%(i,) for i in bad) bad = '\n'.join('<li>%s</li>'%(i,) for i in bad)
d = warning_dialog(self, _('No suitable formats'), d = warning_dialog(self, _('No suitable formats'),
_('Could not upload the following books to the device, ' _('Could not upload the following books to the device, '
'as no suitable formats were found:<br><ul>%s</ul>')%(bad,)) 'as no suitable formats were found. Try changing the output '
'format in the upper right corner next to the red heart and '
're-converting. <br><ul>%s</ul>')%(bad,))
d.exec_() d.exec_()
def upload_booklists(self): def upload_booklists(self):

View File

@ -176,19 +176,19 @@ class Config(ResizableDialog, Ui_Dialog):
def get_metadata(self): def get_metadata(self):
title, authors = self.get_title_and_authors() title, authors = self.get_title_and_authors()
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
publisher = unicode(self.publisher.text()) publisher = unicode(self.publisher.text()).strip()
if publisher: if publisher:
mi.publisher = publisher mi.publisher = publisher
author_sort = unicode(self.author_sort.text()) author_sort = unicode(self.author_sort.text()).strip()
if author_sort: if author_sort:
mi.author_sort = author_sort mi.author_sort = author_sort
comments = unicode(self.comment.toPlainText()) comments = unicode(self.comment.toPlainText()).strip()
if comments: if comments:
mi.comments = comments mi.comments = comments
mi.series_index = int(self.series_index.value()) mi.series_index = int(self.series_index.value())
if self.series.currentIndex() > -1: if self.series.currentIndex() > -1:
mi.series = unicode(self.series.currentText()) mi.series = unicode(self.series.currentText()).strip()
tags = [t.strip() for t in unicode(self.tags.text()).split(',')] tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
if tags: if tags:
mi.tags = tags mi.tags = tags
@ -267,6 +267,7 @@ class Config(ResizableDialog, Ui_Dialog):
).exec_() ).exec_()
return return
mi = self.get_metadata() mi = self.get_metadata()
self.user_mi = mi
self.read_settings() self.read_settings()
self.cover_file = None self.cover_file = None
if self.row is not None: if self.row is not None:
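
The .strip() calls added in this dialog keep whitespace-only input from being stored as real metadata, and the stored self.user_mi is what convert_bulk() merges further below. A small illustration with hypothetical field values:

publisher = u'   '.strip()          # the user typed only spaces into the field
if publisher:                       # u'' is falsy, so no bogus publisher is stored
    mi_publisher = publisher        # (before this change, u'   ' was truthy and got saved)

tags = [t.strip() for t in u'  fiction, fantasy  '.strip().split(',')]
print tags                          # -> [u'fiction', u'fantasy']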

View File

@ -25,24 +25,48 @@ from calibre import islinux
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre.gui2 import config as gui_conf
class CoverFetcher(QThread): class CoverFetcher(QThread):
def __init__(self, username, password, isbn, timeout): def __init__(self, username, password, isbn, timeout, title, author):
self.username = username self.username = username.strip() if username else username
self.password = password self.password = password.strip() if password else password
self.timeout = timeout self.timeout = timeout
self.isbn = isbn self.isbn = isbn
self.title = title
self.needs_isbn = False
self.author = author
QThread.__init__(self) QThread.__init__(self)
self.exception = self.traceback = self.cover_data = None self.exception = self.traceback = self.cover_data = None
def run(self): def run(self):
try: try:
login(self.username, self.password, force=False) if not self.isbn:
from calibre.ebooks.metadata.fetch import search
if not self.title:
self.needs_isbn = True
return
au = self.author if self.author else None
key = prefs['isbndb_com_key']
if not key:
key = None
results = search(title=self.title, author=au,
isbndb_key=key)[0]
results = sorted([x.isbn for x in results if x.isbn],
cmp=lambda x,y:cmp(len(x),len(y)), reverse=True)
if not results:
self.needs_isbn = True
return
self.isbn = results[0]
if self.username and self.password:
login(self.username, self.password, force=False)
self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0] self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0]
except Exception, e: except Exception, e:
self.exception = e self.exception = e
self.traceback = traceback.format_exc() self.traceback = traceback.format_exc()
print self.traceback
@ -64,6 +88,8 @@ class AuthorCompleter(QCompleter):
class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
COVER_FETCH_TIMEOUT = 240 # seconds
def do_reset_cover(self, *args): def do_reset_cover(self, *args):
pix = QPixmap(':/images/book.svg') pix = QPixmap(':/images/book.svg')
self.cover.setPixmap(pix) self.cover.setPixmap(pix)
@ -345,36 +371,39 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
def lt_password_dialog(self): def lt_password_dialog(self):
return PasswordDialog(self, 'LibraryThing account', return PasswordDialog(self, 'LibraryThing account',
_('<p>Enter your username and password for <b>LibraryThing.com</b>. <br/>If you do not have one, you can <a href=\'http://www.librarything.com\'>register</a> for free!.</p>')) _('<p>Enter your username and password for '
'<b>LibraryThing.com</b>. This is <b>optional</b>. It will '
'make fetching of covers faster and more reliable.<br/>If '
'you do not have an account, you can '
'<a href=\'http://www.librarything.com\'>register</a> for '
'free.</p>'))
def change_password(self): def change_password(self):
d = self.lt_password_dialog() d = self.lt_password_dialog()
d.exec_() d.exec_()
def fetch_cover(self): def fetch_cover(self):
isbn = qstring_to_unicode(self.isbn.text()) isbn = unicode(self.isbn.text()).strip()
if isbn: d = self.lt_password_dialog()
d = self.lt_password_dialog() if not gui_conf['asked_library_thing_password'] and \
if not d.username() or not d.password(): (not d.username() or not d.password()):
d.exec_() d.exec_()
if d.result() != PasswordDialog.Accepted: gui_conf['asked_library_thing_password'] = True
return self.fetch_cover_button.setEnabled(False)
self.fetch_cover_button.setEnabled(False) self.setCursor(Qt.WaitCursor)
self.setCursor(Qt.WaitCursor) title, author = map(unicode, (self.title.text(), self.authors.text()))
self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn, self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn,
self.timeout) self.timeout, title, author)
self.cover_fetcher.start() self.cover_fetcher.start()
self._hangcheck = QTimer(self) self._hangcheck = QTimer(self)
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck) self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
self.cf_start_time = time.time() self.cf_start_time = time.time()
self.pi.start(_('Downloading cover...')) self.pi.start(_('Downloading cover...'))
self._hangcheck.start(100) self._hangcheck.start(100)
else:
error_dialog(self, _('Cannot fetch cover'),
_('You must specify the ISBN identifier for this book.')).exec_()
def hangcheck(self): def hangcheck(self):
if not (self.cover_fetcher.isFinished() or time.time()-self.cf_start_time > 150): if not self.cover_fetcher.isFinished() and \
time.time()-self.cf_start_time < self.COVER_FETCH_TIMEOUT:
return return
self._hangcheck.stop() self._hangcheck.stop()
@ -385,6 +414,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
_('<b>Could not fetch cover.</b><br/>')+ _('<b>Could not fetch cover.</b><br/>')+
_('The download timed out.')).exec_() _('The download timed out.')).exec_()
return return
if self.cover_fetcher.needs_isbn:
error_dialog(self, _('Cannot fetch cover'),
_('Could not find cover for this book. Try '
'specifying the ISBN first.')).exec_()
return
if self.cover_fetcher.exception is not None: if self.cover_fetcher.exception is not None:
err = self.cover_fetcher.exception err = self.cover_fetcher.exception
error_dialog(self, _('Cannot fetch cover'), error_dialog(self, _('Cannot fetch cover'),
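
Two behavioural notes on the hunks above: the hang check now polls against the named COVER_FETCH_TIMEOUT (240 seconds) instead of the old hard-coded 150, and the new ISBN fallback keeps the longest ISBN returned by the metadata search, i.e. it prefers an ISBN-13 over an ISBN-10. A small illustration of that preference with hypothetical values:

candidates = ['0316769487', '9780316769488']        # ISBN-10 and ISBN-13 for the same book
candidates = sorted(candidates, key=len, reverse=True)
print candidates[0]                                  # -> '9780316769488', the 13-digit form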

View File

@ -1,7 +1,8 @@
<ui version="4.0" > <?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class> <class>Dialog</class>
<widget class="QDialog" name="Dialog" > <widget class="QDialog" name="Dialog">
<property name="geometry" > <property name="geometry">
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
@ -9,66 +10,70 @@
<height>209</height> <height>209</height>
</rect> </rect>
</property> </property>
<property name="windowTitle" > <property name="windowTitle">
<string>Password needed</string> <string>Password needed</string>
</property> </property>
<property name="windowIcon" > <property name="windowIcon">
<iconset resource="../images.qrc" >:/images/mimetypes/unknown.svg</iconset> <iconset resource="../images.qrc">
<normaloff>:/images/mimetypes/unknown.svg</normaloff>:/images/mimetypes/unknown.svg</iconset>
</property> </property>
<layout class="QGridLayout" > <layout class="QGridLayout">
<item row="0" column="1" > <item row="0" column="1">
<widget class="QLabel" name="msg" > <widget class="QLabel" name="msg">
<property name="text" > <property name="text">
<string>TextLabel</string> <string>TextLabel</string>
</property> </property>
<property name="openExternalLinks" > <property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool> <bool>true</bool>
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0" > <item row="1" column="0">
<widget class="QLabel" name="label" > <widget class="QLabel" name="label">
<property name="text" > <property name="text">
<string>&amp;Username:</string> <string>&amp;Username:</string>
</property> </property>
<property name="buddy" > <property name="buddy">
<cstring>gui_username</cstring> <cstring>gui_username</cstring>
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="1" > <item row="1" column="1">
<widget class="QLineEdit" name="gui_username" /> <widget class="QLineEdit" name="gui_username"/>
</item> </item>
<item row="2" column="0" > <item row="2" column="0">
<widget class="QLabel" name="label_2" > <widget class="QLabel" name="label_2">
<property name="text" > <property name="text">
<string>&amp;Password:</string> <string>&amp;Password:</string>
</property> </property>
<property name="buddy" > <property name="buddy">
<cstring>gui_password</cstring> <cstring>gui_password</cstring>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1" > <item row="2" column="1">
<widget class="QLineEdit" name="gui_password" > <widget class="QLineEdit" name="gui_password">
<property name="echoMode" > <property name="echoMode">
<enum>QLineEdit::Password</enum> <enum>QLineEdit::Password</enum>
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="1" > <item row="4" column="1">
<widget class="QDialogButtonBox" name="buttonBox" > <widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation" > <property name="orientation">
<enum>Qt::Horizontal</enum> <enum>Qt::Horizontal</enum>
</property> </property>
<property name="standardButtons" > <property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok</set> <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1" > <item row="3" column="1">
<widget class="QCheckBox" name="show_password" > <widget class="QCheckBox" name="show_password">
<property name="text" > <property name="text">
<string>&amp;Show password</string> <string>&amp;Show password</string>
</property> </property>
</widget> </widget>
@ -76,7 +81,7 @@
</layout> </layout>
</widget> </widget>
<resources> <resources>
<include location="../images.qrc" /> <include location="../images.qrc"/>
</resources> </resources>
<connections> <connections>
<connection> <connection>
@ -85,11 +90,11 @@
<receiver>Dialog</receiver> <receiver>Dialog</receiver>
<slot>accept()</slot> <slot>accept()</slot>
<hints> <hints>
<hint type="sourcelabel" > <hint type="sourcelabel">
<x>248</x> <x>248</x>
<y>254</y> <y>254</y>
</hint> </hint>
<hint type="destinationlabel" > <hint type="destinationlabel">
<x>157</x> <x>157</x>
<y>274</y> <y>274</y>
</hint> </hint>
@ -101,11 +106,11 @@
<receiver>Dialog</receiver> <receiver>Dialog</receiver>
<slot>reject()</slot> <slot>reject()</slot>
<hints> <hints>
<hint type="sourcelabel" > <hint type="sourcelabel">
<x>316</x> <x>316</x>
<y>260</y> <y>260</y>
</hint> </hint>
<hint type="destinationlabel" > <hint type="destinationlabel">
<x>286</x> <x>286</x>
<y>274</y> <y>274</y>
</hint> </hint>

Binary file not shown (new image added; 827 B).
Binary file not shown (new image added; 811 B).

View File

@ -108,6 +108,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.donate_action = self.system_tray_menu.addAction( self.donate_action = self.system_tray_menu.addAction(
QIcon(':/images/donate.svg'), _('&Donate to support calibre')) QIcon(':/images/donate.svg'), _('&Donate to support calibre'))
self.donate_button.setDefaultAction(self.donate_action) self.donate_button.setDefaultAction(self.donate_action)
if not config['show_donate_button']:
self.donate_button.setVisible(False)
self.addAction(self.quit_action) self.addAction(self.quit_action)
self.action_restart = QAction(_('&Restart'), self) self.action_restart = QAction(_('&Restart'), self)
self.addAction(self.action_restart) self.addAction(self.action_restart)

View File

@ -25,7 +25,7 @@ from calibre.ebooks.lrf.comic.convert_from import config as comicconfig
# Ordered list of source formats. Items closer to the beginning are # Ordered list of source formats. Items closer to the beginning are
# preferred for conversion over those toward the end. # preferred for conversion over those toward the end.
PREFERRED_SOURCE_FORMATS = ['epub', 'lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf', PREFERRED_SOURCE_FORMATS = ['epub', 'lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
'txt', 'pdf', 'oebzip', 'htm', 'html'] 'txt', 'pdf', 'oebzip', 'htm', 'html']
def get_dialog(fmt): def get_dialog(fmt):
@ -43,20 +43,20 @@ def get_config(fmt):
def auto_convert(fmt, parent, db, rows): def auto_convert(fmt, parent, db, rows):
changed = False changed = False
jobs = [] jobs = []
total = len(rows) total = len(rows)
if total == 0: if total == 0:
return None, None, None return None, None, None
parent.status_bar.showMessage(_('Starting auto conversion of %d books')%total, 2000) parent.status_bar.showMessage(_('Starting auto conversion of %d books')%total, 2000)
i = 0 i = 0
bad_rows = [] bad_rows = []
for i, row in enumerate(rows): for i, row in enumerate(rows):
row_id = db.id(row) row_id = db.id(row)
temp_files = [] temp_files = []
data = None data = None
in_formats = [f.lower() for f in db.formats(row).split(',')] in_formats = [f.lower() for f in db.formats(row).split(',')]
in_formats = list(set(in_formats).intersection(available_input_formats())) in_formats = list(set(in_formats).intersection(available_input_formats()))
@ -88,10 +88,10 @@ def auto_convert(fmt, parent, db, rows):
for row in bad_rows: for row in bad_rows:
title = db.title(row) title = db.title(row)
res.append('<li>%s</li>'%title) res.append('<li>%s</li>'%title)
msg = _('<p>Could not convert %d of %d books, because no suitable source format was found.<ul>%s</ul>')%(len(res), total, '\n'.join(res)) msg = _('<p>Could not convert %d of %d books, because no suitable source format was found.<ul>%s</ul>')%(len(res), total, '\n'.join(res))
warning_dialog(parent, _('Could not convert some books'), msg).exec_() warning_dialog(parent, _('Could not convert some books'), msg).exec_()
return jobs, changed, bad_rows return jobs, changed, bad_rows
def convert_single(fmt, parent, db, comics, others): def convert_single(fmt, parent, db, comics, others):
@ -120,10 +120,10 @@ def convert_single(fmt, parent, db, comics, others):
temp_files.append(d.cover_file) temp_files.append(d.cover_file)
opts.cover = d.cover_file.name opts.cover = d.cover_file.name
temp_files.extend([d.opf_file, pt, of]) temp_files.extend([d.opf_file, pt, of])
jobs.append(('any2'+fmt, args, _('Convert book: ')+d.mi.title, jobs.append(('any2'+fmt, args, _('Convert book: ')+d.mi.title,
fmt.upper(), row_id, temp_files)) fmt.upper(), row_id, temp_files))
changed = True changed = True
for row, row_id in zip(comics, comics_ids): for row, row_id in zip(comics, comics_ids):
mi = db.get_metadata(row) mi = db.get_metadata(row)
title = author = _('Unknown') title = author = _('Unknown')
@ -140,7 +140,7 @@ def convert_single(fmt, parent, db, comics, others):
try: try:
data = db.format(row, _fmt.upper()) data = db.format(row, _fmt.upper())
if data is not None: if data is not None:
break break
except: except:
continue continue
pt = PersistentTemporaryFile('.'+_fmt) pt = PersistentTemporaryFile('.'+_fmt)
@ -152,12 +152,12 @@ def convert_single(fmt, parent, db, comics, others):
opts.verbose = 2 opts.verbose = 2
args = [pt.name, opts] args = [pt.name, opts]
changed = True changed = True
jobs.append(('comic2'+fmt, args, _('Convert comic: ')+opts.title, jobs.append(('comic2'+fmt, args, _('Convert comic: ')+opts.title,
fmt.upper(), row_id, [pt, of])) fmt.upper(), row_id, [pt, of]))
return jobs, changed return jobs, changed
def convert_single_lrf(parent, db, comics, others): def convert_single_lrf(parent, db, comics, others):
changed = False changed = False
@ -182,10 +182,10 @@ def convert_single_lrf(parent, db, comics, others):
if d.cover_file: if d.cover_file:
temp_files.append(d.cover_file) temp_files.append(d.cover_file)
temp_files.extend([pt, of]) temp_files.extend([pt, of])
jobs.append(('any2lrf', [cmdline], _('Convert book: ')+d.title(), jobs.append(('any2lrf', [cmdline], _('Convert book: ')+d.title(),
'LRF', row_id, temp_files)) 'LRF', row_id, temp_files))
changed = True changed = True
for row, row_id in zip(comics, comics_ids): for row, row_id in zip(comics, comics_ids):
mi = db.get_metadata(row) mi = db.get_metadata(row)
title = author = _('Unknown') title = author = _('Unknown')
@ -202,7 +202,7 @@ def convert_single_lrf(parent, db, comics, others):
try: try:
data = db.format(row, fmt.upper()) data = db.format(row, fmt.upper())
if data is not None: if data is not None:
break break
except: except:
continue continue
if data is None: if data is None:
@ -216,19 +216,20 @@ def convert_single_lrf(parent, db, comics, others):
opts.verbose = 1 opts.verbose = 1
args = [pt.name, opts] args = [pt.name, opts]
changed = True changed = True
jobs.append(('comic2lrf', args, _('Convert comic: ')+opts.title, jobs.append(('comic2lrf', args, _('Convert comic: ')+opts.title,
'LRF', row_id, [pt, of])) 'LRF', row_id, [pt, of]))
return jobs, changed return jobs, changed
def convert_bulk(fmt, parent, db, comics, others): def convert_bulk(fmt, parent, db, comics, others):
if others: if others:
d = get_dialog(fmt)(parent, db) d = get_dialog(fmt)(parent, db)
if d.exec_() != QDialog.Accepted: if d.exec_() != QDialog.Accepted:
others = [] others, user_mi = [], None
else: else:
opts = d.opts opts = d.opts
opts.verbose = 2 opts.verbose = 2
user_mi = d.user_mi
if comics: if comics:
comic_opts = ComicConf.get_bulk_conversion_options(parent) comic_opts = ComicConf.get_bulk_conversion_options(parent)
if not comic_opts: if not comic_opts:
@ -239,7 +240,7 @@ def convert_bulk(fmt, parent, db, comics, others):
if total == 0: if total == 0:
return return
parent.status_bar.showMessage(_('Starting Bulk conversion of %d books')%total, 2000) parent.status_bar.showMessage(_('Starting Bulk conversion of %d books')%total, 2000)
for i, row in enumerate(others+comics): for i, row in enumerate(others+comics):
row_id = db.id(row) row_id = db.id(row)
if row in others: if row in others:
@ -256,6 +257,11 @@ def convert_bulk(fmt, parent, db, comics, others):
continue continue
options = opts.copy() options = opts.copy()
mi = db.get_metadata(row) mi = db.get_metadata(row)
if user_mi is not None:
if user_mi.series_index == 1:
user_mi.series_index = None
mi.smart_update(user_mi)
db.set_metadata(db.id(row), mi)
opf = OPFCreator(os.getcwdu(), mi) opf = OPFCreator(os.getcwdu(), mi)
opf_file = PersistentTemporaryFile('.opf') opf_file = PersistentTemporaryFile('.opf')
opf.render(opf_file) opf.render(opf_file)
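
The intent of the five added lines above, spelled out (smart_update() is assumed here to copy only the fields that are actually set on its argument):

if user_mi is not None:
    if user_mi.series_index == 1:        # 1 is the dialog's default, i.e. the user never
        user_mi.series_index = None      # touched it, so keep the book's own series index
    mi.smart_update(user_mi)             # overlay the user's bulk edits onto this book
    db.set_metadata(db.id(row), mi)      # persist them before the conversion job is queued
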
@ -291,10 +297,10 @@ def convert_bulk(fmt, parent, db, comics, others):
try: try:
data = db.format(row, _fmt.upper()) data = db.format(row, _fmt.upper())
if data is not None: if data is not None:
break break
except: except:
continue continue
pt = PersistentTemporaryFile('.'+_fmt.lower()) pt = PersistentTemporaryFile('.'+_fmt.lower())
pt.write(data) pt.write(data)
pt.close() pt.close()
@ -304,17 +310,17 @@ def convert_bulk(fmt, parent, db, comics, others):
options.verbose = 1 options.verbose = 1
args = [pt.name, options] args = [pt.name, options]
desc = _('Convert book %d of %d (%s)')%(i+1, total, repr(mi.title)) desc = _('Convert book %d of %d (%s)')%(i+1, total, repr(mi.title))
jobs.append(('comic2'+fmt, args, desc, fmt.upper(), row_id, [pt, of])) jobs.append(('comic2'+fmt, args, desc, fmt.upper(), row_id, [pt, of]))
if bad_rows: if bad_rows:
res = [] res = []
for row in bad_rows: for row in bad_rows:
title = db.title(row) title = db.title(row)
res.append('<li>%s</li>'%title) res.append('<li>%s</li>'%title)
msg = _('<p>Could not convert %d of %d books, because no suitable source format was found.<ul>%s</ul>')%(len(res), total, '\n'.join(res)) msg = _('<p>Could not convert %d of %d books, because no suitable source format was found.<ul>%s</ul>')%(len(res), total, '\n'.join(res))
warning_dialog(parent, _('Could not convert some books'), msg).exec_() warning_dialog(parent, _('Could not convert some books'), msg).exec_()
return jobs, False return jobs, False
@ -333,7 +339,7 @@ def convert_bulk_lrf(parent, db, comics, others):
if total == 0: if total == 0:
return return
parent.status_bar.showMessage(_('Starting Bulk conversion of %d books')%total, 2000) parent.status_bar.showMessage(_('Starting Bulk conversion of %d books')%total, 2000)
for i, row in enumerate(others+comics): for i, row in enumerate(others+comics):
row_id = db.id(row) row_id = db.id(row)
if row in others: if row in others:
@ -388,10 +394,10 @@ def convert_bulk_lrf(parent, db, comics, others):
try: try:
data = db.format(row, fmt.upper()) data = db.format(row, fmt.upper())
if data is not None: if data is not None:
break break
except: except:
continue continue
pt = PersistentTemporaryFile('.'+fmt.lower()) pt = PersistentTemporaryFile('.'+fmt.lower())
pt.write(data) pt.write(data)
pt.close() pt.close()
@ -401,17 +407,17 @@ def convert_bulk_lrf(parent, db, comics, others):
options.verbose = 1 options.verbose = 1
args = [pt.name, options] args = [pt.name, options]
desc = _('Convert book %d of %d (%s)')%(i+1, total, repr(mi.title)) desc = _('Convert book %d of %d (%s)')%(i+1, total, repr(mi.title))
jobs.append(('comic2lrf', args, desc, 'LRF', row_id, [pt, of])) jobs.append(('comic2lrf', args, desc, 'LRF', row_id, [pt, of]))
if bad_rows: if bad_rows:
res = [] res = []
for row in bad_rows: for row in bad_rows:
title = db.title(row) title = db.title(row)
res.append('<li>%s</li>'%title) res.append('<li>%s</li>'%title)
msg = _('<p>Could not convert %d of %d books, because no suitable source format was found.<ul>%s</ul>')%(len(res), total, '\n'.join(res)) msg = _('<p>Could not convert %d of %d books, because no suitable source format was found.<ul>%s</ul>')%(len(res), total, '\n'.join(res))
warning_dialog(parent, _('Could not convert some books'), msg).exec_() warning_dialog(parent, _('Could not convert some books'), msg).exec_()
return jobs, False return jobs, False
def set_conversion_defaults_lrf(comic, parent, db): def set_conversion_defaults_lrf(comic, parent, db):
@ -438,7 +444,7 @@ def _fetch_news(data, fmt):
args.extend(['--password', data['password']]) args.extend(['--password', data['password']])
args.append(data['script'] if data['script'] else data['title']) args.append(data['script'] if data['script'] else data['title'])
return 'feeds2'+fmt.lower(), [args], _('Fetch news from ')+data['title'], fmt.upper(), [pt] return 'feeds2'+fmt.lower(), [args], _('Fetch news from ')+data['title'], fmt.upper(), [pt]
def fetch_scheduled_recipe(recipe, script): def fetch_scheduled_recipe(recipe, script):
from calibre.gui2.dialogs.scheduler import config from calibre.gui2.dialogs.scheduler import config
@ -453,7 +459,7 @@ def fetch_scheduled_recipe(recipe, script):
args.extend(['--username', x[0], '--password', x[1]]) args.extend(['--username', x[0], '--password', x[1]])
args.append(script) args.append(script)
return 'feeds2'+fmt, [args], _('Fetch news from ')+recipe.title, fmt.upper(), [pt] return 'feeds2'+fmt, [args], _('Fetch news from ')+recipe.title, fmt.upper(), [pt]
def auto_convert_ebook(*args): def auto_convert_ebook(*args):
return auto_convert(*args) return auto_convert(*args)
@ -463,14 +469,14 @@ def convert_single_ebook(*args):
return convert_single_lrf(*args) return convert_single_lrf(*args)
elif fmt in ('epub', 'mobi'): elif fmt in ('epub', 'mobi'):
return convert_single(fmt, *args) return convert_single(fmt, *args)
def convert_bulk_ebooks(*args): def convert_bulk_ebooks(*args):
fmt = prefs['output_format'].lower() fmt = prefs['output_format'].lower()
if fmt == 'lrf': if fmt == 'lrf':
return convert_bulk_lrf(*args) return convert_bulk_lrf(*args)
elif fmt in ('epub', 'mobi'): elif fmt in ('epub', 'mobi'):
return convert_bulk(fmt, *args) return convert_bulk(fmt, *args)
def set_conversion_defaults(comic, parent, db): def set_conversion_defaults(comic, parent, db):
fmt = prefs['output_format'].lower() fmt = prefs['output_format'].lower()
if fmt == 'lrf': if fmt == 'lrf':

View File

@ -7,19 +7,19 @@ from calibre.utils.config import Config, StringConfig
def server_config(defaults=None): def server_config(defaults=None):
desc=_('Settings to control the calibre content server') desc=_('Settings to control the calibre content server')
c = Config('server', desc) if defaults is None else StringConfig(defaults, desc) c = Config('server', desc) if defaults is None else StringConfig(defaults, desc)
c.add_opt('port', ['-p', '--port'], default=8080, c.add_opt('port', ['-p', '--port'], default=8080,
help=_('The port on which to listen. Default is %default')) help=_('The port on which to listen. Default is %default'))
c.add_opt('timeout', ['-t', '--timeout'], default=120, c.add_opt('timeout', ['-t', '--timeout'], default=120,
help=_('The server timeout in seconds. Default is %default')) help=_('The server timeout in seconds. Default is %default'))
c.add_opt('thread_pool', ['--thread-pool'], default=30, c.add_opt('thread_pool', ['--thread-pool'], default=30,
help=_('The max number of worker threads to use. Default is %default')) help=_('The max number of worker threads to use. Default is %default'))
c.add_opt('password', ['--password'], default=None, c.add_opt('password', ['--password'], default=None,
help=_('Set a password to restrict access. By default access is unrestricted.')) help=_('Set a password to restrict access. By default access is unrestricted.'))
c.add_opt('username', ['--username'], default='calibre', c.add_opt('username', ['--username'], default='calibre',
help=_('Username for access. By default, it is: %default')) help=_('Username for access. By default, it is: %default'))
c.add_opt('develop', ['--develop'], default=False, c.add_opt('develop', ['--develop'], default=False,
help='Development mode. Server automatically restarts on file changes and serves code files (html, css, js) from the file system instead of calibre\'s resource system.') help='Development mode. Server automatically restarts on file changes and serves code files (html, css, js) from the file system instead of calibre\'s resource system.')
c.add_opt('max_cover', ['--max-cover'], default='600x800', c.add_opt('max_cover', ['--max-cover'], default='600x800',
help=_('The maximum size for displayed covers. Default is %default.')) help=_('The maximum size for displayed covers. Default is %default.'))
return c return c
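
These options double as the content server's command-line switches; a hedged sketch of standalone use, assuming the Config object exposes the same option_parser() call relied on by the option_parser() helper at the end of this file:

parser = server_config().option_parser('%prog [options]')
opts, args = parser.parse_args(['--port', '8081', '--max-cover', '800x1000'])
print opts.port, opts.max_cover          # -> 8081 800x1000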

View File

@ -30,31 +30,31 @@ build_time = datetime.strptime(build_time, '%d %m %Y %H%M%S')
server_resources['jquery.js'] = jquery server_resources['jquery.js'] = jquery
def expose(func): def expose(func):
def do(self, *args, **kwargs): def do(self, *args, **kwargs):
dict.update(cherrypy.response.headers, {'Server':self.server_name}) dict.update(cherrypy.response.headers, {'Server':self.server_name})
return func(self, *args, **kwargs) return func(self, *args, **kwargs)
return cherrypy.expose(do) return cherrypy.expose(do)
log_access_file = os.path.join(config_dir, 'server_access_log.txt') log_access_file = os.path.join(config_dir, 'server_access_log.txt')
log_error_file = os.path.join(config_dir, 'server_error_log.txt') log_error_file = os.path.join(config_dir, 'server_error_log.txt')
class LibraryServer(object): class LibraryServer(object):
server_name = __appname__ + '/' + __version__ server_name = __appname__ + '/' + __version__
BOOK = textwrap.dedent('''\ BOOK = textwrap.dedent('''\
<book xmlns:py="http://genshi.edgewall.org/" <book xmlns:py="http://genshi.edgewall.org/"
id="${r[0]}" id="${r[0]}"
title="${r[1]}" title="${r[1]}"
sort="${r[11]}" sort="${r[11]}"
author_sort="${r[12]}" author_sort="${r[12]}"
authors="${authors}" authors="${authors}"
rating="${r[4]}" rating="${r[4]}"
timestamp="${r[5].strftime('%Y/%m/%d %H:%M:%S')}" timestamp="${r[5].strftime('%Y/%m/%d %H:%M:%S')}"
size="${r[6]}" size="${r[6]}"
isbn="${r[14] if r[14] else ''}" isbn="${r[14] if r[14] else ''}"
formats="${r[13] if r[13] else ''}" formats="${r[13] if r[13] else ''}"
series = "${r[9] if r[9] else ''}" series = "${r[9] if r[9] else ''}"
@ -63,7 +63,7 @@ class LibraryServer(object):
publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''} publisher="${r[3] if r[3] else ''}">${r[8] if r[8] else ''}
</book> </book>
''') ''')
LIBRARY = MarkupTemplate(textwrap.dedent('''\ LIBRARY = MarkupTemplate(textwrap.dedent('''\
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<library xmlns:py="http://genshi.edgewall.org/" start="$start" num="${len(books)}" total="$total" updated="${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}"> <library xmlns:py="http://genshi.edgewall.org/" start="$start" num="${len(books)}" total="$total" updated="${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}">
@ -72,7 +72,7 @@ class LibraryServer(object):
</py:for> </py:for>
</library> </library>
''')) '''))
STANZA_ENTRY=MarkupTemplate(textwrap.dedent('''\ STANZA_ENTRY=MarkupTemplate(textwrap.dedent('''\
<entry xmlns:py="http://genshi.edgewall.org/"> <entry xmlns:py="http://genshi.edgewall.org/">
<title>${record[FM['title']]}</title> <title>${record[FM['title']]}</title>
@ -87,7 +87,7 @@ class LibraryServer(object):
</content> </content>
</entry> </entry>
''')) '''))
STANZA = MarkupTemplate(textwrap.dedent('''\ STANZA = MarkupTemplate(textwrap.dedent('''\
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:py="http://genshi.edgewall.org/"> <feed xmlns="http://www.w3.org/2005/Atom" xmlns:py="http://genshi.edgewall.org/">
@ -107,7 +107,7 @@ class LibraryServer(object):
</feed> </feed>
''')) '''))
def __init__(self, db, opts, embedded=False, show_tracebacks=True): def __init__(self, db, opts, embedded=False, show_tracebacks=True):
self.db = db self.db = db
for item in self.db: for item in self.db:
@ -116,7 +116,7 @@ class LibraryServer(object):
self.opts = opts self.opts = opts
self.max_cover_width, self.max_cover_height = \ self.max_cover_width, self.max_cover_height = \
map(int, self.opts.max_cover.split('x')) map(int, self.opts.max_cover.split('x'))
cherrypy.config.update({ cherrypy.config.update({
'log.screen' : opts.develop, 'log.screen' : opts.develop,
'engine.autoreload_on' : opts.develop, 'engine.autoreload_on' : opts.develop,
@ -141,10 +141,10 @@ class LibraryServer(object):
'tools.digest_auth.realm' : (_('Password to access your calibre library. Username is ') + opts.username.strip()).encode('ascii', 'replace'), 'tools.digest_auth.realm' : (_('Password to access your calibre library. Username is ') + opts.username.strip()).encode('ascii', 'replace'),
'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()}, 'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()},
} }
self.is_running = False self.is_running = False
self.exception = None self.exception = None
def setup_loggers(self): def setup_loggers(self):
access_file = log_access_file access_file = log_access_file
error_file = log_error_file error_file = log_error_file
@ -152,20 +152,20 @@ class LibraryServer(object):
maxBytes = getattr(log, "rot_maxBytes", 10000000) maxBytes = getattr(log, "rot_maxBytes", 10000000)
backupCount = getattr(log, "rot_backupCount", 1000) backupCount = getattr(log, "rot_backupCount", 1000)
# Make a new RotatingFileHandler for the error log. # Make a new RotatingFileHandler for the error log.
h = RotatingFileHandler(error_file, 'a', maxBytes, backupCount) h = RotatingFileHandler(error_file, 'a', maxBytes, backupCount)
h.setLevel(logging.DEBUG) h.setLevel(logging.DEBUG)
h.setFormatter(cherrypy._cplogging.logfmt) h.setFormatter(cherrypy._cplogging.logfmt)
log.error_log.addHandler(h) log.error_log.addHandler(h)
# Make a new RotatingFileHandler for the access log. # Make a new RotatingFileHandler for the access log.
h = RotatingFileHandler(access_file, 'a', maxBytes, backupCount) h = RotatingFileHandler(access_file, 'a', maxBytes, backupCount)
h.setLevel(logging.DEBUG) h.setLevel(logging.DEBUG)
h.setFormatter(cherrypy._cplogging.logfmt) h.setFormatter(cherrypy._cplogging.logfmt)
log.access_log.addHandler(h) log.access_log.addHandler(h)
def start(self): def start(self):
self.is_running = False self.is_running = False
self.setup_loggers() self.setup_loggers()
@ -173,7 +173,7 @@ class LibraryServer(object):
try: try:
cherrypy.engine.start() cherrypy.engine.start()
self.is_running = True self.is_running = True
publish_zeroconf('Books in calibre', '_stanza._tcp', publish_zeroconf('Books in calibre', '_stanza._tcp',
self.opts.port, {'path':'/stanza'}) self.opts.port, {'path':'/stanza'})
cherrypy.engine.block() cherrypy.engine.block()
except Exception, e: except Exception, e:
@ -181,10 +181,10 @@ class LibraryServer(object):
finally: finally:
self.is_running = False self.is_running = False
stop_zeroconf() stop_zeroconf()
def exit(self): def exit(self):
cherrypy.engine.exit() cherrypy.engine.exit()
def get_cover(self, id, thumbnail=False): def get_cover(self, id, thumbnail=False):
cover = self.db.cover(id, index_is_id=True, as_file=False) cover = self.db.cover(id, index_is_id=True, as_file=False)
if cover is None: if cover is None:
@ -196,14 +196,14 @@ class LibraryServer(object):
try: try:
if QApplication.instance() is None: if QApplication.instance() is None:
QApplication([]) QApplication([])
im = QImage() im = QImage()
im.loadFromData(cover) im.loadFromData(cover)
if im.isNull(): if im.isNull():
raise cherrypy.HTTPError(404, 'No valid cover found') raise cherrypy.HTTPError(404, 'No valid cover found')
width, height = im.width(), im.height() width, height = im.width(), im.height()
scaled, width, height = fit_image(width, height, scaled, width, height = fit_image(width, height,
60 if thumbnail else self.max_cover_width, 60 if thumbnail else self.max_cover_width,
80 if thumbnail else self.max_cover_height) 80 if thumbnail else self.max_cover_height)
if not scaled: if not scaled:
return cover return cover
@ -217,7 +217,7 @@ class LibraryServer(object):
import traceback import traceback
traceback.print_exc() traceback.print_exc()
raise cherrypy.HTTPError(404, 'Failed to generate cover: %s'%err) raise cherrypy.HTTPError(404, 'Failed to generate cover: %s'%err)
def get_format(self, id, format): def get_format(self, id, format):
format = format.upper() format = format.upper()
fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb') fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
@ -232,7 +232,7 @@ class LibraryServer(object):
updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) updated = datetime.utcfromtimestamp(os.stat(path).st_mtime)
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
return fmt.read() return fmt.read()
def sort(self, items, field, order): def sort(self, items, field, order):
field = field.lower().strip() field = field.lower().strip()
if field == 'author': if field == 'author':
@ -243,10 +243,23 @@ class LibraryServer(object):
raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ cmpf = cmp if field in ('rating', 'size', 'timestamp') else \
lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '')
field = FIELD_MAP[field] if field == 'series':
getter = operator.itemgetter(field) items.sort(cmp=self.seriescmp, reverse=not order)
items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) else:
field = FIELD_MAP[field]
getter = operator.itemgetter(field)
items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order)
def seriescmp(self, x, y):
si = FIELD_MAP['series']
try:
ans = cmp(x[si].lower(), y[si].lower())
except AttributeError: # Some entries may be None
ans = cmp(x[si], y[si])
if ans != 0: return ans
return cmp(x[FIELD_MAP['series_index']], y[FIELD_MAP['series_index']])
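
A hedged illustration of the ordering seriescmp produces: books sort by series name, compared case-insensitively, with ties broken by series_index (plain dicts stand in for the FIELD_MAP-indexed records):

rows = [{'series': 'Foundation', 'series_index': 2},
        {'series': 'foundation', 'series_index': 1},
        {'series': 'Culture',    'series_index': 5}]
rows.sort(key=lambda r: ((r['series'] or '').lower(), r['series_index']))
print [(r['series'], r['series_index']) for r in rows]
# -> [('Culture', 5), ('foundation', 1), ('Foundation', 2)]
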
def last_modified(self, updated): def last_modified(self, updated):
lm = updated.strftime('day, %d month %Y %H:%M:%S GMT') lm = updated.strftime('day, %d month %Y %H:%M:%S GMT')
day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'} day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'}
@ -254,8 +267,8 @@ class LibraryServer(object):
month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul',
8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
return lm.replace('month', month[updated.month]) return lm.replace('month', month[updated.month])
@expose @expose
def stanza(self): def stanza(self):
' Feeds to read calibre books on an iPod with Stanza.' ' Feeds to read calibre books on an iPod with Stanza.'
@ -264,7 +277,7 @@ class LibraryServer(object):
r = record[FIELD_MAP['formats']] r = record[FIELD_MAP['formats']]
r = r.upper() if r else '' r = r.upper() if r else ''
if 'EPUB' in r or 'PDB' in r: if 'EPUB' in r or 'PDB' in r:
authors = ' & '.join([i.replace('|', ',') for i in authors = ' & '.join([i.replace('|', ',') for i in
record[FIELD_MAP['authors']].split(',')]) record[FIELD_MAP['authors']].split(',')])
extra = [] extra = []
rating = record[FIELD_MAP['rating']] rating = record[FIELD_MAP['rating']]
@ -276,7 +289,7 @@ class LibraryServer(object):
extra.append('TAGS: %s<br />'%', '.join(tags.split(','))) extra.append('TAGS: %s<br />'%', '.join(tags.split(',')))
series = record[FIELD_MAP['series']] series = record[FIELD_MAP['series']]
if series: if series:
extra.append('SERIES: %s [%d]<br />'%(series, extra.append('SERIES: %s [%d]<br />'%(series,
record[FIELD_MAP['series_index']])) record[FIELD_MAP['series_index']]))
fmt = 'epub' if 'EPUB' in r else 'pdb' fmt = 'epub' if 'EPUB' in r else 'pdb'
mimetype = guess_type('dummy.'+fmt)[0] mimetype = guess_type('dummy.'+fmt)[0]
@ -288,24 +301,24 @@ class LibraryServer(object):
mimetype=mimetype, mimetype=mimetype,
fmt=fmt, fmt=fmt,
).render('xml').decode('utf8')) ).render('xml').decode('utf8'))
updated = self.db.last_modified() updated = self.db.last_modified()
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
cherrypy.response.headers['Content-Type'] = 'text/xml' cherrypy.response.headers['Content-Type'] = 'text/xml'
return self.STANZA.generate(subtitle='', data=books, FM=FIELD_MAP, return self.STANZA.generate(subtitle='', data=books, FM=FIELD_MAP,
updated=updated, id='urn:calibre:main').render('xml') updated=updated, id='urn:calibre:main').render('xml')
@expose @expose
def library(self, start='0', num='50', sort=None, search=None, def library(self, start='0', num='50', sort=None, search=None,
_=None, order='ascending'): _=None, order='ascending'):
''' '''
Serves metadata from the calibre database as XML. Serves metadata from the calibre database as XML.
:param sort: Sort results by ``sort``. Can be one of `title,author,rating`. :param sort: Sort results by ``sort``. Can be one of `title,author,rating`.
:param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax
:param start,num: Return the slice `[start:start+num]` of the sorted and filtered results :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results
:param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching
''' '''
try: try:
start = int(start) start = int(start)
@ -321,19 +334,19 @@ class LibraryServer(object):
items = [r for r in iter(self.db) if r[0] in ids] items = [r for r in iter(self.db) if r[0] in ids]
if sort is not None: if sort is not None:
self.sort(items, sort, order) self.sort(items, sort, order)
book, books = MarkupTemplate(self.BOOK), [] book, books = MarkupTemplate(self.BOOK), []
for record in items[start:start+num]: for record in items[start:start+num]:
aus = record[2] if record[2] else _('Unknown') aus = record[2] if record[2] else _('Unknown')
authors = '|'.join([i.replace('|', ',') for i in aus.split(',')]) authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8')) books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8'))
updated = self.db.last_modified() updated = self.db.last_modified()
cherrypy.response.headers['Content-Type'] = 'text/xml' cherrypy.response.headers['Content-Type'] = 'text/xml'
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
return self.LIBRARY.generate(books=books, start=start, updated=updated, return self.LIBRARY.generate(books=books, start=start, updated=updated,
total=len(ids)).render('xml') total=len(ids)).render('xml')
@expose @expose
def index(self, **kwargs): def index(self, **kwargs):
'The / URL' 'The / URL'
@ -341,8 +354,8 @@ class LibraryServer(object):
if stanza == 919: if stanza == 919:
return self.static('index.html') return self.static('index.html')
return self.stanza() return self.stanza()
@expose @expose
def get(self, what, id): def get(self, what, id):
'Serves files, covers, thumbnails from the calibre database' 'Serves files, covers, thumbnails from the calibre database'
@ -361,7 +374,7 @@ class LibraryServer(object):
if what == 'cover': if what == 'cover':
return self.get_cover(id) return self.get_cover(id)
return self.get_format(id, what) return self.get_format(id, what)
@expose @expose
def static(self, name): def static(self, name):
'Serves static content' 'Serves static content'
@ -392,11 +405,11 @@ def start_threaded_server(db, opts):
server.thread.setDaemon(True) server.thread.setDaemon(True)
server.thread.start() server.thread.start()
return server return server
def stop_threaded_server(server): def stop_threaded_server(server):
server.exit() server.exit()
server.thread = None server.thread = None
def option_parser(): def option_parser():
return config().option_parser('%prog '+ _('[options]\n\nStart the calibre content server.')) return config().option_parser('%prog '+ _('[options]\n\nStart the calibre content server.'))

View File

@ -69,6 +69,7 @@ else:
DOWNLOAD_DIR = '/var/www/calibre.kovidgoyal.net/htdocs/downloads' DOWNLOAD_DIR = '/var/www/calibre.kovidgoyal.net/htdocs/downloads'
MOBILEREAD = 'https://dev.mobileread.com/dist/kovid/calibre/' MOBILEREAD = 'https://dev.mobileread.com/dist/kovid/calibre/'
#MOBILEREAD = 'http://calibre.kovidgoyal.net/downloads/'
class OS(dict): class OS(dict):
"""Dictionary with a default value for unknown keys.""" """Dictionary with a default value for unknown keys."""
@ -197,6 +198,8 @@ else:
import sys, os, shutil, tarfile, subprocess, tempfile, urllib2, re, stat import sys, os, shutil, tarfile, subprocess, tempfile, urllib2, re, stat
MOBILEREAD='https://dev.mobileread.com/dist/kovid/calibre/' MOBILEREAD='https://dev.mobileread.com/dist/kovid/calibre/'
#MOBILEREAD='http://calibre.kovidgoyal.net/downloads/'
class TerminalController: class TerminalController:
BOL = '' #: Move the cursor to the beginning of the line BOL = '' #: Move the cursor to the beginning of the line

Binary file not shown (new image added; 50 KiB).
Binary file not shown (new image added; 36 KiB).

View File

@ -128,6 +128,12 @@ class BasicNewsRecipe(object):
#: #:
extra_css = None extra_css = None
#: If True, empty feeds are removed from the output.
#: This option has no effect if parse_index is overridden in
#: the subclass. It is meant only for recipes that return a list
#: of feeds using :member:`feeds` or :method:`get_feeds`.
remove_empty_feeds = False
#: List of regular expressions that determines which links to follow #: List of regular expressions that determines which links to follow
#: If empty, it is ignored. For example:: #: If empty, it is ignored. For example::
#: #:
@ -985,6 +991,11 @@ class BasicNewsRecipe(object):
self.log.exception(msg) self.log.exception(msg)
remove = [f for f in parsed_feeds if len(f) == 0 and
self.remove_empty_feeds]
for f in remove:
parsed_feeds.remove(f)
return parsed_feeds return parsed_feeds
@classmethod @classmethod
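
A minimal recipe opting in to the new behaviour (the title and feed URL are placeholders):

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title              = u'Example'
    oldest_article     = 1
    remove_empty_feeds = True    # feeds that yield no articles are dropped from the output
    feeds              = [(u'Main', u'http://example.com/rss')]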

View File

@ -39,6 +39,7 @@ recipe_modules = ['recipe_' + r for r in (
'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -0,0 +1,42 @@
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DerStandardRecipe(BasicNewsRecipe):
title = u'derStandard'
__author__ = 'Gerhard Aigner'
oldest_article = 1
max_articles_per_feed = 100
feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
(u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
(u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
(u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'),
(u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'),
(u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'),
(u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'),
(u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'),
(u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
(u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
(u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
encoding = 'utf-8'
language = _('German')
recursions = 0
remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
preprocess_regexps = [
(re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
]
def print_version(self, url):
return url.replace('?id=', 'txt/?id=')
def get_article_url(self, article):
'''if the article links to an index page (ressort) or a picture gallery
(ansichtssache), don't add it'''
if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
return None
return article.link

View File

@ -0,0 +1,40 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DiePresseRecipe(BasicNewsRecipe):
title = u'diePresse'
oldest_article = 1
max_articles_per_feed = 100
recursions = 0
language = _('German')
__author__ = 'Gerhard Aigner'
preprocess_regexps = [
(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
]
remove_tags = [dict(name='hr'),
dict(name='br'),
dict(name='small'),
dict(name='img'),
dict(name='div', attrs={'class':'textnavi'}),
dict(name='h1', attrs={'class':'titel'}),
dict(name='a', attrs={'class':'print'}),
dict(name='div', attrs={'class':'hline'})]
feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
(u'Europa', u'http://diepresse.com/rss/EU'),
(u'Panorama', u'http://diepresse.com/rss/Panorama'),
(u'Sport', u'http://diepresse.com/rss/Sport'),
(u'Kultur', u'http://diepresse.com/rss/Kultur'),
(u'Leben', u'http://diepresse.com/rss/Leben'),
(u'Tech', u'http://diepresse.com/rss/Tech'),
(u'Science', u'http://diepresse.com/rss/Science'),
(u'Bildung', u'http://diepresse.com/rss/Bildung'),
(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
(u'Recht', u'http://diepresse.com/rss/Recht'),
(u'Spectrum', u'http://diepresse.com/rss/Spectrum'),
(u'Meinung', u'http://diepresse.com/rss/Meinung')]
def print_version(self, url):
return url.replace('home','text/home')

View File

@ -0,0 +1,40 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch Hessisch Niedersachsische Allgemeine.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class hnaDe(BasicNewsRecipe):
title = 'HNA'
description = 'local news from Hessen/Germany'
__author__ = 'Oliver Niesner'
use_embedded_content = False
language = _('German')
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 40
no_stylesheets = True
encoding = 'iso-8859-1'
remove_tags = [dict(id='topnav'),
dict(id='nav_main'),
dict(id='suchen'),
dict(id=''),
dict(name='span'),
dict(name='ul', attrs={'class':'linklist'}),
dict(name='a', attrs={'href':'#'}),
dict(name='p', attrs={'class':'breadcrumb'}),
dict(name='p', attrs={'class':'h5'})]
#remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
remove_tags_after = [dict(name='a', attrs={'href':'#'})]
feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ]

View File

@ -1,80 +1,78 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Fetch Linuxdevices. Fetch Linuxdevices.
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe): class Sueddeutsche(BasicNewsRecipe):
title = u'Linuxdevices' title = u'Linuxdevices'
description = 'News about Linux driven Hardware' description = 'News about Linux driven Hardware'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a %d %b %Y]'
language = _('English') max_articles_per_feed = 50
max_articles_per_feed = 50 no_stylesheets = True
no_stylesheets = True html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
encoding = 'latin1' encoding = 'latin1'
remove_tags_after = [dict(id='nointelliTXT')]
filter_regexps = [r'ad\.doubleclick\.net'] remove_tags_after = [dict(id='nointelliTXT')]
filter_regexps = [r'ad\.doubleclick\.net']
remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
dict(name='div', attrs={'class':'bannerSky'}), dict(name='div', attrs={'class':'bannerSky'}),
dict(name='div', attrs={'class':'footerLinks'}), dict(name='div', attrs={'class':'footerLinks'}),
dict(name='div', attrs={'class':'seitenanfang'}), dict(name='div', attrs={'class':'seitenanfang'}),
dict(name='td', attrs={'class':'mar5'}), dict(name='td', attrs={'class':'mar5'}),
dict(name='td', attrs={'class':'mar5'}), dict(name='td', attrs={'class':'mar5'}),
dict(name='table', attrs={'class':'pageAktiv'}), dict(name='table', attrs={'class':'pageAktiv'}),
dict(name='table', attrs={'class':'xartable'}), dict(name='table', attrs={'class':'xartable'}),
dict(name='table', attrs={'class':'wpnavi'}), dict(name='table', attrs={'class':'wpnavi'}),
dict(name='table', attrs={'class':'bgcontent absatz'}), dict(name='table', attrs={'class':'bgcontent absatz'}),
dict(name='table', attrs={'class':'footer'}), dict(name='table', attrs={'class':'footer'}),
dict(name='table', attrs={'class':'artikelBox'}), dict(name='table', attrs={'class':'artikelBox'}),
dict(name='table', attrs={'class':'kommentare'}), dict(name='table', attrs={'class':'kommentare'}),
dict(name='table', attrs={'class':'pageBoxBot'}), dict(name='table', attrs={'class':'pageBoxBot'}),
#dict(name='table', attrs={'with':'100%'}), dict(name='td', attrs={'nowrap':'nowrap'}),
dict(name='td', attrs={'nowrap':'nowrap'}), dict(name='td', attrs={'valign':'middle'}),
dict(name='td', attrs={'valign':'middle'}), dict(name='td', attrs={'align':'left'}),
dict(name='td', attrs={'align':'left'}), dict(name='td', attrs={'align':'center'}),
dict(name='td', attrs={'align':'center'}), dict(name='td', attrs={'height':'5'}),
dict(name='td', attrs={'height':'5'}), dict(name='div', attrs={'class':'artikelBox navigatorBox'}),
dict(name='div', attrs={'class':'artikelBox navigatorBox'}), dict(name='div', attrs={'class':'similar-article-box'}),
dict(name='div', attrs={'class':'similar-article-box'}), dict(name='div', attrs={'class':'videoBigHack'}),
dict(name='div', attrs={'class':'videoBigHack'}), dict(name='td', attrs={'class':'artikelDruckenRight'}),
dict(name='td', attrs={'class':'artikelDruckenRight'}), dict(name='td', attrs={'class':'width="200"'}),
dict(name='td', attrs={'class':'width="200"'}), dict(name='a', attrs={'href':'/news'}),
dict(name='a', attrs={'href':'/news'}), dict(name='a', attrs={'href':'/'}),
dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/articles'}),
dict(name='a', attrs={'href':'/articles'}), dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}),
dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), dict(name='iframe'),
dict(name='iframe'), dict(name='form'),
dict(name='form'), dict(name='span', attrs={'class':'hidePrint'}),
#dict(name='tr', attrs={'td':'Click here to learn'}), dict(id='headerLBox'),
dict(name='span', attrs={'class':'hidePrint'}), dict(id='nointelliTXT'),
dict(id='headerLBox'), dict(id='rechteSpalte'),
dict(id='nointelliTXT'), dict(id='newsticker-list-small'),
dict(id='rechteSpalte'), dict(id='ntop5'),
dict(id='newsticker-list-small'), dict(id='ntop5send'),
dict(id='ntop5'), dict(id='ntop5commented'),
dict(id='ntop5send'), dict(id='nnav-bgheader'),
dict(id='ntop5commented'), dict(id='nnav-headerteaser'),
dict(id='nnav-bgheader'), dict(id='nnav-head'),
dict(id='nnav-headerteaser'), dict(id='nnav-top'),
dict(id='nnav-head'), dict(id='nnav-logodiv'),
dict(id='nnav-top'), dict(id='nnav-logo'),
dict(id='nnav-logodiv'), dict(id='nnav-oly'),
dict(id='nnav-logo'), dict(id='readcomment')]
dict(id='nnav-oly'),
dict(id='readcomment')]
feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]
feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
moneynews.newsmax.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyNews(BasicNewsRecipe):
    title = 'Moneynews.com'
    __author__ = 'Darko Miletic'
    description = 'Financial news worldwide'
    publisher = 'moneynews.com'
    category = 'news, finances, USA, business'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    feeds = [
              (u'Street Talk'          , u'http://moneynews.newsmax.com/xml/streettalk.xml' )
             ,(u'Finance News'         , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
             ,(u'Economy'              , u'http://moneynews.newsmax.com/xml/economy.xml' )
             ,(u'Companies'            , u'http://moneynews.newsmax.com/xml/companies.xml' )
             ,(u'Markets'              , u'http://moneynews.newsmax.com/xml/Markets.xml' )
             ,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' )
            ]

    keep_only_tags = [dict(name='table', attrs={'class':'copy'})]

    remove_tags = [
                    dict(name='td'   , attrs={'id':'article_fontsize'})
                   ,dict(name='table', attrs={'id':'toolbox' })
                   ,dict(name='tr'   , attrs={'id':'noprint3' })
                  ]
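
For reference, the html2epub_options string assembled a few lines up is nothing more than the concatenation spelled out below; this is only a sketch of the resulting value, not a different setting:

publisher   = 'moneynews.com'
description = 'Financial news worldwide'
category    = 'news, finances, USA, business'

html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

# The string expands to:
#   publisher="moneynews.com"
#   comments="Financial news worldwide"
#   tags="news, finances, USA, business"
#   linearize_tables=True
print html2epub_options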

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nzz.ch
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Nzz(BasicNewsRecipe):
    title = 'NZZ Online'
    __author__ = 'Darko Miletic'
    description = 'Laufend aktualisierte Nachrichten, Analysen und Hintergruende zu Politik, Wirtschaft, Kultur und Sport'
    publisher = 'NZZ AG'
    category = 'news, politics, nachrichten, Switzerland'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    lang = 'de-CH'
    language = _('German')

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'

    keep_only_tags = [dict(name='div', attrs={'class':'article'})]

    remove_tags = [
                    dict(name=['object','link','base','script'])
                   ,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']})
                   ,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']})
                  ]

    feeds = [
              (u'Neuste Artikel' , u'http://www.nzz.ch/feeds/recent/' )
             ,(u'International'  , u'http://www.nzz.ch/nachrichten/international?rss=true')
             ,(u'Schweiz'        , u'http://www.nzz.ch/nachrichten/schweiz?rss=true')
             ,(u'Wirtschaft'     , u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true')
             ,(u'Finanzmaerkte'  , u'http://www.nzz.ch/finanzen/nachrichten?rss=true')
             ,(u'Zuerich'        , u'http://www.nzz.ch/nachrichten/zuerich?rss=true')
             ,(u'Sport'          , u'http://www.nzz.ch/nachrichten/sport?rss=true')
             ,(u'Panorama'       , u'http://www.nzz.ch/nachrichten/panorama?rss=true')
             ,(u'Kultur'         , u'http://www.nzz.ch/nachrichten/kultur/aktuell?rss=true')
             ,(u'Wissenschaft'   , u'http://www.nzz.ch/nachrichten/wissenschaft?rss=true')
             ,(u'Medien'         , u'http://www.nzz.ch/nachrichten/medien?rss=true')
             ,(u'Reisen'         , u'http://www.nzz.ch/magazin/reisen?rss=true')
            ]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        return soup

    def print_version(self, url):
        return url + '?printview=true'
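
Read together, the two hooks above only tag the page with its language and charset and switch to NZZ's printer-friendly view. A rough standalone sketch of the same calls, assuming the BeautifulSoup 3 module calibre bundled at the time and an illustrative article URL:

from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, as used by the recipe framework

soup = BeautifulSoup('<html><head></head><body><p>Beispiel</p></body></html>')
soup.html['xml:lang'] = 'de-CH'
soup.html['lang']     = 'de-CH'
# Prepend the charset declaration, mirroring what preprocess_html() does
soup.head.insert(0, '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">')

url = 'http://www.nzz.ch/nachrichten/wirtschaft/aktuell/beispiel_1.html'  # illustrative URL
print url + '?printview=true'   # what print_version() hands to the fetcher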

View File

@ -8,26 +8,19 @@ Fetch tomshardware.
 from calibre.web.feeds.news import BasicNewsRecipe

-class TomsHardwareDe(BasicNewsRecipe):
-    title          = 'Tom\'s Hardware German'
-    description    = 'Computer news in german'
+class cdnet(BasicNewsRecipe):
+    title          = 'tomshardware'
+    description    = 'computer news in german'
     __author__     = 'Oliver Niesner'
     use_embedded_content = False
     timefmt        = ' [%d %b %Y]'
     max_articles_per_feed = 50
-    language = _('German')
     no_stylesheets = True
+    language = _('German')
     encoding = 'utf-8'
-    #preprocess_regexps = \
-    #        [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-    #         [
-    #           (r'<84>', lambda match: ''),
-    #           (r'<93>', lambda match: ''),
-    #        ]
-    #       ]

     remove_tags = [dict(id='outside-advert'),
                    dict(id='advertRightWhite'),
                    dict(id='header-advert'),
@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe):
                    dict(id='header-top'),
                    dict(id='header-tools'),
                    dict(id='nbComment'),
+                   dict(id='commentTools'),
                    dict(id='internalSidebar'),
                    dict(id='header-news-infos'),
+                   dict(id='header-news-tools'),
                    dict(id='breadcrumbs'),
+                   dict(id='emailTools'),
+                   dict(id='bookmarkTools'),
+                   dict(id='printTools'),
+                   dict(id='header-nextNews'),
                    dict(id=''),
                    dict(name='div', attrs={'class':'pyjama'}),
                    dict(name='href', attrs={'class':'comment'}),
@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe):
                    dict(name='div', attrs={'class':'greyBox clearfix'}),
                    dict(id='')]

     #remove_tags_before = [dict(id='header-news-title')]
-    remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]
+    remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})]
     #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]

     feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]

View File

@ -11,20 +11,23 @@ from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class Vreme(BasicNewsRecipe):
     title              = 'Vreme'
     __author__         = 'Darko Miletic'
     description        = 'Politicki Nedeljnik Srbije'
-    publisher          = 'Vreme d.o.o.'
+    publisher          = 'NP Vreme d.o.o.'
     category           = 'news, politics, Serbia'
+    delay              = 1
     no_stylesheets     = True
-    remove_javascript  = True
     needs_subscription = True
     INDEX              = 'http://www.vreme.com'
-    LOGIN              = 'http://www.vreme.com/account/index.php'
+    LOGIN              = 'http://www.vreme.com/account/login.php?url=%2F'
     remove_javascript  = True
     use_embedded_content = False
+    encoding           = 'utf-8'
     language           = _('Serbian')
+    lang               = 'sr-Latn-RS'
+    direction          = 'ltr'
     extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}'

     html2lrf_options = [
                           '--comment' , description
@ -52,20 +55,11 @@ class Vreme(BasicNewsRecipe):
         articles = []
         soup = self.index_to_soup(self.INDEX)

-        for item in soup.findAll('span', attrs={'class':'toc2'}):
+        for item in soup.findAll(['h3','h4']):
             description = ''
             title_prefix = ''
-            descript_title_tag = item.findPreviousSibling('span', attrs={'class':'toc1'})
-            if descript_title_tag:
-               title_prefix = self.tag_to_string(descript_title_tag) + ' '
-            descript_tag = item.findNextSibling('span', attrs={'class':'toc3'})
-            if descript_tag:
-               description = self.tag_to_string(descript_tag)
             feed_link = item.find('a')
-            if feed_link and feed_link.has_key('href'):
+            if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('/cms/view.php'):
                 url   = self.INDEX + feed_link['href']
                 title = title_prefix + self.tag_to_string(feed_link)
                 date  = strftime(self.timefmt)
@ -93,14 +87,17 @@ class Vreme(BasicNewsRecipe):
             del item['face']
         for item in soup.findAll(size=True):
             del item['size']
-        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
-        soup.head.insert(0,mtag)
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+        mtag  = '<meta http-equiv="Content-Language" content="' + self.lang + '"/>'
+        mtag += '\n<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '"/>'
+        soup.head.insert(0,mtag)
         return soup

     def get_cover_url(self):
         cover_url = None
         soup = self.index_to_soup(self.INDEX)
-        cover_item = soup.find('img',attrs={'alt':'Naslovna strana broja'})
+        cover_item = soup.find('div',attrs={'id':'najava'})
         if cover_item:
-           cover_url = self.INDEX + cover_item['src']
+           cover_url = self.INDEX + cover_item.img['src']
         return cover_url
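
The reworked parse_index loop above now walks h3/h4 headings and keeps only links into /cms/view.php. A minimal sketch of that filter in isolation, using invented HTML and the same Python 2 / BeautifulSoup 3 idioms the recipe itself relies on:

from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, as used by the recipe framework

INDEX = 'http://www.vreme.com'
html = '''
<h3><a href="/cms/view.php?id=123">Kept: a real article link</a></h3>
<h4><a href="/account/login.php">Skipped: site navigation</a></h4>
'''

soup = BeautifulSoup(html)
for item in soup.findAll(['h3','h4']):
    feed_link = item.find('a')
    # Only /cms/view.php links point at article pages; everything else is site chrome
    if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('/cms/view.php'):
        print INDEX + feed_link['href']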

View File

@ -530,6 +530,7 @@ class build_windows(VMInstaller):
         self.run_windows_install_jammer(installer)
         return os.path.basename(installer)

+    @classmethod
     def run_windows_install_jammer(self, installer):
         ibp = os.path.abspath('installer/windows')
         sys.path.insert(0, ibp)