Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit a0a984c5b0: merge from trunk
@@ -4,6 +4,99 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.

- version: 0.7.20
  date: 2010-09-24

  new features:
    - title: "Tweak epub feature."
      type: major
      description: >
          "Now you can conveniently browse the contents of an epub, tweak them and rebuild the epub within your calibre library
          by right clicking on the book and selecting Tweak ePub. See http://www.mobileread.com/forums/showthread.php?t=99875
          for details."

    - title: "Add button to Edit metadata dialog to trim borders from the cover"

    - title: "Kobo driver: Add support for setting the ReadStatus to Read and correctly deal with empty collections"

    - title: "Improved algorithm for removal of hyphens during pre-processing"

    - title: "EPUB metadata: Don't read timestamp value from epubs as I am sick of closing bugs about adding books and having the Date not be today."

    - title: "After bulk edit metadata, reselect previously selected books."

  bug fixes:
    - title: "Fix regression in 0.7.19 that broke the By Author and By Title category listing in Stanza/Aldiko feeds."

    - title: "MOBI Output: Fix regression that broke sections list in downloaded periodicals on Kindle for non-English news sources"

    - title: "News download: Rationalize cover processing."
      tickets: [6852]

    - title: "Cover cache: load images only in the GUI thread to prevent stale files being left over by set_path due to Windows file locking"

    - title: "Database: Make renaming of folders on case change more robust"
      tickets: [6914]

    - title: "When adding/replacing files to/in EPUB files, set the GPF bit for all files in the archive, to prevent unzip from complaining in Linux"
      tickets: [6363]

    - title: "Plugin loading: Handle encoding declarations in .py files correctly"

    - title: "MOBI input: Another corner case"
      tickets: [6909]

    - title: "IPC: Store results file in the calibre temp dir and also don't die if for some reason removing the result file fails. Should make adding/saving more robust"

    - title: "Database: Fix regression that caused has_cover to create empty directories unnecessarily"

    - title: "Detection of Alex on Unix"
      tickets: [5900]

    - title: "News download: Don't add inline table of contents when downloading news for the Kindle"

    - title: "Add prologue and epilogue to default chapter detection regex"

    - title: "Kobo driver: Fix issue where books that are read were getting their status reset to Unread"

    - title: "Device drivers: Fix occasional false positive when matching books on device with books in the calibre library"

    - title: "Content server: Make serving of large files more efficient."

    - title: "GUI device detection: Handle case when user yanks connected device before device connection handler is called."
      tickets: [6864]

    - title: "Strip leading/trailing whitespace when setting metadata using the edit metadata dialog"
      tickets: [6854]

    - title: "KOBO: Editing the Im_Reading list with SD Card installed fixed"
      tickets: [6850]

  new recipes:
    - title: "Neal's Nuze and Popular Science"
      author: Tony Stegall

    - title: "Rmf24.pl"
      author: "Tomasz Dlugosz"

    - title: "Gazeta Pomorska"
      author: "Richard"

    - title: "Le Journal de Montreal and superesportes"
      author: "Luciano Furtado"

    - title: "The Marker"
      author: Marbs

    - title: "Tagesanzeiger"
      author: noxxx

  improved recipes:
    - Danas
    - Harvard Business Review

- version: 0.7.19
  date: 2010-09-17

@@ -61,6 +154,7 @@
    - title: "PDB Input: Fix bug in conversion of TOC in some PML files"

  new recipes:
    - title: "taz.de RSS"
      author: Alexander Schremmer

@@ -272,7 +366,7 @@
  new features:
    - title: "Multiple library support: Various improvements to make using multiple calibre libraries easier."
      type: major
-     desc: >
+     description: >
          "Now, when you switch libraries using the Choose Library button on the toolbar, entries are created in the menu of that button to easily switch to that library in the
          future. Also, you can now right click on a book in the calibre library and use the 'Copy to library' action to copy the book to another library
          that you have switched to at least once. The name of the current library is shown in the titlebar.

@@ -280,7 +374,7 @@
    - title: "Content server: Allow setting a restriction so that the server shares only some of the books in the library."
      type: major
-     desc: >
+     description: >
          "You can now use a Saved Search as a restriction for the content server, via Preferences->Content Server. This will cause the
          server to share only those books that match the saved search.
          "
@@ -54,7 +54,7 @@ function render_book(book) {
    formats = book.attr("formats").split(",");
    if (formats.length > 0) {
        for (i=0; i < formats.length; i++) {
-           title += '<a title="Download in '+formats[i]+' format" class="format" href="'+format_url(formats[i], id, book.attr("title"))+'">'+formats[i]+'</a>, ';
+           title += '<a title="Download in '+formats[i]+' format" class="format" href="'+format_url(formats[i], id, book.attr("safe_title"))+'">'+formats[i]+'</a>, ';
        }
        title = title.slice(0, title.length-2);
        title += ' ({0} MB) '.format(size);
BIN  resources/images/news/boortz.png (new file, binary not shown; 652 B)
BIN  resources/images/news/howtogeek.png (new file, binary not shown; 922 B)
BIN  resources/images/news/jpost_fr.png (new file, binary not shown; 334 B)
resources/recipes/boortz.recipe (new file, 44 lines)
@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title       = 'Nealz Nuze'
    language    = 'en'
    __author__  = 'TonytheBookworm'
    description = 'Neal Boortz Show Radio Notes'
    publisher   = 'Neal Boortz'
    category    = 'news, politics, USA, talkshow'
    oldest_article = 2
    max_articles_per_feed = 100
    linearize_tables  = True
    no_stylesheets    = True
    remove_javascript = True

    masthead_url = 'http://boortz.com/images/nuze_logo.gif'
    keep_only_tags = [
        dict(name='td', attrs={'id':['contentWellCell']})
    ]
    remove_tags = [
        dict(name='a', attrs={'class':['blogPermalink']}),
        dict(name='span', attrs={'class':['blogBylineSeparator']}),
        dict(name='td', attrs={'id':['nealztitle']}),
    ]
    remove_tags_after = [dict(name='div', attrs={'class':'blogEntryBody'}),]
    feeds = [
        ('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml')
    ]
@@ -51,8 +51,14 @@ class Danas(BasicNewsRecipe):

    preprocess_regexps = [
                          (re.compile(u'\u0110'), lambda match: u'\u00D0')
-                        ,(re.compile(u'\u201c'), lambda match: '"')
-                        ,(re.compile(u'\u201e'), lambda match: '"')
+                        ,(re.compile(u'\u2018'), lambda match: '‘') # left single quotation mark
+                        ,(re.compile(u'\u2019'), lambda match: '’') # right single quotation mark
+                        ,(re.compile(u'\u201a'), lambda match: '‘') # single low-9 quotation mark
+                        ,(re.compile(u'\u201b'), lambda match: '’') # single high-reversed-9 quotation mark
+                        ,(re.compile(u'\u201c'), lambda match: '“') # left double quotation mark
+                        ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark
+                        ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark
+                        ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark
                         ]

    keep_only_tags = [dict(name='div', attrs={'id':'left'})]

@@ -90,6 +96,8 @@ class Danas(BasicNewsRecipe):
             ,(u'Vostani Serbie'      , u'http://www.danas.rs/rss/rss.asp?column_id=57')
             ,(u'Med&Jad-a'           , u'http://www.danas.rs/rss/rss.asp?column_id=58')
             ,(u'Svetlosti pozornice' , u'http://www.danas.rs/rss/rss.asp?column_id=59')
+            ,(u'Dva cvancika'        , u'http://www.danas.rs/rss/rss.asp?column_id=65')
+            ,(u'Iz kornera'          , u'http://www.danas.rs/rss/rss.asp?column_id=64')
           ]

    def preprocess_html(self, soup):
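The Danas regexp list above maps Unicode quotation-mark variants onto their conventional counterparts with one compiled pattern per character. For single-character substitutions like these, a translation table does the same job in one pass over the string; a minimal sketch (the mapping below mirrors only the low-9/high-reversed-9 entries from the diff, and the helper name is illustrative, not calibre API):

```python
# Sketch: the quote normalization above expressed as a str.translate
# table (keys are code points, values are replacement strings).
QUOTE_MAP = {
    0x201a: '\u2018',  # single low-9 quotation mark  -> left single quote
    0x201b: '\u2019',  # single high-reversed-9 mark  -> right single quote
    0x201e: '\u201c',  # double low-9 quotation mark  -> left double quote
    0x201f: '\u201d',  # double high-reversed-9 mark  -> right double quote
}

def normalize_quotes(text: str) -> str:
    """Replace unusual quotation marks in one pass instead of one regex each."""
    return text.translate(QUOTE_MAP)
```

A translation table avoids recompiling and rescanning for each character class, which matters when the preprocess step runs on every article.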
resources/recipes/howtogeek.recipe (new file, 40 lines)
@@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title       = 'How To Geek'
    language    = 'en'
    __author__  = 'TonytheBookworm'
    description = 'Daily Computer Tips and Tricks'
    publisher   = 'Howtogeek'
    category    = 'PC,tips,tricks'
    oldest_article = 2
    max_articles_per_feed = 100
    linearize_tables  = True
    no_stylesheets    = True
    remove_javascript = True
    masthead_url = 'http://blog.stackoverflow.com/wp-content/uploads/how-to-geek-logo.png'

    remove_tags = [dict(name='a', attrs={'target':['_blank']}),
                   dict(name='table', attrs={'id':['articleTable']}),
                   dict(name='div', attrs={'class':['feedflare']}),
                  ]

    feeds = [
        ('Tips', 'http://feeds.howtogeek.com/howtogeek')
    ]
resources/recipes/jpost_fr.recipe (new file, 57 lines)
@@ -0,0 +1,57 @@
from calibre.web.feeds.news import BasicNewsRecipe

class JerusalemPost(BasicNewsRecipe):
    title       = 'Jerusalem post'
    language    = 'fr'
    __author__  = 'TonytheBookworm'
    description = 'The Jerusalem Post (in French)'
    publisher   = 'jpost'
    category    = 'news'
    oldest_article = 30
    max_articles_per_feed = 100
    linearize_tables  = True
    no_stylesheets    = True
    remove_javascript = True

    masthead_url = 'http://static.jpost.com/JPSITES/images/JFrench/2008/site/jplogo.JFrench.gif'

    remove_tags = [
        dict(name='a', attrs={'href':['javascript:window.print()']}),
        dict(name='div', attrs={'class':['bot']}),
    ]

    feeds = [
        ('NEWS', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762036&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench En route vers la paix', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762201&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Politique', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737334&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Securite', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737338&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Moyen Orient', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737342&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Diplomatie / Monde', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737346&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Economie / Sciences', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737358&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Societe', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737354&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Opinions', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737350&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Monde juif', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737366&pagename=JFrench%2FPage%2FRSS'),
        ('JFrench Culture / Sport', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737362&pagename=JFrench%2FPage%2FRSS')
    ]

    def print_version(self, url):
        split1 = url.split("cid=")
        # for testing only: print 'SPLIT IS: ', split1 and 'ORG URL IS: ', url
        idnum = split1[1] # the actual id value of the article
        print_url = 'http://fr.jpost.com/servlet/Satellite?cid=' + idnum + '&pagename=JFrench%2FJPArticle%2FPrinter'
        # for testing only: print 'PRINT URL IS: ', print_url
        return print_url

# Example of how links should be formatted:
# original URL  = http://fr.jpost.com/servlet/Satellite?pagename=JFrench/JPArticle/ShowFull&cid=1282804806075
# print version = http://fr.jpost.com/servlet/Satellite?cid=1282804806075&pagename=JFrench%2FJPArticle%2FPrinter
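The recipe's print_version rewrites an article URL into its printer-friendly form by splitting out the cid query value. The same transformation as a standalone function (behaviour taken from the recipe; like the original, it assumes the URL contains a cid parameter and does no error handling):

```python
def print_version(url: str) -> str:
    """Rewrite a JFrench article URL into its printer-friendly form
    by extracting the value of the cid query parameter."""
    idnum = url.split("cid=")[1]  # everything after 'cid=' is the article id
    return ('http://fr.jpost.com/servlet/Satellite?cid=' + idnum +
            '&pagename=JFrench%2FJPArticle%2FPrinter')
```

Running it on the example URL from the recipe's comments produces exactly the print version shown there.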
@@ -1,5 +1,5 @@
-import re
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, re

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'Popular Science'

@@ -12,13 +12,11 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = True

    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'

    remove_tags = [dict(name='div', attrs={'id':['toolbar','main_supplements']}),
                   dict(name='span', attrs={'class':['comments']}),
                   dict(name='div', attrs={'class':['relatedinfo related-right','node_navigation','content2']}),
                   dict(name='ul', attrs={'class':['item-list clear-block']})]

    feeds = [
        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
@@ -1,68 +1,53 @@
#!/usr/bin/env python
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
sciam.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe

class ScientificAmerican(BasicNewsRecipe):
    title = u'Scientific American'
    description = u'Popular science. Monthly magazine.'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en'
    remove_javascript = True
    oldest_article = 30
    title = u'Scientific American'
    description = u'Popular Science. Monthly magazine.'
    category = 'science'
    __author__ = 'Starson17'
    no_stylesheets = True
    use_embedded_content = False
    language = 'en'
    publisher = 'Nature Publishing Group'
    remove_empty_feeds = True
    remove_javascript = True
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    extra_css = '''
        p{font-weight: normal; font-size:small}
        li{font-weight: normal; font-size:small}
        .headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
        h2{font-size:x-small;}
        h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
        '''
    remove_tags_before = dict(name='div', attrs={'class':'headline'})

    remove_tags_after = dict(id=['article'])
    remove_tags = [
            dict(id=['sharetools', 'reddit']),
            #dict(name='script'),
            {'class':['float_left', 'atools']},
            {"class": re.compile(r'also-in-this')},
            dict(name='a', title=["Get the Rest of the Article","Subscribe","Buy this Issue"]),
            dict(name='img', alt=["Graphic - Get the Rest of the Article"]),
            dict(name='div', attrs={'class':['commentbox']}),
            dict(name='h2', attrs={'class':['discuss_h2']}),
            ]
    conversion_options = {'linearize_tables' : True
                        , 'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        }

    html2lrf_options = ['--base-font-size', '8']
    recursions = 1
    match_regexps = [r'article.cfm.id=\S+page=(2|3|4|5|6|7|8|9|10|11|12|13|14|15)']
    keep_only_tags = [
            dict(name='h2', attrs={'class':'articleTitle'})
            ,dict(name='p', attrs={'id':'articleDek'})
            ,dict(name='p', attrs={'class':'articleInfo'})
            ,dict(name='div', attrs={'id':['articleContent']})
            ,dict(name='img', attrs={'src':re.compile(r'/media/inline/blog/Image/', re.DOTALL|re.IGNORECASE)})
            ]

    remove_tags = [dict(name='a', attrs={'class':'tinyCommentCount'})]

    def parse_index(self):
        soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/')
        monthtag = soup.find('div',attrs={'id':'magazine-main_col2'})
        month = self.tag_to_string(monthtag.contents[1])

        self.timefmt = ' [%s]'%(self.tag_to_string(month))
        issuetag = soup.find('p',attrs={'id':'articleDek'})
        self.timefmt = ' [%s]'%(self.tag_to_string(issuetag))
        img = soup.find('img', alt='Scientific American Magazine', src=True)
        if img is not None:
            self.cover_url = img['src']
        features, feeds = [], []
        for p in soup.find(id='magazine-main_col2').findAll('p'):
            a = p.find('a', href=True)
        for a in soup.find(attrs={'class':'primaryCol'}).findAll('a',attrs={'title':'Feature'}):
            if a is None: continue
            desc = ''
            s = p.find('span', attrs={'class':"sub"})
            s = a.parent.parent.find(attrs={'class':'dek'})
            desc = self.tag_to_string(s)

            article = {
                'url'   : a['href'],
                'title' : self.tag_to_string(a),
@@ -71,51 +56,36 @@ class ScientificAmerican(BasicNewsRecipe):
                }
            features.append(article)
        feeds.append(('Features', features))

        section = []
        department = []
        title = None

        for x in soup.find(id='magazine-main_col1').findAll(['div', 'a']):
            if x.name == 'div':
                if section:
                    feeds.append((title, section))
                title = self.tag_to_string(x)
                section = []
            else:
                if 'article.cfm' in x['href']:
                    article = {
                        'url'   : x['href'],
                        'title' : self.tag_to_string(x),
                        'date'  : '',
                        'description': '',
                        }
                    section.append(article)
        if section:
            feeds.append((title, section))

        for li in soup.find(attrs={'class':'secondaryCol'}).findAll('li'):
            if 'department.cfm' in li.a['href']:
                if department:
                    feeds.append((title, department))
                title = self.tag_to_string(li.a)
                department = []
            if 'article.cfm' in li.h3.a['href']:
                article = {
                    'url'   : li.h3.a['href'],
                    'title' : self.tag_to_string(li.h3.a),
                    'date'  : '',
                    'description': self.tag_to_string(li.p),
                    }
                department.append(article)
        if department:
            feeds.append((title, department))
        return feeds

    def postprocess_html(self, soup, first_fetch):
        if soup is not None:
            for span in soup.findAll('span', attrs={'class':'pagination'}):
                span.extract()
            if not first_fetch:
                div = soup.find('div', attrs={'class':'headline'})
                if div:
                    div.extract()

        for item in soup.findAll('a'):
            if 'topic.cfm' in item['href']:
                item.replaceWith(item.string)
        return soup

    preprocess_regexps = [
        (re.compile(r'Already a Digital subscriber.*Now</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'If your institution has site license access, enter.*here</a>.', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'to subscribe to our.*;.*\}', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'\)\(jQuery\);.*-->', re.DOTALL|re.IGNORECASE), lambda match: ''),
        ]
    extra_css = '''
        p{font-weight: normal; font-size:small}
        li{font-weight: normal; font-size:small}
        .headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
        h2{font-size:large; font-family:Arial,Helvetica,sans-serif;}
        h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
        '''
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__   = 'calibre'
-__version__   = '0.7.19'
+__version__   = '0.7.20'
__author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@@ -67,10 +67,17 @@ def load_plugin(path_to_zip_file): # {{{
        if name.lower().endswith('plugin.py'):
            locals = {}
            raw = zf.read(name)
-           match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
-           encoding = 'utf-8'
-           if match is not None:
-               encoding = match.group(1)
+           lines, encoding = raw.splitlines(), 'utf-8'
+           cr = re.compile(r'coding[:=]\s*([-\w.]+)')
+           raw = []
+           for l in lines[:2]:
+               match = cr.search(l)
+               if match is not None:
+                   encoding = match.group(1)
+               else:
+                   raw.append(l)
+           raw += lines[2:]
+           raw = '\n'.join(raw)
            raw = raw.decode(encoding)
            raw = re.sub('\r\n', '\n', raw)
            exec raw in locals
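The fix above looks for a PEP 263 coding declaration only in the first two lines (the only place Python honours it), records the encoding, and drops the declaration line before decoding, so that the later exec does not see a now-misleading declaration. A Python 3 sketch of the same idea (names are illustrative, not calibre's):

```python
import re

# PEP 263: the declaration must appear on line 1 or 2 of the source.
CODING_RE = re.compile(r'coding[:=]\s*([-\w.]+)')

def decode_source(raw: bytes) -> str:
    """Decode Python source bytes, honouring a PEP 263 coding
    declaration in the first two lines and stripping that line
    so later processing is not confused by it."""
    lines = raw.splitlines()
    encoding = 'utf-8'
    kept = []
    for line in lines[:2]:
        m = CODING_RE.search(line.decode('ascii', 'replace'))
        if m is not None:
            encoding = m.group(1)  # remember it, drop the line
        else:
            kept.append(line)
    kept.extend(lines[2:])
    return b'\n'.join(kept).decode(encoding).replace('\r\n', '\n')
```

The original bug was scanning the first 300 bytes unconditionally, which could pick up the word "coding" from an ordinary comment or string anywhere near the top of the file.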
@@ -34,7 +34,7 @@ class ANDROID(USBMS):
            0x227]},

    # Samsung
-   0x04e8 : { 0x681d : [0x0222, 0x0400],
+   0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400],
               0x681c : [0x0222, 0x0224, 0x0400],
               0x6640 : [0x0100],
             },
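The driver table above is a nested mapping: USB vendor id to product id to the list of accepted BCD device revisions, and the fix adds one more revision (0x0224) for the Samsung 0x681d product. A sketch of how a detected device tuple is checked against such a table (the table excerpt matches the diff; the is_supported helper is hypothetical, not calibre's API):

```python
# Vendor id -> product id -> accepted BCD device revisions,
# shaped like the ANDROID driver's VENDOR_PRODUCT map above.
VENDOR_PRODUCT_BCD = {
    0x04e8: {0x681d: [0x0222, 0x0224, 0x0400],
             0x681c: [0x0222, 0x0224, 0x0400],
             0x6640: [0x0100]},
}

def is_supported(vid: int, pid: int, bcd: int) -> bool:
    """True if this (vendor, product, revision) triple is in the table."""
    return bcd in VENDOR_PRODUCT_BCD.get(vid, {}).get(pid, [])
```

Missing a BCD entry is exactly why a phone with a newer firmware revision goes undetected, which is what adding 0x0224 fixes.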
@@ -41,6 +41,10 @@ class Book(MetaInformation):
            self.authors = ['']
        else:
            self.authors = [authors]

        if not title:
            self.title = _('Unknown')

+       self.mime = mime
+
+       self.size = size # will be set later if None
@@ -455,6 +455,7 @@ class HTMLPreProcessor(object):
        if is_pdftohtml:
            end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),)

+       length = -1
        if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
            length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'), 'median')
            if length:

@@ -493,7 +494,7 @@ class HTMLPreProcessor(object):
        for rule in rules + end_rules:
            html = rule[0].sub(rule[1], html)

-       if is_pdftohtml:
+       if is_pdftohtml and length > -1:
            # Dehyphenate
            dehyphenator = Dehyphenator()
            html = dehyphenator(html,'pdf', length)
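The change above initialises length to a sentinel and only runs the Dehyphenator when a median line length was actually computed. Calibre's Dehyphenator is considerably more careful (it consults word frequency to decide whether to rejoin); a deliberately minimal stand-in showing only the core idea of rejoining words split across line breaks (the pattern is illustrative, not calibre's):

```python
import re

# Hypothetical minimal dehyphenator: join "imple-\nmentation" into
# "implementation" when both halves are plain lowercase letters.
HYPHEN_BREAK = re.compile(r'([a-z]+)-\s*\n\s*([a-z]+)')

def dehyphenate(text: str) -> str:
    """Rejoin lowercase words that were hyphenated across a line break."""
    return HYPHEN_BREAK.sub(lambda m: m.group(1) + m.group(2), text)
```

A real implementation must also decide when the hyphen is genuine (e.g. "self-evident"), which is why calibre's version checks candidate joins against a dictionary rather than joining unconditionally.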
@@ -1696,11 +1696,12 @@ class MobiWriter(object):
        header.write(pack('>I', 1))

-       # 0x1c - 0x1f : Text encoding ?
+       # GR: Language encoding for NCX entries (latin_1)
-       header.write(pack('>I', 0x4e4))
-       # header.write(pack('>I', 650001))
+       # GR: This needs to be either 0xFDE9 or 0x4E4
+       header.write(pack('>I', 0xFDE9))

-       # 0x20 - 0x23 : Mimicking kindleGen
-       header.write(pack('>I', 0xFFFFFFFF))
+       # 0x20 - 0x23 : Language code?
+       header.write(iana2mobi(str(self._oeb.metadata.language[0])))

        # 0x24 - 0x27 : Number of TOC entries in INDX1
        header.write(pack('>I', indxt_count + 1))

@@ -1800,7 +1801,7 @@ class MobiWriter(object):
        text = text.strip()
        if not isinstance(text, unicode):
            text = text.decode('utf-8', 'replace')
-       text = text.encode('cp1252','replace')
+       text = text.encode('ascii','replace')
        return text

    def _add_to_ctoc(self, ctoc_str, record_offset):

@@ -2150,26 +2151,6 @@ class MobiWriter(object):
        indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
        indxt.write(decint(0, DECINT_FORWARD)) # unknown byte

-   def _write_subchapter_node(self, indxt, indices, index, offset, length, count):
-       # This style works without a parent chapter, mimicking what KindleGen does,
-       # using a value of 0x0B for parentIndex
-       # Writes an INDX1 NCXEntry of entryType 0x1F - subchapter
-       if self.opts.verbose > 2:
-           # *** GR: Turn this off while I'm developing my code
-           #self._oeb.log.debug('Writing TOC node to IDXT:', node.title, 'href:', node.href)
-           pass
-
-       pos = 0xc0 + indxt.tell()
-       indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
-       name = "%04X"%count
-       indxt.write(chr(len(name)) + name) # Write the name
-       indxt.write(INDXT['subchapter']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
-       indxt.write(decint(offset, DECINT_FORWARD)) # offset
-       indxt.write(decint(length, DECINT_FORWARD)) # length
-       indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
-       indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
-       indxt.write(decint(0xb, DECINT_FORWARD)) # parentIndex - null

    def _compute_offset_length(self, i, node, entries):
        h = node.href
        if h not in self._id_offsets:
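Each header field above is emitted with pack('>I', ...), i.e. as a big-endian unsigned 32-bit integer; 0xFDE9 is the value the comment says the NCX language-encoding slot needs. A tiny sketch of that serialization (u32be is an illustrative name, not calibre's):

```python
from struct import pack

def u32be(value: int) -> bytes:
    """Serialize one big-endian unsigned 32-bit header field,
    as the MOBI INDX header writer does with pack('>I', ...)."""
    return pack('>I', value)
```

Getting the byte order wrong here silently corrupts the header, which is why every field in the writer goes through the same '>I' format.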
@@ -15,7 +15,7 @@ from calibre.customize.ui import available_input_formats
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
-from calibre.utils.zipfile import safe_replace, ZipFile
+from calibre.utils.zipfile import safe_replace
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import guess_type, prints

@@ -294,12 +294,8 @@ class EbookIterator(object):
                zf = open(self.pathtoebook, 'r+b')
            except IOError:
                return
-           zipf = ZipFile(zf, mode='a')
-           for name in zipf.namelist():
-               if name == 'META-INF/calibre_bookmarks.txt':
-                   safe_replace(zf, 'META-INF/calibre_bookmarks.txt', StringIO(dat))
-                   return
-           zipf.writestr('META-INF/calibre_bookmarks.txt', dat)
+           safe_replace(zf, 'META-INF/calibre_bookmarks.txt', StringIO(dat),
+                   add_missing=True)
        else:
            self.config['bookmarks_'+self.pathtoebook] = dat
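The bookmark-saving change replaces a manual ZipFile scan with calibre's safe_replace(..., add_missing=True), which replaces a member or adds it when absent (and relates to the changelog's GPF-bit fix for files added to EPUBs). With only the standard library, the same replace-or-add behaviour can be sketched by rewriting the archive (zip_replace is a hypothetical helper, not calibre's safe_replace, which patches the zip in place):

```python
import io
import zipfile

def zip_replace(data: bytes, name: str, new_content: bytes) -> bytes:
    """Return a copy of the zip archive `data` with member `name`
    replaced by `new_content`, adding the member if it was missing."""
    out = io.BytesIO()
    with zipfile.ZipFile(io.BytesIO(data)) as src, \
         zipfile.ZipFile(out, 'w') as dst:
        for info in src.infolist():
            if info.filename != name:
                # copy every other member through unchanged
                dst.writestr(info, src.read(info.filename))
        dst.writestr(name, new_content)
    return out.getvalue()
```

Rewriting the whole archive is simple and safe but O(archive size); calibre's in-place safe_replace exists precisely to avoid that cost on large EPUBs.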
@@ -219,7 +219,10 @@ class CSSFlattener(object):
            fnums = self.context.source.fnums
            if size[0] in ('+', '-'):
                # Oh, the warcrimes
-               esize = 3 + force_int(size)
+               try:
+                   esize = 3 + force_int(size)
+               except:
+                   esize = 3
                if esize < 1:
                    esize = 1
                if esize > 7:
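The try/except above guards force_int against unparseable relative sizes, and the surrounding lines clamp the result to the HTML 1-7 font-size scale. A condensed sketch of that mapping (the helper name and the use of int in place of calibre's force_int are assumptions based on the surrounding lines):

```python
def relative_font_size(size: str) -> int:
    """Map a relative CSS-style size like '+2' or '-1' onto the
    HTML 1-7 font-size scale, defaulting to 3 (normal) when the
    value cannot be parsed, as the flattener's try/except does."""
    try:
        esize = 3 + int(size)       # '+2' -> 5, '-1' -> 2
    except (TypeError, ValueError):
        esize = 3                   # unparseable: fall back to normal
    return max(1, min(7, esize))    # clamp to the legal 1..7 range
```

The defensive default matters because recipe-supplied CSS is untrusted input: one malformed font-size must not abort the whole conversion.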
@@ -819,7 +819,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                fname = err.filename if err.filename else 'file'
                return error_dialog(self, _('Permission denied'),
                        _('Could not open %s. Is it being used by another'
-                       ' program?')%fname, show=True)
+                       ' program?')%fname, det_msg=traceback.format_exc(),
+                       show=True)
            raise
        self.save_state()
        QDialog.accept(self)
@@ -358,10 +358,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        return row[self.FIELD_MAP['path']].replace('/', os.sep)

-   def abspath(self, index, index_is_id=False):
+   def abspath(self, index, index_is_id=False, create_dirs=True):
        'Return the absolute path to the directory containing this books files as a unicode string.'
        path = os.path.join(self.library_path, self.path(index, index_is_id=index_is_id))
-       if not os.path.exists(path):
+       if create_dirs and not os.path.exists(path):
            os.makedirs(path)
        return path

@@ -443,6 +443,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self.add_format(id, format, stream, index_is_id=True,
                path=tpath, notify=False)
        self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id))
        self.conn.commit()
+       self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True)
        # Delete not needed directories
        if current_path and os.path.exists(spath):

@@ -451,6 +452,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            parent = os.path.dirname(spath)
            if len(os.listdir(parent)) == 0:
                self.rmtree(parent, permanent=True)

+       curpath = self.library_path
        c1, c2 = current_path.split('/'), path.split('/')
        if not self.is_case_sensitive and len(c1) == len(c2):

@@ -465,13 +467,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            # the directories, so no need to do them here.
            for oldseg, newseg in zip(c1, c2):
                if oldseg.lower() == newseg.lower() and oldseg != newseg:
-                   while True:
-                       # need a temp name in the current segment for renames
-                       tempname = os.path.join(curpath, 'TEMP.%f'%time.time())
-                       if not os.path.exists(tempname):
-                           break
-                   os.rename(os.path.join(curpath, oldseg), tempname)
-                   os.rename(tempname, os.path.join(curpath, newseg))
+                   try:
+                       os.rename(os.path.join(curpath, oldseg), os.path.join(curpath, newseg))
+                   except:
+                       break # Fail silently since nothing catastrophic has happened
                curpath = os.path.join(curpath, newseg)

    def add_listener(self, listener):

@@ -599,7 +598,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
    def has_cover(self, index, index_is_id=False):
        id = index if index_is_id else self.id(index)
        try:
-           path = os.path.join(self.abspath(id, index_is_id=True), 'cover.jpg')
+           path = os.path.join(self.abspath(id, index_is_id=True,
+               create_dirs=False), 'cover.jpg')
        except:
            # Can happen if path has not yet been set
            return False

@@ -721,7 +721,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        path = self.format_abspath(index, format, index_is_id=index_is_id)
        if path is not None:
            f = open(path, mode)
-           ret = f if as_file else f.read()
+           try:
+               ret = f if as_file else f.read()
+           except IOError:
+               f.seek(0)
+               out = cStringIO.StringIO()
+               shutil.copyfileobj(f, out)
+               ret = out.getvalue()
            if not as_file:
                f.close()
            return ret
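The last hunk makes format() fall back to copying the open file into an in-memory buffer when read() fails (the Windows file-locking situation several changelog entries mention). A small Python 3 sketch of the same fallback (read_all is an illustrative name; Python 3's io.BytesIO replaces the cStringIO used above):

```python
import io
import shutil

def read_all(f) -> bytes:
    """Read a file object fully; if a plain read() raises IOError,
    fall back to copying it into memory chunk by chunk, mirroring
    the format() fix above."""
    try:
        return f.read()
    except IOError:
        f.seek(0)
        out = io.BytesIO()
        shutil.copyfileobj(f, out)  # chunked copy can succeed where one read() failed
        return out.getvalue()
```

The chunked copy keeps each individual read small, which is gentler on files that another process holds partially locked.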
@@ -123,8 +123,6 @@ class ContentServer(object):

        return self.static('index.html')

-
-
    # Actually get content from the database {{{
    def get_cover(self, id, thumbnail=False):
        cover = self.db.cover(id, index_is_id=True, as_file=False)
@@ -18,6 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx
 from calibre.constants import __appname__
 from calibre import human_readable
 from calibre.utils.date import utcfromtimestamp, format_date
+from calibre.utils.filenames import ascii_filename
 
 def CLASS(*args, **kwargs): # class is a reserved word in Python
     kwargs['class'] = ' '.join(args)
@@ -110,11 +111,13 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS):
         data = TD()
         last = None
         for fmt in book['formats'].split(','):
+            a = ascii_filename(book['authors'])
+            t = ascii_filename(book['title'])
             s = SPAN(
                 A(
                     fmt.lower(),
-                    href='/get/%s/%s-%s_%d.%s' % (fmt, book['authors'],
-                        book['title'], book['id'], fmt)
+                    href='/get/%s/%s-%s_%d.%s' % (fmt, a, t,
+                        book['id'], fmt)
                 ),
                 CLASS('button'))
             s.tail = u'\u202f' #
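The change above routes author and title values through `ascii_filename` before building the `/get/` download URLs, so non-ASCII metadata cannot produce broken links. A rough stand-in for what such a sanitizer does (assumption: the real `calibre.utils.filenames.ascii_filename` is more sophisticated, e.g. it transliterates accented characters rather than replacing them):

```python
import re

def ascii_filename(name, substitute='_'):
    # Minimal stand-in for calibre.utils.filenames.ascii_filename:
    # keep printable ASCII, replace everything else with a substitute.
    return re.sub(r'[^\x20-\x7e]', substitute, name) or substitute

print(ascii_filename(u'Caf\xe9 Book'))  # 'Caf_ Book'
```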
@@ -16,6 +16,7 @@ from calibre.ebooks.metadata import fmt_sidx
 from calibre.constants import preferred_encoding
 from calibre import isbytestring
 from calibre.utils.date import format_date
+from calibre.utils.filenames import ascii_filename
 
 E = ElementMaker()
 
@@ -88,6 +89,8 @@ class XMLServer(object):
                 y = format_tag_string(y, ',')
             kwargs[x] = serialize(y) if y else ''
 
+        kwargs['safe_title'] = ascii_filename(kwargs['title'])
+
         c = kwargs.pop('comments')
 
         CFM = self.db.field_metadata
File diff suppressed because it is too large
Load Diff
@@ -18,6 +18,7 @@ from calibre.utils.ipc.launch import Worker
 from calibre.utils.ipc.worker import PARALLEL_FUNCS
 from calibre import detect_ncpus as cpu_count
 from calibre.constants import iswindows
+from calibre.ptempfile import base_dir
 
 _counter = 0
 
@@ -114,8 +115,9 @@ class Server(Thread):
         with self._worker_launch_lock:
             self.launched_worker_count += 1
             id = self.launched_worker_count
-        rfile = os.path.join(tempfile.gettempdir(),
-                'calibre_ipc_result_%d_%d.pickle'%(self.id, id))
+        fd, rfile = tempfile.mkstemp(prefix='ipc_result_%d_%d_'%(self.id, id),
+                dir=base_dir(), suffix='.pickle')
+        os.close(fd)
         if redirect_output is None:
             redirect_output = not gui
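The switch from a hand-built path in `tempfile.gettempdir()` to `tempfile.mkstemp` matters because `mkstemp` atomically creates the file with an unpredictable name, avoiding collisions between concurrent workers and symlink races in a shared temp directory. A minimal sketch (the `server_id`/`worker_id` values are illustrative, and the default temp dir stands in for calibre's `base_dir()`):

```python
import os
import tempfile

# mkstemp returns an open descriptor plus the path; the descriptor is
# closed immediately because only the path is handed to the worker,
# which writes its pickled result there.
server_id, worker_id = 1, 3
fd, rfile = tempfile.mkstemp(prefix='ipc_result_%d_%d_' % (server_id, worker_id),
                             suffix='.pickle')
os.close(fd)
print(os.path.basename(rfile).startswith('ipc_result_1_3_'))  # True
os.remove(rfile)
```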
@@ -189,8 +191,11 @@ class Server(Thread):
                 job.failed = True
                 job.returncode = worker.returncode
             elif os.path.exists(worker.rfile):
-                job.result = cPickle.load(open(worker.rfile, 'rb'))
-                os.remove(worker.rfile)
+                try:
+                    job.result = cPickle.load(open(worker.rfile, 'rb'))
+                    os.remove(worker.rfile)
+                except:
+                    pass
             job.duration = time.time() - job.start_time
             self.changed_jobs_queue.put(job)
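Wrapping the result load in try/except means a worker that died before writing a complete pickle can no longer crash the job server. A sketch of the same defensive pattern (`load_result` is a hypothetical helper, not calibre's API):

```python
import os
import pickle
import tempfile

def load_result(rfile):
    # Hypothetical helper mirroring the diff: tolerate a missing,
    # truncated, or otherwise unreadable result file.
    try:
        with open(rfile, 'rb') as f:
            result = pickle.load(f)
        os.remove(rfile)  # result file is consumed on success
        return result
    except Exception:
        return None  # leave failure reporting to the caller

# An empty result file (worker died early) is not a valid pickle:
fd, path = tempfile.mkstemp(suffix='.pickle')
os.close(fd)
print(load_result(path))  # None
```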
@@ -281,7 +281,7 @@ class ZipInfo (object):
         'file_offset',
     )
 
-    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
+    def __init__(self, filename=u"NoName", date_time=(1980,1,1,0,0,0)):
         self.orig_filename = filename # Original file name in archive
 
         # Terminate the file name at the first null byte. Null bytes in file
@@ -1362,30 +1362,42 @@ class ZipFile:
             self.fp.close()
             self.fp = None
 
-def safe_replace(zipstream, name, datastream, extra_replacements={}):
+def safe_replace(zipstream, name, datastream, extra_replacements={},
+        add_missing=False):
     '''
     Replace a file in a zip file in a safe manner. This proceeds by extracting
     and re-creating the zipfile. This is necessary because :method:`ZipFile.replace`
     sometimes created corrupted zip files.
 
     :param zipstream: Stream from a zip file
     :param name: The name of the file to replace
     :param datastream: The data to replace the file with.
     :param extra_replacements: Extra replacements. Mapping of name to file-like
                                objects
+    :param add_missing: If a replacement does not exist in the zip file, it is
+                        added. Use with care as currently parent directories
+                        are not created.
 
     '''
     z = ZipFile(zipstream, 'r')
     replacements = {name:datastream}
     replacements.update(extra_replacements)
     names = frozenset(replacements.keys())
+    found = set([])
     with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
         ztemp = ZipFile(temp, 'w')
         for obj in z.infolist():
+            if isinstance(obj.filename, unicode):
+                obj.flag_bits |= 0x16 # Set isUTF-8 bit
             if obj.filename in names:
                 ztemp.writestr(obj, replacements[obj.filename].read())
+                found.add(obj.filename)
             else:
                 ztemp.writestr(obj, z.read_raw(obj), raw_bytes=True)
+        if add_missing:
+            for name in names - found:
+                ztemp.writestr(name, replacements[name].read())
         ztemp.close()
         z.close()
         temp.seek(0)
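The `safe_replace` strategy above, copying every member into a fresh archive and substituting replacements by name rather than patching in place, can be sketched with the stdlib `zipfile` module. Unlike calibre's version this recompresses unchanged members instead of copying their raw bytes, and omits `extra_replacements` and the UTF-8 flag handling:

```python
import io
import zipfile

def safe_replace(zipstream, name, datastream, add_missing=False):
    # Sketch of the safe-replace approach: never modify the source
    # archive in place; rebuild it member by member.
    src = zipfile.ZipFile(zipstream, 'r')
    out = io.BytesIO()
    with zipfile.ZipFile(out, 'w') as dest:
        found = False
        for info in src.infolist():
            if info.filename == name:
                dest.writestr(info, datastream.read())  # substitute by name
                found = True
            else:
                dest.writestr(info, src.read(info.filename))
        if add_missing and not found:
            dest.writestr(name, datastream.read())
    src.close()
    return out.getvalue()

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as z:
    z.writestr('a.txt', 'old')
data = safe_replace(io.BytesIO(buf.getvalue()), 'a.txt', io.BytesIO(b'new'))
with zipfile.ZipFile(io.BytesIO(data)) as z:
    print(z.read('a.txt'))  # b'new'
```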