From 9287eb4445d803cbef826bb82f377d277c30517e Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Tue, 25 Jan 2011 07:16:14 -0700
Subject: [PATCH 1/5] various GwR revisions

---
 resources/catalog/stylesheet.css              | 12 ++++++++++++
 src/calibre/gui2/__init__.py                  |  8 ++++++++
 src/calibre/gui2/actions/annotate.py          |  4 ++++
 src/calibre/gui2/catalog/catalog_epub_mobi.py |  3 ++-
 src/calibre/library/catalog.py                | 11 ++++++++---
 5 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
index bf83a4c60b..336d015e44 100644
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@@ -62,6 +62,18 @@ div.description {
 	text-indent: 1em;
 	}
 
+/*
+* 	Attempt to minimize widows and orphans by logically grouping chunks.
+* 	The rule below is commented out by default; enabling it is recommended for iPad.
+* 	Problems have been reported on Sony e-readers, presumably ADE-based engines.
+*/
+/*
+div.logical_group {
+	display:inline-block;
+	width:100%;
+	}
+*/
+
 p.date_index {
 	font-size:x-large;
 	text-align:center;
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index c94b99f141..84a26cea18 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -550,6 +550,14 @@ def choose_dir(window, name, title, default_dir='~'):
     if dir:
         return dir[0]
 
+def choose_osx_app(window, name, title, default_dir='/Applications'):  # show a FileDialog in ExistingFile mode and return its selection
+    fd = FileDialog(title=title, parent=window, name=name, mode=QFileDialog.ExistingFile,
+            default_dir=default_dir)
+    app = fd.get_files()
+    fd.setParent(None)  # detach the dialog from its parent window once the selection has been read
+    if app:  # a falsy result falls through, so the function implicitly returns None
+        return app  # NOTE(review): returns the whole get_files() result, whereas choose_dir above returns dir[0] -- confirm callers expect this
+
 def choose_files(window, name, title,
                  filters=[], all_files=True, select_only_single_file=False):
     '''
diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py
index dfafcd1a39..8714654d4b 100644
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@@ -165,10 +165,12 @@ class FetchAnnotationsAction(InterfaceAction):
                 ka_soup.insert(0,divTag)
                 return ka_soup
 
+            '''
             def mark_book_as_read(self,id):
                 read_tag = gprefs.get('catalog_epub_mobi_read_tag')
                 if read_tag:
                     self.db.set_tags(id, [read_tag], append=True)
+            '''
 
             def canceled(self):
                 self.pd.hide()
@@ -201,10 +203,12 @@ class FetchAnnotationsAction(InterfaceAction):
                         # Update library comments
                         self.db.set_comment(id, mi.comments)
 
+                        '''
                         # Update 'read' tag except for Catalogs/Clippings
                         if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
                             if not set(mi.tags).intersection(ignore_tags):
                                 self.mark_book_as_read(id)
+                        '''
 
                         # Add bookmark file to id
                         self.db.add_format_with_hooks(id, bm.value.bookmark_extension,
diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.py b/src/calibre/gui2/catalog/catalog_epub_mobi.py
index 94760306c3..d5149569be 100644
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@@ -335,7 +335,7 @@ class PluginWidget(QWidget,Ui_Form):
         '''
 
         return
-
+        '''
         if new_state == 0:
             # unchecked
             self.merge_source_field.setEnabled(False)
@@ -348,6 +348,7 @@ class PluginWidget(QWidget,Ui_Form):
             self.merge_before.setEnabled(True)
             self.merge_after.setEnabled(True)
             self.include_hr.setEnabled(True)
+        '''
 
     def header_note_source_field_changed(self,new_index):
         '''
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 95e738dd58..f0e4778de4 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1820,6 +1820,9 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                 self.booksByTitle_noSeriesPrefix = nspt
 
             # Loop through the books by title
+            # Generate one divRunningTag per initial letter for the purposes of
+            # minimizing widows and orphans on readers that can handle large
+            # <divs> styled as inline-block
             title_list = self.booksByTitle
             if not self.useSeriesPrefixInTitlesSection:
                 title_list = self.booksByTitle_noSeriesPrefix
@@ -1832,7 +1835,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                         divTag.insert(dtc, divRunningTag)
                         dtc += 1
                     divRunningTag = Tag(soup, 'div')
-                    divRunningTag['style'] = 'display:inline-block;width:100%'
+                    divRunningTag['class'] = "logical_group"
                     drtc = 0
                     current_letter = self.letter_or_symbol(book['title_sort'][0])
                     pIndexTag = Tag(soup, "p")
@@ -1954,6 +1957,8 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
             drtc = 0
 
             # Loop through booksByAuthor
+            # Each author/books group goes in an openingTag div (first) or
+            # a runningTag div (subsequent)
             book_count = 0
             current_author = ''
             current_letter = ''
@@ -1977,7 +1982,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                     current_letter = self.letter_or_symbol(book['author_sort'][0].upper())
                     author_count = 0
                     divOpeningTag = Tag(soup, 'div')
-                    divOpeningTag['style'] = 'display:inline-block;width:100%'
+                    divOpeningTag['class'] = "logical_group"
                     dotc = 0
                     pIndexTag = Tag(soup, "p")
                     pIndexTag['class'] = "letter_index"
@@ -2001,7 +2006,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
 
                         # Create a divRunningTag for the rest of the authors in this letter
                         divRunningTag = Tag(soup, 'div')
-                        divRunningTag['style'] = 'display:inline-block;width:100%'
+                        divRunningTag['class'] = "logical_group"
                         drtc = 0
 
                     non_series_books = 0

From c9bb59a5d5a443ae277dbf16d0b261e86c8521a2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 25 Jan 2011 08:35:29 -0700
Subject: [PATCH 2/5] Fix extra spaces being inserted into TOC title when
 reading TOC from OPF guide element. Fixes #8569 (Chapters headers with
 internal tags not added to TOC correctly.)

---
 src/calibre/ebooks/oeb/reader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index 8e11ac6498..d08a68c0bc 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -17,7 +17,7 @@ from lxml import etree
 import cssutils
 
 from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
-    DC_NSES, OPF
+    DC_NSES, OPF, xml2text
 from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
     PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
 from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, \
@@ -423,7 +423,7 @@ class OEBReader(object):
             path, frag = urldefrag(href)
             if path not in self.oeb.manifest.hrefs:
                 continue
-            title = ' '.join(xpath(anchor, './/text()'))
+            title = xml2text(anchor)
             title = COLLAPSE_RE.sub(' ', title.strip())
             if href not in titles:
                 order.append(href)

From 336874d87f7520616d06310460d143653d5c5001 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 25 Jan 2011 09:38:38 -0700
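Subject: [PATCH 3/5] Update 20 Minutos recipe, strip trailing whitespace in
 NYTimes recipe and remove unused gprefs import from annotate.py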

---
 resources/recipes/20_minutos.recipe  | 74 +++++++++++++++++++++++-----
 resources/recipes/nytimes_sub.recipe | 16 +++---
 src/calibre/gui2/actions/annotate.py |  2 +-
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/resources/recipes/20_minutos.recipe b/resources/recipes/20_minutos.recipe
index 8205c918f5..1f862847dc 100644
--- a/resources/recipes/20_minutos.recipe
+++ b/resources/recipes/20_minutos.recipe
@@ -1,17 +1,67 @@
+# -*- coding: utf-8 -*-
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+description   = 'Periódico gratuito en español - v0.5 - 25 Jan 2011'
+
+'''
+www.20minutos.es
+'''
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class AdvancedUserRecipe1295310874(BasicNewsRecipe):
-    title          = u'20 Minutos (Boletin)'
-    __author__            = 'Luis Hernandez'
-    description           = 'Periódico gratuito en español'
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title          = u'20 Minutos'
+    publisher      = u'Grupo 20 Minutos'
+
+    __author__            = u'Luis Hernández'
+    description           = u'Periódico gratuito en español'
     cover_url     = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
-    language      = 'es'
 
-    oldest_article = 2
-    max_articles_per_feed = 50
+    oldest_article = 5
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    encoding              = 'ISO-8859-1'
+    language              = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    keep_only_tags     = [dict(name='div', attrs={'id':['content']})
+                                  ,dict(name='div', attrs={'class':['boxed','description','lead','article-content']})
+                                  ,dict(name='span', attrs={'class':['photo-bar']})
+                                  ,dict(name='ul', attrs={'class':['article-author']})
+                                ]
+
+    remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
+    remove_tags_after  = dict(name='div' , attrs={'class':['related-news','col']})
+
+    remove_tags = [
+                     dict(name='ol', attrs={'class':['navigation',]})
+                    ,dict(name='span', attrs={'class':['action']})
+                    ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col']})
+                    ,dict(name='div', attrs={'id':['twitter-destacados']})
+                    ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
+                                          ]
+
+    feeds = [
+              (u'Portada'              , u'http://www.20minutos.es/rss/')
+             ,(u'Nacional'             , u'http://www.20minutos.es/rss/nacional/')
+             ,(u'Internacional'       , u'http://www.20minutos.es/rss/internacional/')
+             ,(u'Economia'           , u'http://www.20minutos.es/rss/economia/')
+             ,(u'Deportes'            , u'http://www.20minutos.es/rss/deportes/')
+             ,(u'Tecnologia'          , u'http://www.20minutos.es/rss/tecnologia/')
+             ,(u'Gente - TV'         , u'http://www.20minutos.es/rss/gente-television/')
+             ,(u'Motor'                 , u'http://www.20minutos.es/rss/motor/')
+             ,(u'Salud'                 , u'http://www.20minutos.es/rss/belleza-y-salud/')
+             ,(u'Viajes'                , u'http://www.20minutos.es/rss/viajes/')
+             ,(u'Vivienda'             , u'http://www.20minutos.es/rss/vivienda/')
+             ,(u'Empleo'              , u'http://www.20minutos.es/rss/empleo/')
+             ,(u'Cine'                  , u'http://www.20minutos.es/rss/cine/')
+             ,(u'Musica'               , u'http://www.20minutos.es/rss/musica/')
+             ,(u'Comunidad20'     , u'http://www.20minutos.es/rss/zona20/')
+            ]
 
-    feeds          = [(u'VESPERTINO', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
-                        , (u'DEPORTES', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
-                        , (u'CULTURA', u'http://www.20minutos.es/rss/ocio/')
-                        , (u'TV', u'http://20minutos.feedsportal.com/c/32489/f/490877/index.rss')
-]
diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index 863e4b22ba..81b8bd5cb7 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -498,7 +498,7 @@ class NYTimes(BasicNewsRecipe):
                 for lidiv in div.findAll('li'):
                     if not skipping:
                         self.handle_article(lidiv)
-            
+
         self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
         return self.filter_ans(self.ans)
 
@@ -609,7 +609,7 @@ class NYTimes(BasicNewsRecipe):
                 if article_date < self.earliest_date:
                     self.log("Skipping article dated %s" % date_str)
                     return None
-                    
+
         #all articles are from today, no need to print the date on every page
         try:
             if not self.webEdition:
@@ -631,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
                             refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
                             refend = reflinkstring.find(".html", refstart) + len(".html")
                             reflinkstring = reflinkstring[refstart:refend]
-                            
+
                             popuppage = self.browser.open(reflinkstring)
                             popuphtml = popuppage.read()
                             popuppage.close()
@@ -640,7 +640,7 @@ class NYTimes(BasicNewsRecipe):
                                 year = str(st.tm_year)
                                 month = "%.2d" % st.tm_mon
                                 day = "%.2d" % st.tm_mday
-                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/')                                
+                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/')
                                 highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
                                 popupSoup = BeautifulSoup(popuphtml)
                                 highResTag = popupSoup.find('img', {'src':highResImageLink})
@@ -659,9 +659,9 @@ class NYTimes(BasicNewsRecipe):
                                             imageTag['height'] = newHeight
                                         except:
                                             self.log("Error setting the src width and height parameters")
-            except Exception as e:
+            except Exception:
                 self.log("Error pulling high resolution images")
-                
+
             try:
                 #remove "Related content" bar
                 runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
@@ -674,8 +674,8 @@ class NYTimes(BasicNewsRecipe):
                                 hline.extract()
             except:
                 self.log("Error removing related content bar")
-     
-                
+
+
             try:
                 #in case pulling images failed, delete the enlarge this text
                 enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py
index 8714654d4b..a702ba045e 100644
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@@ -9,7 +9,7 @@ import os, datetime
 
 from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
 
-from calibre.gui2 import error_dialog, gprefs
+from calibre.gui2 import error_dialog
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
 from calibre import strftime
 from calibre.gui2.actions import InterfaceAction

From 503038f39febac1075a1a809db557db05a133a5b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 25 Jan 2011 10:38:32 -0700
Subject: [PATCH 4/5] Fix #8576 (Issue with new yorker download)

---
 resources/recipes/new_yorker.recipe | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/new_yorker.recipe b/resources/recipes/new_yorker.recipe
index 0c95aa358d..d69a4df24f 100644
--- a/resources/recipes/new_yorker.recipe
+++ b/resources/recipes/new_yorker.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 newyorker.com
 '''
@@ -54,10 +54,10 @@ class NewYorker(BasicNewsRecipe):
                         ,dict(attrs={'id':['show-header','show-footer'] })
                      ]
     remove_attributes = ['lang']
-    feeds             = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
+    feeds             = [(u'The New Yorker', u'http://www.newyorker.com/services/rss/feeds/everything.xml')]
 
     def print_version(self, url):
-        return url + '?printable=true'
+        return 'http://www.newyorker.com' + url + '?printable=true'
 
     def image_url_processor(self, baseurl, url):
         return url.strip()

From 05be08a9ee9a645fe63c520d98989d33c6f58209 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 25 Jan 2011 11:09:49 -0700
Subject: [PATCH 5/5] Fix the metadata backup thread to more reliably flush an
 in_limbo id

---
 src/calibre/gui2/ui.py        |  2 --
 src/calibre/library/caches.py | 15 ++++++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index 6a74ccd6ea..c0658536bb 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -638,8 +638,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         except KeyboardInterrupt:
             pass
         time.sleep(2)
-        if mb is not None:
-            mb.flush()
         self.hide_windows()
         return True
 
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 77e75736cf..7d6511e8a5 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -42,6 +42,7 @@ class MetadataBackup(Thread): # {{{
 
     def stop(self):
         self.keep_running = False
+        self.flush()
         # Break cycles so that this object doesn't hold references to db
         self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \
             self.set_dirtied = self.db = None
@@ -57,7 +58,10 @@ class MetadataBackup(Thread): # {{{
             except:
                 # Happens during interpreter shutdown
                 break
+            if not self.keep_running:
+                break
 
+            self.in_limbo = id_
             try:
                 path, mi = self.get_metadata_for_dump(id_)
             except:
@@ -72,10 +76,10 @@ class MetadataBackup(Thread): # {{{
                     continue
 
             # at this point the dirty indication is off
-
             if mi is None:
                 continue
-            self.in_limbo = id_
+            if not self.keep_running:
+                break
 
             # Give the GUI thread a chance to do something. Python threads don't
             # have priorities, so this thread would naturally keep the processor
@@ -89,6 +93,9 @@ class MetadataBackup(Thread): # {{{
                 traceback.print_exc()
                 continue
 
+            if not self.keep_running:
+                break
+
             time.sleep(0.1) # Give the GUI thread a chance to do something
             try:
                 self.do_write(path, raw)
@@ -102,7 +109,8 @@ class MetadataBackup(Thread): # {{{
                     prints('Failed to write backup metadata for id:', id_,
                             'again, giving up')
                     continue
-        self.in_limbo = None
+
+            self.in_limbo = None
 
     def flush(self):
         'Used during shutdown to ensure that a dirtied book is not missed'
@@ -111,6 +119,7 @@ class MetadataBackup(Thread): # {{{
                 self.db.dirtied([self.in_limbo])
             except:
                 traceback.print_exc()
+            self.in_limbo = None
 
     def write(self, path, raw):
         with lopen(path, 'wb') as f: