From 8af795d9dd34ce97bfa9ca3f07afd3553f64ff9d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 21 Mar 2010 06:58:02 +0530
Subject: [PATCH 1/7] Fix NYTimes

---
 resources/recipes/nytimes_sub.recipe | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index c126902899..78f6016c94 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -31,7 +31,7 @@ class NYTimes(BasicNewsRecipe):
     # List of sections to exclude
     # To add a section, copy the section name from the allSectionKeywords list above
     # For example, to exclude 'Dining' and 'Weddings':
-    # excludeSectionKeywords = ['Dining','Weddings']
+    #excludeSectionKeywords = ['Dining','Weddings']
     excludeSectionKeywords = []
 
     # List of sections to include (test and debug only)
@@ -56,9 +56,12 @@ class NYTimes(BasicNewsRecipe):
     remove_tags_before = dict(id='article')
     remove_tags_after  = dict(id='article')
     remove_tags = [dict(attrs={'class':[
+                            'articleFooter',
+                            'articleInline runaroundLeft',
                             'articleTools',
                             'columnGroup doubleRule',
                             'columnGroup last',
+                            'columnGroup  last',
                             'doubleRule',
                             'dottedLine',
                             'entry-meta',
@@ -70,6 +73,7 @@ class NYTimes(BasicNewsRecipe):
                             'relatedSearchesModule',
                             'side_tool',
                             'singleAd',
+                            'subNavigation tabContent active clearfix',
                             ]}),
                    dict(id=[
                             'adxLeaderboard',
@@ -222,11 +226,11 @@ class NYTimes(BasicNewsRecipe):
 
             if div['class'] == 'section-headline':
                 key = string.capwords(feed_title(div))
-                excluded = re.compile('|'.join(self.excludeSectionKeywords))
-                if excluded.search(key):
-                    self.log("Skipping section %s" % key)
-                    continue
-
+                if len(self.excludeSectionKeywords):
+                    excluded = re.compile('|'.join(self.excludeSectionKeywords))
+                    if excluded.search(key):
+                        self.log("Skipping section %s" % key)
+                        continue
                 articles[key] = []
                 ans.append(key)
 

From 70d9a6d3d60b99276467832809f988fe862fa6fb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 21 Mar 2010 07:20:57 +0530
Subject: [PATCH 2/7] Don't choke if the user provides an invalid remove
 header/footer regexp

---
 src/calibre/ebooks/conversion/preprocess.py | 25 +++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 29ce0e4296..ada4f1a3af 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -230,14 +230,25 @@ class HTMLPreProcessor(object):
 
         end_rules = []
         if getattr(self.extra_opts, 'remove_header', None):
-            end_rules.append(
-                (re.compile(getattr(self.extra_opts, 'header_regex')), lambda match : '')
-            )
+            try:
+                end_rules.append(
+                    (re.compile(self.extra_opts.header_regex), lambda match : '')
+                )
+            except:
+                import traceback
+                print 'Failed to parse remove_header regexp'
+                traceback.print_exc()
+
         if getattr(self.extra_opts, 'remove_footer', None):
-            end_rules.append(
-                (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
-            )
-        
+            try:
+                end_rules.append(
+                    (re.compile(self.extra_opts.footer_regex), lambda match : '')
+                )
+            except:
+                import traceback
+                print 'Failed to parse remove_footer regexp'
+                traceback.print_exc()
+
         if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
             length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
             if length:

From 91f3f2d8b8addbaef9975b0eb7b53d8abfa0b44d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 22 Mar 2010 06:43:16 +0530
Subject: [PATCH 3/7] More intelligent error message when user selects wrong
 card for send to device operation on SONYs and improved NYTimes recipe

---
 resources/recipes/nytimes_sub.recipe | 5 +++--
 src/calibre/devices/prs505/driver.py | 9 +++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index 78f6016c94..93df08220d 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -57,9 +57,9 @@ class NYTimes(BasicNewsRecipe):
     remove_tags_after  = dict(id='article')
     remove_tags = [dict(attrs={'class':[
                             'articleFooter',
-                            'articleInline runaroundLeft',
                             'articleTools',
                             'columnGroup doubleRule',
+                            'columnGroup singleRule',
                             'columnGroup last',
                             'columnGroup  last',
                             'doubleRule',
@@ -68,6 +68,7 @@ class NYTimes(BasicNewsRecipe):
                             'icon enlargeThis',
                             'leftNavTabs',
                             'module box nav',
+                            'nextArticleLink',
                             'nextArticleLink clearfix',
                             'post-tools',
                             'relatedSearchesModule',
@@ -226,7 +227,7 @@ class NYTimes(BasicNewsRecipe):
 
             if div['class'] == 'section-headline':
                 key = string.capwords(feed_title(div))
-                if len(self.excludeSectionKeywords):
+                if self.excludeSectionKeywords:
                     excluded = re.compile('|'.join(self.excludeSectionKeywords))
                     if excluded.search(key):
                         self.log("Skipping section %s" % key)
diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 5d759be47c..448965a913 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -150,7 +150,8 @@ class PRS505(CLI, Device):
         for location in locations:
             info = metadata.next()
             path = location[0]
-            blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0
+            oncard = location[3]
+            blist = 2 if oncard == 'cardb' else 1 if oncard == 'carda' else 0
 
             if self._main_prefix and path.startswith(self._main_prefix):
                 name = path.replace(self._main_prefix, '')
@@ -166,7 +167,11 @@ class PRS505(CLI, Device):
 
             opts = self.settings()
             collections = opts.extra_customization.split(',') if opts.extra_customization else []
-            booklists[blist].add_book(info, name, collections, *location[1:-1])
+            booklist = booklists[blist]
+            if not hasattr(booklist, 'add_book'):
+                raise ValueError(('Incorrect upload location %s. Did you choose the'
+                        ' correct card A or B, to send books to?')%oncard)
+            booklist.add_book(info, name, collections, *location[1:-1])
         fix_ids(*booklists)
 
     def delete_books(self, paths, end_session=True):

From fdaed4a1690298d20e49034656fc3e65ca5412b4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 23 Mar 2010 09:46:54 +0530
Subject: [PATCH 4/7] IEEE Spectrum by Franco Venturi

---
 resources/recipes/ieeespectrum.recipe | 67 +++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 resources/recipes/ieeespectrum.recipe

diff --git a/resources/recipes/ieeespectrum.recipe b/resources/recipes/ieeespectrum.recipe
new file mode 100644
index 0000000000..79a107cd9d
--- /dev/null
+++ b/resources/recipes/ieeespectrum.recipe
@@ -0,0 +1,67 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Franco Venturi <fventuri at comcast.net>'
+'''
+spectrum.ieee.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from string import capwords
+from urlparse import urljoin
+
+class IEEESpectrum(BasicNewsRecipe):
+    title                 = 'IEEE Spectrum'
+    __author__            = 'Franco Venturi'
+    description           = 'Electronics News from IEEE'
+    publisher             = 'IEEE'
+    category              = 'news, electronics, IT, computer science'
+    oldest_article        = 32
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    language              = 'en'
+    index                 = 'http://spectrum.ieee.org/magazine/'
+    masthead_url          = 'http://spectrum.ieee.org/images/logo_hdr.png'
+
+    remove_javascript     = True
+    remove_tags           = [dict(name={'script':True, 'object':True})]
+    remove_attributes     = ['height','width','alt']
+    keep_only_tags        = [dict(attrs={'class': {'artSctn':True, 'artTitle':True, 'dekTitle': True}}), dict(attrs={'id':'artBody'})]
+
+
+#    def get_cover_url(self):
+#        cover_url = None
+#        soup = self.index_to_soup(self.index)
+#        cover_item = soup.find('img',attrs={'image':'cover.gif'})
+#        if cover_item:
+#            cover_url = urljoin(self.index, cover_item['src'])
+#        return cover_url
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.index)
+        content = soup.find(id='gnrlContent')
+        title = content.find(attrs={'class':'style4'}).string.strip()
+        date = ' '.join(title.split()[0:2])
+        self.timefmt = ' [' + date + ']'
+        contents = []
+        for tag in content.findAll(attrs={'class': {'style2':True, 'lstngTitle':True, 'lstngBody': True}}):
+            if tag['class'] == 'style2':
+                contents.append((capwords(tag.renderContents().strip()), []))
+            elif tag['class'] == 'lstngTitle':
+                url = urljoin(self.index, tag.findPrevious('a')['href']) + '/0'
+                contents[-1][1].append({'title': tag.renderContents().strip(),
+                                        'url': url,
+                                        'date': date,
+                                        'description': '',
+                                        'content': ''
+                                       })
+            elif tag['class'] == 'lstngBody':
+                contents[-1][1][-1]['description'] = tag.renderContents().strip()
+
+        return contents
+
+    def preprocess_html(self, soup):
+        for a in soup.findAll('a'):
+            if not a['href'].lower().startswith('http'):
+               a['href'] = urljoin(self.index, a['href'])
+        return soup

From 69ec0e1ee56699429b4b5ee4b9cb2bddccaec070 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 23 Mar 2010 11:36:00 +0530
Subject: [PATCH 5/7] ...

---
 resources/recipes/ieeespectrum.recipe | 12 ++++--------
 src/calibre/web/feeds/news.py         |  4 ++--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/resources/recipes/ieeespectrum.recipe b/resources/recipes/ieeespectrum.recipe
index 79a107cd9d..e2490b2a6c 100644
--- a/resources/recipes/ieeespectrum.recipe
+++ b/resources/recipes/ieeespectrum.recipe
@@ -29,16 +29,12 @@ class IEEESpectrum(BasicNewsRecipe):
     keep_only_tags        = [dict(attrs={'class': {'artSctn':True, 'artTitle':True, 'dekTitle': True}}), dict(attrs={'id':'artBody'})]
 
 
-#    def get_cover_url(self):
-#        cover_url = None
-#        soup = self.index_to_soup(self.index)
-#        cover_item = soup.find('img',attrs={'image':'cover.gif'})
-#        if cover_item:
-#            cover_url = urljoin(self.index, cover_item['src'])
-#        return cover_url
-
     def parse_index(self):
         soup = self.index_to_soup(self.index)
+        img = soup.find('img', image='cover.gif', src=True)
+        if img is not None:
+            self.cover_url = 'http://spectrum.ieee.org'+img['src']
+
         content = soup.find(id='gnrlContent')
         title = content.find(attrs={'class':'style4'}).string.strip()
         date = ' '.join(title.split()[0:2])
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index d07c135abd..496a1f4d5b 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -864,10 +864,10 @@ class BasicNewsRecipe(Recipe):
             self.log.error(_('Could not download cover: %s')%str(err))
             self.log.debug(traceback.format_exc())
         if cu is not None:
-            ext = cu.rpartition('.')[-1]
+            ext = cu.split('/')[-1].rpartition('.')[-1]
             if '?' in ext:
                 ext = ''
-            ext = ext.lower() if ext else 'jpg'
+            ext = ext.lower() if ext and '/' not in ext else 'jpg'
             cpath = os.path.join(self.output_dir, 'cover.'+ext)
             if os.access(cu, os.R_OK):
                 with open(cpath, 'wb') as cfile:

From 4a3ebab22ec7393f3b837330b2a358c53cbe18e5 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Tue, 23 Mar 2010 16:53:35 -0700
Subject: [PATCH 6/7] GwR fix problem with TPZ files not indexing on Kindle

---
 src/calibre/ebooks/metadata/topaz.py | 63 +++++++++++++---------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/src/calibre/ebooks/metadata/topaz.py b/src/calibre/ebooks/metadata/topaz.py
index 6fe858df53..996cc77770 100644
--- a/src/calibre/ebooks/metadata/topaz.py
+++ b/src/calibre/ebooks/metadata/topaz.py
@@ -267,7 +267,7 @@ class MetadataUpdater(object):
             offset += md_len
             self.metadata[tag] = metadata
 
-    def regenerate_headers(self, len_updated_metadata):
+    def regenerate_headers(self, updated_md_len):
 
         headers = {}
         for tag in self.topaz_headers:
@@ -276,22 +276,16 @@ class MetadataUpdater(object):
             else:
                 headers[tag] = None
 
-        # Sort headers based on initial offset
-        sh = sorted(headers,key=lambda x:(headers[x],headers[x]))
-
-        # Metadata goes last
-        sh.remove('metadata')
-        sh.append('metadata')
-
         original_md_len = self.topaz_headers['metadata']['blocks'][0]['len_uncomp']
         original_md_offset = self.topaz_headers['metadata']['blocks'][0]['offset']
+        delta = updated_md_len - original_md_len
 
         # Copy the first 5 bytes of the file: sig + num_recs
         ths = StringIO.StringIO()
         ths.write(self.data[:5])
 
-        # Rewrite the offsets for hdr_offsets > metadata original location
-        for tag in sh[:-1]:
+        # Rewrite the offsets for hdr_offsets > metadata offset
+        for tag in headers.keys():
             ths.write('c')
             ths.write(self.encode_vwi(len(tag)))
             ths.write(tag)
@@ -300,32 +294,18 @@ class MetadataUpdater(object):
                 for block in self.topaz_headers[tag]['blocks']:
                     b = self.topaz_headers[tag]['blocks'][block]
 
-                    if b['offset'] < original_md_offset:
+                    if b['offset'] <= original_md_offset:
                         ths.write(self.encode_vwi(b['offset']))
                     else:
-                        ths.write(self.encode_vwi(b['offset'] - original_md_len))
+                        ths.write(self.encode_vwi(b['offset'] + delta))
 
-                    ths.write(self.encode_vwi(b['len_uncomp']))
+                    if tag == 'metadata':
+                        ths.write(self.encode_vwi(updated_md_len))
+                    else:
+                        ths.write(self.encode_vwi(b['len_uncomp']))
                     ths.write(self.encode_vwi(b['len_comp']))
             else:
                 ths.write(self.encode_vwi(0))
-
-        # Adjust metadata offset to end
-        new_md_offset = (len(self.data) - self.base - original_md_len)
-
-        new_md_len = len_updated_metadata - 1 - len('metadata') - 1
-
-        # Write the metadata header
-        ths.write('c')
-        ths.write(self.encode_vwi(len('metadata')))
-        ths.write('metadata')
-        ths.write(self.encode_vwi(1))
-        ths.write(self.encode_vwi(new_md_offset))
-
-        ths.write(self.encode_vwi(new_md_len))
-        ths.write(self.encode_vwi(0))
-
-        self.sorted_headers = sh
         self.original_md_start = original_md_offset + self.base
         self.original_md_len = original_md_len
         return ths.getvalue().encode('iso-8859-1')
@@ -364,8 +344,8 @@ class MetadataUpdater(object):
         self.stream.write(head)
         self.stream.write('d')
         self.stream.write(chunk1)
-        self.stream.write(chunk2)
         self.stream.write(updated_metadata)
+        self.stream.write(chunk2)
 
 def get_metadata(stream):
     mu = MetadataUpdater(stream)
@@ -377,6 +357,21 @@ def set_metadata(stream, mi):
     return
 
 if __name__ == '__main__':
-    #print get_metadata(open(sys.argv[1], 'rb'))
-    mi = MetaInformation(title="My New Title", authors=['Smith, John'])
-    set_metadata(open(sys.argv[1], 'rb'), mi)
+    if False:
+        # Test get_metadata()
+        print get_metadata(open(sys.argv[1], 'rb'))
+    else:
+        # Test set_metadata()
+        import cStringIO
+        data = open(sys.argv[1], 'rb')
+        stream = cStringIO.StringIO()
+        stream.write(data.read())
+        mi = MetaInformation(title="A Marvelously Long Title", authors=['Riker, Gregory; Riker, Charles'])
+        set_metadata(stream, mi)
+
+        # Write the result
+        tokens = sys.argv[1].rpartition('.')
+        updated_data = open(tokens[0]+'-updated' + '.' + tokens[2],'wb')
+        updated_data.write(stream.getvalue())
+        updated_data.close()
+

From c1a6be45b95c1f46de1ed7872b6d49fa8e688ed5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 24 Mar 2010 08:47:31 +0530
Subject: [PATCH 7/7] ...

---
 resources/recipes/times_online.recipe | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/resources/recipes/times_online.recipe b/resources/recipes/times_online.recipe
index 98c26e6a66..98e96552ce 100644
--- a/resources/recipes/times_online.recipe
+++ b/resources/recipes/times_online.recipe
@@ -21,9 +21,8 @@ class Timesonline(BasicNewsRecipe):
     use_embedded_content   = False
     simultaneous_downloads = 1
     encoding               = 'ISO-8859-1'
-    lang                   = 'en-UK'
     remove_javascript = True
-    language = 'en'
+    language = 'en_GB'
     recursions = 9
     match_regexps = [r'http://www.timesonline.co.uk/.*page=[2-9]']