From b8018f59f29eaf7c5036c704f6c4baa92fda9165 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Thu, 27 Oct 2011 13:53:43 -0600
Subject: [PATCH 01/11] WIP - conform epub metadata with calibre

---
 src/calibre/devices/apple/driver.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py
index 645b2fb606..289d5079f4 100644
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -1305,6 +1305,8 @@ class ITUNES(DriverBase):
         if DEBUG:
             self.log.info(" ITUNES._add_new_copy()")
 
+        self._update_epub_metadata(fpath, metadata)
+
         db_added = None
         lb_added = None
 
@@ -2663,6 +2665,7 @@ class ITUNES(DriverBase):
                     metadata.timestamp = now()
                     if DEBUG:
                         self.log.info("   add timestamp: %s" % metadata.timestamp)
+
             else:
                 metadata.timestamp = now()
                 if DEBUG:
@@ -2699,7 +2702,7 @@ class ITUNES(DriverBase):
             if iswindows and metadata.series:
                 metadata.tags = None
 
-            set_metadata(zfo, metadata, update_timestamp=True)
+            set_metadata(zfo, metadata, apply_null=True, update_timestamp=True)
 
     def _update_device(self, msg='', wait=True):
         '''

From 5c7bf560c2ec37867f7a1fd5e128ca5dafbb3a9a Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Fri, 18 Nov 2011 05:18:15 -0700
Subject: [PATCH 02/11] Added iPhone 4S device fingerprint

---
 src/calibre/devices/apple/driver.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py
index 289d5079f4..9a242b245d 100644
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -217,10 +217,11 @@ class ITUNES(DriverBase):
     #  0x1297   iPhone 4
     #  0x129a   iPad
     #  0x129f   iPad2 (WiFi)
+    #  0x12a0   iPhone 4S
     #  0x12a2   iPad2 (GSM)
     #  0x12a3   iPad2 (CDMA)
     VENDOR_ID = [0x05ac]
-    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3]
+    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a0,0x12a2,0x12a3]
     BCD = [0x01]
 
     # Plugboard ID

From 055b17c68a29ffaf8b4b9985de31429969d0c836 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Nov 2011 14:52:57 +0530
Subject: [PATCH 03/11] Fix handling of comments in the jacket template

---
 resources/jacket/template.xhtml             | 9 ++++++---
 src/calibre/ebooks/oeb/transforms/jacket.py | 8 +++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/resources/jacket/template.xhtml b/resources/jacket/template.xhtml
index 17d0493a82..671ca5a04d 100644
--- a/resources/jacket/template.xhtml
+++ b/resources/jacket/template.xhtml
@@ -38,9 +38,12 @@
         <hr class="cbj_kindle_banner_hr" />
         <!-- 
         In addition you can add code to show the values of custom columns here.
-        The value is available as _column_name and the title as _column_name_label.
-        For example, if you have a custom column with label #genre, you can add it to
-        this template with _genre_label and _genre. Note that the # is replaced by an underscore.
+        The value is available as _column_name and the title as
+        _column_name_label.  For example, if you have a custom column with
+        label #genre, you can add it to this template with _genre_label and
+        _genre. Note that the # is replaced by an underscore. For example:
+        
+        <div><b>{_genre_label}:</b> {_genre}</div>
         -->
 
         <div class="cbj_comments">{comments}</div>
diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index 987fe0ce86..ede34ef17c 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -16,6 +16,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML
 from calibre.library.comments import comments_to_html
 from calibre.utils.date import is_date_undefined
+from calibre.ebooks.chardet import strip_encoding_declarations
 
 JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
 
@@ -180,10 +181,14 @@ def render_jacket(mi, output_profile,
             except:
                 pass
 
+        args['_genre_label'] = args.get('_genre_label', '')
+        args['_genre'] = args.get('_genre', '')
+
         generated_html = P('jacket/template.xhtml',
                 data=True).decode('utf-8').format(**args)
 
         # Post-process the generated html to strip out empty header items
+
         soup = BeautifulSoup(generated_html)
         if not series:
             series_tag = soup.find(attrs={'class':'cbj_series'})
@@ -206,7 +211,8 @@ def render_jacket(mi, output_profile,
             if hr_tag is not None:
                 hr_tag.extract()
 
-        return soup.renderContents(None)
+        return strip_encoding_declarations(
+                soup.renderContents('utf-8').decode('utf-8'))
 
     from calibre.ebooks.oeb.base import RECOVER_PARSER
 

From 133338565f0bc61d264007d9e9e2859e914701ab Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Nov 2011 14:53:37 +0530
Subject: [PATCH 04/11] ...

---
 src/calibre/ebooks/oeb/transforms/jacket.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index ede34ef17c..429e10acf4 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -181,6 +181,7 @@ def render_jacket(mi, output_profile,
             except:
                 pass
 
+        # Used in the comment describing use of custom columns in templates
         args['_genre_label'] = args.get('_genre_label', '')
         args['_genre'] = args.get('_genre', '')
 

From abb41dcbb4269adb9481ed9f4a0b965c7e47e936 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Nov 2011 15:16:37 +0530
Subject: [PATCH 05/11] Irex driver: Put books into the top level directory
 instead of into /ebooks or /Books. Fixes #883616 (IREX DR Driver root
 directory)

---
 src/calibre/devices/irexdr/driver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/irexdr/driver.py b/src/calibre/devices/irexdr/driver.py
index 32e98f9353..bdc77b3193 100644
--- a/src/calibre/devices/irexdr/driver.py
+++ b/src/calibre/devices/irexdr/driver.py
@@ -33,7 +33,7 @@ class IREXDR1000(USBMS):
 
     MAIN_MEMORY_VOLUME_LABEL  = 'IRex Digital Reader 1000 Main Memory'
 
-    EBOOK_DIR_MAIN = 'ebooks'
+    EBOOK_DIR_MAIN = ''
     DELETE_EXTS = ['.mbp']
     SUPPORTS_SUB_DIRS = True
 
@@ -44,7 +44,7 @@ class IREXDR800(IREXDR1000):
     WINDOWS_MAIN_MEM = 'DR800'
     FORMATS     = ['epub', 'pdb', 'html', 'pdf', 'txt']
 
-    EBOOK_DIR_MAIN = 'Books'
+    EBOOK_DIR_MAIN = ''
     DELETE_EXTS = []
     SUPPORTS_SUB_DIRS = True
 

From b9765b8f529de6be40dc09d3b2e8a8c38f58ab8e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Nov 2011 15:54:28 +0530
Subject: [PATCH 06/11] ...

---
 src/calibre/ebooks/oeb/transforms/jacket.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index 429e10acf4..79524c19eb 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -176,14 +176,14 @@ def render_jacket(mi, output_profile,
             try:
                 display_name, val = mi.format_field_extended(key)[:2]
                 key = key.replace('#', '_')
-                args[key] = val
-                args[key+'_label'] = display_name
+                args[key] = escape(val)
+                args[key+'_label'] = escape(display_name)
             except:
                 pass
 
         # Used in the comment describing use of custom columns in templates
-        args['_genre_label'] = args.get('_genre_label', '')
-        args['_genre'] = args.get('_genre', '')
+        args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
+        args['_genre'] = args.get('_genre', '{_genre}')
 
         generated_html = P('jacket/template.xhtml',
                 data=True).decode('utf-8').format(**args)

From 5d6706b5fe8eee56e5a7a97adee9fd112e8c378f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Nov 2011 09:52:21 -0500
Subject: [PATCH 07/11] Fix Cyrillic encoding issues, patch from Roman.

---
 src/calibre/ebooks/metadata/book/base.py      |  3 +-
 src/calibre/ebooks/metadata/sources/ozon.py   | 45 ++++++++++++-------
 .../gui2/store/stores/ozon_ru_plugin.py       |  5 ++-
 src/calibre/translations/ru.po                | 10 ++---
 4 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index 53d336a23d..286bcee9d0 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -710,7 +710,8 @@ class Metadata(object):
             fmt('Title sort', self.title_sort)
         if self.authors:
             fmt('Author(s)',  authors_to_string(self.authors) + \
-               ((' [' + self.author_sort + ']') if self.author_sort else ''))
+               ((' [' + self.author_sort + ']') 
+                if self.author_sort and self.author_sort != _('Unknown') else ''))
         if self.publisher:
             fmt('Publisher', self.publisher)
         if getattr(self, 'book_producer', False):
diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py
index fa9951c40c..ecec13662f 100644
--- a/src/calibre/ebooks/metadata/sources/ozon.py
+++ b/src/calibre/ebooks/metadata/sources/ozon.py
@@ -11,7 +11,7 @@ import datetime
 from urllib import quote_plus
 from Queue import Queue, Empty
 from lxml import etree, html
-from calibre import as_unicode
+from calibre import prints, as_unicode
 
 from calibre.ebooks.chardet import xml_to_unicode
 
@@ -54,7 +54,8 @@ class Ozon(Source):
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
         # div_book -> search only books, ebooks and audio books
         search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
-
+        
+        # for ozon.ru search we have to format ISBN with '-'
         isbn = _format_isbn(log, identifiers.get('isbn', None))
         # TODO: format isbn!
         qItems = set([isbn, title])
@@ -64,7 +65,7 @@ class Ozon(Source):
         qItems.discard('')
         qItems = map(_quoteString, qItems)
 
-        q = ' '.join(qItems).strip()
+        q = u' '.join(qItems).strip()
         log.info(u'search string: ' + q)
 
         if isinstance(q, unicode):
@@ -78,13 +79,13 @@ class Ozon(Source):
         return search_url
     # }}}
 
-    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
-            identifiers={}, timeout=30):
+    def identify(self, log, result_queue, abort, title=None, authors=None, 
+            identifiers={}, timeout=30): # {{{
         if not self.is_configured():
             return
         query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
         if not query:
-            err = 'Insufficient metadata to construct query'
+            err = u'Insufficient metadata to construct query'
             log.error(err)
             return err
 
@@ -109,7 +110,7 @@ class Ozon(Source):
     # }}}
 
     def get_metadata(self, log, entries, title, authors, identifiers): # {{{
-        # some book titles have extra charactes like this
+        # some book titles have extra characters like this
         # TODO: make a twick
         reRemoveFromTitle = None 
         #reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
@@ -160,7 +161,7 @@ class Ozon(Source):
             mi.source_relevance = i
             if ensure_metadata_match(mi):
                 metadata.append(mi)
-                # log.debug(u'added metadata %s %s. '%(mi.title, mi.authors))
+                #log.debug(u'added metadata %s %s.'%(mi.title,  mi.authors))
             else:
                 log.debug(u'skipped metadata %s %s. (does not match the query)'%(mi.title, mi.authors))
         return metadata
@@ -285,12 +286,12 @@ class Ozon(Source):
         url = self.get_book_url(metadata.get_identifiers())[2]
 
         raw = self.browser.open_novisit(url, timeout=timeout).read()
-        doc = html.fromstring(raw)
+        doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
 
         xpt_prod_det_at = u'string(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "%s")]/a[1]/@title)'
         xpt_prod_det_tx = u'substring-after(//div[contains(@class, "product-detail")]//text()[contains(., "%s")], ":")'
 
-        # series
+        # series Серия/Серии
         xpt = xpt_prod_det_at % u'Сери'
         # % u'Серия:'
         series = doc.xpath(xpt)
@@ -300,7 +301,7 @@ class Ozon(Source):
         xpt = u'normalize-space(substring-after(//meta[@name="description"]/@content, "ISBN"))'
         isbn_str = doc.xpath(xpt)
         if isbn_str:
-            all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if check_isbn(isbn)]
+            all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if _verifyISBNIntegrity(log, isbn)]
             if all_isbns:
                 metadata.all_isbns = all_isbns
                 metadata.isbn = all_isbns[0]
@@ -333,10 +334,10 @@ class Ozon(Source):
         xpt = u'//table[@id="detail_description"]//tr/td'
         comment_elem = doc.xpath(xpt)
         if comment_elem:
-            comments = unicode(etree.tostring(comment_elem[0]))
+            comments = unicode(etree.tostring(comment_elem[0], encoding=unicode))
             if comments:
                 # cleanup root tag, TODO: remove tags like object/embeded
-                comments = re.sub(r'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
+                comments = re.sub(ur'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
                 if comments and (not metadata.comments or len(comments) > len(metadata.comments)):
                     metadata.comments = comments
                 else:
@@ -345,8 +346,16 @@ class Ozon(Source):
             log.debug('No book description found in HTML')
     # }}}
 
-def _quoteString(str): # {{{
-    return '"' + str + '"' if str and str.find(' ') != -1 else str
+def _quoteString(strToQuote): # {{{
+    return '"' + strToQuote + '"' if strToQuote and strToQuote.find(' ') != -1 else strToQuote
+# }}}
+
+def _verifyISBNIntegrity(log, isbn): # {{{
+    # Online ISBN-Check http://www.isbn-check.de/
+    res = check_isbn(isbn)
+    if not res:
+        log.error(u'ISBN integrity check failed for "%s"'%isbn)
+    return res is not None
 # }}}
 
 # TODO: make customizable
@@ -438,7 +447,7 @@ def _normalizeAuthorNameWithInitials(name): # {{{
     return res
 # }}}
 
-def toPubdate(log, yearAsString):
+def toPubdate(log, yearAsString): # {{{
     res = None
     if yearAsString:
         try:
@@ -448,7 +457,11 @@ def toPubdate(log, yearAsString):
         except:
             log.error('cannot parse to date %s'%yearAsString)
     return res
+# }}}
 
+def _listToUnicodePrintStr(lst): # {{{
+    return u'[' + u', '.join(unicode(x) for x in lst) + u']'
+# }}}
 
 if __name__ == '__main__': # tests {{{
     # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/ozon.py
diff --git a/src/calibre/gui2/store/stores/ozon_ru_plugin.py b/src/calibre/gui2/store/stores/ozon_ru_plugin.py
index 3934ebbbb3..5d977700c8 100644
--- a/src/calibre/gui2/store/stores/ozon_ru_plugin.py
+++ b/src/calibre/gui2/store/stores/ozon_ru_plugin.py
@@ -77,7 +77,8 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
 
         result = False
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            raw = xml_to_unicode(f.read(), verbose=True)[0]
+            doc = html.fromstring(raw)
             
             # example where we are going to find formats
             # <div class="l">
@@ -88,7 +89,7 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
             # <div class="l">
             #     <p>.epub, .fb2.zip, .pdf</p>
             # </div>
-            xpt = u'normalize-space(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
+            xpt = u'normalize-space(//div[contains(@id, "saleBlock")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
             formats = doc.xpath(xpt)
             if formats:
                 result = True
diff --git a/src/calibre/translations/ru.po b/src/calibre/translations/ru.po
index c515e6213e..89f44b0b6f 100644
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
@@ -12539,7 +12539,7 @@ msgstr "За&грузить метаданные"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:226
 msgid "Configure download metadata"
-msgstr ""
+msgstr "Настроить загрузку метаданных"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:230
 msgid "Change how calibre downloads metadata"
@@ -12595,7 +12595,7 @@ msgstr "&Пользовательские метаданные"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:788
 msgid "&Comments"
-msgstr "Комментарии"
+msgstr "&Комментарии"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:854
 msgid "Basic metadata"
@@ -12603,11 +12603,11 @@ msgstr "Основные метаданные"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:133
 msgid "Has cover"
-msgstr "Есть обложка"
+msgstr "Обложка"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:133
 msgid "Has summary"
-msgstr ""
+msgstr "Аннотация"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:190
 msgid ""
@@ -12619,7 +12619,7 @@ msgstr ""
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:268
 msgid "See at"
-msgstr ""
+msgstr "Посмотреть на"
 
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:403
 msgid "calibre is downloading metadata from: "

From 6ddaa374ce7e2a73037a223ce3aec5d07411a8a6 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Sat, 19 Nov 2011 08:59:58 -0700
Subject: [PATCH 08/11] Rewrite metadata header, removing dc:subject tags,
 added more error handling for Windows/iTunes artwork error, added iPhone 4S
 device ID (not enabled)

---
 src/calibre/devices/apple/driver.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py
index 9a242b245d..2f7c1a9d20 100644
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -221,7 +221,7 @@ class ITUNES(DriverBase):
     #  0x12a2   iPad2 (GSM)
     #  0x12a3   iPad2 (CDMA)
     VENDOR_ID = [0x05ac]
-    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a0,0x12a2,0x12a3]
+    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3]
     BCD = [0x01]
 
     # Plugboard ID
@@ -1412,10 +1412,16 @@ class ITUNES(DriverBase):
                         tmp_cover.write(cover_data)
 
                     if lb_added:
-                        if lb_added.Artwork.Count:
-                            lb_added.Artwork.Item(1).SetArtworkFromFile(tc)
-                        else:
-                            lb_added.AddArtworkFromFile(tc)
+                        try:
+                            if lb_added.Artwork.Count:
+                                lb_added.Artwork.Item(1).SetArtworkFromFile(tc)
+                            else:
+                                lb_added.AddArtworkFromFile(tc)
+                        except:
+                            if DEBUG:
+                                self.log.warning("  iTunes automation interface reported an error"
+                                                 " when adding artwork to '%s' in the iTunes Library" % metadata.title)
+                            pass
 
                     if db_added:
                         if db_added.Artwork.Count:
@@ -2775,6 +2781,8 @@ class ITUNES(DriverBase):
                 lb_added.sort_name.set(metadata_x.title_sort)
 
             if db_added:
+                self.log.warning("  waiting for db_added to become writeable ")
+                time.sleep(1.0)
                 db_added.name.set(metadata_x.title)
                 db_added.album.set(metadata_x.title)
                 db_added.artist.set(authors_to_string(metadata_x.authors))

From 8f3fff04e31b3046dde8a30ec0fa3c9f6b1e49c3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Nov 2011 08:13:11 +0530
Subject: [PATCH 09/11] Have downloaded periodicals recognized when transferred
 via USB to the Kindle Fire

---
 src/calibre/ebooks/mobi/writer2/main.py | 46 +++++++++++++------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index 655aa12c9e..760b444cd3 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -302,7 +302,19 @@ class MobiWriter(object):
 
     def generate_record0(self): #  MOBI header {{{
         metadata = self.oeb.metadata
-        exth = self.build_exth()
+        bt = 0x002
+        if self.primary_index_record_idx is not None:
+            if False and self.indexer.is_flat_periodical:
+                # Disabled as setting this to 0x102 causes the Kindle to not
+                # auto archive the issues
+                bt = 0x102
+            elif self.indexer.is_periodical:
+                # If you change this, remember to change the cdetype in the EXTH
+                # header as well
+                bt = {'newspaper':0x101}.get(self.publication_type, 0x103)
+
+
+        exth = self.build_exth(bt)
         first_image_record = None
         if self.image_records:
             first_image_record  = len(self.records)
@@ -351,17 +363,6 @@ class MobiWriter(object):
         # 0x10 - 0x13 : UID
         # 0x14 - 0x17 : Generator version
 
-        bt = 0x002
-        if self.primary_index_record_idx is not None:
-            if False and self.indexer.is_flat_periodical:
-                # Disabled as setting this to 0x102 causes the Kindle to not
-                # auto archive the issues
-                bt = 0x102
-            elif self.indexer.is_periodical:
-                # If you change this, remember to change the cdetype in the EXTH
-                # header as well
-                bt = {'newspaper':0x101}.get(self.publication_type, 0x103)
-
         record0.write(pack(b'>IIIII',
             0xe8, bt, 65001, uid, 6))
 
@@ -479,7 +480,7 @@ class MobiWriter(object):
         self.records[0] = align_block(record0)
     # }}}
 
-    def build_exth(self): # EXTH Header {{{
+    def build_exth(self, mobi_doctype): # EXTH Header {{{
         oeb = self.oeb
         exth = StringIO()
         nrecs = 0
@@ -535,16 +536,17 @@ class MobiWriter(object):
             nrecs += 1
 
         # Write cdetype
-        if not self.is_periodical and not self.opts.share_not_sync:
-            exth.write(pack(b'>II', 501, 12))
-            exth.write(b'EBOK')
-            nrecs += 1
+        if not self.is_periodical:
+            if not self.opts.share_not_sync:
+                exth.write(pack(b'>II', 501, 12))
+                exth.write(b'EBOK')
+                nrecs += 1
         else:
-            # Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype
-            # of 0x103 but the old writer didn't write them, and I dont know
-            # what it should be for type 0x102 (b'BLOG'?) so write nothing
-            # instead
-            pass
+            ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
+            if ids:
+                exth.write(pack(b'>II', 501, 12))
+                exth.write(ids)
+                nrecs += 1
 
         # Add a publication date entry
         if oeb.metadata['date']:

From c73b2569a152cffb2410ea40d5ce2db9fd341a3b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Nov 2011 08:30:29 +0530
Subject: [PATCH 10/11] Updated Expansion (spanish)

---
 recipes/expansion_spanish.recipe            | 160 +++++++++++++++-----
 src/calibre/ebooks/metadata/sources/ozon.py |  22 +--
 2 files changed, 130 insertions(+), 52 deletions(-)

diff --git a/recipes/expansion_spanish.recipe b/recipes/expansion_spanish.recipe
index f2229e90e6..07a0c99761 100644
--- a/recipes/expansion_spanish.recipe
+++ b/recipes/expansion_spanish.recipe
@@ -1,35 +1,43 @@
 #!/usr/bin/env  python
-__license__   = 'GPL v3'
-__author__    = 'Gerardo Diez'
-__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
-description   = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
-__docformat__ = 'restructuredtext en'
+__license__     = 'GPL v3'
+__copyright__   = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
+__author__      = 'desUBIKado, based on an earlier version by Gerardo Diez'
+__version__     = 'v1.01'
+__date__        = '13, November 2011'
 
 '''
-expansion.es
+http://www.expansion.com/
 '''
+
+import time
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-class Publico(BasicNewsRecipe):
-    title               =u'Expansion.com'
-    __author__      ='Gerardo Diez'
-    publisher       =u'Unidad Editorial Información Económica, S.L.'
-    category                ='finances, catalunya'
-    oldest_article      =1
+
+class expansion_spanish(BasicNewsRecipe):
+    __author__      ='Gerardo Diez & desUBIKado'
+    description     ='Financial news from Spain'
+    title           =u'Expansion'
+    publisher       =u'Unidad Editorial Internet, S.L.'
+    category        ='news, finances, Spain'
+    oldest_article  = 2
+    simultaneous_downloads = 10
     max_articles_per_feed   =100
-    simultaneous_downloads  =10
-    cover_url       =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
-    timefmt         ='[%A, %d %B, %Y]'
-    encoding        ='latin'
+    timefmt         = '[%a, %d %b, %Y]'
+    encoding        ='iso-8859-15'
     language        ='es'
-    remove_javascript   =True
-    no_stylesheets      =True
+    use_embedded_content  = False
+    remove_javascript     = True
+    no_stylesheets        = True
+    remove_empty_feeds    = True
+
     keep_only_tags      =dict(name='div', attrs={'class':['noticia primer_elemento']})
+
     remove_tags         =[
-                dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
-                dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
+                dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
+                dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
                 dict(name='span', attrs={'class':['comentarios']}),
                 dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
-                dict(name='div', attrs={'id':['comentarios_lectores_listado']})
+                dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
                             ]
     feeds               =[
                 (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
@@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
                 (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
                 (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
                 (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
-
                 (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
                 (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
-                (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
+                (u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
                 (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
-                (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
+                (u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
                 (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
-
                 (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
                 (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
                 (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
-                (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
-                (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
+                (u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
+                (u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
                 (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
-                (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
-                (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
-                (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
+                (u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
+                (u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
+                (u'Deporte y Negocio', u'http://estaticos.expansion.com/rss/empresasdeporte.xml'),
                 (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
                 (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
                 (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
-
-                (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
-                (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
+                (u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
+                (u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
                 (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
-
-                (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
+                (u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
                 (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
                 (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
-
-                (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
+                (u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
                 (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
-                (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
+                (u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
                 (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
-
                 (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
-                (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
-                (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
+                (u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
+                (u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
                 ]
 
+    # Fetch the cover image
+
+    def get_cover_url(self):
+       '''Return the URL of today's front page image on kiosko.net.
+
+       Falls back to a fixed image URL when that page image cannot be opened.
+       '''
+       # e.g. http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg
+       cover = 'http://img5.kiosko.net/' + time.strftime('%Y/%m/%d') + '/es/expansion.750.jpg'
+       br = BasicNewsRecipe.get_browser()
+       try:
+           br.open(cover)
+       except Exception:
+           # Best effort: any failure to open today's image selects the fallback
+           self.log("\nPortada no disponible")
+           cover ='http://www.aproahp.org/enlaces/images/diario_expansion.gif'
+       return cover
+
+
+
+    # To keep the advertisement from appearing when fetching an article, so that
+    # the web page itself is always retrieved, send the "t" variable with the current
+    # Unix epoch time, making the site believe the advertisement has just been shown
+
+    def print_version(self, url):
+           st = time.time()
+           segundos = str(int(st))
+           parametros = '.html?t=' + segundos
+           return url.replace('.html', parametros)
+
+
+
+    _processed_links = set()
+
+    def get_article_url(self, article):
+       '''Return the article's original URL, or None to drop the article.
+
+       "feedsportal" links (ending in story01.htm) are decoded back into
+       the original URL. None is returned when the entry has no link or
+       when the same link was already returned for another feed.
+       '''
+       link = article.get('link', None)
+       if link is None:
+           return None
+       if link.split('/')[-1]=="story01.htm":
+           link=link.split('/')[-2]
+           # Escape codes and their plain text, applied in this order
+           a=['0B','0C','0D','0E','0F','0G','0N'  ,'0L0S','0A']
+           b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
+           for coded, plain in zip(a, b):
+              link=link.replace(coded, plain)
+           link="http://"+link
+       # Drop articles duplicated in other feeds
+       if link in self._processed_links:
+           return None
+       self._processed_links.add(link)
+       return link
+
+
+
+    # A little CSS to improve the presentation of the articles
+
+    extra_css = '''
+                    .entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
+                    .fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+                '''
+
+
+
+    # Show the image of embedded videos: 'var imagen' ... '.jpg";' is rewritten into
+    # an <img src ... .jpg"> tag and the text from 'var id_reproductor' on is put back inside a script comment
+    preprocess_regexps = [
+                           (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+                           (re.compile(r'\.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+                           (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+                         ]
diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py
index ecec13662f..de45e0b8db 100644
--- a/src/calibre/ebooks/metadata/sources/ozon.py
+++ b/src/calibre/ebooks/metadata/sources/ozon.py
@@ -11,7 +11,7 @@ import datetime
 from urllib import quote_plus
 from Queue import Queue, Empty
 from lxml import etree, html
-from calibre import prints, as_unicode
+from calibre import as_unicode
 
 from calibre.ebooks.chardet import xml_to_unicode
 
@@ -54,7 +54,7 @@ class Ozon(Source):
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
         # div_book -> search only books, ebooks and audio books
         search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
-        
+
         # for ozon.ru search we have to format ISBN with '-'
         isbn = _format_isbn(log, identifiers.get('isbn', None))
         # TODO: format isbn!
@@ -79,7 +79,7 @@ class Ozon(Source):
         return search_url
     # }}}
 
-    def identify(self, log, result_queue, abort, title=None, authors=None, 
+    def identify(self, log, result_queue, abort, title=None, authors=None,
             identifiers={}, timeout=30): # {{{
         if not self.is_configured():
             return
@@ -112,13 +112,13 @@ class Ozon(Source):
     def get_metadata(self, log, entries, title, authors, identifiers): # {{{
         # some book titles have extra characters like this
         # TODO: make a twick
-        reRemoveFromTitle = None 
+        reRemoveFromTitle = None
         #reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
-        
+
         title = unicode(title).upper() if title else ''
         if reRemoveFromTitle:
-            title = reRemoveFromTitle.sub('', title) 
-        authors = map(_normalizeAuthorNameWithInitials, 
+            title = reRemoveFromTitle.sub('', title)
+        authors = map(_normalizeAuthorNameWithInitials,
                       map(unicode.upper, map(unicode, authors))) if authors else None
         ozon_id = identifiers.get('ozon', None)
 
@@ -320,7 +320,7 @@ class Ozon(Source):
                 displ_lang = lng_splt[0].strip()
         metadata.language = _translageLanguageToCode(displ_lang)
         #log.debug(u'language: %s'%displ_lang)
-        
+
         # can be set before from xml search responce
         if not metadata.pubdate:
             xpt = u'normalize-space(//div[@class="product-misc"]//text()[contains(., "г.")])'
@@ -434,13 +434,13 @@ def _translageLanguageToCode(displayLang): # {{{
 # [В.П. Колесников | Колесников В.П.]-> В. П. BКолесников
 def _normalizeAuthorNameWithInitials(name): # {{{
     res = name
-    if name: 
-        re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$' 
+    if name:
+        re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
         re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
         matcher = re.match(re1, unicode(name), re.UNICODE)
         if not matcher:
             matcher = re.match(re2, unicode(name), re.UNICODE)
-            
+
         if matcher:
             d = matcher.groupdict()
             res = ' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)

From 43f484f3e76b5f3b885b91e16364000be6bc761f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Nov 2011 08:45:44 +0530
Subject: [PATCH 11/11] Fix Salon.com

---
 recipes/salon.recipe | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/recipes/salon.recipe b/recipes/salon.recipe
index c421ab094d..5659e2f5c4 100644
--- a/recipes/salon.recipe
+++ b/recipes/salon.recipe
@@ -11,17 +11,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class Salon_com(BasicNewsRecipe):
     title = 'Salon.com'
-    __author__ = 'cix3'
+    __author__ = 'Kovid Goyal'
     description = 'Salon.com - Breaking news, opinion, politics, entertainment, sports and culture.'
     timefmt = ' [%b %d, %Y]'
     language = 'en'
 
     oldest_article = 7
     max_articles_per_feed = 100
-
-    remove_tags = [dict(name='div', attrs={'class':['ad_content', 'clearfix']}), dict(name='hr'), dict(name='img')]
-
-    remove_tags_before = dict(name='h2')
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@class="art"]'
+    remove_empty_feeds = True
 
     feeds = [
         ('News & Politics', 'http://feeds.salon.com/salon/news'),
@@ -40,5 +39,5 @@ class Salon_com(BasicNewsRecipe):
             ]
 
     def print_version(self, url):
-        return url.replace('/index.html', '/print.html')
+        return url + '/print/'