From 50f642ec07c12338e12e98b58b66cd7672583953 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jul 2011 21:15:12 -0600
Subject: [PATCH 1/6] Los Andes by Darko Miletic. Fixes #813278 (New recipe for
 Argentinian newspaper Los Andes)

---
 recipes/icons/losandes.png | Bin 0 -> 285 bytes
 recipes/losandes.recipe    |  78 +++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 recipes/icons/losandes.png
 create mode 100644 recipes/losandes.recipe

diff --git a/recipes/icons/losandes.png b/recipes/icons/losandes.png
new file mode 100644
index 0000000000000000000000000000000000000000..635217e7279702682fa4ae5de202e79b02f20267
GIT binary patch
literal 285
zcmV+&0pk9NP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM0002wNkl<Zc${PW
z|NlP&DS&ZsYG_z@3BqyE6bB+?X`qyX2zP>=GF*fShy}P<8#i2j_~wg&oFKC54QC$%
z#S537{qXe{iUtFDAs{I)!Xv=VhOA-Z*~bnVVjsT$2I@r7fE55u-Ej8F{8URId*P9L
zgf#$71^VZ~o39EYTnBEvdhq%S0Syy(-2i&`!1b378lYfUaQGf}4M3MD7Oe-G_u<>G
z{uq;nAie%*6R@l9LKJ|4)v#vkg-0I*I9Uzk1ur~&FTl+T*7<vWnq`2oEHGq&VFeTc
j2EhI**Fl;zNYVfR;B$BdtHVM800000NkvXXu0mjf_F8gF

literal 0
HcmV?d00001

diff --git a/recipes/losandes.recipe b/recipes/losandes.recipe
new file mode 100644
index 0000000000..cd095f1b4a
--- /dev/null
+++ b/recipes/losandes.recipe
@@ -0,0 +1,78 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.losandes.com.ar
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LosAndes(BasicNewsRecipe):  # calibre news recipe for www.losandes.com.ar
+    title                 = 'Los Andes'
+    __author__            = 'Darko Miletic'
+    description           = 'Noticias de Mendoza, Argentina y el resto del mundo'
+    publisher             = 'Los Andes'
+    category              = 'news, politics, Argentina'
+    oldest_article        = 2  # NOTE(review): presumably days, per the BasicNewsRecipe API -- confirm
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'es_AR'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.losandes.com.ar/graficos/losandes.png'
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
+                               h1,h2{font-family: "Times New Roman",Times,serif}
+                               .fechaNota{font-weight: bold; color: gray}
+                            """
+
+    conversion_options = {  # values reuse the class attributes defined above
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [  # NOTE(review): presumably elements stripped from each article -- see BasicNewsRecipe docs
+                     dict(name=['meta','link'])
+                    ,dict(attrs={'class':['cabecera', 'url']})
+                  ]
+    remove_tags_before=dict(attrs={'class':'cabecera'})
+    remove_tags_after=dict(attrs={'class':'url'})
+
+
+    feeds = [  # (section title, RSS feed URL) pairs
+              (u'Ultimas Noticias'       , u'http://www.losandes.com.ar/servicios/rss.asp?r=78' )
+             ,(u'Politica'               , u'http://www.losandes.com.ar/servicios/rss.asp?r=68' )
+             ,(u'Economia nacional'      , u'http://www.losandes.com.ar/servicios/rss.asp?r=65' )
+             ,(u'Economia internacional' , u'http://www.losandes.com.ar/servicios/rss.asp?r=505')
+             ,(u'Internacionales'        , u'http://www.losandes.com.ar/servicios/rss.asp?r=66' )
+             ,(u'Turismo'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=502')
+             ,(u'Fincas'                 , u'http://www.losandes.com.ar/servicios/rss.asp?r=504')
+             ,(u'Isha nos habla'         , u'http://www.losandes.com.ar/servicios/rss.asp?r=562')
+             ,(u'Estilo'                 , u'http://www.losandes.com.ar/servicios/rss.asp?r=81' )
+             ,(u'Cultura'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=503')
+             ,(u'Policiales'             , u'http://www.losandes.com.ar/servicios/rss.asp?r=70' )
+             ,(u'Deportes'               , u'http://www.losandes.com.ar/servicios/rss.asp?r=69' )
+             ,(u'Sociedad'               , u'http://www.losandes.com.ar/servicios/rss.asp?r=67' )
+             ,(u'Opinion'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=80' )
+             ,(u'Editorial'              , u'http://www.losandes.com.ar/servicios/rss.asp?r=76' )
+             ,(u'Mirador'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=79' )
+            ]
+
+    def print_version(self, url):  # map an article URL to the site's print-page URL
+        artid = url.rpartition('.')[0].rpartition('-')[2]  # text after the last '-' in the part of url before its last '.'
+        return "http://www.losandes.com.ar/includes/modulos/imprimir.asp?tipo=noticia&id=" + artid
+
+    def get_cover_url(self):  # build the th_tapa.jpg URL from a year/month/day path
+        month = strftime("%m").lstrip('0')  # month number without leading zero
+        day   = strftime("%d").lstrip('0')  # day of month without leading zero
+        year  = strftime("%Y")  # NOTE(review): assumes calibre's strftime defaults to the current date -- confirm
+        return "http://www.losandes.com.ar/fotografias/fotosnoticias/" + year + "/" + month + "/" + day + "/th_tapa.jpg"
+
+    def preprocess_html(self, soup):  # return soup with every inline style attribute removed
+        for item in soup.findAll(style=True):  # only elements that carry a style attribute
+            del item['style']
+        return soup

From 337ba181565bb18081bfa0710ff4455afb5b3df2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jul 2011 23:00:13 -0600
Subject: [PATCH 2/6] Mobi debug: Interpret the TBS index entries for book type
 documents

---
 src/calibre/ebooks/mobi/debug.py         | 69 ++++++++++++++++++++++--
 src/calibre/ebooks/mobi/writer2/utils.py |  5 +-
 2 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py
index 2dbe363e7c..9bc587c527 100644
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@@ -618,6 +618,13 @@ class IndexEntry(object): # {{{
                 vals.append(val)
             self.tags.append(Tag(tag, vals, self.entry_type, cncx))
 
+    @property
+    def label(self):
+        for tag in self.tags:
+            if tag.attr == 'label_offset':
+                return tag.cncx_value
+        return ''
+
     def __str__(self):
         ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%(
             self.index, self.entry_type, len(self.tags))]
@@ -731,7 +738,8 @@ class CNCX(object) : # {{{
 
 class TextRecord(object): # {{{
 
-    def __init__(self, idx, record, extra_data_flags, decompress):
+    def __init__(self, idx, record, extra_data_flags, decompress, index_record,
+            doc_type):
         self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
         self.raw = decompress(self.raw)
         if 0 in self.trailing_data:
@@ -743,6 +751,60 @@ class TextRecord(object): # {{{
 
         self.idx = idx
 
+        if 'indexing' in self.trailing_data and index_record is not None:
+            self.interpret_indexing(doc_type, index_record.indices)
+
+    def interpret_indexing(self, doc_type, indices):
+        raw = self.trailing_data['indexing']
+        ident, consumed = decint(raw)
+        raw = raw[consumed:]
+        entry_type = ident & 0b111
+        index_entry_idx = ident >> 3
+        index_entry = None
+        for i in indices:
+            if i.index == index_entry_idx:
+                index_entry = i.label
+                break
+        self.trailing_data['interpreted_indexing'] = (
+                'Type: %s, Index Entry: %s'%(entry_type, index_entry))
+        if doc_type == 2: # Book
+            self.interpret_book_indexing(raw, entry_type)
+
+    def interpret_book_indexing(self, raw, entry_type):
+        arg1, consumed = decint(raw)
+        raw = raw[consumed:]
+        if arg1 != 0:
+            raise ValueError('TBS index entry has unknown arg1: %d'%
+                    arg1)
+        if entry_type == 2:
+            desc = ('This record has only a single starting or a single'
+                    ' ending point')
+            if raw:
+                raise ValueError('TBS index entry has unknown extra bytes:'
+                        ' %r'%raw)
+        elif entry_type == 3:
+            desc = ('This record is spanned by a single node (i.e. it'
+                    ' has no start or end points)')
+            arg2, consumed = decint(raw)
+            if arg2 != 0:
+                raise ValueError('TBS index entry has unknown arg2: %d'%
+                        arg2)
+        elif entry_type == 6:
+            if len(raw) != 1:
+                raise ValueError('TBS index entry has unknown extra bytes:'
+                        ' %r'%raw)
+            num = ord(raw[0])
+            # An unmatched starting or ending point each contributes 1 to
+            # this count. A matched pair of starting and ending points
+            # together contribute 1 to this count. Note that you can only
+            # ever have either 1 unmatched start point or 1 unmatched end
+            # point, never both (logically impossible).
+            desc = ('This record has %d starting/ending points and/or complete'
+                    ' nodes.')%num
+        else:
+            raise ValueError('Unknown TBS index entry type: %d for book'%entry_type)
+        self.trailing_data['interpreted_indexing'] += ' :: ' + desc
+
     def dump(self, folder):
         name = '%06d'%self.idx
         with open(os.path.join(folder, name+'.txt'), 'wb') as f:
@@ -828,7 +890,7 @@ class MOBIFile(object): # {{{
         else:
             decompress = lambda x: x
 
-        self.index_header = None
+        self.index_header = self.index_record = None
         self.indexing_record_nums = set()
         pir = self.mobi_header.primary_index_record
         if pir != 0xffffffff:
@@ -848,7 +910,8 @@ class MOBIFile(object): # {{{
         if fntbr == 0xffffffff:
             fntbr = len(self.records)
         self.text_records = [TextRecord(r, self.records[r],
-            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
+            self.mobi_header.extra_data_flags, decompress, self.index_record,
+            self.mobi_header.type_raw) for r in xrange(1,
             min(len(self.records), ntr+1))]
         self.image_records, self.binary_records = [], []
         for i in xrange(fntbr, len(self.records)):
diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/writer2/utils.py
index 708b9152d4..1c2d3a110d 100644
--- a/src/calibre/ebooks/mobi/writer2/utils.py
+++ b/src/calibre/ebooks/mobi/writer2/utils.py
@@ -170,7 +170,8 @@ def get_trailing_data(record, extra_data_flags):
                 consumed = 1
             else:
                 sz, consumed = decint(record, forward=False)
-            data[i] = record[-(sz+consumed):-consumed]
-            record = record[:-(sz+consumed)]
+            if sz > consumed:
+                data[i] = record[-sz:-consumed]
+            record = record[:-sz]
     return data, record
 

From 7f5651e0bd61b9fbf4efecb65e8d433f8fc38b1b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jul 2011 23:11:03 -0600
Subject: [PATCH 3/6] MOBI: include size byte in multibyte trailing data size

---
 src/calibre/ebooks/mobi/writer2/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/writer2/utils.py
index 1c2d3a110d..dc9526eb77 100644
--- a/src/calibre/ebooks/mobi/writer2/utils.py
+++ b/src/calibre/ebooks/mobi/writer2/utils.py
@@ -166,7 +166,7 @@ def get_trailing_data(record, extra_data_flags):
             if i == 0:
                 # Only the first two bits are used for the size since there can
                 # never be more than 3 trailing multibyte chars
-                sz = ord(record[-1]) & 0b11
+                sz = (ord(record[-1]) & 0b11) + 1
                 consumed = 1
             else:
                 sz, consumed = decint(record, forward=False)

From ddae0ca7964bd21d0d40e6902b24ff903a66467d Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 20 Jul 2011 08:51:40 +0100
Subject: [PATCH 4/6] Make the conversion chain accept series numbers larger
 than 9999.99

---
 src/calibre/gui2/convert/metadata.ui | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/convert/metadata.ui b/src/calibre/gui2/convert/metadata.ui
index 95ccac6890..478f65e870 100644
--- a/src/calibre/gui2/convert/metadata.ui
+++ b/src/calibre/gui2/convert/metadata.ui
@@ -240,7 +240,7 @@
           <string>Book </string>
          </property>
          <property name="maximum">
-          <double>9999.989999999999782</double>
+          <double>9999999999.99</double>
          </property>
          <property name="value">
           <double>1.000000000000000</double>

From 96d51719af4816fe50c6f6315c1a9bf9982559b7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jul 2011 13:30:13 -0600
Subject: [PATCH 5/6] MOBI: compute the extra data flag bit with a shift

---
 src/calibre/ebooks/mobi/writer2/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/writer2/utils.py
index dc9526eb77..cd0ee453c3 100644
--- a/src/calibre/ebooks/mobi/writer2/utils.py
+++ b/src/calibre/ebooks/mobi/writer2/utils.py
@@ -161,7 +161,7 @@ def get_trailing_data(record, extra_data_flags):
     '''
     data = OrderedDict()
     for i in xrange(16, -1, -1):
-        flag = 2**i
+        flag = 1 << i # 2**i
         if flag & extra_data_flags:
             if i == 0:
                 # Only the first two bits are used for the size since there can

From 629da2b324ff5017e3a316bed0722cb591259258 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jul 2011 14:01:41 -0600
Subject: [PATCH 6/6] MOBI: move writer2/utils.py to mobi/utils.py and remove
 TBS index interpretation from debug.py

---
 src/calibre/ebooks/mobi/debug.py              | 62 +------------------
 .../ebooks/mobi/{writer2 => }/utils.py        |  0
 src/calibre/ebooks/mobi/writer2/main.py       |  2 +-
 3 files changed, 4 insertions(+), 60 deletions(-)
 rename src/calibre/ebooks/mobi/{writer2 => }/utils.py (100%)

diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py
index 9bc587c527..971f037479 100644
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@@ -11,7 +11,7 @@ import struct, datetime, sys, os, shutil
 from collections import OrderedDict
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
-from calibre.ebooks.mobi.writer2.utils import (decode_hex_number, decint,
+from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
         get_trailing_data)
 from calibre.utils.magick.draw import identify_data
 
@@ -738,8 +738,7 @@ class CNCX(object) : # {{{
 
 class TextRecord(object): # {{{
 
-    def __init__(self, idx, record, extra_data_flags, decompress, index_record,
-            doc_type):
+    def __init__(self, idx, record, extra_data_flags, decompress):
         self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
         self.raw = decompress(self.raw)
         if 0 in self.trailing_data:
@@ -751,60 +750,6 @@ class TextRecord(object): # {{{
 
         self.idx = idx
 
-        if 'indexing' in self.trailing_data and index_record is not None:
-            self.interpret_indexing(doc_type, index_record.indices)
-
-    def interpret_indexing(self, doc_type, indices):
-        raw = self.trailing_data['indexing']
-        ident, consumed = decint(raw)
-        raw = raw[consumed:]
-        entry_type = ident & 0b111
-        index_entry_idx = ident >> 3
-        index_entry = None
-        for i in indices:
-            if i.index == index_entry_idx:
-                index_entry = i.label
-                break
-        self.trailing_data['interpreted_indexing'] = (
-                'Type: %s, Index Entry: %s'%(entry_type, index_entry))
-        if doc_type == 2: # Book
-            self.interpret_book_indexing(raw, entry_type)
-
-    def interpret_book_indexing(self, raw, entry_type):
-        arg1, consumed = decint(raw)
-        raw = raw[consumed:]
-        if arg1 != 0:
-            raise ValueError('TBS index entry has unknown arg1: %d'%
-                    arg1)
-        if entry_type == 2:
-            desc = ('This record has only a single starting or a single'
-                    ' ending point')
-            if raw:
-                raise ValueError('TBS index entry has unknown extra bytes:'
-                        ' %r'%raw)
-        elif entry_type == 3:
-            desc = ('This record is spanned by a single node (i.e. it'
-                    ' has no start or end points)')
-            arg2, consumed = decint(raw)
-            if arg2 != 0:
-                raise ValueError('TBS index entry has unknown arg2: %d'%
-                        arg2)
-        elif entry_type == 6:
-            if len(raw) != 1:
-                raise ValueError('TBS index entry has unknown extra bytes:'
-                        ' %r'%raw)
-            num = ord(raw[0])
-            # An unmatched starting or ending point each contributes 1 to
-            # this count. A matched pair of starting and ending points
-            # together contribute 1 to this count. Note that you can only
-            # ever have either 1 unmatched start point or 1 unmatched end
-            # point, never both (logically impossible).
-            desc = ('This record has %d starting/ending points and/or complete'
-                    ' nodes.')%num
-        else:
-            raise ValueError('Unknown TBS index entry type: %d for book'%entry_type)
-        self.trailing_data['interpreted_indexing'] += ' :: ' + desc
-
     def dump(self, folder):
         name = '%06d'%self.idx
         with open(os.path.join(folder, name+'.txt'), 'wb') as f:
@@ -910,8 +855,7 @@ class MOBIFile(object): # {{{
         if fntbr == 0xffffffff:
             fntbr = len(self.records)
         self.text_records = [TextRecord(r, self.records[r],
-            self.mobi_header.extra_data_flags, decompress, self.index_record,
-            self.mobi_header.type_raw) for r in xrange(1,
+            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
             min(len(self.records), ntr+1))]
         self.image_records, self.binary_records = [], []
         for i in xrange(fntbr, len(self.records)):
diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/utils.py
similarity index 100%
rename from src/calibre/ebooks/mobi/writer2/utils.py
rename to src/calibre/ebooks/mobi/utils.py
diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index 76976ce81e..2e9d31458a 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -18,7 +18,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
-from calibre.ebooks.mobi.writer2.utils import (rescale_image, encint)
+from calibre.ebooks.mobi.utils import (rescale_image, encint)
 
 EXTH_CODES = {
     'creator': 100,