Sync to trunk.

2026-01-02 18:20:19 -05:00 · 2009-10-25 10:13:27 -04:00 · 2009-10-25 10:13:27 -04:00 · e21e36ed60
commit e21e36ed60
parent 53a97fc98c 712e082dc4
4 changed files with 146 additions and 39 deletions
--- a/resources/recipes/irish_independent.recipe
+++ b/resources/recipes/irish_independent.recipe
@ -0,0 +1,44 @@
+__license__   = 'GPL v3'
+__copyright__ = '2009 Neil Grogan'
+# 
+# Irish Independent Recipe
+#
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class IrishIndependent(BasicNewsRecipe):  # calibre news recipe: feed list and page clean-up settings for independent.ie
+    title          = u'Irish Independent'
+    description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'  # NOTE(review): "Irelands" lacks its apostrophe; runtime string, left unchanged here
+    __author__	= 'Neil Grogan'
+    language = 'en_UK'  # NOTE(review): the ISO 3166 code for the United Kingdom is GB, and this is an Irish paper - confirm the intended locale
+    oldest_article = 7  # presumably a limit in days - confirm against BasicNewsRecipe
+    max_articles_per_feed = 100
+    remove_tags_before = dict(id='article')  # presumably discards everything ahead of the element with id 'article' - confirm
+    remove_tags_after  = [dict(name='div', attrs={'class':'toolsBottom'})]  # presumably discards everything after this div - confirm
+    remove_tags    = [  # div classes matched for removal; 'toolsBottom' is also the remove_tags_after marker
+	        dict(name='div',attrs={'class':'toolsBottom'}),
+	        dict(name='div',attrs={'class':'toolsTop'}),
+	        dict(name='div',attrs={'class':'boxRelated'}),
+	        dict(name='div',attrs={'class':'section first'}),
+	        dict(name='div',attrs={'class':'tabIt'}),
+	        dict(name='div',attrs={'class':'inner'})
+	        ]
+
+    feeds	= [  # (feed title, RSS URL) pairs
+    (u'Frontpage News', u'http://www.independent.ie/rss'),
+    (u'World News', u'http://www.independent.ie/world-news/rss'),
+    (u'Technology', u'http://www.independent.ie/business/technology/rss'),
+    (u'Sport', u'http://www.independent.ie/sport/rss'),
+    (u'Entertainment', u'http://www.independent.ie/entertainment/rss'),
+    (u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'),
+    (u'Education',u'http://www.independent.ie/education/rss'),
+    (u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'),
+    (u'Travel', u'http://www.independent.ie/travel/rss'),
+    (u'Letters', u'http://www.independent.ie/opinion/letters/rss'),
+    (u'Weather', u'http://www.independent.ie/weather/rss')
+    ]
+    
+# If text-only articles are desired, uncomment this method and indent it into the class body above
+#def print_version(self, url):
+#    return '%s?service=Print' % url
+
--- a/setup/installer/osx/freeze.py
+++ b/setup/installer/osx/freeze.py
@ -386,7 +386,7 @@ def main():
                     {
                         'optimize' : 2,
                         'dist_dir' : 'build/py2app',
-                         'argv_emulation' : True,
+                         'argv_emulation' : False,
                         'iconfile' : icon,
                         'frameworks': ['libusb.dylib', 'libunrar.dylib'],
                         'includes' : ['sip', 'pkg_resources', 'PyQt4.QtXml',
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -247,11 +247,15 @@ class Serializer(object):
        if not item.linear:
            self.breaks.append(buffer.tell() - 1)
        self.id_offsets[item.href] = buffer.tell()
+        # Kindle periodical articles are contained in a <div> tag
+        buffer.write('<div>')
        for elem in item.data.find(XHTML('body')):
-            buffer.write('<a></a>')
            self.serialize_elem(elem, item)
+        # Kindle periodical article end marker
+        buffer.write('<div></div>')
        if self.write_page_breaks_after_item:
            buffer.write('<mbp:pagebreak/>')
+        buffer.write('</div>')

    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        buffer = self.buffer
@ -341,6 +345,9 @@ class MobiWriter(object):
        self._conforming_periodical_toc = False
        self._indexable = False
        self._ctoc = ""
+        self._ctoc_records = []
+        self._ctoc_offset = 0
+        self._ctoc_largest = 0
        self._HTMLRecords = []
        self._tbSequence = ""
        self._MobiDoc = None
@ -888,7 +895,6 @@ class MobiWriter(object):
        tbsType = 0x00
        tbSequence = ""

-
        # Generate TBS for type 0x101/0x103 - structured periodical
        if self._initialIndexRecordFound == False :
            # Is there any indexed content yet?
@ -1239,7 +1245,10 @@ class MobiWriter(object):
            self._conforming_periodical_toc = self._evaluate_periodical_toc()

        # This routine decides whether to build flat or structured based on self._conforming_periodical_toc
-        self._ctoc = self._generate_ctoc()
+        # self._ctoc = self._generate_ctoc()
+
+        # _generate_ctoc() may append full CNCX records to self._ctoc_records itself; it returns only the final record, which is stored here
+        self._ctoc_records.append(self._generate_ctoc())

        # Build the HTMLRecords list so we can assemble the trailing bytes sequences in the following while loop
        toc = self._oeb.toc
@ -1396,8 +1405,10 @@ class MobiWriter(object):
        if btype < 0x100 :
            record0.write(pack('>I', 0xffffffff))
        elif btype > 0x100 and self._indexable :
-            record0.write(pack('>I', 0xffffffff if self._primary_index_record is
-                None else self._primary_index_record+3))
+            if self._primary_index_record is None:
+                record0.write(pack('>I', 0xffffffff))
+            else:
+                record0.write(pack('>I', self._primary_index_record + 2 + len(self._ctoc_records)))
        else :
            record0.write(pack('>I', 0xffffffff))

@ -1614,8 +1625,7 @@ class MobiWriter(object):
        self._primary_index_record = None

 		# Build the NCXEntries and INDX
-        indxt, indxt_count, indices, last_name = \
-                self._generate_indxt(self._ctoc)
+        indxt, indxt_count, indices, last_name = self._generate_indxt()

        if last_name is None:
            self._oeb.log.warn('Input document has no TOC. No index generated.')
@ -1724,7 +1734,13 @@ class MobiWriter(object):
        indx0 = indx0.getvalue()

        self._primary_index_record = len(self._records)
-        self._records.extend([indx0, indx1, self._ctoc])
+
+        # GR: handle multiple ctoc records
+        # self._records.extend([indx0, indx1, self._ctoc])
+        self._records.extend([indx0, indx1 ])
+        for (i,ctoc_record) in enumerate(self._ctoc_records):
+            self._records.append(ctoc_record)
+            # print "adding %d of %d ctoc records" % (i+1, len(self._ctoc_records))

        # Indexing for author/description fields in summary section
        # Test for indexed periodical - only one that needs secondary index
@ -1788,6 +1804,28 @@ class MobiWriter(object):
            text = "(none)".encode('utf-8')
        return text

+    def _add_to_ctoc(self, ctoc_str, record_offset):
+        # Append ctoc_str to the current ctoc buffer (self._ctoc) as vwilen (decint-encoded length) + string
+        # Return the buffer position the entry was written at, plus record_offset
+        # If the entry does not fit below 0xfbf8 in the current ctoc record, that record is closed first
+        if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):  # the extra 2 presumably leaves room for the length prefix - TODO confirm
+            # flush this ctoc into self._ctoc_records, start a new (empty) one
+            print "closing ctoc_record at 0x%X" % self._ctoc.tell()  # NOTE(review): debug output written to stdout
+            print "starting new ctoc with '%-50.50s ...'" % ctoc_str
+            # pad the closed record with 00 bytes up to 0xfbf8
+            pad = 0xfbf8 - self._ctoc.tell()
+            print "padding %d bytes of 00" % pad
+            self._ctoc.write('\0' * (pad))
+            self._ctoc_records.append(self._ctoc.getvalue())
+            self._ctoc.truncate(0)
+            self._ctoc_offset += 0x10000  # offsets into each further record are based 0x10000 higher
+            record_offset = self._ctoc_offset  # after a flush the caller's record_offset is replaced, even if it was 0
+
+        offset = self._ctoc.tell() + record_offset  # computed before the write, so it addresses the length prefix
+        self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
+        return offset
+
+
    def _add_flat_ctoc_node(self, node, ctoc, title=None):
        # Process 'chapter' or 'article' nodes only, force either to 'chapter'
        t = node.title if title is None else title
@ -1804,8 +1842,9 @@ class MobiWriter(object):
            ctoc_name_map['klass'] = node.klass

        # Add title offset to name map
-        ctoc_name_map['titleOffset'] = ctoc.tell()
-        ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+#         ctoc_name_map['titleOffset'] = ctoc.tell()
+#         ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+        ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
        self._chapterCount += 1

        # append this node's name_map to map
@ -1816,6 +1855,8 @@ class MobiWriter(object):

    def _add_structured_ctoc_node(self, node, ctoc, title=None):
        # Process 'periodical', 'section' and 'article'
+
+        # Title/description/author offsets are based on self._ctoc_offset (the current ctoc record); class names pass 0
        if node.klass is None :
            return
        t = node.title if title is None else title
@ -1830,14 +1871,16 @@ class MobiWriter(object):

        if node.klass == 'chapter':
            # Add title offset to name map
-            ctoc_name_map['titleOffset'] = ctoc.tell()
-            ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+#             ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
+#             ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
            self._chapterCount += 1

        elif node.klass == 'periodical' :
            # Add title offset
-            ctoc_name_map['titleOffset'] = ctoc.tell()
-            ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+#             ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
+#             ctoc.write( decint(len(t), DECINT_FORWARD) + t )
+            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'periodical' in _ctoc_map
            for entry in self._ctoc_map:
@ -1848,15 +1891,18 @@ class MobiWriter(object):
                else :
                    continue
            else:
-                ctoc_name_map['classOffset'] = ctoc.tell()
-                ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
+                # class names should always be in CNCX 0 - no offset
+#                 ctoc_name_map['classOffset'] = ctoc.tell()
+#                 ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
+                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            self._periodicalCount += 1

        elif node.klass == 'section' :
            # Add title offset
-            ctoc_name_map['titleOffset'] = ctoc.tell()
-            ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+#             ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
+#             ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'section' in _ctoc_map
            for entry in self._ctoc_map:
@ -1867,15 +1913,18 @@ class MobiWriter(object):
                else :
                    continue
            else:
-                ctoc_name_map['classOffset'] = ctoc.tell()
-                ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
+                # class names should always be in CNCX 0 - no offset
+#                 ctoc_name_map['classOffset'] = ctoc.tell()
+#                 ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
+                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            self._sectionCount += 1

        elif node.klass == 'article' :
            # Add title offset/title
-            ctoc_name_map['titleOffset'] = ctoc.tell()
-            ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+#             ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
+#             ctoc.write(decint(len(t), DECINT_FORWARD)+t)
+            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'article' in _ctoc_map
            for entry in self._ctoc_map:
@ -1885,22 +1934,26 @@ class MobiWriter(object):
                else :
                    continue
            else:
-                ctoc_name_map['classOffset'] = ctoc.tell()
-                ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
+                # class names should always be in CNCX 0 - no offset
+#                 ctoc_name_map['classOffset'] = ctoc.tell()
+#                 ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
+                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            # Add description offset/description
            if node.description :
                d = self._clean_text_value(node.description)
-                ctoc_name_map['descriptionOffset'] = ctoc.tell()
-                ctoc.write(decint(len(d), DECINT_FORWARD)+d)
+#                 ctoc_name_map['descriptionOffset'] = ctoc.tell() + ctoc_offset
+#                 ctoc.write(decint(len(d), DECINT_FORWARD)+d)
+                ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
            else :
                ctoc_name_map['descriptionOffset'] = None

-            # Add author offset/description
+            # Add author offset/attribution
            if node.author :
                a = self._clean_text_value(node.author)
-                ctoc_name_map['authorOffset'] = ctoc.tell()
-                ctoc.write(decint(len(a), DECINT_FORWARD)+a)
+#                 ctoc_name_map['authorOffset'] = ctoc.tell() + ctoc_offset
+#                 ctoc.write(decint(len(a), DECINT_FORWARD)+a)
+                ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
            else :
                ctoc_name_map['authorOffset'] = None

@ -1914,6 +1967,7 @@ class MobiWriter(object):
        # append this node's name_map to map
        self._ctoc_map.append(ctoc_name_map)

+
    def _generate_ctoc(self):
        # Generate the compiled TOC strings
        # Each node has 1-4 CTOC entries:
@ -1932,7 +1986,8 @@ class MobiWriter(object):
        reduced_toc = []
        self._ctoc_map = []				# per node dictionary of {class/title/desc/author} offsets
        self._last_toc_entry = None
-        ctoc = StringIO()
+        #ctoc = StringIO()
+        self._ctoc = StringIO()

        # Track the individual node types
        self._periodicalCount = 0
@ -1947,8 +2002,9 @@ class MobiWriter(object):
            for (child) in toc.iter():
                if self.opts.verbose > 2 :
                    self._oeb.logger.info("  %s" % child)
-                self._add_structured_ctoc_node(child, ctoc)
+                self._add_structured_ctoc_node(child, self._ctoc)
                first = False
+
        else :
            self._oeb.logger.info('Generating flat CTOC ...')
            previousOffset = -1
@ -1980,7 +2036,7 @@ class MobiWriter(object):
                    # print "_generate_ctoc: child offset: 0x%X" % currentOffset

                    if currentOffset != previousOffset :
-                        self._add_flat_ctoc_node(child, ctoc)
+                        self._add_flat_ctoc_node(child, self._ctoc)
                        reduced_toc.append(child)
                        previousOffset = currentOffset
                    else :
@ -2023,7 +2079,12 @@ class MobiWriter(object):
            else :
                self._oeb.logger.info("chapterCount: %d" % self._chapterCount)

-        return align_block(ctoc.getvalue())
+        if True:
+            rec_count = len(self._ctoc_records)
+            self._oeb.logger.info("  CNCX utilization: %d %s %.0f%% full" % \
+                (rec_count + 1, 'records, last record' if rec_count else 'record,', len(self._ctoc.getvalue())/655) )
+
+        return align_block(self._ctoc.getvalue())

    def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
        pos = 0xc0 + indxt.tell()
@ -2342,7 +2403,7 @@ class MobiWriter(object):

        return last_name, c

-    def _generate_indxt(self, ctoc):
+    def _generate_indxt(self):
        # Assumption: child.depth() represents nestedness of the TOC.
        # A flat document (book) has a depth of 2:
        # <navMap>					child.depth() = 2
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -679,11 +679,13 @@ class DeviceGUI(object):
            auto = []
            if do_auto_convert and _auto_ids:
                for id in _auto_ids:
-                    formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')]
-                    formats = formats if formats != None else []
-                    if list(set(formats).intersection(available_input_formats())) != [] and list(set(self.device_manager.device_class.settings().format_map).intersection(available_output_formats())) != []:
+                    dbfmts = self.library_view.model().db.formats(id, index_is_id=True)
+                    formats = [] if dbfmts is None else \
+                        [f.lower() for f in dbfmts.split(',')]
+                    if set(formats).intersection(available_input_formats()) \
+                            and set(self.device_manager.device_class.settings().format_map).intersection(available_output_formats()):
                        auto.append(id)
-            if auto != []:
+            if auto:
                format = None
                for fmt in self.device_manager.device_class.settings().format_map:
                    if fmt in list(set(self.device_manager.device_class.settings().format_map).intersection(set(available_output_formats()))):