IGN:...

2025-07-09 03:04:10 -04:00 · 2009-11-22 08:14:34 -07:00 · 2009-11-22 08:14:34 -07:00 · 7ba005f3e0
commit 7ba005f3e0
parent 2fe75e21af
4 changed files with 95 additions and 93 deletions
--- a/resources/recipes/fokkeensukke.recipe
+++ b/resources/recipes/fokkeensukke.recipe
@@ -1,87 +1,87 @@
-#!/usr/bin/python
+#!/usr/bin/python
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
+from calibre.ebooks.BeautifulSoup import Tag
-
+
-
+
-class FokkeEnSukkeRecipe(BasicNewsRecipe) :
+class FokkeEnSukkeRecipe(BasicNewsRecipe) :
-    __license__   = 'GPL v3'
+    __license__   = 'GPL v3'
-    __author__ = 'kwetal'
+    __author__ = 'kwetal'
-    language = 'nl'
+    language = 'nl'
-    description = u'Popular Dutch daily cartoon Fokke en Sukke'
+    description = u'Popular Dutch daily cartoon Fokke en Sukke'
-
+
-    title = u'Fokke en Sukke'
+    title = u'Fokke en Sukke'
-    no_stylesheets = True
+    no_stylesheets = True
-    # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
+    # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
-    # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
+    # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
-    template_css = ''
+    template_css = ''
-    INDEX = u'http://foksuk.nl'
+    INDEX = u'http://foksuk.nl'
-
+
-    # This cover is not as nice as it could be, needs some work
+    # This cover is not as nice as it could be, needs some work
-    #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
+    #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
-
+
-    keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
+    keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
-
+
-    def parse_index(self) :
+    def parse_index(self) :
-        # A list with daynames as they _can_ appear in the index
+        # A list with daynames as they _can_ appear in the index
-        dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag']
+        dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag']
-        soup = self.index_to_soup(self.INDEX)
+        soup = self.index_to_soup(self.INDEX)
-
+
-        # Find the links for the various cartoons for this week and loop through them
+        # Find the links for the various cartoons for this week and loop through them
-        index = soup.find('div', attrs={'class' : 'selectcartoon'})
+        index = soup.find('div', attrs={'class' : 'selectcartoon'})
-        links = index.findAll('a')
+        links = index.findAll('a')
-        maxIndex = len(links) - 1
+        maxIndex = len(links) - 1
-        articles = []
+        articles = []
-        for i in range(len(links)) :
+        for i in range(len(links)) :
-            # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
+            # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
-            if i == 0 :
+            if i == 0 :
-                continue
+                continue
-
+
-            # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
+            # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
-            # If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
+            # If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
-            if links[i].renderContents() in dayNames :
+            if links[i].renderContents() in dayNames :
-                # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
+                # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
-                if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
+                if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
-                    # Got you! Add it to the list
+                    # Got you! Add it to the list
-                    article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url'  : self.INDEX + links[i + 1]['href'], 'description' : ''}
+                    article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url'  : self.INDEX + links[i + 1]['href'], 'description' : ''}
-                    articles.append(article)
+                    articles.append(article)
-                    # If there is a '1', there should be a '2' as well, but better save than sorry
+                    # If there is a '1', there should be a '2' as well, but better save than sorry
-                    if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') :
+                    if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') :
-                        # Got you! Add it to the list
+                        # Got you! Add it to the list
-                        article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url'  : self.INDEX + links[i + 2]['href'], 'description' : ''}
+                        article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url'  : self.INDEX + links[i + 2]['href'], 'description' : ''}
-                        articles.append(article)
+                        articles.append(article)
-                else :
+                else :
-                    # There is only one cartoon for this day. Add it to the list.
+                    # There is only one cartoon for this day. Add it to the list.
-                    article = {'title' : links[i].renderContents(), 'date' : u'', 'url'  : self.INDEX + links[i]['href'], 'description' : ''}
+                    article = {'title' : links[i].renderContents(), 'date' : u'', 'url'  : self.INDEX + links[i]['href'], 'description' : ''}
-                    articles.append(article)
+                    articles.append(article)
-        # Might as well use the weeknumber as title
+        # Might as well use the weeknumber as title
-        week = index.find('span', attrs={'class' : 'week'}).renderContents()
+        week = index.find('span', attrs={'class' : 'week'}).renderContents()
-
+
-        return [[week, articles]]
+        return [[week, articles]]
-
+
-    def preprocess_html(self, soup) :
+    def preprocess_html(self, soup) :
-        # This method is called for every page, be it cartoon or TOC. We need to process each in their own way
+        # This method is called for every page, be it cartoon or TOC. We need to process each in their own way
-        cartoon = soup.find('div', attrs={'class' : 'cartoon'})
+        cartoon = soup.find('div', attrs={'class' : 'cartoon'})
-        if cartoon :
+        if cartoon :
-            # It is a cartoon. Extract the title.
+            # It is a cartoon. Extract the title.
-            title = ''
+            title = ''
-            img = soup.find('img', attrs = {'alt' : True})
+            img = soup.find('img', attrs = {'alt' : True})
-            if img :
+            if img :
-                title = img['alt']
+                title = img['alt']
-
+
-            # Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
+            # Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
-            tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
+            tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
-            tag.insert(0, title)
+            tag.insert(0, title)
-            cartoon.insert(0, tag)
+            cartoon.insert(0, tag)
-
+
-            # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
+            # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
-            # and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
+            # and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
-            select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
+            select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
-            if select :
+            if select :
-                select.extract()
+                select.extract()
-
+
-            return cartoon
+            return cartoon
-        else :
+        else :
-            # It is a TOC. Just return the whole lot.
+            # It is a TOC. Just return the whole lot.
-            return soup
+            return soup
-
+
-
+
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -309,11 +309,13 @@ OptionRecommendation(name='remove_paragraph_spacing',
        'paragraphs of 1.5em. Spacing removal will not work '
        'if the source file does not use paragraphs (<p> or <div> tags).')
        ),
-        
+
 OptionRecommendation(name='remove_paragraph_spacing_indent_size',
        recommended_value=1.5, level=OptionRecommendation.LOW,
-        help=_('Width of the indent used with Remove spacing between paragraphs option')
+        help=_('When calibre removes inter paragraph spacing, it automatically '
-        ),        
+            'sets a paragraph indent, to ensure that paragraphs can be easily '
+            'distinguished. This option controls the width of that indent.')
+        ),
 OptionRecommendation(name='prefer_metadata_cover',
        recommended_value=False, level=OptionRecommendation.LOW,
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -256,7 +256,7 @@ class PML_HTMLizer(object):
            if code in self.SPAN_STATES:
                del spans[spans.index(code)]
            for c in divs+spans:
-                if state[c][0]:
+                if self.state[c][0]:
                    if c in self.STATES_VALUE_REQ:
                        text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
                    else:
@@ -265,7 +265,7 @@ class PML_HTMLizer(object):
            if code in self.STATES_VALUE_REQ:
                val = self.code_value(stream)
                text = self.STATES_TAGS[code][0] % val
-                state[code][1] = val
+                self.state[code][1] = val
            else:
                text = self.STATES_TAGS[code][0]
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -163,7 +163,7 @@ Paragraph spacing
 Normally, paragraphs in XHTML are rendered with a blank line between them and no leading text
 indent. |app| has a couple of options to control this. :guilabel:`Remove spacing between paragraphs`
 forcefully ensure that all paragraphs have no inter paragraph spacing. It also sets the text
-indent to 1.5em (can be changed) to mark that start of every paragraph.
+indent to 1.5em (can be changed) to mark the start of every paragraph.
 :guilabel:`Insert blank line` does the
 opposite, guaranteeing that there is exactly one blank line between each pair of paragraphs. 
 Both these options are very comprehensive, removing spacing, or inserting it for *all* paragraphs