diff --git a/resources/recipes/fokkeensukke.recipe b/resources/recipes/fokkeensukke.recipe index 5627631770..3ddbe1cfe5 100644 --- a/resources/recipes/fokkeensukke.recipe +++ b/resources/recipes/fokkeensukke.recipe @@ -1,87 +1,87 @@ -#!/usr/bin/python -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - - -class FokkeEnSukkeRecipe(BasicNewsRecipe) : - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'nl' - description = u'Popular Dutch daily cartoon Fokke en Sukke' - - title = u'Fokke en Sukke' - no_stylesheets = True - # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the and not in the . My reader (Sony PRS-600) has a serious issue - # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me. - template_css = '' - INDEX = u'http://foksuk.nl' - - # This cover is not as nice as it could be, needs some work - #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif' - - keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})] - - def parse_index(self) : - # A list with daynames as they _can_ appear in the index - dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag'] - soup = self.index_to_soup(self.INDEX) - - # Find the links for the various cartoons for this week and loop through them - index = soup.find('div', attrs={'class' : 'selectcartoon'}) - links = index.findAll('a') - maxIndex = len(links) - 1 - articles = [] - for i in range(len(links)) : - # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice. - if i == 0 : - continue - - # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname. - # If there are two, there are three links in sequence: dayname 1 2. In that case we're interested in the last two. 
- if links[i].renderContents() in dayNames : - # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content - if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') : - # Got you! Add it to the list - article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''} - articles.append(article) - # If there is a '1', there should be a '2' as well, but better save than sorry - if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') : - # Got you! Add it to the list - article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url' : self.INDEX + links[i + 2]['href'], 'description' : ''} - articles.append(article) - else : - # There is only one cartoon for this day. Add it to the list. - article = {'title' : links[i].renderContents(), 'date' : u'', 'url' : self.INDEX + links[i]['href'], 'description' : ''} - articles.append(article) - # Might as well use the weeknumber as title - week = index.find('span', attrs={'class' : 'week'}).renderContents() - - return [[week, articles]] - - def preprocess_html(self, soup) : - # This method is called for every page, be it cartoon or TOC. We need to process each in their own way - cartoon = soup.find('div', attrs={'class' : 'cartoon'}) - if cartoon : - # It is a cartoon. Extract the title. - title = '' - img = soup.find('img', attrs = {'alt' : True}) - if img : - title = img['alt'] - - # Using the 'extra_css' displays it in the and not in the . See comment at the top of this class. Setting the style this way solves that. - tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')]) - tag.insert(0, title) - cartoon.insert(0, tag) - - # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier, - # and Calibre does not like that too much. 
As far as I can tell it goes into recursion and the result is an empty eBook. - select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) - if select : - select.extract() - - return cartoon - else : - # It is a TOC. Just return the whole lot. - return soup - - +#!/usr/bin/python +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + + +class FokkeEnSukkeRecipe(BasicNewsRecipe) : + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'nl' + description = u'Popular Dutch daily cartoon Fokke en Sukke' + + title = u'Fokke en Sukke' + no_stylesheets = True + # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the and not in the . My reader (Sony PRS-600) has a serious issue + # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me. + template_css = '' + INDEX = u'http://foksuk.nl' + + # This cover is not as nice as it could be, needs some work + #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif' + + keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})] + + def parse_index(self) : + # A list with daynames as they _can_ appear in the index + dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag'] + soup = self.index_to_soup(self.INDEX) + + # Find the links for the various cartoons for this week and loop through them + index = soup.find('div', attrs={'class' : 'selectcartoon'}) + links = index.findAll('a') + maxIndex = len(links) - 1 + articles = [] + for i in range(len(links)) : + # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice. + if i == 0 : + continue + + # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname. + # If there are two, there are three links in sequence: dayname 1 2. 
In that case we're interested in the last two. + if links[i].renderContents() in dayNames : + # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content + if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') : + # Got you! Add it to the list + article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''} + articles.append(article) + # If there is a '1', there should be a '2' as well, but better save than sorry + if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') : + # Got you! Add it to the list + article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url' : self.INDEX + links[i + 2]['href'], 'description' : ''} + articles.append(article) + else : + # There is only one cartoon for this day. Add it to the list. + article = {'title' : links[i].renderContents(), 'date' : u'', 'url' : self.INDEX + links[i]['href'], 'description' : ''} + articles.append(article) + # Might as well use the weeknumber as title + week = index.find('span', attrs={'class' : 'week'}).renderContents() + + return [[week, articles]] + + def preprocess_html(self, soup) : + # This method is called for every page, be it cartoon or TOC. We need to process each in their own way + cartoon = soup.find('div', attrs={'class' : 'cartoon'}) + if cartoon : + # It is a cartoon. Extract the title. + title = '' + img = soup.find('img', attrs = {'alt' : True}) + if img : + title = img['alt'] + + # Using the 'extra_css' displays it in the and not in the . See comment at the top of this class. Setting the style this way solves that. + tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')]) + tag.insert(0, title) + cartoon.insert(0, tag) + + # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier, + # and Calibre does not like that too much. 
As far as I can tell it goes into recursion and the result is an empty eBook. + select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) + if select : + select.extract() + + return cartoon + else : + # It is a TOC. Just return the whole lot. + return soup + + diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 75c545f8b5..178561fcb5 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -125,7 +125,7 @@ def add_pipeline_options(parser, plumber): 'extra_css', 'margin_top', 'margin_left', 'margin_right', 'margin_bottom', 'dont_justify', - 'insert_blank_line', 'remove_paragraph_spacing', + 'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size', 'asciiize', 'remove_header', 'header_regex', 'remove_footer', 'footer_regex', ] diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 2a3dfedd65..30cc42480c 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -310,6 +310,13 @@ OptionRecommendation(name='remove_paragraph_spacing', 'if the source file does not use paragraphs (

or

tags).') ), +OptionRecommendation(name='remove_paragraph_spacing_indent_size', + recommended_value=1.5, level=OptionRecommendation.LOW, + help=_('When calibre removes inter paragraph spacing, it automatically ' + 'sets a paragraph indent, to ensure that paragraphs can be easily ' + 'distinguished. This option controls the width of that indent.') + ), + OptionRecommendation(name='prefer_metadata_cover', recommended_value=False, level=OptionRecommendation.LOW, help=_('Use the cover detected from the source file in preference ' diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 464acbe0e0..fb32a1ccf9 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -278,7 +278,7 @@ class CSSFlattener(object): if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em' if self.context.remove_paragraph_spacing: - cssdict['text-indent'] = '1.5em' + cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size if cssdict: items = cssdict.items() items.sort() diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 05cf488617..cb8ae15298 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -256,7 +256,7 @@ class PML_HTMLizer(object): if code in self.SPAN_STATES: del spans[spans.index(code)] for c in divs+spans: - if state[c][0]: + if self.state[c][0]: if c in self.STATES_VALUE_REQ: text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1] else: @@ -265,7 +265,7 @@ class PML_HTMLizer(object): if code in self.STATES_VALUE_REQ: val = self.code_value(stream) text = self.STATES_TAGS[code][0] % val - state[code][1] = val + self.state[code][1] = val else: text = self.STATES_TAGS[code][0] diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py index a10a410b67..4d43f64910 100644 
--- a/src/calibre/gui2/convert/look_and_feel.py +++ b/src/calibre/gui2/convert/look_and_feel.py @@ -23,7 +23,7 @@ class LookAndFeelWidget(Widget, Ui_Form): 'font_size_mapping', 'line_height', 'linearize_tables', 'disable_font_rescaling', 'insert_blank_line', - 'remove_paragraph_spacing', 'input_encoding', + 'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding', 'asciiize'] ) self.db, self.book_id = db, book_id @@ -32,6 +32,8 @@ class LookAndFeelWidget(Widget, Ui_Form): self.opt_disable_font_rescaling.toggle() self.connect(self.button_font_key, SIGNAL('clicked()'), self.font_key_wizard) + self.opt_remove_paragraph_spacing.toggle() + self.opt_remove_paragraph_spacing.toggle() def font_key_wizard(self): from calibre.gui2.convert.font_key import FontKeyChooser diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui index d451cd9af0..84d587d7b2 100644 --- a/src/calibre/gui2/convert/look_and_feel.ui +++ b/src/calibre/gui2/convert/look_and_feel.ui @@ -14,7 +14,7 @@ Form - + &Disable font size rescaling @@ -31,7 +31,7 @@ - + pt @@ -63,7 +63,7 @@ - + @@ -107,7 +107,7 @@ - + pt @@ -127,12 +127,59 @@ + + + - - - Remove &spacing between paragraphs - - + + + + + Remove &spacing between paragraphs + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + + Indent size: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + + + + + <p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent. + + + em + + + 1 + + + + + + @@ -155,14 +202,14 @@ - + &Transliterate unicode characters to ASCII. 
- + Extra &CSS @@ -174,9 +221,6 @@ - - - @@ -216,5 +260,37 @@ + + opt_remove_paragraph_spacing + toggled(bool) + label_4 + setEnabled(bool) + + + 20 + 20 + + + 20 + 20 + + + + + opt_remove_paragraph_spacing + toggled(bool) + opt_remove_paragraph_spacing_indent_size + setEnabled(bool) + + + 20 + 20 + + + 20 + 20 + + + diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 30cc3b1445..b21d92b6aa 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -163,7 +163,8 @@ Paragraph spacing Normally, paragraphs in XHTML are rendered with a blank line between them and no leading text indent. |app| has a couple of options to control this. :guilabel:`Remove spacing between paragraphs` forcefully ensure that all paragraphs have no inter paragraph spacing. It also sets the text -indent to 1.5em to mark that start of every paragraph. :guilabel:`Insert blank line` does the +indent to 1.5em (can be changed) to mark the start of every paragraph. +:guilabel:`Insert blank line` does the opposite, guaranteeing that there is exactly one blank line between each pair of paragraphs. Both these options are very comprehensive, removing spacing, or inserting it for *all* paragraphs (technically
<p> and <div> tags). This is so that you can just set the option and be sure that