diff --git a/recipes/grrm.recipe b/recipes/grrm.recipe
new file mode 100644
index 0000000000..a315aa9f32
--- /dev/null
+++ b/recipes/grrm.recipe
@@ -0,0 +1,47 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Darko Miletic '
+'''
+grrm.livejournal.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NotABlog(BasicNewsRecipe):
+    """News recipe for the RSS feed published at grrm.livejournal.com."""
+
+    title = 'Not A Blog - George R.R. Martin'
+    __author__ = 'Darko Miletic'
+    description = 'George R.R. Martin'
+    publication_type = 'blog'
+    language = 'en'
+
+    oldest_article = 15
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = True
+
+    # The comment and language entries reuse the class attributes above.
+    conversion_options = {
+        'comment': description,
+        'tags': 'sf, fantasy, game of thrones',
+        'publisher': 'George R.R. Martin',
+        'language': language,
+    }
+
+    feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]
+
+    def preprocess_html(self, soup):
+        """Drop inline ``style`` attributes from the parsed page.
+
+        Every element carrying a style attribute has it deleted; the soup is
+        then handed to adeify_images and that call's result is returned.
+        """
+        styled_tags = soup.findAll(style=True)
+        for tag in styled_tags:
+            del tag['style']
+        return self.adeify_images(soup)
+
+
diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index 73358e0f72..540da0fc9a 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -655,6 +655,7 @@ Some limitations of PDF input are:
     * Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
     * Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
     * Links and Tables of Contents are not supported
+    * PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters
 
 To re-iterate **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an output ranging anywhere from decent to unusable, depending on the input PDF.
 