Merge from trunk

This commit is contained in:
Charles Haley 2011-05-24 08:12:56 +01:00
commit 3265f5e8c2
2 changed files with 37 additions and 0 deletions

36
recipes/grrm.recipe Normal file
View File

@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
grrm.livejournal.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NotABlog(BasicNewsRecipe):
    '''
    Recipe for the "Not A Blog" LiveJournal of George R.R. Martin,
    built from the RSS feed at grrm.livejournal.com.
    '''

    # Descriptive metadata for the recipe.
    title = 'Not A Blog - George R.R. Martin'
    __author__ = 'Darko Miletic'
    description = 'George R.R. Martin'
    publication_type = 'blog'
    language = 'en'
    encoding = 'utf-8'

    # Feed source and download settings (interpreted by BasicNewsRecipe).
    feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    # Must be defined after `description` and `language`, which it reuses.
    conversion_options = {
        'comment': description,
        'tags': 'sf, fantasy, game of thrones',
        'publisher': 'George R.R. Martin',
        'language': language,
    }

    def preprocess_html(self, soup):
        '''
        Remove the ``style`` attribute from every element of ``soup`` that
        carries one, then return the result of ``self.adeify_images(soup)``.
        '''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        # adeify_images is inherited; presumably it post-processes <img>
        # tags -- confirm against the BasicNewsRecipe documentation.
        cleaned = self.adeify_images(soup)
        return cleaned

View File

@ -655,6 +655,7 @@ Some limitations of PDF input are:
* Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
* Some PDFs store their images upside down with a rotation instruction; |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
* Links and Tables of Contents are not supported
* PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters
To reiterate, **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an
output ranging anywhere from decent to unusable, depending on the input PDF.