Merge from trunk

This commit is contained in:
Charles Haley
2011-05-24 08:12:56 +01:00
2 changed files with 37 additions and 0 deletions
+36
View File
@@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
grrm.livejournal.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NotABlog(BasicNewsRecipe):
    """Recipe for George R.R. Martin's "Not A Blog" (grrm.livejournal.com).

    Articles come from the single LiveJournal RSS feed listed in ``feeds``.
    """

    # Descriptive metadata for the recipe.
    title = 'Not A Blog - George R.R. Martin'
    __author__ = 'Darko Miletic'
    description = 'George R.R. Martin'
    language = 'en'
    publication_type = 'blog'

    # Fetch limits and content-handling switches.
    oldest_article = 15
    max_articles_per_feed = 100
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True

    # 'comment' and 'language' reuse the class-level values defined above.
    conversion_options = {
        'comment': description,
        'tags': 'sf, fantasy, game of thrones',
        'publisher': 'George R.R. Martin',
        'language': language,
    }

    feeds = [
        (u'Posts', u'http://grrm.livejournal.com/data/rss'),
    ]

    def preprocess_html(self, soup):
        """Drop every inline ``style`` attribute, then adeify the images.

        ``soup`` is modified in place; the return value is whatever
        ``self.adeify_images`` returns for it.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return self.adeify_images(soup)
+1
View File
@@ -655,6 +655,7 @@ Some limitations of PDF input are:
* Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
* Some PDFs store their images upside down with a rotation instruction. |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
* Links and Tables of Contents are not supported
* PDFs that use embedded non-Unicode fonts to represent non-English characters will result in garbled output for those characters
To reiterate, **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an
output ranging anywhere from decent to unusable, depending on the input PDF.