Merge from trunk

2026-06-07 06:25:26 -04:00 · 2011-05-24 08:12:56 +01:00
parent bd4b5ec6ea 044446b6c2
commit 3265f5e8c2
2 changed files with 37 additions and 0 deletions
@@ -0,0 +1,36 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+grrm.livejournal.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NotABlog(BasicNewsRecipe):
+    """Recipe settings for George R.R. Martin's "Not A Blog" (grrm.livejournal.com).
+
+    Declares the RSS feed to read, the metadata attached to the converted
+    output, and an HTML preprocessing hook that drops inline ``style``
+    attributes.
+    """
+    title                 = 'Not A Blog - George R.R. Martin'
+    __author__            = 'Darko Miletic'
+    description           = 'George R.R. Martin'
+    # NOTE(review): presumably the maximum article age in days -- confirm
+    # against the BasicNewsRecipe documentation.
+    oldest_article        = 15
+    max_articles_per_feed = 100
+    language              = 'en'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    # NOTE(review): presumably tells the base class to take article bodies
+    # straight from the feed entries instead of fetching each link -- confirm.
+    use_embedded_content  = True
+    publication_type      = 'blog'
+
+    # Metadata for the conversion step; 'comment' and 'language' reuse the
+    # class attributes defined above so the values stay in sync.
+    conversion_options = {
+                          'comment'  : description
+                        , 'tags'     : 'sf, fantasy, game of thrones'
+                        , 'publisher': 'George R.R. Martin'
+                        , 'language' : language
+                        }
+
+    # Single feed, given as a (title, url) pair.
+    feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]
+
+    def preprocess_html(self, soup):
+        """Strip inline ``style`` attributes, then hand off to ``adeify_images``.
+
+        ``soup`` is the parsed document; it must provide ``findAll`` and
+        item-style attribute access (presumably a BeautifulSoup tree -- confirm).
+        Returns whatever ``self.adeify_images(soup)`` returns; that method is
+        not defined in this class and is presumably inherited from
+        BasicNewsRecipe.
+        """
+        # style=True matches every element that carries a style attribute.
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
+
+
@@ -655,6 +655,7 @@ Some limitations of PDF input are:
    * Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
    * Some PDFs store their images upside down with a rotation instruction; |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
    * Links and Tables of Contents are not supported
+    * PDFs that use embedded non-Unicode fonts to represent non-English characters will result in garbled output for those characters

 To reiterate: **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an
 output ranging anywhere from decent to unusable, depending on the input PDF.