Merge upstream changes.

2025-11-25 07:45:01 -05:00 · 2008-12-18 16:13:30 -05:00 · 2008-12-18 16:13:30 -05:00 · c655b787b0
commit c655b787b0
parent 93d3eedfdd 30a11fdfaa
6 changed files with 43 additions and 4 deletions
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@@ -60,7 +60,7 @@ class Book(object):
    rpath        = book_metadata_field("path")
    id           = book_metadata_field("id", formatter=int)
    sourceid     = book_metadata_field("sourceid", formatter=int)
-    size         = book_metadata_field("size", formatter=int)
+    size         = book_metadata_field("size", formatter=lambda x : int(float(x)))
    # When setting this attribute you must use an epoch
    datetime     = book_metadata_field("date", formatter=strptime, setter=strftime)
    
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -32,7 +32,6 @@ from calibre.ebooks.lrf.html.table import Table
 from calibre import filename_to_utf8,  setup_cli_handlers, __appname__, \
                    fit_image, LoggingInterface, preferred_encoding
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre.ebooks.metadata.opf import OPFReader
 from calibre.devices.interface import Device
 from calibre.ebooks.lrf.html.color_map import lrs_color
 from calibre.ebooks.chardet import xml_to_unicode
@@ -106,6 +105,8 @@ class HTMLConverter(object, LoggingInterface):
                        (re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
                         strip_style_comments),
                         
+                        # Remove self closing script tags as they also mess up BeautifulSoup
+                        (re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
                        
                        ]
    # Fix Baen markup
@@ -334,7 +335,8 @@ class HTMLConverter(object, LoggingInterface):
                soup = BeautifulSoup(raw, 
                         convertEntities=BeautifulSoup.XHTML_ENTITIES,
                         markupMassage=nmassage)
-        
+            else:
+                raise
        if not self.baen and self.is_baen(soup):
            self.baen = True
            self.log_info(_('\tBaen file detected. Re-parsing...'))
--- a/src/calibre/ebooks/lrf/pylrs/pylrs.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py
@@ -1432,7 +1432,7 @@ class Page(LrsObject, LrsContainer):

        #print "page contents:", pageContent
        # ObjectList not needed and causes slowdown in SONY LRF renderer
-        p.appendLrfTag(LrfTag("ObjectList", pageContent))
+        #p.appendLrfTag(LrfTag("ObjectList", pageContent))
        p.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
        p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
        p.appendTagDict(self.settings)
--- a/src/calibre/gui2/images/news/linux_magazine.png
+++ b/src/calibre/gui2/images/news/linux_magazine.png
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -18,6 +18,7 @@ recipe_modules = [
           'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
           'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas',
           'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation',
+           'linux_magazine',
          ]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/linux_magazine.py
+++ b/src/calibre/web/feeds/recipes/linux_magazine.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+linux-magazine.com
+'''
+
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LinuxMagazine(BasicNewsRecipe):  # news recipe for linux-magazine.com, configured via class attributes
+    title                 = u'Linux Magazine'
+    __author__            = 'Darko Miletic'
+    description           = 'Linux news'    
+    oldest_article        = 7  # presumably an age limit in days -- confirm against BasicNewsRecipe
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    remove_tags_after = dict(name='div', attrs={'class':'end_intro'})  # matches <div class="end_intro">
+    remove_tags = [  # tag matchers: each dict gives a tag name plus attribute values
+                     dict(name='div' , attrs={'class':'end_intro' })
+                    ,dict(name='table'  , attrs={'width':'100%'})
+                  ]
+
+    feeds          = [(u'Linux Magazine Full Feed', u'http://www.linux-magazine.com/rss/feed/lmi_full')]  # (feed title, feed URL) pairs
+        
+    def print_version(self, url):  # returns the URL of the print page for `url`, or `url` itself if none is found
+        raw = self.browser.open(url).read()  # fetch the article page through the recipe's browser
+        soup = BeautifulSoup(raw.decode('utf8', 'replace'))  # decode as UTF-8 (bad bytes replaced) before parsing
+        print_link = soup.find('a', {'title':'Print this page'})  # the <a title="Print this page"> anchor, if any
+        if print_link is None:
+            return url  # no print link on the page: fall back to the original URL
+        return 'http://www.linux-magazine.com'+print_link['href']  # plain concatenation; assumes href is a site-relative path -- TODO confirm
+