diff --git a/src/calibre/devices/prs505/books.py b/src/calibre/devices/prs505/books.py index f42f1b5513..b63b089fdd 100644 --- a/src/calibre/devices/prs505/books.py +++ b/src/calibre/devices/prs505/books.py @@ -60,7 +60,7 @@ class Book(object): rpath = book_metadata_field("path") id = book_metadata_field("id", formatter=int) sourceid = book_metadata_field("sourceid", formatter=int) - size = book_metadata_field("size", formatter=int) + size = book_metadata_field("size", formatter=lambda x : int(float(x))) # When setting this attribute you must use an epoch datetime = book_metadata_field("date", formatter=strptime, setter=strftime) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 2554d91f15..8eeff46d79 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -32,7 +32,6 @@ from calibre.ebooks.lrf.html.table import Table from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \ fit_image, LoggingInterface, preferred_encoding from calibre.ptempfile import PersistentTemporaryFile -from calibre.ebooks.metadata.opf import OPFReader from calibre.devices.interface import Device from calibre.ebooks.lrf.html.color_map import lrs_color from calibre.ebooks.chardet import xml_to_unicode @@ -106,6 +105,8 @@ class HTMLConverter(object, LoggingInterface): (re.compile(r'()', re.IGNORECASE|re.DOTALL), strip_style_comments), + # Remove self closing script tags as they also mess up BeautifulSoup + (re.compile(r'(?i)]+?/>'), lambda match: ''), ] # Fix Baen markup @@ -334,7 +335,8 @@ class HTMLConverter(object, LoggingInterface): soup = BeautifulSoup(raw, convertEntities=BeautifulSoup.XHTML_ENTITIES, markupMassage=nmassage) - + else: + raise if not self.baen and self.is_baen(soup): self.baen = True self.log_info(_('\tBaen file detected. Re-parsing...')) diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py index 081c5a5179..60f8d21336 100644 --- a/src/calibre/ebooks/lrf/pylrs/pylrs.py +++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py @@ -1432,7 +1432,7 @@ class Page(LrsObject, LrsContainer): #print "page contents:", pageContent # ObjectList not needed and causes slowdown in SONY LRF renderer - p.appendLrfTag(LrfTag("ObjectList", pageContent)) + #p.appendLrfTag(LrfTag("ObjectList", pageContent)) p.appendLrfTag(LrfTag("Link", self.pageStyle.objId)) p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId())) p.appendTagDict(self.settings) diff --git a/src/calibre/gui2/images/news/linux_magazine.png b/src/calibre/gui2/images/news/linux_magazine.png new file mode 100644 index 0000000000..ef6ed8c3fe Binary files /dev/null and b/src/calibre/gui2/images/news/linux_magazine.png differ diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 6d4f399d10..ec2a609eee 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -18,6 +18,7 @@ recipe_modules = [ 'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times', 'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas', 'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation', + 'linux_magazine', ] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/linux_magazine.py b/src/calibre/web/feeds/recipes/linux_magazine.py new file mode 100644 index 0000000000..8b6d6ba81f --- /dev/null +++ b/src/calibre/web/feeds/recipes/linux_magazine.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008, Darko Miletic ' +''' +linux-magazine.com +''' + +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.web.feeds.news import BasicNewsRecipe + +class LinuxMagazine(BasicNewsRecipe): + title = u'Linux Magazine' + __author__ = 'Darko Miletic' + description = 'Linux news' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + + remove_tags_after = dict(name='div', attrs={'class':'end_intro'}) + remove_tags = [ + dict(name='div' , attrs={'class':'end_intro' }) + ,dict(name='table' , attrs={'width':'100%'}) + ] + + feeds = [(u'Linux Magazine Full Feed', u'http://www.linux-magazine.com/rss/feed/lmi_full')] + + def print_version(self, url): + raw = self.browser.open(url).read() + soup = BeautifulSoup(raw.decode('utf8', 'replace')) + print_link = soup.find('a', {'title':'Print this page'}) + if print_link is None: + return url + return 'http://www.linux-magazine.com'+print_link['href'] +