Merge upstream changes.

This commit is contained in:
Marshall T. Vandegrift 2008-12-18 16:13:30 -05:00
commit c655b787b0
6 changed files with 43 additions and 4 deletions

View File

@ -60,7 +60,7 @@ class Book(object):
rpath = book_metadata_field("path")
id = book_metadata_field("id", formatter=int)
sourceid = book_metadata_field("sourceid", formatter=int)
size = book_metadata_field("size", formatter=int)
size = book_metadata_field("size", formatter=lambda x : int(float(x)))
# When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime)

View File

@ -32,7 +32,6 @@ from calibre.ebooks.lrf.html.table import Table
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
fit_image, LoggingInterface, preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.opf import OPFReader
from calibre.devices.interface import Device
from calibre.ebooks.lrf.html.color_map import lrs_color
from calibre.ebooks.chardet import xml_to_unicode
@ -106,6 +105,8 @@ class HTMLConverter(object, LoggingInterface):
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
strip_style_comments),
# Remove self closing script tags as they also mess up BeautifulSoup
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
]
# Fix Baen markup
@ -334,7 +335,8 @@ class HTMLConverter(object, LoggingInterface):
soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.XHTML_ENTITIES,
markupMassage=nmassage)
else:
raise
if not self.baen and self.is_baen(soup):
self.baen = True
self.log_info(_('\tBaen file detected. Re-parsing...'))

View File

@ -1432,7 +1432,7 @@ class Page(LrsObject, LrsContainer):
#print "page contents:", pageContent
# ObjectList not needed and causes slowdown in SONY LRF renderer
p.appendLrfTag(LrfTag("ObjectList", pageContent))
#p.appendLrfTag(LrfTag("ObjectList", pageContent))
p.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
p.appendTagDict(self.settings)

Binary file not shown.

After

Width:  |  Height:  |  Size: 394 B

View File

@ -18,6 +18,7 @@ recipe_modules = [
'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas',
'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation',
'linux_magazine',
]
import re, imp, inspect, time, os

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
linux-magazine.com
'''
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class LinuxMagazine(BasicNewsRecipe):
    '''
    News recipe for linux-magazine.com, driven by the site's full RSS feed.

    The class attributes below configure the BasicNewsRecipe base class;
    print_version() swaps each article URL for its printer-friendly page
    when the article links to one.
    '''

    # Recipe identity.
    title = u'Linux Magazine'
    __author__ = 'Darko Miletic'
    description = 'Linux news'

    # Download settings consumed by the base class.
    # NOTE(review): oldest_article is presumably measured in days -- confirm
    # against the BasicNewsRecipe documentation.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Clean-up rules: the 'end_intro' div is used both as the cut-off marker
    # and as a tag to remove, together with any full-width table.
    remove_tags_after = dict(name='div', attrs={'class':'end_intro'})
    remove_tags = [
        dict(name='div', attrs={'class':'end_intro'}),
        dict(name='table', attrs={'width':'100%'}),
    ]

    feeds = [(u'Linux Magazine Full Feed', u'http://www.linux-magazine.com/rss/feed/lmi_full')]

    def print_version(self, url):
        '''
        Return the URL of the printable version of the article at `url`.

        The article page is fetched with self.browser and searched for an
        anchor titled 'Print this page'. When one is found, its href is
        appended to the site root; otherwise `url` is returned unchanged.
        '''
        page = self.browser.open(url).read()
        # Undecodable bytes are replaced rather than raising.
        markup = page.decode('utf8', 'replace')
        anchor = BeautifulSoup(markup).find('a', {'title':'Print this page'})
        if anchor is not None:
            return 'http://www.linux-magazine.com' + anchor['href']
        return url