mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Merge upstream changes.
This commit is contained in:
commit
c655b787b0
@ -60,7 +60,7 @@ class Book(object):
|
||||
rpath = book_metadata_field("path")
|
||||
id = book_metadata_field("id", formatter=int)
|
||||
sourceid = book_metadata_field("sourceid", formatter=int)
|
||||
size = book_metadata_field("size", formatter=int)
|
||||
size = book_metadata_field("size", formatter=lambda x : int(float(x)))
|
||||
# When setting this attribute you must use an epoch
|
||||
datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
|
||||
|
||||
|
@ -32,7 +32,6 @@ from calibre.ebooks.lrf.html.table import Table
|
||||
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
|
||||
fit_image, LoggingInterface, preferred_encoding
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.ebooks.metadata.opf import OPFReader
|
||||
from calibre.devices.interface import Device
|
||||
from calibre.ebooks.lrf.html.color_map import lrs_color
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
@ -106,6 +105,8 @@ class HTMLConverter(object, LoggingInterface):
|
||||
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
|
||||
strip_style_comments),
|
||||
|
||||
# Remove self closing script tags as they also mess up BeautifulSoup
|
||||
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
|
||||
|
||||
]
|
||||
# Fix Baen markup
|
||||
@ -334,7 +335,8 @@ class HTMLConverter(object, LoggingInterface):
|
||||
soup = BeautifulSoup(raw,
|
||||
convertEntities=BeautifulSoup.XHTML_ENTITIES,
|
||||
markupMassage=nmassage)
|
||||
|
||||
else:
|
||||
raise
|
||||
if not self.baen and self.is_baen(soup):
|
||||
self.baen = True
|
||||
self.log_info(_('\tBaen file detected. Re-parsing...'))
|
||||
|
@ -1432,7 +1432,7 @@ class Page(LrsObject, LrsContainer):
|
||||
|
||||
#print "page contents:", pageContent
|
||||
# ObjectList not needed and causes slowdown in SONY LRF renderer
|
||||
p.appendLrfTag(LrfTag("ObjectList", pageContent))
|
||||
#p.appendLrfTag(LrfTag("ObjectList", pageContent))
|
||||
p.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
|
||||
p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
|
||||
p.appendTagDict(self.settings)
|
||||
|
BIN
src/calibre/gui2/images/news/linux_magazine.png
Normal file
BIN
src/calibre/gui2/images/news/linux_magazine.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 394 B |
@ -18,6 +18,7 @@ recipe_modules = [
|
||||
'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
|
||||
'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas',
|
||||
'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation',
|
||||
'linux_magazine',
|
||||
]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
36
src/calibre/web/feeds/recipes/linux_magazine.py
Normal file
36
src/calibre/web/feeds/recipes/linux_magazine.py
Normal file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
linux-magazine.com
|
||||
'''
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LinuxMagazine(BasicNewsRecipe):
    '''
    News recipe for linux-magazine.com.

    Declares a single RSS feed and rewrites each article URL to the
    address behind the page's "Print this page" link when one is present.
    '''

    # Recipe metadata.
    title = u'Linux Magazine'
    __author__ = 'Darko Miletic'
    description = 'Linux news'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Clean-up rules: the 'end_intro' div is both the cut-off marker and
    # one of the tags removed, together with full-width tables.
    remove_tags_after = {'name': 'div', 'attrs': {'class':'end_intro'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class':'end_intro'}},
        {'name': 'table', 'attrs': {'width':'100%'}},
    ]

    feeds = [
        (u'Linux Magazine Full Feed',
         u'http://www.linux-magazine.com/rss/feed/lmi_full'),
    ]

    def print_version(self, url):
        '''
        Return the URL of the print version of the article at `url`.

        The article page is fetched and parsed; if it contains an anchor
        whose title is 'Print this page', that anchor's href is appended
        to the site root. Otherwise `url` is returned unchanged.
        '''
        page = self.browser.open(url).read()
        # Undecodable bytes are replaced rather than raising.
        markup = page.decode('utf8', 'replace')
        anchor = BeautifulSoup(markup).find('a', {'title':'Print this page'})
        if anchor is not None:
            return 'http://www.linux-magazine.com' + anchor['href']
        return url
|
||||
|
Loading…
x
Reference in New Issue
Block a user