Sync to trunk.

This commit is contained in:
John Schember 2009-07-08 06:26:31 -04:00
commit ccdc693b97
29 changed files with 367 additions and 118 deletions

View File

@ -308,14 +308,25 @@ def walk(dir):
yield os.path.join(record[0], f) yield os.path.join(record[0], f)
def strftime(fmt, t=None): def strftime(fmt, t=None):
''' A version of strtime that returns unicode strings. ''' ''' A version of strtime that returns unicode strings and tries to handle dates
before 1900 '''
if t is None: if t is None:
t = time.localtime() t = time.localtime()
early_year = t[0] < 1900
if early_year:
fmt = fmt.replace('%Y', '_early year hack##')
t = list(t)
orig_year = t[0]
t[0] = 1900
ans = None
if iswindows: if iswindows:
if isinstance(fmt, unicode): if isinstance(fmt, unicode):
fmt = fmt.encode('mbcs') fmt = fmt.encode('mbcs')
return plugins['winutil'][0].strftime(fmt, t) ans = plugins['winutil'][0].strftime(fmt, t)
return time.strftime(fmt, t).decode(preferred_encoding, 'replace') ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
if early_year:
ans = ans.replace('_early year hack##', str(orig_year))
return ans
def my_unichr(num): def my_unichr(num):
try: try:

View File

@ -155,6 +155,9 @@ class OutputProfile(Plugin):
# The image size for comics # The image size for comics
comic_screen_size = (584, 754) comic_screen_size = (584, 754)
# If True the MOBI renderer on the device supports MOBI indexing
supports_mobi_indexing = False
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):
return ', '.join(tags) return ', '.join(tags)
@ -254,6 +257,7 @@ class KindleOutput(OutputProfile):
dpi = 168.451 dpi = 168.451
fbase = 16 fbase = 16
fsizes = [12, 12, 14, 16, 18, 20, 22, 24] fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
supports_mobi_indexing = True
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):
@ -269,6 +273,7 @@ class KindleDXOutput(OutputProfile):
screen_size = (744, 1022) screen_size = (744, 1022)
dpi = 150.0 dpi = 150.0
comic_screen_size = (741, 1022) comic_screen_size = (741, 1022)
supports_mobi_indexing = True
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):

View File

@ -563,6 +563,8 @@ OptionRecommendation(name='list_recipes',
break break
self.read_user_metadata() self.read_user_metadata()
self.opts.no_inline_navbars = self.opts.output_profile.supports_mobi_indexing \
and self.output_fmt == 'mobi'
def flush(self): def flush(self):
try: try:

View File

@ -80,6 +80,8 @@ class EPUBOutput(OutputFormatPlugin):
]) ])
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
TITLEPAGE_COVER = '''\ TITLEPAGE_COVER = '''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
@ -134,6 +136,21 @@ class EPUBOutput(OutputFormatPlugin):
</body> </body>
</html> </html>
''' '''
def workaround_webkit_quirks(self):
from calibre.ebooks.oeb.base import XPath
for x in self.oeb.spine:
root = x.data
body = XPath('//h:body')(root)
if body:
body = body[0]
if not hasattr(body, 'xpath'):
continue
for pre in XPath('//h:pre')(body):
if not pre.text and len(pre) == 0:
pre.tag = 'div'
def convert(self, oeb, output_path, input_plugin, opts, log): def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb self.log, self.opts, self.oeb = log, opts, oeb
@ -146,6 +163,7 @@ class EPUBOutput(OutputFormatPlugin):
self.workaround_ade_quirks() self.workaround_ade_quirks()
self.workaround_webkit_quirks()
from calibre.ebooks.oeb.transforms.rescale import RescaleImages from calibre.ebooks.oeb.transforms.rescale import RescaleImages
RescaleImages()(oeb, opts) RescaleImages()(oeb, opts)

View File

@ -29,7 +29,7 @@ class LRFOptions(object):
self.use_metadata_cover = True self.use_metadata_cover = True
self.output = output self.output = output
self.ignore_tables = opts.linearize_tables self.ignore_tables = opts.linearize_tables
self.base_font_size = 0 self.base_font_size = opts.base_font_size
self.blank_after_para = opts.insert_blank_line self.blank_after_para = opts.insert_blank_line
self.use_spine = True self.use_spine = True
self.font_delta = 0 self.font_delta = 0

View File

@ -367,7 +367,7 @@ class MetaInformation(object):
if self.pubdate is not None: if self.pubdate is not None:
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))] ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
if self.rights is not None: if self.rights is not None:
ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))] ans += [(_('Rights'), unicode(self.rights))]
for i, x in enumerate(ans): for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans) return u'<table>%s</table>'%u'\n'.join(ans)

View File

@ -31,7 +31,7 @@ def metadata_from_formats(formats):
try: try:
return _metadata_from_formats(formats) return _metadata_from_formats(formats)
except: except:
mi = metadata_from_filename(formats[0]) mi = metadata_from_filename(list(formats)[0])
if not mi.authors: if not mi.authors:
mi.authors = [_('Unknown')] mi.authors = [_('Unknown')]
@ -126,14 +126,10 @@ def metadata_from_filename(name, pat=None):
mi.title = match.group('title') mi.title = match.group('title')
except IndexError: except IndexError:
pass pass
try:
mi.authors = [match.group('author')]
except IndexError:
pass
try: try:
au = match.group('authors') au = match.group('authors')
aus = string_to_authors(au) aus = string_to_authors(au)
mi.authors = authors mi.authors = aus
except IndexError: except IndexError:
pass pass
try: try:

View File

@ -452,9 +452,12 @@ class OPF(object):
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True): def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
if not hasattr(stream, 'read'): if not hasattr(stream, 'read'):
stream = open(stream, 'rb') stream = open(stream, 'rb')
raw = stream.read()
if not raw:
raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
self.basedir = self.base_dir = basedir self.basedir = self.base_dir = basedir
self.path_to_html_toc = self.html_toc_fragment = None self.path_to_html_toc = self.html_toc_fragment = None
raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True) raw, self.encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)
raw = raw[raw.find('<'):] raw = raw[raw.find('<'):]
self.root = etree.fromstring(raw, self.PARSER) self.root = etree.fromstring(raw, self.PARSER)
self.metadata = self.metadata_path(self.root) self.metadata = self.metadata_path(self.root)

View File

@ -48,11 +48,7 @@ class MOBIOutput(OutputFormatPlugin):
self.opts.mobi_periodical = False self.opts.mobi_periodical = False
def check_for_masthead(self): def check_for_masthead(self):
found = False found = 'masthead' in self.oeb.guide
for typ in self.oeb.guide:
if type == 'masthead':
found = True
break
if not found: if not found:
self.oeb.log.debug('No masthead found, generating default one...') self.oeb.log.debug('No masthead found, generating default one...')
from calibre.resources import server_resources from calibre.resources import server_resources
@ -76,12 +72,14 @@ class MOBIOutput(OutputFormatPlugin):
from calibre.ebooks.oeb.base import TOC from calibre.ebooks.oeb.base import TOC
toc = self.oeb.toc toc = self.oeb.toc
if toc and toc[0].klass != 'periodical': if toc and toc[0].klass != 'periodical':
start_href = self.oeb.spine[0].href
self.log('Converting TOC for MOBI periodical indexing...') self.log('Converting TOC for MOBI periodical indexing...')
articles = {} articles = {}
if toc.depth < 3: if toc.depth() < 3:
sections = [TOC(klass='section')] sections = [TOC(klass='section', title=_('All articles'),
href=start_href)]
for x in toc: for x in toc:
sections[0].append(x) sections[0].nodes.append(x)
else: else:
sections = list(toc) sections = list(toc)
for x in sections: for x in sections:
@ -92,7 +90,7 @@ class MOBIOutput(OutputFormatPlugin):
a.klass = 'article' a.klass = 'article'
articles[id(sec)].append(a) articles[id(sec)].append(a)
sec.nodes.remove(a) sec.nodes.remove(a)
root = TOC(klass='periodical', root = TOC(klass='periodical', href=start_href,
title=unicode(self.oeb.metadata.title[0])) title=unicode(self.oeb.metadata.title[0]))
for s in sections: for s in sections:
if articles[id(s)]: if articles[id(s)]:

View File

@ -301,7 +301,7 @@ class MobiReader(object):
root = html.fromstring(self.processed_html) root = html.fromstring(self.processed_html)
if root.xpath('descendant::p/descendant::p'): if root.xpath('descendant::p/descendant::p'):
from lxml.html import soupparser from lxml.html import soupparser
self.log.warning('Malformed markup, parsing using BeatifulSoup') self.log.warning('Malformed markup, parsing using BeautifulSoup')
root = soupparser.fromstring(self.processed_html) root = soupparser.fromstring(self.processed_html)
if root.tag != 'html': if root.tag != 'html':
@ -439,7 +439,12 @@ class MobiReader(object):
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>' self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = re.sub('\x14|\x15', '', self.processed_html) self.processed_html = re.sub('\x14|\x15|\x1c|\x1d', '', self.processed_html)
def ensure_unit(self, raw, unit='px'):
if re.search(r'\d+$', raw) is not None:
raw += unit
return raw
def upshift_markup(self, root): def upshift_markup(self, root):
self.log.debug('Converting style information to CSS...') self.log.debug('Converting style information to CSS...')
@ -469,13 +474,13 @@ class MobiReader(object):
if attrib.has_key('height'): if attrib.has_key('height'):
height = attrib.pop('height').strip() height = attrib.pop('height').strip()
if height: if height:
styles.append('margin-top: %s' % height) styles.append('margin-top: %s' % self.ensure_unit(height))
if attrib.has_key('width'): if attrib.has_key('width'):
width = attrib.pop('width').strip() width = attrib.pop('width').strip()
if width: if width:
styles.append('text-indent: %s' % width) styles.append('text-indent: %s' % self.ensure_unit(width))
if width.startswith('-'): if width.startswith('-'):
styles.append('margin-left: %s' % (width[1:])) styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
if attrib.has_key('align'): if attrib.has_key('align'):
align = attrib.pop('align').strip() align = attrib.pop('align').strip()
if align: if align:

View File

@ -379,7 +379,7 @@ class MobiWriter(object):
try: try:
self._generate_index() self._generate_index()
except: except:
self.oeb.log.exception('Failed to generate index') self._oeb.log.exception('Failed to generate index')
self._generate_images() self._generate_images()
@ -1178,40 +1178,29 @@ class MobiWriter(object):
''' '''
toc = self._oeb.toc toc = self._oeb.toc
nodes = list(toc.iter())[1:] nodes = list(toc.iter())[1:]
toc_conforms = True
for (i, child) in enumerate(nodes) : for (i, child) in enumerate(nodes) :
if self.opts.verbose > 3 : if child.klass == "periodical" and child.depth() != 3 or \
self._oeb.logger.info(" <title>: %-25.25s \tklass=%-15.15s \tdepth:%d playOrder=%03d" % \ child.klass == "section" and child.depth() != 2 or \
child.klass == "article" and child.depth() != 1 :
self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % \
(child.klass, child.depth()) )
self._oeb.logger.warn(" <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % \
(child.title, child.klass, child.depth(), child.play_order) ) (child.title, child.klass, child.depth(), child.play_order) )
toc_conforms = False
if child.klass == "periodical" and child.depth() != 3 :
self._oeb.logger.info('<navPoint class="periodical"> found at depth %d, nonconforming TOC' % \
child.depth() )
return False
if child.klass == "section" and child.depth() != 2 :
self._oeb.logger.info('<navPoint class="section"> found at depth %d, nonconforming TOC' % \
child.depth() )
return False
if child.klass == "article" and child.depth() != 1 :
self._oeb.logger.info('<navPoint class="article"> found at depth %d, nonconforming TOC' % \
child.depth() )
return False
# We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs # We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs
if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] : if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] :
self._oeb.logger.info('metadata missing timestamp needed for periodical') self._oeb.logger.info('metadata missing date/timestamp')
return False toc_conforms = False
# Periodicals also need a mastheadImage in the manifest if not 'masthead' in self._oeb.guide :
has_mastheadImage = 'masthead' in self._oeb.guide self._oeb.logger.info('mastheadImage missing from manifest')
toc_conforms = False
if not has_mastheadImage : self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming")
self._oeb.logger.info('mastheadImage missing from manifest, aborting periodical indexing') return toc_conforms
return False
self._oeb.logger.info('TOC structure and pubdate verified')
return True
def _generate_text(self): def _generate_text(self):
@ -1236,7 +1225,7 @@ class MobiWriter(object):
# Evaluate toc for conformance # Evaluate toc for conformance
if self.opts.mobi_periodical : if self.opts.mobi_periodical :
self._oeb.logger.info('--mobi-periodical specified, evaluating TOC for periodical conformance ...') self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...')
self._conforming_periodical_toc = self._evaluate_periodical_toc() self._conforming_periodical_toc = self._evaluate_periodical_toc()
# This routine decides whether to build flat or structured based on self._conforming_periodical_toc # This routine decides whether to build flat or structured based on self._conforming_periodical_toc
@ -1271,6 +1260,7 @@ class MobiWriter(object):
while breaks and (breaks[0] - offset) < RECORD_SIZE: while breaks and (breaks[0] - offset) < RECORD_SIZE:
# .pop returns item, removes it from list # .pop returns item, removes it from list
pbreak = (breaks.pop(0) - running) >> 3 pbreak = (breaks.pop(0) - running) >> 3
if self.opts.verbose > 2 :
self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
encoded = decint(pbreak, DECINT_FORWARD) encoded = decint(pbreak, DECINT_FORWARD)
record.write(encoded) record.write(encoded)
@ -1384,7 +1374,7 @@ class MobiWriter(object):
# 0x002 MOBI book (chapter - chapter navigation) # 0x002 MOBI book (chapter - chapter navigation)
# 0x101 News - Hierarchical navigation with sections and articles # 0x101 News - Hierarchical navigation with sections and articles
# 0x102 News feed - Flat navigation # 0x102 News feed - Flat navigation
# 0x103 News magazine - same as 1x101 # 0x103 News magazine - same as 0x101
# 0xC - 0xF : Text encoding (65001 is utf-8) # 0xC - 0xF : Text encoding (65001 is utf-8)
# 0x10 - 0x13 : UID # 0x10 - 0x13 : UID
# 0x14 - 0x17 : Generator version # 0x14 - 0x17 : Generator version
@ -1545,7 +1535,7 @@ class MobiWriter(object):
exth.write(data) exth.write(data)
nrecs += 1 nrecs += 1
if term == 'rights' : if term == 'rights' :
rights = unicode(oeb.metadata.rights[0]) rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
exth.write(rights) exth.write(rights)
@ -1614,7 +1604,7 @@ class MobiWriter(object):
self._write(record) self._write(record)
def _generate_index(self): def _generate_index(self):
self._oeb.log('Generating primary index ...') self._oeb.log('Generating INDX ...')
self._primary_index_record = None self._primary_index_record = None
# Build the NCXEntries and INDX # Build the NCXEntries and INDX
@ -1953,6 +1943,8 @@ class MobiWriter(object):
first = False first = False
else : else :
self._oeb.logger.info('Generating flat CTOC ...') self._oeb.logger.info('Generating flat CTOC ...')
previousOffset = -1
currentOffset = 0
for (i, child) in enumerate(toc.iter()): for (i, child) in enumerate(toc.iter()):
# Only add chapters or articles at depth==1 # Only add chapters or articles at depth==1
# no class defaults to 'chapter' # no class defaults to 'chapter'
@ -1961,8 +1953,20 @@ class MobiWriter(object):
if self.opts.verbose > 2 : if self.opts.verbose > 2 :
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \ self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
(child.klass, child.depth(), child) ) (child.klass, child.depth(), child) )
# Test to see if this child's offset is the same as the previous child's
# offset, skip it
h = child.href
currentOffset = self._id_offsets[h]
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
if currentOffset != previousOffset :
self._add_flat_ctoc_node(child, ctoc) self._add_flat_ctoc_node(child, ctoc)
reduced_toc.append(child) reduced_toc.append(child)
previousOffset = currentOffset
else :
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
first = False first = False
else : else :
if self.opts.verbose > 2 : if self.opts.verbose > 2 :

View File

@ -1468,7 +1468,9 @@ class TOC(object):
node.to_opf1(tour) node.to_opf1(tour)
return tour return tour
def to_ncx(self, parent): def to_ncx(self, parent=None):
if parent is None:
parent = etree.Element(NCX('navMap'))
for node in self.nodes: for node in self.nodes:
id = node.id or unicode(uuid.uuid4()) id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': str(node.play_order)} attrib = {'id': id, 'playOrder': str(node.play_order)}

View File

@ -9,6 +9,8 @@ from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OutputFormatPlugin
from calibre import CurrentDir from calibre import CurrentDir
from calibre.customize.conversion import OptionRecommendation
from urllib import unquote from urllib import unquote
class OEBOutput(OutputFormatPlugin): class OEBOutput(OutputFormatPlugin):
@ -17,6 +19,9 @@ class OEBOutput(OutputFormatPlugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
file_type = 'oeb' file_type = 'oeb'
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):
self.log, self.opts = log, opts self.log, self.opts = log, opts
if not os.path.exists(output_path): if not os.path.exists(output_path):

View File

@ -169,7 +169,8 @@ class Stylizer(object):
if not matches and class_sel_pat.match(text): if not matches and class_sel_pat.match(text):
found = False found = False
for x in tree.xpath('//*[@class]'): for x in tree.xpath('//*[@class]'):
if text.lower().endswith('.'+x.get('class').lower()): if text.lower().endswith('.'+x.get('class').lower()) and \
text.lower() != text:
matches.append(x) matches.append(x)
found = True found = True
if found: if found:

View File

@ -34,7 +34,8 @@ class Clean(object):
for x in list(self.oeb.guide): for x in list(self.oeb.guide):
href = urldefrag(self.oeb.guide[x].href)[0] href = urldefrag(self.oeb.guide[x].href)[0]
if x.lower() not in ('cover', 'titlepage'): if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc',
'title-page', 'copyright-page'):
self.oeb.guide.remove(x) self.oeb.guide.remove(x)

View File

@ -16,7 +16,7 @@ from lxml import etree
from lxml.cssselect import CSSSelector from lxml.cssselect import CSSSelector
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \ from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
urldefrag, rewrite_links, urlunquote, barename urldefrag, rewrite_links, urlunquote, barename, XHTML
from calibre.ebooks.epub import rules from calibre.ebooks.epub import rules
XPath = functools.partial(_XPath, namespaces=NAMESPACES) XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@ -216,7 +216,25 @@ class FlowSplitter(object):
self.trees.append(before) self.trees.append(before)
tree = after tree = after
self.trees.append(tree) self.trees.append(tree)
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())] trees, ids = [], set([])
for tree in self.trees:
root = tree.getroot()
if self.is_page_empty(root):
discarded_ids = root.xpath('//*[@id]')
for x in discarded_ids:
x = x.get('id')
if not x.startswith('calibre_'):
ids.add(x)
else:
if ids:
body = self.get_body(root)
if body is not None:
for x in ids:
body.insert(0, body.makeelement(XHTML('div'),
id=x, style='height:0pt'))
ids = set([])
trees.append(tree)
self.trees = trees
def get_body(self, root): def get_body(self, root):
body = root.xpath('//h:body', namespaces=NAMESPACES) body = root.xpath('//h:body', namespaces=NAMESPACES)

View File

@ -107,8 +107,6 @@ class Adder(QObject):
self.callback(self.paths, self.names, self.infos) self.callback(self.paths, self.names, self.infos)
self.callback_called = True self.callback_called = True
def update(self): def update(self):
if not self.ids: if not self.ids:
self.timer.stop() self.timer.stop()

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 306 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 553 B

View File

@ -657,6 +657,8 @@ class LibraryDatabase2(LibraryDatabase):
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author name = title + ' - ' + author
while name.endswith('.'):
name = name[:-1]
return name return name
def rmtree(self, path): def rmtree(self, path):
@ -1074,6 +1076,8 @@ class LibraryDatabase2(LibraryDatabase):
self.set_isbn(id, mi.isbn, notify=False) self.set_isbn(id, mi.isbn, notify=False)
if mi.series_index: if mi.series_index:
self.set_series_index(id, mi.series_index, notify=False) self.set_series_index(id, mi.series_index, notify=False)
if mi.pubdate:
self.set_pubdate(id, mi.pubdate, notify=False)
if getattr(mi, 'timestamp', None) is not None: if getattr(mi, 'timestamp', None) is not None:
self.set_timestamp(id, mi.timestamp, notify=False) self.set_timestamp(id, mi.timestamp, notify=False)
self.set_path(id, True) self.set_path(id, True)
@ -1734,7 +1738,7 @@ books_series_link feeds
formats = self.find_books_in_directory(dirpath, True) formats = self.find_books_in_directory(dirpath, True)
if not formats: if not formats:
return return
formats = list(formats)
mi = metadata_from_formats(formats) mi = metadata_from_formats(formats)
if mi.title is None: if mi.title is None:
return return

View File

@ -366,10 +366,9 @@ class LibraryServer(object):
@expose @expose
def index(self, **kwargs): def index(self, **kwargs):
'The / URL' 'The / URL'
stanza = cherrypy.request.headers.get('Stanza-Device-Name', 919) want_opds = cherrypy.request.headers.get('Stanza-Device-Name', 919) != \
if stanza == 919: 919 or cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919
return self.static('index.html') return self.stanza() if want_opds else self.static('index.html')
return self.stanza()
@expose @expose

View File

@ -469,6 +469,7 @@ class BasicNewsRecipe(Recipe):
self.username = options.username self.username = options.username
self.password = options.password self.password = options.password
self.lrf = options.lrf self.lrf = options.lrf
self.include_navbars = not options.no_inline_navbars
self.output_dir = os.path.abspath(self.output_dir) self.output_dir = os.path.abspath(self.output_dir)
if options.test: if options.test:
@ -539,7 +540,7 @@ class BasicNewsRecipe(Recipe):
if first_fetch and job_info: if first_fetch and job_info:
url, f, a, feed_len = job_info url, f, a, feed_len = job_info
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None and self.include_navbars:
templ = self.navbar.generate(False, f, a, feed_len, templ = self.navbar.generate(False, f, a, feed_len,
not self.has_single_feed, not self.has_single_feed,
url, __appname__, url, __appname__,
@ -907,6 +908,7 @@ class BasicNewsRecipe(Recipe):
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None:
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
if self.include_navbars:
templ = self.navbar.generate(True, num, j, len(f), templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed, not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix, a.orig_url, __appname__, prefix=prefix,
@ -923,7 +925,7 @@ class BasicNewsRecipe(Recipe):
if po is None: if po is None:
self.play_order_counter += 1 self.play_order_counter += 1
po = self.play_order_counter po = self.play_order_counter
desc = f.description desc = getattr(f, 'description', None)
if not desc: if not desc:
desc = None desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None, feed_index(i, toc.add_item('feed_%d/index.html'%i, None,

View File

@ -51,9 +51,11 @@ recipe_modules = ['recipe_' + r for r in (
'theeconomictimes_india', '7dias', 'buenosaireseconomico', 'theeconomictimes_india', '7dias', 'buenosaireseconomico',
'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres', 'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate', 'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
'fastcompany', 'accountancyage', 'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
'eltiempo_hn',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup

View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.tiempo.hn
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class ElTiempoHn(BasicNewsRecipe):
    """News recipe for El Tiempo (www.tiempo.hn), Honduras."""

    # Descriptive metadata for the generated publication.
    title = 'El Tiempo - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'El Tiempo'
    category = 'news, politics, Honduras'
    language = _('Spanish')

    # Values written onto the <html> element and the injected
    # Content-Language <meta> tag by preprocess_html().
    lang = 'es-HN'
    direction = 'ltr'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Option list / option text built from the metadata above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="%s"\ncomments="%s"\ntags="%s"\n'
        'linearize_tables=True\npretty_print=True\n'
        'override_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}'
        ' img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"'
    ) % (publisher, description, category)

    # Tag filters: elements to drop and the container to keep.
    remove_tags = [dict(name=['form', 'object', 'embed', 'base'])]
    keep_only_tags = [dict(name='td', attrs={'id': 'mainbodycont'})]

    feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]

    def preprocess_html(self, soup):
        """Tag the page with language/charset information and drop inline styles.

        Sets ``lang`` and ``dir`` on the ``<html>`` element, inserts
        Content-Language and Content-Type ``<meta>`` tags as the first two
        children of ``<head>``, and deletes every ``style`` attribute.

        Returns whatever ``self.adeify_images(soup)`` returns (helper defined
        outside this block).
        """
        root = soup.html
        root['lang'] = self.lang
        root['dir'] = self.direction

        meta_tags = (
            Tag(soup, 'meta', [("http-equiv", "Content-Language"),
                               ("content", self.lang)]),
            Tag(soup, 'meta', [("http-equiv", "Content-Type"),
                               ("content", "text/html; charset=utf-8")]),
        )
        # Language first, charset second, both ahead of existing <head> content.
        for position, meta in enumerate(meta_tags):
            soup.head.insert(position, meta)

        for styled in soup.findAll(style=True):
            del styled['style']

        return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.laprensahn.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class LaPrensaHn(BasicNewsRecipe):
    """News recipe for La Prensa (www.laprensahn.com), Honduras."""

    # Descriptive metadata for the generated publication.
    title = 'La Prensa - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'La Prensa'
    category = 'news, politics, Honduras'
    language = _('Spanish')

    # Values written onto the <html> element and the injected
    # Content-Language <meta> tag by preprocess_html().
    lang = 'es-HN'
    direction = 'ltr'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Option list / option text built from the metadata above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="%s"\ncomments="%s"\ntags="%s"\n'
        'pretty_print=True\n'
        'override_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
    ) % (publisher, description, category)

    # Tag filters: elements to drop and the containers to keep.
    remove_tags = [dict(name=['form', 'object', 'embed'])]
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'titulo1'}),
        dict(name='div', attrs={'class': ['sumario11', 'hora', 'texto']}),
    ]

    feeds = [(u'Noticias', u'http://feeds.feedburner.com/laprensa_titulares')]

    def preprocess_html(self, soup):
        """Tag the page with language/charset information and drop inline styles.

        Sets ``lang`` and ``dir`` on the ``<html>`` element, inserts
        Content-Language and Content-Type ``<meta>`` tags as the first two
        children of ``<head>``, deletes every ``style`` attribute, and returns
        the same soup object.
        """
        root = soup.html
        root['lang'] = self.lang
        root['dir'] = self.direction

        meta_tags = (
            Tag(soup, 'meta', [("http-equiv", "Content-Language"),
                               ("content", self.lang)]),
            Tag(soup, 'meta', [("http-equiv", "Content-Type"),
                               ("content", "text/html; charset=utf-8")]),
        )
        # Language first, charset second, both ahead of existing <head> content.
        for position, meta in enumerate(meta_tags):
            soup.head.insert(position, meta)

        for styled in soup.findAll(style=True):
            del styled['style']

        return soup

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.latribuna.hn
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class LaTribuna(BasicNewsRecipe):
    """News recipe for La Tribuna (www.latribuna.hn), Honduras."""

    # Descriptive metadata for the generated publication.
    title = 'La Tribuna - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'La Tribuna'
    category = 'news, politics, Honduras'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    language = _('Spanish')

    # Values written onto the <html> element and the injected
    # Content-Language <meta> tag by preprocess_html().
    lang = 'es-HN'
    direction = 'ltr'

    # Option list / option text built from the metadata above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    # Tag filters: elements to drop and the containers to keep.
    remove_tags = [dict(name=['form', 'object', 'embed'])]
    keep_only_tags = [
        dict(name='p', attrs={'id': ['BlogTitle', 'BlogDate']}),
        dict(name='div', attrs={'id': 'BlogContent'}),
    ]

    feeds = [(u'Noticias', u'http://www.latribuna.hn/web2.0/?feed=rss')]

    def print_version(self, url):
        """Return *url* with the ``print=1`` query parameter appended.

        Uses ``&`` when *url* already carries a query string and ``?``
        otherwise, so the result is a well-formed URL in both cases.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'print=1'

    def preprocess_html(self, soup):
        """Tag the page with language/charset information and drop inline styles.

        Sets ``lang`` and ``dir`` on the ``<html>`` element, inserts
        Content-Language and Content-Type ``<meta>`` tags as the first two
        children of ``<head>``, deletes every ``style`` attribute, and returns
        the same soup object.
        """
        soup.html['lang'] = self.lang
        soup.html['dir'] = self.direction
        mlang = Tag(soup, 'meta', [("http-equiv", "Content-Language"), ("content", self.lang)])
        mcharset = Tag(soup, 'meta', [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, mlang)
        soup.head.insert(1, mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_cover_url(self):
        """Return the cover image URL from the site's front page, or None.

        Looks for ``<div class="portada_impresa">`` and reads the ``src`` of
        the ``<img>`` nested in its first ``<a>``. Any missing piece (div,
        link, image or ``src`` attribute) yields ``None`` instead of raising.
        """
        soup = self.index_to_soup('http://www.latribuna.hn/web2.0/')
        cover_item = soup.find('div', attrs={'class': 'portada_impresa'})
        if not cover_item:
            return None
        link = cover_item.a
        image = link.img if link is not None else None
        if image is None:
            return None
        # .get() avoids a KeyError when the <img> has no src attribute.
        return image.get('src', None)

View File

@ -53,6 +53,10 @@ class WallStreetJournal(BasicNewsRecipe):
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']): for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div' tag.name = 'div'
for tag in soup.findAll('div', dict(id=["articleImage_1", "articleImage_2", "articleImage_3", "articleImage_4", "articleImage_5", "articleImage_6", "articleImage_7"])):
tag.extract()
return soup return soup
def get_article_url(self, article): def get_article_url(self, article):