mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
ccdc693b97
@ -308,14 +308,25 @@ def walk(dir):
|
|||||||
yield os.path.join(record[0], f)
|
yield os.path.join(record[0], f)
|
||||||
|
|
||||||
def strftime(fmt, t=None):
|
def strftime(fmt, t=None):
|
||||||
''' A version of strtime that returns unicode strings. '''
|
''' A version of strtime that returns unicode strings and tries to handle dates
|
||||||
|
before 1900 '''
|
||||||
if t is None:
|
if t is None:
|
||||||
t = time.localtime()
|
t = time.localtime()
|
||||||
|
early_year = t[0] < 1900
|
||||||
|
if early_year:
|
||||||
|
fmt = fmt.replace('%Y', '_early year hack##')
|
||||||
|
t = list(t)
|
||||||
|
orig_year = t[0]
|
||||||
|
t[0] = 1900
|
||||||
|
ans = None
|
||||||
if iswindows:
|
if iswindows:
|
||||||
if isinstance(fmt, unicode):
|
if isinstance(fmt, unicode):
|
||||||
fmt = fmt.encode('mbcs')
|
fmt = fmt.encode('mbcs')
|
||||||
return plugins['winutil'][0].strftime(fmt, t)
|
ans = plugins['winutil'][0].strftime(fmt, t)
|
||||||
return time.strftime(fmt, t).decode(preferred_encoding, 'replace')
|
ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
|
||||||
|
if early_year:
|
||||||
|
ans = ans.replace('_early year hack##', str(orig_year))
|
||||||
|
return ans
|
||||||
|
|
||||||
def my_unichr(num):
|
def my_unichr(num):
|
||||||
try:
|
try:
|
||||||
|
@ -155,6 +155,9 @@ class OutputProfile(Plugin):
|
|||||||
# The image size for comics
|
# The image size for comics
|
||||||
comic_screen_size = (584, 754)
|
comic_screen_size = (584, 754)
|
||||||
|
|
||||||
|
# If True the MOBI renderer on the device supports MOBI indexing
|
||||||
|
supports_mobi_indexing = False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def tags_to_string(cls, tags):
|
def tags_to_string(cls, tags):
|
||||||
return ', '.join(tags)
|
return ', '.join(tags)
|
||||||
@ -254,6 +257,7 @@ class KindleOutput(OutputProfile):
|
|||||||
dpi = 168.451
|
dpi = 168.451
|
||||||
fbase = 16
|
fbase = 16
|
||||||
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
|
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
|
||||||
|
supports_mobi_indexing = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def tags_to_string(cls, tags):
|
def tags_to_string(cls, tags):
|
||||||
@ -269,6 +273,7 @@ class KindleDXOutput(OutputProfile):
|
|||||||
screen_size = (744, 1022)
|
screen_size = (744, 1022)
|
||||||
dpi = 150.0
|
dpi = 150.0
|
||||||
comic_screen_size = (741, 1022)
|
comic_screen_size = (741, 1022)
|
||||||
|
supports_mobi_indexing = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def tags_to_string(cls, tags):
|
def tags_to_string(cls, tags):
|
||||||
|
@ -563,6 +563,8 @@ OptionRecommendation(name='list_recipes',
|
|||||||
break
|
break
|
||||||
|
|
||||||
self.read_user_metadata()
|
self.read_user_metadata()
|
||||||
|
self.opts.no_inline_navbars = self.opts.output_profile.supports_mobi_indexing \
|
||||||
|
and self.output_fmt == 'mobi'
|
||||||
|
|
||||||
def flush(self):
|
def flush(self):
|
||||||
try:
|
try:
|
||||||
|
@ -80,6 +80,8 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
|
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
|
||||||
|
|
||||||
|
|
||||||
TITLEPAGE_COVER = '''\
|
TITLEPAGE_COVER = '''\
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||||
@ -134,6 +136,21 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
'''
|
'''
|
||||||
|
def workaround_webkit_quirks(self):
|
||||||
|
from calibre.ebooks.oeb.base import XPath
|
||||||
|
for x in self.oeb.spine:
|
||||||
|
root = x.data
|
||||||
|
body = XPath('//h:body')(root)
|
||||||
|
if body:
|
||||||
|
body = body[0]
|
||||||
|
|
||||||
|
if not hasattr(body, 'xpath'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for pre in XPath('//h:pre')(body):
|
||||||
|
if not pre.text and len(pre) == 0:
|
||||||
|
pre.tag = 'div'
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts, self.oeb = log, opts, oeb
|
self.log, self.opts, self.oeb = log, opts, oeb
|
||||||
@ -146,6 +163,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
|
|
||||||
self.workaround_ade_quirks()
|
self.workaround_ade_quirks()
|
||||||
|
self.workaround_webkit_quirks()
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
||||||
RescaleImages()(oeb, opts)
|
RescaleImages()(oeb, opts)
|
||||||
|
@ -29,7 +29,7 @@ class LRFOptions(object):
|
|||||||
self.use_metadata_cover = True
|
self.use_metadata_cover = True
|
||||||
self.output = output
|
self.output = output
|
||||||
self.ignore_tables = opts.linearize_tables
|
self.ignore_tables = opts.linearize_tables
|
||||||
self.base_font_size = 0
|
self.base_font_size = opts.base_font_size
|
||||||
self.blank_after_para = opts.insert_blank_line
|
self.blank_after_para = opts.insert_blank_line
|
||||||
self.use_spine = True
|
self.use_spine = True
|
||||||
self.font_delta = 0
|
self.font_delta = 0
|
||||||
|
@ -367,7 +367,7 @@ class MetaInformation(object):
|
|||||||
if self.pubdate is not None:
|
if self.pubdate is not None:
|
||||||
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
|
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
|
||||||
if self.rights is not None:
|
if self.rights is not None:
|
||||||
ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))]
|
ans += [(_('Rights'), unicode(self.rights))]
|
||||||
for i, x in enumerate(ans):
|
for i, x in enumerate(ans):
|
||||||
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
||||||
return u'<table>%s</table>'%u'\n'.join(ans)
|
return u'<table>%s</table>'%u'\n'.join(ans)
|
||||||
|
@ -31,7 +31,7 @@ def metadata_from_formats(formats):
|
|||||||
try:
|
try:
|
||||||
return _metadata_from_formats(formats)
|
return _metadata_from_formats(formats)
|
||||||
except:
|
except:
|
||||||
mi = metadata_from_filename(formats[0])
|
mi = metadata_from_filename(list(formats)[0])
|
||||||
if not mi.authors:
|
if not mi.authors:
|
||||||
mi.authors = [_('Unknown')]
|
mi.authors = [_('Unknown')]
|
||||||
|
|
||||||
@ -126,14 +126,10 @@ def metadata_from_filename(name, pat=None):
|
|||||||
mi.title = match.group('title')
|
mi.title = match.group('title')
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
try:
|
|
||||||
mi.authors = [match.group('author')]
|
|
||||||
except IndexError:
|
|
||||||
pass
|
|
||||||
try:
|
try:
|
||||||
au = match.group('authors')
|
au = match.group('authors')
|
||||||
aus = string_to_authors(au)
|
aus = string_to_authors(au)
|
||||||
mi.authors = authors
|
mi.authors = aus
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
|
@ -452,9 +452,12 @@ class OPF(object):
|
|||||||
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
|
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
|
||||||
if not hasattr(stream, 'read'):
|
if not hasattr(stream, 'read'):
|
||||||
stream = open(stream, 'rb')
|
stream = open(stream, 'rb')
|
||||||
|
raw = stream.read()
|
||||||
|
if not raw:
|
||||||
|
raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
|
||||||
self.basedir = self.base_dir = basedir
|
self.basedir = self.base_dir = basedir
|
||||||
self.path_to_html_toc = self.html_toc_fragment = None
|
self.path_to_html_toc = self.html_toc_fragment = None
|
||||||
raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True)
|
raw, self.encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)
|
||||||
raw = raw[raw.find('<'):]
|
raw = raw[raw.find('<'):]
|
||||||
self.root = etree.fromstring(raw, self.PARSER)
|
self.root = etree.fromstring(raw, self.PARSER)
|
||||||
self.metadata = self.metadata_path(self.root)
|
self.metadata = self.metadata_path(self.root)
|
||||||
|
@ -48,11 +48,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
self.opts.mobi_periodical = False
|
self.opts.mobi_periodical = False
|
||||||
|
|
||||||
def check_for_masthead(self):
|
def check_for_masthead(self):
|
||||||
found = False
|
found = 'masthead' in self.oeb.guide
|
||||||
for typ in self.oeb.guide:
|
|
||||||
if type == 'masthead':
|
|
||||||
found = True
|
|
||||||
break
|
|
||||||
if not found:
|
if not found:
|
||||||
self.oeb.log.debug('No masthead found, generating default one...')
|
self.oeb.log.debug('No masthead found, generating default one...')
|
||||||
from calibre.resources import server_resources
|
from calibre.resources import server_resources
|
||||||
@ -76,12 +72,14 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
from calibre.ebooks.oeb.base import TOC
|
from calibre.ebooks.oeb.base import TOC
|
||||||
toc = self.oeb.toc
|
toc = self.oeb.toc
|
||||||
if toc and toc[0].klass != 'periodical':
|
if toc and toc[0].klass != 'periodical':
|
||||||
|
start_href = self.oeb.spine[0].href
|
||||||
self.log('Converting TOC for MOBI periodical indexing...')
|
self.log('Converting TOC for MOBI periodical indexing...')
|
||||||
articles = {}
|
articles = {}
|
||||||
if toc.depth < 3:
|
if toc.depth() < 3:
|
||||||
sections = [TOC(klass='section')]
|
sections = [TOC(klass='section', title=_('All articles'),
|
||||||
|
href=start_href)]
|
||||||
for x in toc:
|
for x in toc:
|
||||||
sections[0].append(x)
|
sections[0].nodes.append(x)
|
||||||
else:
|
else:
|
||||||
sections = list(toc)
|
sections = list(toc)
|
||||||
for x in sections:
|
for x in sections:
|
||||||
@ -92,13 +90,13 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
a.klass = 'article'
|
a.klass = 'article'
|
||||||
articles[id(sec)].append(a)
|
articles[id(sec)].append(a)
|
||||||
sec.nodes.remove(a)
|
sec.nodes.remove(a)
|
||||||
root = TOC(klass='periodical',
|
root = TOC(klass='periodical', href=start_href,
|
||||||
title=unicode(self.oeb.metadata.title[0]))
|
title=unicode(self.oeb.metadata.title[0]))
|
||||||
for s in sections:
|
for s in sections:
|
||||||
if articles[id(s)]:
|
if articles[id(s)]:
|
||||||
for a in articles[id(s)]:
|
for a in articles[id(s)]:
|
||||||
s.nodes.append(a)
|
s.nodes.append(a)
|
||||||
root.nodes.append(s)
|
root.nodes.append(s)
|
||||||
|
|
||||||
for x in list(toc.nodes):
|
for x in list(toc.nodes):
|
||||||
toc.nodes.remove(x)
|
toc.nodes.remove(x)
|
||||||
|
@ -301,7 +301,7 @@ class MobiReader(object):
|
|||||||
root = html.fromstring(self.processed_html)
|
root = html.fromstring(self.processed_html)
|
||||||
if root.xpath('descendant::p/descendant::p'):
|
if root.xpath('descendant::p/descendant::p'):
|
||||||
from lxml.html import soupparser
|
from lxml.html import soupparser
|
||||||
self.log.warning('Malformed markup, parsing using BeatifulSoup')
|
self.log.warning('Malformed markup, parsing using BeautifulSoup')
|
||||||
root = soupparser.fromstring(self.processed_html)
|
root = soupparser.fromstring(self.processed_html)
|
||||||
|
|
||||||
if root.tag != 'html':
|
if root.tag != 'html':
|
||||||
@ -439,7 +439,12 @@ class MobiReader(object):
|
|||||||
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
|
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
|
||||||
self.processed_html = self.processed_html.replace('\r\n', '\n')
|
self.processed_html = self.processed_html.replace('\r\n', '\n')
|
||||||
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
||||||
self.processed_html = re.sub('\x14|\x15', '', self.processed_html)
|
self.processed_html = re.sub('\x14|\x15|\x1c|\x1d', '', self.processed_html)
|
||||||
|
|
||||||
|
def ensure_unit(self, raw, unit='px'):
|
||||||
|
if re.search(r'\d+$', raw) is not None:
|
||||||
|
raw += unit
|
||||||
|
return raw
|
||||||
|
|
||||||
def upshift_markup(self, root):
|
def upshift_markup(self, root):
|
||||||
self.log.debug('Converting style information to CSS...')
|
self.log.debug('Converting style information to CSS...')
|
||||||
@ -469,13 +474,13 @@ class MobiReader(object):
|
|||||||
if attrib.has_key('height'):
|
if attrib.has_key('height'):
|
||||||
height = attrib.pop('height').strip()
|
height = attrib.pop('height').strip()
|
||||||
if height:
|
if height:
|
||||||
styles.append('margin-top: %s' % height)
|
styles.append('margin-top: %s' % self.ensure_unit(height))
|
||||||
if attrib.has_key('width'):
|
if attrib.has_key('width'):
|
||||||
width = attrib.pop('width').strip()
|
width = attrib.pop('width').strip()
|
||||||
if width:
|
if width:
|
||||||
styles.append('text-indent: %s' % width)
|
styles.append('text-indent: %s' % self.ensure_unit(width))
|
||||||
if width.startswith('-'):
|
if width.startswith('-'):
|
||||||
styles.append('margin-left: %s' % (width[1:]))
|
styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
|
||||||
if attrib.has_key('align'):
|
if attrib.has_key('align'):
|
||||||
align = attrib.pop('align').strip()
|
align = attrib.pop('align').strip()
|
||||||
if align:
|
if align:
|
||||||
|
@ -379,7 +379,7 @@ class MobiWriter(object):
|
|||||||
try:
|
try:
|
||||||
self._generate_index()
|
self._generate_index()
|
||||||
except:
|
except:
|
||||||
self.oeb.log.exception('Failed to generate index')
|
self._oeb.log.exception('Failed to generate index')
|
||||||
|
|
||||||
self._generate_images()
|
self._generate_images()
|
||||||
|
|
||||||
@ -461,7 +461,7 @@ class MobiWriter(object):
|
|||||||
|
|
||||||
h = child.href
|
h = child.href
|
||||||
if h not in self._id_offsets:
|
if h not in self._id_offsets:
|
||||||
self._oeb.log.warning('Could not find TOC entry "%s", aborting indexing ...'% child.title)
|
self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title)
|
||||||
return False
|
return False
|
||||||
offset = self._id_offsets[h]
|
offset = self._id_offsets[h]
|
||||||
|
|
||||||
@ -573,7 +573,7 @@ class MobiWriter(object):
|
|||||||
# Entries continues with a stream of section+articles, section+articles ...
|
# Entries continues with a stream of section+articles, section+articles ...
|
||||||
h = child.href
|
h = child.href
|
||||||
if h not in self._id_offsets:
|
if h not in self._id_offsets:
|
||||||
self._oeb.log.warning('Could not find TOC entry "%s", aborting indexing ...'% child.title)
|
self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title)
|
||||||
return False
|
return False
|
||||||
offset = self._id_offsets[h]
|
offset = self._id_offsets[h]
|
||||||
|
|
||||||
@ -1178,40 +1178,29 @@ class MobiWriter(object):
|
|||||||
'''
|
'''
|
||||||
toc = self._oeb.toc
|
toc = self._oeb.toc
|
||||||
nodes = list(toc.iter())[1:]
|
nodes = list(toc.iter())[1:]
|
||||||
|
toc_conforms = True
|
||||||
for (i, child) in enumerate(nodes) :
|
for (i, child) in enumerate(nodes) :
|
||||||
if self.opts.verbose > 3 :
|
if child.klass == "periodical" and child.depth() != 3 or \
|
||||||
self._oeb.logger.info(" <title>: %-25.25s \tklass=%-15.15s \tdepth:%d playOrder=%03d" % \
|
child.klass == "section" and child.depth() != 2 or \
|
||||||
(child.title, child.klass, child.depth(), child.play_order) )
|
child.klass == "article" and child.depth() != 1 :
|
||||||
|
|
||||||
if child.klass == "periodical" and child.depth() != 3 :
|
self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % \
|
||||||
self._oeb.logger.info('<navPoint class="periodical"> found at depth %d, nonconforming TOC' % \
|
(child.klass, child.depth()) )
|
||||||
child.depth() )
|
self._oeb.logger.warn(" <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % \
|
||||||
return False
|
(child.title, child.klass, child.depth(), child.play_order) )
|
||||||
|
toc_conforms = False
|
||||||
if child.klass == "section" and child.depth() != 2 :
|
|
||||||
self._oeb.logger.info('<navPoint class="section"> found at depth %d, nonconforming TOC' % \
|
|
||||||
child.depth() )
|
|
||||||
return False
|
|
||||||
|
|
||||||
if child.klass == "article" and child.depth() != 1 :
|
|
||||||
self._oeb.logger.info('<navPoint class="article"> found at depth %d, nonconforming TOC' % \
|
|
||||||
child.depth() )
|
|
||||||
return False
|
|
||||||
|
|
||||||
# We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs
|
# We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs
|
||||||
if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] :
|
if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] :
|
||||||
self._oeb.logger.info('metadata missing timestamp needed for periodical')
|
self._oeb.logger.info('metadata missing date/timestamp')
|
||||||
return False
|
toc_conforms = False
|
||||||
|
|
||||||
# Periodicals also need a mastheadImage in the manifest
|
if not 'masthead' in self._oeb.guide :
|
||||||
has_mastheadImage = 'masthead' in self._oeb.guide
|
self._oeb.logger.info('mastheadImage missing from manifest')
|
||||||
|
toc_conforms = False
|
||||||
|
|
||||||
if not has_mastheadImage :
|
self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming")
|
||||||
self._oeb.logger.info('mastheadImage missing from manifest, aborting periodical indexing')
|
return toc_conforms
|
||||||
return False
|
|
||||||
|
|
||||||
self._oeb.logger.info('TOC structure and pubdate verified')
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def _generate_text(self):
|
def _generate_text(self):
|
||||||
@ -1231,12 +1220,12 @@ class MobiWriter(object):
|
|||||||
offset = 0
|
offset = 0
|
||||||
|
|
||||||
if self._compression != UNCOMPRESSED:
|
if self._compression != UNCOMPRESSED:
|
||||||
self._oeb.logger.info('Compressing markup content...')
|
self._oeb.logger.info(' Compressing markup content...')
|
||||||
data, overlap = self._read_text_record(text)
|
data, overlap = self._read_text_record(text)
|
||||||
|
|
||||||
# Evaluate toc for conformance
|
# Evaluate toc for conformance
|
||||||
if self.opts.mobi_periodical :
|
if self.opts.mobi_periodical :
|
||||||
self._oeb.logger.info('--mobi-periodical specified, evaluating TOC for periodical conformance ...')
|
self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...')
|
||||||
self._conforming_periodical_toc = self._evaluate_periodical_toc()
|
self._conforming_periodical_toc = self._evaluate_periodical_toc()
|
||||||
|
|
||||||
# This routine decides whether to build flat or structured based on self._conforming_periodical_toc
|
# This routine decides whether to build flat or structured based on self._conforming_periodical_toc
|
||||||
@ -1249,11 +1238,11 @@ class MobiWriter(object):
|
|||||||
if len(entries) :
|
if len(entries) :
|
||||||
self._indexable = self._generate_indexed_navpoints()
|
self._indexable = self._generate_indexed_navpoints()
|
||||||
else :
|
else :
|
||||||
self._oeb.logger.info('No entries found in TOC ...')
|
self._oeb.logger.info(' No entries found in TOC ...')
|
||||||
self._indexable = False
|
self._indexable = False
|
||||||
|
|
||||||
if not self._indexable :
|
if not self._indexable :
|
||||||
self._oeb.logger.info('Writing unindexed mobi ...')
|
self._oeb.logger.info(' Writing unindexed mobi ...')
|
||||||
|
|
||||||
while len(data) > 0:
|
while len(data) > 0:
|
||||||
if self._compression == PALMDOC:
|
if self._compression == PALMDOC:
|
||||||
@ -1271,7 +1260,8 @@ class MobiWriter(object):
|
|||||||
while breaks and (breaks[0] - offset) < RECORD_SIZE:
|
while breaks and (breaks[0] - offset) < RECORD_SIZE:
|
||||||
# .pop returns item, removes it from list
|
# .pop returns item, removes it from list
|
||||||
pbreak = (breaks.pop(0) - running) >> 3
|
pbreak = (breaks.pop(0) - running) >> 3
|
||||||
self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
|
if self.opts.verbose > 2 :
|
||||||
|
self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
|
||||||
encoded = decint(pbreak, DECINT_FORWARD)
|
encoded = decint(pbreak, DECINT_FORWARD)
|
||||||
record.write(encoded)
|
record.write(encoded)
|
||||||
running += pbreak << 3
|
running += pbreak << 3
|
||||||
@ -1384,7 +1374,7 @@ class MobiWriter(object):
|
|||||||
# 0x002 MOBI book (chapter - chapter navigation)
|
# 0x002 MOBI book (chapter - chapter navigation)
|
||||||
# 0x101 News - Hierarchical navigation with sections and articles
|
# 0x101 News - Hierarchical navigation with sections and articles
|
||||||
# 0x102 News feed - Flat navigation
|
# 0x102 News feed - Flat navigation
|
||||||
# 0x103 News magazine - same as 1x101
|
# 0x103 News magazine - same as 0x101
|
||||||
# 0xC - 0xF : Text encoding (65001 is utf-8)
|
# 0xC - 0xF : Text encoding (65001 is utf-8)
|
||||||
# 0x10 - 0x13 : UID
|
# 0x10 - 0x13 : UID
|
||||||
# 0x14 - 0x17 : Generator version
|
# 0x14 - 0x17 : Generator version
|
||||||
@ -1545,7 +1535,7 @@ class MobiWriter(object):
|
|||||||
exth.write(data)
|
exth.write(data)
|
||||||
nrecs += 1
|
nrecs += 1
|
||||||
if term == 'rights' :
|
if term == 'rights' :
|
||||||
rights = unicode(oeb.metadata.rights[0])
|
rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
|
||||||
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
|
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
|
||||||
exth.write(rights)
|
exth.write(rights)
|
||||||
|
|
||||||
@ -1614,7 +1604,7 @@ class MobiWriter(object):
|
|||||||
self._write(record)
|
self._write(record)
|
||||||
|
|
||||||
def _generate_index(self):
|
def _generate_index(self):
|
||||||
self._oeb.log('Generating primary index ...')
|
self._oeb.log('Generating INDX ...')
|
||||||
self._primary_index_record = None
|
self._primary_index_record = None
|
||||||
|
|
||||||
# Build the NCXEntries and INDX
|
# Build the NCXEntries and INDX
|
||||||
@ -1917,18 +1907,18 @@ class MobiWriter(object):
|
|||||||
self._ctoc_map.append(ctoc_name_map)
|
self._ctoc_map.append(ctoc_name_map)
|
||||||
|
|
||||||
def _generate_ctoc(self):
|
def _generate_ctoc(self):
|
||||||
# Generate the compiled TOC strings
|
# Generate the compiled TOC strings
|
||||||
# Each node has 1-4 CTOC entries:
|
# Each node has 1-4 CTOC entries:
|
||||||
# Periodical (0xDF)
|
# Periodical (0xDF)
|
||||||
# title, class
|
# title, class
|
||||||
# Section (0xFF)
|
# Section (0xFF)
|
||||||
# title, class
|
# title, class
|
||||||
# Article (0x3F)
|
# Article (0x3F)
|
||||||
# title, class, description, author
|
# title, class, description, author
|
||||||
# Chapter (0x0F)
|
# Chapter (0x0F)
|
||||||
# title, class
|
# title, class
|
||||||
# nb: Chapters don't actually have @class, so we synthesize it
|
# nb: Chapters don't actually have @class, so we synthesize it
|
||||||
# in reader._toc_from_navpoint
|
# in reader._toc_from_navpoint
|
||||||
|
|
||||||
toc = self._oeb.toc
|
toc = self._oeb.toc
|
||||||
reduced_toc = []
|
reduced_toc = []
|
||||||
@ -1953,6 +1943,8 @@ class MobiWriter(object):
|
|||||||
first = False
|
first = False
|
||||||
else :
|
else :
|
||||||
self._oeb.logger.info('Generating flat CTOC ...')
|
self._oeb.logger.info('Generating flat CTOC ...')
|
||||||
|
previousOffset = -1
|
||||||
|
currentOffset = 0
|
||||||
for (i, child) in enumerate(toc.iter()):
|
for (i, child) in enumerate(toc.iter()):
|
||||||
# Only add chapters or articles at depth==1
|
# Only add chapters or articles at depth==1
|
||||||
# no class defaults to 'chapter'
|
# no class defaults to 'chapter'
|
||||||
@ -1961,8 +1953,20 @@ class MobiWriter(object):
|
|||||||
if self.opts.verbose > 2 :
|
if self.opts.verbose > 2 :
|
||||||
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
|
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
|
||||||
(child.klass, child.depth(), child) )
|
(child.klass, child.depth(), child) )
|
||||||
self._add_flat_ctoc_node(child, ctoc)
|
|
||||||
reduced_toc.append(child)
|
# Test to see if this child's offset is the same as the previous child's
|
||||||
|
# offset, skip it
|
||||||
|
h = child.href
|
||||||
|
currentOffset = self._id_offsets[h]
|
||||||
|
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
|
||||||
|
|
||||||
|
if currentOffset != previousOffset :
|
||||||
|
self._add_flat_ctoc_node(child, ctoc)
|
||||||
|
reduced_toc.append(child)
|
||||||
|
previousOffset = currentOffset
|
||||||
|
else :
|
||||||
|
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
|
||||||
|
|
||||||
first = False
|
first = False
|
||||||
else :
|
else :
|
||||||
if self.opts.verbose > 2 :
|
if self.opts.verbose > 2 :
|
||||||
@ -2027,7 +2031,7 @@ class MobiWriter(object):
|
|||||||
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
||||||
name = "%04X"%count
|
name = "%04X"%count
|
||||||
indxt.write(chr(len(name)) + name) # Write the name
|
indxt.write(chr(len(name)) + name) # Write the name
|
||||||
indxt.write(INDXT['section']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
|
indxt.write(INDXT['section']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
|
||||||
indxt.write(chr(0)) # subType 0
|
indxt.write(chr(0)) # subType 0
|
||||||
indxt.write(decint(offset, DECINT_FORWARD)) # offset
|
indxt.write(decint(offset, DECINT_FORWARD)) # offset
|
||||||
indxt.write(decint(length, DECINT_FORWARD)) # length
|
indxt.write(decint(length, DECINT_FORWARD)) # length
|
||||||
@ -2045,7 +2049,7 @@ class MobiWriter(object):
|
|||||||
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
||||||
name = "%04X"%count
|
name = "%04X"%count
|
||||||
indxt.write(chr(len(name)) + name) # Write the name
|
indxt.write(chr(len(name)) + name) # Write the name
|
||||||
indxt.write(INDXT['article']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
|
indxt.write(INDXT['article']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
|
||||||
|
|
||||||
hasAuthor = True if self._ctoc_map[index]['authorOffset'] else False
|
hasAuthor = True if self._ctoc_map[index]['authorOffset'] else False
|
||||||
hasDescription = True if self._ctoc_map[index]['descriptionOffset'] else False
|
hasDescription = True if self._ctoc_map[index]['descriptionOffset'] else False
|
||||||
|
@ -1468,7 +1468,9 @@ class TOC(object):
|
|||||||
node.to_opf1(tour)
|
node.to_opf1(tour)
|
||||||
return tour
|
return tour
|
||||||
|
|
||||||
def to_ncx(self, parent):
|
def to_ncx(self, parent=None):
|
||||||
|
if parent is None:
|
||||||
|
parent = etree.Element(NCX('navMap'))
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
id = node.id or unicode(uuid.uuid4())
|
id = node.id or unicode(uuid.uuid4())
|
||||||
attrib = {'id': id, 'playOrder': str(node.play_order)}
|
attrib = {'id': id, 'playOrder': str(node.play_order)}
|
||||||
|
@ -9,6 +9,8 @@ from lxml import etree
|
|||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
|
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
|
|
||||||
class OEBOutput(OutputFormatPlugin):
|
class OEBOutput(OutputFormatPlugin):
|
||||||
@ -17,6 +19,9 @@ class OEBOutput(OutputFormatPlugin):
|
|||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
file_type = 'oeb'
|
file_type = 'oeb'
|
||||||
|
|
||||||
|
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts = log, opts
|
self.log, self.opts = log, opts
|
||||||
if not os.path.exists(output_path):
|
if not os.path.exists(output_path):
|
||||||
|
@ -169,7 +169,8 @@ class Stylizer(object):
|
|||||||
if not matches and class_sel_pat.match(text):
|
if not matches and class_sel_pat.match(text):
|
||||||
found = False
|
found = False
|
||||||
for x in tree.xpath('//*[@class]'):
|
for x in tree.xpath('//*[@class]'):
|
||||||
if text.lower().endswith('.'+x.get('class').lower()):
|
if text.lower().endswith('.'+x.get('class').lower()) and \
|
||||||
|
text.lower() != text:
|
||||||
matches.append(x)
|
matches.append(x)
|
||||||
found = True
|
found = True
|
||||||
if found:
|
if found:
|
||||||
|
@ -34,7 +34,8 @@ class Clean(object):
|
|||||||
|
|
||||||
for x in list(self.oeb.guide):
|
for x in list(self.oeb.guide):
|
||||||
href = urldefrag(self.oeb.guide[x].href)[0]
|
href = urldefrag(self.oeb.guide[x].href)[0]
|
||||||
if x.lower() not in ('cover', 'titlepage'):
|
if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc',
|
||||||
|
'title-page', 'copyright-page'):
|
||||||
self.oeb.guide.remove(x)
|
self.oeb.guide.remove(x)
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ from lxml import etree
|
|||||||
from lxml.cssselect import CSSSelector
|
from lxml.cssselect import CSSSelector
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
|
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
|
||||||
urldefrag, rewrite_links, urlunquote, barename
|
urldefrag, rewrite_links, urlunquote, barename, XHTML
|
||||||
from calibre.ebooks.epub import rules
|
from calibre.ebooks.epub import rules
|
||||||
|
|
||||||
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
||||||
@ -216,7 +216,25 @@ class FlowSplitter(object):
|
|||||||
self.trees.append(before)
|
self.trees.append(before)
|
||||||
tree = after
|
tree = after
|
||||||
self.trees.append(tree)
|
self.trees.append(tree)
|
||||||
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
|
trees, ids = [], set([])
|
||||||
|
for tree in self.trees:
|
||||||
|
root = tree.getroot()
|
||||||
|
if self.is_page_empty(root):
|
||||||
|
discarded_ids = root.xpath('//*[@id]')
|
||||||
|
for x in discarded_ids:
|
||||||
|
x = x.get('id')
|
||||||
|
if not x.startswith('calibre_'):
|
||||||
|
ids.add(x)
|
||||||
|
else:
|
||||||
|
if ids:
|
||||||
|
body = self.get_body(root)
|
||||||
|
if body is not None:
|
||||||
|
for x in ids:
|
||||||
|
body.insert(0, body.makeelement(XHTML('div'),
|
||||||
|
id=x, style='height:0pt'))
|
||||||
|
ids = set([])
|
||||||
|
trees.append(tree)
|
||||||
|
self.trees = trees
|
||||||
|
|
||||||
def get_body(self, root):
|
def get_body(self, root):
|
||||||
body = root.xpath('//h:body', namespaces=NAMESPACES)
|
body = root.xpath('//h:body', namespaces=NAMESPACES)
|
||||||
|
@ -107,8 +107,6 @@ class Adder(QObject):
|
|||||||
self.callback(self.paths, self.names, self.infos)
|
self.callback(self.paths, self.names, self.infos)
|
||||||
self.callback_called = True
|
self.callback_called = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def update(self):
|
def update(self):
|
||||||
if not self.ids:
|
if not self.ids:
|
||||||
self.timer.stop()
|
self.timer.stop()
|
||||||
|
BIN
src/calibre/gui2/images/news/eltiempo_hn.png
Normal file
BIN
src/calibre/gui2/images/news/eltiempo_hn.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.2 KiB |
BIN
src/calibre/gui2/images/news/laprensa_hn.png
Normal file
BIN
src/calibre/gui2/images/news/laprensa_hn.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 306 B |
BIN
src/calibre/gui2/images/news/latribuna.png
Normal file
BIN
src/calibre/gui2/images/news/latribuna.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 553 B |
@ -657,6 +657,8 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||||
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||||
name = title + ' - ' + author
|
name = title + ' - ' + author
|
||||||
|
while name.endswith('.'):
|
||||||
|
name = name[:-1]
|
||||||
return name
|
return name
|
||||||
|
|
||||||
def rmtree(self, path):
|
def rmtree(self, path):
|
||||||
@ -1074,6 +1076,8 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
self.set_isbn(id, mi.isbn, notify=False)
|
self.set_isbn(id, mi.isbn, notify=False)
|
||||||
if mi.series_index:
|
if mi.series_index:
|
||||||
self.set_series_index(id, mi.series_index, notify=False)
|
self.set_series_index(id, mi.series_index, notify=False)
|
||||||
|
if mi.pubdate:
|
||||||
|
self.set_pubdate(id, mi.pubdate, notify=False)
|
||||||
if getattr(mi, 'timestamp', None) is not None:
|
if getattr(mi, 'timestamp', None) is not None:
|
||||||
self.set_timestamp(id, mi.timestamp, notify=False)
|
self.set_timestamp(id, mi.timestamp, notify=False)
|
||||||
self.set_path(id, True)
|
self.set_path(id, True)
|
||||||
@ -1734,7 +1738,7 @@ books_series_link feeds
|
|||||||
formats = self.find_books_in_directory(dirpath, True)
|
formats = self.find_books_in_directory(dirpath, True)
|
||||||
if not formats:
|
if not formats:
|
||||||
return
|
return
|
||||||
|
formats = list(formats)
|
||||||
mi = metadata_from_formats(formats)
|
mi = metadata_from_formats(formats)
|
||||||
if mi.title is None:
|
if mi.title is None:
|
||||||
return
|
return
|
||||||
|
@ -366,10 +366,9 @@ class LibraryServer(object):
|
|||||||
@expose
|
@expose
|
||||||
def index(self, **kwargs):
|
def index(self, **kwargs):
|
||||||
'The / URL'
|
'The / URL'
|
||||||
stanza = cherrypy.request.headers.get('Stanza-Device-Name', 919)
|
want_opds = cherrypy.request.headers.get('Stanza-Device-Name', 919) != \
|
||||||
if stanza == 919:
|
919 or cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919
|
||||||
return self.static('index.html')
|
return self.stanza() if want_opds else self.static('index.html')
|
||||||
return self.stanza()
|
|
||||||
|
|
||||||
|
|
||||||
@expose
|
@expose
|
||||||
|
@ -469,6 +469,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
self.username = options.username
|
self.username = options.username
|
||||||
self.password = options.password
|
self.password = options.password
|
||||||
self.lrf = options.lrf
|
self.lrf = options.lrf
|
||||||
|
self.include_navbars = not options.no_inline_navbars
|
||||||
|
|
||||||
self.output_dir = os.path.abspath(self.output_dir)
|
self.output_dir = os.path.abspath(self.output_dir)
|
||||||
if options.test:
|
if options.test:
|
||||||
@ -539,7 +540,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if first_fetch and job_info:
|
if first_fetch and job_info:
|
||||||
url, f, a, feed_len = job_info
|
url, f, a, feed_len = job_info
|
||||||
body = soup.find('body')
|
body = soup.find('body')
|
||||||
if body is not None:
|
if body is not None and self.include_navbars:
|
||||||
templ = self.navbar.generate(False, f, a, feed_len,
|
templ = self.navbar.generate(False, f, a, feed_len,
|
||||||
not self.has_single_feed,
|
not self.has_single_feed,
|
||||||
url, __appname__,
|
url, __appname__,
|
||||||
@ -907,12 +908,13 @@ class BasicNewsRecipe(Recipe):
|
|||||||
body = soup.find('body')
|
body = soup.find('body')
|
||||||
if body is not None:
|
if body is not None:
|
||||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||||
templ = self.navbar.generate(True, num, j, len(f),
|
if self.include_navbars:
|
||||||
not self.has_single_feed,
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
a.orig_url, __appname__, prefix=prefix,
|
not self.has_single_feed,
|
||||||
center=self.center_navbar)
|
a.orig_url, __appname__, prefix=prefix,
|
||||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
center=self.center_navbar)
|
||||||
body.insert(len(body.contents), elem)
|
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||||
|
body.insert(len(body.contents), elem)
|
||||||
with open(last, 'wb') as fi:
|
with open(last, 'wb') as fi:
|
||||||
fi.write(unicode(soup).encode('utf-8'))
|
fi.write(unicode(soup).encode('utf-8'))
|
||||||
|
|
||||||
@ -923,7 +925,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
po = self.play_order_counter
|
po = self.play_order_counter
|
||||||
desc = f.description
|
desc = getattr(f, 'description', None)
|
||||||
if not desc:
|
if not desc:
|
||||||
desc = None
|
desc = None
|
||||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||||
|
@ -51,9 +51,11 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'theeconomictimes_india', '7dias', 'buenosaireseconomico',
|
'theeconomictimes_india', '7dias', 'buenosaireseconomico',
|
||||||
'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
|
'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
|
||||||
'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
|
'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
|
||||||
'fastcompany', 'accountancyage',
|
'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
|
||||||
|
'eltiempo_hn',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
52
src/calibre/web/feeds/recipes/recipe_eltiempo_hn.py
Normal file
52
src/calibre/web/feeds/recipes/recipe_eltiempo_hn.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.tiempo.hn
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
class ElTiempoHn(BasicNewsRecipe):
    '''Recipe for the Honduran newspaper El Tiempo (www.tiempo.hn).'''

    # Descriptive metadata; description, category and publisher are also
    # reused below when the conversion option values are assembled.
    title = 'El Tiempo - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'El Tiempo'
    category = 'news, politics, Honduras'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    language = _('Spanish')

    # Written onto the <html> element (and a <meta> tag) by preprocess_html.
    lang = 'es-HN'
    direction = 'ltr'

    # NOTE(review): presumably passed through to the LRF conversion as
    # command-line style arguments -- confirm against BasicNewsRecipe.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    # One "key=value" setting per line, joined with newlines.
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'linearize_tables=True',
        'pretty_print=True',
        'override_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"',
    ])

    remove_tags = [dict(name=['form', 'object', 'embed', 'base'])]

    keep_only_tags = [dict(name='td', attrs={'id': 'mainbodycont'})]

    feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]

    def preprocess_html(self, soup):
        '''
        Tag the document with its language and direction, prepend
        Content-Language and Content-Type <meta> elements to <head>, drop
        every inline ``style`` attribute, and return whatever
        ``self.adeify_images`` returns for the modified soup.
        '''
        root = soup.html
        root['lang'] = self.lang
        root['dir'] = self.direction

        # The two <meta> elements, in the order they must appear in <head>.
        meta_specs = (
            [("http-equiv", "Content-Language"), ("content", self.lang)],
            [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")],
        )
        for position, attrs in enumerate(meta_specs):
            soup.head.insert(position, Tag(soup, 'meta', attrs))

        for styled in soup.findAll(style=True):
            del styled['style']

        return self.adeify_images(soup)
|
54
src/calibre/web/feeds/recipes/recipe_laprensa_hn.py
Normal file
54
src/calibre/web/feeds/recipes/recipe_laprensa_hn.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.laprensahn.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
class LaPrensaHn(BasicNewsRecipe):
    '''Recipe for the Honduran newspaper La Prensa (www.laprensahn.com).'''

    # Descriptive metadata; description, category and publisher are also
    # reused below when the conversion option values are assembled.
    title = 'La Prensa - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'La Prensa'
    category = 'news, politics, Honduras'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    language = _('Spanish')

    # Written onto the <html> element (and a <meta> tag) by preprocess_html.
    lang = 'es-HN'
    direction = 'ltr'

    # NOTE(review): presumably passed through to the LRF conversion as
    # command-line style arguments -- confirm against BasicNewsRecipe.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # One "key=value" setting per line, joined with newlines.
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'pretty_print=True',
        'override_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "',
    ])

    remove_tags = [dict(name=['form', 'object', 'embed'])]

    keep_only_tags = [
        dict(name='h1', attrs={'class': 'titulo1'}),
        dict(name='div', attrs={'class': ['sumario11', 'hora', 'texto']}),
    ]

    feeds = [(u'Noticias', u'http://feeds.feedburner.com/laprensa_titulares')]

    def preprocess_html(self, soup):
        '''
        Tag the document with its language and direction, prepend
        Content-Language and Content-Type <meta> elements to <head>, drop
        every inline ``style`` attribute, and return the modified soup.
        '''
        root = soup.html
        root['lang'] = self.lang
        root['dir'] = self.direction

        # The two <meta> elements, in the order they must appear in <head>.
        meta_specs = (
            [("http-equiv", "Content-Language"), ("content", self.lang)],
            [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")],
        )
        for position, attrs in enumerate(meta_specs):
            soup.head.insert(position, Tag(soup, 'meta', attrs))

        for styled in soup.findAll(style=True):
            del styled['style']

        return soup
|
65
src/calibre/web/feeds/recipes/recipe_latribuna.py
Normal file
65
src/calibre/web/feeds/recipes/recipe_latribuna.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.latribuna.hn
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
class LaTribuna(BasicNewsRecipe):
    '''Recipe for the Honduran newspaper La Tribuna (www.latribuna.hn).'''

    # Descriptive metadata; description, category and publisher are also
    # reused below when the conversion option values are assembled.
    title                 = 'La Tribuna - Honduras'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Honduras y mundo'
    publisher             = 'La Tribuna'
    category              = 'news, politics, Honduras'

    # Fetch settings.
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
    language              = _('Spanish')

    # Written onto the <html> element (and a <meta> tag) by preprocess_html.
    lang                  = 'es-HN'
    direction             = 'ltr'

    # NOTE(review): presumably passed through to the LRF conversion as
    # command-line style arguments -- confirm against BasicNewsRecipe.
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    # One "key=value" setting per line, separated by newlines.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    remove_tags = [dict(name=['form','object','embed'])]

    keep_only_tags = [
                        dict(name='p', attrs={'id':['BlogTitle','BlogDate']})
                       ,dict(name='div', attrs={'id':'BlogContent'})
                     ]

    feeds = [(u'Noticias', u'http://www.latribuna.hn/web2.0/?feed=rss')]

    def print_version(self, url):
        '''Return ``url`` with ``&print=1`` appended.'''
        return url + '&print=1'

    def preprocess_html(self, soup):
        '''
        Tag the document with its language and direction, prepend
        Content-Language and Content-Type <meta> elements to <head>, drop
        every inline ``style`` attribute, and return the modified soup.
        '''
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_cover_url(self):
        '''
        Look up the cover image on the site's front page.

        Returns the ``src`` of the <img> nested in the first link of the
        ``portada_impresa`` <div>, or None when the <div>, the link, the
        image or its ``src`` attribute is missing.
        '''
        cover_url = None
        soup = self.index_to_soup('http://www.latribuna.hn/web2.0/')
        cover_item = soup.find('div',attrs={'class':'portada_impresa'})
        if cover_item is not None:
            # Walk div -> a -> img one step at a time: a layout change on
            # the site must yield "no cover", not an exception.
            link = cover_item.a
            image = link.img if link is not None else None
            if image is not None:
                cover_url = image.get('src', None)
        return cover_url
|
@ -53,6 +53,10 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
||||||
tag.name = 'div'
|
tag.name = 'div'
|
||||||
|
|
||||||
|
for tag in soup.findAll('div', dict(id=["articleImage_1", "articleImage_2", "articleImage_3", "articleImage_4", "articleImage_5", "articleImage_6", "articleImage_7"])):
|
||||||
|
tag.extract()
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
@ -70,7 +74,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
#('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
|
#('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
|
||||||
(' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
|
(' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
|
||||||
(' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
|
(' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
|
||||||
# ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
|
#('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
|
||||||
('Today\'s Newspaper - Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
|
('Today\'s Newspaper - Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
|
||||||
('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
|
('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
|
||||||
('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
|
('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user