mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
0e8cc35f18
@ -250,6 +250,19 @@ class KindleDXOutput(OutputProfile):
|
|||||||
def tags_to_string(cls, tags):
|
def tags_to_string(cls, tags):
|
||||||
return 'ttt '.join(tags)+'ttt '
|
return 'ttt '.join(tags)+'ttt '
|
||||||
|
|
||||||
|
class IlliadOutput(OutputProfile):
|
||||||
|
|
||||||
|
name = 'Illiad'
|
||||||
|
short_name = 'illiad'
|
||||||
|
description = _('This profile is intended for the Irex Illiad.')
|
||||||
|
|
||||||
|
screen_size = (760, 925)
|
||||||
|
comic_screen_size = (760, 925)
|
||||||
|
dpi = 160.0
|
||||||
|
fbase = 12
|
||||||
|
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
|
output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
|
||||||
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
|
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
|
||||||
|
@ -17,6 +17,34 @@ from calibre.customize.conversion import OptionRecommendation
|
|||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
|
block_level_tags = (
|
||||||
|
'address',
|
||||||
|
'body',
|
||||||
|
'blockquote',
|
||||||
|
'center',
|
||||||
|
'dir',
|
||||||
|
'div',
|
||||||
|
'dl',
|
||||||
|
'fieldset',
|
||||||
|
'form',
|
||||||
|
'h1',
|
||||||
|
'h2',
|
||||||
|
'h3',
|
||||||
|
'h4',
|
||||||
|
'h5',
|
||||||
|
'h6',
|
||||||
|
'hr',
|
||||||
|
'isindex',
|
||||||
|
'menu',
|
||||||
|
'noframes',
|
||||||
|
'noscript',
|
||||||
|
'ol',
|
||||||
|
'p',
|
||||||
|
'pre',
|
||||||
|
'table',
|
||||||
|
'ul',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class EPUBOutput(OutputFormatPlugin):
|
class EPUBOutput(OutputFormatPlugin):
|
||||||
|
|
||||||
@ -197,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
if 'titlepage' in self.oeb.guide.refs:
|
if 'titlepage' in self.oeb.guide.refs:
|
||||||
self.oeb.guide.refs['titlepage'].href = item.href
|
self.oeb.guide.refs['titlepage'].href = item.href
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def condense_ncx(self, ncx_path):
|
def condense_ncx(self, ncx_path):
|
||||||
if not self.opts.pretty_print:
|
if not self.opts.pretty_print:
|
||||||
tree = etree.parse(ncx_path)
|
tree = etree.parse(ncx_path)
|
||||||
@ -210,46 +236,46 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
|
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
|
||||||
open(ncx_path, 'wb').write(compressed)
|
open(ncx_path, 'wb').write(compressed)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def workaround_ade_quirks(self):
|
def workaround_ade_quirks(self):
|
||||||
'''
|
'''
|
||||||
Perform various markup transforms to get the output to render correctly
|
Perform various markup transforms to get the output to render correctly
|
||||||
in the quirky ADE.
|
in the quirky ADE.
|
||||||
'''
|
'''
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, XHTML, OEB_STYLES
|
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
|
||||||
from lxml.etree import XPath as _XPath
|
|
||||||
from functools import partial
|
|
||||||
XPath = partial(_XPath, namespaces=XPNSMAP)
|
|
||||||
|
|
||||||
for x in self.oeb.spine:
|
for x in self.oeb.spine:
|
||||||
root = x.data
|
root = x.data
|
||||||
body = XPath('//h:body')(root)
|
body = XPath('//h:body')(root)
|
||||||
if body:
|
if body:
|
||||||
body = body[0]
|
body = body[0]
|
||||||
|
|
||||||
# Replace <br> that are children of <body> as ADE doesn't handle them
|
# Replace <br> that are children of <body> as ADE doesn't handle them
|
||||||
if hasattr(body, 'xpath'):
|
if hasattr(body, 'xpath'):
|
||||||
for br in XPath('./h:br')(body):
|
for br in XPath('./h:br')(body):
|
||||||
if br.getparent() is None:
|
if br.getparent() is None:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
sibling = br.itersiblings().next()
|
prior = br.itersiblings(preceding=True).next()
|
||||||
|
priortag = barename(prior.tag)
|
||||||
|
priortext = prior.tail
|
||||||
except:
|
except:
|
||||||
sibling = None
|
priortag = 'body'
|
||||||
|
priortext = body.text
|
||||||
|
if priortext:
|
||||||
|
priortext = priortext.strip()
|
||||||
br.tag = XHTML('p')
|
br.tag = XHTML('p')
|
||||||
br.text = u'\u00a0'
|
br.text = u'\u00a0'
|
||||||
if (br.tail and br.tail.strip()) or sibling is None or \
|
style = br.get('style', '').split(';')
|
||||||
getattr(sibling, 'tag', '') != XHTML('br'):
|
style = filter(None, map(lambda x: x.strip(), style))
|
||||||
style = br.get('style', '').split(';')
|
style.append('margin:0pt; border:0pt')
|
||||||
style = filter(None, map(lambda x: x.strip(), style))
|
# If the prior tag is a block (including a <br> we replaced)
|
||||||
style.append('margin: 0pt; border:0pt; height:0pt')
|
# then this <br> replacement should have a 1-line height.
|
||||||
br.set('style', '; '.join(style))
|
# Otherwise it should have no height.
|
||||||
|
if not priortext and priortag in block_level_tags:
|
||||||
|
style.append('height:1em')
|
||||||
else:
|
else:
|
||||||
sibling.getparent().remove(sibling)
|
style.append('height:0pt')
|
||||||
if sibling.tail:
|
br.set('style', '; '.join(style))
|
||||||
if not br.tail:
|
|
||||||
br.tail = ''
|
|
||||||
br.tail += sibling.tail
|
|
||||||
|
|
||||||
for tag in XPath('//h:embed')(root):
|
for tag in XPath('//h:embed')(root):
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
|
@ -218,7 +218,7 @@ class MetaInformation(object):
|
|||||||
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
|
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
|
||||||
'manifest', 'spine', 'toc', 'cover', 'language',
|
'manifest', 'spine', 'toc', 'cover', 'language',
|
||||||
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
|
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
|
||||||
'pubdate'):
|
'pubdate', 'rights', 'publication_type'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
setattr(ans, attr, getattr(mi, attr))
|
setattr(ans, attr, getattr(mi, attr))
|
||||||
|
|
||||||
@ -243,7 +243,8 @@ class MetaInformation(object):
|
|||||||
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
|
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
|
||||||
'series', 'series_index', 'rating', 'isbn', 'language',
|
'series', 'series_index', 'rating', 'isbn', 'language',
|
||||||
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
|
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
|
||||||
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'
|
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
|
||||||
|
'rights', 'publication_type',
|
||||||
):
|
):
|
||||||
setattr(self, x, getattr(mi, x, None))
|
setattr(self, x, getattr(mi, x, None))
|
||||||
|
|
||||||
@ -262,7 +263,8 @@ class MetaInformation(object):
|
|||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
||||||
'cover', 'language', 'guide', 'book_producer',
|
'cover', 'language', 'guide', 'book_producer',
|
||||||
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'):
|
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
|
||||||
|
'publication_type'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
val = getattr(mi, attr)
|
val = getattr(mi, attr)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
@ -332,6 +334,8 @@ class MetaInformation(object):
|
|||||||
fmt('Timestamp', self.timestamp.isoformat(' '))
|
fmt('Timestamp', self.timestamp.isoformat(' '))
|
||||||
if self.pubdate is not None:
|
if self.pubdate is not None:
|
||||||
fmt('Published', self.pubdate.isoformat(' '))
|
fmt('Published', self.pubdate.isoformat(' '))
|
||||||
|
if self.rights is not None:
|
||||||
|
fmt('Rights', unicode(self.rights))
|
||||||
if self.lccn:
|
if self.lccn:
|
||||||
fmt('LCCN', unicode(self.lccn))
|
fmt('LCCN', unicode(self.lccn))
|
||||||
if self.lcc:
|
if self.lcc:
|
||||||
@ -362,6 +366,8 @@ class MetaInformation(object):
|
|||||||
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
|
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
|
||||||
if self.pubdate is not None:
|
if self.pubdate is not None:
|
||||||
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
|
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
|
||||||
|
if self.rights is not None:
|
||||||
|
ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))]
|
||||||
for i, x in enumerate(ans):
|
for i, x in enumerate(ans):
|
||||||
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
||||||
return u'<table>%s</table>'%u'\n'.join(ans)
|
return u'<table>%s</table>'%u'\n'.join(ans)
|
||||||
|
@ -7,6 +7,7 @@ import re
|
|||||||
xml:lang="en"
|
xml:lang="en"
|
||||||
xmlns="http://www.daisy.org/z3986/2005/ncx/"
|
xmlns="http://www.daisy.org/z3986/2005/ncx/"
|
||||||
xmlns:py="http://genshi.edgewall.org/"
|
xmlns:py="http://genshi.edgewall.org/"
|
||||||
|
xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
|
||||||
>
|
>
|
||||||
<head>
|
<head>
|
||||||
<meta name="dtb:uid" content="${uid}"/>
|
<meta name="dtb:uid" content="${uid}"/>
|
||||||
@ -23,6 +24,8 @@ import re
|
|||||||
${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
|
${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
|
||||||
${'%*s'%(4*level,'')}</navLabel>
|
${'%*s'%(4*level,'')}</navLabel>
|
||||||
${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
|
${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
|
||||||
|
${'%*s'%(4*level,'')}<calibre:meta py:if="np.author" name="author">${np.author}</calibre:meta>
|
||||||
|
${'%*s'%(4*level,'')}<calibre:meta py:if="np.description" name="description">${np.description}</calibre:meta>
|
||||||
<py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
|
<py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
|
||||||
${'%*s'%(4*level,'')}</navPoint>
|
${'%*s'%(4*level,'')}</navPoint>
|
||||||
</py:def>
|
</py:def>
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
<meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
|
<meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
|
||||||
<meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
|
<meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
|
||||||
<meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
|
<meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
|
||||||
|
<meta py:if="mi.publication_type is not None" name="calibre:publication_type" content="${mi.publication_type}" />
|
||||||
<py:for each="tag in mi.tags">
|
<py:for each="tag in mi.tags">
|
||||||
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
|
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
|
||||||
</py:for>
|
</py:for>
|
||||||
|
@ -440,10 +440,12 @@ class OPF(object):
|
|||||||
language = MetadataField('language')
|
language = MetadataField('language')
|
||||||
comments = MetadataField('description')
|
comments = MetadataField('description')
|
||||||
category = MetadataField('category')
|
category = MetadataField('category')
|
||||||
|
rights = MetadataField('rights')
|
||||||
series = MetadataField('series', is_dc=False)
|
series = MetadataField('series', is_dc=False)
|
||||||
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
||||||
rating = MetadataField('rating', is_dc=False, formatter=int)
|
rating = MetadataField('rating', is_dc=False, formatter=int)
|
||||||
pubdate = MetadataField('date', formatter=parser.parse)
|
pubdate = MetadataField('date', formatter=parser.parse)
|
||||||
|
publication_type = MetadataField('publication_type', is_dc=False)
|
||||||
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
|
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup):
|
|||||||
class TOC(list):
|
class TOC(list):
|
||||||
|
|
||||||
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
|
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
|
||||||
base_path=os.getcwd(), type='unknown'):
|
base_path=os.getcwd(), type='unknown', author=None,
|
||||||
|
description=None):
|
||||||
self.href = href
|
self.href = href
|
||||||
self.fragment = fragment
|
self.fragment = fragment
|
||||||
if not self.fragment:
|
if not self.fragment:
|
||||||
@ -31,6 +32,8 @@ class TOC(list):
|
|||||||
self.base_path = base_path
|
self.base_path = base_path
|
||||||
self.play_order = play_order
|
self.play_order = play_order
|
||||||
self.type = type
|
self.type = type
|
||||||
|
self.author = author
|
||||||
|
self.description = description
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
lines = ['TOC: %s#%s'%(self.href, self.fragment)]
|
lines = ['TOC: %s#%s'%(self.href, self.fragment)]
|
||||||
@ -59,11 +62,13 @@ class TOC(list):
|
|||||||
list.remove(self, entry)
|
list.remove(self, entry)
|
||||||
entry.parent = None
|
entry.parent = None
|
||||||
|
|
||||||
def add_item(self, href, fragment, text, play_order=None, type='unknown'):
|
def add_item(self, href, fragment, text, play_order=None, type='unknown',
|
||||||
|
author=None, description=None):
|
||||||
if play_order is None:
|
if play_order is None:
|
||||||
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
|
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
|
||||||
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
|
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
|
||||||
base_path=self.base_path, play_order=play_order, type=type))
|
base_path=self.base_path, play_order=play_order,
|
||||||
|
type=type, author=author, description=description))
|
||||||
return self[-1]
|
return self[-1]
|
||||||
|
|
||||||
def top_level_items(self):
|
def top_level_items(self):
|
||||||
|
@ -6,6 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
@ -31,16 +32,80 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
OptionRecommendation(name='toc_title', recommended_value=None,
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
||||||
help=_('Title for any generated in-line table of contents.')
|
help=_('Title for any generated in-line table of contents.')
|
||||||
),
|
),
|
||||||
OptionRecommendation(name='mobi_periodical',
|
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
|
||||||
help=_('Generate a periodical rather than a book.')
|
|
||||||
),
|
|
||||||
OptionRecommendation(name='dont_compress',
|
OptionRecommendation(name='dont_compress',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
help=_('Disable compression of the file contents.')
|
help=_('Disable compression of the file contents.')
|
||||||
),
|
),
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def check_for_periodical(self):
|
||||||
|
if self.oeb.metadata.publication_type and \
|
||||||
|
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
|
||||||
|
self.periodicalize_toc()
|
||||||
|
self.check_for_masthead()
|
||||||
|
self.opts.mobi_periodical = True
|
||||||
|
else:
|
||||||
|
self.opts.mobi_periodical = False
|
||||||
|
|
||||||
|
def check_for_masthead(self):
|
||||||
|
found = False
|
||||||
|
for typ in self.oeb.guide:
|
||||||
|
if type == 'masthead':
|
||||||
|
found = True
|
||||||
|
break
|
||||||
|
if not found:
|
||||||
|
self.oeb.log.debug('No masthead found, generating default one...')
|
||||||
|
from calibre.resources import server_resources
|
||||||
|
try:
|
||||||
|
from PIL import Image as PILImage
|
||||||
|
PILImage
|
||||||
|
except ImportError:
|
||||||
|
import Image as PILImage
|
||||||
|
|
||||||
|
raw = StringIO(server_resources['calibre.png'])
|
||||||
|
im = PILImage.open(raw)
|
||||||
|
of = StringIO()
|
||||||
|
im.save(of, 'GIF')
|
||||||
|
raw = of.getvalue()
|
||||||
|
id, href = self.oeb.manifest.generate('masthead', 'masthead')
|
||||||
|
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
|
||||||
|
self.oeb.guide.add('masthead', 'Masthead Image', href)
|
||||||
|
|
||||||
|
|
||||||
|
def periodicalize_toc(self):
|
||||||
|
from calibre.ebooks.oeb.base import TOC
|
||||||
|
toc = self.oeb.toc
|
||||||
|
if toc and toc[0].klass != 'periodical':
|
||||||
|
self.log('Converting TOC for MOBI periodical indexing...')
|
||||||
|
articles = {}
|
||||||
|
if toc.depth < 3:
|
||||||
|
sections = [TOC(klass='section')]
|
||||||
|
for x in toc:
|
||||||
|
sections[0].append(x)
|
||||||
|
else:
|
||||||
|
sections = list(toc)
|
||||||
|
for x in sections:
|
||||||
|
x.klass = 'section'
|
||||||
|
for sec in sections:
|
||||||
|
articles[id(sec)] = []
|
||||||
|
for a in list(sec):
|
||||||
|
a.klass = 'article'
|
||||||
|
articles[id(sec)].append(a)
|
||||||
|
sec.nodes.remove(a)
|
||||||
|
root = TOC(klass='periodical',
|
||||||
|
title=unicode(self.oeb.metadata.title[0]))
|
||||||
|
for s in sections:
|
||||||
|
if articles[id(s)]:
|
||||||
|
for a in articles[id(s)]:
|
||||||
|
s.nodes.append(a)
|
||||||
|
root.nodes.append(s)
|
||||||
|
|
||||||
|
for x in list(toc.nodes):
|
||||||
|
toc.nodes.remove(x)
|
||||||
|
|
||||||
|
toc.nodes.append(root)
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts, self.oeb = log, opts, oeb
|
self.log, self.opts, self.oeb = log, opts, oeb
|
||||||
from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
|
from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
|
||||||
@ -60,6 +125,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
rasterizer(oeb, opts)
|
rasterizer(oeb, opts)
|
||||||
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
||||||
mobimlizer(oeb, opts)
|
mobimlizer(oeb, opts)
|
||||||
|
self.check_for_periodical()
|
||||||
write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
|
write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
|
||||||
writer = MobiWriter(opts, imagemax=imagemax,
|
writer = MobiWriter(opts, imagemax=imagemax,
|
||||||
compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
|
compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -47,7 +47,7 @@ XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
|
|||||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||||
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
|
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
|
||||||
'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS,
|
'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS,
|
||||||
'mbp': MBP_NS }
|
'mbp': MBP_NS, 'calibre': CALIBRE_NS }
|
||||||
|
|
||||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||||
@ -455,7 +455,8 @@ class Metadata(object):
|
|||||||
'description', 'format', 'identifier', 'language',
|
'description', 'format', 'identifier', 'language',
|
||||||
'publisher', 'relation', 'rights', 'source',
|
'publisher', 'relation', 'rights', 'source',
|
||||||
'subject', 'title', 'type'])
|
'subject', 'title', 'type'])
|
||||||
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp'])
|
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
|
||||||
|
'publication_type'])
|
||||||
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
||||||
'scheme': OPF('scheme'), 'event': OPF('event'),
|
'scheme': OPF('scheme'), 'event': OPF('event'),
|
||||||
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
||||||
|
@ -192,6 +192,7 @@ class OEBReader(object):
|
|||||||
if not scheme and href not in known:
|
if not scheme and href not in known:
|
||||||
new.add(href)
|
new.add(href)
|
||||||
unchecked.clear()
|
unchecked.clear()
|
||||||
|
warned = set([])
|
||||||
for href in new:
|
for href in new:
|
||||||
known.add(href)
|
known.add(href)
|
||||||
is_invalid = False
|
is_invalid = False
|
||||||
@ -202,9 +203,13 @@ class OEBReader(object):
|
|||||||
if is_invalid:
|
if is_invalid:
|
||||||
continue
|
continue
|
||||||
if not self.oeb.container.exists(href):
|
if not self.oeb.container.exists(href):
|
||||||
self.logger.warn('Referenced file %r not found' % href)
|
if href not in warned:
|
||||||
|
self.logger.warn('Referenced file %r not found' % href)
|
||||||
|
warned.add(href)
|
||||||
continue
|
continue
|
||||||
self.logger.warn('Referenced file %r not in manifest' % href)
|
if href not in warned:
|
||||||
|
self.logger.warn('Referenced file %r not in manifest' % href)
|
||||||
|
warned.add(href)
|
||||||
id, _ = manifest.generate(id='added')
|
id, _ = manifest.generate(id='added')
|
||||||
guessed = guess_type(href)[0]
|
guessed = guess_type(href)[0]
|
||||||
media_type = guessed or BINARY_MIME
|
media_type = guessed or BINARY_MIME
|
||||||
@ -330,14 +335,14 @@ class OEBReader(object):
|
|||||||
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
|
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
|
||||||
|
|
||||||
authorElement = xpath(child,
|
authorElement = xpath(child,
|
||||||
'descendant::mbp:meta[@name = "author"]')
|
'descendant::calibre:meta[@name = "author"]')
|
||||||
if authorElement :
|
if authorElement :
|
||||||
author = authorElement[0].text
|
author = authorElement[0].text
|
||||||
else :
|
else :
|
||||||
author = None
|
author = None
|
||||||
|
|
||||||
descriptionElement = xpath(child,
|
descriptionElement = xpath(child,
|
||||||
'descendant::mbp:meta[@name = "description"]')
|
'descendant::calibre:meta[@name = "description"]')
|
||||||
if descriptionElement :
|
if descriptionElement :
|
||||||
description = descriptionElement[0].text
|
description = descriptionElement[0].text
|
||||||
else :
|
else :
|
||||||
|
@ -63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log):
|
|||||||
if mi.timestamp is not None:
|
if mi.timestamp is not None:
|
||||||
m.clear('timestamp')
|
m.clear('timestamp')
|
||||||
m.add('timestamp', mi.timestamp.isoformat())
|
m.add('timestamp', mi.timestamp.isoformat())
|
||||||
|
if mi.rights is not None:
|
||||||
|
m.clear('rights')
|
||||||
|
m.add('rights', mi.rights)
|
||||||
|
if mi.publication_type is not None:
|
||||||
|
m.clear('publication_type')
|
||||||
|
m.add('publication_type', mi.publication_type)
|
||||||
if not m.timestamp:
|
if not m.timestamp:
|
||||||
m.add('timestamp', datetime.utcnow().isoformat())
|
m.add('timestamp', datetime.utcnow().isoformat())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MergeMetadata(object):
|
class MergeMetadata(object):
|
||||||
'Merge in user metadata, including cover'
|
'Merge in user metadata, including cover'
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ from urlparse import urlparse
|
|||||||
|
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
|
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
|
||||||
from calibre.ebooks import ConversionError
|
from calibre.ebooks import ConversionError
|
||||||
|
|
||||||
def XPath(x):
|
def XPath(x):
|
||||||
try:
|
try:
|
||||||
return etree.XPath(x, namespaces=XPNSMAP)
|
return etree.XPath(x, namespaces=XPNSMAP)
|
||||||
|
@ -64,8 +64,5 @@ class ManifestTrimmer(object):
|
|||||||
unchecked = new
|
unchecked = new
|
||||||
for item in oeb.manifest.values():
|
for item in oeb.manifest.values():
|
||||||
if item not in used:
|
if item not in used:
|
||||||
if getattr(self.opts, 'mobi_periodical', False) and \
|
|
||||||
item.href == 'images/mastheadImage.gif':
|
|
||||||
continue
|
|
||||||
oeb.logger.info('Trimming %r from manifest' % item.href)
|
oeb.logger.info('Trimming %r from manifest' % item.href)
|
||||||
oeb.manifest.remove(item)
|
oeb.manifest.remove(item)
|
||||||
|
@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
|
|||||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||||
Widget.__init__(self, parent, 'mobi_output',
|
Widget.__init__(self, parent, 'mobi_output',
|
||||||
['prefer_author_sort', 'rescale_images', 'toc_title',
|
['prefer_author_sort', 'rescale_images', 'toc_title',
|
||||||
'dont_compress', 'mobi_periodical']
|
'dont_compress',]
|
||||||
)
|
)
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -41,7 +41,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="6" column="0">
|
<item row="5" column="0">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -61,13 +61,6 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
|
||||||
<widget class="QCheckBox" name="opt_mobi_periodical">
|
|
||||||
<property name="text">
|
|
||||||
<string>Generate a periodical rather than a book</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="0" column="0">
|
<item row="0" column="0">
|
||||||
<widget class="QCheckBox" name="opt_no_inline_toc">
|
<widget class="QCheckBox" name="opt_no_inline_toc">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
|
@ -16,6 +16,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, QString, \
|
|||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
from calibre.utils.pyparsing import ParseException
|
||||||
from calibre.library.database2 import FIELD_MAP
|
from calibre.library.database2 import FIELD_MAP
|
||||||
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
|
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
|
||||||
error_dialog
|
error_dialog
|
||||||
@ -267,7 +268,11 @@ class BooksModel(QAbstractTableModel):
|
|||||||
self.count_changed()
|
self.count_changed()
|
||||||
|
|
||||||
def search(self, text, refinement, reset=True):
|
def search(self, text, refinement, reset=True):
|
||||||
self.db.search(text)
|
try:
|
||||||
|
self.db.search(text)
|
||||||
|
except ParseException:
|
||||||
|
self.emit(SIGNAL('parse_exception()'))
|
||||||
|
return
|
||||||
self.last_search = text
|
self.last_search = text
|
||||||
if reset:
|
if reset:
|
||||||
self.clear_caches()
|
self.clear_caches()
|
||||||
@ -898,7 +903,12 @@ class DeviceBooksModel(BooksModel):
|
|||||||
if not text or not text.strip():
|
if not text or not text.strip():
|
||||||
self.map = list(range(len(self.db)))
|
self.map = list(range(len(self.db)))
|
||||||
else:
|
else:
|
||||||
matches = self.search_engine.parse(text)
|
try:
|
||||||
|
matches = self.search_engine.parse(text)
|
||||||
|
except ParseException:
|
||||||
|
self.emit(SIGNAL('parse_exception()'))
|
||||||
|
return
|
||||||
|
|
||||||
self.map = []
|
self.map = []
|
||||||
for i in range(len(self.db)):
|
for i in range(len(self.db)):
|
||||||
if i in matches:
|
if i in matches:
|
||||||
|
@ -221,9 +221,20 @@ class LibraryServer(object):
|
|||||||
|
|
||||||
def get_format(self, id, format):
|
def get_format(self, id, format):
|
||||||
format = format.upper()
|
format = format.upper()
|
||||||
fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
|
fmt = self.db.format(id, format, index_is_id=True, as_file=True,
|
||||||
|
mode='r+b')
|
||||||
if fmt is None:
|
if fmt is None:
|
||||||
raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
|
raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
|
||||||
|
if format == 'EPUB':
|
||||||
|
from tempfile import TemporaryFile
|
||||||
|
from calibre.ebooks.metadata.meta import set_metadata
|
||||||
|
raw = fmt.read()
|
||||||
|
fmt = TemporaryFile()
|
||||||
|
fmt.write(raw)
|
||||||
|
fmt.seek(0)
|
||||||
|
set_metadata(fmt, self.db.get_metadata(id, index_is_id=True),
|
||||||
|
'epub')
|
||||||
|
fmt.seek(0)
|
||||||
mt = guess_type('dummy.'+format.lower())[0]
|
mt = guess_type('dummy.'+format.lower())[0]
|
||||||
if mt is None:
|
if mt is None:
|
||||||
mt = 'application/octet-stream'
|
mt = 'application/octet-stream'
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe):
|
|||||||
#: Delay between consecutive downloads in seconds
|
#: Delay between consecutive downloads in seconds
|
||||||
delay = 0
|
delay = 0
|
||||||
|
|
||||||
|
#: Publication type
|
||||||
|
#: Set to newspaper, magazine or blog
|
||||||
|
publication_type = 'unknown'
|
||||||
|
|
||||||
#: Number of simultaneous downloads. Set to 1 if the server is picky.
|
#: Number of simultaneous downloads. Set to 1 if the server is picky.
|
||||||
#: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
|
#: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
|
||||||
simultaneous_downloads = 5
|
simultaneous_downloads = 5
|
||||||
@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
|
mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
|
||||||
mi.publisher = __appname__
|
mi.publisher = __appname__
|
||||||
mi.author_sort = __appname__
|
mi.author_sort = __appname__
|
||||||
|
mi.publication_type = 'periodical:'+self.publication_type
|
||||||
opf_path = os.path.join(dir, 'index.opf')
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
ncx_path = os.path.join(dir, 'index.ncx')
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
opf = OPFCreator(dir, mi)
|
opf = OPFCreator(dir, mi)
|
||||||
@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe):
|
|||||||
for j, a in enumerate(f):
|
for j, a in enumerate(f):
|
||||||
if getattr(a, 'downloaded', False):
|
if getattr(a, 'downloaded', False):
|
||||||
adir = 'feed_%d/article_%d/'%(num, j)
|
adir = 'feed_%d/article_%d/'%(num, j)
|
||||||
|
desc = a.text_summary
|
||||||
|
if not desc:
|
||||||
|
desc = None
|
||||||
entries.append('%sindex.html'%adir)
|
entries.append('%sindex.html'%adir)
|
||||||
po = self.play_order_map.get(entries[-1], None)
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
po = self.play_order_counter
|
po = self.play_order_counter
|
||||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||||
play_order=po)
|
play_order=po, description=desc)
|
||||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||||
for sp in a.sub_pages:
|
for sp in a.sub_pages:
|
||||||
prefix = os.path.commonprefix([opf_path, sp])
|
prefix = os.path.commonprefix([opf_path, sp])
|
||||||
@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
po = self.play_order_counter
|
po = self.play_order_counter
|
||||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po))
|
desc = f.description
|
||||||
|
if not desc:
|
||||||
|
desc = None
|
||||||
|
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||||
|
f.title, play_order=po, description=desc))
|
||||||
else:
|
else:
|
||||||
entries.append('feed_%d/index.html'%0)
|
entries.append('feed_%d/index.html'%0)
|
||||||
feed_index(0, toc)
|
feed_index(0, toc)
|
||||||
|
@ -7,9 +7,9 @@ clarin.com
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Clarin(BasicNewsRecipe):
|
class Clarin(BasicNewsRecipe):
|
||||||
title = 'Clarin'
|
title = 'Clarin'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -22,14 +22,19 @@ class Clarin(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
encoding = 'cp1252'
|
||||||
|
language = _('Spanish')
|
||||||
|
lang = 'es-AR'
|
||||||
|
direction = 'ltr'
|
||||||
|
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='a' , attrs={'class':'Imp' })
|
dict(name='a' , attrs={'class':'Imp' })
|
||||||
@ -48,17 +53,20 @@ class Clarin(BasicNewsRecipe):
|
|||||||
,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
|
,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def print_version(self, url):
|
||||||
artl = article.get('link', None)
|
rest = url.partition('-0')[-1]
|
||||||
rest = artl.partition('-0')[-1]
|
|
||||||
lmain = rest.partition('.')[0]
|
lmain = rest.partition('.')[0]
|
||||||
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
||||||
|
return lurl
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
soup.html['lang'] = self.lang
|
||||||
soup.head.insert(0,mtag)
|
soup.html['dir' ] = self.direction
|
||||||
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
|
soup.head.insert(0,mlang)
|
||||||
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
@ -7,94 +7,108 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
# http://online.wsj.com/page/us_in_todays_paper.html
|
# http://online.wsj.com/page/us_in_todays_paper.html
|
||||||
|
|
||||||
class WallStreetJournal(BasicNewsRecipe):
|
class WallStreetJournal(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'The Wall Street Journal'
|
title = 'The Wall Street Journal'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal and Sujata Raman'
|
||||||
description = 'News and current affairs.'
|
description = 'News and current affairs.'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
language = _('English')
|
language = _('English')
|
||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
timefmt = ' [%a, %b %d, %Y]'
|
timefmt = ' [%a, %b %d, %Y]'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
|
extra_css = '''h1{color:#093D72 ; font-size:large ; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; }
|
||||||
|
h2{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
|
||||||
|
.subhead{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
|
||||||
|
.insettipUnit {color:#666666; font-family:Arial,Sans-serif;font-size:xx-small }
|
||||||
|
.targetCaption{ font-size:x-small; color:#333333; font-family:Arial,Helvetica,sans-serif}
|
||||||
|
.article{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
||||||
|
.tagline {color:#333333; font-size:xx-small}
|
||||||
|
.dateStamp {color:#666666; font-family:Arial,Helvetica,sans-serif}
|
||||||
|
h3{color:blue ;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
|
||||||
|
.byline{color:blue;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
|
||||||
|
h6{color:#333333; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic; }
|
||||||
|
.paperLocation{color:#666666; font-size:xx-small}'''
|
||||||
|
|
||||||
remove_tags_before = dict(name='h1')
|
remove_tags_before = dict(name='h1')
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive"]),
|
dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow"]),
|
||||||
{'class':['more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
|
{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map','insettip','more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
|
||||||
dict(rel='shortcut icon'),
|
dict(rel='shortcut icon'),
|
||||||
]
|
]
|
||||||
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
|
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
|
||||||
|
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://commerce.wsj.com/auth/login')
|
br.open('http://commerce.wsj.com/auth/login')
|
||||||
br.select_form(nr=0)
|
br.select_form(nr=0)
|
||||||
br['user'] = self.username
|
br['user'] = self.username
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
||||||
tag.name = 'div'
|
tag.name = 'div'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
try:
|
try:
|
||||||
return article.feedburner_origlink.split('?')[0]
|
return article.feedburner_origlink.split('?')[0]
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return article.link.split('?')[0]
|
return article.link.split('?')[0]
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
|
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
#('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
|
#('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
|
||||||
#('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
|
#('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
|
||||||
#('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
|
#('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
|
||||||
(' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
|
(' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
|
||||||
(' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
|
(' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
|
||||||
# ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
|
# ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
|
||||||
('Today\'s Newspaper - Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
|
('Today\'s Newspaper - Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
|
||||||
('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
|
('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
|
||||||
('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
|
('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
|
||||||
('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'),
|
('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'),
|
||||||
('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'),
|
('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'),
|
||||||
('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'),
|
('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'),
|
||||||
('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'),
|
('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'),
|
||||||
('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'),
|
('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'),
|
||||||
('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'),
|
('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'),
|
||||||
('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'),
|
('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'),
|
||||||
('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'),
|
('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'),
|
||||||
('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'),
|
('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'),
|
||||||
('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'),
|
('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'),
|
||||||
('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
|
('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
|
||||||
('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'),
|
('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'),
|
||||||
('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'),
|
('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'),
|
||||||
('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'),
|
('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'),
|
||||||
('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'),
|
('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'),
|
||||||
('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'),
|
('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'),
|
||||||
('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'),
|
('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'),
|
||||||
('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'),
|
('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'),
|
||||||
('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'),
|
('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'),
|
||||||
('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'),
|
('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'),
|
||||||
('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'),
|
('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'),
|
||||||
('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'),
|
('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'),
|
||||||
('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'),
|
('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'),
|
||||||
('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'),
|
('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'),
|
||||||
('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'),
|
('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'),
|
||||||
('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'),
|
('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'),
|
||||||
('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
|
('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
|
||||||
('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'),
|
('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'),
|
||||||
('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'),
|
('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'),
|
||||||
('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'),
|
('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'),
|
||||||
('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'),
|
('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'),
|
||||||
('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'),
|
('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'),
|
||||||
('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'),
|
('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'),
|
||||||
('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'),
|
('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'),
|
||||||
('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'),
|
('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'),
|
||||||
('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'),
|
('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user