mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
0e8cc35f18
@ -250,6 +250,19 @@ class KindleDXOutput(OutputProfile):
|
||||
def tags_to_string(cls, tags):
|
||||
return 'ttt '.join(tags)+'ttt '
|
||||
|
||||
class IlliadOutput(OutputProfile):

    # Identification and the user-visible (translated) description
    name = 'Illiad'
    short_name = 'illiad'
    description = _('This profile is intended for the Irex Illiad.')

    # Display geometry; comics use the same area as regular books
    screen_size = (760, 925)
    comic_screen_size = (760, 925)
    dpi = 160.0

    # Font settings (presumably base size and the size ladder -- confirm
    # against OutputProfile)
    fbase = 12
    fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
|
||||
|
||||
|
||||
|
||||
output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
|
||||
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
|
||||
|
@ -17,6 +17,34 @@ from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from lxml import etree
|
||||
|
||||
# Bare (namespace-free) names of the HTML elements treated as block level.
# Consulted when deciding whether a replaced <br> needs a visible height.
block_level_tags = (
    'address', 'body', 'blockquote', 'center', 'dir', 'div', 'dl',
    'fieldset', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'hr', 'isindex', 'menu', 'noframes', 'noscript',
    'ol', 'p', 'pre', 'table', 'ul',
)
|
||||
|
||||
|
||||
class EPUBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -197,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
if 'titlepage' in self.oeb.guide.refs:
|
||||
self.oeb.guide.refs['titlepage'].href = item.href
|
||||
|
||||
|
||||
|
||||
def condense_ncx(self, ncx_path):
|
||||
if not self.opts.pretty_print:
|
||||
tree = etree.parse(ncx_path)
|
||||
@ -210,46 +236,46 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
|
||||
open(ncx_path, 'wb').write(compressed)
|
||||
|
||||
|
||||
|
||||
def workaround_ade_quirks(self):
|
||||
'''
|
||||
Perform various markup transforms to get the output to render correctly
|
||||
in the quirky ADE.
|
||||
'''
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, XHTML, OEB_STYLES
|
||||
from lxml.etree import XPath as _XPath
|
||||
from functools import partial
|
||||
XPath = partial(_XPath, namespaces=XPNSMAP)
|
||||
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
|
||||
|
||||
for x in self.oeb.spine:
|
||||
root = x.data
|
||||
body = XPath('//h:body')(root)
|
||||
if body:
|
||||
body = body[0]
|
||||
|
||||
# Replace <br> that are children of <body> as ADE doesn't handle them
|
||||
if hasattr(body, 'xpath'):
|
||||
for br in XPath('./h:br')(body):
|
||||
if br.getparent() is None:
|
||||
continue
|
||||
try:
|
||||
sibling = br.itersiblings().next()
|
||||
prior = br.itersiblings(preceding=True).next()
|
||||
priortag = barename(prior.tag)
|
||||
priortext = prior.tail
|
||||
except:
|
||||
sibling = None
|
||||
priortag = 'body'
|
||||
priortext = body.text
|
||||
if priortext:
|
||||
priortext = priortext.strip()
|
||||
br.tag = XHTML('p')
|
||||
br.text = u'\u00a0'
|
||||
if (br.tail and br.tail.strip()) or sibling is None or \
|
||||
getattr(sibling, 'tag', '') != XHTML('br'):
|
||||
style = br.get('style', '').split(';')
|
||||
style = filter(None, map(lambda x: x.strip(), style))
|
||||
style.append('margin: 0pt; border:0pt; height:0pt')
|
||||
br.set('style', '; '.join(style))
|
||||
style = br.get('style', '').split(';')
|
||||
style = filter(None, map(lambda x: x.strip(), style))
|
||||
style.append('margin:0pt; border:0pt')
|
||||
# If the prior tag is a block (including a <br> we replaced)
|
||||
# then this <br> replacement should have a 1-line height.
|
||||
# Otherwise it should have no height.
|
||||
if not priortext and priortag in block_level_tags:
|
||||
style.append('height:1em')
|
||||
else:
|
||||
sibling.getparent().remove(sibling)
|
||||
if sibling.tail:
|
||||
if not br.tail:
|
||||
br.tail = ''
|
||||
br.tail += sibling.tail
|
||||
style.append('height:0pt')
|
||||
br.set('style', '; '.join(style))
|
||||
|
||||
for tag in XPath('//h:embed')(root):
|
||||
tag.getparent().remove(tag)
|
||||
|
@ -218,7 +218,7 @@ class MetaInformation(object):
|
||||
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
|
||||
'manifest', 'spine', 'toc', 'cover', 'language',
|
||||
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
|
||||
'pubdate'):
|
||||
'pubdate', 'rights', 'publication_type'):
|
||||
if hasattr(mi, attr):
|
||||
setattr(ans, attr, getattr(mi, attr))
|
||||
|
||||
@ -243,7 +243,8 @@ class MetaInformation(object):
|
||||
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
|
||||
'series', 'series_index', 'rating', 'isbn', 'language',
|
||||
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
|
||||
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'
|
||||
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
|
||||
'rights', 'publication_type',
|
||||
):
|
||||
setattr(self, x, getattr(mi, x, None))
|
||||
|
||||
@ -262,7 +263,8 @@ class MetaInformation(object):
|
||||
'publisher', 'series', 'series_index', 'rating',
|
||||
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
||||
'cover', 'language', 'guide', 'book_producer',
|
||||
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'):
|
||||
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
|
||||
'publication_type'):
|
||||
if hasattr(mi, attr):
|
||||
val = getattr(mi, attr)
|
||||
if val is not None:
|
||||
@ -332,6 +334,8 @@ class MetaInformation(object):
|
||||
fmt('Timestamp', self.timestamp.isoformat(' '))
|
||||
if self.pubdate is not None:
|
||||
fmt('Published', self.pubdate.isoformat(' '))
|
||||
if self.rights is not None:
|
||||
fmt('Rights', unicode(self.rights))
|
||||
if self.lccn:
|
||||
fmt('LCCN', unicode(self.lccn))
|
||||
if self.lcc:
|
||||
@ -362,6 +366,8 @@ class MetaInformation(object):
|
||||
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
|
||||
if self.pubdate is not None:
|
||||
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
|
||||
if self.rights is not None:
|
||||
ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))]
|
||||
for i, x in enumerate(ans):
|
||||
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
||||
return u'<table>%s</table>'%u'\n'.join(ans)
|
||||
|
@ -7,6 +7,7 @@ import re
|
||||
xml:lang="en"
|
||||
xmlns="http://www.daisy.org/z3986/2005/ncx/"
|
||||
xmlns:py="http://genshi.edgewall.org/"
|
||||
xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
|
||||
>
|
||||
<head>
|
||||
<meta name="dtb:uid" content="${uid}"/>
|
||||
@ -23,6 +24,8 @@ import re
|
||||
${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
|
||||
${'%*s'%(4*level,'')}</navLabel>
|
||||
${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
|
||||
${'%*s'%(4*level,'')}<calibre:meta py:if="np.author" name="author">${np.author}</calibre:meta>
|
||||
${'%*s'%(4*level,'')}<calibre:meta py:if="np.description" name="description">${np.description}</calibre:meta>
|
||||
<py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
|
||||
${'%*s'%(4*level,'')}</navPoint>
|
||||
</py:def>
|
||||
|
@ -19,6 +19,7 @@
|
||||
<meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
|
||||
<meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
|
||||
<meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
|
||||
<meta py:if="mi.publication_type is not None" name="calibre:publication_type" content="${mi.publication_type}" />
|
||||
<py:for each="tag in mi.tags">
|
||||
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
|
||||
</py:for>
|
||||
|
@ -440,10 +440,12 @@ class OPF(object):
|
||||
language = MetadataField('language')
|
||||
comments = MetadataField('description')
|
||||
category = MetadataField('category')
|
||||
rights = MetadataField('rights')
|
||||
series = MetadataField('series', is_dc=False)
|
||||
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
||||
rating = MetadataField('rating', is_dc=False, formatter=int)
|
||||
pubdate = MetadataField('date', formatter=parser.parse)
|
||||
publication_type = MetadataField('publication_type', is_dc=False)
|
||||
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
|
||||
|
||||
|
||||
|
@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup):
|
||||
class TOC(list):
|
||||
|
||||
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
|
||||
base_path=os.getcwd(), type='unknown'):
|
||||
base_path=os.getcwd(), type='unknown', author=None,
|
||||
description=None):
|
||||
self.href = href
|
||||
self.fragment = fragment
|
||||
if not self.fragment:
|
||||
@ -31,6 +32,8 @@ class TOC(list):
|
||||
self.base_path = base_path
|
||||
self.play_order = play_order
|
||||
self.type = type
|
||||
self.author = author
|
||||
self.description = description
|
||||
|
||||
def __str__(self):
|
||||
lines = ['TOC: %s#%s'%(self.href, self.fragment)]
|
||||
@ -59,11 +62,13 @@ class TOC(list):
|
||||
list.remove(self, entry)
|
||||
entry.parent = None
|
||||
|
||||
def add_item(self, href, fragment, text, play_order=None, type='unknown'):
|
||||
def add_item(self, href, fragment, text, play_order=None, type='unknown',
|
||||
author=None, description=None):
|
||||
if play_order is None:
|
||||
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
|
||||
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
|
||||
base_path=self.base_path, play_order=play_order, type=type))
|
||||
base_path=self.base_path, play_order=play_order,
|
||||
type=type, author=author, description=description))
|
||||
return self[-1]
|
||||
|
||||
def top_level_items(self):
|
||||
|
@ -6,6 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
@ -31,16 +32,80 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
OptionRecommendation(name='toc_title', recommended_value=None,
|
||||
help=_('Title for any generated in-line table of contents.')
|
||||
),
|
||||
OptionRecommendation(name='mobi_periodical',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Generate a periodical rather than a book.')
|
||||
),
|
||||
OptionRecommendation(name='dont_compress',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Disable compression of the file contents.')
|
||||
),
|
||||
])
|
||||
|
||||
def check_for_periodical(self):
    '''
    Decide whether the book is a periodical and record the result in
    ``self.opts.mobi_periodical``.

    The book counts as a periodical when its first ``publication_type``
    metadata entry starts with ``periodical:``. In that case the TOC is
    restructured and a masthead is ensured before the option is set.
    '''
    pub_type = self.oeb.metadata.publication_type
    is_periodical = bool(pub_type) and \
            unicode(pub_type[0]).startswith('periodical:')
    if is_periodical:
        self.periodicalize_toc()
        self.check_for_masthead()
    self.opts.mobi_periodical = is_periodical
|
||||
|
||||
def check_for_masthead(self):
    '''
    Make sure the book's guide contains a ``masthead`` reference.

    When none of the guide entries is of type ``masthead``, the bundled
    ``calibre.png`` server resource is re-encoded as GIF, added to the
    manifest as ``image/gif`` and registered in the guide under the
    ``masthead`` type. When a masthead is already present nothing is
    changed.
    '''
    # The loop variable must be compared, not the builtin ``type``:
    # ``type == 'masthead'`` is always False, which made this method
    # generate a default masthead even when the book already had one.
    # (Assumes iterating the guide yields reference type names, as the
    # original loop did -- confirm against the Guide class.)
    if any(typ == 'masthead' for typ in self.oeb.guide):
        return

    self.oeb.log.debug('No masthead found, generating default one...')
    from calibre.resources import server_resources
    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        # Older PIL installs expose Image as a top-level module
        import Image as PILImage

    # Convert the bundled PNG into an in-memory GIF
    raw = StringIO(server_resources['calibre.png'])
    im = PILImage.open(raw)
    of = StringIO()
    im.save(of, 'GIF')
    raw = of.getvalue()

    # Register the generated image in both the manifest and the guide
    item_id, href = self.oeb.manifest.generate('masthead', 'masthead')
    self.oeb.manifest.add(item_id, href, 'image/gif', data=raw)
    self.oeb.guide.add('masthead', 'Masthead Image', href)
|
||||
|
||||
|
||||
def periodicalize_toc(self):
    '''
    Rearrange the book's TOC into a periodical / section / article tree.

    Nothing happens when the TOC is empty or its first entry is already
    of class ``periodical``. Otherwise the top-level entries become
    sections (or are gathered under a single new section for shallow
    TOCs), their children become articles, and all non-empty sections
    are placed under one new ``periodical`` root titled after the book.
    '''
    from calibre.ebooks.oeb.base import TOC
    toc = self.oeb.toc
    if not (toc and toc[0].klass != 'periodical'):
        return

    self.log('Converting TOC for MOBI periodical indexing...')

    # NOTE(review): ``toc.depth`` is compared as a plain attribute; if
    # TOC.depth is a method this comparison needs a call -- confirm.
    if toc.depth < 3:
        # Shallow TOC: gather every entry under one synthetic section
        wrapper = TOC(klass='section')
        for node in toc:
            wrapper.append(node)
        sections = [wrapper]
    else:
        sections = list(toc)
        for section in sections:
            section.klass = 'section'

    # Detach the children of each section, marking them as articles.
    # Keyed by id() so the lookup is by object identity.
    articles = {}
    for section in sections:
        detached = articles[id(section)] = []
        for article in list(section):
            article.klass = 'article'
            detached.append(article)
            section.nodes.remove(article)

    root = TOC(klass='periodical',
            title=unicode(self.oeb.metadata.title[0]))

    # Re-attach the articles; sections without articles are dropped
    for section in sections:
        detached = articles[id(section)]
        if detached:
            for article in detached:
                section.nodes.append(article)
            root.nodes.append(section)

    # Replace the old top-level entries with the single periodical root
    for node in list(toc.nodes):
        toc.nodes.remove(node)
    toc.nodes.append(root)
|
||||
|
||||
|
||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||
self.log, self.opts, self.oeb = log, opts, oeb
|
||||
from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
|
||||
@ -60,6 +125,7 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
rasterizer(oeb, opts)
|
||||
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
||||
mobimlizer(oeb, opts)
|
||||
self.check_for_periodical()
|
||||
write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
|
||||
writer = MobiWriter(opts, imagemax=imagemax,
|
||||
compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -47,7 +47,7 @@ XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
|
||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
|
||||
'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS,
|
||||
'mbp': MBP_NS }
|
||||
'mbp': MBP_NS, 'calibre': CALIBRE_NS }
|
||||
|
||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||
@ -455,7 +455,8 @@ class Metadata(object):
|
||||
'description', 'format', 'identifier', 'language',
|
||||
'publisher', 'relation', 'rights', 'source',
|
||||
'subject', 'title', 'type'])
|
||||
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp'])
|
||||
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
|
||||
'publication_type'])
|
||||
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
||||
'scheme': OPF('scheme'), 'event': OPF('event'),
|
||||
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
||||
|
@ -192,6 +192,7 @@ class OEBReader(object):
|
||||
if not scheme and href not in known:
|
||||
new.add(href)
|
||||
unchecked.clear()
|
||||
warned = set([])
|
||||
for href in new:
|
||||
known.add(href)
|
||||
is_invalid = False
|
||||
@ -202,9 +203,13 @@ class OEBReader(object):
|
||||
if is_invalid:
|
||||
continue
|
||||
if not self.oeb.container.exists(href):
|
||||
self.logger.warn('Referenced file %r not found' % href)
|
||||
if href not in warned:
|
||||
self.logger.warn('Referenced file %r not found' % href)
|
||||
warned.add(href)
|
||||
continue
|
||||
self.logger.warn('Referenced file %r not in manifest' % href)
|
||||
if href not in warned:
|
||||
self.logger.warn('Referenced file %r not in manifest' % href)
|
||||
warned.add(href)
|
||||
id, _ = manifest.generate(id='added')
|
||||
guessed = guess_type(href)[0]
|
||||
media_type = guessed or BINARY_MIME
|
||||
@ -330,14 +335,14 @@ class OEBReader(object):
|
||||
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
|
||||
|
||||
authorElement = xpath(child,
|
||||
'descendant::mbp:meta[@name = "author"]')
|
||||
'descendant::calibre:meta[@name = "author"]')
|
||||
if authorElement :
|
||||
author = authorElement[0].text
|
||||
else :
|
||||
author = None
|
||||
|
||||
descriptionElement = xpath(child,
|
||||
'descendant::mbp:meta[@name = "description"]')
|
||||
'descendant::calibre:meta[@name = "description"]')
|
||||
if descriptionElement :
|
||||
description = descriptionElement[0].text
|
||||
else :
|
||||
|
@ -63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log):
|
||||
if mi.timestamp is not None:
|
||||
m.clear('timestamp')
|
||||
m.add('timestamp', mi.timestamp.isoformat())
|
||||
if mi.rights is not None:
|
||||
m.clear('rights')
|
||||
m.add('rights', mi.rights)
|
||||
if mi.publication_type is not None:
|
||||
m.clear('publication_type')
|
||||
m.add('publication_type', mi.publication_type)
|
||||
if not m.timestamp:
|
||||
m.add('timestamp', datetime.utcnow().isoformat())
|
||||
|
||||
|
||||
|
||||
class MergeMetadata(object):
|
||||
'Merge in user metadata, including cover'
|
||||
|
||||
|
@ -13,6 +13,7 @@ from urlparse import urlparse
|
||||
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
|
||||
from calibre.ebooks import ConversionError
|
||||
|
||||
def XPath(x):
|
||||
try:
|
||||
return etree.XPath(x, namespaces=XPNSMAP)
|
||||
|
@ -64,8 +64,5 @@ class ManifestTrimmer(object):
|
||||
unchecked = new
|
||||
for item in oeb.manifest.values():
|
||||
if item not in used:
|
||||
if getattr(self.opts, 'mobi_periodical', False) and \
|
||||
item.href == 'images/mastheadImage.gif':
|
||||
continue
|
||||
oeb.logger.info('Trimming %r from manifest' % item.href)
|
||||
oeb.manifest.remove(item)
|
||||
|
@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
|
||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||
Widget.__init__(self, parent, 'mobi_output',
|
||||
['prefer_author_sort', 'rescale_images', 'toc_title',
|
||||
'dont_compress', 'mobi_periodical']
|
||||
'dont_compress',]
|
||||
)
|
||||
self.db, self.book_id = db, book_id
|
||||
self.initialize_options(get_option, get_help, db, book_id)
|
||||
|
@ -41,7 +41,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="0">
|
||||
<item row="5" column="0">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -61,13 +61,6 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="opt_mobi_periodical">
|
||||
<property name="text">
|
||||
<string>Generate a periodical rather than a book</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="opt_no_inline_toc">
|
||||
<property name="text">
|
||||
|
@ -16,6 +16,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, QString, \
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.pyparsing import ParseException
|
||||
from calibre.library.database2 import FIELD_MAP
|
||||
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
|
||||
error_dialog
|
||||
@ -267,7 +268,11 @@ class BooksModel(QAbstractTableModel):
|
||||
self.count_changed()
|
||||
|
||||
def search(self, text, refinement, reset=True):
|
||||
self.db.search(text)
|
||||
try:
|
||||
self.db.search(text)
|
||||
except ParseException:
|
||||
self.emit(SIGNAL('parse_exception()'))
|
||||
return
|
||||
self.last_search = text
|
||||
if reset:
|
||||
self.clear_caches()
|
||||
@ -898,7 +903,12 @@ class DeviceBooksModel(BooksModel):
|
||||
if not text or not text.strip():
|
||||
self.map = list(range(len(self.db)))
|
||||
else:
|
||||
matches = self.search_engine.parse(text)
|
||||
try:
|
||||
matches = self.search_engine.parse(text)
|
||||
except ParseException:
|
||||
self.emit(SIGNAL('parse_exception()'))
|
||||
return
|
||||
|
||||
self.map = []
|
||||
for i in range(len(self.db)):
|
||||
if i in matches:
|
||||
|
@ -221,9 +221,20 @@ class LibraryServer(object):
|
||||
|
||||
def get_format(self, id, format):
|
||||
format = format.upper()
|
||||
fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
|
||||
fmt = self.db.format(id, format, index_is_id=True, as_file=True,
|
||||
mode='r+b')
|
||||
if fmt is None:
|
||||
raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
|
||||
if format == 'EPUB':
|
||||
from tempfile import TemporaryFile
|
||||
from calibre.ebooks.metadata.meta import set_metadata
|
||||
raw = fmt.read()
|
||||
fmt = TemporaryFile()
|
||||
fmt.write(raw)
|
||||
fmt.seek(0)
|
||||
set_metadata(fmt, self.db.get_metadata(id, index_is_id=True),
|
||||
'epub')
|
||||
fmt.seek(0)
|
||||
mt = guess_type('dummy.'+format.lower())[0]
|
||||
if mt is None:
|
||||
mt = 'application/octet-stream'
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe):
|
||||
#: Delay between consecutive downloads in seconds
|
||||
delay = 0
|
||||
|
||||
#: Publication type
|
||||
#: Set to newspaper, magazine or blog
|
||||
publication_type = 'unknown'
|
||||
|
||||
#: Number of simultaneous downloads. Set to 1 if the server is picky.
|
||||
#: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
|
||||
simultaneous_downloads = 5
|
||||
@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe):
|
||||
mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
mi.publication_type = 'periodical:'+self.publication_type
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe):
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
desc = a.text_summary
|
||||
if not desc:
|
||||
desc = None
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po)
|
||||
play_order=po, description=desc)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe):
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po))
|
||||
desc = f.description
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc))
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
feed_index(0, toc)
|
||||
|
@ -7,9 +7,9 @@ clarin.com
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Clarin(BasicNewsRecipe):
|
||||
title = 'Clarin'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -22,14 +22,19 @@ class Clarin(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||
remove_javascript = True
|
||||
|
||||
encoding = 'cp1252'
|
||||
language = _('Spanish')
|
||||
lang = 'es-AR'
|
||||
direction = 'ltr'
|
||||
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='a' , attrs={'class':'Imp' })
|
||||
@ -48,17 +53,20 @@ class Clarin(BasicNewsRecipe):
|
||||
,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
artl = article.get('link', None)
|
||||
rest = artl.partition('-0')[-1]
|
||||
def print_version(self, url):
|
||||
rest = url.partition('-0')[-1]
|
||||
lmain = rest.partition('.')[0]
|
||||
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
||||
lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
||||
return lurl
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -7,94 +7,108 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
# http://online.wsj.com/page/us_in_todays_paper.html
|
||||
|
||||
class WallStreetJournal(BasicNewsRecipe):
|
||||
|
||||
title = 'The Wall Street Journal'
|
||||
__author__ = 'Kovid Goyal'
|
||||
class WallStreetJournal(BasicNewsRecipe):
|
||||
|
||||
title = 'The Wall Street Journal'
|
||||
__author__ = 'Kovid Goyal and Sujata Raman'
|
||||
description = 'News and current affairs.'
|
||||
needs_subscription = True
|
||||
language = _('English')
|
||||
max_articles_per_feed = 10
|
||||
timefmt = ' [%a, %b %d, %Y]'
|
||||
timefmt = ' [%a, %b %d, %Y]'
|
||||
no_stylesheets = True
|
||||
|
||||
extra_css = '''h1{color:#093D72 ; font-size:large ; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; }
|
||||
h2{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
|
||||
.subhead{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
|
||||
.insettipUnit {color:#666666; font-family:Arial,Sans-serif;font-size:xx-small }
|
||||
.targetCaption{ font-size:x-small; color:#333333; font-family:Arial,Helvetica,sans-serif}
|
||||
.article{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
||||
.tagline {color:#333333; font-size:xx-small}
|
||||
.dateStamp {color:#666666; font-family:Arial,Helvetica,sans-serif}
|
||||
h3{color:blue ;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
|
||||
.byline{color:blue;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
|
||||
h6{color:#333333; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic; }
|
||||
.paperLocation{color:#666666; font-size:xx-small}'''
|
||||
|
||||
remove_tags_before = dict(name='h1')
|
||||
remove_tags = [
|
||||
dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive"]),
|
||||
{'class':['more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
|
||||
dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow"]),
|
||||
{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map','insettip','more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
|
||||
dict(rel='shortcut icon'),
|
||||
]
|
||||
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://commerce.wsj.com/auth/login')
|
||||
br.select_form(nr=0)
|
||||
br['user'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://commerce.wsj.com/auth/login')
|
||||
br.select_form(nr=0)
|
||||
br['user'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
|
||||
def postprocess_html(self, soup, first):
    '''
    Turn every ``table``, ``tr`` and ``td`` tag into a ``div`` and return
    the same soup object. ``first`` is not used.
    '''
    table_parts = soup.findAll(name=['table', 'tr', 'td'])
    for element in table_parts:
        element.name = 'div'
    return soup
|
||||
|
||||
|
||||
def get_article_url(self, article):
    '''
    Return the article URL with any query string removed.

    The ``feedburner_origlink`` attribute is preferred; when using it
    raises AttributeError (missing, or not a string), ``link`` is used.
    '''
    try:
        address = article.feedburner_origlink.split('?')[0]
    except AttributeError:
        address = article.link.split('?')[0]
    return address
|
||||
|
||||
def cleanup(self):
    """Open the WSJ logout URL with ``self.browser``."""
    self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')


# (title, RSS URL) pairs, one per feed.  Commented-out entries are feeds
# that were already disabled in the original list.
# NOTE(review): 'News - Health' and 'Personal Journal - Health' point at the
# same URL (3_7089.xml) -- confirm whether that is intentional.
feeds = [
    #('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
    #('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
    #('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
    (' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
    (' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
    # ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
    ('Today\'s Newspaper - Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
    ('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
    ('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
    ('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'),
    ('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'),
    ('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'),
    ('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'),
    ('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'),
    ('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'),
    ('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'),
    ('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'),
    ('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'),
    ('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'),
    ('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
    ('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'),
    ('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'),
    ('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'),
    ('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'),
    ('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'),
    ('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'),
    ('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'),
    ('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'),
    ('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'),
    ('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'),
    ('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'),
    ('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'),
    ('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'),
    ('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'),
    ('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'),
    ('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
    ('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'),
    ('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'),
    ('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'),
    ('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'),
    ('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'),
    ('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'),
    ('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'),
    ('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'),
    ('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'),
]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user