mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to pluginize
This commit is contained in:
commit
e773e0e13e
@ -571,9 +571,6 @@ Condition 08195201-0797-932C-4B51-E5EF9D1D41BD -active Yes -parent 710F2507-2557
|
||||
Condition 2E18F4AE-F1BB-5C62-2900-73A576A49261 -active Yes -parent 710F2507-2557-652D-EA55-440D710EFDFA -title {String Is Condition} -component StringIsCondition -TreeObject::id 2E18F4AE-F1BB-5C62-2900-73A576A49261
|
||||
InstallComponent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -setup Install -type action -conditions 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -title {Message Box} -component MessageBox -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708
|
||||
Condition 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -active Yes -parent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -title {String Is Condition} -component StringIsCondition -TreeObject::id 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5
|
||||
InstallComponent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -setup Install -type action -conditions {E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C A8856922-E6C1-160B-E55C-5C1806A89136} -title {Launch Application Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708
|
||||
Condition E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {File Exists Condition} -component FileExistsCondition -TreeObject::id E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C
|
||||
Condition A8856922-E6C1-160B-E55C-5C1806A89136 -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {String Is Condition} -component StringIsCondition -TreeObject::id A8856922-E6C1-160B-E55C-5C1806A89136
|
||||
InstallComponent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -setup Install -type action -conditions {96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 FBA33088-C809-DD6B-D337-EADBF1CEE966} -title {Desktop Shortcut Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708
|
||||
Condition 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0
|
||||
Condition FBA33088-C809-DD6B-D337-EADBF1CEE966 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {String Is Condition} -component StringIsCondition -TreeObject::id FBA33088-C809-DD6B-D337-EADBF1CEE966
|
||||
@ -630,7 +627,7 @@ Condition 03FA7EEF-F626-B69A-09C6-0AA7A54EE9E7 -active Yes -parent E32519F3-A540
|
||||
InstallComponent D86BBA5C-4903-33BA-59F8-4266A3D45896 -setup Install -type action -conditions {C4C0A903-CF2A-D25A-27AB-A64219FB7E70 5EC7056B-6F90-311E-2C6F-76E96164CFFD} -title {Install Quick Launch Shortcut} -component InstallWindowsShortcut -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC
|
||||
Condition C4C0A903-CF2A-D25A-27AB-A64219FB7E70 -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {String Is Condition} -component StringIsCondition -TreeObject::id C4C0A903-CF2A-D25A-27AB-A64219FB7E70
|
||||
Condition 5EC7056B-6F90-311E-2C6F-76E96164CFFD -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 5EC7056B-6F90-311E-2C6F-76E96164CFFD
|
||||
InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC
|
||||
InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active No -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC
|
||||
Condition 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E
|
||||
Condition E560F3A1-208D-2B4F-2C87-E08595F8E1CD -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id E560F3A1-208D-2B4F-2C87-E08595F8E1CD
|
||||
Condition 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475 -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475
|
||||
@ -802,6 +799,9 @@ CreateQuickLaunchShortcut
|
||||
28FDA3F4-B799-901F-8A27-AA04F0C022AB,Title,subst
|
||||
1
|
||||
|
||||
2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Active
|
||||
No
|
||||
|
||||
2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Conditions
|
||||
{3 conditions}
|
||||
|
||||
@ -976,27 +976,6 @@ disabled
|
||||
5C66451D-6042-DBDE-0D8C-31156EE244AD,Widget
|
||||
{Back Button;Next Button}
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Background
|
||||
white
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Conditions
|
||||
{2 conditions}
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text,subst
|
||||
1
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Type
|
||||
checkbutton
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,VirtualText
|
||||
LaunchApplication
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,X
|
||||
185
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Y
|
||||
130
|
||||
|
||||
5EC7056B-6F90-311E-2C6F-76E96164CFFD,CheckCondition
|
||||
{Before Action is Executed}
|
||||
|
||||
@ -1408,15 +1387,6 @@ disabled
|
||||
A75C97CC-01AC-C12A-D663-A54E3257F11B,Widget
|
||||
{Back Button;Next Button}
|
||||
|
||||
A8856922-E6C1-160B-E55C-5C1806A89136,CheckCondition
|
||||
{Before Action is Executed}
|
||||
|
||||
A8856922-E6C1-160B-E55C-5C1806A89136,Operator
|
||||
false
|
||||
|
||||
A8856922-E6C1-160B-E55C-5C1806A89136,String
|
||||
<%InstallStopped%>
|
||||
|
||||
AAEC34E6-7F02-18F2-30BB-744738192A3B,Conditions
|
||||
{2 conditions}
|
||||
|
||||
@ -1730,12 +1700,6 @@ disabled
|
||||
E5CBB018-A89D-3145-CFF5-CFC3B62BEA97,Widget
|
||||
{NextButton; CancelButton}
|
||||
|
||||
E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,CheckCondition
|
||||
{Before Action is Executed}
|
||||
|
||||
E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,Filename
|
||||
<%ProgramExecutable%>
|
||||
|
||||
E611105F-DC85-9E20-4F7B-E63C54E5DF06,Message,subst
|
||||
1
|
||||
|
||||
@ -2340,9 +2304,6 @@ Please make sure that calibre is not running, as this will cause the install to
|
||||
48E8A9D6-B57E-C506-680D-898C65DD2A1B,Title
|
||||
<%InstallApplicationText%>
|
||||
|
||||
5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text
|
||||
<%LaunchApplicationText%>
|
||||
|
||||
64B8D0F3-4B11-DA22-D6E7-7248872D5FA7,Message
|
||||
<%UninstallStartupText%>
|
||||
|
||||
@ -2356,7 +2317,7 @@ Please make sure that calibre is not running, as this will cause the install to
|
||||
{<%AppName%> Installation complete}
|
||||
|
||||
8A7FD0C2-F053-8764-F204-4BAE71E05708,Message
|
||||
{Installation of <%AppName%> was successful. Click Finish to quit the installer.}
|
||||
{Installation of <%AppName%> was successful. Click Finish to quit the installer. <%AppName%> can be launched from the start menu.}
|
||||
|
||||
940F7FED-7D20-7264-3BF9-ED78205A76B3,Text
|
||||
<%CreateDesktopShortcutText%>
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.5.6'
|
||||
__version__ = '0.5.7'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -143,7 +143,7 @@ class OutputProfile(Plugin):
|
||||
|
||||
# ADE dies an agonizing, long drawn out death if HTML files have more
|
||||
# bytes than this.
|
||||
flow_size = sys.maxint
|
||||
flow_size = -1
|
||||
# ADE runs screaming when it sees these characters
|
||||
remove_special_chars = re.compile(u'[\u200b\u00ad]')
|
||||
# ADE falls to the ground in a dead faint when it sees an <object>
|
||||
|
@ -2,7 +2,7 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, shutil, traceback, functools, sys
|
||||
import os, shutil, traceback, functools, sys, re
|
||||
|
||||
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
|
||||
MetadataWriterPlugin
|
||||
@ -55,7 +55,14 @@ def load_plugin(path_to_zip_file):
|
||||
for name in zf.namelist():
|
||||
if name.lower().endswith('plugin.py'):
|
||||
locals = {}
|
||||
exec zf.read(name) in locals
|
||||
raw = zf.read(name)
|
||||
match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
|
||||
encoding = 'utf-8'
|
||||
if match is not None:
|
||||
encoding = match.group(1)
|
||||
raw = raw.decode(encoding)
|
||||
raw = re.sub('\r\n', '\n', raw)
|
||||
exec raw in locals
|
||||
for x in locals.values():
|
||||
if isinstance(x, type) and issubclass(x, Plugin):
|
||||
if x.minimum_calibre_version > version or \
|
||||
|
@ -31,6 +31,11 @@ Run an embedded python interpreter.
|
||||
parser.add_option('--migrate', action='store_true', default=False,
|
||||
help='Migrate old database. Needs two arguments. Path '
|
||||
'to library1.db and path to new library folder.')
|
||||
parser.add_option('--add-simple-plugin', default=None,
|
||||
help='Add a simple plugin (i.e. a plugin that consists of only a '
|
||||
'.py file), by specifying the path to the py file containing the '
|
||||
'plugin code.')
|
||||
|
||||
return parser
|
||||
|
||||
def update_zipfile(zipfile, mod, path):
|
||||
@ -115,6 +120,22 @@ def debug_device_driver():
|
||||
print 'Total space:', d.total_space()
|
||||
break
|
||||
|
||||
def add_simple_plugin(path_to_plugin):
    '''
    Install a plugin that consists of a single .py file.

    The file is copied to ``custom_plugin.py`` inside a fresh temporary
    directory, wrapped in ``plugin.zip`` and handed to
    ``calibre.customize.ui.main`` with the ``-a`` switch.

    :param path_to_plugin: Path to the .py file containing the plugin code.
    '''
    import tempfile, zipfile, shutil
    tdir = tempfile.mkdtemp()
    odir = os.getcwd()
    try:
        # Copy before changing directory so that a relative path_to_plugin
        # is still resolved against the caller's working directory.
        # copyfile closes both files itself.
        shutil.copyfile(path_to_plugin, os.path.join(tdir, 'custom_plugin.py'))
        os.chdir(tdir)
        zf = zipfile.ZipFile('plugin.zip', 'w')
        try:
            zf.write('custom_plugin.py')
        finally:
            zf.close()
        from calibre.customize.ui import main
        main(['calibre-customize', '-a', 'plugin.zip'])
    finally:
        # Always restore the working directory and remove the scratch
        # directory, even when packaging or installation fails.
        os.chdir(odir)
        shutil.rmtree(tdir)
|
||||
|
||||
|
||||
|
||||
def main(args=sys.argv):
|
||||
opts, args = option_parser().parse_args(args)
|
||||
@ -137,6 +158,8 @@ def main(args=sys.argv):
|
||||
print 'You must specify the path to library1.db and the path to the new library folder'
|
||||
return 1
|
||||
migrate(args[1], args[2])
|
||||
elif opts.add_simple_plugin is not None:
|
||||
add_simple_plugin(opts.add_simple_plugin)
|
||||
else:
|
||||
from IPython.Shell import IPShellEmbed
|
||||
ipshell = IPShellEmbed()
|
||||
|
@ -209,7 +209,7 @@ class Device(_Device):
|
||||
time.sleep(6)
|
||||
drives = {}
|
||||
wmi = __import__('wmi', globals(), locals(), [], -1)
|
||||
c = wmi.WMI()
|
||||
c = wmi.WMI(find_classes=False)
|
||||
for drive in c.Win32_DiskDrive():
|
||||
if self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_MAIN_MEM):
|
||||
drives['main'] = self.windows_get_drive_prefix(drive)
|
||||
|
@ -94,7 +94,8 @@ OptionRecommendation(name='font_size_mapping',
|
||||
OptionRecommendation(name='line_height',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('The line height in pts. Controls spacing between consecutive '
|
||||
'lines of text. By default ??'
|
||||
'lines of text. By default no line height manipulation is '
|
||||
'performed.'
|
||||
)
|
||||
),
|
||||
|
||||
@ -102,12 +103,25 @@ OptionRecommendation(name='linearize_tables',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Some badly designed documents use tables to control the '
|
||||
'layout of text on the page. When converted these documents '
|
||||
'often have text that runs of the page and other artifacts. '
|
||||
'often have text that runs off the page and other artifacts. '
|
||||
'This option will extract the content from the tables and '
|
||||
'present it in a linear fashion.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='dont_split_on_page_breaks',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Turn off splitting at page breaks. Normally, input '
|
||||
'files are automatically split at every page break into '
|
||||
'two files. This gives an output ebook that can be '
|
||||
'parsed faster and with less resources. However, '
|
||||
'splitting is slow and if your source file contains a '
|
||||
'very large number of page breaks, you should turn off '
|
||||
'splitting on page breaks.'
|
||||
)
|
||||
),
|
||||
|
||||
|
||||
OptionRecommendation(name='read_metadata_from_opf',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
short_switch='m',
|
||||
@ -330,6 +344,17 @@ OptionRecommendation(name='language',
|
||||
untable=self.opts.linearize_tables)
|
||||
flattener(self.oeb, self.opts)
|
||||
|
||||
if self.opts.linearize_tables:
|
||||
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
|
||||
LinearizeTables()(self.oeb, self.opts)
|
||||
|
||||
from calibre.ebooks.oeb.transforms.split import Split
|
||||
pbx = accelerators.get('pagebreaks', None)
|
||||
split = Split(not self.opts.dont_split_on_page_breaks,
|
||||
max_flow_size=self.opts.output_profile.flow_size,
|
||||
page_breaks_xpath=pbx)
|
||||
split(self.oeb, self.opts)
|
||||
|
||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||
|
||||
self.log.info('Cleaning up manifest...')
|
||||
|
@ -33,7 +33,7 @@ def get_metadata(stream):
|
||||
covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True)
|
||||
idx = 0
|
||||
if len(covers) > 1:
|
||||
if covers[1][1] == covers[1][0]+'-standard':
|
||||
if covers[1][1] == covers[0][1]+'-standard':
|
||||
idx = 1
|
||||
mi.cover_data = ('jpg', covers[idx][0])
|
||||
return mi
|
||||
|
@ -108,7 +108,7 @@ def set_metadata(stream, mi, stream_type='lrf'):
|
||||
|
||||
|
||||
def metadata_from_filename(name, pat=None):
|
||||
name = os.path.splitext(name)[0]
|
||||
name = name.rpartition('.')[0]
|
||||
mi = MetaInformation(None, None)
|
||||
if pat is None:
|
||||
pat = re.compile(prefs.get('filename_pattern'))
|
||||
|
@ -1,9 +1,8 @@
|
||||
'''Read meta information from PDF files'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''Read meta information from PDF files'''
|
||||
|
||||
import sys, os, StringIO
|
||||
|
||||
@ -52,23 +51,18 @@ def get_metadata(stream, extract_cover=True):
|
||||
|
||||
def set_metadata(stream, mi):
|
||||
stream.seek(0)
|
||||
|
||||
# Use a StringIO object for the pdf because we will want to over
|
||||
# write it later and if we are working on the stream directly it
|
||||
# could cause some issues.
|
||||
raw = StringIO.StringIO(stream.read())
|
||||
orig_pdf = PdfFileReader(raw)
|
||||
|
||||
title = mi.title if mi.title else orig_pdf.documentInfo.title
|
||||
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
|
||||
|
||||
out_pdf = PdfFileWriter(title=title, author=author)
|
||||
for page in orig_pdf.pages:
|
||||
out_pdf.addPage(page)
|
||||
|
||||
out_str = StringIO.StringIO()
|
||||
out_pdf.write(out_str)
|
||||
|
||||
stream.seek(0)
|
||||
stream.truncate()
|
||||
out_str.seek(0)
|
||||
|
@ -29,5 +29,5 @@ class MOBIInput(InputFormatPlugin):
|
||||
with open(f, 'wb') as q:
|
||||
q.write(html.tostring(root, encoding='utf-8', method='xml',
|
||||
include_meta_content_type=False))
|
||||
accelerators['pagebreaks'] = {f: '//*[@class="mbp_pagebreak"]'}
|
||||
accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
|
||||
return mr.created_opf_path
|
||||
|
@ -160,10 +160,8 @@ class BookHeader(object):
|
||||
class MetadataHeader(BookHeader):
|
||||
def __init__(self, stream, log):
|
||||
self.stream = stream
|
||||
|
||||
self.ident = self.identity()
|
||||
self.num_sections = self.section_count()
|
||||
|
||||
if self.num_sections >= 2:
|
||||
header = self.header()
|
||||
BookHeader.__init__(self, header, self.ident, None, log)
|
||||
@ -173,7 +171,6 @@ class MetadataHeader(BookHeader):
|
||||
def identity(self):
    '''
    Read the 8 character book type stored at offset 60 of the stream.

    :return: The upper-cased identifier, ``BOOKMOBI`` or ``TEXTREAD``.
    :raises MobiError: If the identifier is anything else.
    '''
    stream = self.stream
    stream.seek(60)
    book_type = stream.read(8).upper()
    if book_type in ['BOOKMOBI', 'TEXTREAD']:
        return book_type
    raise MobiError('Unknown book type: %s' % book_type)
|
||||
@ -188,7 +185,6 @@ class MetadataHeader(BookHeader):
|
||||
|
||||
def header(self):
|
||||
section_headers = []
|
||||
|
||||
# First section with the metadata
|
||||
section_headers.append(self.section_offset(0))
|
||||
# Second section used to get the length of the first
|
||||
@ -196,20 +192,16 @@ class MetadataHeader(BookHeader):
|
||||
|
||||
end_off = section_headers[1]
|
||||
off = section_headers[0]
|
||||
|
||||
self.stream.seek(off)
|
||||
return self.stream.read(end_off - off)
|
||||
|
||||
def section_data(self, number):
    '''
    Return the raw contents of section ``number``.

    A section runs from its own offset up to the offset of the next
    section; the last section runs to the end of the stream.

    :param number: Zero based section index.
    :return: Whatever ``self.stream.read`` returns for that range.
    '''
    start = self.section_offset(number)
    if number == self.num_sections - 1:
        # Ask the stream itself where it ends rather than stat()-ing
        # stream.name, so that streams without a backing file on disk
        # (for example in-memory buffers) work as well.
        self.stream.seek(0, 2)  # 2 == seek relative to end of stream
        end = self.stream.tell()
    else:
        end = self.section_offset(number + 1)
    self.stream.seek(start)
    return self.stream.read(end - start)
|
||||
|
||||
|
||||
@ -666,7 +658,6 @@ def get_metadata(stream):
|
||||
mr.extract_content(tdir, parse_cache)
|
||||
if mr.embedded_mi is not None:
|
||||
mi = mr.embedded_mi
|
||||
|
||||
if hasattr(mh.exth, 'cover_offset'):
|
||||
cover_index = mh.first_image_index + mh.exth.cover_offset
|
||||
data = mh.section_data(int(cover_index))
|
||||
@ -679,5 +670,4 @@ def get_metadata(stream):
|
||||
mi.cover_data = ('jpg', obuf.getvalue())
|
||||
except:
|
||||
log.exception()
|
||||
|
||||
return mi
|
||||
|
@ -218,7 +218,7 @@ class Serializer(object):
|
||||
for elem in item.data.find(XHTML('body')):
|
||||
self.serialize_elem(elem, item)
|
||||
#buffer.write('</mbp:section>')
|
||||
buffer.write('</mbp:pagebreak>')
|
||||
buffer.write('<mbp:pagebreak/>')
|
||||
|
||||
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
||||
buffer = self.buffer
|
||||
|
@ -272,11 +272,26 @@ def XPath(expr):
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
|
||||
def xml2str(root, pretty_print=False):
|
||||
return etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||
def _prepare_xml_for_serialization(root):
|
||||
root.set('xmlns', XHTML_NS)
|
||||
root.set('{%s}xlink'%XHTML_NS, XLINK_NS)
|
||||
for x in root.iter():
|
||||
if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
|
||||
x.set('xmlns', SVG_NS)
|
||||
|
||||
def xml2str(root, pretty_print=False, strip_comments=False):
|
||||
_prepare_xml_for_serialization(root)
|
||||
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||
pretty_print=pretty_print)
|
||||
|
||||
if strip_comments:
|
||||
ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
|
||||
|
||||
return ans
|
||||
|
||||
|
||||
def xml2unicode(root, pretty_print=False):
|
||||
_prepare_xml_for_serialization(root)
|
||||
return etree.tostring(root, pretty_print=pretty_print)
|
||||
|
||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||
@ -826,6 +841,11 @@ class Manifest(object):
|
||||
return xml2str(data, pretty_print=self.oeb.pretty_print)
|
||||
if isinstance(data, unicode):
|
||||
return data.encode('utf-8')
|
||||
if hasattr(data, 'cssText'):
|
||||
data = data.cssText
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode('utf-8')
|
||||
return data
|
||||
return str(data)
|
||||
|
||||
def __unicode__(self):
|
||||
@ -834,6 +854,8 @@ class Manifest(object):
|
||||
return xml2unicode(data, pretty_print=self.oeb.pretty_print)
|
||||
if isinstance(data, unicode):
|
||||
return data
|
||||
if hasattr(data, 'cssText'):
|
||||
return data.cssText
|
||||
return unicode(data)
|
||||
|
||||
def __eq__(self, other):
|
||||
@ -1044,6 +1066,12 @@ class Spine(object):
|
||||
self.items[i].spine_position = i
|
||||
item.spine_position = None
|
||||
|
||||
def index(self, item):
    '''
    Return the position of the first entry that compares equal to
    ``item`` when iterating over this object, or -1 if there is none.
    '''
    position = 0
    for entry in self:
        if item == entry:
            return position
        position += 1
    return -1
|
||||
|
||||
def __iter__(self):
|
||||
for item in self.items:
|
||||
yield item
|
||||
|
@ -163,7 +163,6 @@ class EbookIterator(object):
|
||||
s.pages = p
|
||||
start = 1
|
||||
|
||||
|
||||
for s in self.spine:
|
||||
s.start_page = start
|
||||
start += s.pages
|
||||
|
@ -22,7 +22,6 @@ class OEBOutput(OutputFormatPlugin):
|
||||
if not os.path.exists(output_path):
|
||||
os.makedirs(output_path)
|
||||
from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME
|
||||
from calibre.ebooks.html import tostring as html_tostring
|
||||
with CurrentDir(output_path):
|
||||
results = oeb_book.to_opf2(page_map=True)
|
||||
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
|
||||
@ -38,16 +37,7 @@ class OEBOutput(OutputFormatPlugin):
|
||||
dir = os.path.dirname(path)
|
||||
if not os.path.exists(dir):
|
||||
os.makedirs(dir)
|
||||
raw = item.data
|
||||
if not isinstance(raw, basestring):
|
||||
if hasattr(raw, 'cssText'):
|
||||
raw = raw.cssText
|
||||
else:
|
||||
raw = html_tostring(raw,
|
||||
pretty_print=opts.pretty_print)
|
||||
if isinstance(raw, unicode):
|
||||
raw = raw.encode('utf-8')
|
||||
with open(path, 'wb') as f:
|
||||
f.write(raw)
|
||||
f.write(str(item))
|
||||
|
||||
|
||||
|
21
src/calibre/ebooks/oeb/transforms/linearize_tables.py
Normal file
21
src/calibre/ebooks/oeb/transforms/linearize_tables.py
Normal file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, XPNSMAP
|
||||
|
||||
class LinearizeTables(object):
    '''
    Transform that turns table markup into plain ``div`` elements in
    every manifest item whose media type is in OEB_DOCS.
    '''

    def linearize(self, root):
        '''
        Retag every ``table``, ``td``, ``tr`` and ``th`` element found
        under ``root`` (looked up with the ``h`` prefix from XPNSMAP) as
        ``div``. The tree is modified in place.
        '''
        table_elements = root.xpath('//h:table|//h:td|//h:tr|//h:th',
                namespaces=XPNSMAP)
        for element in table_elements:
            element.tag = 'div'

    def __call__(self, oeb, context):
        '''
        Apply :meth:`linearize` to the data of each document item in
        ``oeb.manifest.items``. ``context`` is not used.
        '''
        for item in oeb.manifest.items:
            if item.media_type not in OEB_DOCS:
                continue
            self.linearize(item.data)
|
@ -4,21 +4,25 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Split the flows in an epub file to conform to size limitations.
|
||||
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
|
||||
forced at "likely" locations to conform to size limitations. This transform
|
||||
assumes a prior call to the flatcss transform.
|
||||
'''
|
||||
|
||||
import os, math, functools, collections, re, copy, sys
|
||||
import os, math, functools, collections, re, copy
|
||||
|
||||
from lxml.etree import XPath as _XPath
|
||||
from lxml import etree, html
|
||||
from lxml.cssselect import CSSSelector
|
||||
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
|
||||
rewrite_links
|
||||
from calibre.ebooks.epub import tostring, rules
|
||||
from calibre import CurrentDir
|
||||
|
||||
XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
|
||||
content = functools.partial(os.path.join, 'content')
|
||||
NAMESPACES = dict(XPNSMAP)
|
||||
NAMESPACES['re'] = 'http://exslt.org/regular-expressions'
|
||||
|
||||
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
||||
|
||||
SPLIT_ATTR = 'cs'
|
||||
SPLIT_POINT_ATTR = 'csp'
|
||||
@ -27,149 +31,166 @@ class SplitError(ValueError):
|
||||
|
||||
def __init__(self, path, root):
|
||||
size = len(tostring(root))/1024.
|
||||
ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
|
||||
(os.path.basename(path), size))
|
||||
ValueError.__init__(self,
|
||||
_('Could not find reasonable point at which to split: '
|
||||
'%s Sub-tree size: %d KB')%
|
||||
(path, size))
|
||||
|
||||
class Split(object):
    '''
    Transform that splits the XHTML flows of a book and afterwards
    rewrites links that point into the flows that were split.

    Split points come from page breaks (CSS ``page-break-before`` /
    ``page-break-after`` rules, or an explicit XPath expression); the
    maximum flow size is passed on to :class:`FlowSplitter`.
    '''

    def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
            max_flow_size=0):
        '''
        :param split_on_page_breaks: If False no page breaks are looked up.
        :param page_breaks_xpath: Optional XPath expression (a string)
            selecting the page break elements. When None the page breaks
            are derived from the stylesheets instead.
        :param max_flow_size: Passed unchanged to :class:`FlowSplitter`.
        '''
        self.split_on_page_breaks = split_on_page_breaks
        self.page_breaks_xpath = page_breaks_xpath
        self.max_flow_size = max_flow_size
        if self.page_breaks_xpath is not None:
            self.page_breaks_xpath = XPath(self.page_breaks_xpath)

    def __call__(self, oeb, context):
        '''
        Split every manifest item whose data is an element tree, then fix
        up the links between items. ``context`` is not used.
        '''
        self.oeb = oeb
        self.log = oeb.log
        # Maps the href of every item that was split to its anchor map
        self.map = {}
        # Cache of (selector, before) pairs, built lazily once per book
        self.page_break_selectors = None
        # Iterate over a snapshot: splitting an item presumably changes the
        # manifest (FlowSplitter commits its result) -- TODO confirm.
        for item in list(self.oeb.manifest.items):
            if etree.iselement(item.data):
                self.split_item(item)

        self.fix_links()

    def split_item(self, item):
        '''
        Run :class:`FlowSplitter` on ``item`` and, if it was split,
        remember its anchor map under ``item.href``.
        '''
        # Defaults used when splitting on page breaks is turned off
        page_breaks, page_break_ids = [], []
        if self.split_on_page_breaks:
            if self.page_breaks_xpath is None:
                page_breaks, page_break_ids = self.find_page_breaks(item)
            else:
                # The compiled XPath returns the matching elements; convert
                # them to the same (page_breaks, ids) pair that
                # find_page_breaks() produces.
                # NOTE(review): the break is placed *after* each matched
                # element (before=False) -- confirm this is the intent.
                elems = set(self.page_breaks_xpath(item.data))
                for elem in elems:
                    elem.pb_before = False
                page_breaks, page_break_ids = \
                        self._number_page_breaks(item, elems)

        splitter = FlowSplitter(item, page_breaks, page_break_ids,
                self.max_flow_size, self.oeb)
        if splitter.was_split:
            self.map[item.href] = dict(splitter.anchor_map)

    def _css_page_break_selectors(self):
        '''
        Return a set of ``(CSSSelector, before)`` pairs, one for every
        stylesheet rule that sets page-break-before (before=True) or
        page-break-after (before=False) to something other than ``avoid``.
        '''
        stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                OEB_STYLES]
        selectors = set([])
        for rule in rules(stylesheets):
            for prop, before in (('page-break-before', True),
                                 ('page-break-after', False)):
                value = getattr(rule.style.getPropertyCSSValue(prop),
                        'cssText', '').strip().lower()
                if not value or value == 'avoid':
                    continue
                try:
                    selectors.add((CSSSelector(rule.selectorText), before))
                except Exception:
                    # Best effort: ignore rules whose selector cannot be
                    # turned into a CSSSelector.
                    pass
        return selectors

    def _number_page_breaks(self, item, elems):
        '''
        Sort ``elems`` into document order, make sure each one has an id
        and return ``(page_breaks, page_break_ids)`` where ``page_breaks``
        is a list of ``(XPath matching the id, before)`` pairs. Every
        element in ``elems`` must already carry a ``pb_before`` attribute.
        '''
        for i, elem in enumerate(item.data.iter()):
            elem.pb_order = i

        ordered = sorted(elems, key=lambda x: x.pb_order)
        page_breaks, page_break_ids = [], []
        for i, x in enumerate(ordered):
            # Keep an existing id, otherwise generate one
            x.set('id', x.get('id', 'calibre_pb_%d'%i))
            id_ = x.get('id')
            page_breaks.append((XPath('//*[@id="%s"]'%id_), x.pb_before))
            page_break_ids.append(id_)
        return page_breaks, page_break_ids

    def find_page_breaks(self, item):
        '''
        Find the page breaks that the book's stylesheets request in
        ``item``.

        :return: ``(page_breaks, page_break_ids)``: a list of
            ``(XPath, before)`` pairs in document order and the list of
            the corresponding element ids.
        '''
        if self.page_break_selectors is None:
            # Store the selectors on the instance so that they are really
            # reused for the following items.
            self.page_break_selectors = self._css_page_break_selectors()

        marked = set([])
        for selector, before in self.page_break_selectors:
            for elem in selector(item.data):
                elem.pb_before = before
                marked.add(elem)

        return self._number_page_breaks(item, marked)

    def fix_links(self, opf=None):
        '''
        Fix references to the split files in other content files.

        :param opf: Unused; accepted only so that callers that still pass
            it keep working.
        '''
        for item in self.oeb.manifest:
            if etree.iselement(item.data):
                # rewrite_links() below needs to know which item the URL
                # it is given came from.
                self.current_item = item
                rewrite_links(item.data, self.rewrite_links)

    def rewrite_links(self, url):
        '''
        Link rewriting callback. If ``url`` points into an item that was
        split, return the new href recorded for its fragment (or for
        ``None`` when there is no fragment), with the fragment appended
        again. Any other URL is returned unchanged.
        '''
        href, frag = urldefrag(url)
        href = self.current_item.abshref(href)
        if href in self.map:
            anchor_map = self.map[href]
            nhref = anchor_map[frag if frag else None]
            if frag:
                nhref = '#'.join((nhref, frag))
            return nhref
        return url
|
||||
|
||||
|
||||
|
||||
class Splitter(object):
|
||||
class FlowSplitter(object):
|
||||
|
||||
def __init__(self, path, opts, stylesheet_map, opf):
|
||||
self.setup_cli_handler(opts.verbose)
|
||||
self.path = path
|
||||
self.always_remove = not opts.preserve_tag_structure or \
|
||||
os.stat(content(path)).st_size > 5*opts.profile.flow_size
|
||||
self.base = (os.path.splitext(path)[0].replace('%', '%%') + '_split_%d.html')
|
||||
self.opts = opts
|
||||
self.orig_size = os.stat(content(path)).st_size
|
||||
self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.)
|
||||
root = html.fromstring(open(content(path)).read())
|
||||
def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb):
|
||||
self.item = item
|
||||
self.oeb = oeb
|
||||
self.log = oeb.log
|
||||
self.page_breaks = page_breaks
|
||||
self.page_break_ids = page_break_ids
|
||||
self.max_flow_size = max_flow_size
|
||||
self.base = item.abshref(item.href)
|
||||
|
||||
self.page_breaks, self.trees = [], []
|
||||
self.split_size = 0
|
||||
base, ext = os.path.splitext(self.base)
|
||||
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
||||
|
||||
# Split on page breaks
|
||||
self.trees = [self.item.data]
|
||||
self.splitting_on_page_breaks = True
|
||||
if not opts.dont_split_on_page_breaks:
|
||||
self.log_info('\tSplitting on page breaks...')
|
||||
if self.path in stylesheet_map:
|
||||
self.find_page_breaks(stylesheet_map[self.path], root)
|
||||
self.split_on_page_breaks(root.getroottree())
|
||||
trees = list(self.trees)
|
||||
else:
|
||||
self.trees = [root.getroottree()]
|
||||
trees = list(self.trees)
|
||||
|
||||
# Split any remaining over-sized trees
|
||||
if self.page_breaks:
|
||||
self.split_on_page_breaks(self.item.data)
|
||||
self.splitting_on_page_breaks = False
|
||||
if self.opts.profile.flow_size < sys.maxint:
|
||||
|
||||
if self.max_flow_size > 0:
|
||||
lt_found = False
|
||||
self.log_info('\tLooking for large trees...')
|
||||
for i, tree in enumerate(list(trees)):
|
||||
self.log('\tLooking for large trees...')
|
||||
trees = list(self.trees)
|
||||
for i, tree in enumerate(list(self.trees)):
|
||||
self.trees = []
|
||||
size = len(tostring(tree.getroot()))
|
||||
if size > self.opts.profile.flow_size:
|
||||
lt_found = True
|
||||
try:
|
||||
self.split_to_size(tree)
|
||||
except (SplitError, RuntimeError): # Splitting fails
|
||||
if not self.always_remove:
|
||||
self.always_remove = True
|
||||
self.split_to_size(tree)
|
||||
else:
|
||||
raise
|
||||
self.split_to_size(tree)
|
||||
trees[i:i+1] = list(self.trees)
|
||||
if not lt_found:
|
||||
self.log_info('\tNo large trees found')
|
||||
self.trees = trees
|
||||
|
||||
self.trees = trees
|
||||
self.was_split = len(self.trees) > 1
|
||||
if self.was_split:
|
||||
self.commit()
|
||||
self.log_info('\t\tSplit into %d parts.', len(self.trees))
|
||||
if self.opts.verbose:
|
||||
for f in self.files:
|
||||
self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
|
||||
self.fix_opf(opf)
|
||||
self.commit()
|
||||
|
||||
self.trees = None
|
||||
def split_on_page_breaks(self, orig_tree):
|
||||
ordered_ids = []
|
||||
for elem in orig_tree.xpath('//*[@id]'):
|
||||
id = elem.get('id')
|
||||
if id in self.page_break_ids:
|
||||
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
|
||||
|
||||
|
||||
def split_text(self, text, root, size):
|
||||
self.log_debug('\t\t\tSplitting text of length: %d'%len(text))
|
||||
rest = text.replace('\r', '')
|
||||
parts = re.split('\n\n', rest)
|
||||
self.log_debug('\t\t\t\tFound %d parts'%len(parts))
|
||||
if max(map(len, parts)) > size:
|
||||
raise SplitError('Cannot split as file contains a <pre> tag with a very large paragraph', root)
|
||||
ans = []
|
||||
buf = ''
|
||||
for part in parts:
|
||||
if len(buf) + len(part) < size:
|
||||
buf += '\n\n'+part
|
||||
else:
|
||||
ans.append(buf)
|
||||
buf = part
|
||||
return ans
|
||||
|
||||
|
||||
def split_to_size(self, tree):
|
||||
self.log_debug('\t\tSplitting...')
|
||||
root = tree.getroot()
|
||||
# Split large <pre> tags
|
||||
for pre in list(root.xpath('//pre')):
|
||||
text = u''.join(pre.xpath('descendant::text()'))
|
||||
pre.text = text
|
||||
for child in list(pre.iterchildren()):
|
||||
pre.remove(child)
|
||||
if len(pre.text) > self.opts.profile.flow_size*0.5:
|
||||
frags = self.split_text(pre.text, root, int(0.2*self.opts.profile.flow_size))
|
||||
new_pres = []
|
||||
for frag in frags:
|
||||
pre2 = copy.copy(pre)
|
||||
pre2.text = frag
|
||||
pre2.tail = u''
|
||||
new_pres.append(pre2)
|
||||
new_pres[-1].tail = pre.tail
|
||||
p = pre.getparent()
|
||||
i = p.index(pre)
|
||||
p[i:i+1] = new_pres
|
||||
|
||||
split_point, before = self.find_split_point(root)
|
||||
if split_point is None or self.split_size > 6*self.orig_size:
|
||||
if not self.always_remove:
|
||||
self.log_warn(_('\t\tToo much markup. Re-splitting without '
|
||||
'structure preservation. This may cause '
|
||||
'incorrect rendering.'))
|
||||
raise SplitError(self.path, root)
|
||||
|
||||
for t in self.do_split(tree, split_point, before):
|
||||
r = t.getroot()
|
||||
if self.is_page_empty(r):
|
||||
continue
|
||||
size = len(tostring(r))
|
||||
if size <= self.opts.profile.flow_size:
|
||||
self.trees.append(t)
|
||||
#print tostring(t.getroot(), pretty_print=True)
|
||||
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
|
||||
len(self.trees), size/1024.)
|
||||
self.split_size += size
|
||||
else:
|
||||
self.split_to_size(t)
|
||||
|
||||
def is_page_empty(self, root):
|
||||
body = root.find('body')
|
||||
if body is None:
|
||||
return False
|
||||
txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
|
||||
if len(txt) > 4:
|
||||
#if len(txt) < 100:
|
||||
# print 1111111, html.tostring(body, method='html', encoding=unicode)
|
||||
return False
|
||||
for img in root.xpath('//img'):
|
||||
if img.get('style', '') != 'display:none':
|
||||
return False
|
||||
return True
|
||||
self.trees = []
|
||||
tree = orig_tree
|
||||
for pattern, before in ordered_ids:
|
||||
self.log.debug('\t\tSplitting on page-break')
|
||||
elem = pattern(tree)
|
||||
if elem:
|
||||
before, after = self.do_split(tree, elem[0], before)
|
||||
self.trees.append(before)
|
||||
tree = after
|
||||
self.trees.append(tree)
|
||||
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
|
||||
|
||||
def do_split(self, tree, split_point, before):
|
||||
'''
|
||||
@ -190,7 +211,7 @@ class Splitter(object):
|
||||
split_point2 = root2.xpath(path)[0]
|
||||
|
||||
def nix_element(elem, top=True):
|
||||
if self.always_remove:
|
||||
if True:
|
||||
parent = elem.getparent()
|
||||
index = parent.index(elem)
|
||||
if top:
|
||||
@ -198,7 +219,6 @@ class Splitter(object):
|
||||
else:
|
||||
index = parent.index(elem)
|
||||
parent[index:index+1] = list(elem.iterchildren())
|
||||
|
||||
else:
|
||||
elem.text = u''
|
||||
elem.tail = u''
|
||||
@ -241,67 +261,76 @@ class Splitter(object):
|
||||
|
||||
return tree, tree2
|
||||
|
||||
def is_page_empty(self, root):
|
||||
body = root.find('body')
|
||||
if body is None:
|
||||
return False
|
||||
txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
|
||||
if len(txt) > 4:
|
||||
return False
|
||||
for img in root.xpath('//img'):
|
||||
if img.get('style', '') != 'display:none':
|
||||
return False
|
||||
return True
|
||||
|
||||
def split_on_page_breaks(self, orig_tree):
|
||||
ordered_ids = []
|
||||
for elem in orig_tree.xpath('//*[@id]'):
|
||||
id = elem.get('id')
|
||||
if id in self.page_break_ids:
|
||||
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
|
||||
|
||||
self.trees = []
|
||||
tree = orig_tree
|
||||
for pattern, before in ordered_ids:
|
||||
self.log_info('\t\tSplitting on page-break')
|
||||
elem = pattern(tree)
|
||||
if elem:
|
||||
before, after = self.do_split(tree, elem[0], before)
|
||||
self.trees.append(before)
|
||||
tree = after
|
||||
self.trees.append(tree)
|
||||
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
|
||||
def split_text(self, text, root, size):
|
||||
self.log.debug('\t\t\tSplitting text of length: %d'%len(text))
|
||||
rest = text.replace('\r', '')
|
||||
parts = re.split('\n\n', rest)
|
||||
self.log.debug('\t\t\t\tFound %d parts'%len(parts))
|
||||
if max(map(len, parts)) > size:
|
||||
raise SplitError('Cannot split as file contains a <pre> tag '
|
||||
'with a very large paragraph', root)
|
||||
ans = []
|
||||
buf = ''
|
||||
for part in parts:
|
||||
if len(buf) + len(part) < size:
|
||||
buf += '\n\n'+part
|
||||
else:
|
||||
ans.append(buf)
|
||||
buf = part
|
||||
return ans
|
||||
|
||||
|
||||
def split_to_size(self, tree):
|
||||
self.log.debug('\t\tSplitting...')
|
||||
root = tree.getroot()
|
||||
# Split large <pre> tags
|
||||
for pre in list(root.xpath('//pre')):
|
||||
text = u''.join(pre.xpath('descendant::text()'))
|
||||
pre.text = text
|
||||
for child in list(pre.iterchildren()):
|
||||
pre.remove(child)
|
||||
if len(pre.text) > self.max_flow_size*0.5:
|
||||
frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
|
||||
new_pres = []
|
||||
for frag in frags:
|
||||
pre2 = copy.copy(pre)
|
||||
pre2.text = frag
|
||||
pre2.tail = u''
|
||||
new_pres.append(pre2)
|
||||
new_pres[-1].tail = pre.tail
|
||||
p = pre.getparent()
|
||||
i = p.index(pre)
|
||||
p[i:i+1] = new_pres
|
||||
|
||||
def find_page_breaks(self, stylesheets, root):
|
||||
'''
|
||||
Find all elements that have either page-break-before or page-break-after set.
|
||||
Populates `self.page_breaks` with id based XPath selectors (for elements that don't
|
||||
have ids, an id is created).
|
||||
'''
|
||||
page_break_selectors = set([])
|
||||
for rule in rules(stylesheets):
|
||||
before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
|
||||
after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
|
||||
try:
|
||||
if before and before != 'avoid':
|
||||
page_break_selectors.add((CSSSelector(rule.selectorText), True))
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
if after and after != 'avoid':
|
||||
page_break_selectors.add((CSSSelector(rule.selectorText), False))
|
||||
except:
|
||||
pass
|
||||
|
||||
page_breaks = set([])
|
||||
for selector, before in page_break_selectors:
|
||||
for elem in selector(root):
|
||||
elem.pb_before = before
|
||||
page_breaks.add(elem)
|
||||
|
||||
for i, elem in enumerate(root.iter()):
|
||||
elem.pb_order = i
|
||||
|
||||
page_breaks = list(page_breaks)
|
||||
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
|
||||
self.page_break_ids = []
|
||||
for i, x in enumerate(page_breaks):
|
||||
x.set('id', x.get('id', 'calibre_pb_%d'%i))
|
||||
id = x.get('id')
|
||||
self.page_breaks.append((XPath('//*[@id="%s"]'%id), x.pb_before))
|
||||
self.page_break_ids.append(id)
|
||||
split_point, before = self.find_split_point(root)
|
||||
if split_point is None:
|
||||
raise SplitError(self.item.href, root)
|
||||
|
||||
for t in self.do_split(tree, split_point, before):
|
||||
r = t.getroot()
|
||||
if self.is_page_empty(r):
|
||||
continue
|
||||
size = len(tostring(r))
|
||||
if size <= self.max_flow_size:
|
||||
self.trees.append(t)
|
||||
#print tostring(t.getroot(), pretty_print=True)
|
||||
self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)',
|
||||
len(self.trees), size/1024.)
|
||||
self.split_size += size
|
||||
else:
|
||||
self.split_to_size(t)
|
||||
|
||||
def find_split_point(self, root):
|
||||
'''
|
||||
@ -336,8 +365,7 @@ class Splitter(object):
|
||||
'//br',
|
||||
'//li',
|
||||
):
|
||||
elems = root.xpath(path,
|
||||
namespaces={'re':'http://exslt.org/regular-expressions'})
|
||||
elems = root.xpath(path, namespaces=NAMESPACES)
|
||||
elem = pick_elem(elems)
|
||||
if elem is not None:
|
||||
try:
|
||||
@ -355,6 +383,8 @@ class Splitter(object):
|
||||
all anchors in the original tree. Internal links are re-directed. The
|
||||
original file is deleted and the split files are saved.
|
||||
'''
|
||||
if not self.was_split:
|
||||
return
|
||||
self.anchor_map = collections.defaultdict(lambda :self.base%0)
|
||||
self.files = []
|
||||
|
||||
@ -368,134 +398,46 @@ class Splitter(object):
|
||||
elem.attrib.pop(SPLIT_ATTR, None)
|
||||
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
||||
|
||||
for current, tree in zip(self.files, self.trees):
|
||||
for a in tree.getroot().xpath('//a[@href]'):
|
||||
spine_pos = self.item.spine_pos
|
||||
for current, tree in zip(map(reversed, (self.files, self.trees))):
|
||||
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
||||
href = a.get('href').strip()
|
||||
if href.startswith('#'):
|
||||
anchor = href[1:]
|
||||
file = self.anchor_map[anchor]
|
||||
if file != current:
|
||||
a.set('href', file+href)
|
||||
open(content(current), 'wb').\
|
||||
write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
|
||||
|
||||
os.remove(content(self.path))
|
||||
new_id = self.oeb.manifest.generate(id=self.item.id)[0]
|
||||
new_item = self.oeb.manifest.add(new_id, current,
|
||||
self.item.media_type, data=tree.getroot())
|
||||
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
||||
|
||||
if self.oeb.guide:
|
||||
for ref in self.oeb.guide:
|
||||
href, frag = urldefrag(ref.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
nhref = '#'.join(nhref, frag)
|
||||
ref.href = nhref
|
||||
|
||||
def fix_toc_entry(toc):
|
||||
if toc.href:
|
||||
href, frag = urldefrag(toc.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
nhref = '#'.join(nhref, frag)
|
||||
toc.href = nhref
|
||||
for x in toc:
|
||||
fix_toc_entry(x)
|
||||
|
||||
|
||||
def fix_opf(self, opf):
|
||||
'''
|
||||
Fix references to the split file in the OPF.
|
||||
'''
|
||||
items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
|
||||
new_items = [('content/'+f, None) for f in self.files]
|
||||
id_map = {}
|
||||
for item in items:
|
||||
id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
|
||||
if self.oeb.toc:
|
||||
fix_toc_entry(self.oeb.toc)
|
||||
|
||||
for id in id_map.keys():
|
||||
opf.replace_spine_items_by_idref(id, id_map[id])
|
||||
|
||||
for ref in opf.iterguide():
|
||||
href = ref.get('href', '')
|
||||
if href.startswith('content/'+self.path):
|
||||
href = href.split('#')
|
||||
frag = None
|
||||
if len(href) > 1:
|
||||
frag = href[1]
|
||||
if frag not in self.anchor_map:
|
||||
self.log_warning('\t\tUnable to re-map OPF link', href)
|
||||
continue
|
||||
new_file = self.anchor_map[frag]
|
||||
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
|
||||
self.oeb.manifest.remove(self.item)
|
||||
|
||||
|
||||
|
||||
def fix_content_links(html_files, changes, opts):
|
||||
split_files = [f.path for f in changes]
|
||||
anchor_maps = [f.anchor_map for f in changes]
|
||||
files = list(html_files)
|
||||
for j, f in enumerate(split_files):
|
||||
try:
|
||||
i = files.index(f)
|
||||
files[i:i+1] = changes[j].files
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
for htmlfile in files:
|
||||
changed = False
|
||||
root = html.fromstring(open(content(htmlfile), 'rb').read())
|
||||
for a in root.xpath('//a[@href]'):
|
||||
href = a.get('href')
|
||||
if not href.startswith('#'):
|
||||
href = href.split('#')
|
||||
anchor = href[1] if len(href) > 1 else None
|
||||
href = href[0]
|
||||
if href in split_files:
|
||||
try:
|
||||
newf = anchor_maps[split_files.index(href)][anchor]
|
||||
except:
|
||||
print '\t\tUnable to remap HTML link:', href, anchor
|
||||
continue
|
||||
frag = ('#'+anchor) if anchor else ''
|
||||
a.set('href', newf+frag)
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
|
||||
|
||||
def fix_ncx(path, changes):
|
||||
split_files = [f.path for f in changes]
|
||||
anchor_maps = [f.anchor_map for f in changes]
|
||||
tree = etree.parse(path)
|
||||
changed = False
|
||||
for content in tree.getroot().xpath('//x:content[@src]',
|
||||
namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
|
||||
href = content.get('src')
|
||||
if not href.startswith('#'):
|
||||
href = href.split('#')
|
||||
anchor = href[1] if len(href) > 1 else None
|
||||
href = href[0].split('/')[-1]
|
||||
if href in split_files:
|
||||
try:
|
||||
newf = anchor_maps[split_files.index(href)][anchor]
|
||||
except:
|
||||
print 'Unable to remap NCX link:', href, anchor
|
||||
frag = ('#'+anchor) if anchor else ''
|
||||
content.set('src', 'content/'+newf+frag)
|
||||
changed = True
|
||||
if changed:
|
||||
open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
|
||||
|
||||
def find_html_files(opf):
|
||||
'''
|
||||
Find all HTML files referenced by `opf`.
|
||||
'''
|
||||
html_files = []
|
||||
for item in opf.itermanifest():
|
||||
if 'html' in item.get('media-type', '').lower():
|
||||
f = item.get('href').split('/')[-1]
|
||||
f2 = f.replace('&', '%26')
|
||||
if not os.path.exists(content(f)) and os.path.exists(content(f2)):
|
||||
f = f2
|
||||
item.set('href', item.get('href').replace('&', '%26'))
|
||||
if os.path.exists(content(f)):
|
||||
html_files.append(f)
|
||||
return html_files
|
||||
|
||||
|
||||
def split(pathtoopf, opts, stylesheet_map):
|
||||
pathtoopf = os.path.abspath(pathtoopf)
|
||||
opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
|
||||
|
||||
with CurrentDir(os.path.dirname(pathtoopf)):
|
||||
html_files = find_html_files(opf)
|
||||
changes = [Splitter(f, opts, stylesheet_map, opf) for f in html_files]
|
||||
changes = [c for c in changes if c.was_split]
|
||||
|
||||
fix_content_links(html_files, changes, opts)
|
||||
for item in opf.itermanifest():
|
||||
if item.get('media-type', '') == 'application/x-dtbncx+xml':
|
||||
fix_ncx(item.get('href'), changes)
|
||||
break
|
||||
|
||||
open(pathtoopf, 'wb').write(opf.render())
|
@ -67,6 +67,10 @@ def _config():
|
||||
c.add_opt('default_send_to_device_action', default=None,
|
||||
help=_('Default action to perform when send to device button is '
|
||||
'clicked'))
|
||||
c.add_opt('show_donate_button', default=True,
|
||||
help='Show donation button')
|
||||
c.add_opt('asked_library_thing_password', default=False,
|
||||
help='Asked library thing password at least once.')
|
||||
return ConfigProxy(c)
|
||||
|
||||
config = _config()
|
||||
|
@ -12,6 +12,7 @@ from PyQt4.Qt import QMenu, QAction, QActionGroup, QIcon, SIGNAL, QPixmap, \
|
||||
|
||||
from calibre.customize.ui import available_input_formats, available_output_formats
|
||||
from calibre.devices import devices
|
||||
from calibre.constants import iswindows
|
||||
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
||||
from calibre.parallel import Job
|
||||
from calibre.devices.scanner import DeviceScanner
|
||||
@ -71,7 +72,14 @@ class DeviceManager(Thread):
|
||||
if connected and not device[1]:
|
||||
try:
|
||||
dev = device[0]()
|
||||
dev.open()
|
||||
if iswindows:
|
||||
import pythoncom
|
||||
pythoncom.CoInitialize()
|
||||
try:
|
||||
dev.open()
|
||||
finally:
|
||||
if iswindows:
|
||||
pythoncom.CoUninitialize()
|
||||
self.device = dev
|
||||
self.device_class = dev.__class__
|
||||
self.connected_slot(True)
|
||||
@ -670,7 +678,9 @@ class DeviceGUI(object):
|
||||
bad = '\n'.join('<li>%s</li>'%(i,) for i in bad)
|
||||
d = warning_dialog(self, _('No suitable formats'),
|
||||
_('Could not upload the following books to the device, '
|
||||
'as no suitable formats were found:<br><ul>%s</ul>')%(bad,))
|
||||
'as no suitable formats were found. Try changing the output '
|
||||
'format in the upper right corner next to the red heart and '
|
||||
're-converting. <br><ul>%s</ul>')%(bad,))
|
||||
d.exec_()
|
||||
|
||||
def upload_booklists(self):
|
||||
|
@ -176,19 +176,19 @@ class Config(ResizableDialog, Ui_Dialog):
|
||||
def get_metadata(self):
|
||||
title, authors = self.get_title_and_authors()
|
||||
mi = MetaInformation(title, authors)
|
||||
publisher = unicode(self.publisher.text())
|
||||
publisher = unicode(self.publisher.text()).strip()
|
||||
if publisher:
|
||||
mi.publisher = publisher
|
||||
author_sort = unicode(self.author_sort.text())
|
||||
author_sort = unicode(self.author_sort.text()).strip()
|
||||
if author_sort:
|
||||
mi.author_sort = author_sort
|
||||
comments = unicode(self.comment.toPlainText())
|
||||
comments = unicode(self.comment.toPlainText()).strip()
|
||||
if comments:
|
||||
mi.comments = comments
|
||||
mi.series_index = int(self.series_index.value())
|
||||
if self.series.currentIndex() > -1:
|
||||
mi.series = unicode(self.series.currentText())
|
||||
tags = [t.strip() for t in unicode(self.tags.text()).split(',')]
|
||||
mi.series = unicode(self.series.currentText()).strip()
|
||||
tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
|
||||
if tags:
|
||||
mi.tags = tags
|
||||
|
||||
@ -267,6 +267,7 @@ class Config(ResizableDialog, Ui_Dialog):
|
||||
).exec_()
|
||||
return
|
||||
mi = self.get_metadata()
|
||||
self.user_mi = mi
|
||||
self.read_settings()
|
||||
self.cover_file = None
|
||||
if self.row is not None:
|
||||
|
@ -25,24 +25,48 @@ from calibre import islinux
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.utils.config import prefs
|
||||
from calibre.customize.ui import run_plugins_on_import
|
||||
from calibre.gui2 import config as gui_conf
|
||||
|
||||
class CoverFetcher(QThread):
|
||||
|
||||
def __init__(self, username, password, isbn, timeout):
|
||||
self.username = username
|
||||
self.password = password
|
||||
def __init__(self, username, password, isbn, timeout, title, author):
|
||||
self.username = username.strip() if username else username
|
||||
self.password = password.strip() if password else password
|
||||
self.timeout = timeout
|
||||
self.isbn = isbn
|
||||
self.title = title
|
||||
self.needs_isbn = False
|
||||
self.author = author
|
||||
QThread.__init__(self)
|
||||
self.exception = self.traceback = self.cover_data = None
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
login(self.username, self.password, force=False)
|
||||
if not self.isbn:
|
||||
from calibre.ebooks.metadata.fetch import search
|
||||
if not self.title:
|
||||
self.needs_isbn = True
|
||||
return
|
||||
au = self.author if self.author else None
|
||||
key = prefs['isbndb_com_key']
|
||||
if not key:
|
||||
key = None
|
||||
results = search(title=self.title, author=au,
|
||||
isbndb_key=key)[0]
|
||||
results = sorted([x.isbn for x in results if x.isbn],
|
||||
cmp=lambda x,y:cmp(len(x),len(y)), reverse=True)
|
||||
if not results:
|
||||
self.needs_isbn = True
|
||||
return
|
||||
self.isbn = results[0]
|
||||
|
||||
if self.username and self.password:
|
||||
login(self.username, self.password, force=False)
|
||||
self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0]
|
||||
except Exception, e:
|
||||
self.exception = e
|
||||
self.traceback = traceback.format_exc()
|
||||
print self.traceback
|
||||
|
||||
|
||||
|
||||
@ -64,6 +88,8 @@ class AuthorCompleter(QCompleter):
|
||||
|
||||
class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
|
||||
COVER_FETCH_TIMEOUT = 240 # seconds
|
||||
|
||||
def do_reset_cover(self, *args):
|
||||
pix = QPixmap(':/images/book.svg')
|
||||
self.cover.setPixmap(pix)
|
||||
@ -345,36 +371,39 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
|
||||
def lt_password_dialog(self):
|
||||
return PasswordDialog(self, 'LibraryThing account',
|
||||
_('<p>Enter your username and password for <b>LibraryThing.com</b>. <br/>If you do not have one, you can <a href=\'http://www.librarything.com\'>register</a> for free!.</p>'))
|
||||
_('<p>Enter your username and password for '
|
||||
'<b>LibraryThing.com</b>. This is <b>optional</b>. It will '
|
||||
'make fetching of covers faster and more reliable.<br/>If '
|
||||
'you do not have an account, you can '
|
||||
'<a href=\'http://www.librarything.com\'>register</a> for '
|
||||
'free.</p>'))
|
||||
|
||||
def change_password(self):
|
||||
d = self.lt_password_dialog()
|
||||
d.exec_()
|
||||
|
||||
def fetch_cover(self):
|
||||
isbn = qstring_to_unicode(self.isbn.text())
|
||||
if isbn:
|
||||
d = self.lt_password_dialog()
|
||||
if not d.username() or not d.password():
|
||||
d.exec_()
|
||||
if d.result() != PasswordDialog.Accepted:
|
||||
return
|
||||
self.fetch_cover_button.setEnabled(False)
|
||||
self.setCursor(Qt.WaitCursor)
|
||||
self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn,
|
||||
self.timeout)
|
||||
self.cover_fetcher.start()
|
||||
self._hangcheck = QTimer(self)
|
||||
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
|
||||
self.cf_start_time = time.time()
|
||||
self.pi.start(_('Downloading cover...'))
|
||||
self._hangcheck.start(100)
|
||||
else:
|
||||
error_dialog(self, _('Cannot fetch cover'),
|
||||
_('You must specify the ISBN identifier for this book.')).exec_()
|
||||
isbn = unicode(self.isbn.text()).strip()
|
||||
d = self.lt_password_dialog()
|
||||
if not gui_conf['asked_library_thing_password'] and \
|
||||
(not d.username() or not d.password()):
|
||||
d.exec_()
|
||||
gui_conf['asked_library_thing_password'] = True
|
||||
self.fetch_cover_button.setEnabled(False)
|
||||
self.setCursor(Qt.WaitCursor)
|
||||
title, author = map(unicode, (self.title.text(), self.authors.text()))
|
||||
self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn,
|
||||
self.timeout, title, author)
|
||||
self.cover_fetcher.start()
|
||||
self._hangcheck = QTimer(self)
|
||||
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
|
||||
self.cf_start_time = time.time()
|
||||
self.pi.start(_('Downloading cover...'))
|
||||
self._hangcheck.start(100)
|
||||
|
||||
def hangcheck(self):
|
||||
if not (self.cover_fetcher.isFinished() or time.time()-self.cf_start_time > 150):
|
||||
if not self.cover_fetcher.isFinished() and \
|
||||
time.time()-self.cf_start_time < self.COVER_FETCH_TIMEOUT:
|
||||
return
|
||||
|
||||
self._hangcheck.stop()
|
||||
@ -385,6 +414,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
_('<b>Could not fetch cover.</b><br/>')+
|
||||
_('The download timed out.')).exec_()
|
||||
return
|
||||
if self.cover_fetcher.needs_isbn:
|
||||
error_dialog(self, _('Cannot fetch cover'),
|
||||
_('Could not find cover for this book. Try '
|
||||
'specifying the ISBN first.')).exec_()
|
||||
return
|
||||
if self.cover_fetcher.exception is not None:
|
||||
err = self.cover_fetcher.exception
|
||||
error_dialog(self, _('Cannot fetch cover'),
|
||||
|
@ -1,7 +1,8 @@
|
||||
<ui version="4.0" >
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ui version="4.0">
|
||||
<class>Dialog</class>
|
||||
<widget class="QDialog" name="Dialog" >
|
||||
<property name="geometry" >
|
||||
<widget class="QDialog" name="Dialog">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
@ -9,66 +10,70 @@
|
||||
<height>209</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle" >
|
||||
<property name="windowTitle">
|
||||
<string>Password needed</string>
|
||||
</property>
|
||||
<property name="windowIcon" >
|
||||
<iconset resource="../images.qrc" >:/images/mimetypes/unknown.svg</iconset>
|
||||
<property name="windowIcon">
|
||||
<iconset resource="../images.qrc">
|
||||
<normaloff>:/images/mimetypes/unknown.svg</normaloff>:/images/mimetypes/unknown.svg</iconset>
|
||||
</property>
|
||||
<layout class="QGridLayout" >
|
||||
<item row="0" column="1" >
|
||||
<widget class="QLabel" name="msg" >
|
||||
<property name="text" >
|
||||
<layout class="QGridLayout">
|
||||
<item row="0" column="1">
|
||||
<widget class="QLabel" name="msg">
|
||||
<property name="text">
|
||||
<string>TextLabel</string>
|
||||
</property>
|
||||
<property name="openExternalLinks" >
|
||||
<property name="wordWrap">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="openExternalLinks">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0" >
|
||||
<widget class="QLabel" name="label" >
|
||||
<property name="text" >
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>&Username:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<property name="buddy">
|
||||
<cstring>gui_username</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" >
|
||||
<widget class="QLineEdit" name="gui_username" />
|
||||
<item row="1" column="1">
|
||||
<widget class="QLineEdit" name="gui_username"/>
|
||||
</item>
|
||||
<item row="2" column="0" >
|
||||
<widget class="QLabel" name="label_2" >
|
||||
<property name="text" >
|
||||
<item row="2" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>&Password:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<property name="buddy">
|
||||
<cstring>gui_password</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1" >
|
||||
<widget class="QLineEdit" name="gui_password" >
|
||||
<property name="echoMode" >
|
||||
<item row="2" column="1">
|
||||
<widget class="QLineEdit" name="gui_password">
|
||||
<property name="echoMode">
|
||||
<enum>QLineEdit::Password</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="1" >
|
||||
<widget class="QDialogButtonBox" name="buttonBox" >
|
||||
<property name="orientation" >
|
||||
<item row="4" column="1">
|
||||
<widget class="QDialogButtonBox" name="buttonBox">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
<property name="standardButtons" >
|
||||
<set>QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok</set>
|
||||
<property name="standardButtons">
|
||||
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1" >
|
||||
<widget class="QCheckBox" name="show_password" >
|
||||
<property name="text" >
|
||||
<item row="3" column="1">
|
||||
<widget class="QCheckBox" name="show_password">
|
||||
<property name="text">
|
||||
<string>&Show password</string>
|
||||
</property>
|
||||
</widget>
|
||||
@ -76,7 +81,7 @@
|
||||
</layout>
|
||||
</widget>
|
||||
<resources>
|
||||
<include location="../images.qrc" />
|
||||
<include location="../images.qrc"/>
|
||||
</resources>
|
||||
<connections>
|
||||
<connection>
|
||||
@ -85,11 +90,11 @@
|
||||
<receiver>Dialog</receiver>
|
||||
<slot>accept()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel" >
|
||||
<hint type="sourcelabel">
|
||||
<x>248</x>
|
||||
<y>254</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel" >
|
||||
<hint type="destinationlabel">
|
||||
<x>157</x>
|
||||
<y>274</y>
|
||||
</hint>
|
||||
@ -101,11 +106,11 @@
|
||||
<receiver>Dialog</receiver>
|
||||
<slot>reject()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel" >
|
||||
<hint type="sourcelabel">
|
||||
<x>316</x>
|
||||
<y>260</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel" >
|
||||
<hint type="destinationlabel">
|
||||
<x>286</x>
|
||||
<y>274</y>
|
||||
</hint>
|
||||
|
BIN
src/calibre/gui2/images/news/hna.png
Normal file
BIN
src/calibre/gui2/images/news/hna.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 827 B |
BIN
src/calibre/gui2/images/news/nzz_ger.png
Normal file
BIN
src/calibre/gui2/images/news/nzz_ger.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 811 B |
@ -108,6 +108,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
||||
self.donate_action = self.system_tray_menu.addAction(
|
||||
QIcon(':/images/donate.svg'), _('&Donate to support calibre'))
|
||||
self.donate_button.setDefaultAction(self.donate_action)
|
||||
if not config['show_donate_button']:
|
||||
self.donate_button.setVisible(False)
|
||||
self.addAction(self.quit_action)
|
||||
self.action_restart = QAction(_('&Restart'), self)
|
||||
self.addAction(self.action_restart)
|
||||
|
@ -225,10 +225,11 @@ def convert_bulk(fmt, parent, db, comics, others):
|
||||
if others:
|
||||
d = get_dialog(fmt)(parent, db)
|
||||
if d.exec_() != QDialog.Accepted:
|
||||
others = []
|
||||
others, user_mi = [], None
|
||||
else:
|
||||
opts = d.opts
|
||||
opts.verbose = 2
|
||||
user_mi = d.user_mi
|
||||
if comics:
|
||||
comic_opts = ComicConf.get_bulk_conversion_options(parent)
|
||||
if not comic_opts:
|
||||
@ -256,6 +257,11 @@ def convert_bulk(fmt, parent, db, comics, others):
|
||||
continue
|
||||
options = opts.copy()
|
||||
mi = db.get_metadata(row)
|
||||
if user_mi is not None:
|
||||
if user_mi.series_index == 1:
|
||||
user_mi.series_index = None
|
||||
mi.smart_update(user_mi)
|
||||
db.set_metadata(db.id(row), mi)
|
||||
opf = OPFCreator(os.getcwdu(), mi)
|
||||
opf_file = PersistentTemporaryFile('.opf')
|
||||
opf.render(opf_file)
|
||||
|
@ -243,9 +243,22 @@ class LibraryServer(object):
|
||||
raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
|
||||
cmpf = cmp if field in ('rating', 'size', 'timestamp') else \
|
||||
lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '')
|
||||
field = FIELD_MAP[field]
|
||||
getter = operator.itemgetter(field)
|
||||
items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order)
|
||||
if field == 'series':
|
||||
items.sort(cmp=self.seriescmp, reverse=not order)
|
||||
else:
|
||||
field = FIELD_MAP[field]
|
||||
getter = operator.itemgetter(field)
|
||||
items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order)
|
||||
|
||||
def seriescmp(self, x, y):
|
||||
si = FIELD_MAP['series']
|
||||
try:
|
||||
ans = cmp(x[si].lower(), y[si].lower())
|
||||
except AttributeError: # Some entries may be None
|
||||
ans = cmp(x[si], y[si])
|
||||
if ans != 0: return ans
|
||||
return cmp(x[FIELD_MAP['series_index']], y[FIELD_MAP['series_index']])
|
||||
|
||||
|
||||
def last_modified(self, updated):
|
||||
lm = updated.strftime('day, %d month %Y %H:%M:%S GMT')
|
||||
|
@ -69,6 +69,7 @@ else:
|
||||
|
||||
DOWNLOAD_DIR = '/var/www/calibre.kovidgoyal.net/htdocs/downloads'
|
||||
MOBILEREAD = 'https://dev.mobileread.com/dist/kovid/calibre/'
|
||||
#MOBILEREAD = 'http://calibre.kovidgoyal.net/downloads/'
|
||||
|
||||
class OS(dict):
|
||||
"""Dictionary with a default value for unknown keys."""
|
||||
@ -197,6 +198,8 @@ else:
|
||||
import sys, os, shutil, tarfile, subprocess, tempfile, urllib2, re, stat
|
||||
|
||||
MOBILEREAD='https://dev.mobileread.com/dist/kovid/calibre/'
|
||||
#MOBILEREAD='http://calibre.kovidgoyal.net/downloads/'
|
||||
|
||||
|
||||
class TerminalController:
|
||||
BOL = '' #: Move the cursor to the beginning of the line
|
||||
|
BIN
src/calibre/trac/plugins/htdocs/images/binary_logo.png
Normal file
BIN
src/calibre/trac/plugins/htdocs/images/binary_logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 50 KiB |
BIN
src/calibre/trac/plugins/htdocs/images/foresight_logo.png
Normal file
BIN
src/calibre/trac/plugins/htdocs/images/foresight_logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 36 KiB |
@ -128,6 +128,12 @@ class BasicNewsRecipe(object):
|
||||
#:
|
||||
extra_css = None
|
||||
|
||||
#: If True empty feeds are removed from the output.
|
||||
#: This option has no effect if parse_index is overridden in
|
||||
#: the sub class. It is meant only for recipes that return a list
|
||||
#: of feeds using :member:`feeds` or :method:`get_feeds`.
|
||||
remove_empty_feeds = False
|
||||
|
||||
#: List of regular expressions that determines which links to follow
|
||||
#: If empty, it is ignored. For example::
|
||||
#:
|
||||
@ -985,6 +991,11 @@ class BasicNewsRecipe(object):
|
||||
self.log.exception(msg)
|
||||
|
||||
|
||||
remove = [f for f in parsed_feeds if len(f) == 0 and
|
||||
self.remove_empty_feeds]
|
||||
for f in remove:
|
||||
parsed_feeds.remove(f)
|
||||
|
||||
return parsed_feeds
|
||||
|
||||
@classmethod
|
||||
|
@ -39,6 +39,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
|
||||
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
|
||||
'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
|
||||
'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
42
src/calibre/web/feeds/recipes/recipe_der_standard.py
Normal file
42
src/calibre/web/feeds/recipes/recipe_der_standard.py
Normal file
@ -0,0 +1,42 @@
|
||||
|
||||
''' http://www.derstandard.at - Austrian Newspaper '''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DerStandardRecipe(BasicNewsRecipe):
    # Recipe for derStandard (http://derstandard.at), built from the
    # site's per-section ("ressort") RSS feeds.
    title = u'derStandard'
    __author__ = 'Gerhard Aigner'

    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [
        (u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
        (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
        (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
        (u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'),
        (u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'),
        (u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'),
        (u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'),
        (u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'),
        (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
        (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
        (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung'),
    ]

    encoding = 'utf-8'
    language = _('German')
    recursions = 0
    remove_tags = [
        dict(name='div'),
        dict(name='a'),
        dict(name='link'),
        dict(name='meta'),
        dict(name='form', attrs={'name':'sitesearch'}),
        dict(name='hr'),
    ]
    preprocess_regexps = [
        # Strip bracketed numeric markers such as [1] or [12]. The
        # quantifier must sit outside the character class: inside it,
        # '*' is a literal asterisk and only one character would match.
        (re.compile(r'\[\d*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
        # Drop hard-coded background colours from the markup.
        (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: ''),
    ]

    def print_version(self, url):
        '''
        Return the URL to download for an article.

        ``txt/`` is inserted in front of the ``?id=`` query of `url`;
        presumably this selects the site's text-only page.
        '''
        return url.replace('?id=', 'txt/?id=')

    def get_article_url(self, article):
        '''
        Return the link of `article`, or None if it should not be added.

        Articles that link to an index page (ressort) or to a picture
        gallery (ansichtssache) are not added.
        '''
        if 'ressort' in article.link or 'ansichtssache' in article.title.lower():
            return None
        return article.link
|
40
src/calibre/web/feeds/recipes/recipe_diepresse.py
Normal file
40
src/calibre/web/feeds/recipes/recipe_diepresse.py
Normal file
@ -0,0 +1,40 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DiePresseRecipe(BasicNewsRecipe):
    # Recipe for diePresse, built from the diepresse.com RSS feeds.
    title = u'diePresse'
    __author__ = 'Gerhard Aigner'
    language = _('German')

    oldest_article = 1
    max_articles_per_feed = 100
    recursions = 0

    # Remove the literal text "Textversion" from the downloaded pages.
    preprocess_regexps = [
        (re.compile(r'Textversion', re.DOTALL), lambda match: ''),
    ]

    remove_tags = [
        {'name': 'hr'},
        {'name': 'br'},
        {'name': 'small'},
        {'name': 'img'},
        {'name': 'div', 'attrs': {'class': 'textnavi'}},
        {'name': 'h1', 'attrs': {'class': 'titel'}},
        {'name': 'a', 'attrs': {'class': 'print'}},
        {'name': 'div', 'attrs': {'class': 'hline'}},
    ]

    feeds = [
        (u'Politik', u'http://diepresse.com/rss/Politik'),
        (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
        (u'Europa', u'http://diepresse.com/rss/EU'),
        (u'Panorama', u'http://diepresse.com/rss/Panorama'),
        (u'Sport', u'http://diepresse.com/rss/Sport'),
        (u'Kultur', u'http://diepresse.com/rss/Kultur'),
        (u'Leben', u'http://diepresse.com/rss/Leben'),
        (u'Tech', u'http://diepresse.com/rss/Tech'),
        (u'Science', u'http://diepresse.com/rss/Science'),
        (u'Bildung', u'http://diepresse.com/rss/Bildung'),
        (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
        (u'Recht', u'http://diepresse.com/rss/Recht'),
        (u'Spectrum', u'http://diepresse.com/rss/Spectrum'),
        (u'Meinung', u'http://diepresse.com/rss/Meinung'),
    ]

    def print_version(self, url):
        '''
        Return the URL to download for an article: `url` with every
        occurrence of ``home`` replaced by ``text/home``.
        '''
        text_url = url.replace('home','text/home')
        return text_url
|
40
src/calibre/web/feeds/recipes/recipe_hna.py
Normal file
40
src/calibre/web/feeds/recipes/recipe_hna.py
Normal file
@ -0,0 +1,40 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''
|
||||
Fetch Hessisch Niedersachsische Allgemeine.
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class hnaDe(BasicNewsRecipe):
    # Recipe for HNA, built from the feedburner feeds listed in `feeds`.

    title = 'HNA'
    description = 'local news from Hessen/Germany'
    __author__ = 'Oliver Niesner'
    # Set once only; the original assigned this same value twice.
    use_embedded_content = False
    language = _('German')
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
    encoding = 'iso-8859-1'

    # Page elements removed from every article.
    remove_tags = [
        dict(id='topnav'),
        dict(id='nav_main'),
        dict(id='suchen'),
        dict(id=''),
        dict(name='span'),
        dict(name='ul', attrs={'class':'linklist'}),
        dict(name='a', attrs={'href':'#'}),
        dict(name='p', attrs={'class':'breadcrumb'}),
        dict(name='p', attrs={'class':'h5'}),
    ]
    #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
    remove_tags_after = [dict(name='a', attrs={'href':'#'})]

    feeds = [
        ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
        ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel'),
    ]
|
||||
|
||||
|
||||
|
@ -14,16 +14,16 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
description = 'News about Linux driven Hardware'
|
||||
__author__ = 'Oliver Niesner'
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
language = _('English')
|
||||
timefmt = ' [%a %d %b %Y]'
|
||||
max_articles_per_feed = 50
|
||||
no_stylesheets = True
|
||||
html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
|
||||
encoding = 'latin1'
|
||||
|
||||
|
||||
remove_tags_after = [dict(id='nointelliTXT')]
|
||||
filter_regexps = [r'ad\.doubleclick\.net']
|
||||
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
|
||||
dict(name='div', attrs={'class':'bannerSky'}),
|
||||
dict(name='div', attrs={'class':'footerLinks'}),
|
||||
@ -38,7 +38,6 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
dict(name='table', attrs={'class':'artikelBox'}),
|
||||
dict(name='table', attrs={'class':'kommentare'}),
|
||||
dict(name='table', attrs={'class':'pageBoxBot'}),
|
||||
#dict(name='table', attrs={'with':'100%'}),
|
||||
dict(name='td', attrs={'nowrap':'nowrap'}),
|
||||
dict(name='td', attrs={'valign':'middle'}),
|
||||
dict(name='td', attrs={'align':'left'}),
|
||||
@ -56,7 +55,6 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
|
||||
dict(name='iframe'),
|
||||
dict(name='form'),
|
||||
#dict(name='tr', attrs={'td':'Click here to learn'}),
|
||||
dict(name='span', attrs={'class':'hidePrint'}),
|
||||
dict(id='headerLBox'),
|
||||
dict(id='nointelliTXT'),
|
||||
|
49
src/calibre/web/feeds/recipes/recipe_moneynews.py
Normal file
49
src/calibre/web/feeds/recipes/recipe_moneynews.py
Normal file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
moneynews.newsmax.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MoneyNews(BasicNewsRecipe):
    # Recipe for Moneynews.com, built from the moneynews.newsmax.com
    # XML feeds.
    title = 'Moneynews.com'
    __author__ = 'Darko Miletic'
    description = 'Financial news worldwide'
    publisher = 'moneynews.com'
    category = 'news, finances, USA, business'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    # Conversion options; both reuse the metadata defined above, so they
    # must stay below those assignments.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\nlinearize_tables=True'
    )

    feeds = [
        (u'Street Talk', u'http://moneynews.newsmax.com/xml/streettalk.xml'),
        (u'Finance News', u'http://moneynews.newsmax.com/xml/FinanceNews.xml'),
        (u'Economy', u'http://moneynews.newsmax.com/xml/economy.xml'),
        (u'Companies', u'http://moneynews.newsmax.com/xml/companies.xml'),
        (u'Markets', u'http://moneynews.newsmax.com/xml/Markets.xml'),
        (u'Investing & Analysis', u'http://moneynews.newsmax.com/xml/investing.xml'),
    ]

    # Keep only the table with class "copy", then drop these elements
    # from what remains.
    keep_only_tags = [{'name': 'table', 'attrs': {'class': 'copy'}}]

    remove_tags = [
        {'name': 'td', 'attrs': {'id': 'article_fontsize'}},
        {'name': 'table', 'attrs': {'id': 'toolbox'}},
        {'name': 'tr', 'attrs': {'id': 'noprint3'}},
    ]
|
||||
|
66
src/calibre/web/feeds/recipes/recipe_nzz_ger.py
Normal file
66
src/calibre/web/feeds/recipes/recipe_nzz_ger.py
Normal file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
www.nzz.ch
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Nzz(BasicNewsRecipe):
    # Recipe for NZZ Online, built from the www.nzz.ch RSS feeds.
    title = 'NZZ Online'
    __author__ = 'Darko Miletic'
    description = 'Laufend aktualisierte Nachrichten, Analysen und Hintergruende zu Politik, Wirtschaft, Kultur und Sport'
    publisher = 'NZZ AG'
    category = 'news, politics, nachrichten, Switzerland'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    # Language code written into each article's <html> tag by
    # preprocess_html
    lang = 'de-CH'
    language = _('German')

    # Conversion options; both reuse the metadata defined above, so they
    # must stay below those assignments.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
    )

    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'article'}}]

    remove_tags = [
        {'name': ['object', 'link', 'base', 'script']},
        {'name': 'div', 'attrs': {'class': ['more', 'teaser', 'advXertXoriXals', 'legal']}},
        {'name': 'div', 'attrs': {'id': ['popup-src', 'readercomments', 'google-ad', 'advXertXoriXals']}},
    ]

    feeds = [
        (u'Neuste Artikel', u'http://www.nzz.ch/feeds/recent/'),
        (u'International', u'http://www.nzz.ch/nachrichten/international?rss=true'),
        (u'Schweiz', u'http://www.nzz.ch/nachrichten/schweiz?rss=true'),
        (u'Wirtschaft', u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true'),
        (u'Finanzmaerkte', u'http://www.nzz.ch/finanzen/nachrichten?rss=true'),
        (u'Zuerich', u'http://www.nzz.ch/nachrichten/zuerich?rss=true'),
        (u'Sport', u'http://www.nzz.ch/nachrichten/sport?rss=true'),
        (u'Panorama', u'http://www.nzz.ch/nachrichten/panorama?rss=true'),
        (u'Kultur', u'http://www.nzz.ch/nachrichten/kultur/aktuell?rss=true'),
        (u'Wissenschaft', u'http://www.nzz.ch/nachrichten/wissenschaft?rss=true'),
        (u'Medien', u'http://www.nzz.ch/nachrichten/medien?rss=true'),
        (u'Reisen', u'http://www.nzz.ch/magazin/reisen?rss=true'),
    ]

    def preprocess_html(self, soup):
        '''
        Tag the document with its language and declare its encoding.

        Sets the ``xml:lang`` and ``lang`` attributes of the <html>
        element to `self.lang` and inserts a Content-Type <meta> element
        naming `self.encoding` as the first child of <head>.

        :param soup: The parsed article.
        :return: The same `soup`, modified in place.
        '''
        for attr in ('xml:lang', 'lang'):
            soup.html[attr] = self.lang
        charset_meta = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0, charset_meta)
        return soup

    def print_version(self, url):
        '''
        Return the URL to download for an article: `url` with
        ``?printview=true`` appended.
        '''
        printable = url + '?printview=true'
        return printable
|
||||
|
@ -8,25 +8,18 @@ Fetch tomshardware.
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TomsHardwareDe(BasicNewsRecipe):
|
||||
class cdnet(BasicNewsRecipe):
|
||||
|
||||
title = 'Tom\'s Hardware German'
|
||||
description = 'Computer news in german'
|
||||
title = 'tomshardware'
|
||||
description = 'computer news in german'
|
||||
__author__ = 'Oliver Niesner'
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 50
|
||||
language = _('German')
|
||||
no_stylesheets = True
|
||||
language = _('German')
|
||||
encoding = 'utf-8'
|
||||
|
||||
#preprocess_regexps = \
|
||||
# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
# [
|
||||
# (r'<84>', lambda match: ''),
|
||||
# (r'<93>', lambda match: ''),
|
||||
# ]
|
||||
# ]
|
||||
|
||||
remove_tags = [dict(id='outside-advert'),
|
||||
dict(id='advertRightWhite'),
|
||||
@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe):
|
||||
dict(id='header-top'),
|
||||
dict(id='header-tools'),
|
||||
dict(id='nbComment'),
|
||||
dict(id='commentTools'),
|
||||
dict(id='internalSidebar'),
|
||||
dict(id='header-news-infos'),
|
||||
dict(id='header-news-tools'),
|
||||
dict(id='breadcrumbs'),
|
||||
dict(id='emailTools'),
|
||||
dict(id='bookmarkTools'),
|
||||
dict(id='printTools'),
|
||||
dict(id='header-nextNews'),
|
||||
dict(id=''),
|
||||
dict(name='div', attrs={'class':'pyjama'}),
|
||||
dict(name='href', attrs={'class':'comment'}),
|
||||
@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'greyBox clearfix'}),
|
||||
dict(id='')]
|
||||
#remove_tags_before = [dict(id='header-news-title')]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})]
|
||||
#remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
|
||||
|
||||
feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]
|
||||
|
||||
|
||||
|
||||
|
@ -11,20 +11,23 @@ from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Vreme(BasicNewsRecipe):
|
||||
title = 'Vreme'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Politicki Nedeljnik Srbije'
|
||||
publisher = 'Vreme d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
INDEX = 'http://www.vreme.com'
|
||||
LOGIN = 'http://www.vreme.com/account/index.php'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = _('Serbian')
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
title = 'Vreme'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Politicki Nedeljnik Srbije'
|
||||
publisher = 'NP Vreme d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
delay = 1
|
||||
no_stylesheets = True
|
||||
needs_subscription = True
|
||||
INDEX = 'http://www.vreme.com'
|
||||
LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = _('Serbian')
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
@ -52,20 +55,11 @@ class Vreme(BasicNewsRecipe):
|
||||
articles = []
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
|
||||
for item in soup.findAll('span', attrs={'class':'toc2'}):
|
||||
for item in soup.findAll(['h3','h4']):
|
||||
description = ''
|
||||
title_prefix = ''
|
||||
|
||||
descript_title_tag = item.findPreviousSibling('span', attrs={'class':'toc1'})
|
||||
if descript_title_tag:
|
||||
title_prefix = self.tag_to_string(descript_title_tag) + ' '
|
||||
|
||||
descript_tag = item.findNextSibling('span', attrs={'class':'toc3'})
|
||||
if descript_tag:
|
||||
description = self.tag_to_string(descript_tag)
|
||||
|
||||
feed_link = item.find('a')
|
||||
if feed_link and feed_link.has_key('href'):
|
||||
if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('/cms/view.php'):
|
||||
url = self.INDEX + feed_link['href']
|
||||
title = title_prefix + self.tag_to_string(feed_link)
|
||||
date = strftime(self.timefmt)
|
||||
@ -93,14 +87,17 @@ class Vreme(BasicNewsRecipe):
|
||||
del item['face']
|
||||
for item in soup.findAll(size=True):
|
||||
del item['size']
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mtag = '<meta http-equiv="Content-Language" content="' + self.lang + '"/>'
|
||||
mtag += '\n<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
cover_item = soup.find('img',attrs={'alt':'Naslovna strana broja'})
|
||||
cover_item = soup.find('div',attrs={'id':'najava'})
|
||||
if cover_item:
|
||||
cover_url = self.INDEX + cover_item['src']
|
||||
cover_url = self.INDEX + cover_item.img['src']
|
||||
return cover_url
|
||||
|
Loading…
x
Reference in New Issue
Block a user