diff --git a/installer/windows/calibre/calibre.mpi b/installer/windows/calibre/calibre.mpi index 8073c45f29..a519695367 100644 --- a/installer/windows/calibre/calibre.mpi +++ b/installer/windows/calibre/calibre.mpi @@ -571,9 +571,6 @@ Condition 08195201-0797-932C-4B51-E5EF9D1D41BD -active Yes -parent 710F2507-2557 Condition 2E18F4AE-F1BB-5C62-2900-73A576A49261 -active Yes -parent 710F2507-2557-652D-EA55-440D710EFDFA -title {String Is Condition} -component StringIsCondition -TreeObject::id 2E18F4AE-F1BB-5C62-2900-73A576A49261 InstallComponent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -setup Install -type action -conditions 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -title {Message Box} -component MessageBox -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708 Condition 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -active Yes -parent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -title {String Is Condition} -component StringIsCondition -TreeObject::id 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -InstallComponent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -setup Install -type action -conditions {E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C A8856922-E6C1-160B-E55C-5C1806A89136} -title {Launch Application Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708 -Condition E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {File Exists Condition} -component FileExistsCondition -TreeObject::id E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C -Condition A8856922-E6C1-160B-E55C-5C1806A89136 -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {String Is Condition} -component StringIsCondition -TreeObject::id A8856922-E6C1-160B-E55C-5C1806A89136 InstallComponent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -setup Install -type action -conditions {96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 FBA33088-C809-DD6B-D337-EADBF1CEE966} -title {Desktop Shortcut Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708 Condition 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 Condition FBA33088-C809-DD6B-D337-EADBF1CEE966 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {String Is Condition} -component StringIsCondition -TreeObject::id FBA33088-C809-DD6B-D337-EADBF1CEE966 @@ -630,7 +627,7 @@ Condition 03FA7EEF-F626-B69A-09C6-0AA7A54EE9E7 -active Yes -parent E32519F3-A540 InstallComponent D86BBA5C-4903-33BA-59F8-4266A3D45896 -setup Install -type action -conditions {C4C0A903-CF2A-D25A-27AB-A64219FB7E70 5EC7056B-6F90-311E-2C6F-76E96164CFFD} -title {Install Quick Launch Shortcut} -component InstallWindowsShortcut -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC Condition C4C0A903-CF2A-D25A-27AB-A64219FB7E70 -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {String Is Condition} -component StringIsCondition -TreeObject::id C4C0A903-CF2A-D25A-27AB-A64219FB7E70 Condition 5EC7056B-6F90-311E-2C6F-76E96164CFFD -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 5EC7056B-6F90-311E-2C6F-76E96164CFFD -InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC +InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active No -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC Condition 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E Condition E560F3A1-208D-2B4F-2C87-E08595F8E1CD -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id E560F3A1-208D-2B4F-2C87-E08595F8E1CD Condition 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475 -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475 @@ -802,6 +799,9 @@ CreateQuickLaunchShortcut 28FDA3F4-B799-901F-8A27-AA04F0C022AB,Title,subst 1 +2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Active +No + 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Conditions {3 conditions} @@ -976,27 +976,6 @@ disabled 5C66451D-6042-DBDE-0D8C-31156EE244AD,Widget {Back Button;Next Button} -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Background -white - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Conditions -{2 conditions} - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text,subst -1 - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Type -checkbutton - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,VirtualText -LaunchApplication - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,X -185 - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Y -130 - 5EC7056B-6F90-311E-2C6F-76E96164CFFD,CheckCondition {Before Action is Executed} @@ -1408,15 +1387,6 @@ disabled A75C97CC-01AC-C12A-D663-A54E3257F11B,Widget {Back Button;Next Button} -A8856922-E6C1-160B-E55C-5C1806A89136,CheckCondition -{Before Action is Executed} - -A8856922-E6C1-160B-E55C-5C1806A89136,Operator -false - -A8856922-E6C1-160B-E55C-5C1806A89136,String -<%InstallStopped%> - AAEC34E6-7F02-18F2-30BB-744738192A3B,Conditions {2 conditions} @@ -1730,12 +1700,6 @@ disabled E5CBB018-A89D-3145-CFF5-CFC3B62BEA97,Widget {NextButton; CancelButton} -E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,CheckCondition -{Before Action is Executed} - -E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,Filename -<%ProgramExecutable%> - E611105F-DC85-9E20-4F7B-E63C54E5DF06,Message,subst 1 @@ -2340,9 +2304,6 @@ Please make sure that calibre is not running, as this will cause the install to 48E8A9D6-B57E-C506-680D-898C65DD2A1B,Title <%InstallApplicationText%> -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text -<%LaunchApplicationText%> - 64B8D0F3-4B11-DA22-D6E7-7248872D5FA7,Message <%UninstallStartupText%> @@ -2356,7 +2317,7 @@ Please make sure that calibre is not running, as this will cause the install to {<%AppName%> Installation complete} 8A7FD0C2-F053-8764-F204-4BAE71E05708,Message -{Installation of <%AppName%> was successful. Click Finish to quit the installer.} +{Installation of <%AppName%> was successful. Click Finish to quit the installer. <%AppName%> can be launched from the start menu.} 940F7FED-7D20-7264-3BF9-ED78205A76B3,Text <%CreateDesktopShortcutText%> diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 00276f6970..138a631b7c 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.5.6' +__version__ = '0.5.7' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 8623a94ddd..c11529f025 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -143,7 +143,7 @@ class OutputProfile(Plugin): # ADE dies an agonizing, long drawn out death if HTML files have more # bytes than this. - flow_size = sys.maxint + flow_size = -1 # ADE runs screaming when it sees these characters remove_special_chars = re.compile(u'[\u200b\u00ad]') # ADE falls to the ground in a dead faint when it sees an diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 99c74ce5f0..4bd5c9b284 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -2,7 +2,7 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import os, shutil, traceback, functools, sys +import os, shutil, traceback, functools, sys, re from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \ MetadataWriterPlugin @@ -55,7 +55,14 @@ def load_plugin(path_to_zip_file): for name in zf.namelist(): if name.lower().endswith('plugin.py'): locals = {} - exec zf.read(name) in locals + raw = zf.read(name) + match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300]) + encoding = 'utf-8' + if match is not None: + encoding = match.group(1) + raw = raw.decode(encoding) + raw = re.sub('\r\n', '\n', raw) + exec raw in locals for x in locals.values(): if isinstance(x, type) and issubclass(x, Plugin): if x.minimum_calibre_version > version or \ diff --git a/src/calibre/debug.py b/src/calibre/debug.py index 45ce9987e0..6444eaa691 100644 --- a/src/calibre/debug.py +++ b/src/calibre/debug.py @@ -31,6 +31,11 @@ Run an embedded python interpreter. parser.add_option('--migrate', action='store_true', default=False, help='Migrate old database. Needs two arguments. Path ' 'to library1.db and path to new library folder.') + parser.add_option('--add-simple-plugin', default=None, + help='Add a simple plugin (i.e. a plugin that consists of only a ' + '.py file), by specifying the path to the py file containing the ' + 'plugin code.') + return parser def update_zipfile(zipfile, mod, path): @@ -115,6 +120,22 @@ def debug_device_driver(): print 'Total space:', d.total_space() break +def add_simple_plugin(path_to_plugin): + import tempfile, zipfile, shutil + tdir = tempfile.mkdtemp() + open(os.path.join(tdir, 'custom_plugin.py'), + 'wb').write(open(path_to_plugin, 'rb').read()) + odir = os.getcwd() + os.chdir(tdir) + zf = zipfile.ZipFile('plugin.zip', 'w') + zf.write('custom_plugin.py') + zf.close() + from calibre.customize.ui import main + main(['calibre-customize', '-a', 'plugin.zip']) + os.chdir(odir) + shutil.rmtree(tdir) + + def main(args=sys.argv): opts, args = option_parser().parse_args(args) @@ -137,6 +158,8 @@ def main(args=sys.argv): print 'You must specify the path to library1.db and the path to the new library folder' return 1 migrate(args[1], args[2]) + elif opts.add_simple_plugin is not None: + add_simple_plugin(opts.add_simple_plugin) else: from IPython.Shell import IPShellEmbed ipshell = IPShellEmbed() diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 63dabe001a..8f2755d3fa 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -209,7 +209,7 @@ class Device(_Device): time.sleep(6) drives = {} wmi = __import__('wmi', globals(), locals(), [], -1) - c = wmi.WMI() + c = wmi.WMI(find_classes=False) for drive in c.Win32_DiskDrive(): if self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_MAIN_MEM): drives['main'] = self.windows_get_drive_prefix(drive) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index ab30e71ba1..119ae4d63e 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -94,7 +94,8 @@ OptionRecommendation(name='font_size_mapping', OptionRecommendation(name='line_height', recommended_value=None, level=OptionRecommendation.LOW, help=_('The line height in pts. Controls spacing between consecutive ' - 'lines of text. By default ??' + 'lines of text. By default no line height manipulation is ' + 'performed.' ) ), @@ -102,12 +103,25 @@ OptionRecommendation(name='linearize_tables', recommended_value=False, level=OptionRecommendation.LOW, help=_('Some badly designed documents use tables to control the ' 'layout of text on the page. When converted these documents ' - 'often have text that runs of the page and other artifacts. ' + 'often have text that runs off the page and other artifacts. ' 'This option will extract the content from the tables and ' 'present it in a linear fashion.' ) ), +OptionRecommendation(name='dont_split_on_page_breaks', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Turn off splitting at page breaks. Normally, input ' + 'files are automatically split at every page break into ' + 'two files. This gives an output ebook that can be ' + 'parsed faster and with less resources. However, ' + 'splitting is slow and if your source file contains a ' + 'very large number of page breaks, you should turn off ' + 'splitting on page breaks.' + ) + ), + + OptionRecommendation(name='read_metadata_from_opf', recommended_value=None, level=OptionRecommendation.LOW, short_switch='m', @@ -330,6 +344,17 @@ OptionRecommendation(name='language', untable=self.opts.linearize_tables) flattener(self.oeb, self.opts) + if self.opts.linearize_tables: + from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables + LinearizeTables()(self.oeb, self.opts) + + from calibre.ebooks.oeb.transforms.split import Split + pbx = accelerators.get('pagebreaks', None) + split = Split(not self.opts.dont_split_on_page_breaks, + max_flow_size=self.opts.output_profile.flow_size, + page_breaks_xpath=pbx) + split(self.oeb, self.opts) + from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer self.log.info('Cleaning up manifest...') diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 071111e0f7..2a57d2f2d2 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -25,7 +25,7 @@ def get_metadata(stream): for item in litfile.manifest.values(): if item.path in candidates: try: - covers.append((litfile.get_file('/data/'+item.internal), + covers.append((litfile.get_file('/data/'+item.internal), ctype)) except: pass @@ -33,7 +33,7 @@ def get_metadata(stream): covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True) idx = 0 if len(covers) > 1: - if covers[1][1] == covers[1][0]+'-standard': + if covers[1][1] == covers[0][1]+'-standard': idx = 1 mi.cover_data = ('jpg', covers[idx][0]) return mi diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index de7ac8eeea..a176c12c2b 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal ' import os, re, collections from calibre.utils.config import prefs - + from calibre.ebooks.metadata.opf2 import OPF from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata @@ -37,18 +37,18 @@ def metadata_from_formats(formats): mi2 = opf_metadata(opf) if mi2 is not None and mi2.title: return mi2 - + for path, ext in zip(formats, extensions): with open(path, 'rb') as stream: try: - newmi = get_metadata(stream, stream_type=ext, + newmi = get_metadata(stream, stream_type=ext, use_libprs_metadata=True) mi.smart_update(newmi) except: continue if getattr(mi, 'application_id', None) is not None: return mi - + if not mi.title: mi.title = _('Unknown') if not mi.authors: @@ -64,20 +64,20 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False): stream_type = 'mobi' if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'): stream_type = 'odt' - + opf = None if hasattr(stream, 'name'): c = os.path.splitext(stream.name)[0]+'.opf' if os.access(c, os.R_OK): opf = opf_metadata(os.path.abspath(c)) - + if use_libprs_metadata and getattr(opf, 'application_id', None) is not None: return opf - + mi = MetaInformation(None, None) if prefs['read_file_metadata']: mi = get_file_type_metadata(stream, stream_type) - + name = os.path.basename(getattr(stream, 'name', '')) base = metadata_from_filename(name) if base.title == os.path.splitext(name)[0] and base.authors is None: @@ -98,17 +98,17 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False): base.smart_update(mi) if opf is not None: base.smart_update(opf) - + return base def set_metadata(stream, mi, stream_type='lrf'): if stream_type: stream_type = stream_type.lower() set_file_type_metadata(stream, mi, stream_type) - - + + def metadata_from_filename(name, pat=None): - name = os.path.splitext(name)[0] + name = name.rpartition('.')[0] mi = MetaInformation(None, None) if pat is None: pat = re.compile(prefs.get('filename_pattern')) @@ -161,7 +161,7 @@ def opf_metadata(opfpath): mi = MetaInformation(opf) if hasattr(opf, 'cover') and opf.cover: cpath = os.path.join(os.path.dirname(opfpath), opf.cover) - if os.access(cpath, os.R_OK): + if os.access(cpath, os.R_OK): fmt = cpath.rpartition('.')[-1] data = open(cpath, 'rb').read() mi.cover_data = (fmt, data) diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 06a02939ba..4476eb0847 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -1,9 +1,8 @@ -'''Read meta information from PDF files''' - from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' +'''Read meta information from PDF files''' import sys, os, StringIO @@ -31,7 +30,7 @@ def get_metadata(stream, extract_cover=True): except: import traceback traceback.print_exc() - + try: info = PdfFileReader(stream).getDocumentInfo() if info.title: @@ -52,23 +51,18 @@ def get_metadata(stream, extract_cover=True): def set_metadata(stream, mi): stream.seek(0) - # Use a StringIO object for the pdf because we will want to over # write it later and if we are working on the stream directly it # could cause some issues. raw = StringIO.StringIO(stream.read()) orig_pdf = PdfFileReader(raw) - title = mi.title if mi.title else orig_pdf.documentInfo.title author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author - out_pdf = PdfFileWriter(title=title, author=author) for page in orig_pdf.pages: out_pdf.addPage(page) - out_str = StringIO.StringIO() out_pdf.write(out_str) - stream.seek(0) stream.truncate() out_str.seek(0) @@ -77,31 +71,31 @@ def set_metadata(stream, mi): def get_cover(stream): data = StringIO.StringIO() - + try: pdf = PdfFileReader(stream) output = PdfFileWriter() - + if len(pdf.pages) >= 1: output.addPage(pdf.getPage(0)) - + with TemporaryDirectory('_pdfmeta') as tdir: cover_path = os.path.join(tdir, 'cover.pdf') - + outputStream = file(cover_path, "wb") output.write(outputStream) outputStream.close() - + wand = NewMagickWand() MagickReadImage(wand, cover_path) MagickSetImageFormat(wand, 'JPEG') MagickWriteImage(wand, '%s.jpg' % cover_path) - + img = Image.open('%s.jpg' % cover_path) - + img.save(data, 'JPEG') except: import traceback traceback.print_exc() - + return data.getvalue() diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 2eb45c9161..97d94a0e33 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -29,5 +29,5 @@ class MOBIInput(InputFormatPlugin): with open(f, 'wb') as q: q.write(html.tostring(root, encoding='utf-8', method='xml', include_meta_content_type=False)) - accelerators['pagebreaks'] = {f: '//*[@class="mbp_pagebreak"]'} + accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]' return mr.created_opf_path diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 161a6995ba..b68263ab28 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -160,35 +160,31 @@ class BookHeader(object): class MetadataHeader(BookHeader): def __init__(self, stream, log): self.stream = stream - self.ident = self.identity() self.num_sections = self.section_count() - if self.num_sections >= 2: header = self.header() BookHeader.__init__(self, header, self.ident, None, log) else: self.exth = None - + def identity(self): self.stream.seek(60) ident = self.stream.read(8).upper() - if ident not in ['BOOKMOBI', 'TEXTREAD']: raise MobiError('Unknown book type: %s' % ident) return ident - + def section_count(self): self.stream.seek(76) return struct.unpack('>H', self.stream.read(2))[0] - + def section_offset(self, number): self.stream.seek(78+number*8) return struct.unpack('>LBBBB', self.stream.read(8))[0] - + def header(self): section_headers = [] - # First section with the metadata section_headers.append(self.section_offset(0)) # Second section used to get the lengh of the first @@ -196,20 +192,16 @@ class MetadataHeader(BookHeader): end_off = section_headers[1] off = section_headers[0] - self.stream.seek(off) return self.stream.read(end_off - off) def section_data(self, number): start = self.section_offset(number) - if number == self.num_sections -1: end = os.stat(self.stream.name).st_size else: end = self.section_offset(number + 1) - self.stream.seek(start) - return self.stream.read(end - start) @@ -651,7 +643,7 @@ class MobiReader(object): def get_metadata(stream): from calibre.utils.logging import Log log = Log() - + mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) try: mh = MetadataHeader(stream, log) @@ -666,7 +658,6 @@ def get_metadata(stream): mr.extract_content(tdir, parse_cache) if mr.embedded_mi is not None: mi = mr.embedded_mi - if hasattr(mh.exth, 'cover_offset'): cover_index = mh.first_image_index + mh.exth.cover_offset data = mh.section_data(int(cover_index)) @@ -679,5 +670,4 @@ def get_metadata(stream): mi.cover_data = ('jpg', obuf.getvalue()) except: log.exception() - return mi diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 39aea3fa30..c521ba9977 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -218,7 +218,7 @@ class Serializer(object): for elem in item.data.find(XHTML('body')): self.serialize_elem(elem, item) #buffer.write('') - buffer.write('') + buffer.write('') def serialize_elem(self, elem, item, nsrmap=NSRMAP): buffer = self.buffer diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 76a6648e8d..ed7981df4f 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -272,11 +272,26 @@ def XPath(expr): def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) -def xml2str(root, pretty_print=False): - return etree.tostring(root, encoding='utf-8', xml_declaration=True, +def _prepare_xml_for_serialization(root): + root.set('xmlns', XHTML_NS) + root.set('{%s}xlink'%XHTML_NS, XLINK_NS) + for x in root.iter(): + if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg': + x.set('xmlns', SVG_NS) + +def xml2str(root, pretty_print=False, strip_comments=False): + _prepare_xml_for_serialization(root) + ans = etree.tostring(root, encoding='utf-8', xml_declaration=True, pretty_print=pretty_print) + if strip_comments: + ans = re.compile(r'', re.DOTALL).sub('', ans) + + return ans + + def xml2unicode(root, pretty_print=False): + _prepare_xml_for_serialization(root) return etree.tostring(root, pretty_print=pretty_print) ASCII_CHARS = set(chr(x) for x in xrange(128)) @@ -826,6 +841,11 @@ class Manifest(object): return xml2str(data, pretty_print=self.oeb.pretty_print) if isinstance(data, unicode): return data.encode('utf-8') + if hasattr(data, 'cssText'): + data = data.cssText + if isinstance(data, unicode): + data = data.encode('utf-8') + return data return str(data) def __unicode__(self): @@ -834,6 +854,8 @@ class Manifest(object): return xml2unicode(data, pretty_print=self.oeb.pretty_print) if isinstance(data, unicode): return data + if hasattr(data, 'cssText'): + return data.cssText return unicode(data) def __eq__(self, other): @@ -1044,6 +1066,12 @@ class Spine(object): self.items[i].spine_position = i item.spine_position = None + def index(self, item): + for i, x in enumerate(self): + if item == x: + return i + return -1 + def __iter__(self): for item in self.items: yield item diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 88fffc604a..df4f3b88f1 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -163,7 +163,6 @@ class EbookIterator(object): s.pages = p start = 1 - for s in self.spine: s.start_page = start start += s.pages diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index ea986f49fa..480ca3776e 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -22,7 +22,6 @@ class OEBOutput(OutputFormatPlugin): if not os.path.exists(output_path): os.makedirs(output_path) from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME - from calibre.ebooks.html import tostring as html_tostring with CurrentDir(output_path): results = oeb_book.to_opf2(page_map=True) for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): @@ -38,16 +37,7 @@ class OEBOutput(OutputFormatPlugin): dir = os.path.dirname(path) if not os.path.exists(dir): os.makedirs(dir) - raw = item.data - if not isinstance(raw, basestring): - if hasattr(raw, 'cssText'): - raw = raw.cssText - else: - raw = html_tostring(raw, - pretty_print=opts.pretty_print) - if isinstance(raw, unicode): - raw = raw.encode('utf-8') with open(path, 'wb') as f: - f.write(raw) + f.write(str(item)) diff --git a/src/calibre/ebooks/oeb/transforms/linearize_tables.py b/src/calibre/ebooks/oeb/transforms/linearize_tables.py new file mode 100644 index 0000000000..a0c11f848c --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/linearize_tables.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.ebooks.oeb.base import OEB_DOCS, XPNSMAP + +class LinearizeTables(object): + + def linearize(self, root): + for x in root.xpath('//h:table|//h:td|//h:tr|//h:th', + namespaces=XPNSMAP): + x.tag = 'div' + + def __call__(self, oeb, context): + for x in oeb.manifest.items: + if x.media_type in OEB_DOCS: + self.linearize(x.data) diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/oeb/transforms/split.py similarity index 51% rename from src/calibre/ebooks/epub/split.py rename to src/calibre/ebooks/oeb/transforms/split.py index 8ff62a1c4b..20205e9c6d 100644 --- a/src/calibre/ebooks/epub/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -4,21 +4,25 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' ''' -Split the flows in an epub file to conform to size limitations. +Splitting of the XHTML flows. Splitting can happen on page boundaries or can be +forces at "likely" locations to conform to size limitations. This transform +assumes a prior call to the flatcss transform. ''' -import os, math, functools, collections, re, copy, sys +import os, math, functools, collections, re, copy from lxml.etree import XPath as _XPath from lxml import etree, html from lxml.cssselect import CSSSelector -from calibre.ebooks.metadata.opf2 import OPF +from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \ + rewrite_links from calibre.ebooks.epub import tostring, rules -from calibre import CurrentDir -XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'}) -content = functools.partial(os.path.join, 'content') +NAMESPACES = dict(XPNSMAP) +NAMESPACES['re'] = 'http://exslt.org/regular-expressions' + +XPath = functools.partial(_XPath, namespaces=NAMESPACES) SPLIT_ATTR = 'cs' SPLIT_POINT_ATTR = 'csp' @@ -27,149 +31,166 @@ class SplitError(ValueError): def __init__(self, path, root): size = len(tostring(root))/1024. - ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')% - (os.path.basename(path), size)) + ValueError.__init__(self, + _('Could not find reasonable point at which to split: ' + '%s Sub-tree size: %d KB')% + (path, size)) + +class Split(object): + + def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None, + max_flow_size=0): + self.split_on_page_breaks = split_on_page_breaks + self.page_breaks_xpath = page_breaks_xpath + self.max_flow_size = max_flow_size + if self.page_breaks_xpath is not None: + self.page_breaks_xpath = XPath(self.page_breaks_xpath) + + def __call__(self, oeb, context): + self.oeb = oeb + self.log = oeb.log + self.map = {} + self.page_break_selectors = None + for item in self.oeb.manifest.items: + if etree.iselement(item.data): + self.split_item(item) + + self.fix_links() + + def split_item(self, item): + if self.split_on_page_breaks: + if self.page_breaks_xpath is None: + page_breaks, page_break_ids = self.find_page_breaks(item) + else: + page_breaks, page_break_ids = self.page_breaks_xpath(item.data) + + splitter = FlowSplitter(item, page_breaks, page_break_ids, + self.max_flow_size, self.oeb) + if splitter.was_split: + self.map[item.href] = dict(splitter.anchor_map) + + def find_page_breaks(self, item): + if self.page_break_selectors is None: + self.page_break_selectors = set([]) + stylesheets = [x.data for x in self.oeb.manifest if x.media_type in + OEB_STYLES] + page_break_selectors = set([]) + for rule in rules(stylesheets): + before = getattr(rule.style.getPropertyCSSValue( + 'page-break-before'), 'cssText', '').strip().lower() + after = getattr(rule.style.getPropertyCSSValue( + 'page-break-after'), 'cssText', '').strip().lower() + try: + if before and before != 'avoid': + page_break_selectors.add((CSSSelector(rule.selectorText), + True)) + except: + pass + try: + if after and after != 'avoid': + page_break_selectors.add((CSSSelector(rule.selectorText), + False)) + except: + pass + + page_breaks = set([]) + for selector, before in page_break_selectors: + for elem in selector(item.data): + elem.pb_before = before + page_breaks.add(elem) + + for i, elem in enumerate(item.data.iter()): + elem.pb_order = i + + page_breaks = list(page_breaks) + page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order)) + page_break_ids, page_breaks_ = [], [] + for i, x in enumerate(page_breaks): + x.set('id', x.get('id', 'calibre_pb_%d'%i)) + id = x.get('id') + page_breaks_.append((XPath('//*[@id="%s"]'%id), x.pb_before)) + page_break_ids.append(id) + + return page_breaks_, page_break_ids + + def fix_links(self, opf): + ''' + Fix references to the split files in other content files. + ''' + for item in self.oeb.manifest: + if etree.iselement(item.data): + self.current_item = item + rewrite_links(item.data, self.rewrite_links) + + def rewrite_links(self, url): + href, frag = urldefrag(url) + href = self.current_item.abshref(href) + if href in self.map: + anchor_map = self.map[href] + nhref = anchor_map[frag if frag else None] + if frag: + nhref = '#'.joinn(href, frag) + return nhref + return url -class Splitter(object): +class FlowSplitter(object): - def __init__(self, path, opts, stylesheet_map, opf): - self.setup_cli_handler(opts.verbose) - self.path = path - self.always_remove = not opts.preserve_tag_structure or \ - os.stat(content(path)).st_size > 5*opts.profile.flow_size - self.base = (os.path.splitext(path)[0].replace('%', '%%') + '_split_%d.html') - self.opts = opts - self.orig_size = os.stat(content(path)).st_size - self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.) - root = html.fromstring(open(content(path)).read()) + def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb): + self.item = item + self.oeb = oeb + self.log = oeb.log + self.page_breaks = page_breaks + self.page_break_ids = page_break_ids + self.max_flow_size = max_flow_size + self.base = item.abshref(item.href) - self.page_breaks, self.trees = [], [] - self.split_size = 0 + base, ext = os.path.splitext(self.base) + self.base = base.replace('%', '%%')+'_split_%d'+ext - # Split on page breaks + self.trees = [self.item.data] self.splitting_on_page_breaks = True - if not opts.dont_split_on_page_breaks: - self.log_info('\tSplitting on page breaks...') - if self.path in stylesheet_map: - self.find_page_breaks(stylesheet_map[self.path], root) - self.split_on_page_breaks(root.getroottree()) - trees = list(self.trees) - else: - self.trees = [root.getroottree()] - trees = list(self.trees) - - # Split any remaining over-sized trees + if self.page_breaks: + self.split_on_page_breaks(self.item.data) self.splitting_on_page_breaks = False - if self.opts.profile.flow_size < sys.maxint: + + if self.max_flow_size > 0: lt_found = False - self.log_info('\tLooking for large trees...') - for i, tree in enumerate(list(trees)): + self.log('\tLooking for large trees...') + trees = list(self.trees) + for i, tree in enumerate(list(self.trees)): self.trees = [] size = len(tostring(tree.getroot())) if size > self.opts.profile.flow_size: lt_found = True - try: - self.split_to_size(tree) - except (SplitError, RuntimeError): # Splitting fails - if not self.always_remove: - self.always_remove = True - self.split_to_size(tree) - else: - raise + self.split_to_size(tree) trees[i:i+1] = list(self.trees) if not lt_found: self.log_info('\tNo large trees found') + self.trees = trees - self.trees = trees self.was_split = len(self.trees) > 1 - if self.was_split: - self.commit() - self.log_info('\t\tSplit into %d parts.', len(self.trees)) - if self.opts.verbose: - for f in self.files: - self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.) - self.fix_opf(opf) + self.commit() - self.trees = None + def split_on_page_breaks(self, orig_tree): + ordered_ids = [] + for elem in orig_tree.xpath('//*[@id]'): + id = elem.get('id') + if id in self.page_break_ids: + ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)]) - - def split_text(self, text, root, size): - self.log_debug('\t\t\tSplitting text of length: %d'%len(text)) - rest = text.replace('\r', '') - parts = re.split('\n\n', rest) - self.log_debug('\t\t\t\tFound %d parts'%len(parts)) - if max(map(len, parts)) > size: - raise SplitError('Cannot split as file contains a
 tag with a very large paragraph', root)
-        ans = []
-        buf = ''
-        for part in parts:
-            if len(buf) + len(part) < size:
-                buf += '\n\n'+part
-            else:
-                ans.append(buf)
-                buf = part
-        return ans
-
-
-    def split_to_size(self, tree):
-        self.log_debug('\t\tSplitting...')
-        root = tree.getroot()
-        # Split large 
 tags
-        for pre in list(root.xpath('//pre')):
-            text = u''.join(pre.xpath('descendant::text()'))
-            pre.text = text
-            for child in list(pre.iterchildren()):
-                pre.remove(child)
-            if len(pre.text) > self.opts.profile.flow_size*0.5:
-                frags = self.split_text(pre.text, root, int(0.2*self.opts.profile.flow_size))
-                new_pres = []
-                for frag in frags:
-                    pre2 = copy.copy(pre)
-                    pre2.text = frag
-                    pre2.tail = u''
-                    new_pres.append(pre2)
-                new_pres[-1].tail = pre.tail
-                p = pre.getparent()
-                i = p.index(pre)
-                p[i:i+1] = new_pres
-
-        split_point, before = self.find_split_point(root)
-        if split_point is None or self.split_size > 6*self.orig_size:
-            if not self.always_remove:
-                self.log_warn(_('\t\tToo much markup. Re-splitting without '
-                                'structure preservation. This may cause '
-                                'incorrect rendering.'))
-            raise SplitError(self.path, root)
-
-        for t in self.do_split(tree, split_point, before):
-            r = t.getroot()
-            if self.is_page_empty(r):
-                continue
-            size = len(tostring(r))
-            if size <= self.opts.profile.flow_size:
-                self.trees.append(t)
-                #print tostring(t.getroot(), pretty_print=True)
-                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
-                               len(self.trees), size/1024.)
-                self.split_size += size
-            else:
-                self.split_to_size(t)
-
-    def is_page_empty(self, root):
-        body = root.find('body')
-        if body is None:
-            return False
-        txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
-        if len(txt) > 4:
-            #if len(txt) < 100:
-            #    print 1111111, html.tostring(body, method='html', encoding=unicode)
-            return False
-        for img in root.xpath('//img'):
-            if img.get('style', '') != 'display:none':
-                return False
-        return True
+        self.trees = []
+        tree = orig_tree
+        for pattern, before in ordered_ids:
+            self.log.debug('\t\tSplitting on page-break')
+            elem = pattern(tree)
+            if elem:
+                before, after = self.do_split(tree, elem[0], before)
+                self.trees.append(before)
+                tree = after
+        self.trees.append(tree)
+        self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
 
     def do_split(self, tree, split_point, before):
         '''
@@ -190,7 +211,7 @@ class Splitter(object):
         split_point2 = root2.xpath(path)[0]
 
         def nix_element(elem, top=True):
-            if self.always_remove:
+            if True:
                 parent = elem.getparent()
                 index = parent.index(elem)
                 if top:
@@ -198,7 +219,6 @@ class Splitter(object):
                 else:
                     index = parent.index(elem)
                     parent[index:index+1] = list(elem.iterchildren())
-
             else:
                 elem.text = u''
                 elem.tail = u''
@@ -241,67 +261,76 @@ class Splitter(object):
 
         return tree, tree2
 
+    def is_page_empty(self, root):
+        body = root.find('body')
+        if body is None:
+            return False
+        txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
+        if len(txt) > 4:
+            return False
+        for img in root.xpath('//img'):
+            if img.get('style', '') != 'display:none':
+                return False
+        return True
 
-    def split_on_page_breaks(self, orig_tree):
-        ordered_ids = []
-        for elem in orig_tree.xpath('//*[@id]'):
-            id = elem.get('id')
-            if id in self.page_break_ids:
-                ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
-
-        self.trees = []
-        tree = orig_tree
-        for pattern, before in ordered_ids:
-            self.log_info('\t\tSplitting on page-break')
-            elem = pattern(tree)
-            if elem:
-                before, after = self.do_split(tree, elem[0], before)
-                self.trees.append(before)
-                tree = after
-        self.trees.append(tree)
-        self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
+    def split_text(self, text, root, size):
+        self.log.debug('\t\t\tSplitting text of length: %d'%len(text))
+        rest = text.replace('\r', '')
+        parts = re.split('\n\n', rest)
+        self.log.debug('\t\t\t\tFound %d parts'%len(parts))
+        if max(map(len, parts)) > size:
+            raise SplitError('Cannot split as file contains a 
 tag '
+                'with a very large paragraph', root)
+        ans = []
+        buf = ''
+        for part in parts:
+            if len(buf) + len(part) < size:
+                buf += '\n\n'+part
+            else:
+                ans.append(buf)
+                buf = part
+        return ans
 
 
+    def split_to_size(self, tree):
+        self.log.debug('\t\tSplitting...')
+        root = tree.getroot()
+        # Split large 
 tags
+        for pre in list(root.xpath('//pre')):
+            text = u''.join(pre.xpath('descendant::text()'))
+            pre.text = text
+            for child in list(pre.iterchildren()):
+                pre.remove(child)
+            if len(pre.text) > self.max_flow_size*0.5:
+                frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
+                new_pres = []
+                for frag in frags:
+                    pre2 = copy.copy(pre)
+                    pre2.text = frag
+                    pre2.tail = u''
+                    new_pres.append(pre2)
+                new_pres[-1].tail = pre.tail
+                p = pre.getparent()
+                i = p.index(pre)
+                p[i:i+1] = new_pres
 
-    def find_page_breaks(self, stylesheets, root):
-        '''
-        Find all elements that have either page-break-before or page-break-after set.
-        Populates `self.page_breaks` with id based XPath selectors (for elements that don't
-        have ids, an id is created).
-        '''
-        page_break_selectors = set([])
-        for rule in rules(stylesheets):
-            before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
-            after  = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
-            try:
-                if before and before != 'avoid':
-                    page_break_selectors.add((CSSSelector(rule.selectorText), True))
-            except:
-                pass
-            try:
-                if after and after != 'avoid':
-                    page_break_selectors.add((CSSSelector(rule.selectorText), False))
-            except:
-                pass
-
-        page_breaks = set([])
-        for selector, before in page_break_selectors:
-            for elem in selector(root):
-                elem.pb_before = before
-                page_breaks.add(elem)
-
-        for i, elem in enumerate(root.iter()):
-            elem.pb_order = i
-
-        page_breaks = list(page_breaks)
-        page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
-        self.page_break_ids = []
-        for i, x in enumerate(page_breaks):
-            x.set('id', x.get('id', 'calibre_pb_%d'%i))
-            id = x.get('id')
-            self.page_breaks.append((XPath('//*[@id="%s"]'%id), x.pb_before))
-            self.page_break_ids.append(id)
+        split_point, before = self.find_split_point(root)
+        if split_point is None:
+            raise SplitError(self.item.href, root)
 
+        for t in self.do_split(tree, split_point, before):
+            r = t.getroot()
+            if self.is_page_empty(r):
+                continue
+            size = len(tostring(r))
+            if size <= self.max_flow_size:
+                self.trees.append(t)
+                #print tostring(t.getroot(), pretty_print=True)
+                self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)',
+                               len(self.trees), size/1024.)
+                self.split_size += size
+            else:
+                self.split_to_size(t)
 
     def find_split_point(self, root):
         '''
@@ -336,8 +365,7 @@ class Splitter(object):
                      '//br',
                      '//li',
                      ):
-            elems = root.xpath(path,
-                    namespaces={'re':'http://exslt.org/regular-expressions'})
+            elems = root.xpath(path, namespaces=NAMESPACES)
             elem = pick_elem(elems)
             if elem is not None:
                 try:
@@ -355,6 +383,8 @@ class Splitter(object):
         all anchors in the original tree. Internal links are re-directed. The
         original file is deleted and the split files are saved.
         '''
+        if not self.was_split:
+            return
         self.anchor_map = collections.defaultdict(lambda :self.base%0)
         self.files = []
 
@@ -368,134 +398,46 @@ class Splitter(object):
                 elem.attrib.pop(SPLIT_ATTR, None)
                 elem.attrib.pop(SPLIT_POINT_ATTR, '0')
 
-        for current, tree in zip(self.files, self.trees):
-            for a in tree.getroot().xpath('//a[@href]'):
+        spine_pos = self.item.spine_pos
+        for current, tree in zip(map(reversed, (self.files, self.trees))):
+            for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
                 href = a.get('href').strip()
                 if href.startswith('#'):
                     anchor = href[1:]
                     file = self.anchor_map[anchor]
                     if file != current:
                         a.set('href', file+href)
-            open(content(current), 'wb').\
-                write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
 
-        os.remove(content(self.path))
+            new_id = self.oeb.manifest.generate(id=self.item.id)[0]
+            new_item = self.oeb.manifest.add(new_id, current,
+                    self.item.media_type, data=tree.getroot())
+            self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
+
+        if self.oeb.guide:
+            for ref in self.oeb.guide:
+                href, frag = urldefrag(ref.href)
+                if href == self.item.href:
+                    nhref = self.anchor_map[frag if frag else None]
+                    if frag:
+                        nhref = '#'.join(nhref, frag)
+                    ref.href = nhref
+
+        def fix_toc_entry(toc):
+            if toc.href:
+                href, frag = urldefrag(toc.href)
+                if href == self.item.href:
+                    nhref = self.anchor_map[frag if frag else None]
+                    if frag:
+                        nhref = '#'.join(nhref, frag)
+                    toc.href = nhref
+            for x in toc:
+                fix_toc_entry(x)
 
 
-    def fix_opf(self, opf):
-        '''
-        Fix references to the split file in the OPF.
-        '''
-        items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
-        new_items = [('content/'+f, None) for f in self.files]
-        id_map = {}
-        for item in items:
-            id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
+        if self.oeb.toc:
+            fix_toc_entry(self.oeb.toc)
 
-        for id in id_map.keys():
-            opf.replace_spine_items_by_idref(id, id_map[id])
-
-        for ref in opf.iterguide():
-            href = ref.get('href', '')
-            if href.startswith('content/'+self.path):
-                href = href.split('#')
-                frag = None
-                if len(href) > 1:
-                    frag = href[1]
-                if frag not in self.anchor_map:
-                    self.log_warning('\t\tUnable to re-map OPF link', href)
-                    continue
-                new_file = self.anchor_map[frag]
-                ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
+        self.oeb.manifest.remove(self.item)
 
 
 
-def fix_content_links(html_files, changes, opts):
-    split_files = [f.path for f in changes]
-    anchor_maps = [f.anchor_map for f in changes]
-    files = list(html_files)
-    for j, f in enumerate(split_files):
-        try:
-            i = files.index(f)
-            files[i:i+1] = changes[j].files
-        except ValueError:
-            continue
-
-    for htmlfile in files:
-        changed = False
-        root = html.fromstring(open(content(htmlfile), 'rb').read())
-        for a in root.xpath('//a[@href]'):
-            href = a.get('href')
-            if not href.startswith('#'):
-                href = href.split('#')
-                anchor = href[1] if len(href) > 1 else None
-                href = href[0]
-                if href in split_files:
-                    try:
-                        newf = anchor_maps[split_files.index(href)][anchor]
-                    except:
-                        print '\t\tUnable to remap HTML link:', href, anchor
-                        continue
-                    frag = ('#'+anchor) if anchor else ''
-                    a.set('href', newf+frag)
-                    changed = True
-
-        if changed:
-            open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
-
-def fix_ncx(path, changes):
-    split_files = [f.path for f in changes]
-    anchor_maps = [f.anchor_map for f in changes]
-    tree = etree.parse(path)
-    changed = False
-    for content in tree.getroot().xpath('//x:content[@src]',
-                    namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
-        href = content.get('src')
-        if not href.startswith('#'):
-            href = href.split('#')
-            anchor = href[1] if len(href) > 1 else None
-            href = href[0].split('/')[-1]
-            if href in split_files:
-                try:
-                    newf = anchor_maps[split_files.index(href)][anchor]
-                except:
-                    print 'Unable to remap NCX link:', href, anchor
-                frag = ('#'+anchor) if anchor else ''
-                content.set('src', 'content/'+newf+frag)
-                changed = True
-    if changed:
-        open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
-
-def find_html_files(opf):
-    '''
-    Find all HTML files referenced by `opf`.
-    '''
-    html_files = []
-    for item in opf.itermanifest():
-        if 'html' in item.get('media-type', '').lower():
-            f = item.get('href').split('/')[-1]
-            f2 = f.replace('&', '%26')
-            if not os.path.exists(content(f)) and os.path.exists(content(f2)):
-                f = f2
-                item.set('href', item.get('href').replace('&', '%26'))
-            if os.path.exists(content(f)):
-                html_files.append(f)
-    return html_files
-
-
-def split(pathtoopf, opts, stylesheet_map):
-    pathtoopf = os.path.abspath(pathtoopf)
-    opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
-
-    with CurrentDir(os.path.dirname(pathtoopf)):
-        html_files = find_html_files(opf)
-        changes = [Splitter(f, opts, stylesheet_map, opf) for f in html_files]
-        changes = [c for c in changes if c.was_split]
-
-        fix_content_links(html_files, changes, opts)
-        for item in opf.itermanifest():
-            if item.get('media-type', '') == 'application/x-dtbncx+xml':
-                fix_ncx(item.get('href'), changes)
-                break
-
-        open(pathtoopf, 'wb').write(opf.render())
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 225f7a9e33..b3a67d003e 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -67,6 +67,10 @@ def _config():
     c.add_opt('default_send_to_device_action', default=None,
             help=_('Default action to perform when send to device button is '
                 'clicked'))
+    c.add_opt('show_donate_button', default=True,
+            help='Show donation button')
+    c.add_opt('asked_library_thing_password', default=False,
+            help='Asked library thing password at least once.')
     return ConfigProxy(c)
 
 config = _config()
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index d21a249395..8d1b7a1b3a 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -12,6 +12,7 @@ from PyQt4.Qt import QMenu, QAction, QActionGroup, QIcon, SIGNAL, QPixmap, \
 
 from calibre.customize.ui import available_input_formats, available_output_formats
 from calibre.devices import devices
+from calibre.constants import iswindows
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.parallel import Job
 from calibre.devices.scanner import DeviceScanner
@@ -71,7 +72,14 @@ class DeviceManager(Thread):
             if connected and not device[1]:
                 try:
                     dev = device[0]()
-                    dev.open()
+                    if iswindows:
+                        import pythoncom
+                        pythoncom.CoInitialize()
+                    try:
+                        dev.open()
+                    finally:
+                        if iswindows:
+                            pythoncom.CoUninitialize()
                     self.device       = dev
                     self.device_class = dev.__class__
                     self.connected_slot(True)
@@ -670,7 +678,9 @@ class DeviceGUI(object):
             bad = '\n'.join('
  • %s
  • '%(i,) for i in bad) d = warning_dialog(self, _('No suitable formats'), _('Could not upload the following books to the device, ' - 'as no suitable formats were found:
      %s
    ')%(bad,)) + 'as no suitable formats were found. Try changing the output ' + 'format in the upper right corner next to the red heart and ' + 're-converting.
      %s
    ')%(bad,)) d.exec_() def upload_booklists(self): diff --git a/src/calibre/gui2/dialogs/epub.py b/src/calibre/gui2/dialogs/epub.py index 0773440b01..e61d034642 100644 --- a/src/calibre/gui2/dialogs/epub.py +++ b/src/calibre/gui2/dialogs/epub.py @@ -176,19 +176,19 @@ class Config(ResizableDialog, Ui_Dialog): def get_metadata(self): title, authors = self.get_title_and_authors() mi = MetaInformation(title, authors) - publisher = unicode(self.publisher.text()) + publisher = unicode(self.publisher.text()).strip() if publisher: mi.publisher = publisher - author_sort = unicode(self.author_sort.text()) + author_sort = unicode(self.author_sort.text()).strip() if author_sort: mi.author_sort = author_sort - comments = unicode(self.comment.toPlainText()) + comments = unicode(self.comment.toPlainText()).strip() if comments: mi.comments = comments mi.series_index = int(self.series_index.value()) if self.series.currentIndex() > -1: - mi.series = unicode(self.series.currentText()) - tags = [t.strip() for t in unicode(self.tags.text()).split(',')] + mi.series = unicode(self.series.currentText()).strip() + tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')] if tags: mi.tags = tags @@ -267,6 +267,7 @@ class Config(ResizableDialog, Ui_Dialog): ).exec_() return mi = self.get_metadata() + self.user_mi = mi self.read_settings() self.cover_file = None if self.row is not None: diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index a9d20905c6..c48c7c3640 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -25,24 +25,48 @@ from calibre import islinux from calibre.ebooks.metadata.meta import get_metadata from calibre.utils.config import prefs from calibre.customize.ui import run_plugins_on_import +from calibre.gui2 import config as gui_conf class CoverFetcher(QThread): - def __init__(self, username, password, isbn, timeout): - self.username = username - self.password = password + def __init__(self, username, password, isbn, timeout, title, author): + self.username = username.strip() if username else username + self.password = password.strip() if password else password self.timeout = timeout self.isbn = isbn + self.title = title + self.needs_isbn = False + self.author = author QThread.__init__(self) self.exception = self.traceback = self.cover_data = None def run(self): try: - login(self.username, self.password, force=False) + if not self.isbn: + from calibre.ebooks.metadata.fetch import search + if not self.title: + self.needs_isbn = True + return + au = self.author if self.author else None + key = prefs['isbndb_com_key'] + if not key: + key = None + results = search(title=self.title, author=au, + isbndb_key=key)[0] + results = sorted([x.isbn for x in results if x.isbn], + cmp=lambda x,y:cmp(len(x),len(y)), reverse=True) + if not results: + self.needs_isbn = True + return + self.isbn = results[0] + + if self.username and self.password: + login(self.username, self.password, force=False) self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0] except Exception, e: self.exception = e self.traceback = traceback.format_exc() + print self.traceback @@ -64,6 +88,8 @@ class AuthorCompleter(QCompleter): class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): + COVER_FETCH_TIMEOUT = 240 # seconds + def do_reset_cover(self, *args): pix = QPixmap(':/images/book.svg') self.cover.setPixmap(pix) @@ -345,36 +371,39 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): def lt_password_dialog(self): return PasswordDialog(self, 'LibraryThing account', - _('

    Enter your username and password for LibraryThing.com.
    If you do not have one, you can register for free!.

    ')) + _('

    Enter your username and password for ' + 'LibraryThing.com. This is optional. It will ' + 'make fetching of covers faster and more reliable.
    If ' + 'you do not have an account, you can ' + 'register for ' + 'free.

    ')) def change_password(self): d = self.lt_password_dialog() d.exec_() def fetch_cover(self): - isbn = qstring_to_unicode(self.isbn.text()) - if isbn: - d = self.lt_password_dialog() - if not d.username() or not d.password(): - d.exec_() - if d.result() != PasswordDialog.Accepted: - return - self.fetch_cover_button.setEnabled(False) - self.setCursor(Qt.WaitCursor) - self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn, - self.timeout) - self.cover_fetcher.start() - self._hangcheck = QTimer(self) - self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck) - self.cf_start_time = time.time() - self.pi.start(_('Downloading cover...')) - self._hangcheck.start(100) - else: - error_dialog(self, _('Cannot fetch cover'), - _('You must specify the ISBN identifier for this book.')).exec_() + isbn = unicode(self.isbn.text()).strip() + d = self.lt_password_dialog() + if not gui_conf['asked_library_thing_password'] and \ + (not d.username() or not d.password()): + d.exec_() + gui_conf['asked_library_thing_password'] = True + self.fetch_cover_button.setEnabled(False) + self.setCursor(Qt.WaitCursor) + title, author = map(unicode, (self.title.text(), self.authors.text())) + self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn, + self.timeout, title, author) + self.cover_fetcher.start() + self._hangcheck = QTimer(self) + self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck) + self.cf_start_time = time.time() + self.pi.start(_('Downloading cover...')) + self._hangcheck.start(100) def hangcheck(self): - if not (self.cover_fetcher.isFinished() or time.time()-self.cf_start_time > 150): + if not self.cover_fetcher.isFinished() and \ + time.time()-self.cf_start_time < self.COVER_FETCH_TIMEOUT: return self._hangcheck.stop() @@ -385,6 +414,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): _('Could not fetch cover.
    ')+ _('The download timed out.')).exec_() return + if self.cover_fetcher.needs_isbn: + error_dialog(self, _('Cannot fetch cover'), + _('Could not find cover for this book. Try ' + 'specifying the ISBN first.')).exec_() + return if self.cover_fetcher.exception is not None: err = self.cover_fetcher.exception error_dialog(self, _('Cannot fetch cover'), diff --git a/src/calibre/gui2/dialogs/password.ui b/src/calibre/gui2/dialogs/password.ui index 865c065a10..3fc982371e 100644 --- a/src/calibre/gui2/dialogs/password.ui +++ b/src/calibre/gui2/dialogs/password.ui @@ -1,7 +1,8 @@ - + + Dialog - - + + 0 0 @@ -9,66 +10,70 @@ 209 - + Password needed - - :/images/mimetypes/unknown.svg + + + :/images/mimetypes/unknown.svg:/images/mimetypes/unknown.svg - - - - + + + + TextLabel - + + true + + true - - - + + + &Username: - + gui_username - - + + - - - + + + &Password: - + gui_password - - - + + + QLineEdit::Password - - - + + + Qt::Horizontal - - QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - + + + &Show password @@ -76,7 +81,7 @@ - + @@ -85,11 +90,11 @@ Dialog accept() - + 248 254 - + 157 274 @@ -101,11 +106,11 @@ Dialog reject() - + 316 260 - + 286 274 diff --git a/src/calibre/gui2/images/news/hna.png b/src/calibre/gui2/images/news/hna.png new file mode 100644 index 0000000000..f4e1135dd5 Binary files /dev/null and b/src/calibre/gui2/images/news/hna.png differ diff --git a/src/calibre/gui2/images/news/nzz_ger.png b/src/calibre/gui2/images/news/nzz_ger.png new file mode 100644 index 0000000000..ba9591853f Binary files /dev/null and b/src/calibre/gui2/images/news/nzz_ger.png differ diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index f7362e556c..86d1b013e3 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -108,6 +108,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): self.donate_action = self.system_tray_menu.addAction( QIcon(':/images/donate.svg'), _('&Donate to support calibre')) self.donate_button.setDefaultAction(self.donate_action) + if not config['show_donate_button']: + self.donate_button.setVisible(False) self.addAction(self.quit_action) self.action_restart = QAction(_('&Restart'), self) self.addAction(self.action_restart) diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index d004dcb502..a3002089a9 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -25,7 +25,7 @@ from calibre.ebooks.lrf.comic.convert_from import config as comicconfig # Ordered list of source formats. Items closer to the beginning are # preferred for conversion over those toward the end. -PREFERRED_SOURCE_FORMATS = ['epub', 'lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf', +PREFERRED_SOURCE_FORMATS = ['epub', 'lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf', 'txt', 'pdf', 'oebzip', 'htm', 'html'] def get_dialog(fmt): @@ -43,20 +43,20 @@ def get_config(fmt): def auto_convert(fmt, parent, db, rows): changed = False jobs = [] - + total = len(rows) if total == 0: return None, None, None parent.status_bar.showMessage(_('Starting auto conversion of %d books')%total, 2000) - + i = 0 bad_rows = [] - + for i, row in enumerate(rows): row_id = db.id(row) - + temp_files = [] - + data = None in_formats = [f.lower() for f in db.formats(row).split(',')] in_formats = list(set(in_formats).intersection(available_input_formats())) @@ -88,10 +88,10 @@ def auto_convert(fmt, parent, db, rows): for row in bad_rows: title = db.title(row) res.append('
  • %s
  • '%title) - + msg = _('

    Could not convert %d of %d books, because no suitable source format was found.

      %s
    ')%(len(res), total, '\n'.join(res)) warning_dialog(parent, _('Could not convert some books'), msg).exec_() - + return jobs, changed, bad_rows def convert_single(fmt, parent, db, comics, others): @@ -120,10 +120,10 @@ def convert_single(fmt, parent, db, comics, others): temp_files.append(d.cover_file) opts.cover = d.cover_file.name temp_files.extend([d.opf_file, pt, of]) - jobs.append(('any2'+fmt, args, _('Convert book: ')+d.mi.title, + jobs.append(('any2'+fmt, args, _('Convert book: ')+d.mi.title, fmt.upper(), row_id, temp_files)) changed = True - + for row, row_id in zip(comics, comics_ids): mi = db.get_metadata(row) title = author = _('Unknown') @@ -140,7 +140,7 @@ def convert_single(fmt, parent, db, comics, others): try: data = db.format(row, _fmt.upper()) if data is not None: - break + break except: continue pt = PersistentTemporaryFile('.'+_fmt) @@ -152,12 +152,12 @@ def convert_single(fmt, parent, db, comics, others): opts.verbose = 2 args = [pt.name, opts] changed = True - jobs.append(('comic2'+fmt, args, _('Convert comic: ')+opts.title, + jobs.append(('comic2'+fmt, args, _('Convert comic: ')+opts.title, fmt.upper(), row_id, [pt, of])) - + return jobs, changed - - + + def convert_single_lrf(parent, db, comics, others): changed = False @@ -182,10 +182,10 @@ def convert_single_lrf(parent, db, comics, others): if d.cover_file: temp_files.append(d.cover_file) temp_files.extend([pt, of]) - jobs.append(('any2lrf', [cmdline], _('Convert book: ')+d.title(), + jobs.append(('any2lrf', [cmdline], _('Convert book: ')+d.title(), 'LRF', row_id, temp_files)) changed = True - + for row, row_id in zip(comics, comics_ids): mi = db.get_metadata(row) title = author = _('Unknown') @@ -202,7 +202,7 @@ def convert_single_lrf(parent, db, comics, others): try: data = db.format(row, fmt.upper()) if data is not None: - break + break except: continue if data is None: @@ -216,19 +216,20 @@ def convert_single_lrf(parent, db, comics, others): opts.verbose = 1 args = [pt.name, opts] changed = True - jobs.append(('comic2lrf', args, _('Convert comic: ')+opts.title, + jobs.append(('comic2lrf', args, _('Convert comic: ')+opts.title, 'LRF', row_id, [pt, of])) - + return jobs, changed def convert_bulk(fmt, parent, db, comics, others): if others: d = get_dialog(fmt)(parent, db) if d.exec_() != QDialog.Accepted: - others = [] + others, user_mi = [], None else: opts = d.opts opts.verbose = 2 + user_mi = d.user_mi if comics: comic_opts = ComicConf.get_bulk_conversion_options(parent) if not comic_opts: @@ -239,7 +240,7 @@ def convert_bulk(fmt, parent, db, comics, others): if total == 0: return parent.status_bar.showMessage(_('Starting Bulk conversion of %d books')%total, 2000) - + for i, row in enumerate(others+comics): row_id = db.id(row) if row in others: @@ -256,6 +257,11 @@ def convert_bulk(fmt, parent, db, comics, others): continue options = opts.copy() mi = db.get_metadata(row) + if user_mi is not None: + if user_mi.series_index == 1: + user_mi.series_index = None + mi.smart_update(user_mi) + db.set_metadata(db.id(row), mi) opf = OPFCreator(os.getcwdu(), mi) opf_file = PersistentTemporaryFile('.opf') opf.render(opf_file) @@ -291,10 +297,10 @@ def convert_bulk(fmt, parent, db, comics, others): try: data = db.format(row, _fmt.upper()) if data is not None: - break + break except: continue - + pt = PersistentTemporaryFile('.'+_fmt.lower()) pt.write(data) pt.close() @@ -304,17 +310,17 @@ def convert_bulk(fmt, parent, db, comics, others): options.verbose = 1 args = [pt.name, options] desc = _('Convert book %d of %d (%s)')%(i+1, total, repr(mi.title)) - jobs.append(('comic2'+fmt, args, desc, fmt.upper(), row_id, [pt, of])) - + jobs.append(('comic2'+fmt, args, desc, fmt.upper(), row_id, [pt, of])) + if bad_rows: res = [] for row in bad_rows: title = db.title(row) res.append('
  • %s
  • '%title) - + msg = _('

    Could not convert %d of %d books, because no suitable source format was found.

      %s
    ')%(len(res), total, '\n'.join(res)) warning_dialog(parent, _('Could not convert some books'), msg).exec_() - + return jobs, False @@ -333,7 +339,7 @@ def convert_bulk_lrf(parent, db, comics, others): if total == 0: return parent.status_bar.showMessage(_('Starting Bulk conversion of %d books')%total, 2000) - + for i, row in enumerate(others+comics): row_id = db.id(row) if row in others: @@ -388,10 +394,10 @@ def convert_bulk_lrf(parent, db, comics, others): try: data = db.format(row, fmt.upper()) if data is not None: - break + break except: continue - + pt = PersistentTemporaryFile('.'+fmt.lower()) pt.write(data) pt.close() @@ -401,17 +407,17 @@ def convert_bulk_lrf(parent, db, comics, others): options.verbose = 1 args = [pt.name, options] desc = _('Convert book %d of %d (%s)')%(i+1, total, repr(mi.title)) - jobs.append(('comic2lrf', args, desc, 'LRF', row_id, [pt, of])) - + jobs.append(('comic2lrf', args, desc, 'LRF', row_id, [pt, of])) + if bad_rows: res = [] for row in bad_rows: title = db.title(row) res.append('
  • %s
  • '%title) - + msg = _('

    Could not convert %d of %d books, because no suitable source format was found.

      %s
    ')%(len(res), total, '\n'.join(res)) warning_dialog(parent, _('Could not convert some books'), msg).exec_() - + return jobs, False def set_conversion_defaults_lrf(comic, parent, db): @@ -438,7 +444,7 @@ def _fetch_news(data, fmt): args.extend(['--password', data['password']]) args.append(data['script'] if data['script'] else data['title']) return 'feeds2'+fmt.lower(), [args], _('Fetch news from ')+data['title'], fmt.upper(), [pt] - + def fetch_scheduled_recipe(recipe, script): from calibre.gui2.dialogs.scheduler import config @@ -453,7 +459,7 @@ def fetch_scheduled_recipe(recipe, script): args.extend(['--username', x[0], '--password', x[1]]) args.append(script) return 'feeds2'+fmt, [args], _('Fetch news from ')+recipe.title, fmt.upper(), [pt] - + def auto_convert_ebook(*args): return auto_convert(*args) @@ -463,14 +469,14 @@ def convert_single_ebook(*args): return convert_single_lrf(*args) elif fmt in ('epub', 'mobi'): return convert_single(fmt, *args) - + def convert_bulk_ebooks(*args): fmt = prefs['output_format'].lower() if fmt == 'lrf': return convert_bulk_lrf(*args) elif fmt in ('epub', 'mobi'): return convert_bulk(fmt, *args) - + def set_conversion_defaults(comic, parent, db): fmt = prefs['output_format'].lower() if fmt == 'lrf': diff --git a/src/calibre/library/__init__.py b/src/calibre/library/__init__.py index c32d6732df..8c304e5dce 100644 --- a/src/calibre/library/__init__.py +++ b/src/calibre/library/__init__.py @@ -7,19 +7,19 @@ from calibre.utils.config import Config, StringConfig def server_config(defaults=None): desc=_('Settings to control the calibre content server') c = Config('server', desc) if defaults is None else StringConfig(defaults, desc) - - c.add_opt('port', ['-p', '--port'], default=8080, + + c.add_opt('port', ['-p', '--port'], default=8080, help=_('The port on which to listen. Default is %default')) - c.add_opt('timeout', ['-t', '--timeout'], default=120, + c.add_opt('timeout', ['-t', '--timeout'], default=120, help=_('The server timeout in seconds. Default is %default')) - c.add_opt('thread_pool', ['--thread-pool'], default=30, + c.add_opt('thread_pool', ['--thread-pool'], default=30, help=_('The max number of worker threads to use. Default is %default')) - c.add_opt('password', ['--password'], default=None, + c.add_opt('password', ['--password'], default=None, help=_('Set a password to restrict access. By default access is unrestricted.')) c.add_opt('username', ['--username'], default='calibre', help=_('Username for access. By default, it is: %default')) c.add_opt('develop', ['--develop'], default=False, help='Development mode. Server automatically restarts on file changes and serves code files (html, css, js) from the file system instead of calibre\'s resource system.') - c.add_opt('max_cover', ['--max-cover'], default='600x800', + c.add_opt('max_cover', ['--max-cover'], default='600x800', help=_('The maximum size for displayed covers. Default is %default.')) return c diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index 4ba6253819..8e9b6278d8 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -30,31 +30,31 @@ build_time = datetime.strptime(build_time, '%d %m %Y %H%M%S') server_resources['jquery.js'] = jquery def expose(func): - + def do(self, *args, **kwargs): dict.update(cherrypy.response.headers, {'Server':self.server_name}) return func(self, *args, **kwargs) - + return cherrypy.expose(do) log_access_file = os.path.join(config_dir, 'server_access_log.txt') log_error_file = os.path.join(config_dir, 'server_error_log.txt') - + class LibraryServer(object): - + server_name = __appname__ + '/' + __version__ BOOK = textwrap.dedent('''\ - ${r[8] if r[8] else ''} ''') - + LIBRARY = MarkupTemplate(textwrap.dedent('''\ @@ -72,7 +72,7 @@ class LibraryServer(object): ''')) - + STANZA_ENTRY=MarkupTemplate(textwrap.dedent('''\ ${record[FM['title']]} @@ -87,7 +87,7 @@ class LibraryServer(object): ''')) - + STANZA = MarkupTemplate(textwrap.dedent('''\ @@ -107,7 +107,7 @@ class LibraryServer(object): ''')) - + def __init__(self, db, opts, embedded=False, show_tracebacks=True): self.db = db for item in self.db: @@ -116,7 +116,7 @@ class LibraryServer(object): self.opts = opts self.max_cover_width, self.max_cover_height = \ map(int, self.opts.max_cover.split('x')) - + cherrypy.config.update({ 'log.screen' : opts.develop, 'engine.autoreload_on' : opts.develop, @@ -141,10 +141,10 @@ class LibraryServer(object): 'tools.digest_auth.realm' : (_('Password to access your calibre library. Username is ') + opts.username.strip()).encode('ascii', 'replace'), 'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()}, } - + self.is_running = False self.exception = None - + def setup_loggers(self): access_file = log_access_file error_file = log_error_file @@ -152,20 +152,20 @@ class LibraryServer(object): maxBytes = getattr(log, "rot_maxBytes", 10000000) backupCount = getattr(log, "rot_backupCount", 1000) - + # Make a new RotatingFileHandler for the error log. h = RotatingFileHandler(error_file, 'a', maxBytes, backupCount) h.setLevel(logging.DEBUG) h.setFormatter(cherrypy._cplogging.logfmt) log.error_log.addHandler(h) - + # Make a new RotatingFileHandler for the access log. h = RotatingFileHandler(access_file, 'a', maxBytes, backupCount) h.setLevel(logging.DEBUG) h.setFormatter(cherrypy._cplogging.logfmt) log.access_log.addHandler(h) - + def start(self): self.is_running = False self.setup_loggers() @@ -173,7 +173,7 @@ class LibraryServer(object): try: cherrypy.engine.start() self.is_running = True - publish_zeroconf('Books in calibre', '_stanza._tcp', + publish_zeroconf('Books in calibre', '_stanza._tcp', self.opts.port, {'path':'/stanza'}) cherrypy.engine.block() except Exception, e: @@ -181,10 +181,10 @@ class LibraryServer(object): finally: self.is_running = False stop_zeroconf() - + def exit(self): cherrypy.engine.exit() - + def get_cover(self, id, thumbnail=False): cover = self.db.cover(id, index_is_id=True, as_file=False) if cover is None: @@ -196,14 +196,14 @@ class LibraryServer(object): try: if QApplication.instance() is None: QApplication([]) - + im = QImage() im.loadFromData(cover) if im.isNull(): raise cherrypy.HTTPError(404, 'No valid cover found') width, height = im.width(), im.height() - scaled, width, height = fit_image(width, height, - 60 if thumbnail else self.max_cover_width, + scaled, width, height = fit_image(width, height, + 60 if thumbnail else self.max_cover_width, 80 if thumbnail else self.max_cover_height) if not scaled: return cover @@ -217,7 +217,7 @@ class LibraryServer(object): import traceback traceback.print_exc() raise cherrypy.HTTPError(404, 'Failed to generate cover: %s'%err) - + def get_format(self, id, format): format = format.upper() fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb') @@ -232,7 +232,7 @@ class LibraryServer(object): updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) return fmt.read() - + def sort(self, items, field, order): field = field.lower().strip() if field == 'author': @@ -243,10 +243,23 @@ class LibraryServer(object): raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') - field = FIELD_MAP[field] - getter = operator.itemgetter(field) - items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) - + if field == 'series': + items.sort(cmp=self.seriescmp, reverse=not order) + else: + field = FIELD_MAP[field] + getter = operator.itemgetter(field) + items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) + + def seriescmp(self, x, y): + si = FIELD_MAP['series'] + try: + ans = cmp(x[si].lower(), y[si].lower()) + except AttributeError: # Some entries may be None + ans = cmp(x[si], y[si]) + if ans != 0: return ans + return cmp(x[FIELD_MAP['series_index']], y[FIELD_MAP['series_index']]) + + def last_modified(self, updated): lm = updated.strftime('day, %d month %Y %H:%M:%S GMT') day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'} @@ -254,8 +267,8 @@ class LibraryServer(object): month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} return lm.replace('month', month[updated.month]) - - + + @expose def stanza(self): ' Feeds to read calibre books on a ipod with stanza.' @@ -264,7 +277,7 @@ class LibraryServer(object): r = record[FIELD_MAP['formats']] r = r.upper() if r else '' if 'EPUB' in r or 'PDB' in r: - authors = ' & '.join([i.replace('|', ',') for i in + authors = ' & '.join([i.replace('|', ',') for i in record[FIELD_MAP['authors']].split(',')]) extra = [] rating = record[FIELD_MAP['rating']] @@ -276,7 +289,7 @@ class LibraryServer(object): extra.append('TAGS: %s
    '%', '.join(tags.split(','))) series = record[FIELD_MAP['series']] if series: - extra.append('SERIES: %s [%d]
    '%(series, + extra.append('SERIES: %s [%d]
    '%(series, record[FIELD_MAP['series_index']])) fmt = 'epub' if 'EPUB' in r else 'pdb' mimetype = guess_type('dummy.'+fmt)[0] @@ -288,24 +301,24 @@ class LibraryServer(object): mimetype=mimetype, fmt=fmt, ).render('xml').decode('utf8')) - + updated = self.db.last_modified() cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) cherrypy.response.headers['Content-Type'] = 'text/xml' - + return self.STANZA.generate(subtitle='', data=books, FM=FIELD_MAP, updated=updated, id='urn:calibre:main').render('xml') - + @expose - def library(self, start='0', num='50', sort=None, search=None, + def library(self, start='0', num='50', sort=None, search=None, _=None, order='ascending'): ''' Serves metadata from the calibre database as XML. - + :param sort: Sort results by ``sort``. Can be one of `title,author,rating`. :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results - :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching + :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching ''' try: start = int(start) @@ -321,19 +334,19 @@ class LibraryServer(object): items = [r for r in iter(self.db) if r[0] in ids] if sort is not None: self.sort(items, sort, order) - + book, books = MarkupTemplate(self.BOOK), [] for record in items[start:start+num]: aus = record[2] if record[2] else _('Unknown') authors = '|'.join([i.replace('|', ',') for i in aus.split(',')]) books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8')) updated = self.db.last_modified() - + cherrypy.response.headers['Content-Type'] = 'text/xml' cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) - return self.LIBRARY.generate(books=books, start=start, updated=updated, + return self.LIBRARY.generate(books=books, start=start, updated=updated, total=len(ids)).render('xml') - + @expose def index(self, **kwargs): 'The / URL' @@ -341,8 +354,8 @@ class LibraryServer(object): if stanza == 919: return self.static('index.html') return self.stanza() - - + + @expose def get(self, what, id): 'Serves files, covers, thumbnails from the calibre database' @@ -361,7 +374,7 @@ class LibraryServer(object): if what == 'cover': return self.get_cover(id) return self.get_format(id, what) - + @expose def static(self, name): 'Serves static content' @@ -392,11 +405,11 @@ def start_threaded_server(db, opts): server.thread.setDaemon(True) server.thread.start() return server - + def stop_threaded_server(server): server.exit() server.thread = None - + def option_parser(): return config().option_parser('%prog '+ _('[options]\n\nStart the calibre content server.')) diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py index e4a0fe36af..a6c9876f20 100644 --- a/src/calibre/trac/plugins/download.py +++ b/src/calibre/trac/plugins/download.py @@ -69,6 +69,7 @@ else: DOWNLOAD_DIR = '/var/www/calibre.kovidgoyal.net/htdocs/downloads' MOBILEREAD = 'https://dev.mobileread.com/dist/kovid/calibre/' + #MOBILEREAD = 'http://calibre.kovidgoyal.net/downloads/' class OS(dict): """Dictionary with a default value for unknown keys.""" @@ -197,6 +198,8 @@ else: import sys, os, shutil, tarfile, subprocess, tempfile, urllib2, re, stat MOBILEREAD='https://dev.mobileread.com/dist/kovid/calibre/' + #MOBILEREAD='http://calibre.kovidgoyal.net/downloads/' + class TerminalController: BOL = '' #: Move the cursor to the beginning of the line diff --git a/src/calibre/trac/plugins/htdocs/images/binary_logo.png b/src/calibre/trac/plugins/htdocs/images/binary_logo.png new file mode 100644 index 0000000000..4387cc8cfe Binary files /dev/null and b/src/calibre/trac/plugins/htdocs/images/binary_logo.png differ diff --git a/src/calibre/trac/plugins/htdocs/images/foresight_logo.png b/src/calibre/trac/plugins/htdocs/images/foresight_logo.png new file mode 100644 index 0000000000..b947eca1a3 Binary files /dev/null and b/src/calibre/trac/plugins/htdocs/images/foresight_logo.png differ diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index c7a39cbc4b..adf86c11e9 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -128,6 +128,12 @@ class BasicNewsRecipe(object): #: extra_css = None + #: If True empty feeds are removed from the output. + #: This option has no effect if parse_index is overriden in + #: the sub class. It is meant only for recipes that return a list + #: of feeds using :member:`feeds` or :method:`get_feeds`. + remove_empty_feeds = False + #: List of regular expressions that determines which links to follow #: If empty, it is ignored. For example:: #: @@ -985,6 +991,11 @@ class BasicNewsRecipe(object): self.log.exception(msg) + remove = [f for f in parsed_feeds if len(f) == 0 and + self.remove_empty_feeds] + for f in remove: + parsed_feeds.remove(f) + return parsed_feeds @classmethod diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index a2dbcd7d24..9e2ef1969d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -39,6 +39,7 @@ recipe_modules = ['recipe_' + r for r in ( 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', + 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_der_standard.py b/src/calibre/web/feeds/recipes/recipe_der_standard.py new file mode 100644 index 0000000000..eec4c4e74d --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py @@ -0,0 +1,42 @@ + +''' http://www.derstandard.at - Austrian Newspaper ''' +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class DerStandardRecipe(BasicNewsRecipe): + title = u'derStandard' + __author__ = 'Gerhard Aigner' + + oldest_article = 1 + max_articles_per_feed = 100 + feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'), + (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'), + (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'), + (u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'), + (u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'), + (u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'), + (u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'), + (u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'), + (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'), + (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'), + (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')] + + encoding = 'utf-8' + language = _('German') + recursions = 0 + remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'), + dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')] + preprocess_regexps = [ + (re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '') + ] + + def print_version(self, url): + return url.replace('?id=', 'txt/?id=') + + def get_article_url(self, article): + '''if the article links to a index page (ressort) or a picture gallery + (ansichtssache), don't add it''' + if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0): + return None + return article.link diff --git a/src/calibre/web/feeds/recipes/recipe_diepresse.py b/src/calibre/web/feeds/recipes/recipe_diepresse.py new file mode 100644 index 0000000000..c806575356 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py @@ -0,0 +1,40 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class DiePresseRecipe(BasicNewsRecipe): + title = u'diePresse' + oldest_article = 1 + max_articles_per_feed = 100 + recursions = 0 + language = _('German') + __author__ = 'Gerhard Aigner' + + preprocess_regexps = [ + (re.compile(r'Textversion', re.DOTALL), lambda match: ''), + ] + remove_tags = [dict(name='hr'), + dict(name='br'), + dict(name='small'), + dict(name='img'), + dict(name='div', attrs={'class':'textnavi'}), + dict(name='h1', attrs={'class':'titel'}), + dict(name='a', attrs={'class':'print'}), + dict(name='div', attrs={'class':'hline'})] + feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'), + (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'), + (u'Europa', u'http://diepresse.com/rss/EU'), + (u'Panorama', u'http://diepresse.com/rss/Panorama'), + (u'Sport', u'http://diepresse.com/rss/Sport'), + (u'Kultur', u'http://diepresse.com/rss/Kultur'), + (u'Leben', u'http://diepresse.com/rss/Leben'), + (u'Tech', u'http://diepresse.com/rss/Tech'), + (u'Science', u'http://diepresse.com/rss/Science'), + (u'Bildung', u'http://diepresse.com/rss/Bildung'), + (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'), + (u'Recht', u'http://diepresse.com/rss/Recht'), + (u'Spectrum', u'http://diepresse.com/rss/Spectrum'), + (u'Meinung', u'http://diepresse.com/rss/Meinung')] + + def print_version(self, url): + return url.replace('home','text/home') diff --git a/src/calibre/web/feeds/recipes/recipe_hna.py b/src/calibre/web/feeds/recipes/recipe_hna.py new file mode 100644 index 0000000000..c4faec94ba --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_hna.py @@ -0,0 +1,40 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Hessisch Niedersachsische Allgemeine. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class hnaDe(BasicNewsRecipe): + + title = 'HNA' + description = 'local news from Hessen/Germany' + __author__ = 'Oliver Niesner' + use_embedded_content = False + language = _('German') + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 40 + no_stylesheets = True + encoding = 'iso-8859-1' + + remove_tags = [dict(id='topnav'), + dict(id='nav_main'), + dict(id='suchen'), + dict(id=''), + dict(name='span'), + dict(name='ul', attrs={'class':'linklist'}), + dict(name='a', attrs={'href':'#'}), + dict(name='p', attrs={'class':'breadcrumb'}), + dict(name='p', attrs={'class':'h5'})] + #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})] + remove_tags_after = [dict(name='a', attrs={'href':'#'})] + + feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), + ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ] + + + diff --git a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py index 04db6b02d5..cd914e96ad 100644 --- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py +++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py @@ -1,80 +1,78 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -''' -Fetch Linuxdevices. -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Sueddeutsche(BasicNewsRecipe): - - title = u'Linuxdevices' - description = 'News about Linux driven Hardware' - __author__ = 'Oliver Niesner' - use_embedded_content = False - timefmt = ' [%a, %d %b %Y]' - language = _('English') - max_articles_per_feed = 50 - no_stylesheets = True - encoding = 'latin1' - - remove_tags_after = [dict(id='nointelliTXT')] - filter_regexps = [r'ad\.doubleclick\.net'] - - - remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), - dict(name='div', attrs={'class':'bannerSky'}), - dict(name='div', attrs={'class':'footerLinks'}), - dict(name='div', attrs={'class':'seitenanfang'}), - dict(name='td', attrs={'class':'mar5'}), - dict(name='td', attrs={'class':'mar5'}), - dict(name='table', attrs={'class':'pageAktiv'}), - dict(name='table', attrs={'class':'xartable'}), - dict(name='table', attrs={'class':'wpnavi'}), - dict(name='table', attrs={'class':'bgcontent absatz'}), - dict(name='table', attrs={'class':'footer'}), - dict(name='table', attrs={'class':'artikelBox'}), - dict(name='table', attrs={'class':'kommentare'}), - dict(name='table', attrs={'class':'pageBoxBot'}), - #dict(name='table', attrs={'with':'100%'}), - dict(name='td', attrs={'nowrap':'nowrap'}), - dict(name='td', attrs={'valign':'middle'}), - dict(name='td', attrs={'align':'left'}), - dict(name='td', attrs={'align':'center'}), - dict(name='td', attrs={'height':'5'}), - dict(name='div', attrs={'class':'artikelBox navigatorBox'}), - dict(name='div', attrs={'class':'similar-article-box'}), - dict(name='div', attrs={'class':'videoBigHack'}), - dict(name='td', attrs={'class':'artikelDruckenRight'}), - dict(name='td', attrs={'class':'width="200"'}), - dict(name='a', attrs={'href':'/news'}), - dict(name='a', attrs={'href':'/'}), - dict(name='a', attrs={'href':'/articles'}), - dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), - dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), - dict(name='iframe'), - dict(name='form'), - #dict(name='tr', attrs={'td':'Click here to learn'}), - dict(name='span', attrs={'class':'hidePrint'}), - dict(id='headerLBox'), - dict(id='nointelliTXT'), - dict(id='rechteSpalte'), - dict(id='newsticker-list-small'), - dict(id='ntop5'), - dict(id='ntop5send'), - dict(id='ntop5commented'), - dict(id='nnav-bgheader'), - dict(id='nnav-headerteaser'), - dict(id='nnav-head'), - dict(id='nnav-top'), - dict(id='nnav-logodiv'), - dict(id='nnav-logo'), - dict(id='nnav-oly'), - dict(id='readcomment')] - - - - feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] - +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Linuxdevices. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Sueddeutsche(BasicNewsRecipe): + + title = u'Linuxdevices' + description = 'News about Linux driven Hardware' + __author__ = 'Oliver Niesner' + use_embedded_content = False + timefmt = ' [%a %d %b %Y]' + max_articles_per_feed = 50 + no_stylesheets = True + html2epub_options = 'linearize_tables = True\nbase_font_size2=14' + encoding = 'latin1' + + + remove_tags_after = [dict(id='nointelliTXT')] + filter_regexps = [r'ad\.doubleclick\.net'] + + remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), + dict(name='div', attrs={'class':'bannerSky'}), + dict(name='div', attrs={'class':'footerLinks'}), + dict(name='div', attrs={'class':'seitenanfang'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='table', attrs={'class':'pageAktiv'}), + dict(name='table', attrs={'class':'xartable'}), + dict(name='table', attrs={'class':'wpnavi'}), + dict(name='table', attrs={'class':'bgcontent absatz'}), + dict(name='table', attrs={'class':'footer'}), + dict(name='table', attrs={'class':'artikelBox'}), + dict(name='table', attrs={'class':'kommentare'}), + dict(name='table', attrs={'class':'pageBoxBot'}), + dict(name='td', attrs={'nowrap':'nowrap'}), + dict(name='td', attrs={'valign':'middle'}), + dict(name='td', attrs={'align':'left'}), + dict(name='td', attrs={'align':'center'}), + dict(name='td', attrs={'height':'5'}), + dict(name='div', attrs={'class':'artikelBox navigatorBox'}), + dict(name='div', attrs={'class':'similar-article-box'}), + dict(name='div', attrs={'class':'videoBigHack'}), + dict(name='td', attrs={'class':'artikelDruckenRight'}), + dict(name='td', attrs={'class':'width="200"'}), + dict(name='a', attrs={'href':'/news'}), + dict(name='a', attrs={'href':'/'}), + dict(name='a', attrs={'href':'/articles'}), + dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), + dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), + dict(name='iframe'), + dict(name='form'), + dict(name='span', attrs={'class':'hidePrint'}), + dict(id='headerLBox'), + dict(id='nointelliTXT'), + dict(id='rechteSpalte'), + dict(id='newsticker-list-small'), + dict(id='ntop5'), + dict(id='ntop5send'), + dict(id='ntop5commented'), + dict(id='nnav-bgheader'), + dict(id='nnav-headerteaser'), + dict(id='nnav-head'), + dict(id='nnav-top'), + dict(id='nnav-logodiv'), + dict(id='nnav-logo'), + dict(id='nnav-oly'), + dict(id='readcomment')] + + + + feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + diff --git a/src/calibre/web/feeds/recipes/recipe_moneynews.py b/src/calibre/web/feeds/recipes/recipe_moneynews.py new file mode 100644 index 0000000000..96656e490d --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_moneynews.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +moneynews.newsmax.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class MoneyNews(BasicNewsRecipe): + title = 'Moneynews.com' + __author__ = 'Darko Miletic' + description = 'Financial news worldwide' + publisher = 'moneynews.com' + category = 'news, finances, USA, business' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + , '--ignore-tables' + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + + feeds = [ + (u'Street Talk' , u'http://moneynews.newsmax.com/xml/streettalk.xml' ) + ,(u'Finance News' , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' ) + ,(u'Economy' , u'http://moneynews.newsmax.com/xml/economy.xml' ) + ,(u'Companies' , u'http://moneynews.newsmax.com/xml/companies.xml' ) + ,(u'Markets' , u'http://moneynews.newsmax.com/xml/Markets.xml' ) + ,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' ) + ] + + + keep_only_tags = [dict(name='table', attrs={'class':'copy'})] + + remove_tags = [ + dict(name='td' , attrs={'id':'article_fontsize'}) + ,dict(name='table', attrs={'id':'toolbox' }) + ,dict(name='tr' , attrs={'id':'noprint3' }) + ] + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nzz_ger.py b/src/calibre/web/feeds/recipes/recipe_nzz_ger.py new file mode 100644 index 0000000000..cdd23064bb --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_nzz_ger.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +www.nzz.ch +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class Nzz(BasicNewsRecipe): + title = 'NZZ Online' + __author__ = 'Darko Miletic' + description = 'Laufend aktualisierte Nachrichten, Analysen und Hintergruende zu Politik, Wirtschaft, Kultur und Sport' + publisher = 'NZZ AG' + category = 'news, politics, nachrichten, Switzerland' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + lang = 'de-CH' + language = _('German') + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' + + keep_only_tags = [dict(name='div', attrs={'class':'article'})] + + remove_tags = [ + dict(name=['object','link','base','script']) + ,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']}) + ,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']}) + ] + + feeds = [ + (u'Neuste Artikel', u'http://www.nzz.ch/feeds/recent/' ) + ,(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true') + ,(u'Schweiz' , u'http://www.nzz.ch/nachrichten/schweiz?rss=true') + ,(u'Wirtschaft' , u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true') + ,(u'Finanzmaerkte' , u'http://www.nzz.ch/finanzen/nachrichten?rss=true') + ,(u'Zuerich' , u'http://www.nzz.ch/nachrichten/zuerich?rss=true') + ,(u'Sport' , u'http://www.nzz.ch/nachrichten/sport?rss=true') + ,(u'Panorama' , u'http://www.nzz.ch/nachrichten/panorama?rss=true') + ,(u'Kultur' , u'http://www.nzz.ch/nachrichten/kultur/aktuell?rss=true') + ,(u'Wissenschaft' , u'http://www.nzz.ch/nachrichten/wissenschaft?rss=true') + ,(u'Medien' , u'http://www.nzz.ch/nachrichten/medien?rss=true') + ,(u'Reisen' , u'http://www.nzz.ch/magazin/reisen?rss=true') + ] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '' + soup.head.insert(0,mtag) + return soup + + def print_version(self, url): + return url + '?printview=true' + diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py index 52f1583408..7ba656e1d5 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py @@ -8,26 +8,19 @@ Fetch tomshardware. from calibre.web.feeds.news import BasicNewsRecipe -class TomsHardwareDe(BasicNewsRecipe): - - title = 'Tom\'s Hardware German' - description = 'Computer news in german' +class cdnet(BasicNewsRecipe): + + title = 'tomshardware' + description = 'computer news in german' __author__ = 'Oliver Niesner' use_embedded_content = False timefmt = ' [%d %b %Y]' max_articles_per_feed = 50 - language = _('German') no_stylesheets = True + language = _('German') encoding = 'utf-8' - #preprocess_regexps = \ -# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in -# [ -# (r'<84>', lambda match: ''), -# (r'<93>', lambda match: ''), -# ] -# ] - + remove_tags = [dict(id='outside-advert'), dict(id='advertRightWhite'), dict(id='header-advert'), @@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe): dict(id='header-top'), dict(id='header-tools'), dict(id='nbComment'), + dict(id='commentTools'), dict(id='internalSidebar'), dict(id='header-news-infos'), + dict(id='header-news-tools'), dict(id='breadcrumbs'), + dict(id='emailTools'), + dict(id='bookmarkTools'), + dict(id='printTools'), + dict(id='header-nextNews'), dict(id=''), dict(name='div', attrs={'class':'pyjama'}), dict(name='href', attrs={'class':'comment'}), @@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe): dict(name='div', attrs={'class':'greyBox clearfix'}), dict(id='')] #remove_tags_before = [dict(id='header-news-title')] - remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})] + remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})] #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})] - - feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] - + + feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] + + + diff --git a/src/calibre/web/feeds/recipes/recipe_vreme.py b/src/calibre/web/feeds/recipes/recipe_vreme.py index 697413f2f3..1df953cae3 100644 --- a/src/calibre/web/feeds/recipes/recipe_vreme.py +++ b/src/calibre/web/feeds/recipes/recipe_vreme.py @@ -11,20 +11,23 @@ from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class Vreme(BasicNewsRecipe): - title = 'Vreme' - __author__ = 'Darko Miletic' - description = 'Politicki Nedeljnik Srbije' - publisher = 'Vreme d.o.o.' - category = 'news, politics, Serbia' - no_stylesheets = True - remove_javascript = True - needs_subscription = True - INDEX = 'http://www.vreme.com' - LOGIN = 'http://www.vreme.com/account/index.php' - remove_javascript = True - use_embedded_content = False - language = _('Serbian') - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}' + title = 'Vreme' + __author__ = 'Darko Miletic' + description = 'Politicki Nedeljnik Srbije' + publisher = 'NP Vreme d.o.o.' + category = 'news, politics, Serbia' + delay = 1 + no_stylesheets = True + needs_subscription = True + INDEX = 'http://www.vreme.com' + LOGIN = 'http://www.vreme.com/account/login.php?url=%2F' + remove_javascript = True + use_embedded_content = False + encoding = 'utf-8' + language = _('Serbian') + lang = 'sr-Latn-RS' + direction = 'ltr' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}' html2lrf_options = [ '--comment' , description @@ -52,20 +55,11 @@ class Vreme(BasicNewsRecipe): articles = [] soup = self.index_to_soup(self.INDEX) - for item in soup.findAll('span', attrs={'class':'toc2'}): + for item in soup.findAll(['h3','h4']): description = '' title_prefix = '' - - descript_title_tag = item.findPreviousSibling('span', attrs={'class':'toc1'}) - if descript_title_tag: - title_prefix = self.tag_to_string(descript_title_tag) + ' ' - - descript_tag = item.findNextSibling('span', attrs={'class':'toc3'}) - if descript_tag: - description = self.tag_to_string(descript_tag) - feed_link = item.find('a') - if feed_link and feed_link.has_key('href'): + if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('/cms/view.php'): url = self.INDEX + feed_link['href'] title = title_prefix + self.tag_to_string(feed_link) date = strftime(self.timefmt) @@ -93,14 +87,17 @@ class Vreme(BasicNewsRecipe): del item['face'] for item in soup.findAll(size=True): del item['size'] - mtag = '' - soup.head.insert(0,mtag) + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mtag = '' + mtag += '\n' + soup.head.insert(0,mtag) return soup def get_cover_url(self): cover_url = None soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('img',attrs={'alt':'Naslovna strana broja'}) + cover_item = soup.find('div',attrs={'id':'najava'}) if cover_item: - cover_url = self.INDEX + cover_item['src'] + cover_url = self.INDEX + cover_item.img['src'] return cover_url diff --git a/upload.py b/upload.py index b2fc81c8b6..6bc90aada2 100644 --- a/upload.py +++ b/upload.py @@ -530,6 +530,7 @@ class build_windows(VMInstaller): self.run_windows_install_jammer(installer) return os.path.basename(installer) + @classmethod def run_windows_install_jammer(self, installer): ibp = os.path.abspath('installer/windows') sys.path.insert(0, ibp)