From f9ff180347431e17d1a398031e9c1c567fac63bd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Sep 2009 19:03:52 -0600 Subject: [PATCH] Implement a check setup command that uses PyFlakes to check for various errors --- .bzrignore | 2 +- setup.py | 3 +- setup/check.py | 75 + setup/commands.py | 3 + setup/gui.py | 3 +- src/calibre/__init__.py | 5 + .../ebooks/chardet/codingstatemachine.py | 6 +- src/calibre/ebooks/chardet/escprober.py | 8 +- src/calibre/ebooks/chardet/sbcsgroupprober.py | 6 +- src/calibre/ebooks/chardet/utf8prober.py | 6 +- src/calibre/ebooks/epub/pages.py | 9 +- src/calibre/ebooks/lit/lzx.py | 1 - src/calibre/ebooks/lit/maps/__init__.py | 2 + src/calibre/ebooks/lrf/fonts/__init__.py | 14 +- src/calibre/ebooks/lrf/html/color_map.py | 6 +- src/calibre/ebooks/lrf/html/convert_to.py | 30 +- src/calibre/ebooks/lrf/html/table.py | 138 +- src/calibre/ebooks/lrf/pylrs/elements.py | 162 +- src/calibre/ebooks/lrf/pylrs/pylrf.py | 1568 ++--- src/calibre/ebooks/lrf/pylrs/pylrfopt.py | 86 +- src/calibre/ebooks/lrf/pylrs/pylrs.py | 5187 +++++++++-------- src/calibre/ebooks/markdown/__init__.py | 4 +- src/calibre/ebooks/markdown/mdx_toc.py | 28 +- src/calibre/ebooks/metadata/imp.py | 6 +- src/calibre/ebooks/metadata/lrx.py | 8 +- src/calibre/ebooks/metadata/odt.py | 10 +- src/calibre/ebooks/metadata/zip.py | 8 +- src/calibre/ebooks/pdb/palmdoc/writer.py | 1 - src/calibre/ebooks/pdb/ztxt/__init__.py | 1 - src/calibre/ebooks/pdf/manipulate/decrypt.py | 24 +- src/calibre/ebooks/pdf/manipulate/encrypt.py | 14 +- src/calibre/ebooks/pdf/verify.py | 8 +- src/calibre/ebooks/rb/writer.py | 1 - src/calibre/ebooks/rtf2xml/copy.py | 6 +- src/calibre/ebooks/rtf2xml/options_trem.py | 1 - src/calibre/ebooks/rtf2xml/output.py | 1 - src/calibre/ebooks/rtf2xml/override_table.py | 2 - src/calibre/gui2/dialogs/choose_format.py | 8 +- src/calibre/gui2/dialogs/conversion_error.py | 6 +- src/calibre/gui2/lrf_renderer/bookview.py | 14 +- src/calibre/gui2/viewer/documentview.py | 2 
+- src/calibre/gui2/viewer/printing.py | 41 +- src/calibre/manual/conf.py | 2 +- src/calibre/path.py | 970 --- src/calibre/translations/automatic.py | 121 - src/calibre/utils/localization.py | 5 +- src/calibre/utils/pyparsing.py | 20 +- src/calibre/utils/rss_gen.py | 29 +- src/calibre/web/feeds/recipes/__init__.py | 13 +- .../web/feeds/recipes/recipe_24sata.py | 120 +- .../web/feeds/recipes/recipe_24sata_rs.py | 134 +- src/calibre/web/feeds/recipes/recipe_7dias.py | 142 +- .../feeds/recipes/recipe_accountancyage.py | 116 +- .../feeds/recipes/recipe_adventuregamers.py | 150 +- .../web/feeds/recipes/recipe_ambito.py | 115 +- .../web/feeds/recipes/recipe_amspec.py | 108 +- .../web/feeds/recipes/recipe_axxon_news.py | 122 +- .../web/feeds/recipes/recipe_azstarnet.py | 128 +- src/calibre/web/feeds/recipes/recipe_b92.py | 136 +- .../web/feeds/recipes/recipe_barrons.py | 184 +- .../web/feeds/recipes/recipe_bbcvietnamese.py | 68 +- src/calibre/web/feeds/recipes/recipe_beta.py | 100 +- .../web/feeds/recipes/recipe_beta_en.py | 73 +- src/calibre/web/feeds/recipes/recipe_blic.py | 129 +- src/calibre/web/feeds/recipes/recipe_borba.py | 188 +- .../recipes/recipe_buenosaireseconomico.py | 142 +- .../recipes/recipe_chicago_breaking_news.py | 90 +- .../feeds/recipes/recipe_chicago_tribune.py | 4 - .../web/feeds/recipes/recipe_clarin.py | 144 +- .../feeds/recipes/recipe_climate_progress.py | 90 +- .../web/feeds/recipes/recipe_coding_horror.py | 80 +- .../recipes/recipe_corriere_della_sera_en.py | 90 +- .../recipes/recipe_corriere_della_sera_it.py | 110 +- .../recipes/recipe_courrierinternational.py | 9 +- .../feeds/recipes/recipe_criticadigital.py | 122 +- .../web/feeds/recipes/recipe_cubadebate.py | 87 +- .../web/feeds/recipes/recipe_daily_mail.py | 66 +- src/calibre/web/feeds/recipes/recipe_danas.py | 122 +- .../web/feeds/recipes/recipe_degentenaar.py | 150 +- .../web/feeds/recipes/recipe_der_standard.py | 134 +- .../web/feeds/recipes/recipe_diagonales.py | 142 +- 
.../web/feeds/recipes/recipe_diepresse.py | 142 +- .../web/feeds/recipes/recipe_dnevni_avaz.py | 136 +- .../web/feeds/recipes/recipe_dnevnik_cro.py | 148 +- .../web/feeds/recipes/recipe_e_novine.py | 116 +- .../web/feeds/recipes/recipe_ecogeek.py | 62 +- .../feeds/recipes/recipe_el_mercurio_chile.py | 115 +- .../web/feeds/recipes/recipe_el_universal.py | 130 +- .../web/feeds/recipes/recipe_elargentino.py | 122 +- .../web/feeds/recipes/recipe_elcronista.py | 142 +- .../web/feeds/recipes/recipe_elmundo.py | 113 +- .../recipes/recipe_elperiodico_catalan.py | 110 +- .../recipes/recipe_elperiodico_spanish.py | 110 +- .../web/feeds/recipes/recipe_eltiempo_hn.py | 104 +- .../web/feeds/recipes/recipe_endgadget.py | 61 +- .../web/feeds/recipes/recipe_esquire.py | 124 +- .../web/feeds/recipes/recipe_exiled.py | 114 +- .../feeds/recipes/recipe_expansion_spanish.py | 116 +- .../web/feeds/recipes/recipe_fastcompany.py | 108 +- .../web/feeds/recipes/recipe_faznet.py | 100 +- .../web/feeds/recipes/recipe_fudzilla.py | 53 +- .../web/feeds/recipes/recipe_glas_srpske.py | 192 +- .../feeds/recipes/recipe_glasgow_herald.py | 68 +- .../web/feeds/recipes/recipe_glasjavnosti.py | 156 +- .../feeds/recipes/recipe_globe_and_mail.py | 138 +- .../web/feeds/recipes/recipe_granma.py | 107 +- .../web/feeds/recipes/recipe_greader.py | 74 +- .../web/feeds/recipes/recipe_gva_be.py | 126 +- .../web/feeds/recipes/recipe_harpers.py | 96 +- .../web/feeds/recipes/recipe_harpers_full.py | 162 +- src/calibre/web/feeds/recipes/recipe_hln.py | 104 +- .../web/feeds/recipes/recipe_hln_be.py | 70 +- .../recipes/recipe_honoluluadvertiser.py | 118 +- src/calibre/web/feeds/recipes/recipe_hrt.py | 132 +- .../web/feeds/recipes/recipe_infobae.py | 116 +- .../web/feeds/recipes/recipe_inquirer_net.py | 122 +- .../web/feeds/recipes/recipe_instapaper.py | 154 +- .../web/feeds/recipes/recipe_intelligencer.py | 90 +- .../web/feeds/recipes/recipe_irish_times.py | 76 +- .../feeds/recipes/recipe_joelonsoftware.py | 54 +- 
.../web/feeds/recipes/recipe_jutarnji.py | 164 +- .../feeds/recipes/recipe_juventudrebelde.py | 108 +- .../recipes/recipe_juventudrebelde_english.py | 81 +- .../web/feeds/recipes/recipe_krstarica.py | 130 +- .../web/feeds/recipes/recipe_krstarica_en.py | 114 +- .../web/feeds/recipes/recipe_la_cuarta.py | 99 +- .../web/feeds/recipes/recipe_la_segunda.py | 120 +- .../web/feeds/recipes/recipe_la_tercera.py | 121 +- .../feeds/recipes/recipe_lamujerdemivida.py | 152 +- .../web/feeds/recipes/recipe_lanacion.py | 113 +- .../feeds/recipes/recipe_lanacion_chile.py | 101 +- .../web/feeds/recipes/recipe_laprensa.py | 113 +- .../web/feeds/recipes/recipe_laprensa_hn.py | 108 +- .../web/feeds/recipes/recipe_laprensa_ni.py | 158 +- .../web/feeds/recipes/recipe_latribuna.py | 130 +- .../web/feeds/recipes/recipe_lavanguardia.py | 138 +- .../web/feeds/recipes/recipe_liberation.py | 78 +- .../feeds/recipes/recipe_linux_magazine.py | 74 +- .../web/feeds/recipes/recipe_livemint.py | 80 +- src/calibre/web/feeds/recipes/recipe_marca.py | 110 +- .../web/feeds/recipes/recipe_mediapart.py | 11 +- .../web/feeds/recipes/recipe_miami_herald.py | 106 +- .../web/feeds/recipes/recipe_miradasalsur.py | 142 +- .../web/feeds/recipes/recipe_mondedurable.py | 90 +- .../web/feeds/recipes/recipe_moneynews.py | 100 +- .../web/feeds/recipes/recipe_monitor.py | 196 +- .../web/feeds/recipes/recipe_msdnmag_en.py | 122 +- .../web/feeds/recipes/recipe_nacional_cro.py | 120 +- src/calibre/web/feeds/recipes/recipe_nasa.py | 174 +- .../web/feeds/recipes/recipe_new_scientist.py | 138 +- .../web/feeds/recipes/recipe_new_yorker.py | 114 +- .../recipes/recipe_newsweek_argentina.py | 142 +- src/calibre/web/feeds/recipes/recipe_nin.py | 182 +- src/calibre/web/feeds/recipes/recipe_noaa.py | 82 +- .../web/feeds/recipes/recipe_novosti.py | 114 +- src/calibre/web/feeds/recipes/recipe_nspm.py | 133 +- .../web/feeds/recipes/recipe_nspm_int.py | 76 +- .../web/feeds/recipes/recipe_nzz_ger.py | 132 +- 
.../web/feeds/recipes/recipe_ourdailybread.py | 70 +- .../web/feeds/recipes/recipe_outlook_india.py | 2 - .../web/feeds/recipes/recipe_pagina12.py | 106 +- .../web/feeds/recipes/recipe_pescanik.py | 130 +- .../web/feeds/recipes/recipe_physics_today.py | 76 +- .../web/feeds/recipes/recipe_physics_world.py | 8 +- .../web/feeds/recipes/recipe_pobjeda.py | 202 +- .../web/feeds/recipes/recipe_politico.py | 132 +- .../web/feeds/recipes/recipe_politika.py | 140 +- .../web/feeds/recipes/recipe_pressonline.py | 132 +- .../web/feeds/recipes/recipe_republika.py | 160 +- src/calibre/web/feeds/recipes/recipe_rts.py | 120 +- .../web/feeds/recipes/recipe_sciencedaily.py | 66 +- .../feeds/recipes/recipe_scott_hanselman.py | 80 +- .../web/feeds/recipes/recipe_seattle_times.py | 100 +- .../web/feeds/recipes/recipe_shacknews.py | 54 +- .../web/feeds/recipes/recipe_slashdot.py | 72 +- src/calibre/web/feeds/recipes/recipe_slate.py | 115 +- .../web/feeds/recipes/recipe_soldiers.py | 114 +- .../web/feeds/recipes/recipe_spiegel_int.py | 78 +- .../web/feeds/recipes/recipe_spiegelde.py | 128 +- .../recipes/recipe_st_petersburg_times.py | 96 +- .../web/feeds/recipes/recipe_stackoverflow.py | 66 +- .../web/feeds/recipes/recipe_starbulletin.py | 118 +- .../web/feeds/recipes/recipe_straitstimes.py | 112 +- .../web/feeds/recipes/recipe_tanjug.py | 90 +- .../feeds/recipes/recipe_telepolis_artikel.py | 2 +- .../recipes/recipe_the_budget_fashionista.py | 86 +- .../feeds/recipes/recipe_thedgesingapore.py | 128 +- .../recipes/recipe_theeconomictimes_india.py | 112 +- .../feeds/recipes/recipe_themarketticker.py | 48 +- .../web/feeds/recipes/recipe_theoldfoodie.py | 58 +- .../web/feeds/recipes/recipe_theonion.py | 90 +- src/calibre/web/feeds/recipes/recipe_tijd.py | 140 +- .../web/feeds/recipes/recipe_times_online.py | 130 +- src/calibre/web/feeds/recipes/recipe_tnxm.py | 56 +- .../web/feeds/recipes/recipe_tomshardware.py | 157 +- .../web/feeds/recipes/recipe_twitchfilms.py | 84 +- 
.../web/feeds/recipes/recipe_uncrate.py | 94 +- .../web/feeds/recipes/recipe_usnews.py | 120 +- src/calibre/web/feeds/recipes/recipe_utne.py | 100 +- .../web/feeds/recipes/recipe_vecernji_list.py | 124 +- .../web/feeds/recipes/recipe_veintitres.py | 142 +- .../web/feeds/recipes/recipe_vijesti.py | 114 +- .../web/feeds/recipes/recipe_vnexpress.py | 72 +- src/calibre/web/feeds/recipes/recipe_vreme.py | 224 +- .../web/feeds/recipes/recipe_wikinews_en.py | 140 +- .../web/feeds/recipes/recipe_winsupersite.py | 54 +- 206 files changed, 12460 insertions(+), 13498 deletions(-) create mode 100644 setup/check.py delete mode 100644 src/calibre/path.py delete mode 100644 src/calibre/translations/automatic.py diff --git a/.bzrignore b/.bzrignore index 5ae1ec3117..bd265eb830 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1,5 +1,5 @@ *_ui.py -moc_*.cpp +.check-cache.pickle src/calibre/plugins resources/images.qrc src/calibre/manual/.build/ diff --git a/setup.py b/setup.py index d5cf8a406a..b0acff3963 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' - import sys, os, optparse sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) @@ -70,7 +69,7 @@ def main(args=sys.argv): command.clean() return 0 - if opts.clean_all(): + if opts.clean_all: for cmd in commands.__all__: prints('Cleaning', cmd) getattr(commands, cmd).clean() diff --git a/setup/check.py b/setup/check.py new file mode 100644 index 0000000000..75a6d82530 --- /dev/null +++ b/setup/check.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import sys, os, cPickle, subprocess +from operator import attrgetter +from setup import Command + +def check_for_python_errors(filename, builtins): + from pyflakes import checker, ast + + contents 
= open(filename, 'rb').read() + + try: + tree = ast.parse(contents, filename) + except: + import traceback + traceback.print_exc() + try: + value = sys.exc_info()[1] + lineno, offset, line = value[1][1:] + except IndexError: + lineno, offset, line = 1, 0, '' + if line.endswith("\n"): + line = line[:-1] + + return [SyntaxError(filename, lineno, offset, str(value))] + else: + w = checker.Checker(tree, filename, builtins = builtins) + w.messages.sort(key = attrgetter('lineno')) + return w.messages + + +class Check(Command): + + BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P'] + CACHE = '.check-cache.pickle' + + def run(self, opts): + cache = {} + if os.path.exists(self.CACHE): + cache = cPickle.load(open(self.CACHE, 'rb')) + for x in os.walk(self.j(self.SRC, 'calibre')): + for f in x[-1]: + f = self.j(x[0], f) + mtime = os.stat(f).st_mtime + if f.endswith('.py') and cache.get(f, 0) != mtime and \ + self.b(f) not in ('ptempfile.py', 'feedparser.py', + 'pyparsing.py', 'markdown.py') and 'genshi' not in f and \ + 'prs500/driver.py' not in f: + self.info('\tChecking', f) + w = check_for_python_errors(f, self.BUILTINS) + if w: + self.report_errors(w) + cPickle.dump(cache, open(self.CACHE, 'wb'), -1) + subprocess.call(['gvim', '-f', f]) + raise SystemExit(1) + cache[f] = mtime + cPickle.dump(cache, open(self.CACHE, 'wb'), -1) + + + def report_errors(self, errors): + for err in errors: + if isinstance(err, SyntaxError): + print '\t\tSyntax Error' + else: + col = getattr(err, 'col', 0) if getattr(err, 'col', 0) else 0 + lineno = err.lineno if err.lineno else 0 + self.info('\t\t%d:%d:'%(lineno, col), + err.message%err.message_args) + diff --git a/setup/commands.py b/setup/commands.py index de44c75538..57ef2c63bf 100644 --- a/setup/commands.py +++ b/setup/commands.py @@ -11,6 +11,7 @@ __all__ = [ 'build', 'gui', 'develop', + 'check', ] @@ -29,6 +30,8 @@ develop = Develop() from setup.gui import GUI gui = GUI() +from setup.check import Check +check = Check() commands = {} 
for x in __all__: diff --git a/setup/gui.py b/setup/gui.py index d3c4071ffd..dd0bdfd204 100644 --- a/setup/gui.py +++ b/setup/gui.py @@ -78,9 +78,10 @@ class GUI(Command): dat = pat.sub(sub, dat) if form.endswith('viewer%smain.ui'%os.sep): - self.inf('\t\tPromoting WebView') + self.info('\t\tPromoting WebView') dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(') dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView' + dat += '\nQtWebKit' open(compiled_form, 'wb').write(dat) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index bfe101ffa7..1f14e3868e 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -21,6 +21,11 @@ from calibre.constants import iswindows, isosx, islinux, isfrozen, \ filesystem_encoding import mechanize +if False: + winutil, winutilerror, __appname__, islinux, __version__ + fcntl, win32event, isfrozen, __author__, terminal_controller + winerror, win32api + mimetypes.add_type('application/epub+zip', '.epub') mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs') mimetypes.add_type('application/xhtml+xml', '.xhtml') diff --git a/src/calibre/ebooks/chardet/codingstatemachine.py b/src/calibre/ebooks/chardet/codingstatemachine.py index 452d3b0a06..5e759007ea 100644 --- a/src/calibre/ebooks/chardet/codingstatemachine.py +++ b/src/calibre/ebooks/chardet/codingstatemachine.py @@ -13,19 +13,19 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
-# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from constants import eStart, eError, eItsMe +from constants import eStart class CodingStateMachine: def __init__(self, sm): diff --git a/src/calibre/ebooks/chardet/escprober.py b/src/calibre/ebooks/chardet/escprober.py index 572ed7be37..5d98b2aad6 100644 --- a/src/calibre/ebooks/chardet/escprober.py +++ b/src/calibre/ebooks/chardet/escprober.py @@ -13,19 +13,19 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
-# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import constants, sys +import constants from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel from charsetprober import CharSetProber from codingstatemachine import CodingStateMachine @@ -75,5 +75,5 @@ class EscCharSetProber(CharSetProber): self._mState = constants.eFoundIt self._mDetectedCharset = codingSM.get_coding_state_machine() return self.get_state() - + return self.get_state() diff --git a/src/calibre/ebooks/chardet/sbcsgroupprober.py b/src/calibre/ebooks/chardet/sbcsgroupprober.py index d19160c86c..6269d4c1d8 100644 --- a/src/calibre/ebooks/chardet/sbcsgroupprober.py +++ b/src/calibre/ebooks/chardet/sbcsgroupprober.py @@ -14,19 +14,19 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
-# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import constants, sys +import constants from charsetgroupprober import CharSetGroupProber from sbcharsetprober import SingleByteCharSetProber from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model diff --git a/src/calibre/ebooks/chardet/utf8prober.py b/src/calibre/ebooks/chardet/utf8prober.py index c1792bb377..1a1618ecc2 100644 --- a/src/calibre/ebooks/chardet/utf8prober.py +++ b/src/calibre/ebooks/chardet/utf8prober.py @@ -13,19 +13,19 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import constants, sys +import constants from constants import eStart, eError, eItsMe from charsetprober import CharSetProber from codingstatemachine import CodingStateMachine diff --git a/src/calibre/ebooks/epub/pages.py b/src/calibre/ebooks/epub/pages.py index 4737107a6c..6cd2b60672 100644 --- a/src/calibre/ebooks/epub/pages.py +++ b/src/calibre/ebooks/epub/pages.py @@ -8,11 +8,10 @@ __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. 
Vandegrift ' __docformat__ = 'restructuredtext en' -import os, re -from itertools import count, chain -from calibre.ebooks.oeb.base import XHTML, XHTML_NS +import re +from itertools import count +from calibre.ebooks.oeb.base import XHTML_NS from calibre.ebooks.oeb.base import OEBBook -from lxml import etree, html from lxml.etree import XPath NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS} @@ -55,5 +54,5 @@ def add_page_map(opfpath, opts): id = elem.attrib['id'] = idgen.next() href = '#'.join((item.href, id)) oeb.pages.add(name, href) - writer = DirWriter(version='2.0', page_map=True) + writer = None#DirWriter(version='2.0', page_map=True) writer.dump(oeb, opfpath) diff --git a/src/calibre/ebooks/lit/lzx.py b/src/calibre/ebooks/lit/lzx.py index ee46e729c3..3f324a65a6 100644 --- a/src/calibre/ebooks/lit/lzx.py +++ b/src/calibre/ebooks/lit/lzx.py @@ -6,7 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import sys from calibre import plugins _lzx, _error = plugins['lzx'] diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index 2235c384ff..b30974ba6b 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -7,3 +7,5 @@ Microsoft LIT tag and attribute tables. 
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP from calibre.ebooks.lit.maps.html import MAP as HTML_MAP + +OPF_MAP, HTML_MAP diff --git a/src/calibre/ebooks/lrf/fonts/__init__.py b/src/calibre/ebooks/lrf/fonts/__init__.py index 1f67a50f25..7fef457bc1 100644 --- a/src/calibre/ebooks/lrf/fonts/__init__.py +++ b/src/calibre/ebooks/lrf/fonts/__init__.py @@ -1,14 +1,14 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import sys, os -from calibre import iswindows +import os from calibre.ptempfile import PersistentTemporaryFile try: from PIL import ImageFont + ImageFont except ImportError: import ImageFont - + ''' Default fonts used in the PRS500 ''' @@ -48,11 +48,11 @@ def get_font_path(name): # then, try calibre shipped ones try: try: - font_mod = __import__('calibre.ebooks.lrf.fonts.prs500', {}, {}, + font_mod = __import__('calibre.ebooks.lrf.fonts.prs500', {}, {}, [fname], -1) getattr(font_mod, fname) except (ImportError, AttributeError): - font_mod = __import__('calibre.ebooks.lrf.fonts.liberation', {}, {}, + font_mod = __import__('calibre.ebooks.lrf.fonts.liberation', {}, {}, [LIBERATION_FONT_MAP[name]], -1) p = PersistentTemporaryFile('.ttf', 'font_') p.write(getattr(font_mod, fname).font_data) @@ -61,7 +61,7 @@ def get_font_path(name): return p.name except ImportError: pass - + # finally, try system default ones if SYSTEM_FONT_MAP.has_key(name) and os.access(SYSTEM_FONT_MAP[name], os.R_OK): return SYSTEM_FONT_MAP[name] @@ -71,7 +71,7 @@ def get_font_path(name): def get_font(name, size, encoding='unic'): ''' - Get an ImageFont object by name. + Get an ImageFont object by name. @param size: Font height in pixels. To convert from pts: sz in pixels = (dpi/72) * size in pts @param encoding: Font encoding to use. E.g. 
'unic', 'symbol', 'ADOB', 'ADBE', 'aprm' diff --git a/src/calibre/ebooks/lrf/html/color_map.py b/src/calibre/ebooks/lrf/html/color_map.py index 78377de657..c1b5ea3d95 100644 --- a/src/calibre/ebooks/lrf/html/color_map.py +++ b/src/calibre/ebooks/lrf/html/color_map.py @@ -94,7 +94,7 @@ NAME_MAP = { u'springgreen': u'#00FF7F', u'violet': u'#EE82EE', u'yellowgreen': u'#9ACD32' - } + } hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})') rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE) @@ -109,5 +109,5 @@ def lrs_color(html_color): if hcol in NAME_MAP: return NAME_MAP[hcol].replace('#', '0x00') return '0x00000000' - - \ No newline at end of file + + diff --git a/src/calibre/ebooks/lrf/html/convert_to.py b/src/calibre/ebooks/lrf/html/convert_to.py index a86e4e072e..fa4fe5aae6 100644 --- a/src/calibre/ebooks/lrf/html/convert_to.py +++ b/src/calibre/ebooks/lrf/html/convert_to.py @@ -10,13 +10,13 @@ from calibre.ebooks.lrf.lrfparser import LRFDocument from calibre.ebooks.metadata.opf import OPFCreator from calibre.ebooks.lrf.objects import PageAttr, BlockAttr, TextAttr - +from calibre.ebooks.lrf.pylrs.pylrs import TextStyle class BlockStyle(object): - + def __init__(self, ba): self.ba = ba - + def __str__(self): ans = '.'+str(self.ba.id)+' {\n' if hasattr(self.ba, 'sidemargin'): @@ -37,10 +37,10 @@ class BlockStyle(object): ans += '\tbackground-color: %s;\n'%(self.ba.bgcolor.to_html()) #TODO: Fixed size blocks return ans + '}\n' - + class LRFConverter(object): - + def __init__(self, document, opts, logger): self.lrf = document self.opts = opts @@ -48,15 +48,15 @@ class LRFConverter(object): self.logger = logger logger.info('Parsing LRF...') self.lrf.parse() - + self.create_metadata() self.create_styles() - + def create_metadata(self): self.logger.info('Reading metadata...') mi = get_metadata(self.lrf) self.opf = OPFCreator(self.output_dir, mi) - + def create_page_styles(self): self.page_css = '' for obj in self.lrf.objects.values(): @@ -65,21 
+65,21 @@ class LRFConverter(object): self.page_css = selector + ' {\n' # TODO: Headers and footers self.page_css += '}\n' - - + + def create_block_styles(self): self.block_css = '' for obj in self.lrf.objects.values(): if isinstance(obj, BlockAttr): self.block_css += str(BlockStyle(obj)) - + def create_text_styles(self): self.text_css = '' for obj in self.lrf.objects.values(): if isinstance(obj, TextAttr): self.text_css += str(TextStyle(obj)) print self.text_css - + def create_styles(self): self.logger.info('Creating CSS stylesheet...') self.create_page_styles() @@ -104,9 +104,9 @@ def process_file(lrfpath, opts, logger=None): raise ConversionError(opts.out + ' is not a directory') if not os.path.exists(opts.out): os.makedirs(opts.out) - + document = LRFDocument(open(lrfpath, 'rb')) - conv = LRFConverter(document, opts, logger) + conv = LRFConverter(document, opts, logger) def main(args=sys.argv): @@ -116,7 +116,7 @@ def main(args=sys.argv): parser.print_help() return 1 process_file(args[1], opts) - + return 0 diff --git a/src/calibre/ebooks/lrf/html/table.py b/src/calibre/ebooks/lrf/html/table.py index a3b3123293..dc246fa693 100644 --- a/src/calibre/ebooks/lrf/html/table.py +++ b/src/calibre/ebooks/lrf/html/table.py @@ -11,23 +11,23 @@ def ceil(num): return int(math.ceil(num)) def print_xml(elem): - from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter + from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter elem = elem.toElement('utf8') ew = ElementWriter(elem, sourceEncoding='utf8') ew.write(sys.stdout) print - + def cattrs(base, extra): new = base.copy() new.update(extra) return new - + def tokens(tb): ''' Return the next token. A token is : - 1. A string + 1. 
A string a block of text that has the same style - ''' + ''' def process_element(x, attrs): if isinstance(x, CR): yield 2, None @@ -49,22 +49,22 @@ def tokens(tb): for y in x.contents: for z in process_element(y, attrs): yield z - - + + for i in tb.contents: if isinstance(i, CR): yield 1, None elif isinstance(i, Paragraph): - for j in i.contents: + for j in i.contents: attrs = {} if hasattr(j, 'attrs'): attrs = j.attrs - for k in process_element(j, attrs): + for k in process_element(j, attrs): yield k - + class Cell(object): - + def __init__(self, conv, tag, css): self.conv = conv self.tag = tag @@ -89,7 +89,7 @@ class Cell(object): self.rowspan = int(tag['rowspan']) if tag.has_key('rowspan') else 1 except: pass - + pp = conv.current_page conv.book.allow_new_page = False conv.current_page = conv.book.create_page() @@ -99,7 +99,7 @@ class Cell(object): if isinstance(item, TextBlock): self.text_blocks.append(item) conv.current_page = pp - conv.book.allow_new_page = True + conv.book.allow_new_page = True if not self.text_blocks: tb = conv.book.create_text_block() tb.Paragraph(' ') @@ -107,7 +107,7 @@ class Cell(object): for tb in self.text_blocks: tb.parent = None tb.objId = 0 - # Needed as we have to eventually change this BlockStyle's width and + # Needed as we have to eventually change this BlockStyle's width and # height attributes. This blockstyle may be shared with other # elements, so doing that causes havoc. 
tb.blockStyle = conv.book.create_block_style() @@ -117,17 +117,17 @@ class Cell(object): if ts.attrs['align'] == 'foot': if isinstance(tb.contents[-1], Paragraph): tb.contents[-1].append(' ') - - - - + + + + def pts_to_pixels(self, pts): pts = int(pts) return ceil((float(self.conv.profile.dpi)/72.)*(pts/10.)) - + def minimum_width(self): return max([self.minimum_tb_width(tb) for tb in self.text_blocks]) - + def minimum_tb_width(self, tb): ts = tb.textStyle.attrs default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize'])) @@ -135,7 +135,7 @@ class Cell(object): mwidth = 0 for token, attrs in tokens(tb): font = default_font - if isinstance(token, int): # Handle para and line breaks + if isinstance(token, int): # Handle para and line breaks continue if isinstance(token, Plot): return self.pts_to_pixels(token.xsize) @@ -151,24 +151,24 @@ class Cell(object): if width > mwidth: mwidth = width return parindent + mwidth + 2 - + def text_block_size(self, tb, maxwidth=sys.maxint, debug=False): ts = tb.textStyle.attrs default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize'])) parindent = self.pts_to_pixels(ts['parindent']) top, bottom, left, right = 0, 0, parindent, parindent - - def add_word(width, height, left, right, top, bottom, ls, ws): + + def add_word(width, height, left, right, top, bottom, ls, ws): if left + width > maxwidth: left = width + ws top += ls bottom = top+ls if top+ls > bottom else bottom else: left += (width + ws) - right = left if left > right else right + right = left if left > right else right bottom = top+ls if top+ls > bottom else bottom return left, right, top, bottom - + for token, attrs in tokens(tb): if attrs == None: attrs = {} @@ -196,17 +196,17 @@ class Cell(object): width, height = font.getsize(word) left, right, top, bottom = add_word(width, height, left, right, top, bottom, ls, ws) return right+3+max(parindent, 10), bottom - + def text_block_preferred_width(self, tb, debug=False): return 
self.text_block_size(tb, sys.maxint, debug=debug)[0] - + def preferred_width(self, debug=False): return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks])) - + def height(self, width): return sum([self.text_block_size(i, width)[1] for i in self.text_blocks]) - - + + class Row(object): def __init__(self, conv, row, css, colpad): @@ -221,15 +221,15 @@ class Row(object): name = a['name'] if a.has_key('name') else a['id'] if a.has_key('id') else None if name is not None: self.targets.append(name.replace('#', '')) - - + + def number_of_cells(self): '''Number of cells in this row. Respects colspan''' ans = 0 for cell in self.cells: ans += cell.colspan return ans - + def height(self, widths): i, heights = 0, [] for cell in self.cells: @@ -239,11 +239,11 @@ class Row(object): if not heights: return 0 return max(heights) - + def cell_from_index(self, col): i = -1 - cell = None - for cell in self.cells: + cell = None + for cell in self.cells: for k in range(0, cell.colspan): if i == col: break @@ -251,30 +251,30 @@ class Row(object): if i == col: break return cell - + def minimum_width(self, col): cell = self.cell_from_index(col) if not cell: return 0 return cell.minimum_width() - + def preferred_width(self, col): cell = self.cell_from_index(col) if not cell: return 0 return 0 if cell.colspan > 1 else cell.preferred_width() - + def width_percent(self, col): cell = self.cell_from_index(col) if not cell: return -1 return -1 if cell.colspan > 1 else cell.pwidth - + def cell_iterator(self): for c in self.cells: yield c - - + + class Table(object): def __init__(self, conv, table, css, rowpad=10, colpad=10): self.rows = [] @@ -283,31 +283,31 @@ class Table(object): self.colpad = colpad rows = table.findAll('tr') conv.in_table = True - for row in rows: + for row in rows: rcss = conv.tag_css(row, css)[0] self.rows.append(Row(conv, row, rcss, colpad)) conv.in_table = False - + def number_of_columns(self): max = 0 for row in self.rows: max = 
row.number_of_cells() if row.number_of_cells() > max else max return max - + def number_or_rows(self): return len(self.rows) - + def height(self, maxwidth): ''' Return row heights + self.rowpad''' widths = self.get_widths(maxwidth) return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad - + def minimum_width(self, col): return max([row.minimum_width(col) for row in self.rows]) - + def width_percent(self, col): return max([row.width_percent(col) for row in self.rows]) - + def get_widths(self, maxwidth): ''' Return widths of columns + self.colpad @@ -320,29 +320,29 @@ class Table(object): try: cellwidths[r] = self.rows[r].preferred_width(c) except IndexError: - continue + continue widths[c] = max(cellwidths) - + min_widths = [self.minimum_width(i)+10 for i in xrange(cols)] for i in xrange(len(widths)): wp = self.width_percent(i) if wp >= 0.: widths[i] = max(min_widths[i], ceil((wp/100.) * (maxwidth - (cols-1)*self.colpad))) - - + + itercount = 0 - + while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100: for i in range(cols): widths[i] = ceil((95./100.)*widths[i]) if \ ceil((95./100.)*widths[i]) >= min_widths[i] else widths[i] itercount += 1 - + return [i+self.colpad for i in widths] - - def blocks(self, maxwidth, maxheight): + + def blocks(self, maxwidth, maxheight): rows, cols = self.number_or_rows(), self.number_of_columns() - cellmatrix = [[None for c in range(cols)] for r in range(rows)] + cellmatrix = [[None for c in range(cols)] for r in range(rows)] rowpos = [0 for i in range(rows)] for r in range(rows): nc = self.rows[r].cell_iterator() @@ -358,14 +358,14 @@ class Table(object): break except StopIteration: # No more cells in this row continue - - + + widths = self.get_widths(maxwidth) heights = [row.height(widths) for row in self.rows] - + xpos = [sum(widths[:i]) for i in range(cols)] delta = maxwidth - sum(widths) - if delta < 0: + if delta < 0: delta = 0 for r in range(len(cellmatrix)): yield None, 0, 
heights[r], 0, self.rows[r].targets @@ -377,13 +377,13 @@ class Table(object): sypos = 0 for tb in cell.text_blocks: tb.blockStyle = self.conv.book.create_block_style( - blockwidth=width, + blockwidth=width, blockheight=cell.text_block_size(tb, width)[1], blockrule='horz-fixed') - + yield tb, xpos[c], sypos, delta, None sypos += tb.blockStyle.attrs['blockheight'] - - - - \ No newline at end of file + + + + diff --git a/src/calibre/ebooks/lrf/pylrs/elements.py b/src/calibre/ebooks/lrf/pylrs/elements.py index 0cb02dd21b..0e9ec4d7d0 100644 --- a/src/calibre/ebooks/lrf/pylrs/elements.py +++ b/src/calibre/ebooks/lrf/pylrs/elements.py @@ -1,81 +1,81 @@ -""" elements.py -- replacements and helpers for ElementTree """ - -class ElementWriter(object): - def __init__(self, e, header=False, sourceEncoding="ascii", - spaceBeforeClose=True, outputEncodingName="UTF-16"): - self.header = header - self.e = e - self.sourceEncoding=sourceEncoding - self.spaceBeforeClose = spaceBeforeClose - self.outputEncodingName = outputEncodingName - - - def _encodeCdata(self, rawText): - if type(rawText) is str: - rawText = rawText.decode(self.sourceEncoding) - - text = rawText.replace("&", "&") - text = text.replace("<", "<") - text = text.replace(">", ">") - return text - - - def _writeAttribute(self, f, name, value): - f.write(u' %s="' % unicode(name)) - if not isinstance(value, basestring): - value = unicode(value) - value = self._encodeCdata(value) - value = value.replace('"', '"') - f.write(value) - f.write(u'"') - - - def _writeText(self, f, rawText): - text = self._encodeCdata(rawText) - f.write(text) - - - def _write(self, f, e): - f.write(u'<' + unicode(e.tag)) - - attributes = e.items() - attributes.sort() - for name, value in attributes: - self._writeAttribute(f, name, value) - - if e.text is not None or len(e) > 0: - f.write(u'>') - - if e.text: - self._writeText(f, e.text) - - for e2 in e: - self._write(f, e2) - - f.write(u'' % e.tag) - else: - if self.spaceBeforeClose: - f.write(' 
') - f.write(u'/>') - - if e.tail is not None: - self._writeText(f, e.tail) - - - def toString(self): - class x: - pass - buffer = [] - x.write = buffer.append - self.write(x) - return u''.join(buffer) - - - def write(self, f): - if self.header: - f.write(u'\n' % self.outputEncodingName) - - self._write(f, self.e) - - - +""" elements.py -- replacements and helpers for ElementTree """ + +class ElementWriter(object): + def __init__(self, e, header=False, sourceEncoding="ascii", + spaceBeforeClose=True, outputEncodingName="UTF-16"): + self.header = header + self.e = e + self.sourceEncoding=sourceEncoding + self.spaceBeforeClose = spaceBeforeClose + self.outputEncodingName = outputEncodingName + + + def _encodeCdata(self, rawText): + if type(rawText) is str: + rawText = rawText.decode(self.sourceEncoding) + + text = rawText.replace("&", "&") + text = text.replace("<", "<") + text = text.replace(">", ">") + return text + + + def _writeAttribute(self, f, name, value): + f.write(u' %s="' % unicode(name)) + if not isinstance(value, basestring): + value = unicode(value) + value = self._encodeCdata(value) + value = value.replace('"', '"') + f.write(value) + f.write(u'"') + + + def _writeText(self, f, rawText): + text = self._encodeCdata(rawText) + f.write(text) + + + def _write(self, f, e): + f.write(u'<' + unicode(e.tag)) + + attributes = e.items() + attributes.sort() + for name, value in attributes: + self._writeAttribute(f, name, value) + + if e.text is not None or len(e) > 0: + f.write(u'>') + + if e.text: + self._writeText(f, e.text) + + for e2 in e: + self._write(f, e2) + + f.write(u'' % e.tag) + else: + if self.spaceBeforeClose: + f.write(' ') + f.write(u'/>') + + if e.tail is not None: + self._writeText(f, e.tail) + + + def toString(self): + class x: + pass + buffer = [] + x.write = buffer.append + self.write(x) + return u''.join(buffer) + + + def write(self, f): + if self.header: + f.write(u'\n' % self.outputEncodingName) + + self._write(f, self.e) + + + diff --git 
a/src/calibre/ebooks/lrf/pylrs/pylrf.py b/src/calibre/ebooks/lrf/pylrs/pylrf.py index 02c575d0b0..f3db518010 100644 --- a/src/calibre/ebooks/lrf/pylrs/pylrf.py +++ b/src/calibre/ebooks/lrf/pylrs/pylrf.py @@ -1,784 +1,784 @@ -""" - pylrf.py -- very low level interface to create lrf files. See pylrs for - higher level interface that can use this module to render books to lrf. -""" - -import struct -import zlib -import StringIO -import codecs -import os - -from pylrfopt import tagListOptimizer - -PYLRF_VERSION = "1.0" - -# -# Acknowledgement: -# This software would not have been possible without the pioneering -# efforts of the author of lrf2lrs.py, Igor Skochinsky. -# -# Copyright (c) 2007 Mike Higgins (Falstaff) -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# -# Change History: -# -# V1.0 06 Feb 2007 -# Initial Release. - -# -# Current limitations and bugs: -# Never "scrambles" any streams (even if asked to). This does not seem -# to hurt anything. 
-# -# Not based on any official documentation, so many assumptions had to be made. -# -# Can be used to create lrf files that can lock up an eBook reader. -# This is your only warning. -# -# Unsupported objects: Canvas, Window, PopUpWindow, Sound, Import, -# SoundStream, ObjectInfo -# -# The only button type supported is JumpButton. -# -# Unsupported tags: SoundStop, Wait, pos on BlockSpace (and those used by -# unsupported objects). -# -# Tags supporting Japanese text and Asian layout have not been tested. -# -# Tested on Python 2.4 and 2.5, Windows XP and Sony PRS-500. -# -# Commented even less than pylrs, but not very useful when called directly, -# anyway. -# - -class LrfError(Exception): - pass - -def writeByte(f, byte): - f.write(struct.pack(" 65535: - raise LrfError('Cannot encode a number greater than 65535 in a word.') - if int(word) < 0: - raise LrfError('Cannot encode a number < 0 in a word: '+str(word)) - f.write(struct.pack("I", int(color, 0))) - -def writeLineWidth(f, width): - writeWord(f, int(width)) - -def writeUnicode(f, string, encoding): - if isinstance(string, str): - string = string.decode(encoding) - string = string.encode("utf-16-le") - length = len(string) - if length > 65535: - raise LrfError('Cannot write strings longer than 65535 characters.') - writeWord(f, length) - writeString(f, string) - -def writeRaw(f, string, encoding): - if isinstance(string, str): - string = string.decode(encoding) - - string = string.encode("utf-16-le") - writeString(f, string) - -def writeRubyAA(f, rubyAA): - ralign, radjust = rubyAA - radjust = {"line-edge":0x10, "none":0}[radjust] - ralign = {"start":1, "center":2}[ralign] - writeWord(f, ralign | radjust) - -def writeBgImage(f, bgInfo): - imode, iid = bgInfo - imode = {"pfix": 0, "fix":1, "tile":2, "centering":3}[imode] - writeWord(f, imode) - writeDWord(f, iid) - -def writeEmpDots(f, dotsInfo, encoding): - refDotsFont, dotsFontName, dotsCode = dotsInfo - writeDWord(f, refDotsFont) - LrfTag("fontfacename", 
dotsFontName).write(f, encoding) - writeWord(f, int(dotsCode, 0)) - -def writeRuledLine(f, lineInfo): - lineLength, lineType, lineWidth, lineColor = lineInfo - writeWord(f, lineLength) - writeWord(f, LINE_TYPE_ENCODING[lineType]) - writeWord(f, lineWidth) - writeColor(f, lineColor) - - -LRF_SIGNATURE = "L\x00R\x00F\x00\x00\x00" - -#XOR_KEY = 48 -XOR_KEY = 65024 # that's what lrf2lrs says -- not used, anyway... - -LRF_VERSION = 1000 # is 999 for librie? lrf2lrs uses 1000 - -IMAGE_TYPE_ENCODING = dict(GIF=0x14, PNG=0x12, BMP=0x13, JPEG=0x11, JPG=0x11) - -OBJECT_TYPE_ENCODING = dict( - PageTree = 0x01, - Page = 0x02, - Header = 0x03, - Footer = 0x04, - PageAtr = 0x05, PageStyle=0x05, - Block = 0x06, - BlockAtr = 0x07, BlockStyle=0x07, - MiniPage = 0x08, - TextBlock = 0x0A, Text=0x0A, - TextAtr = 0x0B, TextStyle=0x0B, - ImageBlock = 0x0C, Image=0x0C, - Canvas = 0x0D, - ESound = 0x0E, - ImageStream = 0x11, - Import = 0x12, - Button = 0x13, - Window = 0x14, - PopUpWindow = 0x15, - Sound = 0x16, - SoundStream = 0x17, - Font = 0x19, - ObjectInfo = 0x1A, - BookAtr = 0x1C, BookStyle=0x1C, - SimpleTextBlock = 0x1D, - TOC=0x1E -) - -LINE_TYPE_ENCODING = { - 'none':0, 'solid':0x10, 'dashed':0x20, 'double':0x30, 'dotted':0x40 -} - -BINDING_DIRECTION_ENCODING = dict(Lr=1, Rl=16) - - -TAG_INFO = dict( - rawtext = (0, writeRaw), - ObjectStart = (0xF500, " 1: - raise LrfError("only one parameter allowed on tag %s" % name) - - if len(parameters) == 0: - self.parameter = None - else: - self.parameter = parameters[0] - - - def write(self, lrf, encoding=None): - if self.type != 0: - writeWord(lrf, self.type) - - p = self.parameter - if p is None: - return - - #print " Writing tag", self.name - for f in self.format: - if isinstance(f, dict): - p = f[p] - elif isinstance(f, str): - if isinstance(p, tuple): - writeString(lrf, struct.pack(f, *p)) - else: - writeString(lrf, struct.pack(f, p)) - else: - if f in [writeUnicode, writeRaw, writeEmpDots]: - if encoding is None: - raise LrfError, 
"Tag requires encoding" - f(lrf, p, encoding) - else: - f(lrf, p) - - -STREAM_SCRAMBLED = 0x200 -STREAM_COMPRESSED = 0x100 -STREAM_FORCE_COMPRESSED = 0x8100 -STREAM_TOC = 0x0051 - -class LrfStreamBase(object): - def __init__(self, streamFlags, streamData=None): - self.streamFlags = streamFlags - self.streamData = streamData - - - def setStreamData(self, streamData): - self.streamData = streamData - - - def getStreamTags(self, optimize=False): - # tags: - # StreamFlags - # StreamSize - # StreamStart - # (data) - # StreamEnd - # - # if flags & 0x200, stream is scrambled - # if flags & 0x100, stream is compressed - - - flags = self.streamFlags - streamBuffer = self.streamData - - # implement scramble? I never scramble anything... - - if flags & STREAM_FORCE_COMPRESSED == STREAM_FORCE_COMPRESSED: - optimize = False - - if flags & STREAM_COMPRESSED == STREAM_COMPRESSED: - uncompLen = len(streamBuffer) - compStreamBuffer = zlib.compress(streamBuffer) - if optimize and uncompLen <= len(compStreamBuffer) + 4: - flags &= ~STREAM_COMPRESSED - else: - streamBuffer = struct.pack(" 65535: + raise LrfError('Cannot encode a number greater than 65535 in a word.') + if int(word) < 0: + raise LrfError('Cannot encode a number < 0 in a word: '+str(word)) + f.write(struct.pack("I", int(color, 0))) + +def writeLineWidth(f, width): + writeWord(f, int(width)) + +def writeUnicode(f, string, encoding): + if isinstance(string, str): + string = string.decode(encoding) + string = string.encode("utf-16-le") + length = len(string) + if length > 65535: + raise LrfError('Cannot write strings longer than 65535 characters.') + writeWord(f, length) + writeString(f, string) + +def writeRaw(f, string, encoding): + if isinstance(string, str): + string = string.decode(encoding) + + string = string.encode("utf-16-le") + writeString(f, string) + +def writeRubyAA(f, rubyAA): + ralign, radjust = rubyAA + radjust = {"line-edge":0x10, "none":0}[radjust] + ralign = {"start":1, "center":2}[ralign] + writeWord(f, 
ralign | radjust) + +def writeBgImage(f, bgInfo): + imode, iid = bgInfo + imode = {"pfix": 0, "fix":1, "tile":2, "centering":3}[imode] + writeWord(f, imode) + writeDWord(f, iid) + +def writeEmpDots(f, dotsInfo, encoding): + refDotsFont, dotsFontName, dotsCode = dotsInfo + writeDWord(f, refDotsFont) + LrfTag("fontfacename", dotsFontName).write(f, encoding) + writeWord(f, int(dotsCode, 0)) + +def writeRuledLine(f, lineInfo): + lineLength, lineType, lineWidth, lineColor = lineInfo + writeWord(f, lineLength) + writeWord(f, LINE_TYPE_ENCODING[lineType]) + writeWord(f, lineWidth) + writeColor(f, lineColor) + + +LRF_SIGNATURE = "L\x00R\x00F\x00\x00\x00" + +#XOR_KEY = 48 +XOR_KEY = 65024 # that's what lrf2lrs says -- not used, anyway... + +LRF_VERSION = 1000 # is 999 for librie? lrf2lrs uses 1000 + +IMAGE_TYPE_ENCODING = dict(GIF=0x14, PNG=0x12, BMP=0x13, JPEG=0x11, JPG=0x11) + +OBJECT_TYPE_ENCODING = dict( + PageTree = 0x01, + Page = 0x02, + Header = 0x03, + Footer = 0x04, + PageAtr = 0x05, PageStyle=0x05, + Block = 0x06, + BlockAtr = 0x07, BlockStyle=0x07, + MiniPage = 0x08, + TextBlock = 0x0A, Text=0x0A, + TextAtr = 0x0B, TextStyle=0x0B, + ImageBlock = 0x0C, Image=0x0C, + Canvas = 0x0D, + ESound = 0x0E, + ImageStream = 0x11, + Import = 0x12, + Button = 0x13, + Window = 0x14, + PopUpWindow = 0x15, + Sound = 0x16, + SoundStream = 0x17, + Font = 0x19, + ObjectInfo = 0x1A, + BookAtr = 0x1C, BookStyle=0x1C, + SimpleTextBlock = 0x1D, + TOC=0x1E +) + +LINE_TYPE_ENCODING = { + 'none':0, 'solid':0x10, 'dashed':0x20, 'double':0x30, 'dotted':0x40 +} + +BINDING_DIRECTION_ENCODING = dict(Lr=1, Rl=16) + + +TAG_INFO = dict( + rawtext = (0, writeRaw), + ObjectStart = (0xF500, " 1: + raise LrfError("only one parameter allowed on tag %s" % name) + + if len(parameters) == 0: + self.parameter = None + else: + self.parameter = parameters[0] + + + def write(self, lrf, encoding=None): + if self.type != 0: + writeWord(lrf, self.type) + + p = self.parameter + if p is None: + return + + #print " 
Writing tag", self.name + for f in self.format: + if isinstance(f, dict): + p = f[p] + elif isinstance(f, str): + if isinstance(p, tuple): + writeString(lrf, struct.pack(f, *p)) + else: + writeString(lrf, struct.pack(f, p)) + else: + if f in [writeUnicode, writeRaw, writeEmpDots]: + if encoding is None: + raise LrfError, "Tag requires encoding" + f(lrf, p, encoding) + else: + f(lrf, p) + + +STREAM_SCRAMBLED = 0x200 +STREAM_COMPRESSED = 0x100 +STREAM_FORCE_COMPRESSED = 0x8100 +STREAM_TOC = 0x0051 + +class LrfStreamBase(object): + def __init__(self, streamFlags, streamData=None): + self.streamFlags = streamFlags + self.streamData = streamData + + + def setStreamData(self, streamData): + self.streamData = streamData + + + def getStreamTags(self, optimize=False): + # tags: + # StreamFlags + # StreamSize + # StreamStart + # (data) + # StreamEnd + # + # if flags & 0x200, stream is scrambled + # if flags & 0x100, stream is compressed + + + flags = self.streamFlags + streamBuffer = self.streamData + + # implement scramble? I never scramble anything... 
+ + if flags & STREAM_FORCE_COMPRESSED == STREAM_FORCE_COMPRESSED: + optimize = False + + if flags & STREAM_COMPRESSED == STREAM_COMPRESSED: + uncompLen = len(streamBuffer) + compStreamBuffer = zlib.compress(streamBuffer) + if optimize and uncompLen <= len(compStreamBuffer) + 4: + flags &= ~STREAM_COMPRESSED + else: + streamBuffer = struct.pack(" 0 and tagList[-1].name == tagName: - del tagList[-1] - - -def tagListOptimizer(tagList): - # this function eliminates redundant or unnecessary tags - # it scans a list of tags, looking for text settings that are - # changed before any text is output - # for example, - # fontsize=100, fontsize=200, text, fontsize=100, fontsize=200 - # should be: - # fontsize=200 text - oldSize = len(tagList) - _optimize(tagList, "fontsize", int) - _optimize(tagList, "fontweight", int) - return oldSize - len(tagList) - - +def _optimize(tagList, tagName, conversion): + # copy the tag of interest plus any text + newTagList = [] + for tag in tagList: + if tag.name == tagName or tag.name == "rawtext": + newTagList.append(tag) + + # now, eliminate any duplicates (leaving the last one) + for i, newTag in enumerate(newTagList[:-1]): + if newTag.name == tagName and newTagList[i+1].name == tagName: + tagList.remove(newTag) + + # eliminate redundant settings to same value across text strings + newTagList = [] + for tag in tagList: + if tag.name == tagName: + newTagList.append(tag) + + for i, newTag in enumerate(newTagList[:-1]): + value = conversion(newTag.parameter) + nextValue = conversion(newTagList[i+1].parameter) + if value == nextValue: + tagList.remove(newTagList[i+1]) + + # eliminate any setting that don't have text after them + while len(tagList) > 0 and tagList[-1].name == tagName: + del tagList[-1] + + +def tagListOptimizer(tagList): + # this function eliminates redundant or unnecessary tags + # it scans a list of tags, looking for text settings that are + # changed before any text is output + # for example, + # fontsize=100, fontsize=200, 
text, fontsize=100, fontsize=200 + # should be: + # fontsize=200 text + oldSize = len(tagList) + _optimize(tagList, "fontsize", int) + _optimize(tagList, "fontweight", int) + return oldSize - len(tagList) + + diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py index e2bfc2e2a9..0847d4ba73 100644 --- a/src/calibre/ebooks/lrf/pylrs/pylrs.py +++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py @@ -1,2593 +1,2594 @@ -# Copyright (c) 2007 Mike Higgins (Falstaff) -# Modifications from the original: -# Copyright (C) 2007 Kovid Goyal -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# -# Current limitations and bugs: -# Bug: Does not check if most setting values are valid unless lrf is created. -# -# Unsupported objects: MiniPage, SimpleTextBlock, Canvas, Window, -# PopUpWindow, Sound, Import, SoundStream, -# ObjectInfo -# -# Does not support background images for blocks or pages. -# -# The only button type supported are JumpButtons. 
-# -# None of the Japanese language tags are supported. -# -# Other unsupported tags: PageDiv, SoundStop, Wait, pos, -# Plot, Image (outside of ImageBlock), -# EmpLine, EmpDots - -import os, re, codecs, operator -from xml.sax.saxutils import escape -from datetime import date -try: - from elementtree.ElementTree import (Element, SubElement) -except ImportError: - from xml.etree.ElementTree import (Element, SubElement) - -from elements import ElementWriter -from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc, - STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING, - BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream, - STREAM_FORCE_COMPRESSED) - -DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set -DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs - -from calibre import __appname__, __version__ -from calibre import entity_to_unicode - -class LrsError(Exception): - pass - -class ContentError(Exception): - pass - -def _checkExists(filename): - if not os.path.exists(filename): - raise LrsError, "file '%s' not found" % filename - - -def _formatXml(root): - """ A helper to make the LRS output look nicer. """ - for elem in root.getiterator(): - if len(elem) > 0 and (not elem.text or not elem.text.strip()): - elem.text = "\n" - if not elem.tail or not elem.tail.strip(): - elem.tail = "\n" - - - -def ElementWithText(tag, text, **extra): - """ A shorthand function to create Elements with text. """ - e = Element(tag, **extra) - e.text = text - return e - - - -def ElementWithReading(tag, text, reading=False): - """ A helper function that creates reading attributes. 
""" - - # note: old lrs2lrf parser only allows reading = "" - - if text is None: - readingText = "" - elif isinstance(text, basestring): - readingText = text - else: - # assumed to be a sequence of (name, sortas) - readingText = text[1] - text = text[0] - - - if not reading: - readingText = "" - return ElementWithText(tag, text, reading=readingText) - - - -def appendTextElements(e, contentsList, se): - """ A helper function to convert text streams into the proper elements. """ - - def uconcat(text, newText, se): - if type(newText) != type(text): - if type(text) is str: - text = text.decode(se) - else: - newText = newText.decode(se) - - return text + newText - - - e.text = "" - lastElement = None - - for content in contentsList: - if not isinstance(content, Text): - newElement = content.toElement(se) - if newElement is None: - continue - lastElement = newElement - lastElement.tail = "" - e.append(lastElement) - else: - if lastElement is None: - e.text = uconcat(e.text, content.text, se) - else: - lastElement.tail = uconcat(lastElement.tail, content.text, se) - - - -class Delegator(object): - """ A mixin class to create delegated methods that create elements. 
""" - def __init__(self, delegates): - self.delegates = delegates - self.delegatedMethods = [] - #self.delegatedSettingsDict = {} - #self.delegatedSettings = [] - for d in delegates: - d.parent = self - methods = d.getMethods() - self.delegatedMethods += methods - for m in methods: - setattr(self, m, getattr(d, m)) - - """ - for setting in d.getSettings(): - if isinstance(setting, basestring): - setting = (d, setting) - delegates = \ - self.delegatedSettingsDict.setdefault(setting[1], []) - delegates.append(setting[0]) - self.delegatedSettings.append(setting) - """ - - - def applySetting(self, name, value, testValid=False): - applied = False - if name in self.getSettings(): - setattr(self, name, value) - applied = True - - for d in self.delegates: - if hasattr(d, "applySetting"): - applied = applied or d.applySetting(name, value) - else: - if name in d.getSettings(): - setattr(d, name, value) - applied = True - - if testValid and not applied: - raise LrsError, "setting %s not valid" % name - - return applied - - - def applySettings(self, settings, testValid=False): - for (setting, value) in settings.items(): - self.applySetting(setting, value, testValid) - """ - if setting not in self.delegatedSettingsDict: - raise LrsError, "setting %s not valid" % setting - delegates = self.delegatedSettingsDict[setting] - for d in delegates: - setattr(d, setting, value) - """ - - - def appendDelegates(self, element, sourceEncoding): - for d in self.delegates: - e = d.toElement(sourceEncoding) - if e is not None: - if isinstance(e, list): - for e1 in e: element.append(e1) - else: - element.append(e) - - - def appendReferencedObjects(self, parent): - for d in self.delegates: - d.appendReferencedObjects(parent) - - - def getMethods(self): - return self.delegatedMethods - - - def getSettings(self): - return [] - - - def toLrfDelegates(self, lrfWriter): - for d in self.delegates: - d.toLrf(lrfWriter) - - - def toLrf(self, lrfWriter): - self.toLrfDelegates(lrfWriter) - - - -class 
LrsAttributes(object): - """ A mixin class to handle default and user supplied attributes. """ - def __init__(self, defaults, alsoAllow=None, **settings): - if alsoAllow is None: - alsoAllow = [] - self.attrs = defaults.copy() - for (name, value) in settings.items(): - if name not in self.attrs and name not in alsoAllow: - raise LrsError, "%s does not support setting %s" % \ - (self.__class__.__name__, name) - if type(value) is int: - value = str(value) - self.attrs[name] = value - - - -class LrsContainer(object): - """ This class is a mixin class for elements that are contained in or - contain an unknown number of other elements. - """ - def __init__(self, validChildren): - self.parent = None - self.contents = [] - self.validChildren = validChildren - self.must_append = False #: If True even an empty container is appended by append_to - - def has_text(self): - ''' Return True iff this container has non whitespace text ''' - if hasattr(self, 'text'): - if self.text.strip(): - return True - if hasattr(self, 'contents'): - for child in self.contents: - if child.has_text(): - return True - for item in self.contents: - if isinstance(item, (Plot, ImageBlock, Canvas, CR)): - return True - return False - - def append_to(self, parent): - ''' - Append self to C{parent} iff self has non whitespace textual content - @type parent: LrsContainer - ''' - if self.contents or self.must_append: - parent.append(self) - - - def appendReferencedObjects(self, parent): - for c in self.contents: - c.appendReferencedObjects(parent) - - - def setParent(self, parent): - if self.parent is not None: - raise LrsError, "object already has parent" - - self.parent = parent - - - def append(self, content, convertText=True): - """ - Appends valid objects to container. Can auto-covert text strings - to Text objects. 
- """ - for validChild in self.validChildren: - if isinstance(content, validChild): - break - else: - raise LrsError, "can't append %s to %s" % \ - (content.__class__.__name__, - self.__class__.__name__) - - if convertText and isinstance(content, basestring): - content = Text(content) - - content.setParent(self) - - if isinstance(content, LrsObject): - content.assignId() - - self.contents.append(content) - return self - - def get_all(self, predicate=lambda x: x): - for child in self.contents: - if predicate(child): - yield child - if hasattr(child, 'get_all'): - for grandchild in child.get_all(predicate): - yield grandchild - - - -class LrsObject(object): - """ A mixin class for elements that need an object id. """ - nextObjId = 0 - - @classmethod - def getNextObjId(selfClass): - selfClass.nextObjId += 1 - return selfClass.nextObjId - - def __init__(self, assignId=False): - if assignId: - self.objId = LrsObject.getNextObjId() - else: - self.objId = 0 - - - def assignId(self): - if self.objId != 0: - raise LrsError, "id already assigned to " + self.__class__.__name__ - - self.objId = LrsObject.getNextObjId() - - - def lrsObjectElement(self, name, objlabel="objlabel", labelName=None, - labelDecorate=True, **settings): - element = Element(name) - element.attrib["objid"] = str(self.objId) - if labelName is None: - labelName = name - if labelDecorate: - label = "%s.%d" % (labelName, self.objId) - else: - label = str(self.objId) - element.attrib[objlabel] = label - element.attrib.update(settings) - return element - - - -class Book(Delegator): - """ - Main class for any lrs or lrf. All objects must be appended to - the Book class in some way or another in order to be rendered as - an LRS or LRF file. - - The following settings are available on the contructor of Book: - - author="book author" or author=("book author", "sort as") - Author of the book. - - title="book title" or title=("book title", "sort as") - Title of the book. 
- - sourceencoding="codec" - Gives the assumed encoding for all non-unicode strings. - - - thumbnail="thumbnail file name" - A small (80x80?) graphics file with a thumbnail of the book's cover. - - bookid="book id" - A unique id for the book. - - textstyledefault= - Sets the default values for all TextStyles. - - pagetstyledefault= - Sets the default values for all PageStyles. - - blockstyledefault= - Sets the default values for all BlockStyles. - - booksetting=BookSetting() - Override the default BookSetting. - - setdefault=StyleDefault() - Override the default SetDefault. - - There are several other settings -- see the BookInfo class for more. - """ - - def __init__(self, textstyledefault=None, blockstyledefault=None, - pagestyledefault=None, - optimizeTags=False, - optimizeCompression=False, - **settings): - - self.parent = None # we are the top of the parent chain - - if "thumbnail" in settings: - _checkExists(settings["thumbnail"]) - - # highly experimental -- use with caution - self.optimizeTags = optimizeTags - self.optimizeCompression = optimizeCompression - - pageStyle = PageStyle(**PageStyle.baseDefaults.copy()) - blockStyle = BlockStyle(**BlockStyle.baseDefaults.copy()) - textStyle = TextStyle(**TextStyle.baseDefaults.copy()) - - if textstyledefault is not None: - textStyle.update(textstyledefault) - - if blockstyledefault is not None: - blockStyle.update(blockstyledefault) - - if pagestyledefault is not None: - pageStyle.update(pagestyledefault) - - self.defaultPageStyle = pageStyle - self.defaultTextStyle = textStyle - self.defaultBlockStyle = blockStyle - LrsObject.nextObjId += 1 - - styledefault = StyleDefault() - if settings.has_key('setdefault'): - styledefault = settings.pop('setdefault') - Delegator.__init__(self, [BookInformation(), Main(), - Template(), Style(styledefault), Solos(), Objects()]) - - self.sourceencoding = None - - # apply default settings - self.applySetting("genreading", DEFAULT_GENREADING) - self.applySetting("sourceencoding", 
DEFAULT_SOURCE_ENCODING) - - self.applySettings(settings, testValid=True) - - self.allow_new_page = True #: If False L{create_page} raises an exception - self.gc_count = 0 - - - def set_title(self, title): - ot = self.delegates[0].delegates[0].delegates[0].title - self.delegates[0].delegates[0].delegates[0].title = (title, ot[1]) - - def set_author(self, author): - ot = self.delegates[0].delegates[0].delegates[0].author - self.delegates[0].delegates[0].delegates[0].author = (author, ot[1]) - - def create_text_style(self, **settings): - ans = TextStyle(**self.defaultTextStyle.attrs.copy()) - ans.update(settings) - return ans - - def create_block_style(self, **settings): - ans = BlockStyle(**self.defaultBlockStyle.attrs.copy()) - ans.update(settings) - return ans - - def create_page_style(self, **settings): - if not self.allow_new_page: - raise ContentError - ans = PageStyle(**self.defaultPageStyle.attrs.copy()) - ans.update(settings) - return ans - - def create_page(self, pageStyle=None, **settings): - ''' - Return a new L{Page}. The page has not been appended to this book. - @param pageStyle: If None the default pagestyle is used. - @type pageStyle: L{PageStyle} - ''' - if not pageStyle: - pageStyle = self.defaultPageStyle - return Page(pageStyle=pageStyle, **settings) - - def create_text_block(self, textStyle=None, blockStyle=None, **settings): - ''' - Return a new L{TextBlock}. The block has not been appended to this - book. - @param textStyle: If None the default text style is used - @type textStyle: L{TextStyle} - @param blockStyle: If None the default block style is used. 
- @type blockStyle: L{BlockStyle} - ''' - if not textStyle: - textStyle = self.defaultTextStyle - if not blockStyle: - blockStyle = self.defaultBlockStyle - return TextBlock(textStyle=textStyle, blockStyle=blockStyle, **settings) - - def pages(self): - '''Return list of Page objects in this book ''' - ans = [] - for item in self.delegates: - if isinstance(item, Main): - for candidate in item.contents: - if isinstance(candidate, Page): - ans.append(candidate) - break - return ans - - def last_page(self): - '''Return last Page in this book ''' - for item in self.delegates: - if isinstance(item, Main): - temp = list(item.contents) - temp.reverse() - for candidate in temp: - if isinstance(candidate, Page): - return candidate - - def embed_font(self, file, facename): - f = Font(file, facename) - self.append(f) - - def getSettings(self): - return ["sourceencoding"] - - - def append(self, content): - """ Find and invoke the correct appender for this content. """ - - className = content.__class__.__name__ - try: - method = getattr(self, "append" + className) - except AttributeError: - raise LrsError, "can't append %s to Book" % className - - method(content) - - - def rationalize_font_sizes(self, base_font_size=10): - base_font_size *= 10. - main = None - for obj in self.delegates: - if isinstance(obj, Main): - main = obj - break - - fonts = {} - for text in main.get_all(lambda x: isinstance(x, Text)): - fs = base_font_size - ancestor = text.parent - while ancestor: - try: - fs = int(ancestor.attrs['fontsize']) - break - except (AttributeError, KeyError): - pass - try: - fs = int(ancestor.textSettings['fontsize']) - break - except (AttributeError, KeyError): - pass - try: - fs = int(ancestor.textStyle.attrs['fontsize']) - break - except (AttributeError, KeyError): - pass - ancestor = ancestor.parent - length = len(text.text) - fonts[fs] = fonts.get(fs, 0) + length - if not fonts: - print 'WARNING: LRF seems to have no textual content. Cannot rationalize font sizes.' 
- return - - old_base_font_size = float(max(fonts.items(), key=operator.itemgetter(1))[0]) - factor = base_font_size / old_base_font_size - def rescale(old): - return str(int(int(old) * factor)) - - text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock))) - for tb in text_blocks: - if tb.textSettings.has_key('fontsize'): - tb.textSettings['fontsize'] = rescale(tb.textSettings['fontsize']) - for span in tb.get_all(lambda x: isinstance(x, Span)): - if span.attrs.has_key('fontsize'): - span.attrs['fontsize'] = rescale(span.attrs['fontsize']) - if span.attrs.has_key('baselineskip'): - span.attrs['baselineskip'] = rescale(span.attrs['baselineskip']) - - text_styles = set(tb.textStyle for tb in text_blocks) - for ts in text_styles: - ts.attrs['fontsize'] = rescale(ts.attrs['fontsize']) - ts.attrs['baselineskip'] = rescale(ts.attrs['baselineskip']) - - - def renderLrs(self, lrsFile, encoding="UTF-8"): - if isinstance(lrsFile, basestring): - lrsFile = codecs.open(lrsFile, "wb", encoding=encoding) - self.render(lrsFile, outputEncodingName=encoding) - lrsFile.close() - - - def renderLrf(self, lrfFile): - self.appendReferencedObjects(self) - if isinstance(lrfFile, basestring): - lrfFile = file(lrfFile, "wb") - lrfWriter = LrfWriter(self.sourceencoding) - - lrfWriter.optimizeTags = self.optimizeTags - lrfWriter.optimizeCompression = self.optimizeCompression - - self.toLrf(lrfWriter) - lrfWriter.writeFile(lrfFile) - lrfFile.close() - - - def toElement(self, se): - root = Element("BBeBXylog", version="1.0") - root.append(Element("Property")) - self.appendDelegates(root, self.sourceencoding) - return root - - - def render(self, f, outputEncodingName='UTF-8'): - """ Write the book as an LRS to file f. 
""" - - self.appendReferencedObjects(self) - - # create the root node, and populate with the parts of the book - - root = self.toElement(self.sourceencoding) - - # now, add some newlines to make it easier to look at - - _formatXml(root) - - writer = ElementWriter(root, header=True, - sourceEncoding=self.sourceencoding, - spaceBeforeClose=False, - outputEncodingName=outputEncodingName) - writer.write(f) - - - -class BookInformation(Delegator): - """ Just a container for the Info and TableOfContents elements. """ - def __init__(self): - Delegator.__init__(self, [Info(), TableOfContents()]) - - - def toElement(self, se): - bi = Element("BookInformation") - self.appendDelegates(bi, se) - return bi - - - -class Info(Delegator): - """ Just a container for the BookInfo and DocInfo elements. """ - def __init__(self): - self.genreading = DEFAULT_GENREADING - Delegator.__init__(self, [BookInfo(), DocInfo()]) - - - def getSettings(self): - return ["genreading"] #+ self.delegatedSettings - - - def toElement(self, se): - info = Element("Info", version="1.1") - info.append( - self.delegates[0].toElement(se, reading="s" in self.genreading)) - info.append(self.delegates[1].toElement(se)) - return info - - - def toLrf(self, lrfWriter): - # this info is set in XML form in the LRF - info = Element("Info", version="1.1") - #self.appendDelegates(info) - info.append( - self.delegates[0].toElement(lrfWriter.getSourceEncoding(), reading="f" in self.genreading)) - info.append(self.delegates[1].toElement(lrfWriter.getSourceEncoding())) - - # look for the thumbnail file and get the filename - tnail = info.find("DocInfo/CThumbnail") - if tnail is not None: - lrfWriter.setThumbnailFile(tnail.get("file")) - # does not work: info.remove(tnail) - - - _formatXml(info) - - # fix up the doc info to match the LRF format - # NB: generates an encoding attribute, which lrs2lrf does not - xmlInfo = ElementWriter(info, header=True, sourceEncoding=lrfWriter.getSourceEncoding(), - 
spaceBeforeClose=False).toString() - - xmlInfo = re.sub(r"\n", "", xmlInfo) - xmlInfo = xmlInfo.replace("SumPage>", "Page>") - lrfWriter.docInfoXml = xmlInfo - - - -class TableOfContents(object): - def __init__(self): - self.tocEntries = [] - - - def appendReferencedObjects(self, parent): - pass - - - def getMethods(self): - return ["addTocEntry"] - - - def getSettings(self): - return [] - - - def addTocEntry(self, tocLabel, textBlock): - if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)): - raise LrsError, "TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+\ - " not a " + str(type(textBlock)) - - if textBlock.parent is None: - raise LrsError, "TOC text block must be already appended to a page" - - if False and textBlock.parent.parent is None: - raise LrsError, \ - "TOC destination page must be already appended to a book" - - if not hasattr(textBlock.parent, 'objId'): - raise LrsError, "TOC destination must be appended to a container with an objID" - - for tl in self.tocEntries: - if tl.label == tocLabel and tl.textBlock == textBlock: - return - - self.tocEntries.append(TocLabel(tocLabel, textBlock)) - textBlock.tocLabel = tocLabel - - - def toElement(self, se): - if len(self.tocEntries) == 0: - return None - - toc = Element("TOC") - - for t in self.tocEntries: - toc.append(t.toElement(se)) - - return toc - - - def toLrf(self, lrfWriter): - if len(self.tocEntries) == 0: - return - - toc = [] - for t in self.tocEntries: - toc.append((t.textBlock.parent.objId, t.textBlock.objId, t.label)) - - lrfToc = LrfToc(LrsObject.getNextObjId(), toc, lrfWriter.getSourceEncoding()) - lrfWriter.append(lrfToc) - lrfWriter.setTocObject(lrfToc) - - - -class TocLabel(object): - def __init__(self, label, textBlock): - self.label = escape(re.sub(r'&(\S+?);', entity_to_unicode, label)) - self.textBlock = textBlock - - - def toElement(self, se): - return ElementWithText("TocLabel", self.label, - refobj=str(self.textBlock.objId), - 
refpage=str(self.textBlock.parent.objId)) - - - -class BookInfo(object): - def __init__(self): - self.title = "Untitled" - self.author = "Anonymous" - self.bookid = None - self.pi = None - self.isbn = None - self.publisher = None - self.freetext = "\n\n" - self.label = None - self.category = None - self.classification = None - - def appendReferencedObjects(self, parent): - pass - - - def getMethods(self): - return [] - - - def getSettings(self): - return ["author", "title", "bookid", "isbn", "publisher", - "freetext", "label", "category", "classification"] - - - def _appendISBN(self, bi): - pi = Element("ProductIdentifier") - isbnElement = ElementWithText("ISBNPrintable", self.isbn) - isbnValueElement = ElementWithText("ISBNValue", - self.isbn.replace("-", "")) - - pi.append(isbnElement) - pi.append(isbnValueElement) - bi.append(pi) - - - def toElement(self, se, reading=True): - bi = Element("BookInfo") - bi.append(ElementWithReading("Title", self.title, reading=reading)) - bi.append(ElementWithReading("Author", self.author, reading=reading)) - bi.append(ElementWithText("BookID", self.bookid)) - if self.isbn is not None: - self._appendISBN(bi) - - if self.publisher is not None: - bi.append(ElementWithReading("Publisher", self.publisher)) - - bi.append(ElementWithReading("Label", self.label, reading=reading)) - bi.append(ElementWithText("Category", self.category)) - bi.append(ElementWithText("Classification", self.classification)) - bi.append(ElementWithText("FreeText", self.freetext)) - return bi - - - -class DocInfo(object): - def __init__(self): - self.thumbnail = None - self.language = "en" - self.creator = None - self.creationdate = date.today().isoformat() - self.producer = "%s v%s"%(__appname__, __version__) - self.numberofpages = "0" - - - def appendReferencedObjects(self, parent): - pass - - - def getMethods(self): - return [] - - - def getSettings(self): - return ["thumbnail", "language", "creator", "creationdate", - "producer", "numberofpages"] - - - def 
toElement(self, se): - docInfo = Element("DocInfo") - - if self.thumbnail is not None: - docInfo.append(Element("CThumbnail", file=self.thumbnail)) - - docInfo.append(ElementWithText("Language", self.language)) - docInfo.append(ElementWithText("Creator", self.creator)) - docInfo.append(ElementWithText("CreationDate", self.creationdate)) - docInfo.append(ElementWithText("Producer", self.producer)) - docInfo.append(ElementWithText("SumPage", str(self.numberofpages))) - return docInfo - - - -class Main(LrsContainer): - def __init__(self): - LrsContainer.__init__(self, [Page]) - - - def getMethods(self): - return ["appendPage", "Page"] - - - def getSettings(self): - return [] - - - def Page(self, *args, **kwargs): - p = Page(*args, **kwargs) - self.append(p) - return p - - - def appendPage(self, page): - self.append(page) - - - def toElement(self, sourceEncoding): - main = Element(self.__class__.__name__) - - for page in self.contents: - main.append(page.toElement(sourceEncoding)) - - return main - - - def toLrf(self, lrfWriter): - pageIds = [] - - # set this id now so that pages can see it - pageTreeId = LrsObject.getNextObjId() - lrfWriter.setPageTreeId(pageTreeId) - - # create a list of all the page object ids while dumping the pages - - for p in self.contents: - pageIds.append(p.objId) - p.toLrf(lrfWriter) - - # create a page tree object - - pageTree = LrfObject("PageTree", pageTreeId) - pageTree.appendLrfTag(LrfTag("PageList", pageIds)) - - lrfWriter.append(pageTree) - - - -class Solos(LrsContainer): - def __init__(self): - LrsContainer.__init__(self, [Solo]) - - - def getMethods(self): - return ["appendSolo", "Solo"] - - - def getSettings(self): - return [] - - - def Solo(self, *args, **kwargs): - p = Solo(*args, **kwargs) - self.append(p) - return p - - - def appendSolo(self, solo): - self.append(solo) - - - def toLrf(self, lrfWriter): - for s in self.contents: - s.toLrf(lrfWriter) - - - def toElement(self, se): - solos = [] - for s in self.contents: - 
solos.append(s.toElement(se)) - - if len(solos) == 0: - return None - - - return solos - - - -class Solo(Main): - pass - - -class Template(object): - """ Does nothing that I know of. """ - - def appendReferencedObjects(self, parent): - pass - - - def getMethods(self): - return [] - - - def getSettings(self): - return [] - - - def toElement(self, se): - t = Element("Template") - t.attrib["version"] = "1.0" - return t - - def toLrf(self, lrfWriter): - # does nothing - pass - -class StyleDefault(LrsAttributes): - """ - Supply some defaults for all TextBlocks. - The legal values are a subset of what is allowed on a - TextBlock -- ruby, emphasis, and waitprop settings. - """ - defaults = dict(rubyalign="start", rubyadjust="none", - rubyoverhang="none", empdotsposition="before", - empdotsfontname="Dutch801 Rm BT Roman", - empdotscode="0x002e", emplineposition="after", - emplinetype = "solid", setwaitprop="noreplay") - - alsoAllow = ["refempdotsfont", "rubyAlignAndAdjust"] - - def __init__(self, **settings): - LrsAttributes.__init__(self, self.defaults, - alsoAllow=self.alsoAllow, **settings) - - - def toElement(self, se): - return Element("SetDefault", self.attrs) - - -class Style(LrsContainer, Delegator): - def __init__(self, styledefault=StyleDefault()): - LrsContainer.__init__(self, [PageStyle, TextStyle, BlockStyle]) - Delegator.__init__(self, [BookStyle(styledefault=styledefault)]) - self.bookStyle = self.delegates[0] - self.appendPageStyle = self.appendTextStyle = \ - self.appendBlockStyle = self.append - - - def appendReferencedObjects(self, parent): - LrsContainer.appendReferencedObjects(self, parent) - - - def getMethods(self): - return ["PageStyle", "TextStyle", "BlockStyle", - "appendPageStyle", "appendTextStyle", "appendBlockStyle"] + \ - self.delegatedMethods - - def getSettings(self): - return [(self.bookStyle, x) for x in self.bookStyle.getSettings()] - - - def PageStyle(self, *args, **kwargs): - ps = PageStyle(*args, **kwargs) - self.append(ps) - return 
ps - - - def TextStyle(self, *args, **kwargs): - ts = TextStyle(*args, **kwargs) - self.append(ts) - return ts - - - def BlockStyle(self, *args, **kwargs): - bs = BlockStyle(*args, **kwargs) - self.append(bs) - return bs - - - def toElement(self, se): - style = Element("Style") - style.append(self.bookStyle.toElement(se)) - - for content in self.contents: - style.append(content.toElement(se)) - - return style - - - def toLrf(self, lrfWriter): - self.bookStyle.toLrf(lrfWriter) - - for s in self.contents: - s.toLrf(lrfWriter) - - - -class BookStyle(LrsObject, LrsContainer): - def __init__(self, styledefault=StyleDefault()): - LrsObject.__init__(self, assignId=True) - LrsContainer.__init__(self, [Font]) - self.styledefault = styledefault - self.booksetting = BookSetting() - self.appendFont = self.append - - - def getSettings(self): - return ["styledefault", "booksetting"] - - - def getMethods(self): - return ["Font", "appendFont"] - - - def Font(self, *args, **kwargs): - f = Font(*args, **kwargs) - self.append(f) - return - - - def toElement(self, se): - bookStyle = self.lrsObjectElement("BookStyle", objlabel="stylelabel", - labelDecorate=False) - bookStyle.append(self.styledefault.toElement(se)) - bookStyle.append(self.booksetting.toElement(se)) - for font in self.contents: - bookStyle.append(font.toElement(se)) - - return bookStyle - - - def toLrf(self, lrfWriter): - bookAtr = LrfObject("BookAtr", self.objId) - bookAtr.appendLrfTag(LrfTag("ChildPageTree", lrfWriter.getPageTreeId())) - bookAtr.appendTagDict(self.styledefault.attrs) - - self.booksetting.toLrf(lrfWriter) - - lrfWriter.append(bookAtr) - lrfWriter.setRootObject(bookAtr) - - for font in self.contents: - font.toLrf(lrfWriter) - - - - - - -class BookSetting(LrsAttributes): - def __init__(self, **settings): - defaults = dict(bindingdirection="Lr", dpi="1660", - screenheight="800", screenwidth="600", colordepth="24") - LrsAttributes.__init__(self, defaults, **settings) - - - def toLrf(self, lrfWriter): - a = 
self.attrs - lrfWriter.dpi = int(a["dpi"]) - lrfWriter.bindingdirection = \ - BINDING_DIRECTION_ENCODING[a["bindingdirection"]] - lrfWriter.height = int(a["screenheight"]) - lrfWriter.width = int(a["screenwidth"]) - lrfWriter.colorDepth = int(a["colordepth"]) - - def toElement(self, se): - return Element("BookSetting", self.attrs) - - - -class LrsStyle(LrsObject, LrsAttributes, LrsContainer): - """ A mixin class for styles. """ - def __init__(self, elementName, defaults=None, alsoAllow=None, **overrides): - if defaults is None: - defaults = {} - - LrsObject.__init__(self) - LrsAttributes.__init__(self, defaults, alsoAllow=alsoAllow, **overrides) - LrsContainer.__init__(self, []) - self.elementName = elementName - self.objectsAppended = False - #self.label = "%s.%d" % (elementName, self.objId) - #self.label = str(self.objId) - #self.parent = None - - - def update(self, settings): - for name, value in settings.items(): - if name not in self.__class__.validSettings: - raise LrsError, "%s not a valid setting for %s" % \ - (name, self.__class__.__name__) - self.attrs[name] = value - - def getLabel(self): - return str(self.objId) - - - def toElement(self, se): - element = Element(self.elementName, stylelabel=self.getLabel(), - objid=str(self.objId)) - element.attrib.update(self.attrs) - return element - - - def toLrf(self, lrfWriter): - obj = LrfObject(self.elementName, self.objId) - obj.appendTagDict(self.attrs, self.__class__.__name__) - lrfWriter.append(obj) - - def __eq__(self, other): - if hasattr(other, 'attrs'): - return self.__class__ == other.__class__ and self.attrs == other.attrs - return False - -class TextStyle(LrsStyle): - """ - The text style of a TextBlock. Default is 10 pt. Times Roman. - - Setting Value Default - -------- ----- ------- - align "head","center","foot" "head" (left aligned) - baselineskip points * 10 120 (12 pt. distance between - bottoms of lines) - fontsize points * 10 100 (10 pt.) 
- fontweight 1 to 1000 400 (normal, 800 is bold) - fontwidth points * 10 or -10 -10 (use values from font) - linespace points * 10 10 (min space btw. lines?) - wordspace points * 10 25 (min space btw. each word) - - """ - baseDefaults = dict( - columnsep="0", charspace="0", - textlinewidth="2", align="head", linecolor="0x00000000", - column="1", fontsize="100", fontwidth="-10", fontescapement="0", - fontorientation="0", fontweight="400", - fontfacename="Dutch801 Rm BT Roman", - textcolor="0x00000000", wordspace="25", letterspace="0", - baselineskip="120", linespace="10", parindent="0", parskip="0", - textbgcolor="0xFF000000") - - alsoAllow = ["empdotscode", "empdotsfontname", "refempdotsfont", - "rubyadjust", "rubyalign", "rubyoverhang", - "empdotsposition", 'emplinetype', 'emplineposition'] - - validSettings = baseDefaults.keys() + alsoAllow - - defaults = baseDefaults.copy() - - def __init__(self, **overrides): - LrsStyle.__init__(self, "TextStyle", self.defaults, - alsoAllow=self.alsoAllow, **overrides) - - def copy(self): - tb = TextStyle() - tb.attrs = self.attrs.copy() - return tb - - - -class BlockStyle(LrsStyle): - """ - The block style of a TextBlock. Default is an expandable 560 pixel - wide area with no space for headers or footers. 
- - Setting Value Default - -------- ----- ------- - blockwidth pixels 560 - sidemargin pixels 0 - """ - - baseDefaults = dict( - bgimagemode="fix", framemode="square", blockwidth="560", - blockheight="100", blockrule="horz-adjustable", layout="LrTb", - framewidth="0", framecolor="0x00000000", topskip="0", - sidemargin="0", footskip="0", bgcolor="0xFF000000") - - validSettings = baseDefaults.keys() - defaults = baseDefaults.copy() - - def __init__(self, **overrides): - LrsStyle.__init__(self, "BlockStyle", self.defaults, **overrides) - - def copy(self): - tb = BlockStyle() - tb.attrs = self.attrs.copy() - return tb - - - -class PageStyle(LrsStyle): - """ - Setting Value Default - -------- ----- ------- - evensidemargin pixels 20 - oddsidemargin pixels 20 - topmargin pixels 20 - """ - baseDefaults = dict( - topmargin="20", headheight="0", headsep="0", - oddsidemargin="20", textheight="747", textwidth="575", - footspace="0", evensidemargin="20", footheight="0", - layout="LrTb", bgimagemode="fix", pageposition="any", - setwaitprop="noreplay", setemptyview="show") - - alsoAllow = ["header", "evenheader", "oddheader", - "footer", "evenfooter", "oddfooter"] - - validSettings = baseDefaults.keys() + alsoAllow - defaults = baseDefaults.copy() - - @classmethod - def translateHeaderAndFooter(selfClass, parent, settings): - selfClass._fixup(parent, "header", settings) - selfClass._fixup(parent, "footer", settings) - - - @classmethod - def _fixup(selfClass, parent, basename, settings): - evenbase = "even" + basename - oddbase = "odd" + basename - if basename in settings: - baseObj = settings[basename] - del settings[basename] - settings[evenbase] = settings[oddbase] = baseObj - - if evenbase in settings: - evenObj = settings[evenbase] - del settings[evenbase] - if evenObj.parent is None: - parent.append(evenObj) - settings[evenbase + "id"] = str(evenObj.objId) - - if oddbase in settings: - oddObj = settings[oddbase] - del settings[oddbase] - if oddObj.parent is None: - 
parent.append(oddObj) - settings[oddbase + "id"] = str(oddObj.objId) - - - def appendReferencedObjects(self, parent): - if self.objectsAppended: - return - PageStyle.translateHeaderAndFooter(parent, self.attrs) - self.objectsAppended = True - - - - def __init__(self, **settings): - #self.fixHeaderSettings(settings) - LrsStyle.__init__(self, "PageStyle", self.defaults, - alsoAllow=self.alsoAllow, **settings) - - -class Page(LrsObject, LrsContainer): - """ - Pages are added to Books. Pages can be supplied a PageStyle. - If they are not, Page.defaultPageStyle will be used. - """ - defaultPageStyle = PageStyle() - - def __init__(self, pageStyle=defaultPageStyle, **settings): - LrsObject.__init__(self) - LrsContainer.__init__(self, [TextBlock, BlockSpace, RuledLine, - ImageBlock, Canvas]) - - self.pageStyle = pageStyle - - for settingName in settings.keys(): - if settingName not in PageStyle.defaults and \ - settingName not in PageStyle.alsoAllow: - raise LrsError, "setting %s not allowed on Page" % settingName - - self.settings = settings.copy() - - - def appendReferencedObjects(self, parent): - PageStyle.translateHeaderAndFooter(parent, self.settings) - - self.pageStyle.appendReferencedObjects(parent) - - if self.pageStyle.parent is None: - parent.append(self.pageStyle) - - LrsContainer.appendReferencedObjects(self, parent) - - - def RuledLine(self, *args, **kwargs): - rl = RuledLine(*args, **kwargs) - self.append(rl) - return rl - - - def BlockSpace(self, *args, **kwargs): - bs = BlockSpace(*args, **kwargs) - self.append(bs) - return bs - - - def TextBlock(self, *args, **kwargs): - """ Create and append a new text block (shortcut). """ - tb = TextBlock(*args, **kwargs) - self.append(tb) - return tb - - - def ImageBlock(self, *args, **kwargs): - """ Create and append and new Image block (shorthand). 
""" - ib = ImageBlock(*args, **kwargs) - self.append(ib) - return ib - - - def addLrfObject(self, objId): - self.stream.appendLrfTag(LrfTag("Link", objId)) - - - def appendLrfTag(self, lrfTag): - self.stream.appendLrfTag(lrfTag) - - - def toLrf(self, lrfWriter): - # tags: - # ObjectList - # Link to pagestyle - # Parent page tree id - # stream of tags - - p = LrfObject("Page", self.objId) - lrfWriter.append(p) - - pageContent = set() - self.stream = LrfTagStream(0) - for content in self.contents: - content.toLrfContainer(lrfWriter, self) - if hasattr(content, "getReferencedObjIds"): - pageContent.update(content.getReferencedObjIds()) - - - #print "page contents:", pageContent - # ObjectList not needed and causes slowdown in SONY LRF renderer - #p.appendLrfTag(LrfTag("ObjectList", pageContent)) - p.appendLrfTag(LrfTag("Link", self.pageStyle.objId)) - p.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId())) - p.appendTagDict(self.settings) - p.appendLrfTags(self.stream.getStreamTags(lrfWriter.getSourceEncoding())) - - - def toElement(self, sourceEncoding): - page = self.lrsObjectElement("Page") - page.set("pagestyle", self.pageStyle.getLabel()) - page.attrib.update(self.settings) - - for content in self.contents: - page.append(content.toElement(sourceEncoding)) - - return page - - - - - -class TextBlock(LrsObject, LrsContainer): - """ - TextBlocks are added to Pages. They hold Paragraphs or CRs. - - If a TextBlock is used in a header, it should be appended to - the Book, not to a specific Page. - """ - defaultTextStyle = TextStyle() - defaultBlockStyle = BlockStyle() - - def __init__(self, textStyle=defaultTextStyle, \ - blockStyle=defaultBlockStyle, \ - **settings): - ''' - Create TextBlock. - @param textStyle: The L{TextStyle} for this block. - @param blockStyle: The L{BlockStyle} for this block. - @param settings: C{dict} of extra settings to apply to this block. 
- ''' - LrsObject.__init__(self) - LrsContainer.__init__(self, [Paragraph, CR]) - - self.textSettings = {} - self.blockSettings = {} - - for name, value in settings.items(): - if name in TextStyle.validSettings: - self.textSettings[name] = value - elif name in BlockStyle.validSettings: - self.blockSettings[name] = value - elif name == 'toclabel': - self.tocLabel = value - else: - raise LrsError, "%s not a valid setting for TextBlock" % name - - self.textStyle = textStyle - self.blockStyle = blockStyle - - # create a textStyle with our current text settings (for Span to find) - self.currentTextStyle = textStyle.copy() if self.textSettings else textStyle - self.currentTextStyle.attrs.update(self.textSettings) - - - def appendReferencedObjects(self, parent): - if self.textStyle.parent is None: - parent.append(self.textStyle) - - if self.blockStyle.parent is None: - parent.append(self.blockStyle) - - LrsContainer.appendReferencedObjects(self, parent) - - - def Paragraph(self, *args, **kwargs): - """ - Create and append a Paragraph to this TextBlock. A CR is - automatically inserted after the Paragraph. To avoid this - behavior, create the Paragraph and append it to the TextBlock - in a separate call. 
- """ - p = Paragraph(*args, **kwargs) - self.append(p) - self.append(CR()) - return p - - - - def toElement(self, sourceEncoding): - tb = self.lrsObjectElement("TextBlock", labelName="Block") - tb.attrib.update(self.textSettings) - tb.attrib.update(self.blockSettings) - tb.set("textstyle", self.textStyle.getLabel()) - tb.set("blockstyle", self.blockStyle.getLabel()) - if hasattr(self, "tocLabel"): - tb.set("toclabel", self.tocLabel) - - for content in self.contents: - tb.append(content.toElement(sourceEncoding)) - - return tb - - def getReferencedObjIds(self): - ids = [self.objId, self.extraId, self.blockStyle.objId, - self.textStyle.objId] - for content in self.contents: - if hasattr(content, "getReferencedObjIds"): - ids.extend(content.getReferencedObjIds()) - - return ids - - - def toLrf(self, lrfWriter): - self.toLrfContainer(lrfWriter, lrfWriter) - - - def toLrfContainer(self, lrfWriter, container): - # id really belongs to the outer block - extraId = LrsObject.getNextObjId() - - b = LrfObject("Block", self.objId) - b.appendLrfTag(LrfTag("Link", self.blockStyle.objId)) - b.appendLrfTags( - LrfTagStream(0, [LrfTag("Link", extraId)]). \ - getStreamTags(lrfWriter.getSourceEncoding())) - b.appendTagDict(self.blockSettings) - container.addLrfObject(b.objId) - lrfWriter.append(b) - - tb = LrfObject("TextBlock", extraId) - tb.appendLrfTag(LrfTag("Link", self.textStyle.objId)) - tb.appendTagDict(self.textSettings) - - stream = LrfTagStream(STREAM_COMPRESSED) - for content in self.contents: - content.toLrfContainer(lrfWriter, stream) - - if lrfWriter.saveStreamTags: # true only if testing - tb.saveStreamTags = stream.tags - - tb.appendLrfTags( - stream.getStreamTags(lrfWriter.getSourceEncoding(), - optimizeTags=lrfWriter.optimizeTags, - optimizeCompression=lrfWriter.optimizeCompression)) - lrfWriter.append(tb) - - self.extraId = extraId - - -class Paragraph(LrsContainer): - """ - Note:

alone does not make a paragraph. Only a CR inserted - into a text block right after a

makes a real paragraph. - Two Paragraphs appended in a row act like a single Paragraph. - - Also note that there are few autoappenders for Paragraph (and - the things that can go in it.) It's less confusing (to me) to use - explicit .append methods to build up the text stream. - """ - def __init__(self, text=None): - LrsContainer.__init__(self, [Text, CR, DropCaps, CharButton, - LrsSimpleChar1, basestring]) - if text is not None: - if isinstance(text, basestring): - text = Text(text) - self.append(text) - - def CR(self): - # Okay, here's a single autoappender for this common operation - cr = CR() - self.append(cr) - return cr - - - def getReferencedObjIds(self): - ids = [] - for content in self.contents: - if hasattr(content, "getReferencedObjIds"): - ids.extend(content.getReferencedObjIds()) - - return ids - - - def toLrfContainer(self, lrfWriter, parent): - parent.appendLrfTag(LrfTag("pstart", 0)) - for content in self.contents: - content.toLrfContainer(lrfWriter, parent) - parent.appendLrfTag(LrfTag("pend")) - - - def toElement(self, sourceEncoding): - p = Element("P") - appendTextElements(p, self.contents, sourceEncoding) - return p - - - -class LrsTextTag(LrsContainer): - def __init__(self, text, validContents): - LrsContainer.__init__(self, [Text, basestring] + validContents) - if text is not None: - self.append(text) - - - def toLrfContainer(self, lrfWriter, parent): - if hasattr(self, "tagName"): - tagName = self.tagName - else: - tagName = self.__class__.__name__ - - parent.appendLrfTag(LrfTag(tagName)) - - for content in self.contents: - content.toLrfContainer(lrfWriter, parent) - - parent.appendLrfTag(LrfTag(tagName + "End")) - - - def toElement(self, se): - if hasattr(self, "tagName"): - tagName = self.tagName - else: - tagName = self.__class__.__name__ - - p = Element(tagName) - appendTextElements(p, self.contents, se) - return p - - -class LrsSimpleChar1(object): - def isEmpty(self): - for content in self.contents: - if not content.isEmpty(): - return 
False - return True - - def hasFollowingContent(self): - foundSelf = False - for content in self.parent.contents: - if content == self: - foundSelf = True - elif foundSelf: - if not content.isEmpty(): - return True - return False - - -class DropCaps(LrsTextTag): - - def __init__(self, line=1): - LrsTextTag.__init__(self, None, [LrsSimpleChar1]) - if int(line) <= 0: - raise LrsError('A DrawChar must span at least one line.') - self.line = int(line) - - def isEmpty(self): - return self.text == None or not self.text.strip() - - def toElement(self, se): - elem = Element('DrawChar', line=str(self.line)) - appendTextElements(elem, self.contents, se) - return elem - - def toLrfContainer(self, lrfWriter, parent): - parent.appendLrfTag(LrfTag('DrawChar', (int(self.line),))) - - for content in self.contents: - content.toLrfContainer(lrfWriter, parent) - - parent.appendLrfTag(LrfTag("DrawCharEnd")) - - - -class Button(LrsObject, LrsContainer): - def __init__(self, **settings): - LrsObject.__init__(self, **settings) - LrsContainer.__init__(self, [PushButton]) - - def findJumpToRefs(self): - for sub1 in self.contents: - if isinstance(sub1, PushButton): - for sub2 in sub1.contents: - if isinstance(sub2, JumpTo): - return (sub2.textBlock.objId, sub2.textBlock.parent.objId) - raise LrsError, "%s has no PushButton or JumpTo subs"%self.__class__.__name__ - - def toLrf(self, lrfWriter): - (refobj, refpage) = self.findJumpToRefs() - # print "Button writing JumpTo refobj=", jumpto.refobj, ", and refpage=", jumpto.refpage - button = LrfObject("Button", self.objId) - button.appendLrfTag(LrfTag("buttonflags", 0x10)) # pushbutton - button.appendLrfTag(LrfTag("PushButtonStart")) - button.appendLrfTag(LrfTag("buttonactions")) - button.appendLrfTag(LrfTag("jumpto", (int(refpage), int(refobj)))) - button.append(LrfTag("endbuttonactions")) - button.appendLrfTag(LrfTag("PushButtonEnd")) - lrfWriter.append(button) - - def toElement(self, se): - b = self.lrsObjectElement("Button") - - for content 
in self.contents: - b.append(content.toElement(se)) - - return b - -class ButtonBlock(Button): - pass - -class PushButton(LrsContainer): - - def __init__(self, **settings): - LrsContainer.__init__(self, [JumpTo]) - - def toElement(self, se): - b = Element("PushButton") - - for content in self.contents: - b.append(content.toElement(se)) - - return b - -class JumpTo(LrsContainer): - - def __init__(self, textBlock): - LrsContainer.__init__(self, []) - self.textBlock=textBlock - - def setTextBlock(self, textBlock): - self.textBlock = textBlock - - def toElement(self, se): - return Element("JumpTo", refpage=str(self.textBlock.parent.objId), refobj=str(self.textBlock.objId)) - - - - - -class Plot(LrsSimpleChar1, LrsContainer): - - ADJUSTMENT_VALUES = {'center':1, 'baseline':2, 'top':3, 'bottom':4} - - def __init__(self, obj, xsize=0, ysize=0, adjustment=None): - LrsContainer.__init__(self, []) - if obj != None: - self.setObj(obj) - if xsize < 0 or ysize < 0: - raise LrsError('Sizes must be positive semi-definite') - self.xsize = int(xsize) - self.ysize = int(ysize) - if adjustment and adjustment not in Plot.ADJUSTMENT_VALUES.keys(): - raise LrsError('adjustment must be one of' + Plot.ADJUSTMENT_VALUES.keys()) - self.adjustment = adjustment - - def setObj(self, obj): - if not isinstance(obj, (Image, Button)): - raise LrsError('Plot elements can only refer to Image or Button elements') - self.obj = obj - - def getReferencedObjIds(self): - return [self.obj.objId] - - def appendReferencedObjects(self, parent): - if self.obj.parent is None: - parent.append(self.obj) - - def toElement(self, se): - elem = Element('Plot', xsize=str(self.xsize), ysize=str(self.ysize), \ - refobj=str(self.obj.objId)) - if self.adjustment: - elem.set('adjustment', self.adjustment) - return elem - - def toLrfContainer(self, lrfWriter, parent): - adj = self.adjustment if self.adjustment else 'bottom' - params = (int(self.xsize), int(self.ysize), int(self.obj.objId), \ - Plot.ADJUSTMENT_VALUES[adj]) - 
parent.appendLrfTag(LrfTag("Plot", params)) - -class Text(LrsContainer): - """ A object that represents raw text. Does not have a toElement. """ - def __init__(self, text): - LrsContainer.__init__(self, []) - self.text = text - - def isEmpty(self): - return not self.text or not self.text.strip() - - def toLrfContainer(self, lrfWriter, parent): - if self.text: - if isinstance(self.text, str): - parent.appendLrfTag(LrfTag("rawtext", self.text)) - else: - parent.appendLrfTag(LrfTag("textstring", self.text)) - - -class CR(LrsSimpleChar1, LrsContainer): - """ - A line break (when appended to a Paragraph) or a paragraph break - (when appended to a TextBlock). - """ - def __init__(self): - LrsContainer.__init__(self, []) - - - def toElement(self, se): - return Element("CR") - - - def toLrfContainer(self, lrfWriter, parent): - parent.appendLrfTag(LrfTag("CR")) - - - -class Italic(LrsSimpleChar1, LrsTextTag): - def __init__(self, text=None): - LrsTextTag.__init__(self, text, [LrsSimpleChar1]) - -class Sub(LrsSimpleChar1, LrsTextTag): - def __init__(self, text=None): - LrsTextTag.__init__(self, text, []) - - - -class Sup(LrsSimpleChar1, LrsTextTag): - def __init__(self, text=None): - LrsTextTag.__init__(self, text, []) - - - -class NoBR(LrsSimpleChar1, LrsTextTag): - def __init__(self, text=None): - LrsTextTag.__init__(self, text, [LrsSimpleChar1]) - - -class Space(LrsSimpleChar1, LrsContainer): - def __init__(self, xsize=0, x=0): - LrsContainer.__init__(self, []) - if xsize == 0 and x != 0: xsize = x - self.xsize = xsize - - - def toElement(self, se): - if self.xsize == 0: - return - - return Element("Space", xsize=str(self.xsize)) - - - def toLrfContainer(self, lrfWriter, container): - if self.xsize != 0: - container.appendLrfTag(LrfTag("Space", self.xsize)) - - -class Box(LrsSimpleChar1, LrsContainer): - """ - Draw a box around text. Unfortunately, does not seem to do - anything on the PRS-500. 
- """ - def __init__(self, linetype="solid"): - LrsContainer.__init__(self, [Text, basestring]) - if linetype not in LINE_TYPE_ENCODING: - raise LrsError, linetype + " is not a valid line type" - self.linetype = linetype - - - def toElement(self, se): - e = Element("Box", linetype=self.linetype) - appendTextElements(e, self.contents, se) - return e - - - def toLrfContainer(self, lrfWriter, container): - container.appendLrfTag(LrfTag("Box", self.linetype)) - for content in self.contents: - content.toLrfContainer(lrfWriter, container) - container.appendLrfTag(LrfTag("BoxEnd")) - - - - -class Span(LrsSimpleChar1, LrsContainer): - def __init__(self, text=None, **attrs): - LrsContainer.__init__(self, [LrsSimpleChar1, Text, basestring]) - if text is not None: - if isinstance(text, basestring): - text = Text(text) - self.append(text) - - for attrname in attrs.keys(): - if attrname not in TextStyle.defaults and \ - attrname not in TextStyle.alsoAllow: - raise LrsError, "setting %s not allowed on Span" % attrname - self.attrs = attrs - - - def findCurrentTextStyle(self): - parent = self.parent - while 1: - if parent is None or hasattr(parent, "currentTextStyle"): - break - parent = parent.parent - - if parent is None: - raise LrsError, "no enclosing current TextStyle found" - - return parent.currentTextStyle - - - - def toLrfContainer(self, lrfWriter, container): - - # find the currentTextStyle - oldTextStyle = self.findCurrentTextStyle() - - # set the attributes we want changed - for (name, value) in self.attrs.items(): - if name in oldTextStyle.attrs and oldTextStyle.attrs[name] == self.attrs[name]: - self.attrs.pop(name) - else: - container.appendLrfTag(LrfTag(name, value)) - - # set a currentTextStyle so nested span can put things back - oldTextStyle = self.findCurrentTextStyle() - self.currentTextStyle = oldTextStyle.copy() - self.currentTextStyle.attrs.update(self.attrs) - - for content in self.contents: - content.toLrfContainer(lrfWriter, container) - - # put the 
attributes back the way we found them - # the attributes persist beyond the next

- # if self.hasFollowingContent(): - for name in self.attrs.keys(): - container.appendLrfTag(LrfTag(name, oldTextStyle.attrs[name])) - - - def toElement(self, se): - element = Element('Span') - for (key, value) in self.attrs.items(): - element.set(key, str(value)) - - appendTextElements(element, self.contents, se) - return element - -class EmpLine(LrsTextTag, LrsSimpleChar1): - emplinetypes = ['none', 'solid', 'dotted', 'dashed', 'double'] - emplinepositions = ['before', 'after'] - - def __init__(self, text=None, emplineposition='before', emplinetype='solid'): - LrsTextTag.__init__(self, text, [LrsSimpleChar1]) - if emplineposition not in self.__class__.emplinepositions: - raise LrsError('emplineposition for an EmpLine must be one of: '+str(self.__class__.emplinepositions)) - if emplinetype not in self.__class__.emplinetypes: - raise LrsError('emplinetype for an EmpLine must be one of: '+str(self.__class__.emplinetypes)) - - self.emplinetype = emplinetype - self.emplineposition = emplineposition - - - - def toLrfContainer(self, lrfWriter, parent): - parent.appendLrfTag(LrfTag(self.__class__.__name__, (self.emplineposition, self.emplinetype))) - parent.appendLrfTag(LrfTag('emplineposition', self.emplineposition)) - parent.appendLrfTag(LrfTag('emplinetype', self.emplinetype)) - for content in self.contents: - content.toLrfContainer(lrfWriter, parent) - - parent.appendLrfTag(LrfTag(self.__class__.__name__ + "End")) - - def toElement(self, se): - element = Element(self.__class__.__name__) - element.set('emplineposition', self.emplineposition) - element.set('emplinetype', self.emplinetype) - - appendTextElements(element, self.contents, se) - return element - -class Bold(Span): - """ - There is no known "bold" lrf tag. Use Span with a fontweight in LRF, - but use the word Bold in the LRS. 
- """ - def __init__(self, text=None): - Span.__init__(self, text, fontweight=800) - - def toElement(self, se): - e = Element("Bold") - appendTextElements(e, self.contents, se) - return e - - -class BlockSpace(LrsContainer): - """ Can be appended to a page to move the text point. """ - def __init__(self, xspace=0, yspace=0, x=0, y=0): - LrsContainer.__init__(self, []) - if xspace == 0 and x != 0: - xspace = x - if yspace == 0 and y != 0: - yspace = y - self.xspace = xspace - self.yspace = yspace - - - def toLrfContainer(self, lrfWriter, container): - if self.xspace != 0: - container.appendLrfTag(LrfTag("xspace", self.xspace)) - if self.yspace != 0: - container.appendLrfTag(LrfTag("yspace", self.yspace)) - - - def toElement(self, se): - element = Element("BlockSpace") - - if self.xspace != 0: - element.attrib["xspace"] = str(self.xspace) - if self.yspace != 0: - element.attrib["yspace"] = str(self.yspace) - - return element - - - -class CharButton(LrsSimpleChar1, LrsContainer): - """ - Define the text and target of a CharButton. Must be passed a - JumpButton that is the destination of the CharButton. - - Only text or SimpleChars can be appended to the CharButton. 
- """ - def __init__(self, button, text=None): - LrsContainer.__init__(self, [basestring, Text, LrsSimpleChar1]) - self.button = None - if button != None: - self.setButton(button) - - if text is not None: - self.append(text) - - def setButton(self, button): - if not isinstance(button, (JumpButton, Button)): - raise LrsError, "CharButton button must be a JumpButton or Button" - - self.button = button - - - def appendReferencedObjects(self, parent): - if self.button.parent is None: - parent.append(self.button) - - - def getReferencedObjIds(self): - return [self.button.objId] - - - def toLrfContainer(self, lrfWriter, container): - container.appendLrfTag(LrfTag("CharButton", self.button.objId)) - - for content in self.contents: - content.toLrfContainer(lrfWriter, container) - - container.appendLrfTag(LrfTag("CharButtonEnd")) - - - def toElement(self, se): - cb = Element("CharButton", refobj=str(self.button.objId)) - appendTextElements(cb, self.contents, se) - return cb - - - -class Objects(LrsContainer): - def __init__(self): - LrsContainer.__init__(self, [JumpButton, TextBlock, HeaderOrFooter, - ImageStream, Image, ImageBlock, Button, ButtonBlock]) - self.appendJumpButton = self.appendTextBlock = self.appendHeader = \ - self.appendFooter = self.appendImageStream = \ - self.appendImage = self.appendImageBlock = self.append - - - def getMethods(self): - return ["JumpButton", "appendJumpButton", "TextBlock", - "appendTextBlock", "Header", "appendHeader", - "Footer", "appendFooter", "ImageBlock", - "ImageStream", "appendImageStream", - 'Image','appendImage', 'appendImageBlock'] - - - def getSettings(self): - return [] - - - def ImageBlock(self, *args, **kwargs): - ib = ImageBlock(*args, **kwargs) - self.append(ib) - return ib - - def JumpButton(self, textBlock): - b = JumpButton(textBlock) - self.append(b) - return b - - - def TextBlock(self, *args, **kwargs): - tb = TextBlock(*args, **kwargs) - self.append(tb) - return tb - - - def Header(self, *args, **kwargs): - h = 
Header(*args, **kwargs) - self.append(h) - return h - - - def Footer(self, *args, **kwargs): - h = Footer(*args, **kwargs) - self.append(h) - return h - - - def ImageStream(self, *args, **kwargs): - i = ImageStream(*args, **kwargs) - self.append(i) - return i - - def Image(self, *args, **kwargs): - i = Image(*args, **kwargs) - self.append(i) - return i - - def toElement(self, se): - o = Element("Objects") - - for content in self.contents: - o.append(content.toElement(se)) - - return o - - - def toLrf(self, lrfWriter): - for content in self.contents: - content.toLrf(lrfWriter) - - -class JumpButton(LrsObject, LrsContainer): - """ - The target of a CharButton. Needs a parented TextBlock to jump to. - Actually creates several elements in the XML. JumpButtons must - be eventually appended to a Book (actually, an Object.) - """ - def __init__(self, textBlock): - LrsObject.__init__(self) - LrsContainer.__init__(self, []) - self.textBlock = textBlock - - def setTextBlock(self, textBlock): - self.textBlock = textBlock - - def toLrf(self, lrfWriter): - button = LrfObject("Button", self.objId) - button.appendLrfTag(LrfTag("buttonflags", 0x10)) # pushbutton - button.appendLrfTag(LrfTag("PushButtonStart")) - button.appendLrfTag(LrfTag("buttonactions")) - button.appendLrfTag(LrfTag("jumpto", - (self.textBlock.parent.objId, self.textBlock.objId))) - button.append(LrfTag("endbuttonactions")) - button.appendLrfTag(LrfTag("PushButtonEnd")) - lrfWriter.append(button) - - - def toElement(self, se): - b = self.lrsObjectElement("Button") - pb = SubElement(b, "PushButton") - jt = SubElement(pb, "JumpTo", - refpage=str(self.textBlock.parent.objId), - refobj=str(self.textBlock.objId)) - return b - - - -class RuledLine(LrsContainer, LrsAttributes, LrsObject): - """ A line. Default is 500 pixels long, 2 pixels wide. 
""" - - defaults = dict( - linelength="500", linetype="solid", linewidth="2", - linecolor="0x00000000") - - def __init__(self, **settings): - LrsContainer.__init__(self, []) - LrsAttributes.__init__(self, self.defaults, **settings) - LrsObject.__init__(self) - - - def toLrfContainer(self, lrfWriter, container): - a = self.attrs - container.appendLrfTag(LrfTag("RuledLine", - (a["linelength"], a["linetype"], a["linewidth"], a["linecolor"]))) - - - def toElement(self, se): - return Element("RuledLine", self.attrs) - - - -class HeaderOrFooter(LrsObject, LrsContainer, LrsAttributes): - """ - Creates empty header or footer objects. Append PutObj objects to - the header or footer to create the text. - - Note: it seems that adding multiple PutObjs to a header or footer - only shows the last one. - """ - defaults = dict(framemode="square", layout="LrTb", framewidth="0", - framecolor="0x00000000", bgcolor="0xFF000000") - - def __init__(self, **settings): - LrsObject.__init__(self) - LrsContainer.__init__(self, [PutObj]) - LrsAttributes.__init__(self, self.defaults, **settings) - - def put_object(self, obj, x1, y1): - self.append(PutObj(obj, x1, y1)) - - def PutObj(self, *args, **kwargs): - p = PutObj(*args, **kwargs) - self.append(p) - return p - - - def toLrf(self, lrfWriter): - hd = LrfObject(self.__class__.__name__, self.objId) - hd.appendTagDict(self.attrs) - - stream = LrfTagStream(0) - for content in self.contents: - content.toLrfContainer(lrfWriter, stream) - - hd.appendLrfTags(stream.getStreamTags(lrfWriter.getSourceEncoding())) - lrfWriter.append(hd) - - - def toElement(self, se): - name = self.__class__.__name__ - labelName = name.lower() + "label" - hd = self.lrsObjectElement(name, objlabel=labelName) - hd.attrib.update(self.attrs) - - for content in self.contents: - hd.append(content.toElement(se)) - - return hd - - -class Header(HeaderOrFooter): - pass - - - -class Footer(HeaderOrFooter): - pass - -class Canvas(LrsObject, LrsContainer, LrsAttributes): - defaults 
= dict(framemode="square", layout="LrTb", framewidth="0", - framecolor="0x00000000", bgcolor="0xFF000000", - canvasheight=0, canvaswidth=0, blockrule='block-adjustable') - - def __init__(self, width, height, **settings): - LrsObject.__init__(self) - LrsContainer.__init__(self, [PutObj]) - LrsAttributes.__init__(self, self.defaults, **settings) - - self.settings = self.defaults.copy() - self.settings.update(settings) - self.settings['canvasheight'] = int(height) - self.settings['canvaswidth'] = int(width) - - def put_object(self, obj, x1, y1): - self.append(PutObj(obj, x1, y1)) - - def toElement(self, source_encoding): - el = self.lrsObjectElement("Canvas", **self.settings) - for po in self.contents: - el.append(po.toElement(source_encoding)) - return el - - def toLrf(self, lrfWriter): - self.toLrfContainer(lrfWriter, lrfWriter) - - - def toLrfContainer(self, lrfWriter, container): - c = LrfObject("Canvas", self.objId) - c.appendTagDict(self.settings) - stream = LrfTagStream(STREAM_COMPRESSED) - for content in self.contents: - content.toLrfContainer(lrfWriter, stream) - if lrfWriter.saveStreamTags: # true only if testing - c.saveStreamTags = stream.tags - - c.appendLrfTags( - stream.getStreamTags(lrfWriter.getSourceEncoding(), - optimizeTags=lrfWriter.optimizeTags, - optimizeCompression=lrfWriter.optimizeCompression)) - container.addLrfObject(c.objId) - lrfWriter.append(c) - - def has_text(self): - return bool(self.contents) - - - -class PutObj(LrsContainer): - """ PutObj holds other objects that are drawn on a Canvas or Header. 
""" - - def __init__(self, content, x1=0, y1=0): - LrsContainer.__init__(self, [TextBlock, ImageBlock]) - self.content = content - self.x1 = int(x1) - self.y1 = int(y1) - - def setContent(self, content): - self.content = content - - def appendReferencedObjects(self, parent): - if self.content.parent is None: - parent.append(self.content) - - def toLrfContainer(self, lrfWriter, container): - container.appendLrfTag(LrfTag("PutObj", (self.x1, self.y1, - self.content.objId))) - - - def toElement(self, se): - el = Element("PutObj", x1=str(self.x1), y1=str(self.y1), - refobj=str(self.content.objId)) - return el - - - - -class ImageStream(LrsObject, LrsContainer): - """ - Embed an image file into an Lrf. - """ - - VALID_ENCODINGS = [ "JPEG", "GIF", "BMP", "PNG" ] - - def __init__(self, file=None, encoding=None, comment=None): - LrsObject.__init__(self) - LrsContainer.__init__(self, []) - _checkExists(file) - self.filename = file - self.comment = comment - # TODO: move encoding from extension to lrf module - if encoding is None: - extension = os.path.splitext(file)[1] - if not extension: - raise LrsError, \ - "file must have extension if encoding is not specified" - extension = extension[1:].upper() - - if extension == "JPG": - extension = "JPEG" - - encoding = extension - else: - encoding = encoding.upper() - - if encoding not in self.VALID_ENCODINGS: - raise LrsError, \ - "encoding or file extension not JPEG, GIF, BMP, or PNG" - - self.encoding = encoding - - - def toLrf(self, lrfWriter): - imageFile = file(self.filename, "rb") - imageData = imageFile.read() - imageFile.close() - - isObj = LrfObject("ImageStream", self.objId) - if self.comment is not None: - isObj.appendLrfTag(LrfTag("comment", self.comment)) - - streamFlags = IMAGE_TYPE_ENCODING[self.encoding] - stream = LrfStreamBase(streamFlags, imageData) - isObj.appendLrfTags(stream.getStreamTags()) - lrfWriter.append(isObj) - - - def toElement(self, se): - element = self.lrsObjectElement("ImageStream", - 
objlabel="imagestreamlabel", - encoding=self.encoding, file=self.filename) - element.text = self.comment - return element - -class Image(LrsObject, LrsContainer, LrsAttributes): - - defaults = dict() - - def __init__(self, refstream, x0=0, x1=0, \ - y0=0, y1=0, xsize=0, ysize=0, **settings): - LrsObject.__init__(self) - LrsContainer.__init__(self, []) - LrsAttributes.__init__(self, self.defaults, settings) - self.x0, self.y0, self.x1, self.y1 = int(x0), int(y0), int(x1), int(y1) - self.xsize, self.ysize = int(xsize), int(ysize) - self.setRefstream(refstream) - - def setRefstream(self, refstream): - self.refstream = refstream - - def appendReferencedObjects(self, parent): - if self.refstream.parent is None: - parent.append(self.refstream) - - def getReferencedObjIds(self): - return [self.objId, self.refstream.objId] - - def toElement(self, se): - element = self.lrsObjectElement("Image", **self.attrs) - element.set("refstream", str(self.refstream.objId)) - for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: - element.set(name, str(getattr(self, name))) - return element - - def toLrf(self, lrfWriter): - ib = LrfObject("Image", self.objId) - ib.appendLrfTag(LrfTag("ImageRect", - (self.x0, self.y0, self.x1, self.y1))) - ib.appendLrfTag(LrfTag("ImageSize", (self.xsize, self.ysize))) - ib.appendLrfTag(LrfTag("RefObjId", self.refstream.objId)) - lrfWriter.append(ib) - - - - - -class ImageBlock(LrsObject, LrsContainer, LrsAttributes): - """ Create an image on a page. 
""" - # TODO: allow other block attributes - - defaults = BlockStyle.baseDefaults.copy() - - def __init__(self, refstream, x0="0", y0="0", x1="600", y1="800", - xsize="600", ysize="800", - blockStyle=BlockStyle(blockrule='block-fixed'), - alttext=None, **settings): - LrsObject.__init__(self) - LrsContainer.__init__(self, [Text, Image]) - LrsAttributes.__init__(self, self.defaults, **settings) - self.x0, self.y0, self.x1, self.y1 = int(x0), int(y0), int(x1), int(y1) - self.xsize, self.ysize = int(xsize), int(ysize) - self.setRefstream(refstream) - self.blockStyle = blockStyle - self.alttext = alttext - - def setRefstream(self, refstream): - self.refstream = refstream - - def appendReferencedObjects(self, parent): - if self.refstream.parent is None: - parent.append(self.refstream) - - if self.blockStyle is not None and self.blockStyle.parent is None: - parent.append(self.blockStyle) - - - def getReferencedObjIds(self): - objects = [self.objId, self.extraId, self.refstream.objId] - if self.blockStyle is not None: - objects.append(self.blockStyle.objId) - - return objects - - - def toLrf(self, lrfWriter): - self.toLrfContainer(lrfWriter, lrfWriter) - - - def toLrfContainer(self, lrfWriter, container): - # id really belongs to the outer block - - extraId = LrsObject.getNextObjId() - - b = LrfObject("Block", self.objId) - if self.blockStyle is not None: - b.appendLrfTag(LrfTag("Link", self.blockStyle.objId)) - b.appendTagDict(self.attrs) - - b.appendLrfTags( - LrfTagStream(0, - [LrfTag("Link", extraId)]).getStreamTags(lrfWriter.getSourceEncoding())) - container.addLrfObject(b.objId) - lrfWriter.append(b) - - ib = LrfObject("Image", extraId) - - ib.appendLrfTag(LrfTag("ImageRect", - (self.x0, self.y0, self.x1, self.y1))) - ib.appendLrfTag(LrfTag("ImageSize", (self.xsize, self.ysize))) - ib.appendLrfTag(LrfTag("RefObjId", self.refstream.objId)) - if self.alttext: - ib.appendLrfTag("Comment", self.alttext) - - - lrfWriter.append(ib) - self.extraId = extraId - - - def 
toElement(self, se): - element = self.lrsObjectElement("ImageBlock", **self.attrs) - element.set("refstream", str(self.refstream.objId)) - for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: - element.set(name, str(getattr(self, name))) - element.text = self.alttext - return element - - - -class Font(LrsContainer): - """ Allows a TrueType file to be embedded in an Lrf. """ - def __init__(self, file=None, fontname=None, fontfilename=None, encoding=None): - LrsContainer.__init__(self, []) - try: - _checkExists(fontfilename) - self.truefile = fontfilename - except: - try: - _checkExists(file) - self.truefile = file - except: - raise LrsError, "neither '%s' nor '%s' exists"%(fontfilename, file) - - self.file = file - self.fontname = fontname - self.fontfilename = fontfilename - self.encoding = encoding - - - def toLrf(self, lrfWriter): - font = LrfObject("Font", LrsObject.getNextObjId()) - lrfWriter.registerFontId(font.objId) - font.appendLrfTag(LrfTag("FontFilename", - lrfWriter.toUnicode(self.truefile))) - font.appendLrfTag(LrfTag("FontFacename", - lrfWriter.toUnicode(self.fontname))) - - stream = LrfFileStream(STREAM_FORCE_COMPRESSED, self.truefile) - font.appendLrfTags(stream.getStreamTags()) - - lrfWriter.append(font) - - - def toElement(self, se): - element = Element("RegistFont", encoding="TTF", fontname=self.fontname, - file=self.file, fontfilename=self.file) - return element +# Copyright (c) 2007 Mike Higgins (Falstaff) +# Modifications from the original: +# Copyright (C) 2007 Kovid Goyal +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: + +# The above copyright 
notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# Current limitations and bugs: +# Bug: Does not check if most setting values are valid unless lrf is created. +# +# Unsupported objects: MiniPage, SimpleTextBlock, Canvas, Window, +# PopUpWindow, Sound, Import, SoundStream, +# ObjectInfo +# +# Does not support background images for blocks or pages. +# +# The only button type supported are JumpButtons. +# +# None of the Japanese language tags are supported. 
+# +# Other unsupported tags: PageDiv, SoundStop, Wait, pos, +# Plot, Image (outside of ImageBlock), +# EmpLine, EmpDots + +import os, re, codecs, operator +from xml.sax.saxutils import escape +from datetime import date +try: + from elementtree.ElementTree import (Element, SubElement) + Element, SubElement +except ImportError: + from xml.etree.ElementTree import (Element, SubElement) + +from elements import ElementWriter +from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc, + STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING, + BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream, + STREAM_FORCE_COMPRESSED) + +DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set +DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs + +from calibre import __appname__, __version__ +from calibre import entity_to_unicode + +class LrsError(Exception): + pass + +class ContentError(Exception): + pass + +def _checkExists(filename): + if not os.path.exists(filename): + raise LrsError, "file '%s' not found" % filename + + +def _formatXml(root): + """ A helper to make the LRS output look nicer. """ + for elem in root.getiterator(): + if len(elem) > 0 and (not elem.text or not elem.text.strip()): + elem.text = "\n" + if not elem.tail or not elem.tail.strip(): + elem.tail = "\n" + + + +def ElementWithText(tag, text, **extra): + """ A shorthand function to create Elements with text. """ + e = Element(tag, **extra) + e.text = text + return e + + + +def ElementWithReading(tag, text, reading=False): + """ A helper function that creates reading attributes. 
""" + + # note: old lrs2lrf parser only allows reading = "" + + if text is None: + readingText = "" + elif isinstance(text, basestring): + readingText = text + else: + # assumed to be a sequence of (name, sortas) + readingText = text[1] + text = text[0] + + + if not reading: + readingText = "" + return ElementWithText(tag, text, reading=readingText) + + + +def appendTextElements(e, contentsList, se): + """ A helper function to convert text streams into the proper elements. """ + + def uconcat(text, newText, se): + if type(newText) != type(text): + if type(text) is str: + text = text.decode(se) + else: + newText = newText.decode(se) + + return text + newText + + + e.text = "" + lastElement = None + + for content in contentsList: + if not isinstance(content, Text): + newElement = content.toElement(se) + if newElement is None: + continue + lastElement = newElement + lastElement.tail = "" + e.append(lastElement) + else: + if lastElement is None: + e.text = uconcat(e.text, content.text, se) + else: + lastElement.tail = uconcat(lastElement.tail, content.text, se) + + + +class Delegator(object): + """ A mixin class to create delegated methods that create elements. 
""" + def __init__(self, delegates): + self.delegates = delegates + self.delegatedMethods = [] + #self.delegatedSettingsDict = {} + #self.delegatedSettings = [] + for d in delegates: + d.parent = self + methods = d.getMethods() + self.delegatedMethods += methods + for m in methods: + setattr(self, m, getattr(d, m)) + + """ + for setting in d.getSettings(): + if isinstance(setting, basestring): + setting = (d, setting) + delegates = \ + self.delegatedSettingsDict.setdefault(setting[1], []) + delegates.append(setting[0]) + self.delegatedSettings.append(setting) + """ + + + def applySetting(self, name, value, testValid=False): + applied = False + if name in self.getSettings(): + setattr(self, name, value) + applied = True + + for d in self.delegates: + if hasattr(d, "applySetting"): + applied = applied or d.applySetting(name, value) + else: + if name in d.getSettings(): + setattr(d, name, value) + applied = True + + if testValid and not applied: + raise LrsError, "setting %s not valid" % name + + return applied + + + def applySettings(self, settings, testValid=False): + for (setting, value) in settings.items(): + self.applySetting(setting, value, testValid) + """ + if setting not in self.delegatedSettingsDict: + raise LrsError, "setting %s not valid" % setting + delegates = self.delegatedSettingsDict[setting] + for d in delegates: + setattr(d, setting, value) + """ + + + def appendDelegates(self, element, sourceEncoding): + for d in self.delegates: + e = d.toElement(sourceEncoding) + if e is not None: + if isinstance(e, list): + for e1 in e: element.append(e1) + else: + element.append(e) + + + def appendReferencedObjects(self, parent): + for d in self.delegates: + d.appendReferencedObjects(parent) + + + def getMethods(self): + return self.delegatedMethods + + + def getSettings(self): + return [] + + + def toLrfDelegates(self, lrfWriter): + for d in self.delegates: + d.toLrf(lrfWriter) + + + def toLrf(self, lrfWriter): + self.toLrfDelegates(lrfWriter) + + + +class 
LrsAttributes(object): + """ A mixin class to handle default and user supplied attributes. """ + def __init__(self, defaults, alsoAllow=None, **settings): + if alsoAllow is None: + alsoAllow = [] + self.attrs = defaults.copy() + for (name, value) in settings.items(): + if name not in self.attrs and name not in alsoAllow: + raise LrsError, "%s does not support setting %s" % \ + (self.__class__.__name__, name) + if type(value) is int: + value = str(value) + self.attrs[name] = value + + + +class LrsContainer(object): + """ This class is a mixin class for elements that are contained in or + contain an unknown number of other elements. + """ + def __init__(self, validChildren): + self.parent = None + self.contents = [] + self.validChildren = validChildren + self.must_append = False #: If True even an empty container is appended by append_to + + def has_text(self): + ''' Return True iff this container has non whitespace text ''' + if hasattr(self, 'text'): + if self.text.strip(): + return True + if hasattr(self, 'contents'): + for child in self.contents: + if child.has_text(): + return True + for item in self.contents: + if isinstance(item, (Plot, ImageBlock, Canvas, CR)): + return True + return False + + def append_to(self, parent): + ''' + Append self to C{parent} iff self has non whitespace textual content + @type parent: LrsContainer + ''' + if self.contents or self.must_append: + parent.append(self) + + + def appendReferencedObjects(self, parent): + for c in self.contents: + c.appendReferencedObjects(parent) + + + def setParent(self, parent): + if self.parent is not None: + raise LrsError, "object already has parent" + + self.parent = parent + + + def append(self, content, convertText=True): + """ + Appends valid objects to container. Can auto-covert text strings + to Text objects. 
+ """ + for validChild in self.validChildren: + if isinstance(content, validChild): + break + else: + raise LrsError, "can't append %s to %s" % \ + (content.__class__.__name__, + self.__class__.__name__) + + if convertText and isinstance(content, basestring): + content = Text(content) + + content.setParent(self) + + if isinstance(content, LrsObject): + content.assignId() + + self.contents.append(content) + return self + + def get_all(self, predicate=lambda x: x): + for child in self.contents: + if predicate(child): + yield child + if hasattr(child, 'get_all'): + for grandchild in child.get_all(predicate): + yield grandchild + + + +class LrsObject(object): + """ A mixin class for elements that need an object id. """ + nextObjId = 0 + + @classmethod + def getNextObjId(selfClass): + selfClass.nextObjId += 1 + return selfClass.nextObjId + + def __init__(self, assignId=False): + if assignId: + self.objId = LrsObject.getNextObjId() + else: + self.objId = 0 + + + def assignId(self): + if self.objId != 0: + raise LrsError, "id already assigned to " + self.__class__.__name__ + + self.objId = LrsObject.getNextObjId() + + + def lrsObjectElement(self, name, objlabel="objlabel", labelName=None, + labelDecorate=True, **settings): + element = Element(name) + element.attrib["objid"] = str(self.objId) + if labelName is None: + labelName = name + if labelDecorate: + label = "%s.%d" % (labelName, self.objId) + else: + label = str(self.objId) + element.attrib[objlabel] = label + element.attrib.update(settings) + return element + + + +class Book(Delegator): + """ + Main class for any lrs or lrf. All objects must be appended to + the Book class in some way or another in order to be rendered as + an LRS or LRF file. + + The following settings are available on the contructor of Book: + + author="book author" or author=("book author", "sort as") + Author of the book. + + title="book title" or title=("book title", "sort as") + Title of the book. 
def __init__(self, textstyledefault=None, blockstyledefault=None,
             pagestyledefault=None,
             optimizeTags=False,
             optimizeCompression=False,
             **settings):
    """
    Build an empty book: default styles first, then the delegate chain.

    textstyledefault, blockstyledefault and pagestyledefault, when not
    None, are passed to update() on the book-wide default TextStyle,
    BlockStyle and PageStyle.  optimizeTags and optimizeCompression are
    stored as given (highly experimental -- use with caution).  A
    'setdefault' keyword, if present, is removed from settings and used
    as the StyleDefault of the Style delegate; everything left in
    settings goes through applySettings(..., testValid=True).
    """
    # The book is the top of the parent chain.
    self.parent = None

    if "thumbnail" in settings:
        _checkExists(settings["thumbnail"])

    self.optimizeTags = optimizeTags
    self.optimizeCompression = optimizeCompression

    # Construction order is kept: page, block, text.
    defaultPage = PageStyle(**PageStyle.baseDefaults.copy())
    defaultBlock = BlockStyle(**BlockStyle.baseDefaults.copy())
    defaultText = TextStyle(**TextStyle.baseDefaults.copy())

    # Overrides are applied text first, then block, then page.
    for style, overrides in ((defaultText, textstyledefault),
                             (defaultBlock, blockstyledefault),
                             (defaultPage, pagestyledefault)):
        if overrides is not None:
            style.update(overrides)

    self.defaultPageStyle = defaultPage
    self.defaultTextStyle = defaultText
    self.defaultBlockStyle = defaultBlock
    LrsObject.nextObjId += 1

    styledefault = StyleDefault()
    if 'setdefault' in settings:
        styledefault = settings.pop('setdefault')
    delegates = [BookInformation(), Main(), Template(),
                 Style(styledefault), Solos(), Objects()]
    Delegator.__init__(self, delegates)

    self.sourceencoding = None

    # Defaults go in first so the caller's settings can override them.
    self.applySetting("genreading", DEFAULT_GENREADING)
    self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)

    self.applySettings(settings, testValid=True)

    # create_page_style raises ContentError while this is False.
    self.allow_new_page = True
    self.gc_count = 0
def rationalize_font_sizes(self, base_font_size=10):
    """
    Rescale every font size in the book so that the size carrying the
    most text becomes base_font_size.

    base_font_size is multiplied by 10 before use (TextStyle documents
    fontsize as points * 10).  The dominant size is the one with the
    largest total len(text.text) over all Text objects under Main; each
    Text takes its size from the nearest ancestor that declares
    'fontsize' in attrs, textSettings or textStyle.attrs, and falls back
    to the target size.  If no text is found a warning is printed and
    nothing is changed.
    """
    target_size = base_font_size * 10.

    main = None
    for delegate in self.delegates:
        if isinstance(delegate, Main):
            main = delegate
            break

    # Looked up in this order on every ancestor.
    size_sources = (lambda node: node.attrs,
                    lambda node: node.textSettings,
                    lambda node: node.textStyle.attrs)

    def declared_size(node):
        # Return the font size declared on node itself, or None.
        for source in size_sources:
            try:
                return int(source(node)['fontsize'])
            except (AttributeError, KeyError):
                continue
        return None

    # font size -> number of characters rendered at that size
    usage = {}
    for text in main.get_all(lambda x: isinstance(x, Text)):
        size = target_size
        node = text.parent
        while node:
            found = declared_size(node)
            if found is not None:
                size = found
                break
            node = node.parent
        usage[size] = usage.get(size, 0) + len(text.text)

    if not usage:
        print('WARNING: LRF seems to have no textual content. Cannot rationalize font sizes.')
        return

    dominant_size = float(max(usage.items(), key=operator.itemgetter(1))[0])
    factor = target_size / dominant_size

    def rescale(old):
        return str(int(int(old) * factor))

    text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock)))
    for block in text_blocks:
        if 'fontsize' in block.textSettings:
            block.textSettings['fontsize'] = rescale(block.textSettings['fontsize'])
        for span in block.get_all(lambda x: isinstance(x, Span)):
            for key in ('fontsize', 'baselineskip'):
                if key in span.attrs:
                    span.attrs[key] = rescale(span.attrs[key])

    # Each distinct text style is rescaled once, however many blocks use it.
    for style in set(tb.textStyle for tb in text_blocks):
        for key in ('fontsize', 'baselineskip'):
            style.attrs[key] = rescale(style.attrs[key])
""" + + self.appendReferencedObjects(self) + + # create the root node, and populate with the parts of the book + + root = self.toElement(self.sourceencoding) + + # now, add some newlines to make it easier to look at + + _formatXml(root) + + writer = ElementWriter(root, header=True, + sourceEncoding=self.sourceencoding, + spaceBeforeClose=False, + outputEncodingName=outputEncodingName) + writer.write(f) + + + +class BookInformation(Delegator): + """ Just a container for the Info and TableOfContents elements. """ + def __init__(self): + Delegator.__init__(self, [Info(), TableOfContents()]) + + + def toElement(self, se): + bi = Element("BookInformation") + self.appendDelegates(bi, se) + return bi + + + +class Info(Delegator): + """ Just a container for the BookInfo and DocInfo elements. """ + def __init__(self): + self.genreading = DEFAULT_GENREADING + Delegator.__init__(self, [BookInfo(), DocInfo()]) + + + def getSettings(self): + return ["genreading"] #+ self.delegatedSettings + + + def toElement(self, se): + info = Element("Info", version="1.1") + info.append( + self.delegates[0].toElement(se, reading="s" in self.genreading)) + info.append(self.delegates[1].toElement(se)) + return info + + + def toLrf(self, lrfWriter): + # this info is set in XML form in the LRF + info = Element("Info", version="1.1") + #self.appendDelegates(info) + info.append( + self.delegates[0].toElement(lrfWriter.getSourceEncoding(), reading="f" in self.genreading)) + info.append(self.delegates[1].toElement(lrfWriter.getSourceEncoding())) + + # look for the thumbnail file and get the filename + tnail = info.find("DocInfo/CThumbnail") + if tnail is not None: + lrfWriter.setThumbnailFile(tnail.get("file")) + # does not work: info.remove(tnail) + + + _formatXml(info) + + # fix up the doc info to match the LRF format + # NB: generates an encoding attribute, which lrs2lrf does not + xmlInfo = ElementWriter(info, header=True, sourceEncoding=lrfWriter.getSourceEncoding(), + 
class TableOfContents(object):
    """
    Collects the table-of-contents entries of a book.

    Entries are TocLabel objects kept in ``tocEntries`` in the order in
    which they were added.
    """

    def __init__(self):
        self.tocEntries = []


    def appendReferencedObjects(self, parent):
        """ The table of contents appends nothing to parent. """
        pass


    def getMethods(self):
        """ Return the method names this object offers: ["addTocEntry"]. """
        return ["addTocEntry"]


    def getSettings(self):
        """ Return the setting names this object offers (none). """
        return []


    def addTocEntry(self, tocLabel, textBlock):
        """
        Register textBlock as the destination of a TOC entry.

        @param tocLabel: label text for the entry.
        @param textBlock: a Canvas, TextBlock, ImageBlock or RuledLine that
            is already appended to a container which has an objId.
        @raise LrsError: if textBlock has the wrong type, has no parent, or
            its parent has no objId attribute.

        Adding an entry whose stored label and block both compare equal to
        an existing entry is a no-op.  On success textBlock.tocLabel is set
        to tocLabel.
        """
        if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)):
            raise LrsError(
                "TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"
                " not a " + str(type(textBlock)))

        if textBlock.parent is None:
            raise LrsError("TOC text block must be already appended to a page")

        # A further check that the destination page is itself appended to a
        # book used to sit here behind `if False and ...`; it could never
        # run, so the unreachable branch has been dropped.

        if not hasattr(textBlock.parent, 'objId'):
            raise LrsError("TOC destination must be appended to a container with an objID")

        for entry in self.tocEntries:
            if entry.label == tocLabel and entry.textBlock == textBlock:
                return

        self.tocEntries.append(TocLabel(tocLabel, textBlock))
        textBlock.tocLabel = tocLabel


    def toElement(self, se):
        """ Return a "TOC" element with one child per entry, or None if empty. """
        if not self.tocEntries:
            return None

        toc = Element("TOC")
        for entry in self.tocEntries:
            toc.append(entry.toElement(se))

        return toc


    def toLrf(self, lrfWriter):
        """
        Append an LrfToc built from the entries to lrfWriter and register
        it as the writer's TOC object.  Does nothing when there are no
        entries.
        """
        if not self.tocEntries:
            return

        # One (page objId, block objId, label) triple per entry.
        toc = [(entry.textBlock.parent.objId, entry.textBlock.objId, entry.label)
               for entry in self.tocEntries]

        lrfToc = LrfToc(LrsObject.getNextObjId(), toc, lrfWriter.getSourceEncoding())
        lrfWriter.append(lrfToc)
        lrfWriter.setTocObject(lrfToc)
class BookInfo(object):
    """
    Holds the bibliographic fields of a book and renders them as the
    "BookInfo" element.
    """

    def __init__(self):
        self.title = "Untitled"
        self.author = "Anonymous"
        self.freetext = "\n\n"
        # Everything else starts out unset.
        self.bookid = self.pi = self.isbn = self.publisher = None
        self.label = self.category = self.classification = None

    def appendReferencedObjects(self, parent):
        """ BookInfo appends nothing to parent. """
        return None


    def getMethods(self):
        """ Return the method names this object offers (none). """
        return list()


    def getSettings(self):
        """ Return the names of the attributes that may be applied as settings. """
        names = ["author", "title", "bookid", "isbn", "publisher"]
        names += ["freetext", "label", "category", "classification"]
        return names


    def _appendISBN(self, bi):
        """
        Append a "ProductIdentifier" element to bi carrying self.isbn both
        as given and with the hyphens removed.
        """
        identifier = Element("ProductIdentifier")
        printable = ElementWithText("ISBNPrintable", self.isbn)
        bare = ElementWithText("ISBNValue", self.isbn.replace("-", ""))

        for child in (printable, bare):
            identifier.append(child)
        bi.append(identifier)


    def toElement(self, se, reading=True):
        """
        Return the "BookInfo" element.  The ISBN and Publisher children
        are emitted only when the corresponding attribute is not None.
        """
        bi = Element("BookInfo")
        add = bi.append

        add(ElementWithReading("Title", self.title, reading=reading))
        add(ElementWithReading("Author", self.author, reading=reading))
        add(ElementWithText("BookID", self.bookid))

        if self.isbn is not None:
            self._appendISBN(bi)
        if self.publisher is not None:
            add(ElementWithReading("Publisher", self.publisher))

        add(ElementWithReading("Label", self.label, reading=reading))
        add(ElementWithText("Category", self.category))
        add(ElementWithText("Classification", self.classification))
        add(ElementWithText("FreeText", self.freetext))
        return bi
class Main(LrsContainer):
    """
    Container for the Page objects of a book.  The element name used by
    toElement is the class name, so the Solo subclass renders as "Solo".
    """

    def __init__(self):
        LrsContainer.__init__(self, [Page])


    def getMethods(self):
        """ Return the method names this object offers. """
        return ["appendPage", "Page"]


    def getSettings(self):
        """ Return the setting names this object offers (none). """
        return []


    def Page(self, *args, **kwargs):
        """ Create a Page from the given arguments, append it and return it. """
        newPage = Page(*args, **kwargs)
        self.append(newPage)
        return newPage


    def appendPage(self, page):
        """ Append an existing page. """
        self.append(page)


    def toElement(self, sourceEncoding):
        """ Return an element named after this class with one child per page. """
        element = Element(self.__class__.__name__)

        for child in self.contents:
            element.append(child.toElement(sourceEncoding))

        return element


    def toLrf(self, lrfWriter):
        """
        Write every page to lrfWriter, followed by a "PageTree" object
        listing the page ids.
        """
        # The page tree id is registered before any page is written so
        # that the pages can see it.
        treeId = LrsObject.getNextObjId()
        lrfWriter.setPageTreeId(treeId)

        # Collect the page ids while dumping the pages.
        ids = []
        for page in self.contents:
            ids.append(page.objId)
            page.toLrf(lrfWriter)

        tree = LrfObject("PageTree", treeId)
        tree.appendLrfTag(LrfTag("PageList", ids))

        lrfWriter.append(tree)
class Template(object):
    """ Does nothing that I know of. """

    def appendReferencedObjects(self, parent):
        """ A Template appends nothing to parent. """
        return None


    def getMethods(self):
        """ Return the method names this object offers (none). """
        return list()


    def getSettings(self):
        """ Return the setting names this object offers (none). """
        return list()


    def toElement(self, se):
        """ Return a "Template" element whose version attribute is "1.0". """
        template = Element("Template")
        template.attrib["version"] = "1.0"
        return template

    def toLrf(self, lrfWriter):
        """ Writes nothing to the LRF. """
        return None
class BookStyle(LrsObject, LrsContainer):
    """
    The book-wide style object: holds the StyleDefault, the BookSetting
    and any Font objects appended to it.
    """

    def __init__(self, styledefault=None):
        """
        @param styledefault: the StyleDefault to use.  When None, a new
            StyleDefault() is created for this BookStyle.
        """
        LrsObject.__init__(self, assignId=True)
        LrsContainer.__init__(self, [Font])
        # Built per instance: a StyleDefault() written in the signature is
        # evaluated once and would be shared by every BookStyle.
        if styledefault is None:
            styledefault = StyleDefault()
        self.styledefault = styledefault
        self.booksetting = BookSetting()
        self.appendFont = self.append


    def getSettings(self):
        """ Return the names of the attributes that may be applied as settings. """
        return ["styledefault", "booksetting"]


    def getMethods(self):
        """ Return the method names this object offers. """
        return ["Font", "appendFont"]


    def Font(self, *args, **kwargs):
        """
        Create a Font from the given arguments, append it and return it.

        The font is returned so this behaves like the other create-and-
        append shortcuts in this file (it previously returned None).
        """
        f = Font(*args, **kwargs)
        self.append(f)
        return f


    def toElement(self, se):
        """
        Return the "BookStyle" element: the style default, the book
        setting, then one child per appended font.
        """
        bookStyle = self.lrsObjectElement("BookStyle", objlabel="stylelabel",
                labelDecorate=False)
        bookStyle.append(self.styledefault.toElement(se))
        bookStyle.append(self.booksetting.toElement(se))
        for font in self.contents:
            bookStyle.append(font.toElement(se))

        return bookStyle


    def toLrf(self, lrfWriter):
        """
        Write the "BookAtr" object to lrfWriter, register it as the root
        object, then write the fonts.
        """
        bookAtr = LrfObject("BookAtr", self.objId)
        bookAtr.appendLrfTag(LrfTag("ChildPageTree", lrfWriter.getPageTreeId()))
        bookAtr.appendTagDict(self.styledefault.attrs)

        # The book setting writes its values onto the writer itself.
        self.booksetting.toLrf(lrfWriter)

        lrfWriter.append(bookAtr)
        lrfWriter.setRootObject(bookAtr)

        for font in self.contents:
            font.toLrf(lrfWriter)
class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
    """ A mixin class for styles. """

    def __init__(self, elementName, defaults=None, alsoAllow=None, **overrides):
        """
        @param elementName: name used for both the XML element and the
            LRF object of this style.
        @param defaults: default attribute values; an empty dict when None.
        @param alsoAllow: passed through to LrsAttributes.
        @param overrides: passed through to LrsAttributes.
        """
        if defaults is None:
            defaults = {}

        LrsObject.__init__(self)
        LrsAttributes.__init__(self, defaults, alsoAllow=alsoAllow, **overrides)
        LrsContainer.__init__(self, [])
        self.elementName = elementName
        self.objectsAppended = False


    def update(self, settings):
        """
        Copy settings into self.attrs one at a time.

        @raise LrsError: on the first name that is not in the class's
            validSettings; names seen before it have already been applied.
        """
        for name, value in settings.items():
            if name in self.__class__.validSettings:
                self.attrs[name] = value
                continue
            raise LrsError("%s not a valid setting for %s" %
                           (name, self.__class__.__name__))

    def getLabel(self):
        """ Return the style label: the object id as a string. """
        return str(self.objId)


    def toElement(self, se):
        """ Return the style element carrying its label, id and attributes. """
        styleElement = Element(self.elementName, stylelabel=self.getLabel(),
                               objid=str(self.objId))
        styleElement.attrib.update(self.attrs)
        return styleElement


    def toLrf(self, lrfWriter):
        """ Append an LrfObject holding this style's attributes to lrfWriter. """
        lrfStyle = LrfObject(self.elementName, self.objId)
        lrfStyle.appendTagDict(self.attrs, self.__class__.__name__)
        lrfWriter.append(lrfStyle)

    def __eq__(self, other):
        """ Equal when other has attrs, the same class and equal attrs. """
        if not hasattr(other, 'attrs'):
            return False
        return self.__class__ == other.__class__ and self.attrs == other.attrs
class BlockStyle(LrsStyle):
    """
        The block style of a TextBlock.  Default is an expandable 560 pixel
        wide area with no space for headers or footers.

        Setting         Value                   Default
        --------        -----                   -------
        blockwidth      pixels                  560
        sidemargin      pixels                  0
    """

    baseDefaults = {
        "bgimagemode": "fix",
        "framemode": "square",
        "blockwidth": "560",
        "blockheight": "100",
        "blockrule": "horz-adjustable",
        "layout": "LrTb",
        "framewidth": "0",
        "framecolor": "0x00000000",
        "topskip": "0",
        "sidemargin": "0",
        "footskip": "0",
        "bgcolor": "0xFF000000",
    }

    # Only the base defaults may be set on a block style.
    validSettings = list(baseDefaults)
    defaults = baseDefaults.copy()

    def __init__(self, **overrides):
        LrsStyle.__init__(self, "BlockStyle", self.defaults, **overrides)

    def copy(self):
        """ Return a new BlockStyle whose attrs is a shallow copy of ours. """
        duplicate = BlockStyle()
        duplicate.attrs = self.attrs.copy()
        return duplicate
class Page(LrsObject, LrsContainer):
    """
        Pages are added to Books.  Pages can be supplied a PageStyle.
        If they are not, Page.defaultPageStyle will be used.
    """
    defaultPageStyle = PageStyle()

    def __init__(self, pageStyle=defaultPageStyle, **settings):
        """
        @param pageStyle: the PageStyle for this page.
        @param settings: per-page overrides; every name must be a key of
            PageStyle.defaults or listed in PageStyle.alsoAllow.
        @raise LrsError: on the first setting name that is not allowed.
        """
        LrsObject.__init__(self)
        LrsContainer.__init__(self, [TextBlock, BlockSpace, RuledLine,
                                     ImageBlock, Canvas])

        self.pageStyle = pageStyle

        for settingName in settings:
            allowed = (settingName in PageStyle.defaults or
                       settingName in PageStyle.alsoAllow)
            if not allowed:
                raise LrsError("setting %s not allowed on Page" % settingName)

        # Keep our own copy of the keyword dictionary.
        self.settings = settings.copy()


    def appendReferencedObjects(self, parent):
        """
        Append to parent whatever this page refers to: header/footer
        objects named in the page settings, the page style's own
        referenced objects, the page style itself if it has no parent yet,
        and finally the referenced objects of the page contents.
        """
        PageStyle.translateHeaderAndFooter(parent, self.settings)

        style = self.pageStyle
        style.appendReferencedObjects(parent)
        if style.parent is None:
            parent.append(style)

        LrsContainer.appendReferencedObjects(self, parent)


    def RuledLine(self, *args, **kwargs):
        """ Create and append a new RuledLine (shortcut). """
        line = RuledLine(*args, **kwargs)
        self.append(line)
        return line


    def BlockSpace(self, *args, **kwargs):
        """ Create and append a new BlockSpace (shortcut). """
        space = BlockSpace(*args, **kwargs)
        self.append(space)
        return space


    def TextBlock(self, *args, **kwargs):
        """ Create and append a new text block (shortcut). """
        block = TextBlock(*args, **kwargs)
        self.append(block)
        return block


    def ImageBlock(self, *args, **kwargs):
        """ Create and append a new image block (shortcut). """
        block = ImageBlock(*args, **kwargs)
        self.append(block)
        return block


    def addLrfObject(self, objId):
        """ Add a "Link" tag for objId to the page stream. """
        self.stream.appendLrfTag(LrfTag("Link", objId))


    def appendLrfTag(self, lrfTag):
        """ Add lrfTag to the page stream. """
        self.stream.appendLrfTag(lrfTag)


    def toLrf(self, lrfWriter):
        """
        Write this page to lrfWriter.

        The "Page" object is appended to the writer first; the contents
        then write themselves through this page (see addLrfObject and
        appendLrfTag) into a fresh tag stream.  Tags on the page object:
        link to the page style, parent page tree id, the page settings,
        then the stream.
        """
        lrfPage = LrfObject("Page", self.objId)
        lrfWriter.append(lrfPage)

        referenced = set()
        self.stream = LrfTagStream(0)
        for child in self.contents:
            child.toLrfContainer(lrfWriter, self)
            if hasattr(child, "getReferencedObjIds"):
                referenced.update(child.getReferencedObjIds())

        # The collected ids are not written out: an ObjectList tag is not
        # needed and causes slowdown in the SONY LRF renderer.
        lrfPage.appendLrfTag(LrfTag("Link", self.pageStyle.objId))
        lrfPage.appendLrfTag(LrfTag("ParentPageTree", lrfWriter.getPageTreeId()))
        lrfPage.appendTagDict(self.settings)
        lrfPage.appendLrfTags(
            self.stream.getStreamTags(lrfWriter.getSourceEncoding()))


    def toElement(self, sourceEncoding):
        """ Return the "Page" element with its style label, settings and contents. """
        element = self.lrsObjectElement("Page")
        element.set("pagestyle", self.pageStyle.getLabel())
        element.attrib.update(self.settings)

        for child in self.contents:
            element.append(child.toElement(sourceEncoding))

        return element
def appendReferencedObjects(self, parent):
    """
    Append the text style and then the block style to parent, each only
    if it has no parent yet, before handing over to LrsContainer for the
    contents.
    """
    for attribute in ("textStyle", "blockStyle"):
        style = getattr(self, attribute)
        if style.parent is None:
            parent.append(style)

    LrsContainer.appendReferencedObjects(self, parent)
def toLrfContainer(self, lrfWriter, container):
    """
    Write this text block to lrfWriter as two LRF objects and register
    the outer one with container.

    The outer "Block" object carries self.objId, a link to the block
    style, a one-tag stream linking to the inner object, and the block
    settings.  The inner "TextBlock" object gets a newly allocated id and
    carries a link to the text style, the text settings and the stream
    produced by the contents.  The new id is stored on self.extraId at
    the end.
    """
    # id really belongs to the outer block
    extraId = LrsObject.getNextObjId()

    b = LrfObject("Block", self.objId)
    b.appendLrfTag(LrfTag("Link", self.blockStyle.objId))
    b.appendLrfTags(
            LrfTagStream(0, [LrfTag("Link", extraId)]). \
                    getStreamTags(lrfWriter.getSourceEncoding()))
    b.appendTagDict(self.blockSettings)
    # The container only learns about the outer block.
    container.addLrfObject(b.objId)
    lrfWriter.append(b)

    tb = LrfObject("TextBlock", extraId)
    tb.appendLrfTag(LrfTag("Link", self.textStyle.objId))
    tb.appendTagDict(self.textSettings)

    # The contents write their tags into this stream, not into container.
    stream = LrfTagStream(STREAM_COMPRESSED)
    for content in self.contents:
        content.toLrfContainer(lrfWriter, stream)

    if lrfWriter.saveStreamTags: # true only if testing
        tb.saveStreamTags = stream.tags

    tb.appendLrfTags(
            stream.getStreamTags(lrfWriter.getSourceEncoding(),
                optimizeTags=lrfWriter.optimizeTags,
                optimizeCompression=lrfWriter.optimizeCompression))
    lrfWriter.append(tb)

    # getReferencedObjIds reads this attribute.
    self.extraId = extraId

<P> alone does not make a paragraph. Only a CR inserted + into a text block right after a <P>

class LrsTextTag(LrsContainer):
    """
    Base for text tags that wrap their contents between a start tag and
    a matching "...End" tag.  The tag name is self.tagName when that
    attribute exists, otherwise the class name.
    """

    def __init__(self, text, validContents):
        """
        @param text: initial content, appended unless it is None.
        @param validContents: list of further classes allowed as contents,
            in addition to Text and basestring.
        """
        LrsContainer.__init__(self, [Text, basestring] + validContents)
        if text is not None:
            self.append(text)


    def _resolveTagName(self):
        # Name used for both the LRF tags and the XML element.
        return self.tagName if hasattr(self, "tagName") \
            else self.__class__.__name__


    def toLrfContainer(self, lrfWriter, parent):
        """ Write the start tag, the contents, then the End tag to parent. """
        name = self._resolveTagName()

        parent.appendLrfTag(LrfTag(name))

        for child in self.contents:
            child.toLrfContainer(lrfWriter, parent)

        parent.appendLrfTag(LrfTag(name + "End"))


    def toElement(self, se):
        """ Return an element named after the tag, holding the contents. """
        element = Element(self._resolveTagName())
        appendTextElements(element, self.contents, se)
        return element
class Button(LrsObject, LrsContainer):
    """
    A button object.  Its contents are PushButton objects, which in turn
    hold the JumpTo naming the destination.
    """

    def __init__(self, **settings):
        LrsObject.__init__(self, **settings)
        LrsContainer.__init__(self, [PushButton])

    def findJumpToRefs(self):
        """
        Return (refobj, refpage) for the first JumpTo found inside a
        PushButton child: the objId of the target text block and the objId
        of that block's parent.

        @raise LrsError: if no PushButton child contains a JumpTo.
        """
        for pushButton in self.contents:
            if not isinstance(pushButton, PushButton):
                continue
            for action in pushButton.contents:
                if isinstance(action, JumpTo):
                    target = action.textBlock
                    return (target.objId, target.parent.objId)
        raise LrsError("%s has no PushButton or JumpTo subs" % self.__class__.__name__)

    def toLrf(self, lrfWriter):
        """ Append a "Button" LrfObject with a single jump action to lrfWriter. """
        (refobj, refpage) = self.findJumpToRefs()
        button = LrfObject("Button", self.objId)
        button.appendLrfTag(LrfTag("buttonflags", 0x10)) # pushbutton
        button.appendLrfTag(LrfTag("PushButtonStart"))
        button.appendLrfTag(LrfTag("buttonactions"))
        # The jumpto tag takes the page id first, then the object id.
        button.appendLrfTag(LrfTag("jumpto", (int(refpage), int(refobj))))
        # Was button.append(...); uses appendLrfTag like every other tag here.
        button.appendLrfTag(LrfTag("endbuttonactions"))
        button.appendLrfTag(LrfTag("PushButtonEnd"))
        lrfWriter.append(button)

    def toElement(self, se):
        """ Return the "Button" element with one child per content object. """
        b = self.lrsObjectElement("Button")

        for content in self.contents:
            b.append(content.toElement(se))

        return b
parent.appendLrfTag(LrfTag("Plot", params)) + +class Text(LrsContainer): + """ A object that represents raw text. Does not have a toElement. """ + def __init__(self, text): + LrsContainer.__init__(self, []) + self.text = text + + def isEmpty(self): + return not self.text or not self.text.strip() + + def toLrfContainer(self, lrfWriter, parent): + if self.text: + if isinstance(self.text, str): + parent.appendLrfTag(LrfTag("rawtext", self.text)) + else: + parent.appendLrfTag(LrfTag("textstring", self.text)) + + +class CR(LrsSimpleChar1, LrsContainer): + """ + A line break (when appended to a Paragraph) or a paragraph break + (when appended to a TextBlock). + """ + def __init__(self): + LrsContainer.__init__(self, []) + + + def toElement(self, se): + return Element("CR") + + + def toLrfContainer(self, lrfWriter, parent): + parent.appendLrfTag(LrfTag("CR")) + + + +class Italic(LrsSimpleChar1, LrsTextTag): + def __init__(self, text=None): + LrsTextTag.__init__(self, text, [LrsSimpleChar1]) + +class Sub(LrsSimpleChar1, LrsTextTag): + def __init__(self, text=None): + LrsTextTag.__init__(self, text, []) + + + +class Sup(LrsSimpleChar1, LrsTextTag): + def __init__(self, text=None): + LrsTextTag.__init__(self, text, []) + + + +class NoBR(LrsSimpleChar1, LrsTextTag): + def __init__(self, text=None): + LrsTextTag.__init__(self, text, [LrsSimpleChar1]) + + +class Space(LrsSimpleChar1, LrsContainer): + def __init__(self, xsize=0, x=0): + LrsContainer.__init__(self, []) + if xsize == 0 and x != 0: xsize = x + self.xsize = xsize + + + def toElement(self, se): + if self.xsize == 0: + return + + return Element("Space", xsize=str(self.xsize)) + + + def toLrfContainer(self, lrfWriter, container): + if self.xsize != 0: + container.appendLrfTag(LrfTag("Space", self.xsize)) + + +class Box(LrsSimpleChar1, LrsContainer): + """ + Draw a box around text. Unfortunately, does not seem to do + anything on the PRS-500. 
+ """ + def __init__(self, linetype="solid"): + LrsContainer.__init__(self, [Text, basestring]) + if linetype not in LINE_TYPE_ENCODING: + raise LrsError, linetype + " is not a valid line type" + self.linetype = linetype + + + def toElement(self, se): + e = Element("Box", linetype=self.linetype) + appendTextElements(e, self.contents, se) + return e + + + def toLrfContainer(self, lrfWriter, container): + container.appendLrfTag(LrfTag("Box", self.linetype)) + for content in self.contents: + content.toLrfContainer(lrfWriter, container) + container.appendLrfTag(LrfTag("BoxEnd")) + + + + +class Span(LrsSimpleChar1, LrsContainer): + def __init__(self, text=None, **attrs): + LrsContainer.__init__(self, [LrsSimpleChar1, Text, basestring]) + if text is not None: + if isinstance(text, basestring): + text = Text(text) + self.append(text) + + for attrname in attrs.keys(): + if attrname not in TextStyle.defaults and \ + attrname not in TextStyle.alsoAllow: + raise LrsError, "setting %s not allowed on Span" % attrname + self.attrs = attrs + + + def findCurrentTextStyle(self): + parent = self.parent + while 1: + if parent is None or hasattr(parent, "currentTextStyle"): + break + parent = parent.parent + + if parent is None: + raise LrsError, "no enclosing current TextStyle found" + + return parent.currentTextStyle + + + + def toLrfContainer(self, lrfWriter, container): + + # find the currentTextStyle + oldTextStyle = self.findCurrentTextStyle() + + # set the attributes we want changed + for (name, value) in self.attrs.items(): + if name in oldTextStyle.attrs and oldTextStyle.attrs[name] == self.attrs[name]: + self.attrs.pop(name) + else: + container.appendLrfTag(LrfTag(name, value)) + + # set a currentTextStyle so nested span can put things back + oldTextStyle = self.findCurrentTextStyle() + self.currentTextStyle = oldTextStyle.copy() + self.currentTextStyle.attrs.update(self.attrs) + + for content in self.contents: + content.toLrfContainer(lrfWriter, container) + + # put the 
attributes back the way we found them + # the attributes persist beyond the next

+ # if self.hasFollowingContent(): + for name in self.attrs.keys(): + container.appendLrfTag(LrfTag(name, oldTextStyle.attrs[name])) + + + def toElement(self, se): + element = Element('Span') + for (key, value) in self.attrs.items(): + element.set(key, str(value)) + + appendTextElements(element, self.contents, se) + return element + +class EmpLine(LrsTextTag, LrsSimpleChar1): + emplinetypes = ['none', 'solid', 'dotted', 'dashed', 'double'] + emplinepositions = ['before', 'after'] + + def __init__(self, text=None, emplineposition='before', emplinetype='solid'): + LrsTextTag.__init__(self, text, [LrsSimpleChar1]) + if emplineposition not in self.__class__.emplinepositions: + raise LrsError('emplineposition for an EmpLine must be one of: '+str(self.__class__.emplinepositions)) + if emplinetype not in self.__class__.emplinetypes: + raise LrsError('emplinetype for an EmpLine must be one of: '+str(self.__class__.emplinetypes)) + + self.emplinetype = emplinetype + self.emplineposition = emplineposition + + + + def toLrfContainer(self, lrfWriter, parent): + parent.appendLrfTag(LrfTag(self.__class__.__name__, (self.emplineposition, self.emplinetype))) + parent.appendLrfTag(LrfTag('emplineposition', self.emplineposition)) + parent.appendLrfTag(LrfTag('emplinetype', self.emplinetype)) + for content in self.contents: + content.toLrfContainer(lrfWriter, parent) + + parent.appendLrfTag(LrfTag(self.__class__.__name__ + "End")) + + def toElement(self, se): + element = Element(self.__class__.__name__) + element.set('emplineposition', self.emplineposition) + element.set('emplinetype', self.emplinetype) + + appendTextElements(element, self.contents, se) + return element + +class Bold(Span): + """ + There is no known "bold" lrf tag. Use Span with a fontweight in LRF, + but use the word Bold in the LRS. 
+ """ + def __init__(self, text=None): + Span.__init__(self, text, fontweight=800) + + def toElement(self, se): + e = Element("Bold") + appendTextElements(e, self.contents, se) + return e + + +class BlockSpace(LrsContainer): + """ Can be appended to a page to move the text point. """ + def __init__(self, xspace=0, yspace=0, x=0, y=0): + LrsContainer.__init__(self, []) + if xspace == 0 and x != 0: + xspace = x + if yspace == 0 and y != 0: + yspace = y + self.xspace = xspace + self.yspace = yspace + + + def toLrfContainer(self, lrfWriter, container): + if self.xspace != 0: + container.appendLrfTag(LrfTag("xspace", self.xspace)) + if self.yspace != 0: + container.appendLrfTag(LrfTag("yspace", self.yspace)) + + + def toElement(self, se): + element = Element("BlockSpace") + + if self.xspace != 0: + element.attrib["xspace"] = str(self.xspace) + if self.yspace != 0: + element.attrib["yspace"] = str(self.yspace) + + return element + + + +class CharButton(LrsSimpleChar1, LrsContainer): + """ + Define the text and target of a CharButton. Must be passed a + JumpButton that is the destination of the CharButton. + + Only text or SimpleChars can be appended to the CharButton. 
+ """ + def __init__(self, button, text=None): + LrsContainer.__init__(self, [basestring, Text, LrsSimpleChar1]) + self.button = None + if button != None: + self.setButton(button) + + if text is not None: + self.append(text) + + def setButton(self, button): + if not isinstance(button, (JumpButton, Button)): + raise LrsError, "CharButton button must be a JumpButton or Button" + + self.button = button + + + def appendReferencedObjects(self, parent): + if self.button.parent is None: + parent.append(self.button) + + + def getReferencedObjIds(self): + return [self.button.objId] + + + def toLrfContainer(self, lrfWriter, container): + container.appendLrfTag(LrfTag("CharButton", self.button.objId)) + + for content in self.contents: + content.toLrfContainer(lrfWriter, container) + + container.appendLrfTag(LrfTag("CharButtonEnd")) + + + def toElement(self, se): + cb = Element("CharButton", refobj=str(self.button.objId)) + appendTextElements(cb, self.contents, se) + return cb + + + +class Objects(LrsContainer): + def __init__(self): + LrsContainer.__init__(self, [JumpButton, TextBlock, HeaderOrFooter, + ImageStream, Image, ImageBlock, Button, ButtonBlock]) + self.appendJumpButton = self.appendTextBlock = self.appendHeader = \ + self.appendFooter = self.appendImageStream = \ + self.appendImage = self.appendImageBlock = self.append + + + def getMethods(self): + return ["JumpButton", "appendJumpButton", "TextBlock", + "appendTextBlock", "Header", "appendHeader", + "Footer", "appendFooter", "ImageBlock", + "ImageStream", "appendImageStream", + 'Image','appendImage', 'appendImageBlock'] + + + def getSettings(self): + return [] + + + def ImageBlock(self, *args, **kwargs): + ib = ImageBlock(*args, **kwargs) + self.append(ib) + return ib + + def JumpButton(self, textBlock): + b = JumpButton(textBlock) + self.append(b) + return b + + + def TextBlock(self, *args, **kwargs): + tb = TextBlock(*args, **kwargs) + self.append(tb) + return tb + + + def Header(self, *args, **kwargs): + h = 
Header(*args, **kwargs) + self.append(h) + return h + + + def Footer(self, *args, **kwargs): + h = Footer(*args, **kwargs) + self.append(h) + return h + + + def ImageStream(self, *args, **kwargs): + i = ImageStream(*args, **kwargs) + self.append(i) + return i + + def Image(self, *args, **kwargs): + i = Image(*args, **kwargs) + self.append(i) + return i + + def toElement(self, se): + o = Element("Objects") + + for content in self.contents: + o.append(content.toElement(se)) + + return o + + + def toLrf(self, lrfWriter): + for content in self.contents: + content.toLrf(lrfWriter) + + +class JumpButton(LrsObject, LrsContainer): + """ + The target of a CharButton. Needs a parented TextBlock to jump to. + Actually creates several elements in the XML. JumpButtons must + be eventually appended to a Book (actually, an Object.) + """ + def __init__(self, textBlock): + LrsObject.__init__(self) + LrsContainer.__init__(self, []) + self.textBlock = textBlock + + def setTextBlock(self, textBlock): + self.textBlock = textBlock + + def toLrf(self, lrfWriter): + button = LrfObject("Button", self.objId) + button.appendLrfTag(LrfTag("buttonflags", 0x10)) # pushbutton + button.appendLrfTag(LrfTag("PushButtonStart")) + button.appendLrfTag(LrfTag("buttonactions")) + button.appendLrfTag(LrfTag("jumpto", + (self.textBlock.parent.objId, self.textBlock.objId))) + button.append(LrfTag("endbuttonactions")) + button.appendLrfTag(LrfTag("PushButtonEnd")) + lrfWriter.append(button) + + + def toElement(self, se): + b = self.lrsObjectElement("Button") + pb = SubElement(b, "PushButton") + jt = SubElement(pb, "JumpTo", + refpage=str(self.textBlock.parent.objId), + refobj=str(self.textBlock.objId)) + return b + + + +class RuledLine(LrsContainer, LrsAttributes, LrsObject): + """ A line. Default is 500 pixels long, 2 pixels wide. 
""" + + defaults = dict( + linelength="500", linetype="solid", linewidth="2", + linecolor="0x00000000") + + def __init__(self, **settings): + LrsContainer.__init__(self, []) + LrsAttributes.__init__(self, self.defaults, **settings) + LrsObject.__init__(self) + + + def toLrfContainer(self, lrfWriter, container): + a = self.attrs + container.appendLrfTag(LrfTag("RuledLine", + (a["linelength"], a["linetype"], a["linewidth"], a["linecolor"]))) + + + def toElement(self, se): + return Element("RuledLine", self.attrs) + + + +class HeaderOrFooter(LrsObject, LrsContainer, LrsAttributes): + """ + Creates empty header or footer objects. Append PutObj objects to + the header or footer to create the text. + + Note: it seems that adding multiple PutObjs to a header or footer + only shows the last one. + """ + defaults = dict(framemode="square", layout="LrTb", framewidth="0", + framecolor="0x00000000", bgcolor="0xFF000000") + + def __init__(self, **settings): + LrsObject.__init__(self) + LrsContainer.__init__(self, [PutObj]) + LrsAttributes.__init__(self, self.defaults, **settings) + + def put_object(self, obj, x1, y1): + self.append(PutObj(obj, x1, y1)) + + def PutObj(self, *args, **kwargs): + p = PutObj(*args, **kwargs) + self.append(p) + return p + + + def toLrf(self, lrfWriter): + hd = LrfObject(self.__class__.__name__, self.objId) + hd.appendTagDict(self.attrs) + + stream = LrfTagStream(0) + for content in self.contents: + content.toLrfContainer(lrfWriter, stream) + + hd.appendLrfTags(stream.getStreamTags(lrfWriter.getSourceEncoding())) + lrfWriter.append(hd) + + + def toElement(self, se): + name = self.__class__.__name__ + labelName = name.lower() + "label" + hd = self.lrsObjectElement(name, objlabel=labelName) + hd.attrib.update(self.attrs) + + for content in self.contents: + hd.append(content.toElement(se)) + + return hd + + +class Header(HeaderOrFooter): + pass + + + +class Footer(HeaderOrFooter): + pass + +class Canvas(LrsObject, LrsContainer, LrsAttributes): + defaults 
= dict(framemode="square", layout="LrTb", framewidth="0", + framecolor="0x00000000", bgcolor="0xFF000000", + canvasheight=0, canvaswidth=0, blockrule='block-adjustable') + + def __init__(self, width, height, **settings): + LrsObject.__init__(self) + LrsContainer.__init__(self, [PutObj]) + LrsAttributes.__init__(self, self.defaults, **settings) + + self.settings = self.defaults.copy() + self.settings.update(settings) + self.settings['canvasheight'] = int(height) + self.settings['canvaswidth'] = int(width) + + def put_object(self, obj, x1, y1): + self.append(PutObj(obj, x1, y1)) + + def toElement(self, source_encoding): + el = self.lrsObjectElement("Canvas", **self.settings) + for po in self.contents: + el.append(po.toElement(source_encoding)) + return el + + def toLrf(self, lrfWriter): + self.toLrfContainer(lrfWriter, lrfWriter) + + + def toLrfContainer(self, lrfWriter, container): + c = LrfObject("Canvas", self.objId) + c.appendTagDict(self.settings) + stream = LrfTagStream(STREAM_COMPRESSED) + for content in self.contents: + content.toLrfContainer(lrfWriter, stream) + if lrfWriter.saveStreamTags: # true only if testing + c.saveStreamTags = stream.tags + + c.appendLrfTags( + stream.getStreamTags(lrfWriter.getSourceEncoding(), + optimizeTags=lrfWriter.optimizeTags, + optimizeCompression=lrfWriter.optimizeCompression)) + container.addLrfObject(c.objId) + lrfWriter.append(c) + + def has_text(self): + return bool(self.contents) + + + +class PutObj(LrsContainer): + """ PutObj holds other objects that are drawn on a Canvas or Header. 
""" + + def __init__(self, content, x1=0, y1=0): + LrsContainer.__init__(self, [TextBlock, ImageBlock]) + self.content = content + self.x1 = int(x1) + self.y1 = int(y1) + + def setContent(self, content): + self.content = content + + def appendReferencedObjects(self, parent): + if self.content.parent is None: + parent.append(self.content) + + def toLrfContainer(self, lrfWriter, container): + container.appendLrfTag(LrfTag("PutObj", (self.x1, self.y1, + self.content.objId))) + + + def toElement(self, se): + el = Element("PutObj", x1=str(self.x1), y1=str(self.y1), + refobj=str(self.content.objId)) + return el + + + + +class ImageStream(LrsObject, LrsContainer): + """ + Embed an image file into an Lrf. + """ + + VALID_ENCODINGS = [ "JPEG", "GIF", "BMP", "PNG" ] + + def __init__(self, file=None, encoding=None, comment=None): + LrsObject.__init__(self) + LrsContainer.__init__(self, []) + _checkExists(file) + self.filename = file + self.comment = comment + # TODO: move encoding from extension to lrf module + if encoding is None: + extension = os.path.splitext(file)[1] + if not extension: + raise LrsError, \ + "file must have extension if encoding is not specified" + extension = extension[1:].upper() + + if extension == "JPG": + extension = "JPEG" + + encoding = extension + else: + encoding = encoding.upper() + + if encoding not in self.VALID_ENCODINGS: + raise LrsError, \ + "encoding or file extension not JPEG, GIF, BMP, or PNG" + + self.encoding = encoding + + + def toLrf(self, lrfWriter): + imageFile = file(self.filename, "rb") + imageData = imageFile.read() + imageFile.close() + + isObj = LrfObject("ImageStream", self.objId) + if self.comment is not None: + isObj.appendLrfTag(LrfTag("comment", self.comment)) + + streamFlags = IMAGE_TYPE_ENCODING[self.encoding] + stream = LrfStreamBase(streamFlags, imageData) + isObj.appendLrfTags(stream.getStreamTags()) + lrfWriter.append(isObj) + + + def toElement(self, se): + element = self.lrsObjectElement("ImageStream", + 
objlabel="imagestreamlabel", + encoding=self.encoding, file=self.filename) + element.text = self.comment + return element + +class Image(LrsObject, LrsContainer, LrsAttributes): + + defaults = dict() + + def __init__(self, refstream, x0=0, x1=0, \ + y0=0, y1=0, xsize=0, ysize=0, **settings): + LrsObject.__init__(self) + LrsContainer.__init__(self, []) + LrsAttributes.__init__(self, self.defaults, settings) + self.x0, self.y0, self.x1, self.y1 = int(x0), int(y0), int(x1), int(y1) + self.xsize, self.ysize = int(xsize), int(ysize) + self.setRefstream(refstream) + + def setRefstream(self, refstream): + self.refstream = refstream + + def appendReferencedObjects(self, parent): + if self.refstream.parent is None: + parent.append(self.refstream) + + def getReferencedObjIds(self): + return [self.objId, self.refstream.objId] + + def toElement(self, se): + element = self.lrsObjectElement("Image", **self.attrs) + element.set("refstream", str(self.refstream.objId)) + for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: + element.set(name, str(getattr(self, name))) + return element + + def toLrf(self, lrfWriter): + ib = LrfObject("Image", self.objId) + ib.appendLrfTag(LrfTag("ImageRect", + (self.x0, self.y0, self.x1, self.y1))) + ib.appendLrfTag(LrfTag("ImageSize", (self.xsize, self.ysize))) + ib.appendLrfTag(LrfTag("RefObjId", self.refstream.objId)) + lrfWriter.append(ib) + + + + + +class ImageBlock(LrsObject, LrsContainer, LrsAttributes): + """ Create an image on a page. 
""" + # TODO: allow other block attributes + + defaults = BlockStyle.baseDefaults.copy() + + def __init__(self, refstream, x0="0", y0="0", x1="600", y1="800", + xsize="600", ysize="800", + blockStyle=BlockStyle(blockrule='block-fixed'), + alttext=None, **settings): + LrsObject.__init__(self) + LrsContainer.__init__(self, [Text, Image]) + LrsAttributes.__init__(self, self.defaults, **settings) + self.x0, self.y0, self.x1, self.y1 = int(x0), int(y0), int(x1), int(y1) + self.xsize, self.ysize = int(xsize), int(ysize) + self.setRefstream(refstream) + self.blockStyle = blockStyle + self.alttext = alttext + + def setRefstream(self, refstream): + self.refstream = refstream + + def appendReferencedObjects(self, parent): + if self.refstream.parent is None: + parent.append(self.refstream) + + if self.blockStyle is not None and self.blockStyle.parent is None: + parent.append(self.blockStyle) + + + def getReferencedObjIds(self): + objects = [self.objId, self.extraId, self.refstream.objId] + if self.blockStyle is not None: + objects.append(self.blockStyle.objId) + + return objects + + + def toLrf(self, lrfWriter): + self.toLrfContainer(lrfWriter, lrfWriter) + + + def toLrfContainer(self, lrfWriter, container): + # id really belongs to the outer block + + extraId = LrsObject.getNextObjId() + + b = LrfObject("Block", self.objId) + if self.blockStyle is not None: + b.appendLrfTag(LrfTag("Link", self.blockStyle.objId)) + b.appendTagDict(self.attrs) + + b.appendLrfTags( + LrfTagStream(0, + [LrfTag("Link", extraId)]).getStreamTags(lrfWriter.getSourceEncoding())) + container.addLrfObject(b.objId) + lrfWriter.append(b) + + ib = LrfObject("Image", extraId) + + ib.appendLrfTag(LrfTag("ImageRect", + (self.x0, self.y0, self.x1, self.y1))) + ib.appendLrfTag(LrfTag("ImageSize", (self.xsize, self.ysize))) + ib.appendLrfTag(LrfTag("RefObjId", self.refstream.objId)) + if self.alttext: + ib.appendLrfTag("Comment", self.alttext) + + + lrfWriter.append(ib) + self.extraId = extraId + + + def 
toElement(self, se): + element = self.lrsObjectElement("ImageBlock", **self.attrs) + element.set("refstream", str(self.refstream.objId)) + for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: + element.set(name, str(getattr(self, name))) + element.text = self.alttext + return element + + + +class Font(LrsContainer): + """ Allows a TrueType file to be embedded in an Lrf. """ + def __init__(self, file=None, fontname=None, fontfilename=None, encoding=None): + LrsContainer.__init__(self, []) + try: + _checkExists(fontfilename) + self.truefile = fontfilename + except: + try: + _checkExists(file) + self.truefile = file + except: + raise LrsError, "neither '%s' nor '%s' exists"%(fontfilename, file) + + self.file = file + self.fontname = fontname + self.fontfilename = fontfilename + self.encoding = encoding + + + def toLrf(self, lrfWriter): + font = LrfObject("Font", LrsObject.getNextObjId()) + lrfWriter.registerFontId(font.objId) + font.appendLrfTag(LrfTag("FontFilename", + lrfWriter.toUnicode(self.truefile))) + font.appendLrfTag(LrfTag("FontFacename", + lrfWriter.toUnicode(self.fontname))) + + stream = LrfFileStream(STREAM_FORCE_COMPRESSED, self.truefile) + font.appendLrfTags(stream.getStreamTags()) + + lrfWriter.append(font) + + + def toElement(self, se): + element = Element("RegistFont", encoding="TTF", fontname=self.fontname, + file=self.file, fontfilename=self.file) + return element diff --git a/src/calibre/ebooks/markdown/__init__.py b/src/calibre/ebooks/markdown/__init__.py index 466ba03d7e..2676e91934 100644 --- a/src/calibre/ebooks/markdown/__init__.py +++ b/src/calibre/ebooks/markdown/__init__.py @@ -2,4 +2,6 @@ # Initialize extensions from calibre.ebooks.markdown import mdx_footnotes from calibre.ebooks.markdown import mdx_tables -from calibre.ebooks.markdown import mdx_toc \ No newline at end of file +from calibre.ebooks.markdown import mdx_toc + +mdx_footnotes, mdx_tables, mdx_toc diff --git a/src/calibre/ebooks/markdown/mdx_toc.py 
b/src/calibre/ebooks/markdown/mdx_toc.py index 66a34d90a0..a8c1db267d 100644 --- a/src/calibre/ebooks/markdown/mdx_toc.py +++ b/src/calibre/ebooks/markdown/mdx_toc.py @@ -8,8 +8,6 @@ My markdown extensions for adding: Table of Contents (aka toc) """ -import os -import sys import re import markdown @@ -18,7 +16,7 @@ DEFAULT_TITLE = None def extract_alphanumeric(in_str=None): """take alpha-numeric (7bit ascii) and return as a string """ - # I'm sure this is really inefficient and + # I'm sure this is really inefficient and # could be done with a lambda/map() #x.strip().title().replace(' ', "") out_str=[] @@ -42,7 +40,7 @@ class TocExtension (markdown.Extension): toc is returned in a div tag with class='toc' toc is either: appended to end of document - OR + OR replaces first string occurence of "///Table of Contents Goes Here///" """ @@ -75,7 +73,7 @@ class TocExtension (markdown.Extension): """ Creates Table Of Contents based on headers. - @returns: toc as a single as a dom element + @returns: toc as a single as a dom element in a
tag with class='toc' """ @@ -85,9 +83,9 @@ class TocExtension (markdown.Extension): if element.type=='element': if headers_compiled_re.match(element.nodeName): return True - + headers_doc_list = doc.find(findHeadersFn) - + # Insert anchor tags into dom generated_anchor_id=0 headers_list=[] @@ -99,19 +97,19 @@ class TocExtension (markdown.Extension): if heading_type == self.auto_toc_heading_type: min_header_size_found=min(min_header_size_found, heading_type) - + html_anchor_name= (extract_alphanumeric(heading_title) +'__MD_autoTOC_%d' % (generated_anchor_id)) - + # insert anchor tag inside header tags html_anchor = doc.createElement("a") html_anchor.setAttribute('name', html_anchor_name) element.appendChild(html_anchor) - + headers_list.append( (heading_type, heading_title, html_anchor_name) ) generated_anchor_id = generated_anchor_id + 1 - + # create dom for toc if headers_list != []: # Create list @@ -125,9 +123,9 @@ class TocExtension (markdown.Extension): toc_doc_link.appendChild(toc_doc_text) toc_doc_entry.appendChild(toc_doc_link) toc_doc_list.appendChild(toc_doc_entry) - - - # Put list into div + + + # Put list into div div = doc.createElement("div") div.setAttribute('class', 'toc') if self.TOC_TITLE: @@ -149,7 +147,7 @@ class TocPostprocessor (markdown.Postprocessor): def run(self, doc): tocPlaceholder = self.toc.findTocPlaceholder(doc) - + tocDiv = self.toc.createTocDiv(doc) if tocDiv: if tocPlaceholder : diff --git a/src/calibre/ebooks/metadata/imp.py b/src/calibre/ebooks/metadata/imp.py index e35fc848ef..e2a2b61f31 100644 --- a/src/calibre/ebooks/metadata/imp.py +++ b/src/calibre/ebooks/metadata/imp.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Ashish Kulkarni ' '''Read meta information from IMP files''' -import sys, os +import sys from calibre.ebooks.metadata import MetaInformation, string_to_authors @@ -17,7 +17,7 @@ def get_metadata(stream): if stream.read(10) not in MAGIC: print >>sys.stderr, u'Couldn\'t read IMP header from file' 
return mi - + def cString(skip=0): result = '' while 1: @@ -30,7 +30,7 @@ def get_metadata(stream): stream.read(38) # skip past some uninteresting headers _, category, title, author = cString(), cString(), cString(1), cString(2) - + if title: mi.title = title if author: diff --git a/src/calibre/ebooks/metadata/lrx.py b/src/calibre/ebooks/metadata/lrx.py index af0e53121e..82473e81d1 100644 --- a/src/calibre/ebooks/metadata/lrx.py +++ b/src/calibre/ebooks/metadata/lrx.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' Read metadata from LRX files ''' -import sys, struct +import struct from zlib import decompress from lxml import etree @@ -33,7 +33,7 @@ def short_be(buf): def get_metadata(f): read = lambda at, amount: _read(f, at, amount) f.seek(0) - buf = f.read(12) + buf = f.read(12) if buf[4:] == 'ftypLRX2': offset = 0 while True: @@ -74,9 +74,9 @@ def get_metadata(f): mi.tags = [x.text for x in bi.findall('Category')] mi.language = root.find('DocInfo').find('Language').text return mi - + elif buf[4:8] == 'LRX': raise ValueError('Librie LRX format not supported') else: raise ValueError('Not a LRX file') - + diff --git a/src/calibre/ebooks/metadata/odt.py b/src/calibre/ebooks/metadata/odt.py index f5b1805e8b..f4b0986295 100755 --- a/src/calibre/ebooks/metadata/odt.py +++ b/src/calibre/ebooks/metadata/odt.py @@ -17,7 +17,7 @@ # # Contributor(s): # -import zipfile, sys, re +import zipfile, re import xml.sax.saxutils from cStringIO import StringIO @@ -46,7 +46,7 @@ fields = { } def normalize(str): - """ + """ The normalize-space function returns the argument string with whitespace normalized by stripping leading and trailing whitespace and replacing sequences of whitespace characters by a single space. 
@@ -125,7 +125,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator): else: texttag = self._tag self.seenfields[texttag] = self.data() - + if field in self.deletefields: self.output.dowrite = True else: @@ -140,7 +140,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator): def data(self): return normalize(''.join(self._data)) - + def get_metadata(stream): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() @@ -161,6 +161,6 @@ def get_metadata(stream): mi.language = data['language'] if data.get('keywords', ''): mi.tags = data['keywords'].split(',') - + return mi diff --git a/src/calibre/ebooks/metadata/zip.py b/src/calibre/ebooks/metadata/zip.py index 441aa7e3da..624e0fe73c 100644 --- a/src/calibre/ebooks/metadata/zip.py +++ b/src/calibre/ebooks/metadata/zip.py @@ -3,8 +3,8 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' import os -from zipfile import ZipFile -from cStringIO import StringIO +from zipfile import ZipFile +from cStringIO import StringIO def get_metadata(stream): @@ -20,5 +20,5 @@ def get_metadata(stream): stream = StringIO(zf.read(f)) return get_metadata(stream, stream_type) raise ValueError('No ebook found in ZIP archive') - - \ No newline at end of file + + diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py index 12c1c4aaa7..91a5eb3d97 100644 --- a/src/calibre/ebooks/pdb/palmdoc/writer.py +++ b/src/calibre/ebooks/pdb/palmdoc/writer.py @@ -3,7 +3,6 @@ ''' Writer content to palmdoc pdb file. 
''' -import os __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' diff --git a/src/calibre/ebooks/pdb/ztxt/__init__.py b/src/calibre/ebooks/pdb/ztxt/__init__.py index 2c2028b74f..4dd1a954b0 100644 --- a/src/calibre/ebooks/pdb/ztxt/__init__.py +++ b/src/calibre/ebooks/pdb/ztxt/__init__.py @@ -4,7 +4,6 @@ __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import os class zTXTError(Exception): pass diff --git a/src/calibre/ebooks/pdf/manipulate/decrypt.py b/src/calibre/ebooks/pdf/manipulate/decrypt.py index 5f4265b5ed..ede12f15ee 100644 --- a/src/calibre/ebooks/pdf/manipulate/decrypt.py +++ b/src/calibre/ebooks/pdf/manipulate/decrypt.py @@ -12,8 +12,6 @@ Decrypt content of PDF. import os, sys from optparse import OptionGroup, Option -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata import authors_to_string from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding @@ -36,8 +34,8 @@ OPTIONS = set([ class DecryptionError(Exception): def __init__(self, pdf_path): - self.value = 'Unable to decrypt file `%s`.' % value - + self.value = 'Unable to decrypt file `%s`.' 
% pdf_path + def __str__(self): return repr(self.value) @@ -62,20 +60,20 @@ def add_options(parser): group = OptionGroup(parser, _('Decrypt Options:'), _('Options to control the transformation of pdf')) parser.add_option_group(group) add_option = group.add_option - + for rec in OPTIONS: option_recommendation_to_cli_option(add_option, rec) def decrypt(pdf_path, out_path, password): pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - + if pdf.decrypt(str(password)) == 0: raise DecryptionError(pdf_path) - + title = pdf.documentInfo.title if pdf.documentInfo.title else _('Unknown') author = pdf.documentInfo.author if pdf.documentInfo.author else _('Unknown') out_pdf = PdfFileWriter(title=title, author=author) - + for page in pdf.pages: out_pdf.addPage(page) @@ -86,23 +84,23 @@ def main(args=sys.argv, name=''): log = Log() parser = option_parser(name) add_options(parser) - + opts, args = parser.parse_args(args) args = args[1:] - + if len(args) < 2: print 'Error: A PDF file and decryption password is required.\n' print_help(parser, log) return 1 - + if not is_valid_pdf(args[0]): print 'Error: Could not read file `%s`.' % args[0] return 1 - + if not is_encrypted(args[0]): print 'Error: file `%s` is not encrypted.' 
% args[0] return 1 - + try: decrypt(args[0], opts.output, args[1]) except DecryptionError, e: diff --git a/src/calibre/ebooks/pdf/manipulate/encrypt.py b/src/calibre/ebooks/pdf/manipulate/encrypt.py index 15600fb07c..ff3b47b11a 100644 --- a/src/calibre/ebooks/pdf/manipulate/encrypt.py +++ b/src/calibre/ebooks/pdf/manipulate/encrypt.py @@ -17,6 +17,8 @@ from calibre.utils.logging import Log from calibre.constants import preferred_encoding from calibre.customize.conversion import OptionRecommendation from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted +from calibre.ebooks.metadata import authors_to_string +from calibre.ebooks.metadata.meta import metadata_from_formats from pyPdf import PdfFileWriter, PdfFileReader @@ -52,7 +54,7 @@ def add_options(parser): group = OptionGroup(parser, _('Encrypt Options:'), _('Options to control the transformation of pdf')) parser.add_option_group(group) add_option = group.add_option - + for rec in OPTIONS: option_recommendation_to_cli_option(add_option, rec) @@ -78,23 +80,23 @@ def main(args=sys.argv, name=''): log = Log() parser = option_parser(name) add_options(parser) - + opts, args = parser.parse_args(args) args = args[1:] - + if len(args) < 2: print 'Error: A PDF file and decryption password is required.\n' print_help(parser, log) return 1 - + if not is_valid_pdf(args[0]): print 'Error: Could not read file `%s`.' % args[0] return 1 - + if is_encrypted(args[0]): print 'Error: file `%s` is already encrypted.' % args[0] return 1 - + mi = metadata_from_formats([args[0]]) encrypt(args[0], opts.output, args[1], mi) diff --git a/src/calibre/ebooks/pdf/verify.py b/src/calibre/ebooks/pdf/verify.py index 3a8a8073ce..862cf00ee8 100644 --- a/src/calibre/ebooks/pdf/verify.py +++ b/src/calibre/ebooks/pdf/verify.py @@ -11,25 +11,25 @@ Verify PDF files. import os -from pyPdf import PdfFileWriter, PdfFileReader +from pyPdf import PdfFileReader def is_valid_pdf(pdf_path): ''' Returns True if the pdf file is valid. 
''' - + try: with open(os.path.abspath(pdf_path), 'rb') as pdf_file: pdf = PdfFileReader(pdf_file) except: return False return True - + def is_valid_pdfs(pdf_paths): ''' Returns a list of invalid pdf files. ''' - + invalid = [] for pdf_path in pdf_paths: if not is_valid_pdf(pdf_path): diff --git a/src/calibre/ebooks/rb/writer.py b/src/calibre/ebooks/rb/writer.py index 515c95a6fe..c8908ee95f 100644 --- a/src/calibre/ebooks/rb/writer.py +++ b/src/calibre/ebooks/rb/writer.py @@ -4,7 +4,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import os import struct import zlib diff --git a/src/calibre/ebooks/rtf2xml/copy.py b/src/calibre/ebooks/rtf2xml/copy.py index 26ca300696..ff029c1841 100755 --- a/src/calibre/ebooks/rtf2xml/copy.py +++ b/src/calibre/ebooks/rtf2xml/copy.py @@ -15,7 +15,7 @@ # # # # ######################################################################### -import sys, os, shutil +import os, shutil class Copy: """Copy each changed file to a directory for debugging purposes""" @@ -66,6 +66,6 @@ class Copy: """ write_file = os.path.join(Copy.__dir,new_file) shutil.copyfile(file, write_file) - + def rename(self, source, dest): - shutil.copyfile(source, dest) \ No newline at end of file + shutil.copyfile(source, dest) diff --git a/src/calibre/ebooks/rtf2xml/options_trem.py b/src/calibre/ebooks/rtf2xml/options_trem.py index 12ab79b5b3..86c11a6e85 100755 --- a/src/calibre/ebooks/rtf2xml/options_trem.py +++ b/src/calibre/ebooks/rtf2xml/options_trem.py @@ -1,5 +1,4 @@ import sys -from calibre.ebooks import rtf2xml class ParseOptions: """ Requires: diff --git a/src/calibre/ebooks/rtf2xml/output.py b/src/calibre/ebooks/rtf2xml/output.py index bb17228fce..f193d2376e 100755 --- a/src/calibre/ebooks/rtf2xml/output.py +++ b/src/calibre/ebooks/rtf2xml/output.py @@ -16,7 +16,6 @@ # # ######################################################################### import sys, os, codecs -from calibre.ebooks import rtf2xml 
class Output: """ Output file diff --git a/src/calibre/ebooks/rtf2xml/override_table.py b/src/calibre/ebooks/rtf2xml/override_table.py index 6186e7ec55..146c73397a 100755 --- a/src/calibre/ebooks/rtf2xml/override_table.py +++ b/src/calibre/ebooks/rtf2xml/override_table.py @@ -15,8 +15,6 @@ # # # # ######################################################################### -import sys,os -from calibre.ebooks import rtf2xml class OverrideTable: """ Parse a line of text to make the override table. Return a string diff --git a/src/calibre/gui2/dialogs/choose_format.py b/src/calibre/gui2/dialogs/choose_format.py index 809b636690..e0fcb0868b 100644 --- a/src/calibre/gui2/dialogs/choose_format.py +++ b/src/calibre/gui2/dialogs/choose_format.py @@ -7,21 +7,19 @@ from calibre.gui2 import file_icon_provider from calibre.gui2.dialogs.choose_format_ui import Ui_ChooseFormatDialog class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog): - + def __init__(self, window, msg, formats): QDialog.__init__(self, window) Ui_ChooseFormatDialog.__init__(self) self.setupUi(self) self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept()) - + self.msg.setText(msg) for format in formats: self.formats.addItem(QListWidgetItem(file_icon_provider().icon_from_ext(format.lower()), format.upper())) self._formats = formats self.formats.setCurrentRow(0) - + def format(self): return self._formats[self.formats.currentRow()] - - \ No newline at end of file diff --git a/src/calibre/gui2/dialogs/conversion_error.py b/src/calibre/gui2/dialogs/conversion_error.py index cfa573e371..7b47c59d5a 100644 --- a/src/calibre/gui2/dialogs/conversion_error.py +++ b/src/calibre/gui2/dialogs/conversion_error.py @@ -5,7 +5,7 @@ from PyQt4.QtGui import QDialog from calibre.gui2.dialogs.conversion_error_ui import Ui_ConversionErrorDialog class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog): - + def __init__(self, window, title, html, show=False): QDialog.__init__(self, window) 
Ui_ConversionErrorDialog.__init__(self) @@ -14,7 +14,7 @@ class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog): self.set_message(html) if show: self.show() - + def set_message(self, html): self.text.setHtml('%s%s' % content) - + css = dom_tree.findAll('link') for c in css: c.extract() - + print_css = Tag(BeautifulSoup(), 'style', [('type', 'text/css'), ('title', 'override_css')]) print_css.insert(0, PRINTCSS) dom_tree.findAll('head')[0].insert(0, print_css) - + return unicode(dom_tree) def print_preview(self, ok): printer = QPrinter(QPrinter.HighResolution) printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch) - + previewDialog = QPrintPreviewDialog(printer) - + self.connect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_) previewDialog.exec_() self.disconnect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_) - + self.loop.quit() - + def print_book(self, ok): printer = QPrinter(QPrinter.HighResolution) printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch) - + printDialog = QPrintDialog(printer) printDialog.setWindowTitle(_("Print eBook")) - + printDialog.exec_() if printDialog.result() == QDialog.Accepted: self.view.print_(printer) - + self.loop.quit() def main(): diff --git a/src/calibre/manual/conf.py b/src/calibre/manual/conf.py index 1000d5c5f2..8bea871349 100644 --- a/src/calibre/manual/conf.py +++ b/src/calibre/manual/conf.py @@ -18,7 +18,7 @@ sys.path.append(os.path.abspath('../../../')) sys.path.append(os.path.abspath('.')) from calibre import __appname__, __version__ import custom - +custom # General configuration # --------------------- diff --git a/src/calibre/path.py b/src/calibre/path.py deleted file mode 100644 index 44f07a1455..0000000000 --- a/src/calibre/path.py +++ /dev/null @@ -1,970 +0,0 @@ -""" path.py - An object representing a path to a file or directory. 
- -Example: - -from path import path -d = path('/home/guido/bin') -for f in d.files('*.py'): - f.chmod(0755) - -This module requires Python 2.2 or later. - - -URL: http://www.jorendorff.com/articles/python/path -Author: Jason Orendorff (and others - see the url!) -Date: 9 Mar 2007 -""" - - -# TODO -# - Tree-walking functions don't avoid symlink loops. Matt Harrison -# sent me a patch for this. -# - Bug in write_text(). It doesn't support Universal newline mode. -# - Better error message in listdir() when self isn't a -# directory. (On Windows, the error message really sucks.) -# - Make sure everything has a good docstring. -# - Add methods for regex find and replace. -# - guess_content_type() method? -# - Perhaps support arguments to touch(). - -from __future__ import generators - -import sys, warnings, os, fnmatch, glob, shutil, codecs, hashlib - -__version__ = '2.2' -__all__ = ['path'] - -# Platform-specific support for path.owner -if os.name == 'nt': - try: - import win32security - except ImportError: - win32security = None -else: - try: - import pwd - except ImportError: - pwd = None - -# Pre-2.3 support. Are unicode filenames supported? -_base = str -_getcwd = os.getcwd -try: - if os.path.supports_unicode_filenames: - _base = unicode - _getcwd = os.getcwdu -except AttributeError: - pass - -# Pre-2.3 workaround for booleans -try: - True, False -except NameError: - True, False = 1, 0 - -# Pre-2.3 workaround for basestring. -try: - basestring -except NameError: - basestring = (str, unicode) - -# Universal newline support -_textmode = 'r' -if hasattr(file, 'newlines'): - _textmode = 'U' - - -class TreeWalkWarning(Warning): - pass - -class path(_base): - """ Represents a filesystem path. - - For documentation on individual methods, consult their - counterparts in os.path. - """ - - # --- Special Python methods. - - def __repr__(self): - return 'path(%s)' % _base.__repr__(self) - - # Adding a path and a string yields a path. 
- def __add__(self, more): - try: - resultStr = _base.__add__(self, more) - except TypeError: #Python bug - resultStr = NotImplemented - if resultStr is NotImplemented: - return resultStr - return self.__class__(resultStr) - - def __radd__(self, other): - if isinstance(other, basestring): - return self.__class__(other.__add__(self)) - else: - return NotImplemented - - # The / operator joins paths. - def __div__(self, rel): - """ fp.__div__(rel) == fp / rel == fp.joinpath(rel) - - Join two path components, adding a separator character if - needed. - """ - return self.__class__(os.path.join(self, rel)) - - # Make the / operator work even when true division is enabled. - __truediv__ = __div__ - - def getcwd(cls): - """ Return the current working directory as a path object. """ - return cls(_getcwd()) - getcwd = classmethod(getcwd) - - - # --- Operations on path strings. - - isabs = os.path.isabs - def abspath(self): return self.__class__(os.path.abspath(self)) - def normcase(self): return self.__class__(os.path.normcase(self)) - def normpath(self): return self.__class__(os.path.normpath(self)) - def realpath(self): return self.__class__(os.path.realpath(self)) - def expanduser(self): return self.__class__(os.path.expanduser(self)) - def expandvars(self): return self.__class__(os.path.expandvars(self)) - def dirname(self): return self.__class__(os.path.dirname(self)) - basename = os.path.basename - - def expand(self): - """ Clean up a filename by calling expandvars(), - expanduser(), and normpath() on it. - - This is commonly everything needed to clean up a filename - read from a configuration file, for example. 
- """ - return self.expandvars().expanduser().normpath() - - def _get_namebase(self): - base, ext = os.path.splitext(self.name) - return base - - def _get_ext(self): - f, ext = os.path.splitext(_base(self)) - return ext - - def _get_drive(self): - drive, r = os.path.splitdrive(self) - return self.__class__(drive) - - parent = property( - dirname, None, None, - """ This path's parent directory, as a new path object. - - For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib') - """) - - name = property( - basename, None, None, - """ The name of this file or directory without the full path. - - For example, path('/usr/local/lib/libpython.so').name == 'libpython.so' - """) - - namebase = property( - _get_namebase, None, None, - """ The same as path.name, but with one file extension stripped off. - - For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz', - but path('/home/guido/python.tar.gz').namebase == 'python.tar' - """) - - ext = property( - _get_ext, None, None, - """ The file extension, for example '.py'. """) - - drive = property( - _get_drive, None, None, - """ The drive specifier, for example 'C:'. - This is always empty on systems that don't use drive specifiers. - """) - - def splitpath(self): - """ p.splitpath() -> Return (p.parent, p.name). """ - parent, child = os.path.split(self) - return self.__class__(parent), child - - def splitdrive(self): - """ p.splitdrive() -> Return (p.drive, ). - - Split the drive specifier from this path. If there is - no drive specifier, p.drive is empty, so the return value - is simply (path(''), p). This is always the case on Unix. - """ - drive, rel = os.path.splitdrive(self) - return self.__class__(drive), rel - - def splitext(self): - """ p.splitext() -> Return (p.stripext(), p.ext). - - Split the filename extension from this path and return - the two parts. Either part may be empty. - - The extension is everything from '.' to the end of the - last path segment. 
This has the property that if - (a, b) == p.splitext(), then a + b == p. - """ - filename, ext = os.path.splitext(self) - return self.__class__(filename), ext - - def stripext(self): - """ p.stripext() -> Remove one file extension from the path. - - For example, path('/home/guido/python.tar.gz').stripext() - returns path('/home/guido/python.tar'). - """ - return self.splitext()[0] - - if hasattr(os.path, 'splitunc'): - def splitunc(self): - unc, rest = os.path.splitunc(self) - return self.__class__(unc), rest - - def _get_uncshare(self): - unc, r = os.path.splitunc(self) - return self.__class__(unc) - - uncshare = property( - _get_uncshare, None, None, - """ The UNC mount point for this path. - This is empty for paths on local drives. """) - - def joinpath(self, *args): - """ Join two or more path components, adding a separator - character (os.sep) if needed. Returns a new path - object. - """ - return self.__class__(os.path.join(self, *args)) - - def splitall(self): - r""" Return a list of the path components in this path. - - The first item in the list will be a path. Its value will be - either os.curdir, os.pardir, empty, or the root directory of - this path (for example, '/' or 'C:\\'). The other items in - the list will be strings. - - path.path.joinpath(*result) will yield the original path. - """ - parts = [] - loc = self - while loc != os.curdir and loc != os.pardir: - prev = loc - loc, child = prev.splitpath() - if loc == prev: - break - parts.append(child) - parts.append(loc) - parts.reverse() - return parts - - def relpath(self): - """ Return this path as a relative path, - based from the current working directory. - """ - cwd = self.__class__(os.getcwd()) - return cwd.relpathto(self) - - def relpathto(self, dest): - """ Return a relative path from self to dest. - - If there is no relative path from self to dest, for example if - they reside on different drives in Windows, then this returns - dest.abspath(). 
- """ - origin = self.abspath() - dest = self.__class__(dest).abspath() - - orig_list = origin.normcase().splitall() - # Don't normcase dest! We want to preserve the case. - dest_list = dest.splitall() - - if orig_list[0] != os.path.normcase(dest_list[0]): - # Can't get here from there. - return dest - - # Find the location where the two paths start to differ. - i = 0 - for start_seg, dest_seg in zip(orig_list, dest_list): - if start_seg != os.path.normcase(dest_seg): - break - i += 1 - - # Now i is the point where the two paths diverge. - # Need a certain number of "os.pardir"s to work up - # from the origin to the point of divergence. - segments = [os.pardir] * (len(orig_list) - i) - # Need to add the diverging part of dest_list. - segments += dest_list[i:] - if len(segments) == 0: - # If they happen to be identical, use os.curdir. - relpath = os.curdir - else: - relpath = os.path.join(*segments) - return self.__class__(relpath) - - # --- Listing, searching, walking, and matching - - def listdir(self, pattern=None): - """ D.listdir() -> List of items in this directory. - - Use D.files() or D.dirs() instead if you want a listing - of just files or just subdirectories. - - The elements of the list are path objects. - - With the optional 'pattern' argument, this only lists - items whose names match the given pattern. - """ - names = os.listdir(self) - if pattern is not None: - names = fnmatch.filter(names, pattern) - return [self / child for child in names] - - def dirs(self, pattern=None): - """ D.dirs() -> List of this directory's subdirectories. - - The elements of the list are path objects. - This does not walk recursively into subdirectories - (but see path.walkdirs). - - With the optional 'pattern' argument, this only lists - directories whose names match the given pattern. For - example, d.dirs('build-*'). - """ - return [p for p in self.listdir(pattern) if p.isdir()] - - def files(self, pattern=None): - """ D.files() -> List of the files in this directory. 
- - The elements of the list are path objects. - This does not walk into subdirectories (see path.walkfiles). - - With the optional 'pattern' argument, this only lists files - whose names match the given pattern. For example, - d.files('*.pyc'). - """ - - return [p for p in self.listdir(pattern) if p.isfile()] - - def walk(self, pattern=None, errors='strict'): - """ D.walk() -> iterator over files and subdirs, recursively. - - The iterator yields path objects naming each child item of - this directory and its descendants. This requires that - D.isdir(). - - This performs a depth-first traversal of the directory tree. - Each directory is returned just before all its children. - - The errors= keyword argument controls behavior when an - error occurs. The default is 'strict', which causes an - exception. The other allowed values are 'warn', which - reports the error via warnings.warn(), and 'ignore'. - """ - if errors not in ('strict', 'warn', 'ignore'): - raise ValueError("invalid errors parameter") - - try: - childList = self.listdir() - except Exception: - if errors == 'ignore': - return - elif errors == 'warn': - warnings.warn( - "Unable to list directory '%s': %s" - % (self, sys.exc_info()[1]), - TreeWalkWarning) - return - else: - raise - - for child in childList: - if pattern is None or child.fnmatch(pattern): - yield child - try: - isdir = child.isdir() - except Exception: - if errors == 'ignore': - isdir = False - elif errors == 'warn': - warnings.warn( - "Unable to access '%s': %s" - % (child, sys.exc_info()[1]), - TreeWalkWarning) - isdir = False - else: - raise - - if isdir: - for item in child.walk(pattern, errors): - yield item - - def walkdirs(self, pattern=None, errors='strict'): - """ D.walkdirs() -> iterator over subdirs, recursively. - - With the optional 'pattern' argument, this yields only - directories whose names match the given pattern. For - example, mydir.walkdirs('*test') yields only directories - with names ending in 'test'. 
- - The errors= keyword argument controls behavior when an - error occurs. The default is 'strict', which causes an - exception. The other allowed values are 'warn', which - reports the error via warnings.warn(), and 'ignore'. - """ - if errors not in ('strict', 'warn', 'ignore'): - raise ValueError("invalid errors parameter") - - try: - dirs = self.dirs() - except Exception: - if errors == 'ignore': - return - elif errors == 'warn': - warnings.warn( - "Unable to list directory '%s': %s" - % (self, sys.exc_info()[1]), - TreeWalkWarning) - return - else: - raise - - for child in dirs: - if pattern is None or child.fnmatch(pattern): - yield child - for subsubdir in child.walkdirs(pattern, errors): - yield subsubdir - - def walkfiles(self, pattern=None, errors='strict'): - """ D.walkfiles() -> iterator over files in D, recursively. - - The optional argument, pattern, limits the results to files - with names that match the pattern. For example, - mydir.walkfiles('*.tmp') yields only files with the .tmp - extension. - """ - if errors not in ('strict', 'warn', 'ignore'): - raise ValueError("invalid errors parameter") - - try: - childList = self.listdir() - except Exception: - if errors == 'ignore': - return - elif errors == 'warn': - warnings.warn( - "Unable to list directory '%s': %s" - % (self, sys.exc_info()[1]), - TreeWalkWarning) - return - else: - raise - - for child in childList: - try: - isfile = child.isfile() - isdir = not isfile and child.isdir() - except: - if errors == 'ignore': - continue - elif errors == 'warn': - warnings.warn( - "Unable to access '%s': %s" - % (self, sys.exc_info()[1]), - TreeWalkWarning) - continue - else: - raise - - if isfile: - if pattern is None or child.fnmatch(pattern): - yield child - elif isdir: - for f in child.walkfiles(pattern, errors): - yield f - - def fnmatch(self, pattern): - """ Return True if self.name matches the given pattern. - - pattern - A filename pattern with wildcards, - for example '*.py'. 
- """ - return fnmatch.fnmatch(self.name, pattern) - - def glob(self, pattern): - """ Return a list of path objects that match the pattern. - - pattern - a path relative to this directory, with wildcards. - - For example, path('/users').glob('*/bin/*') returns a list - of all the files users have in their bin directories. - """ - cls = self.__class__ - return [cls(s) for s in glob.glob(_base(self / pattern))] - - - # --- Reading or writing an entire file at once. - - def open(self, mode='r'): - """ Open this file. Return a file object. """ - return file(self, mode) - - def bytes(self): - """ Open this file, read all bytes, return them as a string. """ - f = self.open('rb') - try: - return f.read() - finally: - f.close() - - def write_bytes(self, bytes, append=False): - """ Open this file and write the given bytes to it. - - Default behavior is to overwrite any existing file. - Call p.write_bytes(bytes, append=True) to append instead. - """ - if append: - mode = 'ab' - else: - mode = 'wb' - f = self.open(mode) - try: - f.write(bytes) - finally: - f.close() - - def text(self, encoding=None, errors='strict'): - r""" Open this file, read it in, return the content as a string. - - This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r' - are automatically translated to '\n'. - - Optional arguments: - - encoding - The Unicode encoding (or character set) of - the file. If present, the content of the file is - decoded and returned as a unicode object; otherwise - it is returned as an 8-bit str. - errors - How to handle Unicode errors; see help(str.decode) - for the options. Default is 'strict'. - """ - if encoding is None: - # 8-bit - f = self.open(_textmode) - try: - return f.read() - finally: - f.close() - else: - # Unicode - f = codecs.open(self, 'r', encoding, errors) - # (Note - Can't use 'U' mode here, since codecs.open - # doesn't support 'U' mode, even in Python 2.3.) 
- try: - t = f.read() - finally: - f.close() - return (t.replace(u'\r\n', u'\n') - .replace(u'\r\x85', u'\n') - .replace(u'\r', u'\n') - .replace(u'\x85', u'\n') - .replace(u'\u2028', u'\n')) - - def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False): - r""" Write the given text to this file. - - The default behavior is to overwrite any existing file; - to append instead, use the 'append=True' keyword argument. - - There are two differences between path.write_text() and - path.write_bytes(): newline handling and Unicode handling. - See below. - - Parameters: - - - text - str/unicode - The text to be written. - - - encoding - str - The Unicode encoding that will be used. - This is ignored if 'text' isn't a Unicode string. - - - errors - str - How to handle Unicode encoding errors. - Default is 'strict'. See help(unicode.encode) for the - options. This is ignored if 'text' isn't a Unicode - string. - - - linesep - keyword argument - str/unicode - The sequence of - characters to be used to mark end-of-line. The default is - os.linesep. You can also specify None; this means to - leave all newlines as they are in 'text'. - - - append - keyword argument - bool - Specifies what to do if - the file already exists (True: append to the end of it; - False: overwrite it.) The default is False. - - - --- Newline handling. - - write_text() converts all standard end-of-line sequences - ('\n', '\r', and '\r\n') to your platform's default end-of-line - sequence (see os.linesep; on Windows, for example, the - end-of-line marker is '\r\n'). - - If you don't like your platform's default, you can override it - using the 'linesep=' keyword argument. If you specifically want - write_text() to preserve the newlines as-is, use 'linesep=None'. - - This applies to Unicode text the same as to 8-bit text, except - there are three additional standard Unicode end-of-line sequences: - u'\x85', u'\r\x85', and u'\u2028'. 
- - (This is slightly different from when you open a file for - writing with fopen(filename, "w") in C or file(filename, 'w') - in Python.) - - - --- Unicode - - If 'text' isn't Unicode, then apart from newline handling, the - bytes are written verbatim to the file. The 'encoding' and - 'errors' arguments are not used and must be omitted. - - If 'text' is Unicode, it is first converted to bytes using the - specified 'encoding' (or the default encoding if 'encoding' - isn't specified). The 'errors' argument applies only to this - conversion. - - """ - if isinstance(text, unicode): - if linesep is not None: - # Convert all standard end-of-line sequences to - # ordinary newline characters. - text = (text.replace(u'\r\n', u'\n') - .replace(u'\r\x85', u'\n') - .replace(u'\r', u'\n') - .replace(u'\x85', u'\n') - .replace(u'\u2028', u'\n')) - text = text.replace(u'\n', linesep) - if encoding is None: - encoding = sys.getdefaultencoding() - bytes = text.encode(encoding, errors) - else: - # It is an error to specify an encoding if 'text' is - # an 8-bit string. - assert encoding is None - - if linesep is not None: - text = (text.replace('\r\n', '\n') - .replace('\r', '\n')) - bytes = text.replace('\n', linesep) - - self.write_bytes(bytes, append) - - def lines(self, encoding=None, errors='strict', retain=True): - r""" Open this file, read all lines, return them in a list. - - Optional arguments: - encoding - The Unicode encoding (or character set) of - the file. The default is None, meaning the content - of the file is read as 8-bit characters and returned - as a list of (non-Unicode) str objects. - errors - How to handle Unicode errors; see help(str.decode) - for the options. Default is 'strict' - retain - If true, retain newline characters; but all newline - character combinations ('\r', '\n', '\r\n') are - translated to '\n'. If false, newline characters are - stripped off. Default is True. - - This uses 'U' mode in Python 2.3 and later. 
- """ - if encoding is None and retain: - f = self.open(_textmode) - try: - return f.readlines() - finally: - f.close() - else: - return self.text(encoding, errors).splitlines(retain) - - def write_lines(self, lines, encoding=None, errors='strict', - linesep=os.linesep, append=False): - r""" Write the given lines of text to this file. - - By default this overwrites any existing file at this path. - - This puts a platform-specific newline sequence on every line. - See 'linesep' below. - - lines - A list of strings. - - encoding - A Unicode encoding to use. This applies only if - 'lines' contains any Unicode strings. - - errors - How to handle errors in Unicode encoding. This - also applies only to Unicode strings. - - linesep - The desired line-ending. This line-ending is - applied to every line. If a line already has any - standard line ending ('\r', '\n', '\r\n', u'\x85', - u'\r\x85', u'\u2028'), that will be stripped off and - this will be used instead. The default is os.linesep, - which is platform-dependent ('\r\n' on Windows, '\n' on - Unix, etc.) Specify None to write the lines as-is, - like file.writelines(). - - Use the keyword argument append=True to append lines to the - file. The default is to overwrite the file. Warning: - When you use this with Unicode data, if the encoding of the - existing data in the file is different from the encoding - you specify with the encoding= parameter, the result is - mixed-encoding data, which can really confuse someone trying - to read the file later. - """ - if append: - mode = 'ab' - else: - mode = 'wb' - f = self.open(mode) - try: - for line in lines: - isUnicode = isinstance(line, unicode) - if linesep is not None: - # Strip off any existing line-end and add the - # specified linesep string. 
- if isUnicode: - if line[-2:] in (u'\r\n', u'\x0d\x85'): - line = line[:-2] - elif line[-1:] in (u'\r', u'\n', - u'\x85', u'\u2028'): - line = line[:-1] - else: - if line[-2:] == '\r\n': - line = line[:-2] - elif line[-1:] in ('\r', '\n'): - line = line[:-1] - line += linesep - if isUnicode: - if encoding is None: - encoding = sys.getdefaultencoding() - line = line.encode(encoding, errors) - f.write(line) - finally: - f.close() - - def read_md5(self): - """ Calculate the md5 hash for this file. - - This reads through the entire file. - """ - f = self.open('rb') - try: - m = hashlib.md5() - while True: - d = f.read(8192) - if not d: - break - m.update(d) - finally: - f.close() - return m.digest() - - # --- Methods for querying the filesystem. - - exists = os.path.exists - isdir = os.path.isdir - isfile = os.path.isfile - islink = os.path.islink - ismount = os.path.ismount - - if hasattr(os.path, 'samefile'): - samefile = os.path.samefile - - getatime = os.path.getatime - atime = property( - getatime, None, None, - """ Last access time of the file. """) - - getmtime = os.path.getmtime - mtime = property( - getmtime, None, None, - """ Last-modified time of the file. """) - - if hasattr(os.path, 'getctime'): - getctime = os.path.getctime - ctime = property( - getctime, None, None, - """ Creation time of the file. """) - - getsize = os.path.getsize - size = property( - getsize, None, None, - """ Size of the file, in bytes. """) - - if hasattr(os, 'access'): - def access(self, mode): - """ Return true if current user has access to this path. - - mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK - """ - return os.access(self, mode) - - def stat(self): - """ Perform a stat() system call on this path. """ - return os.stat(self) - - def lstat(self): - """ Like path.stat(), but do not follow symbolic links. """ - return os.lstat(self) - - def get_owner(self): - r""" Return the name of the owner of this file or directory. - - This follows symbolic links. 
- - On Windows, this returns a name of the form ur'DOMAIN\User Name'. - On Windows, a group can own a file or directory. - """ - if os.name == 'nt': - if win32security is None: - raise Exception("path.owner requires win32all to be installed") - desc = win32security.GetFileSecurity( - self, win32security.OWNER_SECURITY_INFORMATION) - sid = desc.GetSecurityDescriptorOwner() - account, domain, typecode = win32security.LookupAccountSid(None, sid) - return domain + u'\\' + account - else: - if pwd is None: - raise NotImplementedError("path.owner is not implemented on this platform.") - st = self.stat() - return pwd.getpwuid(st.st_uid).pw_name - - owner = property( - get_owner, None, None, - """ Name of the owner of this file or directory. """) - - if hasattr(os, 'statvfs'): - def statvfs(self): - """ Perform a statvfs() system call on this path. """ - return os.statvfs(self) - - if hasattr(os, 'pathconf'): - def pathconf(self, name): - return os.pathconf(self, name) - - - # --- Modifying operations on files and directories - - def utime(self, times): - """ Set the access and modified times of this file. """ - os.utime(self, times) - - def chmod(self, mode): - os.chmod(self, mode) - - if hasattr(os, 'chown'): - def chown(self, uid, gid): - os.chown(self, uid, gid) - - def rename(self, new): - os.rename(self, new) - - def renames(self, new): - os.renames(self, new) - - - # --- Create/delete operations on directories - - def mkdir(self, mode=0777): - os.mkdir(self, mode) - - def makedirs(self, mode=0777): - os.makedirs(self, mode) - - def rmdir(self): - os.rmdir(self) - - def removedirs(self): - os.removedirs(self) - - - # --- Modifying operations on files - - def touch(self): - """ Set the access/modified times of this file to the current time. - Create the file if it does not exist. 
- """ - fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666) - os.close(fd) - os.utime(self, None) - - def remove(self): - os.remove(self) - - def unlink(self): - os.unlink(self) - - - # --- Links - - if hasattr(os, 'link'): - def link(self, newpath): - """ Create a hard link at 'newpath', pointing to this file. """ - os.link(self, newpath) - - if hasattr(os, 'symlink'): - def symlink(self, newlink): - """ Create a symbolic link at 'newlink', pointing here. """ - os.symlink(self, newlink) - - if hasattr(os, 'readlink'): - def readlink(self): - """ Return the path to which this symbolic link points. - - The result may be an absolute or a relative path. - """ - return self.__class__(os.readlink(self)) - - def readlinkabs(self): - """ Return the path to which this symbolic link points. - - The result is always an absolute path. - """ - p = self.readlink() - if p.isabs(): - return p - else: - return (self.parent / p).abspath() - - - # --- High-level functions from shutil - - copyfile = shutil.copyfile - copymode = shutil.copymode - copystat = shutil.copystat - copy = shutil.copy - copy2 = shutil.copy2 - copytree = shutil.copytree - if hasattr(shutil, 'move'): - move = shutil.move - rmtree = shutil.rmtree - - - # --- Special stuff from os - - if hasattr(os, 'chroot'): - def chroot(self): - os.chroot(self) - - if hasattr(os, 'startfile'): - def startfile(self): - os.startfile(self) - diff --git a/src/calibre/translations/automatic.py b/src/calibre/translations/automatic.py deleted file mode 100644 index 0ef1553061..0000000000 --- a/src/calibre/translations/automatic.py +++ /dev/null @@ -1,121 +0,0 @@ - -import sys, glob, re - -import mechanize - -URL = 'http://translate.google.com/translate_t?text=%(text)s&langpair=en|%(lang)s&oe=UTF8' - -def browser(): - opener = mechanize.Browser() - opener.set_handle_refresh(True) - opener.set_handle_robots(False) - opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4')] 
- return opener - - -class PoFile(object): - - SANITIZE = re.compile(r'&|<[^<>]+>|\%') - STRING = re.compile(r'"(.*)"') - - def __init__(self, po_file): - self.po_file = open(po_file, 'r+b') - self.browser = browser() - self.entries = [] - self.read() - - def sanitize_line(self, line): - return self.SANITIZE.sub(line) - - def read(self): - translated_lines = [] - self.po_file.seek(0) - - ID = 0 - STR = 1 - WHR = 2 - - mode = None - where, msgid, msgstr, fuzzy = [], [], [], False - - for line in self.po_file.readlines(): - prev_mode = mode - if line.startswith('#:'): - mode = WHR - elif line.startswith('msgid'): - mode = ID - elif line.startswith('msgstr'): - mode = STR - elif line.startswith('#,'): - fuzzy = True - continue - elif line.startswith('#') or not line.strip(): - mode = None - - - if mode != prev_mode: - if prev_mode == STR: - self.add_entry(where, fuzzy, msgid, msgstr) - where, msgid, msgstr, fuzzy = [], [], [], False - - if mode == WHR: - where.append(line[2:].strip()) - elif mode == ID: - msgid.append(self.get_string(line)) - elif mode == STR: - msgstr.append(self.get_string(line)) - elif mode == None: - self.add_line(line) - - def get_string(self, line): - return self.STRING.search(line).group(1) - - def add_line(self, line): - self.entries.append(line.strip()) - - def add_entry(self, where, fuzzy, msgid, msgstr): - self.entries.append(Entry(where, fuzzy, msgid, msgstr)) - - def __str__(self): - return '\n'.join([str(i) for i in self.entries]) + '\n' - - -class Entry(object): - - def __init__(self, where, fuzzy, msgid, msgstr, encoding='utf-8'): - self.fuzzy = fuzzy - self.where = [i.decode(encoding) for i in where] - self.msgid = [i.decode(encoding) for i in msgid] - self.msgstr = [i.decode(encoding) for i in msgstr] - self.encoding = encoding - - def __str__(self): - ans = [] - for line in self.where: - ans.append('#: ' + line.encode(self.encoding)) - if self.fuzzy: - ans.append('#, fuzzy') - first = True - for line in self.msgid: - prefix = 'msgid 
' if first else '' - ans.append(prefix + '"%s"'%line.encode(self.encoding)) - first = False - first = True - for line in self.msgstr: - prefix = 'msgstr ' if first else '' - ans.append(prefix + '"%s"'%line.encode(self.encoding)) - first = False - return '\n'.join(ans) - - - -def main(): - po_files = glob.glob('*.po') - for po_file in po_files: - PoFile(po_file) - pass - -if __name__ == '__main__': - pof = PoFile('de.po') - open('/tmp/de.po', 'wb').write(str(pof)) - #sys.exit(main()) \ No newline at end of file diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index b4323e0a65..115bb81e4c 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -15,7 +15,10 @@ def available_translations(): global _available_translations if _available_translations is None: stats = P('localization/stats.pickle') - stats = cPickle.load(open(stats, 'rb')) + if os.path.exists(stats): + stats = cPickle.load(open(stats, 'rb')) + else: + stats = {} _available_translations = [x for x in stats if stats[x] > 0.1] return _available_translations diff --git a/src/calibre/utils/pyparsing.py b/src/calibre/utils/pyparsing.py index 5404758186..9d12066e7f 100644 --- a/src/calibre/utils/pyparsing.py +++ b/src/calibre/utils/pyparsing.py @@ -85,7 +85,7 @@ __all__ = [ 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', +'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 'indentedBlock', 'originalTextFor', @@ -425,7 +425,7 @@ class 
ParseResults(object): self[k] = v if isinstance(v[0],ParseResults): v[0].__parent = wkref(self) - + self.__toklist += other.__toklist self.__accumNames.update( other.__accumNames ) del other @@ -3231,12 +3231,12 @@ def originalTextFor(expr, asString=True): restore the parsed fields of an HTML start tag into the raw tag text itself, or to revert separate tokens with intervening whitespace back to the original matching input text. Simpler to use than the parse action keepOriginalText, and does not - require the inspect module to chase up the call stack. By default, returns a - string containing the original parsed text. - - If the optional asString argument is passed as False, then the return value is a - ParseResults containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if + require the inspect module to chase up the call stack. By default, returns a + string containing the original parsed text. + + If the optional asString argument is passed as False, then the return value is a + ParseResults containing any results names that were originally matched, and a + single token containing the original matched text from the input string. 
So if the expression passed to originalTextFor contains expressions with defined results names, you must set asString to False if you want to preserve those results name values.""" @@ -3252,7 +3252,7 @@ def originalTextFor(expr, asString=True): del t["_original_end"] matchExpr.setParseAction(extractText) return matchExpr - + # convenience constants for positional expressions empty = Empty().setName("empty") lineStart = LineStart().setName("lineStart") @@ -3532,7 +3532,7 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): ).setParseAction(lambda t:t[0].strip())) else: if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + + content = (Combine(OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) diff --git a/src/calibre/utils/rss_gen.py b/src/calibre/utils/rss_gen.py index fc1f1cf245..125b6d4eca 100644 --- a/src/calibre/utils/rss_gen.py +++ b/src/calibre/utils/rss_gen.py @@ -20,6 +20,7 @@ class WriteXmlMixin: def to_xml(self, encoding = "iso-8859-1"): try: import cStringIO as StringIO + StringIO except ImportError: import StringIO f = StringIO.StringIO() @@ -64,7 +65,7 @@ def _format_date(dt): "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1], dt.year, dt.hour, dt.minute, dt.second) - + ## # A couple simple wrapper objects for the fields which # take a simple value other than a string. @@ -72,7 +73,7 @@ class IntElement: """implements the 'publish' API for integers Takes the tag name and the integer value to publish. - + (Could be used for anything which uses str() to be published to text for XML.) 
""" @@ -138,7 +139,7 @@ class Image: self.width = width self.height = height self.description = description - + def publish(self, handler): handler.startElement("image", self.element_attrs) @@ -150,7 +151,7 @@ class Image: if isinstance(width, int): width = IntElement("width", width) _opt_element(handler, "width", width) - + height = self.height if isinstance(height, int): height = IntElement("height", height) @@ -196,7 +197,7 @@ class TextInput: _element(handler, "name", self.name) _element(handler, "link", self.link) handler.endElement("textInput") - + class Enclosure: """Publish an enclosure""" @@ -255,7 +256,7 @@ class RSS2(WriteXmlMixin): Stores the channel attributes, with the "category" elements under ".categories" and the RSS items under ".items". """ - + rss_attrs = {"version": "2.0"} element_attrs = {} def __init__(self, @@ -269,7 +270,7 @@ class RSS2(WriteXmlMixin): webMaster = None, pubDate = None, # a datetime, *in* *GMT* lastBuildDate = None, # a datetime - + categories = None, # list of strings or Category generator = _generator_name, docs = "http://blogs.law.harvard.edu/tech/rss", @@ -294,7 +295,7 @@ class RSS2(WriteXmlMixin): self.webMaster = webMaster self.pubDate = pubDate self.lastBuildDate = lastBuildDate - + if categories is None: categories = [] self.categories = categories @@ -320,7 +321,7 @@ class RSS2(WriteXmlMixin): _element(handler, "description", self.description) self.publish_extensions(handler) - + _opt_element(handler, "language", self.language) _opt_element(handler, "copyright", self.copyright) _opt_element(handler, "managingEditor", self.managingEditor) @@ -374,8 +375,8 @@ class RSS2(WriteXmlMixin): # output after the three required fields. 
pass - - + + class RSSItem(WriteXmlMixin): """Publish an RSS Item""" element_attrs = {} @@ -391,7 +392,7 @@ class RSSItem(WriteXmlMixin): pubDate = None, # a datetime source = None, # a Source ): - + if title is None and description is None: raise TypeError( "must define at least one of 'title' or 'description'") @@ -421,7 +422,7 @@ class RSSItem(WriteXmlMixin): if isinstance(category, basestring): category = Category(category) category.publish(handler) - + _opt_element(handler, "comments", self.comments) if self.enclosure is not None: self.enclosure.publish(handler) @@ -434,7 +435,7 @@ class RSSItem(WriteXmlMixin): if self.source is not None: self.source.publish(handler) - + handler.endElement("item") def publish_extensions(self, handler): diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index adbc69c4e1..1513948bed 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -57,13 +57,13 @@ recipe_modules = ['recipe_' + r for r in ( 'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti', 'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga', 'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem', + 'the_new_republic', )] import re, imp, inspect, time, os from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup -from calibre.path import path from calibre.ptempfile import PersistentTemporaryDirectory from calibre import __appname__, english_sort @@ -102,8 +102,8 @@ def compile_recipe(src): ''' global _tdir, _crep if _tdir is None or not os.path.exists(_tdir): - _tdir = path(PersistentTemporaryDirectory('_recipes')) - temp = _tdir/('recipe%d.py'%_crep) + _tdir = PersistentTemporaryDirectory('_recipes') + temp = os.path.join(_tdir, 'recipe%d.py'%_crep) _crep += 1 if not isinstance(src, unicode): match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) @@ -118,8 +118,9 @@ def 
compile_recipe(src): src = src.replace('from libprs500', 'from calibre').encode('utf-8') f.write(src) f.close() - module = imp.find_module(temp.namebase, [temp.dirname()]) - module = imp.load_module(temp.namebase, *module) + module = imp.find_module(os.path.splitext(os.path.basename(temp))[0], + [os.path.dirname(temp)]) + module = imp.load_module(os.path.splitext(os.path.basename(temp))[0], *module) classes = inspect.getmembers(module, lambda x : inspect.isclass(x) and \ issubclass(x, (BasicNewsRecipe,)) and \ @@ -148,6 +149,7 @@ _titles.sort(cmp=english_sort) titles = _titles def migrate_automatic_profile_to_automatic_recipe(profile): + BeautifulSoup oprofile = profile profile = compile_recipe(profile) if 'BasicUserProfile' not in profile.__name__: @@ -165,3 +167,4 @@ class BasicUserRecipe%d(AutomaticNewsRecipe): '''%(int(time.time()), repr(profile.title), profile.oldest_article, profile.max_articles_per_feed, profile.summary_length, repr(profile.feeds)) + diff --git a/src/calibre/web/feeds/recipes/recipe_24sata.py b/src/calibre/web/feeds/recipes/recipe_24sata.py index 637d0ce626..5fdc405950 100644 --- a/src/calibre/web/feeds/recipes/recipe_24sata.py +++ b/src/calibre/web/feeds/recipes/recipe_24sata.py @@ -1,61 +1,61 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' - -''' -24sata.hr -''' - -import re -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag - -class Cro24Sata(BasicNewsRecipe): - title = '24 Sata - Hr' - __author__ = 'Darko Miletic' - description = "News Portal from Croatia" - publisher = '24sata.hr' - category = 'news, politics, Croatia' - oldest_article = 2 - max_articles_per_feed = 100 - delay = 4 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +24sata.hr +''' + +import re +from calibre.web.feeds.recipes import 
BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class Cro24Sata(BasicNewsRecipe): + title = '24 Sata - Hr' + __author__ = 'Darko Miletic' + description = "News Portal from Croatia" + publisher = '24sata.hr' + category = 'news, politics, Croatia' + oldest_article = 2 + max_articles_per_feed = 100 + delay = 4 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False language = 'hr' - - lang = 'hr-HR' - - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - remove_tags = [ - dict(name=['object','link','embed']) - ,dict(name='table', attrs={'class':'enumbox'}) - ] - - feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')] - - def preprocess_html(self, soup): - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - for item in soup.findAll(style=True): - del item['style'] - return soup - - def print_version(self, url): - return url + '&action=ispis' - \ No newline at end of file + + lang = 'hr-HR' + + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + remove_tags = [ + 
dict(name=['object','link','embed']) + ,dict(name='table', attrs={'class':'enumbox'}) + ] + + feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')] + + def preprocess_html(self, soup): + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + for item in soup.findAll(style=True): + del item['style'] + return soup + + def print_version(self, url): + return url + '&action=ispis' + diff --git a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py index 9c14527a8b..b306c3ee6c 100644 --- a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py +++ b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py @@ -1,68 +1,68 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' - -''' -24sata.rs -''' - -import re -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag - -class Ser24Sata(BasicNewsRecipe): - title = '24 Sata - Sr' - __author__ = 'Darko Miletic' - description = '24 sata portal vesti iz Srbije' - publisher = 'Ringier d.o.o.' - category = 'news, politics, entertainment, Serbia' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +24sata.rs +''' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class Ser24Sata(BasicNewsRecipe): + title = '24 Sata - Sr' + __author__ = 'Darko Miletic' + description = '24 sata portal vesti iz Srbije' + publisher = 'Ringier d.o.o.' 
+ category = 'news, politics, entertainment, Serbia' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False language = 'sr' - - lang = 'sr-Latn-RS' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] - - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - return self.adeify_images(soup) - - def print_version(self, url): - article = url.partition('#')[0] - article_id = article.partition('id=')[2] - return 'http://www.24sata.rs/_print.php?id=' + article_id - + + lang = 'sr-Latn-RS' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + 
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) + + def print_version(self, url): + article = url.partition('#')[0] + article_id = article.partition('id=')[2] + return 'http://www.24sata.rs/_print.php?id=' + article_id + diff --git a/src/calibre/web/feeds/recipes/recipe_7dias.py b/src/calibre/web/feeds/recipes/recipe_7dias.py index 2507687677..e111617b8d 100644 --- a/src/calibre/web/feeds/recipes/recipe_7dias.py +++ b/src/calibre/web/feeds/recipes/recipe_7dias.py @@ -1,72 +1,72 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -elargentino.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - -class SieteDias(BasicNewsRecipe): - title = '7 dias' - __author__ = 'Darko Miletic' - description = 'Revista Argentina' - publisher = 'ElArgentino.com' - category = 'news, politics, show, Argentina' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +elargentino.com +''' + +from 
calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class SieteDias(BasicNewsRecipe): + title = '7 dias' + __author__ = 'Darko Miletic' + description = 'Revista Argentina' + publisher = 'ElArgentino.com' + category = 'news, politics, show, Argentina' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' language = 'es' - - lang = 'es-AR' - direction = 'ltr' - INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html' - extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' - - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' - - keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})] - - remove_tags = [dict(name='link')] - - feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')] - - def print_version(self, url): - main, sep, article_part = url.partition('/nota-') - article_id, rsep, rrest = article_part.partition('-') - return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - soup.html['lang'] = self.lang - soup.html['dir' ] = self.direction - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - return soup - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = 
soup.find('div',attrs={'class':'colder'}) - if cover_item: - clean_url = self.image_url_processor(None,cover_item.div.img['src']) - cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' - return cover_url - - def image_url_processor(self, baseurl, url): - base, sep, rest = url.rpartition('?Id=') - img, sep2, rrest = rest.partition('&') - return base + sep + img + + lang = 'es-AR' + direction = 'ltr' + INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html' + extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})] + + remove_tags = [dict(name='link')] + + feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')] + + def print_version(self, url): + main, sep, article_part = url.partition('/nota-') + article_id, rsep, rrest = article_part.partition('-') + return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return soup + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup(self.INDEX) + cover_item = soup.find('div',attrs={'class':'colder'}) + if cover_item: + clean_url = 
self.image_url_processor(None,cover_item.div.img['src']) + cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' + return cover_url + + def image_url_processor(self, baseurl, url): + base, sep, rest = url.rpartition('?Id=') + img, sep2, rrest = rest.partition('&') + return base + sep + img diff --git a/src/calibre/web/feeds/recipes/recipe_accountancyage.py b/src/calibre/web/feeds/recipes/recipe_accountancyage.py index b6be176083..a7264499c1 100644 --- a/src/calibre/web/feeds/recipes/recipe_accountancyage.py +++ b/src/calibre/web/feeds/recipes/recipe_accountancyage.py @@ -1,59 +1,59 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' -''' -www.accountancyage.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - -class AccountancyAge(BasicNewsRecipe): - title = 'Accountancy Age' - __author__ = 'Darko Miletic' - description = 'business news' - publisher = 'accountancyage.com' - category = 'news, politics, finances' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - simultaneous_downloads = 1 - encoding = 'utf-8' - lang = 'en' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008-2009, Darko Miletic ' +''' +www.accountancyage.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class AccountancyAge(BasicNewsRecipe): + title = 'Accountancy Age' + __author__ = 'Darko Miletic' + description = 'business news' + publisher = 'accountancyage.com' + category = 'news, politics, finances' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + simultaneous_downloads = 1 + encoding = 'utf-8' + lang = 'en' language = 'en' - - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher 
+ '"\ncomments="' + description + '"\ntags="' + category + '"' - - keep_only_tags = [dict(name='div', attrs={'class':'bodycol'})] - remove_tags = [dict(name=['embed','object'])] - remove_tags_after = dict(name='div', attrs={'id':'permalink'}) - remove_tags_before = dict(name='div', attrs={'class':'gap6'}) - - feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')] - - def print_version(self, url): - rest, sep, miss = url.rpartition('/') - rr, ssep, artid = rest.rpartition('/') - return u'http://www.accountancyage.com/articles/print/' + artid - - def get_article_url(self, article): - return article.get('guid', None) - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - return self.adeify_images(soup) - + + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [dict(name='div', attrs={'class':'bodycol'})] + remove_tags = [dict(name=['embed','object'])] + remove_tags_after = dict(name='div', attrs={'id':'permalink'}) + remove_tags_before = dict(name='div', attrs={'class':'gap6'}) + + feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')] + + def print_version(self, url): + rest, sep, miss = url.rpartition('/') + rr, ssep, artid = rest.rpartition('/') + return u'http://www.accountancyage.com/articles/print/' + artid + + def get_article_url(self, article): + return article.get('guid', None) + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mlang = 
Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) + diff --git a/src/calibre/web/feeds/recipes/recipe_adventuregamers.py b/src/calibre/web/feeds/recipes/recipe_adventuregamers.py index 86e741c441..1cde045953 100644 --- a/src/calibre/web/feeds/recipes/recipe_adventuregamers.py +++ b/src/calibre/web/feeds/recipes/recipe_adventuregamers.py @@ -1,77 +1,77 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.adventuregamers.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class AdventureGamers(BasicNewsRecipe): - title = u'Adventure Gamers' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.adventuregamers.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdventureGamers(BasicNewsRecipe): + title = u'Adventure Gamers' language = 'en' - - __author__ = 'Darko Miletic' - description = 'Adventure games portal' - publisher = 'Adventure Gamers' - category = 'news, games, adventure, technology' + + __author__ = 'Darko Miletic' + description = 'Adventure games portal' + publisher = 'Adventure Gamers' + category = 'news, games, adventure, technology' language = 'en' - - oldest_article = 10 - delay = 10 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'cp1252' - remove_javascript = True - use_embedded_content = False - INDEX = u'http://www.adventuregamers.com' - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - keep_only_tags = [ - dict(name='div', attrs={'class':'content_middle'}) - ] - - remove_tags = [ - 
dict(name=['object','link','embed','form']) - ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']}) - ] - - remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})] - - feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')] - - def get_article_url(self, article): - return article.get('guid', None) - - def append_page(self, soup, appendtag, position): - pager = soup.find('div',attrs={'class':'toolbar_fat_next'}) - if pager: - nexturl = self.INDEX + pager.a['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'class':'bodytext'}) - for it in texttag.findAll(style=True): - del it['style'] - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) - texttag.extract() - appendtag.insert(position,texttag) - - - def preprocess_html(self, soup): - mtag = '\n' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - self.append_page(soup, soup.body, 3) - pager = soup.find('div',attrs={'class':'toolbar_fat'}) - if pager: - pager.extract() - return soup + + oldest_article = 10 + delay = 10 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'cp1252' + remove_javascript = True + use_embedded_content = False + INDEX = u'http://www.adventuregamers.com' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [ + dict(name='div', attrs={'class':'content_middle'}) + ] + + remove_tags = [ + dict(name=['object','link','embed','form']) + ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']}) + ] + + remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})] + + feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')] + + def get_article_url(self, article): + return 
article.get('guid', None) + + def append_page(self, soup, appendtag, position): + pager = soup.find('div',attrs={'class':'toolbar_fat_next'}) + if pager: + nexturl = self.INDEX + pager.a['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'class':'bodytext'}) + for it in texttag.findAll(style=True): + del it['style'] + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + texttag.extract() + appendtag.insert(position,texttag) + + + def preprocess_html(self, soup): + mtag = '\n' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + self.append_page(soup, soup.body, 3) + pager = soup.find('div',attrs={'class':'toolbar_fat'}) + if pager: + pager.extract() + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_ambito.py b/src/calibre/web/feeds/recipes/recipe_ambito.py index f0fb73e873..7074463e34 100644 --- a/src/calibre/web/feeds/recipes/recipe_ambito.py +++ b/src/calibre/web/feeds/recipes/recipe_ambito.py @@ -1,62 +1,61 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' -''' -ambito.com -''' - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008-2009, Darko Miletic ' +''' +ambito.com +''' + from calibre.web.feeds.news import BasicNewsRecipe -class Ambito(BasicNewsRecipe): - title = 'Ambito.com' - __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' - publisher = 'Ambito.com' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'iso-8859-1' - cover_url = 'http://www.ambito.com/img/logo_.jpg' - remove_javascript = True - use_embedded_content = False - - html2lrf_options = [ +class Ambito(BasicNewsRecipe): + title = 'Ambito.com' + __author__ = 'Darko Miletic' + description = 'Informacion Libre las 24 horas' + publisher = 'Ambito.com' + category = 'news, politics, Argentina' + oldest_article = 2 + 
max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'iso-8859-1' + cover_url = 'http://www.ambito.com/img/logo_.jpg' + remove_javascript = True + use_embedded_content = False + + html2lrf_options = [ '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - keep_only_tags = [dict(name='div', attrs={'align':'justify'})] - - remove_tags = [dict(name=['object','link'])] - - feeds = [ - (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' ) - ,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' ) - ,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' ) - ,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General') - ,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' ) - ,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' ) - ,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' ) - ,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' ) - ,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' ) - ,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' ) - ,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' ) - ] - - def print_version(self, url): - return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?') - - def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup - + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [dict(name='div', attrs={'align':'justify'})] + + remove_tags = [dict(name=['object','link'])] + + feeds = 
[ + (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' ) + ,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' ) + ,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' ) + ,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General') + ,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' ) + ,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' ) + ,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' ) + ,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' ) + ,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' ) + ,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' ) + ,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' ) + ] + + def print_version(self, url): + return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?') + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + language = 'es' - \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_amspec.py b/src/calibre/web/feeds/recipes/recipe_amspec.py index 967e4a542a..62bec5ae18 100644 --- a/src/calibre/web/feeds/recipes/recipe_amspec.py +++ b/src/calibre/web/feeds/recipes/recipe_amspec.py @@ -1,55 +1,55 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -spectator.org -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class TheAmericanSpectator(BasicNewsRecipe): - title = 'The American Spectator' - __author__ = 'Darko Miletic' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +spectator.org +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TheAmericanSpectator(BasicNewsRecipe): + title = 'The American 
Spectator' + __author__ = 'Darko Miletic' language = 'en' - - description = 'News from USA' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - INDEX = 'http://spectator.org' - - html2lrf_options = [ - '--comment' , description - , '--category' , 'news, politics, USA' - , '--publisher' , title - ] - - keep_only_tags = [ - dict(name='div', attrs={'class':'post inner'}) - ,dict(name='div', attrs={'class':'author-bio'}) - ] - - remove_tags = [ - dict(name='object') - ,dict(name='div', attrs={'class':'col3' }) - ,dict(name='div', attrs={'class':'post-options' }) - ,dict(name='p' , attrs={'class':'letter-editor'}) - ,dict(name='div', attrs={'class':'social' }) - ] - - feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')] - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - link_item = soup.find('a',attrs={'class':'cover'}) - if link_item: - soup2 = self.index_to_soup(link_item['href']) - link_item2 = soup2.find('div',attrs={'class':'post inner issues'}) - cover_url = self.INDEX + link_item2.img['src'] - return cover_url - - def print_version(self, url): - return url + '/print' + + description = 'News from USA' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + INDEX = 'http://spectator.org' + + html2lrf_options = [ + '--comment' , description + , '--category' , 'news, politics, USA' + , '--publisher' , title + ] + + keep_only_tags = [ + dict(name='div', attrs={'class':'post inner'}) + ,dict(name='div', attrs={'class':'author-bio'}) + ] + + remove_tags = [ + dict(name='object') + ,dict(name='div', attrs={'class':'col3' }) + ,dict(name='div', attrs={'class':'post-options' }) + ,dict(name='p' , attrs={'class':'letter-editor'}) + ,dict(name='div', attrs={'class':'social' }) + ] + + feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')] + + def get_cover_url(self): + cover_url = None + soup = 
self.index_to_soup(self.INDEX) + link_item = soup.find('a',attrs={'class':'cover'}) + if link_item: + soup2 = self.index_to_soup(link_item['href']) + link_item2 = soup2.find('div',attrs={'class':'post inner issues'}) + cover_url = self.INDEX + link_item2.img['src'] + return cover_url + + def print_version(self, url): + return url + '/print' diff --git a/src/calibre/web/feeds/recipes/recipe_axxon_news.py b/src/calibre/web/feeds/recipes/recipe_axxon_news.py index ec5d260aed..a9a99e1de1 100644 --- a/src/calibre/web/feeds/recipes/recipe_axxon_news.py +++ b/src/calibre/web/feeds/recipes/recipe_axxon_news.py @@ -1,62 +1,62 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -axxon.com.ar -''' -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - -class Axxon_news(BasicNewsRecipe): - title = 'Axxon noticias' - __author__ = 'Darko Miletic' - description = 'Axxon, Ciencia Ficcion en Bits' - publisher = 'Axxon' - category = 'news, SF, Argentina, science, movies' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = False - use_embedded_content = False +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +axxon.com.ar +''' +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class Axxon_news(BasicNewsRecipe): + title = 'Axxon noticias' + __author__ = 'Darko Miletic' + description = 'Axxon, Ciencia Ficcion en Bits' + publisher = 'Axxon' + category = 'news, SF, Argentina, science, movies' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = False + use_embedded_content = False language = 'es' - - lang = 'es-AR' - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True - } - - - keep_only_tags = [dict(name='div', attrs={'class':'post'})] - - remove_tags = 
[dict(name=['object','link','iframe','embed'])] - - feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')] - - remove_attributes = ['style','width','height','font','border','align'] - - - def adeify_images2(cls, soup): - for item in soup.findAll('img'): - for attrib in ['height','width','border','align','style']: - if item.has_key(attrib): - del item[attrib] - oldParent = item.parent - if oldParent.name == 'a': - oldParent.name == 'p' - myIndex = oldParent.contents.index(item) - brtag = Tag(soup,'br') - oldParent.insert(myIndex+1,brtag) - return soup - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.html.insert(0,mlang) - return self.adeify_images2(soup) - + + lang = 'es-AR' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + + keep_only_tags = [dict(name='div', attrs={'class':'post'})] + + remove_tags = [dict(name=['object','link','iframe','embed'])] + + feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')] + + remove_attributes = ['style','width','height','font','border','align'] + + + def adeify_images2(cls, soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align','style']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + if oldParent.name == 'a': + oldParent.name == 'p' + myIndex = oldParent.contents.index(item) + brtag = Tag(soup,'br') + oldParent.insert(myIndex+1,brtag) + return soup + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + soup.html.insert(0,mlang) + return self.adeify_images2(soup) + diff --git a/src/calibre/web/feeds/recipes/recipe_azstarnet.py 
b/src/calibre/web/feeds/recipes/recipe_azstarnet.py index 391f21ef56..9b18081598 100644 --- a/src/calibre/web/feeds/recipes/recipe_azstarnet.py +++ b/src/calibre/web/feeds/recipes/recipe_azstarnet.py @@ -1,65 +1,65 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.azstarnet.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class Azstarnet(BasicNewsRecipe): - title = 'Arizona Daily Star' - __author__ = 'Darko Miletic' - description = 'news from Arizona' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.azstarnet.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Azstarnet(BasicNewsRecipe): + title = 'Arizona Daily Star' + __author__ = 'Darko Miletic' + description = 'news from Arizona' language = 'en' - - publisher = 'azstarnet.com' - category = 'news, politics, Arizona, USA' - delay = 1 - oldest_article = 1 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - needs_subscription = True - remove_javascript = True - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - def get_browser(self): - br = BasicNewsRecipe.get_browser() - if self.username is not None and self.password is not None: - br.open('http://azstarnet.com/registration/retro.php') - br.select_form(nr=1) - br['email'] = self.username - br['pass' ] = self.password - br.submit() - return br - - - keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})] - - remove_tags = [ - dict(name=['object','link','iframe','base','img']) - ,dict(name='div',attrs={'class':'bannerinstory'}) - ] - - - feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')] - - def preprocess_html(self, soup): - soup.html['dir' ] = 'ltr' - 
soup.html['lang'] = 'en-US' - mtag = '\n\n\n' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup - + + publisher = 'azstarnet.com' + category = 'news, politics, Arizona, USA' + delay = 1 + oldest_article = 1 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + needs_subscription = True + remove_javascript = True + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://azstarnet.com/registration/retro.php') + br.select_form(nr=1) + br['email'] = self.username + br['pass' ] = self.password + br.submit() + return br + + + keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})] + + remove_tags = [ + dict(name=['object','link','iframe','base','img']) + ,dict(name='div',attrs={'class':'bannerinstory'}) + ] + + + feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')] + + def preprocess_html(self, soup): + soup.html['dir' ] = 'ltr' + soup.html['lang'] = 'en-US' + mtag = '\n\n\n' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_b92.py b/src/calibre/web/feeds/recipes/recipe_b92.py index decb5d898b..612aee4d67 100644 --- a/src/calibre/web/feeds/recipes/recipe_b92.py +++ b/src/calibre/web/feeds/recipes/recipe_b92.py @@ -1,69 +1,69 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' -''' -b92.net -''' -import re -from calibre.web.feeds.news import BasicNewsRecipe - -class B92(BasicNewsRecipe): - title = 'B92' - __author__ = 'Darko Miletic' - description = 'Dnevne vesti iz Srbije i sveta' - 
publisher = 'B92' - category = 'news, politics, Serbia' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1250' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008-2009, Darko Miletic ' +''' +b92.net +''' +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class B92(BasicNewsRecipe): + title = 'B92' + __author__ = 'Darko Miletic' + description = 'Dnevne vesti iz Srbije i sveta' + publisher = 'B92' + category = 'news, politics, Serbia' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1250' language = 'sr' - - lang = 'sr-Latn-RS' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - keep_only_tags = [dict(name='table', attrs={'class':'maindocument'})] - - remove_tags = [ - dict(name='ul', attrs={'class':'comment-nav'}) - ,dict(name=['embed','link','base'] ) - ,dict(name='div', attrs={'class':'udokum'} ) - ] - - feeds = [ - (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml') - ,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' ) - ] - - def print_version(self, url): - return url + '&version=print' - - def preprocess_html(self, soup): - del soup.body['onload'] - for item in soup.findAll('font'): - item.name='div' - if item.has_key('size'): - del item['size'] - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib 
in attribs: - if item.has_key(attrib): - del item[attrib] - return soup + + lang = 'sr-Latn-RS' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + keep_only_tags = [dict(name='table', attrs={'class':'maindocument'})] + + remove_tags = [ + dict(name='ul', attrs={'class':'comment-nav'}) + ,dict(name=['embed','link','base'] ) + ,dict(name='div', attrs={'class':'udokum'} ) + ] + + feeds = [ + (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml') + ,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' ) + ] + + def print_version(self, url): + return url + '&version=print' + + def preprocess_html(self, soup): + del soup.body['onload'] + for item in soup.findAll('font'): + item.name='div' + if item.has_key('size'): + del item['size'] + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_barrons.py b/src/calibre/web/feeds/recipes/recipe_barrons.py index 3e0e4a64ca..8040fcc11f 100644 --- a/src/calibre/web/feeds/recipes/recipe_barrons.py +++ b/src/calibre/web/feeds/recipes/recipe_barrons.py @@ -1,93 +1,93 @@ -## -## web2lrf profile to download articles from Barrons.com -## can download subscriber-only content if username and -## password are supplied. 
-## -''' -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - -class Barrons(BasicNewsRecipe): - - title = 'Barron\'s' - max_articles_per_feed = 50 - needs_subscription = True +## +## web2lrf profile to download articles from Barrons.com +## can download subscriber-only content if username and +## password are supplied. +## +''' +''' + +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class Barrons(BasicNewsRecipe): + + title = 'Barron\'s' + max_articles_per_feed = 50 + needs_subscription = True language = 'en' - - __author__ = 'Kovid Goyal' - description = 'Weekly publication for investors from the publisher of the Wall Street Journal' - timefmt = ' [%a, %b %d, %Y]' - use_embedded_content = False - no_stylesheets = False - match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] - conversion_options = {'linearize_tables': True} - ##delay = 1 - - ## Don't grab articles more than 7 days old - oldest_article = 7 - - - preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in - [ - ## Remove anything before the body of the article. - (r'))', lambda match: '
'), - - ## Remove any links/ads/comments/cruft from the end of the body of the article. - (r'(()|(
)|(

©)|(