From d7fa576f6f5642fc8fcb2fdfabbb70812f9a6aad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 11 Jan 2008 18:39:03 +0000 Subject: [PATCH] Partial implementation of lrs2lrf from Roger Critchlow --- setup.py | 1 + src/libprs500/ebooks/lrf/lrs/convert_from.py | 953 +++++++++++++++++++ src/libprs500/ebooks/lrf/pylrs/elements.py | 8 +- src/libprs500/ebooks/lrf/pylrs/pylrs.py | 111 ++- src/libprs500/trac/plugins/download.py | 1 + 5 files changed, 1050 insertions(+), 24 deletions(-) create mode 100644 src/libprs500/ebooks/lrf/lrs/convert_from.py diff --git a/setup.py b/setup.py index 82e8056c38..45abce7d0b 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ entry_points = { 'pdf2lrf = libprs500.ebooks.lrf.pdf.convert_from:main', 'any2lrf = libprs500.ebooks.lrf.any.convert_from:main', 'lrf2lrs = libprs500.ebooks.lrf.parser:main', + 'lrs2lrf = libprs500.ebooks.lrf.lrs.convert_from:main', 'isbndb = libprs500.ebooks.metadata.isbndb:main', 'librarything = libprs500.ebooks.metadata.library_thing:main', 'lrf2html = libprs500.ebooks.lrf.html.convert_to:main', diff --git a/src/libprs500/ebooks/lrf/lrs/convert_from.py b/src/libprs500/ebooks/lrf/lrs/convert_from.py new file mode 100644 index 0000000000..05784d4337 --- /dev/null +++ b/src/libprs500/ebooks/lrf/lrs/convert_from.py @@ -0,0 +1,953 @@ +## Copyright (C) 2008 Roger Critchlow +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

#
# major mismatch in Button/ButtonBlock/JumpButton
# major mismatch in providing referenced object instead of object reference
#

import os
import sys

try:
    from elementtree.ElementTree import ElementTree
except ImportError:
    from xml.etree.ElementTree import ElementTree

from libprs500.ebooks.lrf.pylrs.pylrs import (
    Book, StyleDefault, BookSetting,
    ImageBlock, Header, Footer, PutObj,
    Paragraph, CR, Italic, Bold, ImageStream,
    CharButton, Button, PushButton, JumpTo,
    Plot, Image, RuledLine, Canvas, DropCaps,
    Sup, Sub, Span, Text,
    LrsError, Space, Box, ButtonBlock, NoBR)

from libprs500 import __appname__, __version__


class LrsParser(object):
    """Build pylrs objects for a Book from the elements of an LRS XML file.

    Diagnostics about unhandled elements are printed as one pre-formatted
    string so the print lines parse both as Python 2 statements and as
    Python 3 function calls; the emitted text is unchanged (including the
    double space the original comma-separated form produced).
    """

    # Attributes that identify or cross-reference objects.  They are skipped
    # when two attribute sets are compared and removed before the remaining
    # attributes are handed on as keyword arguments.
    filterAttrib = ['objid', 'refobj', 'objlabel', 'pagestyle', 'blockstyle', 'textstyle', 'stylelabel',
                    'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']

    def __init__(self, file):
        """Create an empty Book and parse *file* with ElementTree.

        *file* is passed unchanged to ``ElementTree(file=...)``.
        """
        self.file = file
        self.book = Book()
        self.objects = dict()       # objid -> XML element carrying that objid
        self.dobjects = dict()      # objid -> object already built for that objid
        self.tocs = list()
        self.charbuttons = list()   # <CharButton> elements seen so far
        self.jumptos = list()       # <JumpTo> elements whose target is not built yet
        self.pagestyles = list()    # style elements a page style was created for
        self.blockstyles = list()   # style elements a block style was created for
        self.textstyles = list()    # style elements a text style was created for
        self.footers = list()       # footer elements already converted
        self.headers = list()       # header elements already converted
        self.putobjs = list()
        self.root = ElementTree(file=file)

    #
    # find an element by objid
    #
    def get_element_by_objid(self, objid):
        """Return the element whose ``objid`` attribute equals *objid*.

        Every element visited during the search is remembered in
        ``self.objects`` so later lookups are answered from the cache.
        Returns None when no such element exists.

        Raises LrsError when two different elements carry the same objid.
        """
        if objid not in self.objects:
            for element in self.root.getiterator():
                if 'objid' in element.attrib:
                    oid = element.attrib['objid']
                    if oid not in self.objects:
                        self.objects[oid] = element
                    elif self.objects[oid] != element:
                        # objid is an XML attribute value (a string): it has to
                        # be formatted with %s, %d would raise TypeError here.
                        raise LrsError("multiple objects with same objid=%s, %s and %s"
                                       % (oid, element.tag, self.objects[oid].tag))
                    if oid == objid:
                        break
        return self.objects.get(objid)

    #
    # compare two attrib dictionaries for equivalence
    #
    def equal_attrib(self, e1, e2):
        """Return True when *e1* and *e2* have the same attributes.

        Attribute names listed in ``filterAttrib`` are ignored on both sides.
        """
        ignore = LrsParser.filterAttrib
        a1 = dict((name, value) for name, value in e1.attrib.items() if name not in ignore)
        a2 = dict((name, value) for name, value in e2.attrib.items() if name not in ignore)
        return a1 == a2

    #
    # process an attrib dictionary for passing into a pylrs create
    #
    def process_attrib(self, element):
        """Return a copy of ``element.attrib`` without the ``filterAttrib`` names.

        When the element has an ``objid`` it is recorded in ``self.objects``.

        Raises LrsError when a different element already owns that objid.
        """
        attrib = element.attrib.copy()
        for name in LrsParser.filterAttrib:
            if name in attrib:
                value = attrib[name]
                if name == 'objid':
                    if value not in self.objects:
                        self.objects[value] = element
                    elif self.objects[value] != element:
                        raise LrsError("multiple objects with same objid=%s, %s and %s"
                                       % (value, element.tag, self.objects[value].tag))
                del attrib[name]
        return attrib

    #
    # get and parse a style element
    #
    def fetch_style(self, element, stylename):
        """Return the style object referenced by ``element.attrib[stylename]``.

        *stylename* is 'pagestyle', 'blockstyle' or 'textstyle'.  Returns
        None when *element* has no such attribute.  A style element whose
        attributes equal (see equal_attrib) those of a style converted
        earlier reuses the earlier object instead of creating a new one.
        The result is cached in ``self.dobjects`` under the referenced objid.

        Raises LrsError when no element has the referenced objid, or when
        *stylename* is not one of the three handled names.
        """
        if stylename not in element.attrib:
            return None
        objid = element.attrib[stylename]
        if objid in self.dobjects:
            return self.dobjects[objid]
        style = self.get_element_by_objid(objid)
        if style is None:
            raise LrsError("no %s style element found for objid=%s" % (stylename, objid))
        #
        # yuck - headers and footers really mess this up
        #
        if stylename == 'pagestyle':
            known, create = self.pagestyles, self.book.create_page_style
        elif stylename == 'blockstyle':
            known, create = self.blockstyles, self.book.create_block_style
        elif stylename == 'textstyle':
            known, create = self.textstyles, self.book.create_text_style
        else:
            raise LrsError("no handler for %s style name" % stylename)
        newstyle = None
        for e in known:
            if self.equal_attrib(e, style):
                # alias this objid to the equivalent style built earlier
                newstyle = self.dobjects[e.attrib['objid']]
                break
        if newstyle is None:
            known.append(style)
            newstyle = create(**self.process_attrib(style))
        self.dobjects[objid] = newstyle
        return newstyle

    #
    # get and parse a header or footer element
    #
    def fetch_header_footer(self, element, hfname):
        """Return the header/footer referenced by ``element.attrib[hfname]``.

        *hfname* is 'evenheaderid', 'oddheaderid', 'evenfooterid' or
        'oddfooterid'.  Returns None when *element* has no such attribute.
        An element that self.equal_header_footer reports equal to one
        converted earlier reuses the earlier object; otherwise it is
        converted with self.process_Header / self.process_Footer.  The
        result is cached in ``self.dobjects`` under the referenced objid.

        Raises LrsError when no element has the referenced objid, or when
        *hfname* is not one of the four handled names.
        """
        if hfname not in element.attrib:
            return None
        objid = element.attrib[hfname]
        if objid in self.dobjects:
            return self.dobjects[objid]
        hf = self.get_element_by_objid(objid)
        if hf is None:
            raise LrsError("no %s element found for objid=%s" % (hfname, objid))
        if hfname == 'evenheaderid' or hfname == 'oddheaderid':
            is_header = True
            known = self.headers
        elif hfname == 'evenfooterid' or hfname == 'oddfooterid':
            is_header = False
            known = self.footers
        else:
            raise LrsError("no handler for %s header/footer name" % hfname)
        newhf = None
        for e in known:
            if self.equal_header_footer(e, hf):
                newhf = self.dobjects[e.attrib['objid']]
                break
        if newhf is None:
            known.append(hf)
            if is_header:
                newhf = self.process_Header(hf)
            else:
                newhf = self.process_Footer(hf)
        self.dobjects[objid] = newhf
        return newhf

    #
    # these mostly ignore the terminal elements, should be errors in the end
    #
    def process_leaf(self, element):
        """Always raise LrsError naming *element*'s tag."""
        raise LrsError("process leaf element %s???" % element.tag)

    def process_empty(self, element):
        """Raise LrsError when *element* has text or child elements."""
        # len(element) is the number of children; it replaces the deprecated
        # getchildren() and is equally truthy only when children exist.
        if element.text or len(element):
            raise LrsError("element %s is not empty???" % element.tag)

    #
    # elements referenced by sets of text elements
    #
    # def process_Rubi(rubi):
    #     """Process <Rubi> element"""
    #     for element in rubi:
    #         if element.tag == "Oyamoji":
    #             process_simple_char0(element)
    #         elif element.tag == "Rubimoji":
    #             process_simple_char0(element)
    #         else:
    #             print "No processor for ", element.tag
    #
    # def process_AltString(altString):
    #     """Process <AltString> element"""
    #     for element in altString:
    #         if element.tag == "Org":
    #             process_text(element)
    #         elif element.tag == "Alt":
    #             process_text(element)
    #         else:
    #             print "No processor for ", element.tag

    #
    # sets of text elements
    #
    def process_text(self, text, obj):
        """Append the text content of element *text* to *obj* and return *obj*.

        The element's own text and the tail text of each child are appended
        as Text objects; the children themselves are only reported.
        """
        if text.text is not None:
            obj.append(Text(text.text))

        for element in text:
            print("No text processor for  %s" % element.tag)
            if element.tail is not None:
                obj.append(Text(element.tail))

        return obj

    def process_draw_char(self, draw_char, obj):
        """Process an element in the DrawChar set.

        Appends the element's text, an object for each recognised child and
        each child's tail text to *obj*, then returns *obj*.  Unrecognised
        children are reported on standard output and otherwise skipped.
        """
        if draw_char.text is not None:
            obj.append(Text(draw_char.text))

        for element in draw_char:
            tag = element.tag
            if tag == "Span":
                obj.append(self.process_draw_char(element, Span(**element.attrib)))
            elif tag == "Plot":
                obj.append(self.process_text(element, Plot(**element.attrib)))
            elif tag == "CR":
                obj.append(CR())
            elif tag == "Space":
                obj.append(Space(**element.attrib))
            elif tag == "CharButton":
                # The button is created without a target and remembered so
                # it can be found again through self.charbuttons.
                # NOTE(review): this stores an attribute on the Element
                # instance - confirm the ElementTree implementation in use
                # allows that.
                self.charbuttons.append(element)
                element.lrscharbutton = CharButton(None, **self.process_attrib(element))
                obj.append(self.process_simple_char1(element, element.lrscharbutton))
            elif tag == "Sup":
                # NOTE(review): element.text is given to the constructor and
                # process_simple_char0 appends it again as Text, whereas
                # process_simple_char1 builds Sup/Sub from element.attrib -
                # confirm which form is intended.
                obj.append(self.process_simple_char0(element, Sup(element.text)))
            elif tag == "Sub":
                obj.append(self.process_simple_char0(element, Sub(element.text)))
            elif tag == "NoBR":
                obj.append(self.process_simple_char1(element, NoBR()))
            elif tag == "DrawChar":
                obj.append(self.process_simple_char0(element, DropCaps(**element.attrib)))
            elif tag == "Box":
                obj.append(self.process_simple_char0(element, Box(**element.attrib)))
            elif tag == "Italic":
                obj.append(self.process_draw_char(element, Italic()))
            elif tag == "Bold":
                obj.append(self.process_draw_char(element, Bold()))
            # elif element.tag == "Fill":
            #     obj.append(Fill(**element.attrib))
            # elif element.tag == "Rubi":
            #     obj.append(process_Rubi(element))
            # elif element.tag == "Yoko":
            #     obj.append(process_simple_char0(element, Yoko(**element.attrib)))
            # elif element.tag == "Tate":
            #     obj.append(process_simple_char2(element, Tate(**element.attrib)))
            # elif element.tag == "Nekase":
            #     obj.append(process_simple_char2(element, Nekase(**element.attrib)))
            # elif element.tag == "EmpLine":
            #     obj.append(process_simple_char0(element, EmpLine(**element.attrib)))
            # elif element.tag == "EmpDots":
            #     obj.append(process_simple_char0(element, EmpDots(**element.attrib)))
            # elif element.tag == "Gaiji":
            #     obj.append(process_text(element, Gaiji(**element.attrib)))
            # elif element.tag == "AltString":
            #     obj.append(process_AltString(element))
            else:
                print("No DrawChar set processor for  %s" % tag)
            if element.tail is not None:
                obj.append(Text(element.tail))

        return obj

    def process_simple_char0(self, simple_char0, obj):
        """Process an element in the SimpleChar0 set.

        Appends the element's text and the tail text of each child to *obj*
        and returns *obj*.  No child element is handled yet: every child is
        reported on standard output.
        """
        if simple_char0.text is not None:
            obj.append(Text(simple_char0.text))
        for element in simple_char0:
            # if element.tag == "Gaiji":
            #     obj.append(process_text(element, Gaiji(**element.attrib)))
            # elif element.tag == "AltString":
            #     obj.append(process_AltString(element))
            # else:
            print("No SimpleChar0 set processor for  %s" % element.tag)
            if element.tail is not None:
                obj.append(Text(element.tail))

        return obj

    def process_simple_char1(self, simple_char1, obj):
        """Process an element in the SimpleChar1 set.

        Appends the element's text, an object for each Box, Sub, Sup or
        Space child and each child's tail text to *obj*, then returns *obj*.
        Other children are reported on standard output.
        """
        if simple_char1.text is not None:
            obj.append(Text(simple_char1.text))

        for element in simple_char1:
            tag = element.tag
            if tag == "Box":
                # The Box is the container handed to process_simple_char0
                # (same form as the Box branch of process_draw_char).
                obj.append(self.process_simple_char0(element, Box(**element.attrib)))
            elif tag == "Sub":
                obj.append(self.process_simple_char0(element, Sub(**element.attrib)))
            elif tag == "Sup":
                obj.append(self.process_simple_char0(element, Sup(**element.attrib)))
            elif tag == "Space":
                obj.append(Space(**element.attrib))
            # elif element.tag == "Rubi":
            #     obj.append(process_Rubi(element))
            # elif element.tag == "Gaiji":
            #     obj.append(process_text(element, Gaiji(**element.attrib)))
            # elif element.tag == "EmpDots":
            #     obj.append(process_simple_char0(element, EmpDots(**element.attrib)))
            # elif element.tag == "EmpLine":
            #     obj.append(process_simple_char0(element, EmpLine(**element.attrib)))
            # elif element.tag == "AltString":
            #     obj.append(process_AltString(element))
            else:
                print("No SimpleChar1 set processor for  %s" % tag)
            if element.tail is not None:
                obj.append(Text(element.tail))

        return obj

    def process_simple_char2(self, simple_char2, obj):
        """Process an element in the SimpleChar2 set.

        Appends the element's text, a Plot for each Plot child and each
        child's tail text to *obj*, then returns *obj*.  Other children are
        reported on standard output.
        """
        if simple_char2.text is not None:
            obj.append(Text(simple_char2.text))

        for element in simple_char2:
            if element.tag == "Plot":
                obj.append(self.process_text(element, Plot(**element.attrib)))
            # elif element.tag == "Gaiji":
            #     obj.append(process_text(element, Gaiji(**element.attrib)))
            # elif element.tag == "AltString":
            #     obj.append(process_AltString(element))
            else:
                print("No SimpleChar2 set processor for  %s" % element.tag)
            if element.tail is not None:
                obj.append(Text(element.tail))

        return obj

    def process_Canvas(self, canvas):
        """Return a Canvas built from element *canvas*.

        The Canvas receives the element's attributes as keyword arguments
        and one PutObj per PutObj child; other children are reported on
        standard output.
        """
        dcanvas = Canvas(**canvas.attrib)
        # text permitted?
        for element in canvas:
            if element.tag == "PutObj":
                dcanvas.append(PutObj(**element.attrib))
            # elif element.tag == "MoveTo":
            #     dcanvas.append(MoveTo(**element.attrib))
            # elif element.tag == "LineTo":
            #     dcanvas.append(LineTo(**element.attrib))
            # elif element.tag == "DrawBox":
            #     dcanvas.append(DrawBox(**element.attrib))
            # elif element.tag == "DrawEllipse":
            #     dcanvas.append(DrawEllipse(**element.attrib))
            else:
                print("No processor for  %s" % element.tag)
        # tail text permitted?
        return dcanvas

    def process_TextBlock(self, textBlock):
        """Return a text block built from element *textBlock*.

        The block is created through ``self.book.create_text_block`` with
        the styles referenced by the element's 'textstyle' and 'blockstyle'
        attributes and its remaining (process_attrib) attributes, and is
        registered in ``self.dobjects`` under the element's objid.  P
        children become Paragraphs and CR children become CRs; other
        children are reported on standard output.
        """
        dtextblock = self.book.create_text_block(textStyle=self.fetch_style(textBlock, 'textstyle'),
                                                 blockStyle=self.fetch_style(textBlock, 'blockstyle'),
                                                 **self.process_attrib(textBlock))
        self.dobjects[textBlock.attrib['objid']] = dtextblock
        # text permitted?
        for element in textBlock:
            if element.tag == "P":
                dtextblock.append(self.process_draw_char(element, Paragraph()))
            elif element.tag == "CR":
                dtextblock.append(CR())
            else:
                print("No processor for  %s" % element.tag)
        # tail text permitted?
        return dtextblock

    #
    # helper for buttons
    #
    def process_some_Button(self, button, dbutton, name):
        """Append the JumpTo children of element *button* to *dbutton*.

        A JumpTo whose ``refobj`` is already in ``self.dobjects`` points at
        that object.  Otherwise a JumpTo(None) is appended and the element
        is remembered in ``self.jumptos`` together with the JumpTo (stored
        as ``element.lrsjumpto``).  Other children are reported on standard
        output using *name* to identify the kind of button.  Returns
        *dbutton*.
        """
        # text permitted?
        for element in button:
            if element.tag == "JumpTo":
                refobj = element.attrib['refobj']
                if refobj in self.dobjects:
                    dbutton.append(JumpTo(self.dobjects[refobj]))
                else:
                    # NOTE(review): stores an attribute on the Element
                    # instance - confirm the ElementTree implementation in
                    # use allows that.
                    self.jumptos.append(element)
                    element.lrsjumpto = JumpTo(None)
                    dbutton.append(element.lrsjumpto)
            #elif element.tag == "Run":
            #    dbutton.append(Run(**element.attrib))
            #elif element.tag == "SoundStop":
            #    dbutton.append(SoundStop(**element.attrib))
            #elif element.tag == "CloseWindow":
            #    dbutton.append(CloseWindow(**element.attrib))
            else:
                print("No  %s  processor for  %s" % (name, element.tag))
        # tail text permitted?
        return dbutton

    #
    # occurs in ,