From e602d726ab39c8c589db53c5400a2efe668259d1 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Wed, 29 Sep 2010 15:01:39 +0800 Subject: [PATCH 01/24] [SNBOutput] Add stub for SNB format output support. SNB is the only format supported by the E Ink device Bambook manufactured by Shanda (NASDAQ: SNDA) --- resources/images/mimetypes/snb.png | Bin 0 -> 6245 bytes src/calibre/customize/builtins.py | 2 + src/calibre/ebooks/snb/__init__.py | 9 +++ src/calibre/ebooks/snb/output.py | 70 +++++++++++++++++++++++ src/calibre/gui2/convert/snb_output.py | 36 ++++++++++++ src/calibre/gui2/convert/snb_output.ui | 74 +++++++++++++++++++++++++ 6 files changed, 191 insertions(+) create mode 100644 resources/images/mimetypes/snb.png create mode 100644 src/calibre/ebooks/snb/__init__.py create mode 100644 src/calibre/ebooks/snb/output.py create mode 100644 src/calibre/gui2/convert/snb_output.py create mode 100644 src/calibre/gui2/convert/snb_output.ui diff --git a/resources/images/mimetypes/snb.png b/resources/images/mimetypes/snb.png new file mode 100644 index 0000000000000000000000000000000000000000..41b55f4343ae14d4bccaeede12193c0cee478a01 GIT binary patch literal 6245 zcmV-r7@FsaP)Px#24YJ`L;(K){{a7>y{D4^000SaNLh0L01FcU01FcV0GgZ_00007bV*G`2igf8 z201Tr%Cw6B02k#+L_t(|+U=ctcog-y$3HW>n}m=cgi8WR!$nIxQsm;XRx9+Nc&K-< zJ#wkHBTpZ}V1qSg%YXpLc<3zxv8I4QZ#6BbFHuwr4Yn!C~Rm>WJ+k^qQfD9lD z$kZOYbw<$FQfB~)20Z}y0iX6*b;1Dj-k$~JXj?zc5JYY3YpH39w)uf}pjF$iR{+%s z1#oM?bAW-s0L>uWeFl(n1CTVYZ_)MzHG`0xN&@vEAX|e!NPFz*Gk}yEKoIb1&jUc4 z#z%0%0QA|vTbBo91N~K1eYTHDH`eijFeM@v6PySE(JBFtW(+hmHWCN~It)+{h~bcF*E*F#f?%#>63jWAotd)BIBz*Ir z?FMfIdNRO;`0i?(0mzCHi;ZI)K^XgPW?@_KdMDK&8AY%eaw?iTb@01}fJ9!ND)2_3 zTL}R|Z#IC$u+4eBUEmYRyy-AtH3X!gLb$sdz7emF3)m$1L_p1WxzIx(r!)i@hixp@ z3wDFI5C*Ko{Js!iQu1R$wlNGO1w9!>NMnU?Bm`KN{8WVjyOY8m7zVm61X%TYyTK%^=aWn)Z z?C3G*;daG;N7TU1+<|pjkvaWUJv)y6iSMkt zi$7QXU*duI@c$HJ6CyX=_0Vr!-kUxlKrrX!CY502?hPETtEPC+a8`czex`o?a`Lh- zpw-__Red#oIq?y%|NT!feo{H^2lfE46-T{SEI 
zcRhQLA7aI>^;}!>Ek{Fulc7L#Dg;FH@?^unA3k{xfLHEb&G#nFU}#PrF44unemPt| zay(0~{Z9`4_|=%6z3ke10eJY$wMeoQQz^Q5Y!tP~aC^A4a4gTy{W$=~YCr9w96f2t zl;|k@L}4I`2S(fH4YUI=DzC8XaoVv$WKyU*$D*n87?oGZ@w$_2djItk{smN=_(eyx)Ol@m^>pc=%}&XfE^fOe#f}^xvb_8G4G%DD z(*wL!{x)xwzs;z;LdK66%OwRD^6fE`m~!D|xLoeI<=m~~ui-lvUCuk7{*_~tl2D&FHC_5b9~%@6bH zJ!`n>8~+yb;*1;db>?67J$`*~I}g9Hnq3Qj)dfzpcY2>a(s52jk!fse=AEj)^0T+r zar3Y4=dA_Lac#+!hR&64)qv>4Ka@25Xh~hfK9}fXd|?TmUiU)) zp4_`BdL6Ix>gC^E$iRNNyj^veo#h7%m4W*B9qtIaee!pC`JSgyRE5WOZ;Tr6&z}$w z2ev8rCcAyZA!YHjy8)=EJrVW7>kVRX|A9O@V*vm^ee)TDQZVZG5fVwPATarBmjH0I zrowax5G-zKRRx#J|9w~A&c*p-sQ$8s4exB}0zO^|sQ2(A zH5C91$jXU-Y?MkMUBZBvO8A&OCVa6jUvoQ;{b3{fD&Oae=34wx07;gpX|7}Q2X8X_ z#U%jz`^8tqPU3X?-7Ysz&s+w;Q+qcV3jx}D6-A-`?3Zl++aB(C=^+5FA2+4Ph5-A@ zKnLK5pI6pZQ(0Hd+CTp$=Es)|8^z#ZYsNv{Uzb|$$D@?%+DP= zY!px2@?%rLh2AzRSm-*z-%_3sX$*&#ZDZH50~|c@H!A8*($Lz3szP4>3%I0U3^#pa z8uxr_7X3V##^h2nh^J>RWAE`p1ZBxE96=Bmkd?!~6pZ1PORr(!lsRN)^h48Fa#*jwjn@{s;aA+&YVfQ95k#1R`(?+&4dJC| zpaAIx-=y51kKmol0gS*-622kO4jY86)lzRWfP@L55!e=T0IOlZPC&x>v^X@<+mnWE zC;Sw9eNPP)jwFFj!cSK6odDk(@&m_515*uOFaTeYB!2(<-*f8JsieX8mgF#Hqk%$# zdfhVoXi#E+^85X4-@ctad-k9xiYe&5ttzm0G|+5VFaq0%mzz)mK@iBv$ss#Co7Y}@ z4X@YRE#7W;3ZRfy)50yp{qBGdP+VLL zz`=tDZ3XX?r}mhB04wF78L);HpT+?&ZrnJEii)VKtF!D?JvlG1wJTUK@|uC?rs10? 
z`@(I^m@!PAJej7ZCMqi{QB{?qq9Ss0bNk9bOG%)CMG}d637v)y;C8#Y>#n<~s;XlD z{{0LdJlGKY-c}9R8v@LFcQkl0`WMWEijO|}h=zs+a&vQ0Rh6r+zB($%=H_OKi;I~% zcPERnY-i2HWK2l)Q?zfWalC2QBNB@hU(YSk((yX-P1PMpZI&pyk@kt12UbSVJS zr%$&HTu;jl?Ekf|BuYF(h7#*}j0>Kfoy}v9J;qHp-Gs;E;j_;^JuccC&o>at<9jM00a9d-m+X<#I7($Pli$ z;tE^9_mzPrrQfKuTZJD!X5z$&+;PVpNRmWdT^-Lo_Z%f9C5#C zyN)44hS0x%f3|Pm&gs*q*|u#PFTVI99*>7Xg9cGjQo`WDgUQIqK$c}%TU&YIg%?=7 zcro?$^}RANbpAJgZ8Qceo;GyoPznnR@%#O>wzjf-`En*qn85Ji!s8f&BVz%?=nXV| zhi;qT1LWl7aOa(O;_-MWDk@^~ zWlCQeXr>RqP=}92={F+HqeqWs)v8s@nl+2U!a{cJ*uln)8{?LCH{N(7d3kxb-EPjD zIYU`lneAnu*|C7MF9VHM0Y(!&tKP4F{qe^i=a;|yB@GP?$g<3+QKPu^)>|nmDvJ8v zsZ*yibm&l;nwkJOapHvKZsN#My=bF+;eGngO6$=;E8g$q^6bdh3knKoY-~i9W!!Ez z*I$2qO!(mz9xMWAY;3f2eZmk`RgJspk2i8$T2=vuyx)TKCu1D*=g;Q{Klnk%#l7gF ziwxxsZg)s16U`uw_1V$DC4!p@`1XZ>C_}%=Q~{~TtV>Et=-00w?d|QzvYhaEm&=7D zNw{1t0)c=biPu9mBK3BJlCv7b)V0d5TyPbEiEk_3Xq+hoe+FH=&_&~ zg&F>z(oO*cvnql4pJlhpIQsSL*RdKX)}W9694AhkKvh*_S&lkYOfTorfEqP|c&|AD z-m!iFr@h~*ab#s>bzI1Tf`Yi`IB?(qwY9a#vWzH-Oq@6|VRC2I2#i40A7giDbaXH% zVPApG-fyJU)YNo5K700T-1BeUx|P<}Rzy)`@ZiBrnlveC@X6){(Gp5Kjlk`Q_Z#RV z=nn4(sI9HV<#Hj*GB3aUG6e+%%$YN%%lmii*umbtdpi=vE3dq=(;R|Gpz&bGJJw8C zNZTC1OuQH3lz~opfB3z@V36wSYGhd^Gc%K;N00L0gAY<$TgweM+`y<&qo}E=p`xOK z6)RSBEFaIx%HrmmZ|)R6i~=`uypfKec>B{k1a!0a>t8!^6 zMwTvJ%KG)|89H<*!C;W`^74-NWMpJeT3X8d`ST5VbhLUuSrv#C`|S(^j`RcA5$}@$ zoT%;HyO)NB21HTh_SeYsTM(gM? 
z8ZL|`f>FYNs@u{x35@6QskkdweSJNje)=hsCr>6bGn1J!XL93>H&R?&%+{@2dH3CS z2?PQJ0s;E>@6Wh#*G#p-BvwnfFr>}vjkzEsrZN~b z0H?+Zj2c~{xE`;2*GxW;j8P99I1oV)rUG?@7KXGDS{S0HQ~`|0za62#&Z;1pF5N_Z z`A&ucy#hphHwloF4Ff3+0cMlG?oft!(2NY^m^Ff~kM&9}=zd39?PLU^*fEY50D?Lr zND~7{vfOj31%p969uI!MpRBAb)ADa-6winegh8t+sH)Owzdp`~e&{4T86<2F0-^{N z6%~hpcFhR7-vCl2|H*j2$KyekWkc}egHufzfubl~-X9)|2)`D79(t^YFCdCSco8U~ zD59#N)z6KMjb|4vTJ#9u1^iklP|O;Eqceh>lK-S@Ubow@*T<{x<86wfbZn9P&plRA zRRvj=QB)O0mP7l)pQnbtzuV)+<#OQ?MMO~~7!1F=jm7aLwiMu}d;DZ{?00QY1 z0($6VNh@9-hAzu;2(0#4ZxEqPMNt(LSw@x>6y@AsRaH?5{WhW7?Z%^c$nbc;?WV1* z4X@WrAP`WtY}vAC=gys#gqDN1YdL_DlvAX%C$O93Kbf-6tO6*C(lO;c7z`2!1dt>d zNs>?$IRsu&kQEtOlC^| zapQYHOBC<#CIv89{&lDPTLmA_2*TbjNfJJvkM{O<{C>Y?1QJ0FxGc+>=WCmybjZIT z2)NvCGBYz7IB*~X1`I%vWdeaf$GOVN$~LWBx$;d7ew&rp->qd}s_Xt_z2A)d%d$)` z7^JevEatHMA-|Fh>KQ3Ij@HyZs zyWl%x01kSuQ_2v_EnM`rdwNEIeYdjUauE_Ab_k%y*>{RMUnjcVH6h^Q(Rn( z+wJZE9)=#?>gwwL{-YoLXr%_f74TW9{Zpp^NyL9A7KNBK0<&Gds;YLxf1l5X&*!7f z>&4sFh9pUdqDXdjHUloWfMG+2QdCq#K|ukks&?}7Nbr8YpXTP~e=c9XeDRlGe%TlS zU$F|`5d*Lq|5L3DM!Y|s5rpMDlmmpqfIkqx@ADx^5(^eAi2AvbB*ldv2n1+tZEak; zcI^Y@<>g-hXCpd&6g#e+bGM1>A@Xn3`{NryIIc@bvW%p~{a9B^@h;a*5=*3 zdGq4^`}bExfp6QCAdz0&%ovSzBhXYJc&^0j-#*14SrK9GTBneejy4+PHEc?;CUS|yIyYIfcYTdeZ z`y=4XcEfj80a66hs?o$+YGuR-LW4PKr!r5HLSaCbWrhvQPZ;wfRp9X9!_Tf1+#yg>IRPWdU(_>T-t?u03}HZb@nI&=Um>4*1MR#v|Hzyl9# zh^X}?C*j*}06lF8DABa+_zIwp|Dnu3tOAOnke{EAAP5O1-|zQx^5n^#rKP1$>3aFX z@MS0A_xdUz*+eiN{KzCw){O#&6d?3`Klb_=`AHR7D{e3~KQ|ln4r9=$16GV!p0_f9HMo-S?2!>uu6%eP1f!JKq(Y zh9#hx#h8f}euPTDP{Jq4GLjrB(?<*+PDVyXCk2RvUs+lC>izfM|5Pv-Jfn;G0jFbr z+8RK2$NogiwW1q>Hlah3kR=IJ6;X6iTs$&v7jIcv+4GAQE!v>V_VtrGlvG}e^F9YK zssf$Cqjds>asW9zrZ0+QWo0sK*w9$guO2>pcujEGgMZ%U0CpRjD2jMIo{pcL(TMhqDj7Ovw8XqAPwNVv`L(zD3Y6-+sXTzo10HQ{q)nN`}gnvRD&O$#F-ZG z`wYMa&v&_8WMpJ?goZHuy1KfL9(m-E#m9~vJ5BV-9BMjzk6vj2$=*Q0%>DPm2H4=2@D)o&E-(sy(U_uyV7-bIZ0|xLl#H?(_NZ`~7_U z@yEZOKY#vz>yo|5-aR#)u7lBj05xJ$x~p#Ou8QKEqJ$P{ilT@h3bSp^7#m$f7&sC~H}2{qnL8N(qY+A*Qkx*@L@1p!|$h|lMf 
zfA_oJ{cz2iH3xOcURsoSb{K%(>m?1kPy3P~84*RxH!iaK{&&e5h5o+$q9~#$N|(Q3 zB@hIG*49?<>eZ`fzxLW|6`I$lU8Zk`0;rlL2edEPqU~=7$jr>_m7PV2T2)om2MZP~ z`0@Mizkgbb_h~QrU9N+{V_bj-=uhZgK{-G_AXCdDdi$7y8W9GAn#6l`n?GGMeLF%x zIQEC{t{l`X-mSqG&NBm$bw&{2+-+Kvo&+Bg-~ + + Form + + + + 0 + 0 + 400 + 300 + + + + Form + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From dd69e247476fa000e7f5b2d4edb60034d862dbdd Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sat, 9 Oct 2010 22:30:38 +0800 Subject: [PATCH 02/24] [SNBOutput] Add basic output support for SNB file. --- src/calibre/ebooks/snb/output.py | 193 ++++++++++++++++--- src/calibre/ebooks/snb/snbfile.py | 300 ++++++++++++++++++++++++++++++ src/calibre/ebooks/snb/snbml.py | 160 ++++++++++++++++ 3 files changed, 629 insertions(+), 24 deletions(-) create mode 100644 src/calibre/ebooks/snb/snbfile.py create mode 100644 src/calibre/ebooks/snb/snbml.py diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index 4b94b65405..c302c17729 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -4,10 +4,29 @@ __license__ = 'GPL 3' __copyright__ = '2010, Li Fanxi ' __docformat__ = 'restructuredtext en' -import os +import os, string -from calibre.customize.conversion import OutputFormatPlugin, \ - OptionRecommendation +from lxml import etree +from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation +from calibre.ptempfile import TemporaryDirectory +from calibre.constants import __appname__, __version__ +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace +from calibre.ebooks.snb.snbfile import SNBFile +from calibre.ebooks.snb.snbml import SNBMLizer + +def ProcessFileName(fileName): + # Flat the path + fileName = fileName.replace("/", "_").replace(os.sep, "_") + # Handle bookmark for HTML file + fileName = fileName.replace("#", "_") + # Make it lower case + fileName = fileName.lower() + 
# Change extension from jpeg to jpg + root, ext = os.path.splitext(fileName) + if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]: + fileName = root + '.png' + return fileName + class SNBOutput(OutputFormatPlugin): @@ -45,26 +64,152 @@ class SNBOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + # Create temp dir + with TemporaryDirectory('_snb_output') as tdir: + # Create stub directories + snbfDir = os.path.join(tdir, 'snbf') + snbcDir = os.path.join(tdir, 'snbc') + snbiDir = os.path.join(tdir, 'snbc/images') + os.mkdir(snbfDir) + os.mkdir(snbcDir) + os.mkdir(snbiDir) + + # Process Meta data + meta = oeb_book.metadata + if meta.title: + title = unicode(meta.title[0]) + else: + title = '' + authors = [unicode(x) for x in meta.creator if x.role == 'aut'] + if meta.publisher: + publishers = unicode(meta.publisher[0]) + else: + publishers = '' + if meta.language: + lang = unicode(meta.language[0]).upper() + else: + lang = '' + if meta.description: + abstract = unicode(meta.description[0]) + else: + abstract = '' + + # Process Cover + from calibre.ebooks.oeb.base import urldefrag + g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine + href = None + if 'titlepage' not in g: + if 'cover' in g: + href = g['cover'].href + + # Output book info file + bookInfoTree = etree.Element("book-snbf", version="1.0") + headTree = etree.SubElement(bookInfoTree, "head") + etree.SubElement(headTree, "name").text = title + etree.SubElement(headTree, "author").text = ' '.join(authors) + etree.SubElement(headTree, "language").text = lang + etree.SubElement(headTree, "rights") + etree.SubElement(headTree, "publisher").text = publishers + etree.SubElement(headTree, "generator").text = __appname__ + ' ' + __version__ + etree.SubElement(headTree, "created") + etree.SubElement(headTree, "abstract").text = abstract + if href != None: + etree.SubElement(headTree, "cover").text = ProcessFileName(href) + else: + etree.SubElement(headTree, "cover") 
+ bookInfoFile = open(os.path.join(snbfDir, 'book.snbf'), 'wb') + bookInfoFile.write(etree.tostring(bookInfoTree, pretty_print=True, encoding='utf-8')) + bookInfoFile.close() + + # Output TOC + tocInfoTree = etree.Element("toc-snbf") + tocHead = etree.SubElement(tocInfoTree, "head") + tocBody = etree.SubElement(tocInfoTree, "body") + outputFiles = { } + if oeb_book.toc.count() == 0: + log.warn('This SNB file has no Table of Contents. ' + 'Creating a default TOC') + first = iter(oeb_book.spine).next() + oeb_book.toc.add(_('Start'), first.href) + + for tocitem in oeb_book.toc: + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(tocitem.href) + ".snbc") + ch.text = tocitem.title + if tocitem.href.find('#') != -1: + item = string.split(tocitem.href, '#') + if len(item) != 2: + log.error('Error in TOC item: %s' % tocitem) + else: + if item[0] in outputFiles: + outputFiles[item[0]].append((item[1], tocitem.title)) + else: + outputFiles[item[0]] = [] + outputFiles[item[0]].append((item[1], tocitem.title)) + else: + if tocitem.href in outputFiles: + outputFiles[tocitem.href].append(("", tocitem)) + else: + outputFiles[tocitem.href] = [] + outputFiles[tocitem.href].append(("", tocitem)) + + etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody) + + tocInfoFile = open(os.path.join(snbfDir, 'toc.snbf'), 'wb') + tocInfoFile.write(etree.tostring(tocInfoTree, pretty_print=True, encoding='utf-8')) + tocInfoFile.close() + + # Output Files + for item in s: + from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES, PNG_MIME + if m.hrefs[item.href].media_type in OEB_DOCS: + if not item.href in outputFiles: + log.debug('Skipping %s because unused in TOC.' % item.href) + continue + log.debug('Converting %s to snbc...' 
% item.href) + snbwriter = SNBMLizer(log) + snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts) + for subName in snbcTrees: + postfix = '' + if subName != '': + postfix = '_' + subName + outputFile = open(os.path.join(snbcDir, ProcessFileName(item.href + postfix + ".snbc")), 'wb') + outputFile.write(etree.tostring(snbcTrees[subName], pretty_print=True, encoding='utf-8')) + outputFile.close() + for item in m: + if m.hrefs[item.href].media_type in OEB_IMAGES: + log.debug('Converting image: %s ...' % item.href) + content = m.hrefs[item.href].data + if m.hrefs[item.href].media_type != PNG_MIME: + # Convert + from calibre.utils.magick import Image + img = Image() + img.load(content) + img.save(os.path.join(snbiDir, ProcessFileName(item.href))) + else: + outputFile = open(os.path.join(snbiDir, ProcessFileName(item.href)), 'wb') + outputFile.write(content) + outputFile.close() + + # Package as SNB File + snbFile = SNBFile() + snbFile.FromDir(tdir) + snbFile.Output(output_path) + +if __name__ == '__main__': + from calibre.ebooks.oeb.reader import OEBReader + from calibre.ebooks.oeb.base import OEBBook + from calibre.ebooks.conversion.preprocess import HTMLPreProcessor + from calibre.customize.profiles import HanlinV3Output + class OptionValues(object): pass - # writer = TXTMLizer(log) - # txt = writer.extract_content(oeb_book, opts) - - # log.debug('\tReplacing newlines with selected type...') - # txt = specified_newlines(TxtNewlines(opts.newline).newline, txt) - - # close = False - # if not hasattr(output_path, 'write'): - # close = True - # if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '': - # os.makedirs(os.path.dirname(output_path)) - # out_stream = open(output_path, 'wb') - # else: - # out_stream = output_path - - # out_stream.seek(0) - # out_stream.truncate() - # out_stream.write(txt.encode(opts.output_encoding, 'replace')) - - # if close: - # out_stream.close() + opts = OptionValues() + 
opts.output_profile = HanlinV3Output(None) + + html_preprocessor = HTMLPreProcessor(None, None, opts) + from calibre.utils.logging import default_log + oeb = OEBBook(default_log, html_preprocessor) + reader = OEBReader + reader()(oeb, '/tmp/bbb/processed/') + SNBOutput(None).convert(oeb, '/tmp/test.snb', None, None, default_log); diff --git a/src/calibre/ebooks/snb/snbfile.py b/src/calibre/ebooks/snb/snbfile.py new file mode 100644 index 0000000000..aa690fb92b --- /dev/null +++ b/src/calibre/ebooks/snb/snbfile.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL 3' +__copyright__ = '2010, Li Fanxi ' +__docformat__ = 'restructuredtext en' + +import sys, struct, zlib, bz2, os, math + +class FileStream: + def IsBinary(self): + return self.attr & 0x41000000 != 0x41000000 + +def compareFileStream(file1, file2): + return cmp(file1.fileName, file2.fileName) + +class BlockData: + pass + +class SNBFile: + + files = [] + blocks = [] + + MAGIC = 'SNBP000B' + REV80 = 0x00008000 + REVA3 = 0x00A3A3A3 + REVZ1 = 0x00000000 + REVZ2 = 0x00000000 + + def __init__(self, inputFile = None): + if inputFile != None: + self.Parse(inputFile); + + def Parse(self, inputFile): + self.fileName = inputFile + + snbFile = open(self.fileName, "rb") + snbFile.seek(0) + + # Read header + vmbr = snbFile.read(44) + (self.magic, self.rev80, self.revA3, self.revZ1, + self.fileCount, self.vfatSize, self.vfatCompressed, + self.binStreamSize, self.plainStreamSizeUncompressed, + self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr) + + # Read FAT + self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed)) + self.ParseFile(self.vfat, self.fileCount) + + # Read tail + snbFile.seek(-16, os.SEEK_END) + #plainStreamEnd = snbFile.tell() + tailblock = snbFile.read(16) + (self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock) + snbFile.seek(self.tailOffset) + self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize)) + self.tailSizeUncompressed = 
len(self.vTailUncompressed) + self.ParseTail(self.vTailUncompressed, self.fileCount) + + # Uncompress file data + # Read files + binPos = 0 + plainPos = 0 + uncompressedData = None + for f in self.files: + if f.attr & 0x41000000 == 0x41000000: + # Compressed Files + if uncompressedData == None: + uncompressedData = "" + for i in range(self.plainBlock): + bzdc = bz2.BZ2Decompressor() + if (i < self.plainBlock - 1): + bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset; + else: + bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset; + snbFile.seek(self.blocks[self.binBlock + i].Offset); + try: + data = snbFile.read(bSize) + uncompressedData += bzdc.decompress(data) + except EOFError, e: + print e + f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize] + plainPos += f.fileSize + elif f.attr & 0x01000000 == 0x01000000: + # Binary Files + snbFile.seek(44 + self.vfatCompressed + binPos) + f.fileBody = snbFile.read(f.fileSize) + binPos += f.fileSize + else: + print f.attr, f.fileName + raise Exception("Invalid file") + snbFile.close() + + def ParseFile(self, vfat, fileCount): + fileNames = vfat[fileCount*12:].split('\0'); + for i in range(fileCount): + f = FileStream() + (f.attr, f.fileNameOffset, f.fileSize) = struct.unpack('>iii', vfat[i * 12 : (i+1)*12]) + f.fileName = fileNames[i] + self.files.append(f) + + def ParseTail(self, vtail, fileCount): + self.binBlock = (self.binStreamSize + 0x8000 - 1) / 0x8000; + self.plainBlock = (self.plainStreamSizeUncompressed + 0x8000 - 1) / 0x8000; + for i in range(self.binBlock + self.plainBlock): + block = BlockData() + (block.Offset,) = struct.unpack('>i', vtail[i * 4 : (i+1) * 4]) + self.blocks.append(block) + for i in range(fileCount): + (self.files[i].blockIndex, self.files[i].contentOffset) = struct.unpack('>ii', vtail[(self.binBlock + self.plainBlock) * 4 + i * 8 : (self.binBlock + self.plainBlock) * 4 + (i+1) * 8]) + + def IsValid(self): + if self.magic != SNBFile.MAGIC: 
+ return False + if self.rev80 != SNBFile.REV80: + return False + if self.revA3 != SNBFile.REVA3: + return False + if self.revZ1 != SNBFile.REVZ1: + return False + if self.revZ2 != SNBFile.REVZ2: + return False + if self.vfatSize != len(self.vfat): + return False + if self.fileCount != len(self.files): + return False + if (self.binBlock + self.plainBlock) * 4 + self.fileCount * 8 != self.tailSizeUncompressed: + return False + if self.tailMagic != SNBFile.MAGIC: + print self.tailMagic + return False + return True + + def FromDir(self, tdir): + for root, dirs, files in os.walk(tdir): + for name in files: + print name + p, ext = os.path.splitext(name) + if ext in [ ".snbf", ".snbc" ]: + self.AppendPlain(os.path.relpath(os.path.join(root, name), tdir), tdir) + else: + self.AppendBinary(os.path.relpath(os.path.join(root, name), tdir), tdir) + + def AppendPlain(self, fileName, tdir): + f = FileStream() + f.attr = 0x41000000 + f.fileSize = os.path.getsize(os.path.join(tdir,fileName)) + f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() + f.fileName = fileName + print f.fileSize + self.files.append(f) + + def AppendBinary(self, fileName, tdir): + f = FileStream() + f.attr = 0x01000000 + f.fileSize = os.path.getsize(os.path.join(tdir,fileName)) + f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() + f.fileName = fileName + print f.fileSize + self.files.append(f) + + def Output(self, outputFile): + + # Sort the files in file buffer, + # requried by the SNB file format + self.files.sort(compareFileStream) + + outputFile = open(outputFile, 'wb') + # File header part 1 + vmbrp1 = struct.pack('>8siiii', SNBFile.MAGIC, SNBFile.REV80, SNBFile.REVA3, SNBFile.REVZ1, len(self.files)) + + # Create VFAT & file stream + vfat = '' + fileNameTable = '' + plainStream = '' + binStream = '' + for f in self.files: + vfat += struct.pack('>iii', f.attr, len(fileNameTable), f.fileSize); + fileNameTable += (f.fileName + '\0') + + if f.attr & 0x41000000 == 0x41000000: + # Plain 
Files + f.contentOffset = len(plainStream) + plainStream += f.fileBody + elif f.attr & 0x01000000 == 0x01000000: + # Binary Files + f.contentOffset = len(binStream) + binStream += f.fileBody + else: + print f.attr, f.fileName + raise Exception("Unknown file type") + vfatCompressed = zlib.compress(vfat+fileNameTable) + + # File header part 2 + vmbrp2 = struct.pack('>iiiii', len(vfat+fileNameTable), len(vfatCompressed), len(binStream), len(plainStream), SNBFile.REVZ2) + # Write header + outputFile.write(vmbrp1 + vmbrp2) + # Write vfat + outputFile.write(vfatCompressed) + + # Generate block information + binBlockOffset = 0x2C + len(vfatCompressed) + plainBlockOffset = binBlockOffset + len(binStream) + + binBlock = (len(binStream) + 0x8000 - 1) / 0x8000 + plainBlock = (len(plainStream) + 0x8000 - 1) / 0x8000 + + offset = 0 + tailBlock = '' + for i in range(binBlock): + tailBlock += struct.pack('>i', binBlockOffset + offset) + offset += 0x8000; + tailRec = '' + for f in self.files: + t = 0 + if f.IsBinary(): + t = 0 + else: + t = binBlock + tailRec += struct.pack('>ii', f.contentOffset / 0x8000 + t, f.contentOffset % 0x8000); + + # Write binary stream + outputFile.write(binStream) + + # Write plain stream + pos = 0 + offset = 0 + while pos < len(plainStream): + tailBlock += struct.pack('>i', plainBlockOffset + offset); + block = plainStream[pos:pos+0x8000]; + compressed = bz2.compress(block) + outputFile.write(compressed) + offset += len(compressed) + pos += 0x8000 + + # Write tail block + compressedTail = zlib.compress(tailBlock + tailRec) + outputFile.write(compressedTail) + + # Write tail pointer + veom = struct.pack('>ii', len(compressedTail), plainBlockOffset + offset) + outputFile.write(veom) + + # Write file end mark + outputFile.write(SNBFile.MAGIC); + + # Close + outputFile.close() + return + + def Dump(self): + print "File Name:\t", self.fileName + print "File Count:\t", self.fileCount + print "VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, 
self.vfatCompressed) + print "Binary Stream Size:\t", self.binStreamSize + print "Plain Stream Uncompressed Size:\t", self.plainStreamSizeUncompressed + print "Binary Block Count:\t", self.binBlock + print "Plain Block Count:\t", self.plainBlock + for i in range(self.fileCount): + print "File ", i + f = self.files[i] + print "File Name: ", f.fileName + print "File Attr: ", f.attr + print "File Size: ", f.fileSize + print "Block Index: ", f.blockIndex + print "Content Offset: ", f.contentOffset + tempFile = open("/tmp/" + f.fileName, 'wb') + tempFile.write(f.fileBody) + tempFile.close() + +def usage(): + print "This unit test is for INTERNAL usage only!" + print "This unit test accept two parameters." + print "python snbfile.py " + print "The input file will be extracted and write to dest file. " + print "Meta data of the file will be shown during this process." + +def main(): + if len(sys.argv) != 3: + usage() + sys.exit(0) + inputFile = sys.argv[1] + outputFile = sys.argv[2] + + print "Input file: ", inputFile + print "Output file: ", outputFile + + snbFile = SNBFile(inputFile) + if snbFile.IsValid(): + snbFile.Dump() + snbFile.Output(outputFile) + else: + print "The input file is invalid." 
+ return 1 + return 0 + +if __name__ == "__main__": + """SNB file unit test""" + sys.exit(main()) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py new file mode 100644 index 0000000000..e1956b5937 --- /dev/null +++ b/src/calibre/ebooks/snb/snbml.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL 3' +__copyright__ = '2010, Li Fanxi ' +__docformat__ = 'restructuredtext en' + +''' +Transform OEB content into SNB format +''' + +import os +import re + +from lxml import etree + +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace +from calibre.ebooks.oeb.stylizer import Stylizer + +def ProcessFileName(fileName): + # Flat the path + fileName = fileName.replace("/", "_").replace(os.sep, "_") + # Handle bookmark for HTML file + fileName = fileName.replace("#", "_") + # Make it lower case + fileName = fileName.lower() + # Change extension from jpeg to jpg + root, ext = os.path.splitext(fileName) + if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]: + fileName = root + '.png' + return fileName + + +BLOCK_TAGS = [ + 'div', + 'p', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'li', + 'tr', +] + +BLOCK_STYLES = [ + 'block', +] + +SPACE_TAGS = [ + 'td', +] + +CLIABRE_SNB_IMG_TAG = "" + +class SNBMLizer(object): + + curSubItem = "" + curText = [ ] + + def __init__(self, log): + self.log = log + + def extract_content(self, oeb_book, item, subitems, opts): + self.log.info('Converting XHTML to SNBC...') + self.oeb_book = oeb_book + self.opts = opts + self.item = item + self.subitems = subitems + return self.mlize(); + + + def mlize(self): + stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile) + content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode)) + content = self.remove_newlines(content) + trees = { } + for subitem, subtitle in self.subitems: + snbcTree = etree.Element("snbc") + etree.SubElement(etree.SubElement(snbcTree, "head"), 
"title").text = subtitle + etree.SubElement(snbcTree, "body") + trees[subitem] = snbcTree + + self.dump_text(trees, self.subitems, etree.fromstring(content), stylizer) + self.Output(trees) + return trees + + def remove_newlines(self, text): + self.log.debug('\tRemove newlines for processing...') + text = text.replace('\r\n', ' ') + text = text.replace('\n', ' ') + text = text.replace('\r', ' ') + + return text + + def dump_text(self, trees, subitems, elem, stylizer, end=''): + ''' + @elem: The element in the etree that we are working on. + @stylizer: The style information attached to the element. + @end: The last two characters of the text from the previous element. + This is used to determine if a blank line is needed when starting + a new block element. + ''' + if not isinstance(elem.tag, basestring) \ + or namespace(elem.tag) != XHTML_NS: + return [''] + + if elem.attrib.get('id') != None and elem.attrib['id'] in [ href for href, title in subitems ]: + if self.curSubItem != None and self.curSubItem != elem.attrib['id']: + self.Output(trees) + self.curSubItem = elem.attrib['id'] + self.curText = [ ] + + style = stylizer.style(elem) + + if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ + or style['visibility'] == 'hidden': + return [''] + + tag = barename(elem.tag) + in_block = False + + # Are we in a paragraph block? + if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES: + in_block = True + if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text: + self.curText.append(u'\n\n') + + if tag in SPACE_TAGS: + if not end.endswith('u ') and hasattr(elem, 'text') and elem.text: + self.curText.append(u' ') + + if tag == 'img': + self.curText.append(u'%s%s' % (CLIABRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) + + # Process tags that contain text. 
+ if hasattr(elem, 'text') and elem.text: + self.curText.append(elem.text) + + for item in elem: + en = u'' + if len(self.curText) >= 2: + en = self.curText[-1][-2:] + self.dump_text(trees, subitems, item, stylizer, en) + + if in_block: + self.curText.append(u'\n\n') + + if hasattr(elem, 'tail') and elem.tail: + self.curText.append(elem.tail) + + def Output(self, trees): + if self.curSubItem == None or not self.curSubItem in trees: + return + for t in self.curText: + if len(t.strip(' \t\n\r')) != 0: + if t.find(CLIABRE_SNB_IMG_TAG) == 0: + etree.SubElement(trees[self.curSubItem], "img").text = t[len(CLIABRE_SNB_IMG_TAG):] + else: + etree.SubElement(trees[self.curSubItem], "text").text = etree.CDATA(unicode('' + t)) From 603b8811893fc080b02b182e52b20cbf0aab8f33 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sat, 9 Oct 2010 23:04:03 +0800 Subject: [PATCH 03/24] [SNBOutputProfile] Add a Bambook Output Profile --- src/calibre/customize/profiles.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 0310f09242..937cb9c3b4 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -642,11 +642,24 @@ class NookOutput(OutputProfile): fbase = 16 fsizes = [12, 12, 14, 16, 18, 20, 22, 24] +class BambookOutput(OutputProfile): + + name = 'Sanda Bambook' + short_name = 'bambook' + description = _('This profile is intended for the Sanda Bambook.') + + # Screen size is a best guess + screen_size = (800, 600) + dpi = 168.451 + fbase = 12 + fsizes = [10, 12, 14, 16] + output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output, SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output, HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput, iPadOutput, KoboReaderOutput, SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput, - IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,] + IRexDR1000Output, 
IRexDR800Output, JetBook5Output, NookOutput, + BambookOutput, ] output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower())) From 6a301a1a9a97a7bb769b05f12b5574e9e83a5116 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 10 Oct 2010 11:27:27 +0800 Subject: [PATCH 04/24] [SNBOutput] Add two spaces for each paragraph. --- src/calibre/ebooks/snb/snbml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index e1956b5937..15f3413489 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -157,4 +157,4 @@ class SNBMLizer(object): if t.find(CLIABRE_SNB_IMG_TAG) == 0: etree.SubElement(trees[self.curSubItem], "img").text = t[len(CLIABRE_SNB_IMG_TAG):] else: - etree.SubElement(trees[self.curSubItem], "text").text = etree.CDATA(unicode('' + t)) + etree.SubElement(trees[self.curSubItem], "text").text = etree.CDATA(unicode(u'\u3000\u3000' + t)) From 8fd3f0ebaab8f9f104842e671e1e2312695780d4 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 10 Oct 2010 11:28:43 +0800 Subject: [PATCH 05/24] [SNBOutput] Fix the path error on different OSes. 
--- src/calibre/ebooks/snb/snbfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/snb/snbfile.py b/src/calibre/ebooks/snb/snbfile.py index aa690fb92b..6d2c627fbb 100644 --- a/src/calibre/ebooks/snb/snbfile.py +++ b/src/calibre/ebooks/snb/snbfile.py @@ -147,7 +147,7 @@ class SNBFile: f.attr = 0x41000000 f.fileSize = os.path.getsize(os.path.join(tdir,fileName)) f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() - f.fileName = fileName + f.fileName = fileName.replace(os.sep, '/') print f.fileSize self.files.append(f) @@ -156,7 +156,7 @@ class SNBFile: f.attr = 0x01000000 f.fileSize = os.path.getsize(os.path.join(tdir,fileName)) f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() - f.fileName = fileName + f.fileName = fileName.replace(os.sep, '/') print f.fileSize self.files.append(f) From 0c387834f43b108ee02f839d1122e44f6758a4d6 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 10 Oct 2010 16:25:22 +0800 Subject: [PATCH 06/24] [SNBOutput] The conetent in html before the first bookmark should also be outputted. 
--- src/calibre/ebooks/snb/output.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index a02d085b5e..65f06c7994 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -133,9 +133,6 @@ class SNBOutput(OutputFormatPlugin): oeb_book.toc.add(_('Start'), first.href) for tocitem in oeb_book.toc: - ch = etree.SubElement(tocBody, "chapter") - ch.set("src", ProcessFileName(tocitem.href) + ".snbc") - ch.text = tocitem.title if tocitem.href.find('#') != -1: item = string.split(tocitem.href, '#') if len(item) != 2: @@ -145,6 +142,11 @@ class SNBOutput(OutputFormatPlugin): outputFiles[item[0]].append((item[1], tocitem.title)) else: outputFiles[item[0]] = [] + if not "" in outputFiles[item[0]]: + outputFiles[item[0]].append(("", _("Start"))) + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(item[0]) + ".snbc") + ch.text = _("Start") outputFiles[item[0]].append((item[1], tocitem.title)) else: if tocitem.href in outputFiles: @@ -152,6 +154,10 @@ class SNBOutput(OutputFormatPlugin): else: outputFiles[tocitem.href] = [] outputFiles[tocitem.href].append(("", tocitem.title)) + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(tocitem.href) + ".snbc") + ch.text = tocitem.title + etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody) From b4c69ba6343cae44110ea28e364d82ba57c313ae Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 10 Oct 2010 17:47:23 +0800 Subject: [PATCH 07/24] [SNBOutput] Reduce the size of the images --- src/calibre/ebooks/snb/output.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index 65f06c7994..08ede2ca37 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -21,7 +21,7 @@ def ProcessFileName(fileName): fileName = 
fileName.replace("#", "_") # Make it lower case fileName = fileName.lower() - # Change extension from jpeg to jpg + # Change extension for image files to png root, ext = os.path.splitext(fileName) if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]: fileName = root + '.png' @@ -187,11 +187,8 @@ class SNBOutput(OutputFormatPlugin): log.debug('Converting image: %s ...' % item.href) content = m.hrefs[item.href].data if m.hrefs[item.href].media_type != PNG_MIME: - # Convert - from calibre.utils.magick import Image - img = Image() - img.load(content) - img.save(os.path.join(snbiDir, ProcessFileName(item.href))) + # Convert & Resize image + self.HandleImage(content, os.path.join(snbiDir, ProcessFileName(item.href))) else: outputFile = open(os.path.join(snbiDir, ProcessFileName(item.href)), 'wb') outputFile.write(content) @@ -202,6 +199,29 @@ class SNBOutput(OutputFormatPlugin): snbFile.FromDir(tdir) snbFile.Output(output_path) + def HandleImage(self, imageData, imagePath): + from calibre.utils.magick import Image + img = Image() + img.load(imageData) + print img.size + (x,y) = img.size + # TODO use the data from device profile + SCREEN_X = 540 + SCREEN_Y = 700 + # Handle big image only + if x > SCREEN_X or y > SCREEN_Y: + SCREEN_RATIO = float(SCREEN_Y) / SCREEN_X + imgRatio = float(y) / x + xScale = float(x) / SCREEN_X + yScale = float(y) / SCREEN_Y + scale = max(xScale, yScale) + # TODO : intelligent image rotation + # img = img.rotate(90) + # x,y = y,x + img.size = (x / scale, y / scale) + print img.size + img.save(imagePath) + if __name__ == '__main__': from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks.oeb.base import OEBBook From e72c3ce0f83532688d26bda3d3fefb6e4e901729 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 10 Oct 2010 19:40:55 +0800 Subject: [PATCH 08/24] [[SNBOutput] Reuse the original html->txt algorithm in txtml.py to get better output. Removed some unnecessary debug prints. 
--- src/calibre/ebooks/snb/output.py | 2 - src/calibre/ebooks/snb/snbfile.py | 3 - src/calibre/ebooks/snb/snbml.py | 134 ++++++++++++++++++++++-------- 3 files changed, 100 insertions(+), 39 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index 08ede2ca37..a682062de2 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -203,7 +203,6 @@ class SNBOutput(OutputFormatPlugin): from calibre.utils.magick import Image img = Image() img.load(imageData) - print img.size (x,y) = img.size # TODO use the data from device profile SCREEN_X = 540 @@ -219,7 +218,6 @@ class SNBOutput(OutputFormatPlugin): # img = img.rotate(90) # x,y = y,x img.size = (x / scale, y / scale) - print img.size img.save(imagePath) if __name__ == '__main__': diff --git a/src/calibre/ebooks/snb/snbfile.py b/src/calibre/ebooks/snb/snbfile.py index 6d2c627fbb..ca10f800c7 100644 --- a/src/calibre/ebooks/snb/snbfile.py +++ b/src/calibre/ebooks/snb/snbfile.py @@ -135,7 +135,6 @@ class SNBFile: def FromDir(self, tdir): for root, dirs, files in os.walk(tdir): for name in files: - print name p, ext = os.path.splitext(name) if ext in [ ".snbf", ".snbc" ]: self.AppendPlain(os.path.relpath(os.path.join(root, name), tdir), tdir) @@ -148,7 +147,6 @@ class SNBFile: f.fileSize = os.path.getsize(os.path.join(tdir,fileName)) f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() f.fileName = fileName.replace(os.sep, '/') - print f.fileSize self.files.append(f) def AppendBinary(self, fileName, tdir): @@ -157,7 +155,6 @@ class SNBFile: f.fileSize = os.path.getsize(os.path.join(tdir,fileName)) f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() f.fileName = fileName.replace(os.sep, '/') - print f.fileSize self.files.append(f) def Output(self, outputFile): diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index 15f3413489..c357971b5e 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ 
b/src/calibre/ebooks/snb/snbml.py @@ -51,12 +51,13 @@ SPACE_TAGS = [ 'td', ] -CLIABRE_SNB_IMG_TAG = "" +CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>" +CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>" class SNBMLizer(object): curSubItem = "" - curText = [ ] +# curText = [ ] def __init__(self, log): self.log = log @@ -71,6 +72,7 @@ class SNBMLizer(object): def mlize(self): + output = [ u'' ] stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile) content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode)) content = self.remove_newlines(content) @@ -80,9 +82,20 @@ class SNBMLizer(object): etree.SubElement(etree.SubElement(snbcTree, "head"), "title").text = subtitle etree.SubElement(snbcTree, "body") trees[subitem] = snbcTree + output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, "")) + output += self.dump_text(self.subitems, etree.fromstring(content), stylizer) + output = self.cleanup_text(u''.join(output)) - self.dump_text(trees, self.subitems, etree.fromstring(content), stylizer) - self.Output(trees) + subitem = '' + for line in output.splitlines(): + line = line.strip(' \t\n\r') + if len(line) != 0: + if line.find(CALIBRE_SNB_IMG_TAG) == 0: + etree.SubElement(trees[subitem], "img").text = line[len(CALIBRE_SNB_IMG_TAG):] + elif line.find(CALIBRE_SNB_BM_TAG) == 0: + subitem = line[len(CALIBRE_SNB_BM_TAG):] + else: + etree.SubElement(trees[subitem], "text").text = etree.CDATA(unicode(u'\u3000\u3000' + line)) return trees def remove_newlines(self, text): @@ -93,25 +106,86 @@ class SNBMLizer(object): return text - def dump_text(self, trees, subitems, elem, stylizer, end=''): - ''' - @elem: The element in the etree that we are working on. - @stylizer: The style information attached to the element. - @end: The last two characters of the text from the previous element. - This is used to determine if a blank line is needed when starting - a new block element. 
- ''' + def cleanup_text(self, text): + self.log.debug('\tClean up text...') + # Replace bad characters. + text = text.replace(u'\xc2', '') + text = text.replace(u'\xa0', ' ') + text = text.replace(u'\xa9', '(C)') + + # Replace tabs, vertical tags and form feeds with single space. + text = text.replace('\t+', ' ') + text = text.replace('\v+', ' ') + text = text.replace('\f+', ' ') + + # Single line paragraph. + text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text) + + # Remove multiple spaces. + text = re.sub('[ ]{2,}', ' ', text) + + # Remove excessive newlines. + text = re.sub('\n[ ]+\n', '\n\n', text) + if self.opts.remove_paragraph_spacing: + text = re.sub('\n{2,}', '\n', text) + text = re.sub('(?imu)^(?=.)', '\t', text) + else: + text = re.sub('\n{3,}', '\n\n', text) + + # Replace spaces at the beginning and end of lines + text = re.sub('(?imu)^[ ]+', '', text) + text = re.sub('(?imu)[ ]+$', '', text) + + if self.opts.max_line_length: + max_length = self.opts.max_line_length + if self.opts.max_line_length < 25 and not self.opts.force_max_line_length: + max_length = 25 + short_lines = [] + lines = text.splitlines() + for line in lines: + while len(line) > max_length: + space = line.rfind(' ', 0, max_length) + if space != -1: + # Space was found. + short_lines.append(line[:space]) + line = line[space + 1:] + else: + # Space was not found. + if self.opts.force_max_line_length: + # Force breaking at max_lenght. + short_lines.append(line[:max_length]) + line = line[max_length:] + else: + # Look for the first space after max_length. + space = line.find(' ', max_length, len(line)) + if space != -1: + # Space was found. + short_lines.append(line[:space]) + line = line[space + 1:] + else: + # No space was found cannot break line. 
+ short_lines.append(line) + line = '' + # Add the text that was less than max_lengh to the list + short_lines.append(line) + text = '\n'.join(short_lines) + + return text + + def dump_text(self, subitems, elem, stylizer, end=''): + if not isinstance(elem.tag, basestring) \ or namespace(elem.tag) != XHTML_NS: return [''] + + text = [''] + style = stylizer.style(elem) + if elem.attrib.get('id') != None and elem.attrib['id'] in [ href for href, title in subitems ]: if self.curSubItem != None and self.curSubItem != elem.attrib['id']: - self.Output(trees) self.curSubItem = elem.attrib['id'] - self.curText = [ ] - - style = stylizer.style(elem) + text.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem)) if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ or style['visibility'] == 'hidden': @@ -124,37 +198,29 @@ class SNBMLizer(object): if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES: in_block = True if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text: - self.curText.append(u'\n\n') + text.append(u'\n\n') if tag in SPACE_TAGS: if not end.endswith('u ') and hasattr(elem, 'text') and elem.text: - self.curText.append(u' ') + text.append(u' ') if tag == 'img': - self.curText.append(u'%s%s' % (CLIABRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) + text.append(u'%s%s' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) # Process tags that contain text. 
if hasattr(elem, 'text') and elem.text: - self.curText.append(elem.text) + text.append(elem.text) for item in elem: en = u'' - if len(self.curText) >= 2: - en = self.curText[-1][-2:] - self.dump_text(trees, subitems, item, stylizer, en) + if len(text) >= 2: + en = text[-1][-2:] + text += self.dump_text(subitems, item, stylizer, en) if in_block: - self.curText.append(u'\n\n') + text.append(u'\n\n') if hasattr(elem, 'tail') and elem.tail: - self.curText.append(elem.tail) + text.append(elem.tail) - def Output(self, trees): - if self.curSubItem == None or not self.curSubItem in trees: - return - for t in self.curText: - if len(t.strip(' \t\n\r')) != 0: - if t.find(CLIABRE_SNB_IMG_TAG) == 0: - etree.SubElement(trees[self.curSubItem], "img").text = t[len(CLIABRE_SNB_IMG_TAG):] - else: - etree.SubElement(trees[self.curSubItem], "text").text = etree.CDATA(unicode(u'\u3000\u3000' + t)) + return text From 081c5385f2862be4d0a7e73f76b9c8f9477156ae Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Sun, 10 Oct 2010 21:24:27 +0800 Subject: [PATCH 09/24] [SNBOutput] Improve TOC handling. If an spice is not referenced in TOC, it will be appended to the last TOC item. 
--- src/calibre/ebooks/snb/output.py | 51 +++++++++++++++++++++++++------- src/calibre/ebooks/snb/snbml.py | 13 ++++++-- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index a682062de2..c8457347ec 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -131,6 +131,10 @@ class SNBOutput(OutputFormatPlugin): 'Creating a default TOC') first = iter(oeb_book.spine).next() oeb_book.toc.add(_('Start'), first.href) + else: + first = iter(oeb_book.spine).next() + if oeb_book.toc[0].href != first.href: + oeb_book.toc.add(_('Start'), first.href) for tocitem in oeb_book.toc: if tocitem.href.find('#') != -1: @@ -166,22 +170,49 @@ class SNBOutput(OutputFormatPlugin): tocInfoFile.close() # Output Files + oldTree = None + mergeLast = False + lastName = None for item in s: from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES, PNG_MIME if m.hrefs[item.href].media_type in OEB_DOCS: if not item.href in outputFiles: - log.debug('Skipping %s because unused in TOC.' % item.href) - continue + log.debug('File %s is unused in TOC. Continue in last chapter' % item.href) + mergeLast = True + else: + log.debug('Output the modified chapter again: %s' % lastName) + if oldTree != None and mergeLast: + outputFile = open(os.path.join(snbcDir, lastName), 'wb') + outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) + outputFile.close() + mergeLast = False + log.debug('Converting %s to snbc...' 
% item.href) snbwriter = SNBMLizer(log) - snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts) - for subName in snbcTrees: - postfix = '' - if subName != '': - postfix = '_' + subName - outputFile = open(os.path.join(snbcDir, ProcessFileName(item.href + postfix + ".snbc")), 'wb') - outputFile.write(etree.tostring(snbcTrees[subName], pretty_print=True, encoding='utf-8')) - outputFile.close() + snbcTrees = None + if not mergeLast: + snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts) + for subName in snbcTrees: + postfix = '' + if subName != '': + postfix = '_' + subName + lastName = ProcessFileName(item.href + postfix + ".snbc") + oldTree = snbcTrees[subName] + outputFile = open(os.path.join(snbcDir, lastName), 'wb') + outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) + outputFile.close() + else: + log.debug('Merge %s with last TOC item...' % item.href) + snbwriter.merge_content(oldTree, oeb_book, item, [('', _("Start"))], opts) + + # Output the last one if needed + log.debug('Output the last modified chapter again: %s' % lastName) + if oldTree != None and mergeLast: + outputFile = open(os.path.join(snbcDir, lastName), 'wb') + outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) + outputFile.close() + mergeLast = False + for item in m: if m.hrefs[item.href].media_type in OEB_IMAGES: log.debug('Converting image: %s ...' 
% item.href) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index c357971b5e..bfdaf53cae 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -70,6 +70,14 @@ class SNBMLizer(object): self.subitems = subitems return self.mlize(); + def merge_content(self, old_tree, oeb_book, item, subitems, opts): + newTrees = self.extract_content(oeb_book, item, subitems, opts) + body = old_tree.find(".//body") + if body != None: + for subName in newTrees: + newbody = newTrees[subName].find(".//body") + for entity in newbody: + body.append(entity) def mlize(self): output = [ u'' ] @@ -91,11 +99,12 @@ class SNBMLizer(object): line = line.strip(' \t\n\r') if len(line) != 0: if line.find(CALIBRE_SNB_IMG_TAG) == 0: - etree.SubElement(trees[subitem], "img").text = line[len(CALIBRE_SNB_IMG_TAG):] + etree.SubElement(trees[subitem].find(".//body"), "img").text = line[len(CALIBRE_SNB_IMG_TAG):] elif line.find(CALIBRE_SNB_BM_TAG) == 0: subitem = line[len(CALIBRE_SNB_BM_TAG):] else: - etree.SubElement(trees[subitem], "text").text = etree.CDATA(unicode(u'\u3000\u3000' + line)) + etree.SubElement(trees[subitem].find(".//body"), "text").text = \ + etree.CDATA(unicode(u'\u3000\u3000' + line)) return trees def remove_newlines(self, text): From 9be246500bfc1534d953753e56c338941886c9fc Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 00:19:12 +0800 Subject: [PATCH 10/24] [SNBOutput] Removed a duplicated function. 
--- src/calibre/ebooks/snb/output.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index c8457347ec..bd27a0614e 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -12,21 +12,7 @@ from calibre.ptempfile import TemporaryDirectory from calibre.constants import __appname__, __version__ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.snb.snbfile import SNBFile -from calibre.ebooks.snb.snbml import SNBMLizer - -def ProcessFileName(fileName): - # Flat the path - fileName = fileName.replace("/", "_").replace(os.sep, "_") - # Handle bookmark for HTML file - fileName = fileName.replace("#", "_") - # Make it lower case - fileName = fileName.lower() - # Change extension for image files to png - root, ext = os.path.splitext(fileName) - if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]: - fileName = root + '.png' - return fileName - +from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName class SNBOutput(OutputFormatPlugin): From 29e133c61a9f6d813b4137fd94f4c8305c4de4f5 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 00:41:24 +0800 Subject: [PATCH 11/24] [SNBOutput] Fixed a bug in image path handling. 
--- src/calibre/ebooks/snb/snbml.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index bfdaf53cae..72600fa4d2 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -99,7 +99,13 @@ class SNBMLizer(object): line = line.strip(' \t\n\r') if len(line) != 0: if line.find(CALIBRE_SNB_IMG_TAG) == 0: - etree.SubElement(trees[subitem].find(".//body"), "img").text = line[len(CALIBRE_SNB_IMG_TAG):] + prefix = ProcessFileName(os.path.dirname(self.item.href)) + if prefix != '': + etree.SubElement(trees[subitem].find(".//body"), "img").text = \ + prefix + '_' + line[len(CALIBRE_SNB_IMG_TAG):] + else: + etree.SubElement(trees[subitem].find(".//body"), "img").text = \ + line[len(CALIBRE_SNB_IMG_TAG):] elif line.find(CALIBRE_SNB_BM_TAG) == 0: subitem = line[len(CALIBRE_SNB_BM_TAG):] else: From 3ec09a3b40ac6eb637fb635d9ad2f81cff6d06c5 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 00:49:11 +0800 Subject: [PATCH 12/24] [SBNOutput] Use jpg instead of png. --- src/calibre/ebooks/snb/output.py | 13 ++++--------- src/calibre/ebooks/snb/snbml.py | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index bd27a0614e..7dd976ff25 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -160,7 +160,7 @@ class SNBOutput(OutputFormatPlugin): mergeLast = False lastName = None for item in s: - from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES, PNG_MIME + from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES if m.hrefs[item.href].media_type in OEB_DOCS: if not item.href in outputFiles: log.debug('File %s is unused in TOC. Continue in last chapter' % item.href) @@ -203,14 +203,9 @@ class SNBOutput(OutputFormatPlugin): if m.hrefs[item.href].media_type in OEB_IMAGES: log.debug('Converting image: %s ...' 
% item.href) content = m.hrefs[item.href].data - if m.hrefs[item.href].media_type != PNG_MIME: - # Convert & Resize image - self.HandleImage(content, os.path.join(snbiDir, ProcessFileName(item.href))) - else: - outputFile = open(os.path.join(snbiDir, ProcessFileName(item.href)), 'wb') - outputFile.write(content) - outputFile.close() - + # Convert & Resize image + self.HandleImage(content, os.path.join(snbiDir, ProcessFileName(item.href))) + # Package as SNB File snbFile = SNBFile() snbFile.FromDir(tdir) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index 72600fa4d2..7be15d9fc6 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -23,10 +23,10 @@ def ProcessFileName(fileName): fileName = fileName.replace("#", "_") # Make it lower case fileName = fileName.lower() - # Change extension from jpeg to jpg + # Change all images to jpg root, ext = os.path.splitext(fileName) if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]: - fileName = root + '.png' + fileName = root + '.jpg' return fileName From 3c0673dcf5627dce63d568da615fda9dc614f98e Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 00:51:34 +0800 Subject: [PATCH 13/24] [SBNOutput] Change debug log position to avoid confusion. --- src/calibre/ebooks/snb/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index 7dd976ff25..7b661dfe7f 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -166,8 +166,8 @@ class SNBOutput(OutputFormatPlugin): log.debug('File %s is unused in TOC. 
Continue in last chapter' % item.href) mergeLast = True else: - log.debug('Output the modified chapter again: %s' % lastName) if oldTree != None and mergeLast: + log.debug('Output the modified chapter again: %s' % lastName) outputFile = open(os.path.join(snbcDir, lastName), 'wb') outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) outputFile.close() From dafa2c9034962d87ca03c4fd6302d658413751cf Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 01:14:58 +0800 Subject: [PATCH 14/24] [SNBOutput] Improved unused pages handling. --- src/calibre/ebooks/snb/output.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index 7b661dfe7f..ef008013df 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -116,11 +116,19 @@ class SNBOutput(OutputFormatPlugin): log.warn('This SNB file has no Table of Contents. ' 'Creating a default TOC') first = iter(oeb_book.spine).next() - oeb_book.toc.add(_('Start'), first.href) + oeb_book.toc.add(_('Start Page'), first.href) else: first = iter(oeb_book.spine).next() if oeb_book.toc[0].href != first.href: - oeb_book.toc.add(_('Start'), first.href) + # The pages before the fist item in toc will be stored as + # "Cover Pages". 
+ # oeb_book.toc does not support "insert", so we generate + # the tocInfoTree directly instead of modifying the toc + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(first.href) + ".snbc") + ch.text = _('Cover Pages') + outputFiles[first.href] = [] + outputFiles[first.href].append(("", _("Cover Pages"))) for tocitem in oeb_book.toc: if tocitem.href.find('#') != -1: @@ -133,10 +141,10 @@ class SNBOutput(OutputFormatPlugin): else: outputFiles[item[0]] = [] if not "" in outputFiles[item[0]]: - outputFiles[item[0]].append(("", _("Start"))) + outputFiles[item[0]].append(("", _("Chapter Start"))) ch = etree.SubElement(tocBody, "chapter") ch.set("src", ProcessFileName(item[0]) + ".snbc") - ch.text = _("Start") + ch.text = _("Chapter Start") outputFiles[item[0]].append((item[1], tocitem.title)) else: if tocitem.href in outputFiles: From 63bb69d4ecf65834390887626ede389db76e15ef Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 01:37:29 +0800 Subject: [PATCH 15/24] [SNBOutput] Change wording for the unused page content on each page before the first bookmark appeared in TOC. 
--- src/calibre/ebooks/snb/output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index ef008013df..cbe785d384 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -141,10 +141,10 @@ class SNBOutput(OutputFormatPlugin): else: outputFiles[item[0]] = [] if not "" in outputFiles[item[0]]: - outputFiles[item[0]].append(("", _("Chapter Start"))) + outputFiles[item[0]].append(("", tocitem.title + _(" (Preface)"))) ch = etree.SubElement(tocBody, "chapter") ch.set("src", ProcessFileName(item[0]) + ".snbc") - ch.text = _("Chapter Start") + ch.text = tocitem.title + _(" (Preface)") outputFiles[item[0]].append((item[1], tocitem.title)) else: if tocitem.href in outputFiles: From 424e69f5999d367e3582510c56d2ad4b07402319 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 15:47:48 +0800 Subject: [PATCH 16/24] [SNBOutput] Also strip \u3000 (Full Mode space character in Chinese) --- src/calibre/ebooks/snb/snbml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index 7be15d9fc6..a5659bdca2 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -96,7 +96,7 @@ class SNBMLizer(object): subitem = '' for line in output.splitlines(): - line = line.strip(' \t\n\r') + line = line.strip(u' \t\n\r\u3000') if len(line) != 0: if line.find(CALIBRE_SNB_IMG_TAG) == 0: prefix = ProcessFileName(os.path.dirname(self.item.href)) From e89a58d7095ebfb0bde8443ead635b1f8f13e9fb Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 16:19:46 +0800 Subject: [PATCH 17/24] [SNBOutput] Handle
 <br> tag to be a new line in output. --- src/calibre/ebooks/snb/snbml.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index a5659bdca2..9b2c24c758 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -222,6 +222,9 @@ class SNBMLizer(object): if tag == 'img': text.append(u'%s%s' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) + if tag == 'br': + text.append(u'\n\n') + # Process tags that contain text. if hasattr(elem, 'text') and elem.text: text.append(elem.text) From 30e231c4a69a01818823681375d9e031b333bce5 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 17:12:58 +0800 Subject: [PATCH 18/24] [SNBOutput] Handle
 <pre> tag in html.

---
 src/calibre/ebooks/snb/snbml.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py
index 9b2c24c758..f32ddaadf1 100644
--- a/src/calibre/ebooks/snb/snbml.py
+++ b/src/calibre/ebooks/snb/snbml.py
@@ -53,6 +53,7 @@ SPACE_TAGS = [
 
 CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>"
 CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>"
+CALIBRE_SNB_PRE_TAG = "<$$calibre_snb_pre_tag$$>"
 
 class SNBMLizer(object):
     
@@ -83,7 +84,7 @@ class SNBMLizer(object):
         output = [ u'' ]
         stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
         content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
-        content = self.remove_newlines(content)
+#        content = self.remove_newlines(content)
         trees = { }
         for subitem, subtitle in self.subitems:
             snbcTree = etree.Element("snbc")
@@ -96,7 +97,12 @@ class SNBMLizer(object):
 
         subitem = ''
         for line in output.splitlines():
-            line = line.strip(u' \t\n\r\u3000')
+            if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
+                line = line.strip(u' \t\n\r\u3000')
+            else:
+                etree.SubElement(trees[subitem].find(".//body"), "text").text = \
+                    etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
+                continue
             if len(line) != 0:
                 if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                     prefix = ProcessFileName(os.path.dirname(self.item.href))
@@ -137,7 +143,7 @@ class SNBMLizer(object):
         text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
 
         # Remove multiple spaces.
-        text = re.sub('[ ]{2,}', ' ', text)
+        #text = re.sub('[ ]{2,}', ' ', text)
 
         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
@@ -187,7 +193,7 @@ class SNBMLizer(object):
 
         return text
 
-    def dump_text(self, subitems, elem, stylizer, end=''):
+    def dump_text(self, subitems, elem, stylizer, end='', pre=False):
 
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
@@ -225,20 +231,27 @@ class SNBMLizer(object):
         if tag == 'br':
             text.append(u'\n\n')
 
+        pre = (tag == 'pre' or pre)
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:
-            text.append(elem.text)
-
+            if pre:
+                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.text.splitlines()))
+            else:
+                text.append(elem.text)
+            
         for item in elem:
             en = u''
             if len(text) >= 2:
                 en = text[-1][-2:]
-            text += self.dump_text(subitems, item, stylizer, en)
+            text += self.dump_text(subitems, item, stylizer, en, pre)
 
         if in_block:
             text.append(u'\n\n')
 
         if hasattr(elem, 'tail') and elem.tail:
-            text.append(elem.tail)
+            if pre:
+                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.tail.splitlines()))
+            else:
+                text.append(elem.tail)
 
         return text

From 8f3e1ca4d5749b033168777d94971e76aa9212c9 Mon Sep 17 00:00:00 2001
From: Li Fanxi 
Date: Mon, 11 Oct 2010 18:17:03 +0800
Subject: [PATCH 19/24] [SNBOutput] Better handling of 
 <li> tag. --- src/calibre/ebooks/snb/snbml.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index f32ddaadf1..3542a2110f 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -92,7 +92,7 @@ class SNBMLizer(object): etree.SubElement(snbcTree, "body") trees[subitem] = snbcTree output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, "")) - output += self.dump_text(self.subitems, etree.fromstring(content), stylizer) + output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0] output = self.cleanup_text(u''.join(output)) subitem = '' @@ -193,7 +193,7 @@ class SNBMLizer(object): return text - def dump_text(self, subitems, elem, stylizer, end='', pre=False): + def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''): if not isinstance(elem.tag, basestring) \ or namespace(elem.tag) != XHTML_NS: @@ -231,19 +231,24 @@ class SNBMLizer(object): if tag == 'br': text.append(u'\n\n') + if tag == 'li': + li = '-- ' + pre = (tag == 'pre' or pre) # Process tags that contain text. 
if hasattr(elem, 'text') and elem.text: if pre: - text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.text.splitlines())) + text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join((li + elem.text).splitlines())) else: - text.append(elem.text) + text.append(li + elem.text) + li = '' for item in elem: en = u'' if len(text) >= 2: en = text[-1][-2:] - text += self.dump_text(subitems, item, stylizer, en, pre) + t, li = self.dump_text(subitems, item, stylizer, en, pre, li) + text += t if in_block: text.append(u'\n\n') @@ -252,6 +257,7 @@ class SNBMLizer(object): if pre: text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.tail.splitlines())) else: - text.append(elem.tail) + text.append(li + elem.tail) + li = '' - return text + return text, li From f0fb8fb12a24dc043c01e665f1d9aa74866d66b5 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Tue, 12 Oct 2010 16:22:25 +0800 Subject: [PATCH 20/24] [SNBOutput] Fix bugs in handling
 <li> and bookmark. --- src/calibre/ebooks/snb/snbml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index 3542a2110f..af0106aaa0 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -206,7 +206,7 @@ class SNBMLizer(object): if elem.attrib.get('id') != None and elem.attrib['id'] in [ href for href, title in subitems ]: if self.curSubItem != None and self.curSubItem != elem.attrib['id']: self.curSubItem = elem.attrib['id'] - text.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem)) + text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem)) if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ or style['visibility'] == 'hidden': @@ -226,13 +226,13 @@ class SNBMLizer(object): text.append(u' ') if tag == 'img': - text.append(u'%s%s' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) + text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) if tag == 'br': text.append(u'\n\n') if tag == 'li': - li = '-- ' + li = '- ' pre = (tag == 'pre' or pre) # Process tags that contain text. From 6c9541723f2ac9677c4822bd61f2e75a043771bd Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Thu, 14 Oct 2010 16:53:39 +0800 Subject: [PATCH 21/24] [SNBOutput][Bug] Fixed a bug when using multiple SNBFile object, error will happen. 
[Feature] Get ready for SNB input plugin --- src/calibre/ebooks/snb/snbfile.py | 44 +++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/snb/snbfile.py b/src/calibre/ebooks/snb/snbfile.py index ca10f800c7..34830fa808 100644 --- a/src/calibre/ebooks/snb/snbfile.py +++ b/src/calibre/ebooks/snb/snbfile.py @@ -5,6 +5,7 @@ __copyright__ = '2010, Li Fanxi ' __docformat__ = 'restructuredtext en' import sys, struct, zlib, bz2, os, math +from mimetypes import types_map class FileStream: def IsBinary(self): @@ -18,9 +19,6 @@ class BlockData: class SNBFile: - files = [] - blocks = [] - MAGIC = 'SNBP000B' REV80 = 0x00008000 REVA3 = 0x00A3A3A3 @@ -28,15 +26,21 @@ class SNBFile: REVZ2 = 0x00000000 def __init__(self, inputFile = None): + self.files = [] + self.blocks = [] + if inputFile != None: - self.Parse(inputFile); - - def Parse(self, inputFile): + self.Open(inputFile) + + def Open(self, inputFile): self.fileName = inputFile snbFile = open(self.fileName, "rb") snbFile.seek(0) + self.Parse(snbFile) + snbFile.close() + def Parse(self, snbFile, metaOnly = False): # Read header vmbr = snbFile.read(44) (self.magic, self.rev80, self.revA3, self.revZ1, @@ -47,7 +51,7 @@ class SNBFile: # Read FAT self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed)) self.ParseFile(self.vfat, self.fileCount) - + # Read tail snbFile.seek(-16, os.SEEK_END) #plainStreamEnd = snbFile.tell() @@ -57,7 +61,7 @@ class SNBFile: self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize)) self.tailSizeUncompressed = len(self.vTailUncompressed) self.ParseTail(self.vTailUncompressed, self.fileCount) - + # Uncompress file data # Read files binPos = 0 @@ -78,7 +82,7 @@ class SNBFile: try: data = snbFile.read(bSize) uncompressedData += bzdc.decompress(data) - except EOFError, e: + except Exception, e: print e f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize] plainPos += f.fileSize @@ -90,7 +94,6 @@ class SNBFile: else: print 
f.attr, f.fileName raise Exception("Invalid file") - snbFile.close() def ParseFile(self, vfat, fileCount): fileNames = vfat[fileCount*12:].split('\0'); @@ -156,6 +159,24 @@ class SNBFile: f.fileBody = open(os.path.join(tdir,fileName), 'rb').read() f.fileName = fileName.replace(os.sep, '/') self.files.append(f) + + def GetFileStream(self, fileName): + for file in self.files: + if file.fileName == fileName: + return file.fileBody + return None + + def OutputImageFiles(self, path): + fileNames = [] + for f in self.files: + fname = os.path.basename(f.fileName) + root, ext = os.path.splitext(fname) + if ext in [ '.jpeg', '.jpg', '.gif', '.svg', '.png' ]: + file = open(os.path.join(path, fname), 'wb') + file.write(f.fileBody) + file.close() + fileNames.append((fname, types_map[ext])) + return fileNames def Output(self, outputFile): @@ -247,7 +268,8 @@ class SNBFile: return def Dump(self): - print "File Name:\t", self.fileName + if self.fileName: + print "File Name:\t", self.fileName print "File Count:\t", self.fileCount print "VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, self.vfatCompressed) print "Binary Stream Size:\t", self.binStreamSize From 83cf5d5f2820b985d72556b835578f2bf25ddac5 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Thu, 14 Oct 2010 16:59:22 +0800 Subject: [PATCH 22/24] [SNBInput][SNBMetadataReader] Add SNB input plugin and SNB Metadata Reader plugin. 
--- src/calibre/customize/builtins.py | 13 ++++ src/calibre/ebooks/__init__.py | 2 +- src/calibre/ebooks/metadata/meta.py | 2 +- src/calibre/ebooks/metadata/snb.py | 47 +++++++++++++ src/calibre/ebooks/snb/input.py | 104 ++++++++++++++++++++++++++++ src/calibre/gui2/actions/add.py | 1 + 6 files changed, 167 insertions(+), 2 deletions(-) create mode 100755 src/calibre/ebooks/metadata/snb.py create mode 100755 src/calibre/ebooks/snb/input.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 8550f57ee6..fe187a1400 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -292,6 +292,17 @@ class RTFMetadataReader(MetadataReaderPlugin): def get_metadata(self, stream, ftype): from calibre.ebooks.metadata.rtf import get_metadata return get_metadata(stream) + +class SNBMetadataReader(MetadataReaderPlugin): + + name = 'Read SNB metadata' + file_types = set(['snb']) + description = _('Read metadata from %s files') % 'SNB' + author = 'Li Fanxi' + + def get_metadata(self, stream, ftype): + from calibre.ebooks.metadata.snb import get_metadata + return get_metadata(stream) class TOPAZMetadataReader(MetadataReaderPlugin): @@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput from calibre.ebooks.txt.input import TXTInput from calibre.ebooks.lrf.input import LRFInput from calibre.ebooks.chm.input import CHMInput +from calibre.ebooks.snb.input import SNBInput from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.fb2.output import FB2Output @@ -496,6 +508,7 @@ plugins += [ TXTInput, LRFInput, CHMInput, + SNBInput, ] plugins += [ EPUBOutput, diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 624b277e61..9bdf937dd1 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -25,7 +25,7 @@ class DRMError(ValueError): BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 
'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', - 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan'] + 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb'] class HTMLRenderer(object): diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index 87b8d3b535..cbd9db3f04 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [ 'html', 'htm', 'xhtml', 'xhtm', 'rtf', 'fb2', 'pdf', 'prc', 'odt', 'epub', 'lit', 'lrx', 'lrf', 'mobi', - 'rb', 'imp', 'azw' + 'rb', 'imp', 'azw', 'snb' ] # The priorities for loading metadata from different file types diff --git a/src/calibre/ebooks/metadata/snb.py b/src/calibre/ebooks/metadata/snb.py new file mode 100755 index 0000000000..67bbc89a32 --- /dev/null +++ b/src/calibre/ebooks/metadata/snb.py @@ -0,0 +1,47 @@ +'''Read meta information from SNB files''' + +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2010, Li Fanxi ' + +import re, os +from StringIO import StringIO +from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.snb.snbfile import SNBFile +from lxml import etree + +def get_metadata(stream, extract_cover=True): + """ Return metadata as a L{MetaInfo} object """ + mi = MetaInformation(_('Unknown'), [_('Unknown')]) + snbFile = SNBFile() + + try: + if not hasattr(stream, 'write'): + snbFile.Parse(StringIO(stream), True) + else: + stream.seek(0) + snbFile.Parse(stream, True) + + meta = snbFile.GetFileStream('snbf/book.snbf') + + if meta != None: + meta = etree.fromstring(meta) + mi.title = meta.find('.//head/name').text + mi.authors = [meta.find('.//head/author').text] + mi.language = meta.find('.//head/language').text.lower().replace('_', '-') + mi.publisher = meta.find('.//head/publisher').text + + if extract_cover: + cover = meta.find('.//head/cover') + if cover != None and cover.text != None: + 
root, ext = os.path.splitext(cover.text) + if ext == '.jpeg': + ext = '.jpg' + mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text)) + + except Exception, e: + print e + pass + + return mi diff --git a/src/calibre/ebooks/snb/input.py b/src/calibre/ebooks/snb/input.py new file mode 100755 index 0000000000..a85feddbb2 --- /dev/null +++ b/src/calibre/ebooks/snb/input.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL 3' +__copyright__ = '2010, Li Fanxi ' +__docformat__ = 'restructuredtext en' + +import os, uuid + +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation +from calibre.ebooks.oeb.base import DirContainer +from calibre.ebooks.snb.snbfile import SNBFile +from calibre.ptempfile import TemporaryDirectory +from calibre.utils.filenames import ascii_filename +from calibre import prepare_string_for_xml +from lxml import etree + +HTML_TEMPLATE = u'%s\n%s\n' + +def html_encode(s): + return s.replace(u'&', u'&').replace(u'<', u'<').replace(u'>', u'>').replace(u'"', u'"').replace(u"'", u''').replace(u'\n', u'
    ').replace(u' ', u' ') + +class SNBInput(InputFormatPlugin): + + name = 'SNB Input' + author = 'Li Fanxi' + description = 'Convert SNB files to OEB' + file_types = set(['snb']) + + options = set([ + ]) + + def convert(self, stream, options, file_ext, log, + accelerators): + log.debug("Parsing SNB file...") + snbFile = SNBFile() + try: + snbFile.Parse(stream) + except: + raise ValueError("Invalid SNB file") + if not snbFile.IsValid(): + log.debug("Invaild SNB file") + raise ValueError("Invalid SNB file") + log.debug("Handle meta data ...") + from calibre.ebooks.conversion.plumber import create_oebbook + oeb = create_oebbook(log, None, options, self, + encoding=options.input_encoding, populate=False) + meta = snbFile.GetFileStream('snbf/book.snbf') + if meta != None: + meta = etree.fromstring(meta) + oeb.metadata.add('title', meta.find('.//head/name').text) + oeb.metadata.add('creator', meta.find('.//head/author').text, attrib={'role':'aut'}) + oeb.metadata.add('language', meta.find('.//head/language').text.lower().replace('_', '-')) + oeb.metadata.add('creator', meta.find('.//head/generator').text) + oeb.metadata.add('publisher', meta.find('.//head/publisher').text) + cover = meta.find('.//head/cover') + if cover != None and cover.text != None: + oeb.guide.add('cover', 'Cover', cover.text) + + bookid = str(uuid.uuid4()) + oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') + for ident in oeb.metadata.identifier: + if 'id' in ident.attrib: + oeb.uid = oeb.metadata.identifier[0] + break + + with TemporaryDirectory('_chm2oeb', keep=True) as tdir: + log.debug('Process TOC ...') + toc = snbFile.GetFileStream('snbf/toc.snbf') + oeb.container = DirContainer(tdir, log) + if toc != None: + toc = etree.fromstring(toc) + i = 1 + for ch in toc.find('.//body'): + chapterName = ch.text + chapterSrc = ch.get('src') + fname = 'ch_%d.htm' % i + data = snbFile.GetFileStream('snbc/' + chapterSrc) + if data != None: + snbc = etree.fromstring(data) + outputFile = 
open(os.path.join(tdir, fname), 'wb') + lines = [] + for line in snbc.find('.//body'): + if line.tag == 'text': + lines.append(u'

    %s

    ' % html_encode(line.text)) + elif line.tag == 'img': + lines.append(u'

    ' % html_encode(line.text)) + outputFile.write((HTML_TEMPLATE % (chapterName, u'\n'.join(lines))).encode('utf-8', 'replace')) + outputFile.close() + oeb.toc.add(ch.text, fname) + id, href = oeb.manifest.generate(id='html', + href=ascii_filename(fname)) + item = oeb.manifest.add(id, href, 'text/html') + item.html_input_href = fname + oeb.spine.add(item, True) + i = i + 1 + imageFiles = snbFile.OutputImageFiles(tdir) + for f, m in imageFiles: + id, href = oeb.manifest.generate(id='image', + href=ascii_filename(f)) + item = oeb.manifest.add(id, href, m) + item.html_input_href = f + + return oeb + diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index be1f8f4eaf..5bcdf2254e 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -166,6 +166,7 @@ class AddAction(InterfaceAction): (_('Topaz books'), ['tpz','azw1']), (_('Text books'), ['txt', 'rtf']), (_('PDF Books'), ['pdf']), + (_('SNB Books'), ['snb']), (_('Comics'), ['cbz', 'cbr', 'cbc']), (_('Archives'), ['zip', 'rar']), ] From 8d26343231ae58962b8b80756f427827b58f8b4d Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Thu, 14 Oct 2010 20:57:47 +0800 Subject: [PATCH 23/24] [SNBOutput] Also convert png image to jpg. 
--- src/calibre/ebooks/snb/snbml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index af0106aaa0..1847e05a4b 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -25,7 +25,7 @@ def ProcessFileName(fileName): fileName = fileName.lower() # Change all images to jpg root, ext = os.path.splitext(fileName) - if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]: + if ext in [ '.jpeg', '.jpg', '.gif', '.svg', '.png' ]: fileName = root + '.jpg' return fileName From 565295b3531b9e6f1251c9ad9d352b6f2812003b Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Thu, 14 Oct 2010 21:19:08 +0800 Subject: [PATCH 24/24] [SNBOutput] Improve handling of comics, read screen size from profile. --- src/calibre/customize/profiles.py | 1 + src/calibre/ebooks/snb/output.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index bda2103484..4fa53b1cdb 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -655,6 +655,7 @@ class BambookOutput(OutputProfile): # Screen size is a best guess screen_size = (800, 600) + comic_screen_size = (700, 540) dpi = 168.451 fbase = 12 fsizes = [10, 12, 14, 16] diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py index cbe785d384..3aadb79185 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/snb/output.py @@ -50,6 +50,7 @@ class SNBOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + self.opts = opts # Create temp dir with TemporaryDirectory('_snb_output') as tdir: # Create stub directories @@ -224,9 +225,12 @@ class SNBOutput(OutputFormatPlugin): img = Image() img.load(imageData) (x,y) = img.size - # TODO use the data from device profile - SCREEN_X = 540 - SCREEN_Y = 700 + if self.opts: + SCREEN_Y, SCREEN_X = 
self.opts.output_profile.comic_screen_size + print SCREEN_Y, SCREEN_X + else: + SCREEN_X = 540 + SCREEN_Y = 700 # Handle big image only if x > SCREEN_X or y > SCREEN_Y: SCREEN_RATIO = float(SCREEN_Y) / SCREEN_X