From ee246b73ec23809bdb18acdfee0bb2e0da7584c8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jan 2011 17:46:35 -0700 Subject: [PATCH 1/6] Zip file reading: Be more tolerant when a zip file has a damaged directory --- src/calibre/utils/zipfile.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py index ff290abd25..c230b9dfa7 100644 --- a/src/calibre/utils/zipfile.py +++ b/src/calibre/utils/zipfile.py @@ -982,9 +982,12 @@ class ZipFile: zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) if fname != zinfo.orig_filename: - raise BadZipfile, \ - 'File name in directory "%s" and header "%s" differ.' % ( - zinfo.orig_filename, fname) + print ('WARNING: Header (%r) and directory (%r) filenames do not' + ' match inside ZipFile')%(fname, zinfo.orig_filename) + print 'Using directory filename %r'%zinfo.orig_filename + #raise BadZipfile, \ + # 'File name in directory "%r" and header "%r" differ.' % ( + # zinfo.orig_filename, fname) # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & 0x1 From d51bd60c9cad5a13df45f0edb7473aeb50d89beb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jan 2011 18:24:26 -0700 Subject: [PATCH 2/6] Nicer DRM error message --- resources/images/document-encrypt.png | Bin 0 -> 8988 bytes src/calibre/gui2/dialogs/drm_error.py | 21 ++++++ src/calibre/gui2/dialogs/drm_error.ui | 102 ++++++++++++++++++++++++++ src/calibre/gui2/ui.py | 8 +- src/calibre/gui2/viewer/main.py | 5 +- 5 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 resources/images/document-encrypt.png create mode 100644 src/calibre/gui2/dialogs/drm_error.py create mode 100644 src/calibre/gui2/dialogs/drm_error.ui diff --git a/resources/images/document-encrypt.png b/resources/images/document-encrypt.png new file mode 100644 index 0000000000000000000000000000000000000000..07743420242a17d6c1c63d7a846d3b33d52c9ea6 GIT binary patch literal 8988 zcmXY1cQ{a0Qj?s7g#-WqkZWtHy~4K8|861z?6XI~Y6#m9 zSnFu20q+01@;l1D000~SZ8c?M-|YPy2+XWMbu|R!s<{7PZ-@BcMYT$$QFYyMLcMVhM~cQ==muR&*{Xz-B`DCmv(Y#Y>s`cL^2&SwTc z)j=JA8yw#W8O*ENSvmisHT1pQx+b#bA1gmVU*YoCp@6XP{lSkPKdvh){hKilsg*&o zWMex;wNouIfVJ#^g2nwizp|-Ic0$UBH&eul3#>1@L~%qE6}wn3!m$ofCnqOuyRDxG&WnR#EL!P|ipAW%Y_KTz_Y*el%EB9@cmB9gietzkghoF+x!-fEii}%5j zcJtzQ(|81fjjjSa#{XZ3$Ij^SSIgVD&HhA>^=^E?`N48Pu#3yVyKwYc9@}%@qv43? zHMspE3F_u15P0K5?dRs;P?2YRzPBe3fH`MzdvlZf>3ncilXbT5`i} zUh&_p&`B2x+W3-p^Goc@!>p$z&b$E4Pi&WP#5udLpddY`iXq+JN=LvRjFH+R=nNEm zyK8wmsUz%uuq2Zl%Y+}e4;hs?KU^JaTR9UtKyFeTbY8n$-3H&Ss43oGj`^Jo@>cA^ zaS17=z0B{gqaXVJCZ%P|eOl`L_(661%TJA(LTC6d4U8vhh zD>W~cEbdb{5JM{~k5)baknQc+i1145>aBye!|j!T!>!xH5Jh#vUkZYPg6Y|y;T`xu2wdGt9kFe11s z@9-W?8H`E{{>MeZCaqL9FWBqBh>+5KOi981`I$BR^S=_d5YJ%wtzB&j(edz%TdZ89vTsbDU^Rjrg)omRo=jDza zwO?Z89XY7OQf-BobJTQbZ_lZZ%2`=D=t_L-`6(G^H+Cv6Ptv=FOV&>EwNo=4%6cqh3{qv6^=r?e8Z#v!X2LAueGh- z9yoxO^OzQQ&pvYO^ef(piiwHs&DGnGpT-yF3+Nc1;T;96T;E>}a3zw}ln6&tJot;{ zKW1v>I)r?w5vx~wLEO%D?d`)x`tp%6b*`%lohPZabsxqaVpGvC)pe=2a+-_DujD;! z*~!hSbpY4k>g52~6ap_O6GZQ)WujvY(jJe>z(jAbX|r{3h+S^;p`1U8po}Y20^Nrr z&g&CHZZ;WL2ZH7AE;i*lF!y}{_btI)V_0f^qbkluH;gIh7}B-h?z`?xXtIBo>Hm*_ zSYallU|Z{sB<;HI#3I46^Yd-%YxonObl}C&3L5NIx_Y>yrGpd1C~WcT;&^j!wJU`D zbZ2{;FV5OT?QWe{;W+l*TmJguXKnD=6z55Qhed5OR$%uMxPd{JTj~5m$ynuyA!H9s zs;u%&x&HY*o)NPgaM(dQ+mQH1<@}`C=>Z6ImZw&%%g;Bma=jd0}^YI1UN{&Z)I@@(sT(RJ)#?K79=fAD&5 z@^L4}=i~M;vuw^w6T0v0mWK)!#Ci#U;jytXL1Ez#fk(G=r6ACMl8Wu&J++Xl+`K67 zT7A37e*0VV9`pL_44a=IQ;=di66qU^&9paqFcg;%%F!52$xg7gd>AYR0@Y)2w6wHz zIEuCE46+o~G~xna`5a3vx^qX6|66Me5 zECC_+lO;OvkHjkhGztLUyUQKFt<*PsabZ54r#bEC3lKW>^&j7E!mAGs{lz74>aar@ zm(grqe4uiUoeHduJV!a4A1=2sf^K^fgZ6AzGmP`t1NQ1`aYGs#8&j^gI(C{CU4}5v z&!&w4OWuplW=OZt)|x+O{&~3u>9ga;L?|rck3Vm1ZRKEV9e{~sB3C{j?VCWfT?khE z`Qq}bs;X|{wa;1Cu;aX63b+p;2v=qp|0aMnp`^tuEFkdfZ*gQKzHH9uL6K(ST{W@K zgA5wrTT|1kXQ=-K_Vm%*1lIcSjrb5&;c<^5H6Qx*>X#2?Qaq3~jvq+>TKVJF806A^ ztN~?{BtRbbv6{RkqJWBSKc(IEj4xFIFfcOSmm<=zj3*SKcQA#Bopxa8)I&X7?$)E( zbMrjAXoLRfSYCaqe5m)mgqW0DHuy#wfW?fBySpgp(Xw2{U6aEG&e2(INYMSp#>S`l z`T1;n?Z&#gq}s=i=9*pEE3Xj5-hT2|@Jb3lyoQDbK(@4d7`7<7AlC^11mVD$1@qI> z(`%l2s|*}q`R>(W378^@y0bDF1$uXj@$WFgz7rxjsE}A>QHvp86J7+1$bX4B!WI)O zRd0jyQd3h?%CClP1LW~>>RMZ`9ufK7iF+iIB3oN!BWi4N??PVMl#J{sU|L8nj5tW0 z7Rj<@y-0|@3sf{ItMO3a@d^R3%NYuyGk=G3ULcd#ZQ%>~XBC|mpJ1Y^+B{F`(@Q_d zxO3X(E#iP-G9vY(8gL|_s-|XzNEwiPs~d3hm+xy3cef9EM-Lf_Yt!e^)o_< z>+BO2ep@!Nv_D)8w7X8lv7}1ovqB>@OcX#~azL?Zhb60xArUpO)pz$lr$gIfgu$OmD zoCr1d%88oU6i)wh`-iQ+pL%2Vu*tjpM?_wQBWV?uuGk=*)MggUDwip!^yCtR9_ggC zv-#=(B-M*_I-5uzqaQpzyq%UGnkH6i+mj_P_HeO3*Of!Op=)+9wF}BBeF<1d$yl|3 zwLIV>J`jHvm{RI*OMAK~8n&+#d+ADyk}xp+njQg`%Y@qAH;0eEtOh8Rle}aBvz5bN zFT4!OF$FUH1UR$%FE}hD-D!v;Z~){)x(93C1u6J}q_}2W2nm!oE%f!jkChIN_#Bzg z{TLw4>HixN`_hy}tQ8bAM2aME3`gyk12ss^%2G>T1Hji|sPnjWhq4L=sJ9SUj#%jg zr%5%htSP!^Fn?bpBX%Ju&l~PYK28w2XeC1Xn7rua~sbE4Svlkv7J)(rkKX|=Q+xnSP^}3;|X0*=KEAu1rTNLcf zvrXrpKdyhrd%F*e8JS;y@Bf}$Zed+qq}&STYx6xUhm_Pb-!YfF1BbjOOr1H zD<2wD0S~PVyuwAu{EfY|L2{L?`8Dyrp3@I2TTuZ=*5#0RjDcVH1+pjOG=?u`?Xfk$ zMs#9Zj&Vr}BA~wRP9atyL=h3Xk@`vif&61+Q|#NMj2GmADtKh{C12}0bUXv8 zNRHR#g-2Rq)Su6%RZ5bhGRCX0laS>UnAnE_Lbz<+=?ck#&HYE+#}v%Pfk3J-z`&ec zHI$@sok)kGB-oXiqG=6yYP}%TF1c|lLCYq1BmfY~2dN?`@<(q1! z(dPTbYw&Kini7&bSP9hvKa(w{uL5!z&;b+0yT90~*C{Nevp{F^d5HLqgUL7;ik|vf zhSrMI;26@W7dyOTCY}2TbgMy`su#zIAyDz^2?-73oGf;xCpa`}L=Qs=^f=P+Xp1G) z${uk9QWip6!FI;uug8rpb)?z=#uYFE=B^CWc`#A(^8CJ{>f{9x|8PP3AQ-i^* z`ZmAJ7x8tGkJcl(IpCYPTCG}r?6yFNu34Jd)Xh}9_Jo=QY9Y-U?;%9qo?aXp#azKN zS}q-2DCrCQCRw<)aSw6&!7OtKEuq9h7d}oilSq)i%D7*=;h{nsW zSN&2T$Ow)2-9ec8Yd6~F0h*0Jneju1Bka7|Avv_5hSf&<#mui&sn6kld)4X=Bi=S5 zlStU{Mp87CX0p@JrJ8b~r*GTT9l+QT`N|(JPqGit8%pFTzmeJ+C5y9ET}W&X#W8^K zkJC}1fsbcq(4#mYdJ(8K*E11g(Y<&5MTw}|G^R&%R4O-$6@kvg)M^-m)NZEiFzP`& zWm*oH=8pBatj1+ntE&n+@{D|3ESTwtUYV8d82%KUM(;^4tE8Eq!bIpco%)J#Js8p~ zWR2Xy0XrukUMRhgaD+s#FadC36;Po11a~;BS%z$vi|QW>50#K3;o>BjQwphlJK$#@ zH*6bsdfw_xZz{Rz^MxippDw|t{ndZeM&+U12ox-1jyr5^a6Fy#CiL`H*`RV#r5789#-&WNd!=+L`E5%j^=tZV)t@EPS(rp;v8TU?45hz_iS ze^EVcq8Uy^#N@%KoC*NH3dU-q??YT+Q1!;Lze;+I6X>#6i|`Sb3kW&GQyyN{FMt?0 z3@^H_DYd}C8J1k=tt_GvCu!kQm}qywkK;h9*!WJ4f%*Wl@Y`26s>uM*9ZpD94n!uD z?}+RgNsjk(F)w7jZg32H%DCK6Y)G3<+n{&1MJN49RGE2Zmjw;(WTOH0xZ^zD#K(QB zn>zq{VB<@DVM4An?*IKVh2x1R(yB}o&xp^f#5na8sB4`=gwF7Zbi8kPf4)QC&H-=l%+Z{iqD*aKk;~9U3HuC=kuY(orz~uQ_tt8 zweU(=KljeNfJ&8)rLZOA2v15RG7thgo>utn2Vy_Pw|bE^GtB60+(JXu-Zy9144{sd zS^(n{lPKo9qf>#Q%(zVg&o(>YQEdz`@(b?hgfCZ#`r~WBa1i>5G0O?%D5TG~#>D}L zv}UTzGbkTVQ29j?34-1WTewZ@TU#Zs{p1~V@Rh8`3G-_n*fahlp-x1^Ey$L;r-BJV zRAdU3TjGmqdc)jNp7of?lt{lXde9gsF@QF#f~Xt6F90SX$i6D6{4lm)z1JP0iucTc zlu-{#AE%;Pv#M+uMO2Ic4GpqrxLQUS${gto<3MxlS!?Rylah^Jp)sX+h9~CAj zKKhY$s0(hJ6km##LATOD?0tSe8*nGJw|>9QDG3fMC^n#d0s@H>M0YQAzW!B9avW-m zZyS-+cSimF+6rhuu3j=OA~F-xchmJ?_q>3CmP?Vp&fR4PU!h3{CGixKeId#1(rcWXq|0z=~j z2@Ptj+enD<81NlQ#~a1|&V01N`2)r!+Mv(-fp)i#sZovFq3w3yZ=sEQuA(&X)Ockl zS%e5!YERq^ZR4OL1}xCA^nI{H^t3aGa3bw3KI-K=IFg24I(*0LD|d^vh-iwV9X6;} z5ePf3PFgoGiOgdm4XQ(v*%mmKC6YfvzF**Xi}oNu zDAO+dPIGyz*NZ3a=S5lCy+B3AKlB6{+VtaTxEd3chC9eFuQRHl}$lWVg;E( zudBQm7aw*?4NEi|`f1+Ez;1iN;sJE9(i*)%0IYDy2-gVEvQg%W z$*!xzfe_8e{1V`d2fnt;npWeOI%>xjMo=DJ>9#*@r;$rr5Ic>xt)l;U&V@ZpVnZU@ z+Gwpb(;farY-4jz-$crk|1{PJ|ybGvVtdHKI0U5!!I?@}U(K*>u_0z)&M{34nNMkuJTm z%=5j?TtF{_O04WFWpNhzj$HB2Tk8cp2G)XLd&vTRM<+wdEu6`#Ht)esU zYXHY6goJ`KnmTqsoY{$$2ictaM~yZ<}ij4!187Ora5_iBz~Nm z=noVUB8S-d5K)m^!OH!NTBU4ay`u}|s01mf3Gn3DYZuEA#U#&##9L$3#O-Z~aLa>e zQzJsve}bJudr?|)N*f(epU*N@xfR3~Dt!NlDrc^Z%c;q=<)XM_TSWg##sRAaOP~E~u-xc1mYYlRzI6ndm>}e3KjaLe?lA zs*Woat|}q!-~fP_eYsDJSAnoE%;^;p1(1@plhH$M(-WzoJMeEJ>F6Ao;600Ttk-*S z+~T4Os+vy#w3>4&jMB2G0}=^-F+7iW9>j_no~4nDI+FZ_ zGi_H&jZT{hqb`7$+HR^oV{uR+<~7k!+C0|192r{e^UHltA`rwLjEK zaBV?ilbL8C_HSscAO$(F0LeKpPgR-9LsZU8Pe5euxzQ=_PFw4uCJkW)d?PV( zOleUP*X*wNj4W^zkD+91pk$h7q>7=XT(P%IV;M_T`x~a|IE=v?{v4-@cIWOMJRX1C zzdwF|WUZ^9;Ao8_;$L-r=PmEFL4|}za1Ng!%guHlUFYqq_5x;DesFGMbV(?)m>#B; zKFrrDf6U#xFw~O;M^$h2gPX-h$7v9c`7Fau*Zuf z`y-TXY>g_>YQI)t3;2`1S;&}%4oqUyt^bCP;{s7PRMdq$e~1T#|2waHkZJsFFSyVZ z{;5|_M9_|JT3#ED60%apjYz6i4{Gp;LP)lT6HEL_qO?J~rZ7uZLsOX_v6B&B!vx`p_?+rh>G2$O z9U`{HKM@rDz(#6lyDHHp9!LIt!a1R-4i}@*a#{j)r?)&5pLGbeJTM`<=)`@rZO%w# z;P&IrK_Q9={DXprc3gzpmwf*h35(gFF;y~;X@lHvX`p?2#5@C)ld-o5A!`+}mZ^%e zDQgl=MwkQ(_|}=dd2WYQmDUY+k&GGCROGxr?ttf6F{}fMDh%@)k$r5{<86^Jn5)(* z+>tJs%)(j#sUVul9pL8^(Flj}w82T0H$2PDzvo-v>VeC~Q#d79O;I9m^%OeXH+3ln zie*t|HrBdARLexT+=}h;U-yw427SZ9@a(1SlB;8$EggL6F z^`3fXts6_U@X^JR%DzWdwT%oUt$n6aBYBOV;C1##?Af*lf7RE_V?#OE9>KSV22R$L zk||B& z_jWqObV*2|d3P*aQZJR2Ny}uYNh4uf%M2&%&vH}gG8LX=u2PoT%(G0djMW>)#bPv= z1+e^c+?ZI&lr<)*BP={YN^o{;ts?<)KT{fh`Z5cm1or*h5a{je9qnup9VsBr{FRvo ze503NK?CJcp1e8aupdD(ylPHs7ee)|CC2}h+H8len<=rbRpVP|vsRS>b)N~o&^@J2 z)}B8zqB;rtT&;2bMg_YK(c|*U_U#a^J)s|*oD+w^o-kB{S-}wpCzCn~E?~TC1EtnO zX*8|$N+bz-Ji|Baa>wp3j5oD$Un@BFJ1^d;taO{}Vt=@-z$i7Wk7F};Bg0mmuw6tx zX)Jf0f&L&yYR)=+E>28|O%@e9!W8rgvAdJ7h7M;IskzM!{7t5b;Q6~1ujNir9`J2I zbD~|#YoJKAjpJrzMJ=iMNWQD8-AJ#En);>mihid`>k?NP=2V3#Dzkm!(toyZxnUkN z-+MN(!{XyCChp?4IIA(%FZ$k$_5Iq`3cF92)5&vu-WmF&31wz;T2a}e*|yFV#1Qch zJA4PRFvRD4n8L1qcO7*@ZB#w0sK?T!-RkP_2VtMXaiyw$Bu=6?< z?WBv*@8-Ckca9LvBQa9)Ohp(A!P6C@(weePVV`}+TzU(5_M0P3#Z`l>~ZPU}y7BSc`(L2r?|a;v`fa+-DkW{~aTkEL`b)JM6>!-90nS=cZ2$lO literal 0 HcmV?d00001 diff --git a/src/calibre/gui2/dialogs/drm_error.py b/src/calibre/gui2/dialogs/drm_error.py new file mode 100644 index 0000000000..5fbba47165 --- /dev/null +++ b/src/calibre/gui2/dialogs/drm_error.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +from PyQt4.Qt import QDialog +from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog + +class DRMErrorMessage(QDialog, Ui_Dialog): + + def __init__(self, parent=None, title=None): + QDialog.__init__(self, parent) + self.setupUi(self) + if title is not None: + t = unicode(self.msg.text()) + self.msg.setText('

%s

%s'%(title, t)) + self.resize(self.sizeHint()) + diff --git a/src/calibre/gui2/dialogs/drm_error.ui b/src/calibre/gui2/dialogs/drm_error.ui new file mode 100644 index 0000000000..842807c9bc --- /dev/null +++ b/src/calibre/gui2/dialogs/drm_error.ui @@ -0,0 +1,102 @@ + + + Dialog + + + + 0 + 0 + 417 + 235 + + + + This book is DRMed + + + + + + + 0 + 0 + + + + + 132 + 16777215 + + + + + + + :/images/document-encrypt.png + + + + + + + <p>This book is locked by <b>DRM</b>. To learn more about DRM and why you cannot read or convert this book in calibre, +<a href="http://bugs.calibre-ebook.com/wiki/DRM">click here</a>. + + + true + + + true + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Close + + + + + + + + + + + buttonBox + accepted() + Dialog + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + Dialog + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index a6eeabd57f..01d3180778 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -468,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ try: if 'calibre.ebooks.DRMError' in job.details: if not minz: - d = error_dialog(self, _('Conversion Error'), - _('

Could not convert: %s

It is a ' - 'DRMed book. You must first remove the ' - 'DRM using third party tools.')%\ - (job.description.split(':')[-1], - 'http://bugs.calibre-ebook.com/wiki/DRM')) + from calibre.gui2.dialogs.drm_error import DRMErrorMessage + d = DRMErrorMessage(self, job.description.split(':')[-1]) d.setModal(False) d.show() self._modeless_dialogs.append(d) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 6468cd88c6..c5001659a0 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -627,9 +627,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): QApplication.processEvents() if worker.exception is not None: if isinstance(worker.exception, DRMError): - error_dialog(self, _('DRM Error'), - _('

This book is protected by DRM') - %'http://wiki.mobileread.com/wiki/DRM').exec_() + from calibre.gui2.dialogs.drm_error import DRMErrorMessage + DRMErrorMessage(self).exec_() else: r = getattr(worker.exception, 'reason', worker.exception) error_dialog(self, _('Could not open ebook'), From f0881c3d26f5666dc2cd914ee5f55b737e166c8c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jan 2011 18:59:25 -0700 Subject: [PATCH 3/6] News download: Convert various HTML 5 tags into

--- src/calibre/web/feeds/news.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 7bd5301dfb..ee5b11c5f6 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe): for attr in self.remove_attributes: for x in soup.findAll(attrs={attr:True}): del x[attr] - for base in list(soup.findAll(['base', 'iframe'])): + for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed', + 'command', 'datalist', 'video', 'audio'])): base.extract() ans = self.postprocess_html(soup, first_fetch) + + # Nuke HTML5 tags + for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav', + 'figcaption', 'figure', 'section']): + x.name = 'div' + if job_info: url, f, a, feed_len = job_info try: From d0f92778f8309aa3f8f4d765fe61031a23246b35 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jan 2011 18:59:41 -0700 Subject: [PATCH 4/6] Fix Globe and Mail --- resources/recipes/globe_and_mail.recipe | 30 +++++++++++-------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/resources/recipes/globe_and_mail.recipe b/resources/recipes/globe_and_mail.recipe index 4cc76688c1..22cb6fa5bb 100644 --- a/resources/recipes/globe_and_mail.recipe +++ b/resources/recipes/globe_and_mail.recipe @@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en' globeandmail.com ''' +import re + from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1287083651(BasicNewsRecipe): title = u'Globe & Mail' - __license__ = 'GPL v3' - __author__ = 'Szing' + __author__ = 'Kovid Goyal' oldest_article = 2 no_stylesheets = True max_articles_per_feed = 100 @@ -38,24 +39,19 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe): (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss') ] - keep_only_tags = [ - dict(name='h1'), - dict(name='h2', attrs={'id':'articletitle'}), - dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}), - dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}), - dict(name='id', attrs={'class':'article'}), - dict(name='table', attrs={'class':'todays-market'}), - dict(name='header', attrs={'id':'leadheader'}) - ] + preprocess_regexps = [ + (re.compile(r'', re.DOTALL), lambda m: ''), + (re.compile(r'', re.DOTALL), lambda m: ''), + ] + remove_tags_before = dict(name='h1') remove_tags = [ - dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']}) - ] - - #this has to be here or the text in the article appears twice. - remove_tags_after = [dict(id='article')] + dict(name='div', attrs={'id':['ShareArticles', 'topStories']}), + dict(href=lambda x: x and 'tracking=' in x), + {'class':['articleTools', 'pagination', 'Ads', 'topad', + 'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}] #Use the mobile version rather than the web version def print_version(self, url): - return url + '&service=mobile' + return url.rpartition('?')[0] + '?service=mobile' From bce5a1b4bc9ccb92112af849f64b020e6b4c5efb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jan 2011 23:29:02 -0700 Subject: [PATCH 5/6] Pure python implementation of WMF parser to extract bitmapped images stored in WMF files --- src/calibre/utils/wmf/parse.py | 269 +++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 src/calibre/utils/wmf/parse.py diff --git a/src/calibre/utils/wmf/parse.py b/src/calibre/utils/wmf/parse.py new file mode 100644 index 0000000000..c618884e33 --- /dev/null +++ b/src/calibre/utils/wmf/parse.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import sys, struct + + + +class WMFHeader(object): + + ''' + For header documentation, see + http://www.skynet.ie/~caolan/publink/libwmf/libwmf/doc/ora-wmf.html + ''' + + def __init__(self, data, log, verbose): + self.log, self.verbose = log, verbose + offset = 0 + file_type, header_size, windows_version = struct.unpack_from(' 0: + params = data[offset:offset+delta] + offset += delta + + func = self.function_map.get(func, func) + + if self.verbose > 3: + self.log.debug('WMF Record:', size, func) + self.records.append((func, params)) + + for rec in self.records: + f = getattr(self, rec[0], None) + if callable(f): + f(rec[1]) + elif self.verbose > 2: + self.log.debug('Ignoring record:', rec[0]) + + self.has_raster_image = len(self.bitmaps) > 0 + + + def SetMapMode(self, params): + if len(params) == 2: + self.map_mode = struct.unpack(' Date: Wed, 12 Jan 2011 23:29:29 -0700 Subject: [PATCH 6/6] RTF Input: Improved support for conversion of embedded WMF images --- src/calibre/ebooks/rtf/input.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index ba13668eb7..92ac8a2519 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -190,12 +190,11 @@ class RTFInput(InputFormatPlugin): return name def rasterize_wmf(self, name): - raise ValueError('Conversion of WMF images not supported') - from calibre.utils.wmf import extract_raster_image + from calibre.utils.wmf.parse import wmf_unwrap with open(name, 'rb') as f: data = f.read() - data = extract_raster_image(data) - name = name.replace('.wmf', '.jpg') + data = wmf_unwrap(data) + name = name.replace('.wmf', '.png') with open(name, 'wb') as f: f.write(data) return name