From 8abf49f8edd322972018709d8b602f840e0c54a6 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 5 Oct 2024 18:40:53 +0530 Subject: [PATCH 1/2] NYT T Magazine --- recipes/icons/nyt_tmag.png | Bin 0 -> 416 bytes recipes/icons/nytimes_tech.png | Bin 9757 -> 416 bytes recipes/nyt_tmag.recipe | 127 ++++++++++ recipes/nytfeeds.recipe | 298 +++++++----------------- recipes/nytimes.recipe | 21 +- recipes/nytimes_sub.recipe | 21 +- recipes/nytimes_tech.recipe | 134 ++++++----- recipes/nytimesbook.recipe | 21 +- src/calibre/web/site_parsers/nytimes.py | 111 +++++---- 9 files changed, 381 insertions(+), 352 deletions(-) create mode 100644 recipes/icons/nyt_tmag.png create mode 100644 recipes/nyt_tmag.recipe diff --git a/recipes/icons/nyt_tmag.png b/recipes/icons/nyt_tmag.png new file mode 100644 index 0000000000000000000000000000000000000000..2d170d68f4a5ce7fc46817242854f5fe5ab8d616 GIT binary patch literal 416 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dyHUT~%uI>dsLPA2qU?3tQ0%XI1 zgoFf;4HN`Q!Ud(Iq~Po;{Iezk%@HmM@(X78sMRBxc;&=Qz6~}$kCyLZt5&=_EqgZu z10#c{i(`mI@6pLOC$%c@xZDiCdFKEB*DO0;3GC?)p3Lt3GI)ZYa`OM+e2ReOyN+N_5#KrjW+vTf!qZR2fyu^@I7#eXR1u{ z@+mhK@R=tTem4qWR8@hg(<ubDll77S1Cht<_H@rOJ*mdKI;Vst02fl1tpET3 literal 0 HcmV?d00001 diff --git a/recipes/icons/nytimes_tech.png b/recipes/icons/nytimes_tech.png index 0f36015d2d1f1c99719ed6b1e1a24e5039215960..2d170d68f4a5ce7fc46817242854f5fe5ab8d616 100644 GIT binary patch literal 416 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dyHUT~%uI>dsLPA2qU?3tQ0%XI1 zgoFf;4HN`Q!Ud(Iq~Po;{Iezk%@HmM@(X78sMRBxc;&=Qz6~}$kCyLZt5&=_EqgZu z10#c{i(`mI@6pLOC$%c@xZDiCdFKEB*DO0;3GC?)p3Lt3GI)ZYa`OM+e2ReOyN+N_5#KrjW+vTf!qZR2fyu^@I7#eXR1u{ z@+mhK@R=tTem4qWR8@hg(<ubDll77S1Cht<_H@rOJ*mdKI;Vst02fl1tpET3 literal 9757 zcmV+&CgRzNP)002t}0ssI2w=C_w001TANkl*$hCZ-em+{_^Q$hiVFS;` z-+V#ZKc2HK>qEtBJvV;(6#7v5SJe~1t*Y*{mjjR`O%j0LA#}d5&I-rbwr$(Nvu*qS zwQbMLB4*ZnGi%Or?3q?oR_1rTui`p>_4&SNJ?Gb%RawtDCnB`4ze5f2B~|s_kB^V< z+qZAmu3gj9)77hYf3x9okAk%})X;!30|5sagkTgBya#{&ij3wLUDv5by|f z1cs(ooVfDIvFRgb2||krzAX38A7N63N5JR?ha?poY}Z>P)yL#cU_vk-(U8h18fSz6k(V;j0;1bKXBz4Gwv{W$pH^z z@fY{+-`^<9i5A)-N&;3xd?$CJEQU2}*7WuD#aHjFYG#rb7}3-$nt%rb0S8J(n7}89 zAQQ%8R~~-J`Ws?MK9U&f$n~bxfaPZgK6a}AJ>Wr{Pt+Vj46+H78S%W}xFj5}c^D5wRE z<(hj6NWtNAq=29xh;h+#NB6nLg0@$6?HR+!WgJEkawDM@<#hl1Pu=<`0%R}+MfK~q z4a+&F3fFsIU0V(DrK(UxB!m!awajEWZ;6PRl}BQ#5hcXpG)E5Oa0Pn$*myqu)zKT@ zI(+S0&4EuJy7uf1oe!VA;+;DW?ZFqQoAxTr5y=$#ivKZF8l2o{83lkwMY z8y-4zC`pnS<(;dGdYp5W-kpVa?AWn=`}R1mR$rCj14;l7LJDW{;-_s}8%y^z7Vc`y zYi}&?y3fje_rIh4S*yua~k%&g=YQ&-p7b4h1nJP&LHi|M!N+x_f2{ zIdo;#qJ6J9-1pm*iNtUaG;N)IYb(J2Q$t{CYO2g(*<-6hov{q@co5Cu&3jHhZdun5 zq)>oUf(HeIc=(?ip6EFeAcwPKE9ZB;c^hJymslsdEpP|m2EH*YpG zZDDxl6KCL=f@ilMdHS{!c+fyl)M#N!aG3lb438f_<8(ZLgJ$3I1$*DL{lX8%xg{VF z6!8QFJctGA2rYoY!NCn1Hk5@LbBKr|n*@{`Uf9um@vc)$2SiZ86tK7}{67qj966~5 z_&i78(pcMHw!gLW#_z`DVn!h`hekA1G3tqV%kzB8mM!IKj!1z}j3HkpO)&%{w}tQc9MYjD|r| z%%jv1`%k2^v$JfofcJhHP)|tl$yDG~Z9UI#YdSQA)YPJ(_;Le*|AS%K^{X1Y^Tkkc zY59Q1P%vf`Q3k{C;4!ipEAS3xL>)y}^KZ<11zc6x*Y>`BX%q{r8As7kM-in#>~0;! zZl$}srBoVBq?=2MSTrhO_y?YlSG<){!ty{O6 znwlo1hwB<1UAlY`n9)zWMt@w!bC`vPWSS0+rNMR<;$0Wkg~J0sZ=T=2Amig znKKxZ5E7@+7@F(my!zt#`0e`Q&%ahS&nc{%QQ5e(uy#>t-SYB}%ZmP(U-bT`&Tb!A z2>5nm*BXWbnsCwXzefn(fNRsCKEembAHMdDFF#n>7Q+*^0u0jIVUADmhlU18qUR67 zO$J99dHby9MQM3___beUXR#RwP??#T09KgO#3j^J)NgXqgM)+J+}tcJE$!^==zk3W zY2|Pl(Ga2_Q8A{W>F>?GNgumy8u{nHwgoqK1~>PFwDv_bv_<~emM}O}${%?l9Kl)$ zrb9T4^6&u+Ds26Ggxouw&d zSFQwdJA~YJL2ui;hNjDXqlY^A%e#j)+Ix09DoPLVbxlc0vh{Fs@NzP@G}BtQX~nv2 z`uh6*{{E4Xkr5FQBsS8Lre@Y`wlqQ_Fr6Gdhq;6b0G>i5&Ah<@!ViJcdL~A>qr-yc z&jU9T5^rQi)it&BcaMP;dAJJZ{XIfhup<*40zn_<7C?wAS3%Kd*O&~Ss<*{O6%~4h z7L%qUJyG+b3o#pYdf=Rghli)9r=g+Yh7B9QjIOS(CMG7USFiT=xperbj;n{GkFSTRsj;P* zZAwCZ^H*M9f0qCUZ7#9c`+($b--_j=-(R<8z1+fSh=pLRTd0r|=FsvnojV}>dxY46 z0y4bxLoE)Z($N0t;nSz#G4@8*r&U*Lu2j~tws$BleVm=kF*Y?hrLX7VfBAAypt*&C zmc~*qXWN+YI3Jsc-_(AS`(@tlt-rgPI@nm6*m>C)8S3YRB!L0Xo;|CepnwnpU=|h@ zSFT)vmFe!`6Bn0u=~B>^O?#!KrZZ^F*|X&K?cI@<&dJKYD-Z}_!Ugv93}dIKppD9E z=gyqwCQkN*K-$@6MR8_8fI#9>R&A?$ zj<=t$iJh(3+{GePv~i8*p;LOMUg6$uPL6iw&fY#jq1S#ts54ncf#EC(fM- zjJyux#*G`mC|kB{nKTI~1Th$lnKNf@-M=j^E3xp+ladcL1vMon&YhYjEiHz=4Mj!8 zu&}VtpFbxgB!GHw{``4(QcX?mdwJd3+R9>K!%4w^G+D%)(vj~s<~DuUn*Ax_;CRnR z;TTR)%Bx?aXSHvHW5WOfL*!vsgxZ3Xz+jF-q$Nz7lA4aFlo_*TI=Z@azIl)zX0N$; z7XEU$Vo?dikWp4paC7&5URQrB#AM1tA731?IvM z)#&gLz{JrNh><_W8w8-8eS`TA3*nm>6r{C$UTwTfeZ;Z;0>7LfD@zl-OJTQz?!MZ2 zuW!-KI@!=aWG+9~ig{;!@4Zb})Xx380ho)6i?_G8o}S*qg$ofv%a<<)77fmdPO2=h zyx|*kKj~&sw%e_s!$#*cHm_g2c=5S&=fGp*;^Gb+I<#QH0tl5rOFlk6*4EbX@$sY$ zLHGh+KnSrBmBo}?yHvI>;rNiN6xU_iK*XNjRC|8(F>dS`km12gpHGbQaj+x>Og_Xk zT*z_zdI!2&AKv4bnH!qg+6P76#`lU&n~YF*?xi+o`L7;IuLJgE#DqKg1_yAemdCuG zg_FO%!QMb6=A-h7~(Q)^7ZD16*{Ra*lm_B_vLP%O#8cra%!=elf3NSG<%*f$* z`*PP-qb`L=k*Qe9zit?GYQt ztky9hKYjHcJ$?Bakde|dE^2r~{e!}o7d^k5Iv_d&mNHEFCCkEY z#?Yw@Ho~OUmVb4~UWV&Fx5Zn*)$|lzcGB~99z6Q+Nmcpl_d5@rVu()xu9u=oi6I6e z#@e-OLEV7L7%m#@qpGTkh~-M9Q-Cj|Cr{e9XYW=2fUKl+;8k~b_lp-V&YwRYZiknb z7XTvz357y&a&iKMkkEsxFIln#o~L8J5xIDD`{$cSy1eA;FR!nAl*Gjg4m;w@XQim3 zvIc8mz?Gx;07BlEX5NO7+!IMv4k-^I(z!U=oH=#&a$p2pkvI*0Mx-t~Y}`qD!e>sC zovi6a9Z@P`GQ^lnw!y^<@^fX4&KwI5@H=t%q@3)p8#WyW7lo_5aNz<*g^;|wJX|FZ z>BWoZfBI<-JOi$K;oSMyXlSjxDJ?F8#0{<+Jdj+#a7UO-CY+EqjE;^51CmM?Lnp03Wr8V@%kGbZWjnw#gtDA zsnJQQnW~^BPNPdunfe!vH*MNNLomdiB?k+gr23tw4Eo5M+nCYuV!(u>Z*7h97XZvc zixegMxx$Vq%(_n!cqaTpS)d`?^4h9_#O%7#`$}k?Hv$DXo-My+W_5-@sF3 zh07HVLF8x>2$yqSqpK+Nfgyo2Anw`Q+k;81tgL=nx{%7I-+%ll zBa@SylE%r&jY>)^D8)9!6~*qh6VygUq6=Z$ke=7$nVDm z11X$=?p*y8Rn&YYlJ_x;6V$M=mex_BPF6Px>qW_x1~yl5z+s>n*iMY48me6b!guc~ zzuvvl?ypntwkyTyAefCohlPjW;$U^LSirbQgpN!voCDYtBB_>?m~3Kfec_xLi!m9$ zNR@zFfEbj*k_XwjISFx`yzECA=}Gq=CK?+XgRMwZ;I7|IDoTSzA{G!GcO~NH>x!r4 zoa{$A4|;LwHu}RkCgk#k!!Mq_L9eQ6hQ@_Md@v&+7mxPAsYA~DNrCk!DBZx^OPUBe zl3*MbF>3W)zosTFHbf$~S?A!nQt=rdzM~B`teL|wZsdSh@cWwpT2>t!gJ}7ofP0cYl zI)g1KiD<~k))L-Az{Qqazd=(?LxBX3ClaSpSe2De6BFaJGV?Myc^PS(`vrL`R;*wW zLtI=O;$cZi2{<{>CW1j{R93wxDR`I~6`hikef#-ytt(d3T{ZJA?SN2 zC8VY#XT-(DrH#h`oDu*_c@bdiiT8C{Grftfy;?04kyaOcqN- zU2V0xnl=$o#aIj(aTYKVDl2-Fm6?&7lLetNFSj5qEf;!WB%P3a6A}{gSMI`4S5~Qh z{wygqE<1yh8kbi8sdWtJl>8q~xMrBIip$_GmVfcx)N%8Ku&+@#&V@1p2pE(h;V_@* zFXK=$h!qVWn75p?M41H(AbXn3W`YqIEad254X&8-tSqiD!!_7@_WaqCB_$5;*&phA zwW6?shUjcI4j>RPHPtt+Qq*S9#Yw9&(R3N+PtVKWJuNGZi;FZcIyH0lEIM5p!pWXJ zd!UB}VFs{~OoKxrg#np&S?S}{gxFk8PJD98`}X>FVIP>}hqDV-9(T0zRUv{ZcDypG z{qnlOG)v)dGob~LA%zE;^ce0cfMa5ysp%;*C{G>l7v)8E~23_42*|%Z>eT9K&S>|F4!j)~IvxvMIod6zai^ zyR)KFFI@?hoxjNFf-!@}PAR;er z88XwY>@5@X(|4adCL$^|6`_ILWbPgB(Odg}IESKjdT*Wn@T3ZrS3d8@>#d{jhEAvB zFTy7wW1}POB&mWslL;R2@X>?EEDD9jpsQ)9Z`!gMDk%pS=j?lTXU+S`Kj`Y_JNi0l z_6M()D{Jx)Kq| zu8Dy@1Q9W+3>gMhR8$~UP*zrk!hier?La(^wpLd|!dGZ)Ks0egp-CdtQJz#A=QtdF zf*W-VjxhUg9_GiM>ImD%i#ho<#%%P?;XjKLP}SRy!jVBB$+52b_85Jh-PF+=yM-O^ zK+%CY2JADz8x@YgNpn{-sS&|6GBKvHm~gUT+m@?gA*Iir?2otHd`<62jJ-qJ)n)6H znQVrEks+1MW_^ndbF|eqwsb1<_~>&T8tk4l&_QcxT&=3K8j0XHSecoNvEl#XBy+-& z+q-vfXlQ6^YO1!jHe{qyOr(46w6eMuLwquLth5Mito?&<%jRx!)x0u0O`}9Jv0fy{-%UW6g!7H-rt9AfbT;p|Rc3HA11_f)m1n zp|KGORYgO6^@ersKHmGTnH*1Y(Ya%9ml>jVU?Z$IdutmCW)vzB1DWg8RT{pkbGyYx zdeA@z>S;y;y*sp5ZA2KN7$C&i4}it~RC&BGqAuNiJKy0>C#n ze1VLV6wd!kbDoUt_bOhYcNGOZLf;w3BuZ#hZ7+oJI-LQUgT5;U9OVWarZ>4RX-hEt zRGi@KU`dvFHa0d0b173{)kr*+xGT;mEpBZb~jLlx=?4!I&DP_8j_$PNedH0!qk!^9$*?sfS~Y&RF*8AzvAIDT3e7Z_AhM+`DJy+&Mv6Hv=EW>fZJ| z7wUY*(FD>rR~vgaj{Z{sEsf6BSh1k9O{2Ss*3pOt`#c=Ef)M}|8{;d~tDuw+g*(9MMe^|vV!m;D3sa&ikF zJwABwAOvNce_EJRART3(6`R$w-I!*X)7o4Q#0W%T>FDTyU~&BTanQ&~t&yw@fndU-lI)z_ zon3G!qJgXRkyC)bMU|%xtX?> zRd$8N#^za#P5a-~Ip4W?+|X#dxsAO?fPf^H2abkr~&1Y6L+4agrv1G74aPs4hkgrKxb% z4J`9*dlDD!0UyUeK16)n%pyBV!@SMfT5MP(RGPi5ovE=2i-E%wRC>v&DYNFxh6MzV zb&d3JOs6yGWHCjBJap>pUu4ZJOyeU{IvTM79B~dsP0ry-3}l7dCtNU zhku_jbx~Bx&F>8iID&`@sua|dM~@zbFTdpB3dUqG*}8@%R2q@W(JB1FUf3c7BtjHM zJkk^4;z_j6D*rcq3&8{!kwvo=Sj-;W|V_dRza&~pn zGjR}?p5*TCN+X~G=^Z|@?-%(c*c$kT(>H9{x_B2W zr`wsFB@u$y0htY4+oDB_fOtsMW@dI2xBxn`GKW$d6dnl)Ng3F11|`9N+%Q32|KDrg z@ZN#339?fQS=+$S1gK^*GH@kS2YVNj^TsT;DCsDU-p)vb`r8V1XLpaVun;gHo8UV& z4OJyIO?V4AH8oVUG*zkJt0x9hRnSmTR0hA8gujMP6_*71g+LE91}rKiB?YPs86Lr; zOc4|vl{RDQoH??~j~zTU@23TR{$6#TYN1Oia|(&i zn!iFlIKXw~2K9?}b_OO6BCtRR7QbkrhfRiUO1%~l2}4m?4OmV^WA!R!CG4?O8oZ{i zq^O~?ipijn)f4>Np`^WSg_0CPFvaFAQafQ>^xy?B4Y>}1#y~G8-IX|s1o-$m+MLc! z0zoax z%C$3pU1b@3)2Q(6=E%I1wDRxOJq#!;zDtQ3TB=SKj+~@ZeS^kNKFE)NE)R=N*#95* z8U9yG{%vPwWP~@=_@?pF8s39Tp69o$KX3x1YZnLeEvvLwDQ)w;U*-1TnZlYquBJ|6 z1d*~SBBloB7%W&xRPK6cWPt5)Se#g!qKT~1Sg)v}1+S5bj)*jS=k^`prC-nHH!lx- zKn*LJ9`@)~Tt511K#lL<;6R?QU%x&yB)+{Z?ES|X9er|jy_0(TzLGNUzm9N}EGpVQ z^PT3_yDaHVH{Eya1YSLK1b7y!Z&kmQIXNWT>_Ow@Te-4wQ?Y!3Sk}gN*juF_vQ1gH z?6)`R&YvGgo!q)Yl8IC`H5Apf!JboPrs?e21#Ms(Z@2wfwaEVsEBFr--GHt0B= z@(MYi$pJYH9Cz>Dt+86^OVf^?9{SfN^rc6xy`xzOTtD*lUq?7H+zKGOySNh`3x@ob z_C{_|QBh(u#gGVwut06yuFN+m;+>d(wP(>M*8o>Ft>vQBNiJ5d3^C$9&}6nR{<$vZ z^hb}SgXtH`ZkX5`9XWIEwEHE0H#ZN3C5uEU^aZMl%VI0gwN|mvMs&Rig;r;C?vm63 z>ku*uko2LjfaC_cSJ!VueQE!tuZ`0Ab#hzxegTxYxV{_xA;J8ET-wsh7j^(kg{*CM zn%^I``p|)$Ou`$*m`HlIyislk@@qm79R`IB5FgIzAM(szsqMP{aqziZ0-oX|p6Fu0&N8)|gpc0EBcT1qfSWe;2q+ z@AO{U7NAylq!|W(z}WMSTV6>N|SO>z}p#+ z6miK5I)~Ek#FVxPS10}bwcPm zwaZ?%&30OEl+H$_1wc703ZP@jP$<6}nu|siiU!uLxKpc_@rPwlmY;(w+0%zHZQ3-b z4-Fvn~McxU$ z8XlV*e zymtGEZI5an+1p$IFeKSPX%C_vP!9PFh>nnT5?wNYhf8*%5bXaq5W-;`Knw{-Fk$ik zD!QPYCS3$}jSdM~#)RF_?OC#19c%w=$;6n1?7Ml%xp%L}WhSO@oa~I8k80O*d_P>> z_3_rZjoK@r2$Em$i-VQbX)FCe6IVoKoZG!CD)Uz0(^&IVe*+t}%As)082FHmqmY5*24F2l~`CupkR|vaJpw7a7lZ0>GIA9(Fjd6n5_5lkOZej1J zU`SJAEkZ!?Gz&BH^z`(UjI890Y&gOm4Y=M#KhMkvF#Y`ShMBdw$CY4fGb2e!Y0xZu z%-q~EZ~A8hUcBwHC)CI-C(_i&6%rZ{5I}l@^bHh#Xp%y&2DU5=4Du$3h7uG$M5Zmk zn#I54rDNn@RE@(NCDK29n1Jsc;gjufF02DQ#s_|QkRjU-Alvm_%GSbr$ z6O$86EUXcv>XLGR^Zc=$7fg()EE*AVSWGApj2w5yxE)Qj-4bbWJjuz0 zv4}*WYHF%O;aiZ!$<2(<%}X~lv1H-wTpBUNl$3s3sj0@GBK!!v_BNE33~4LPM{Vs$cHe%k{9g^j)a@;h&S1uWOnF=xleLfDq^6&1|8H8H!G7n?gws}?vFBa$qT#pym)M4{C_S&ve548 z>VjSR->XvCOaSYOq(o3YLE8B)X$J>|UrUNM3wE(j3P`LisGsP9#vT0opNkMa1^Xku zqy3N1!}ey_UI5!-pp_5W8Td17ZF*bx`F>ex>ciX*J&pgXdo#dbAO@o7{m<5t02s(6 ziexu!`^N8%XaI`rjzXfr%dZhrkCxrMPczb7n~>ZBPXRKJW?%YVDaFj7Wf_mI+QEAI z(F!BG3loNoAVTt-C++H!d`E=-mvh?yl-|Y%V|eKd=i_7;gg-uYKi*){EV!5E62QJL zVHkO%MjXM8AxmHUzAN|b)d`Un!)WhO-~Z3h9nx9<^~*kgLF9&I&Y52bi_u-W1|F4C rav$H!@YWH#Z%2WCKY4*bctCvsrCBj0)_lw900000NkvXXu0mjf#jVoM diff --git a/recipes/nyt_tmag.recipe b/recipes/nyt_tmag.recipe new file mode 100644 index 0000000000..bcda8e1d5f --- /dev/null +++ b/recipes/nyt_tmag.recipe @@ -0,0 +1,127 @@ +#!/usr/bin/env python + +from calibre.web.feeds.news import BasicNewsRecipe + + +use_wayback_machine = False + + +class NytMag(BasicNewsRecipe): + title = 'NYT T Magazine' + __author__ = 'unkn0wn' + description = 'The latest from The New York Times Style Magazine.' + oldest_article = 30 # days + encoding = 'utf-8' + use_embedded_content = False + language = 'en_US' + remove_empty_feeds = True + resolve_internal_links = True + ignore_duplicate_articles = {'title', 'url'} + masthead_url = 'https://static01.nytimes.com/newsgraphics/2015-12-23-masthead-2016/b15c3d81d3d7b59065fff9a3f3afe85aa2e2dff5/_assets/nyt-logo.png' + + feeds = [ + 'https://rss.nytimes.com/services/xml/rss/nyt/tmagazine.xml', + 'https://rss.nytimes.com/services/xml/rss/nyt/FashionandStyle.xml', + ] + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article), + }, + 'comp': {'short': 'Compress News Images?', 'long': 'enter yes', 'default': 'no'}, + 'rev': { + 'short': 'Reverse the order of articles in each feed?', + 'long': 'enter yes', + 'default': 'no', + }, + 'res': { + 'short': ( + 'For hi-res images, select a resolution from the following\noptions: ' + 'popup, jumbo, mobileMasterAt3x, superJumbo' + ), + 'long': ( + 'This is useful for non e-ink devices, and for a lower file size\nthan ' + 'the default, use mediumThreeByTwo440, mediumThreeByTwo225, articleInline.' + ), + }, + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + r = self.recipe_specific_options.get('rev') + if r and isinstance(r, str): + if r.lower() == 'yes': + self.reverse_article_order = True + c = self.recipe_specific_options.get('comp') + if c and isinstance(c, str): + if c.lower() == 'yes': + self.compress_news_images = True + + extra_css = """ + .byl, .time { font-size:small; color:#202020; } + .cap { font-size:small; text-align:center; } + .cred { font-style:italic; font-size:small; } + em, blockquote { color: #202020; } + .sc { font-variant: small-caps; } + .lbl { font-size:small; color:#404040; } + img { display:block; margin:0 auto; } + """ + + @property + def nyt_parser(self): + ans = getattr(self, '_nyt_parser', None) + if ans is None: + from calibre.live import load_module + + self._nyt_parser = ans = load_module('calibre.web.site_parsers.nytimes') + return ans + + def get_nyt_page(self, url, skip_wayback=False): + if use_wayback_machine and not skip_wayback: + from calibre import browser + + return self.nyt_parser.download_url(url, browser()) + return self.index_to_soup(url, raw=True) + + articles_are_obfuscated = use_wayback_machine + + if use_wayback_machine: + + def get_obfuscated_article(self, url): + from calibre.ptempfile import PersistentTemporaryFile + + with PersistentTemporaryFile() as tf: + tf.write(self.get_nyt_page(url)) + return tf.name + + def preprocess_raw_html(self, raw_html, url): + return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) + + def get_browser(self, *args, **kwargs): + kwargs['user_agent'] = ( + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + ) + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [ + ('Referer', 'https://www.google.com/'), + ('X-Forwarded-For', '66.249.66.1'), + ] + return br + + def preprocess_html(self, soup): + w = self.recipe_specific_options.get('res') + if w and isinstance(w, str): + res = '-' + w + for img in soup.findAll('img', attrs={'src': True}): + if '-article' in img['src']: + ext = img['src'].split('?')[0].split('.')[-1] + img['src'] = img['src'].rsplit('-article', 1)[0] + res + '.' + ext + for c in soup.findAll('div', attrs={'class': 'cap'}): + for p in c.findAll(['p', 'div']): + p.name = 'span' + return soup diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe index 6214051f19..257d48647b 100644 --- a/recipes/nytfeeds.recipe +++ b/recipes/nytfeeds.recipe @@ -1,179 +1,9 @@ #!/usr/bin/env python -import json import re - -from calibre.utils.iso8601 import parse_iso8601 from calibre.web.feeds.news import BasicNewsRecipe -def extract_json(raw): - pre = re.search(r'