From 96be3cc215cd2eb45e56a37988897ea36b6b39b1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 8 Jun 2020 09:19:49 +0530 Subject: [PATCH] Update Journal of Accountancy --- recipes/icons/journalofaccountancy.png | Bin 0 -> 12074 bytes recipes/journalofaccountancy.recipe | 154 +++++++++++++++++++------ 2 files changed, 120 insertions(+), 34 deletions(-) create mode 100644 recipes/icons/journalofaccountancy.png diff --git a/recipes/icons/journalofaccountancy.png b/recipes/icons/journalofaccountancy.png new file mode 100644 index 0000000000000000000000000000000000000000..ddc65c8c5e8bf9c0a9cc67a515c749c9b4a7f54e GIT binary patch literal 12074 zcmV+_FV)bAP)0012%dQ@0+Qek%> zaB^>EX>4U6ba`-PAZ2)IW&i+q+O3;wcHFwMr2lgiJp_Vy4+qb+dIop+{Y8Q$$=>$q z-nXx1TT+n)0{MuHNT9CuU;lew|BFAlKF<|mE~%z;^C#3$pT-a6K7Y=4cL)35{)G1u zzyI?3^>yLlCC_(%eogDWzwvtfenWoW;QFtxUwORaly@NaHL(5hda{0g?{yDF^ z_xG#5?`z@5a^8Q>c!zxl|NZxnF~*FH=b!6Y-uavVi=N`ozsA30|7UKx?G@)e|9kKL zY`g30$KQ(+1N*kz+)3{cD}~ts(Y~ZcEL^8 z+;+>p4>y=%c-DitpXV8mx7P~hug}2z&If(D{bXPLynMk=1Fv~re!k}7?77vRH(cS# z({bnH7|ijGe|%kkdhx&W^)=Am3SH;!@?|UL^(dy9hCHYLxeJH5{n|8|eeLV|5jXb_ z$9g;%&CG=bw!5DpdUW5h<;~BY`{Et0H=SLc?*N#HXGbREJRZ1{^U)=D=jX}JVaI}h z8Z&=8{c@Ja87yjY~GWd-TTDnybQG9h)B|8TvL6+Ss_H?Cx=1|wdiBG zVvH%~SYnMW`4m!2Ddi-LYT4(IV@^5el51|omr!C!C6`iaY4xcd2#YnAr^^d+NEDUV9sa&j=%qH1a5;jy9j^C+R=a%-1Zl&bHhllvY@ArIlA% zb+t{bZNI~gJMFy7uDgAC?e*%fUUR>B?mv6Yy;zD@jIr;3a4@nUpyyl{uGo3t!2g7nt-0sVF-=6!Ae!F7$SAKi`=DAC* z`=9XKCD*;4`={Uj;I-MTqIh$VvQRd?{RY^$sXc|i;PL0XCI(E{iLqT{ce3HydKAnwcRsUpZ9p?qpM^`-YZOoODY_x!hBM_ z-JrVoC1W;9Sl}KgfV)0F{Oj-OIOe%~DSoVL_D#ULx@H_09UJH5_j*#q89g~of7X~Y zAlICSgAC_$VZs<7VCt>Z%h)KzxPn{Q>t0wQU^Hy-^f9;3W0)~h?qtmDYvq~xA~N^$ z5Ch&ZT6y|P48&ZR)x=(_kUcGxFez>n(5FxAc=zSXUKqb)@|Fmk6?f(5XAo0kEdnvM zi-n~|=$=?vz_8{(1~9wacHp5;YE)+5qOQP?xUJ#YU2luO{xs6#qAm`?E3uB9=OR)G zVybN~Bjq_ZH?2r=5mue4jFtUdoe&(!Q&kD#HR4|7a;%<7EHX3iPLK^kvEOC1=KJ&X z=FK*lWca?YG_2430Cr!gzb_F5j%i-i>$EE3^di zesU+t8${KbxyLj^ZnwFB{NM8Al0{d=;d-?5tH*21NFMt&Fg{VD5;0qywP8szZao?< z?2gNDRgFyejjbv~VYb5)b^TJh_b1F$yqg3+fiJGy{_Xecw->x}iRba;%$y_ZBj;@n zzzcX)3DZd24^S=Ig6pjXUp<~|Vqv_2ny`tFOvYeiP=|O(icyPDtVE1bZcYG?EHk_T ze0?IgV|wV3Cb6D-wYlX%%!)nt@f!(A!YRGCUw4{#p4oAG1%bh%{^Y{*8cCxtSTR7% zc_8VyJz405RtIGJGk9p9iKL9KWoEpnX*1S!k<2l(fQn}l!Pa|KFpGmbCXknyP5J-VeTQ8PAS_AL^z<%25_=J zRU{-rj|nw1W@Ob25C+<03wm;yJY2Qn@hg(bSOyE#SeC0#+Ss8|`a3+kx2AMlq*G^- zeGeA;XnDQQ&a8;wo~mZ7IChjY=n2y&aS7TwV#fN!QRfA0yJhRxSX6LNjS~%(Jow?n z5uRRf!W|8<#~F$DRiWIizhu%&=}5CPT+ z=;k^Etma2*8{>g;A$8R?uD+ozj?mw_u{y95{f4rYdS~5C4aU#;R1Oa?fP@$!EJga& z@hF(&NLR>mA9u6Bq}(A zf2f%b!15j&!$O48w;eS^AqHig8$Vq#;kpSC;AsGWJCrHxo`%G`OzPqbziTQlxr1`g z8hJOMq&d1ZskE`rkX9QIEs!vWPAbB+YTpGLQWT=MxMjk@mhQADKg8#0A2bqlUl80{qF6T4X7LGwSEw zSus7`W__))POs{PQGkNAKarm;@*NbQ*K5UuU}j0V=DtmSw5c$VtdJSidY|GwCag6!+Z_`X?0U zFVNvm6@m*)k>+so-zn7p+xP411=a$VjBpBZ5uktJIkbdKAkL_wTEXxtQWh4(OvKfT za^#sT`d}>&*yn$O{k?v__>-+<7Y*w#a>h}YsjW@rllos6nJ9pALDpyOJ|8ne~9@H#q5F}Z~qaW5_wPz%NefDYOteMV*ml!%oBCdb+foFLZp zn0rxtCg{-iPTnElL&V%HLg6mXDyf?C3lZ#xIn`1xNe_f zGi<$*it%AZ4v_`!m<-@iI3^ZCr1sjvJ(T)zXt*dgAqGIRZc&A1gkfHQggHvWHy2e$ zNKpT*w$D35m63u6%zxmNl&0VHAY?9pO4EM-Bzd9bId2 z%KerQZBkB|q)pJkO{HFlB%~k+)3$7*@%Fw39?8!Ta-LMx&(*T{Px1%zq{uvfWE^=u>-G;kkN|2 z6c+9mzSEI;?uG;TNy*0aB`Uw1%@gef`5|0N{6Z%lqHZ!;& zAS7;J{!`*6G&l*s?ePz;%T2J)EFsR)l?c@MhMrVngY`g7y@=3I%s`hfBHT-FLdfnI z0?tWLRSX0|LK}t!281YyYYqQ{X^o)~~u0fgupf62-a=uaoFb15Kmk zq=3qcilRFl0*b-1ZcKrw38PZic-Ax30{aVi5u^8!GlX_wPA3_v5`lXN^~+)%9&$(! zti`}yyoz!IOA_@Ky-&%9ROB-RCk(QTU&-_J?^!lcVk}zmL~OyLN$Yuif#Ek4eyPAm zTv#NUv~~v>5DbKLIrM=tg0;@&D=FYVSM6GX&@J`($z9=EZ~>C1E?0z+)v4t{RpXOx z&>oLS{2^+j6z4FL8GAL6HPz^A3#Q8>$tE6$Wd#z&ypZLE+;9wUR>IFWvYS{3)EjbW zlw*q9a>wx0qC88M!?>S^eW4ZpB8Fko`EjdVq0XlO%~|;T^t(GKB_(k?85Y_HA?!>d z&m$cAH)f@t;3y>%C4m`rJ<$TLs}=DE+?kg|G}!~B#!PV#At9E9PaFRf*W;rYmvXpN za!C_)mPV|GCb?-|EBM*-vu7I(u*3J3en#Lp8s)C#r&tfyNtI~&MM<3== zdqQ!ARH3%tgzuv^W3|+@(%w~gRM;cz;=a0}vgV$%pp;OQ3BAhxgv*95>7;Jd_HR3i zlv^siPzpv_3U8!ne7Xl0(R93C!A0w7Zf)Nd>rG5Z|H>6$u{2j;J2gMR(9`@l>2(?5 zl)GEikU@>MKa}?W>-XztN&lySva*iloK^htv*LfV#V$g4{Y5_A>+co+XGNc(rK$kF zM)TpRYA#ZL$q~tKflU=?M{CE44swB9f2g_HRI6Wg>0L0cf#P3k)=i_q-TDRsN&hP7 z*0FX;yLqt%>7FjpE$lFfp4IYd2y|`Ys6aA0tRLEldfIvzfk6nX$U||fc-qdwN_7DZ z1p=H*ct`F8m;3_aA254?=i540UL8O+($!{=f;7Ag1b6?uWdH+IC|iBQ+#1=&y_*02 z<;OdSGqKyQ_e|+C0u2N{O%}qcLXM?gP0Ygh3Q>;v%cn{xP>^$U|v;2)*4yXh*edvAv4d5nW3qa$;kkUdtk8hi{nYlE1|i zD*;{Odsk5EPuCGe(Bp?vl%o3mihCSi!Duz#(M!z6+xE`&gFf_c`mm8(E1}NsoMb%gmn_g%;7gCE@22IY(U2KyK0 zf*vPAr(#NLy>WM=IQLd5QKc3@u1|O~Ko!B6+w`E3IRCXfc3*Oj!##-AJ>HtJrRyy{ zZh1)pA75dF0>Ac>d%(d|zI4?$>(_xt&$U|3>`N16g}7y)nuGn6>%Qnk_ zn69=&B0u}5rP8mjo(j2Yl0kq;N=R@jIZu5(9VtnqLDGl^5(KsZOEsjJXT-5>Dm%IzW4*@ zaC)LmjB`kyyjxDowl0@Q3*{Mk(jm>-Q6wx-#$b@YUUNZiYH_wl8y`P(tm3JTaubvh zcly2Sg|`+IUhAZ&!u_s6q>F*g>`-Z0y+r~#?QY+U5aD=`6@wHfm_8(dMe?KD=63C(#%&CP*?F*eEzaBXE5}7K}*Im-^K>)&5`8 z;Bf{*+(|Tmd{t)^zRpkRPy?D0+Yg<4ne8HYP!LI5+iEpZ{%x1~kehNUfm3y0PRm3T z$$jXmHf8K!$p*)++F)@okZe(^gDRv}QGMV|Ek=9E88r^~ud1Xj={?UGIaOc-5_ zXpEq4q_w;OL+v@)4(8#ta52IYx?l?9+D4qLi^L+PZ!y$CX^2yV3}FGcRF32BTMvLI z0jIM?h1N-CfHBYO964Ba7$UVoxI~@nspniz6#|f|D+4UG@)y0r|BXc|SetPR5JsBT zJtwq?hVMG_&S1yIslJSV`IJTmU0k{%nT9f3qn2c3m@-&_D?8Wlw0j|T(!I8RvyDu46Q6Jfv#a zwkinSzjk%FJNH$f;OM#2>yz3yNKuP~atHLFR&c)#*D&TAFF=9amSWVFsCy4rh4bde z*KFZ(ORzW#pEX-&4`I;ou}_0%s+$h`C)!p z>SO#XyzkHXQMJ{z{6p_O>&BMHU7*D@fiV;191ysNXKBoQ>2U53I*SJkI^~ivKl0et1pz>;DwP zS$hc;9>nay*0c1irGqEb3URi6-HIsjfKH7%Z5eOP{Hpq1$|*WJlCpzsZrQoQmaK)= zMRc(@Lh_`vUS>cuq%bO_Y#`>p9aS!ZvkewW)@~P$(}?Dt>r78ZdQ;vD>LeZzUNi z3agBsNmDng9m4RoeA3S%1u2GPEF!l1lYJq6&LSb=c4GwJE}$dCL!ei&6Q{DJ_5mAD zh);_YelM}7fYhF|n$kKx)eqAq-=SjrLg9ow@UHP2&F6E#LMQu@Z<5wQVnSQgYJcd6 z(RhzB;rZ0CZSI$12R^Og;XS9@@o}$2V4yCCc~hx;pyER{in6cBE_gvCc zt&htmolVbCCy%LUYkD=knoiSpJFVjFNK0yMX#;n7ncU7osl)KnSLE`ATVp1A5g=(> z2LYX5f1YbYB&A$*N^9Fe-DiEXf!7Knj;p27d|4(dsI=!4uUgd@&n>k%wC~HvFf+AT z_m^R6B|sLX!x}J`P7$m+*VEKJuqj0o0t$jx+r*?p5xqJ<)ONWDysj+}Z;1e>8*oU) zolQf%Ru)ZgS}2L?J_q)E4R z^_i(m6p3jn_VXM2+RagR*f!LX7Kz>tGe_(2=p^%dcuhyorU+x}6^}t#jD_Ft(mzb%r0AzQ?#H+?B&&VkwQ zoKRa(2P)#!mT%XYyLltQWD|?+*hrXQdxFXg8SVN#F?8Ms01Ij)?`3E5btB8owk;*& zTJ^nk5Xw74xUA#(f;jCFf!K&J;n6(4dc@>oBYdRKpg-@qak05&u|#InHudSVdtVev z==rSL?|@ZnbL0!K>?k_su2QBq<;Zqfbk{emWjjzf<3*b4T%%Wq&QPnK7Mh*DG~8mz z&wd*#7D6xm`QVy97vjT3ptRfTqa)i&-#O1JT>@J5G#3@ocQca-bL}F3PPrilqK=;G z`IPKodny}@!Y?{KsuLAz@1Z>AIy)g$y;cNG(t$cv)SW`2o(~vMukyu1h8YF_q@M5=lyeJf^>f zbUr~@-W|K9#!x*x3=NghJ1&w2Ngtnk8Tj4)dFORLgfj@_L!Wd?IGjWPfbHWtPQ$ol zou$R7qC}!XPt5074~rB(Vvv{hs}u8ckce$S$LIc!b20IBUw^!03)1PvQHb686m3Q- zhRV^9dYVb+aGz~T1~OZjp+u5J8K9J2Lu4(c&5)18YKS*SkBMzAtytG00009a7bBm001r{001r{0eGc9b^rhX2XskI zMF->t2L}x)p~cRL000r0NklzVmP2|DA&(0v|s_7*GQ+03w(m3?Kji zNX3sJfD~^c00=+;0>A)(XcXVie}Ld&591#HM+RU3U6>ZK6u{h`iU0_r&cg!1B47cb zJo4h~AIZ2E08#lx&1AEQc!C)O1po-KFnh@xN<x7KBW|Z5e_agK__W5detDbQv00Py`$T zT;JH7xf!jgHVz+dG=0|2y5Pdh#{Aq`Nr~6AxaC02xeE)R)s4x`&9!J%Wf_P_p<3hr z$DJN5Hwz*WQ9gqqkajSUNZAhl&hH&KdUW5l;W;OVum8ap>l=KTOg5Xf7Z;+;1S09A zZD(`&{0Eo=va&h+ME-FqBbR#z-J+cXv0I`h5cf*Wdh?%kRED+t%6K zynh!djm7ACJQmpQPmCx)SXx^4$`_ye?sxxtGq(4Imk!U}+W7tt-dbFYU%GOQ8Oj2I zrKQNhgOx9S?pa%B&z>1fY+1^biXCBJi3{`54C?U%K+Mx zvpJ5(Qv}vO|I=9j(GIk?SA63euQWDQUmu_B?OhN;+m`9+`D~UgpYg!t$bg48+%D_* z@x#GD_~iedzH((Ym$BGZ3bDG@>N&FOwbxEG@2%6EJ%4U+Va{PhB$=96+=?bky#bIs z`Nby*0Fk_2chBMdP0e+4w<1f6D|XJdOtXGh&5o*|-|r>B>6!KOXD3TaUCC5V1ZSpW z^9zxxs(?azJcaBY)D{pFsc;|=^p|$D?Pk%;Y#0hafuL>czTUBTBKNhgAN}zU&#f%; z>WVWqI(49PFB$S+5FT!mfd~pgA)q3q$TXD8Wx0ryB0|EAXzc8{fv&EdFaFkn)|N_U z)}n`pZ^RQx5eGz`Jb*kiLP8`$LLx*|ga#r3z}Wco^knq-@xwcJRe;trM+gxC#IjURPg_NWMTECzlVjIs zY{y|{5!oh$eI_ixn)T4g_}Y5v*<*)1Za1NjLSkh zqf!cyFkdbN5mbFc&7Qp#EKVlFLjx0uM7q$R;inCQM+rcXnHLr!moMFT=9!N2a2W`K zP@&!dieR9uq`j>HM3XXyhZmNY*Fd<~WPS!LVB2u%;)vroM~`;7OcRk1sh|uX3XNO^WW~eME?ep`gF5wH{F`rG|%> z78fI$h2@C?$Y;Z?t>lFZL%a6`+dK9cO38-`Eg}-4+ie^?*b)dR0&{Ue_xE4RW^57u zbgJ{i?FR~Ne%?cP`o?VEmDy*Hx7xP7yc{J&rPOx9liw5(VP9!Ud4+FlE0fDgfB*Ow zUOrat_CJY+CckLHB2FrmJ9DOY?q=%!Q==2(bD|3ierI&%8*0g9^uk;Mf!MSMM^fhiNJ#EbeS>TmHf= zL_iYp)EjU7;O9SEMpAN=yI{wN7~M>tJv-RACw%ZgOJ!x5 zVJJlWaAN=9`+|Y8KzC2euijg592^^4o1DB^Rb5W_NL&a30YI1;5fOke{|F!`;Apl- zuiYHzUpjoarJ_89fFOjTP!!yKnII;J0iX=i?CNf<-{l2_^;O;1JDka6Mebda>Heor zfa2J8GMN-s0Oa*rmTBboyB+OpZf%`8+wU@5Pao;@m3j=rMPvZdhc7IQd-CujyP91dTNyY$zCJQMrJ3$o4e0(OfJGplNXKFsKvD|p8p=Ih7a$8rCc_ub z4KK_^yStw%EA!pgswjmWo%+47$%M!WEUl&I7wdG$9T2&)=F`Vg%X01yzZZLJ-xvmhEo3*-cB0S7<>u&~ySo}Z6g?j094-KO)x z3!PP!6{3Y1H)Dyj=Lb{ibW3YvX^98%zIrP}GA+~SXx-bqH=se~Y_s>uSadxuT9~zb zDhmiClj(Ro&7uJq1VIcCb-N2;aFWUFyYE~a8B7Svk)u`5KY!RVK*SND8 zYky0VX{r0xc@PkEdAYx{bGJ~=aMAEkWc>Q9<}7^tTHFCZYd&@A;*Wm#Rx0VRFpCxu z0WDy&*djWUajsk$d+X$g&KeyZ?$=-cVqIN15erB%kvV;;cV!W(sspvP6#$w!e~Lv! z3Qz781T3OmE_JZ0Js2Q?^lC)^@|PE4v7|5;6_-y0Ktz6j_}zDg`uZl4N!!-8X3bh_ zt#etu8rgdLos0kQ_a|qjHru_pUH&AmuuL{^rotS=%OI7GY-nNmPOe>*}iA-O9EanH)H`pVpPJYra>t_S zh51N{$F0c7hq23Lx?CoHOiT+12rzRZk!I}xfOep)%nuS3goQ=;(GTCN2>W;L+<`!Ejg76vVnRS7yl;Q$u7+~UBtWf5Uj5Q@$BuOonRF+`%x{W`2(ykx z6K}rxlZAzK1c)cH1B1((n=FVRMBucw`5Wp(hJsgKeeT4G!$bgJ&p6cpQb#+DS{(8Uyp)N92 z0TDv93>Sz3CLr9N4fDq(L?FNYQR4H2LuG&%4&$y}Rd}~TqlL9;DnXIr58Z765wV?& zW3%IkonvNEiVf2+O;QS#0svElkn&nMGTYgVBchNPQHma4q{;aLE2tDGLLw8D+>vyO z^;NSL7RO-_+;$vFU|R`b0cJ52xjhzOekbK40ob+(Xw4$LjXK|6hk_Fb2mm55e?mu+fX#On@LlQnUGHtyCwitP-r88rb#&Cf@KV?M%GT3I+iuP- zB0^o=&WjhXy!6sDy}j3}t9OJ$<$b-Q&mQj{8XVcvTz7MJsl@9I1ik%zqeqW*_4kd` z*48N{b90N;HI?Tt-Z=H^=~uomQ(IlJy}i4ON^mBVaJjYD>r+a?;nJ$wa7CrRVRvn? zyreWh=}gS+Rh|+v71@(lV!6CjQ5o1#9o|tBE-iDDqOw5QRxIY&cJ<%aP?wJf@jd@0F?1vFnkQ)MhNR70*oFhy;WF z8#iKP4BniH`LECU{UuWqk)@UR#`>001YoUgZZXT;&G_)KcBT+j#A#G<=E2F zT6g!3JxvXW!udmEACZMXwi1c{zKLJ_?EU4X4G;?{e|*{jP>2X(0A&yiW(LRtm;k5& z5lrG7k!Dc{h9Dpc2#V<9$s!U4Ktx0l8^L6;5k +from __future__ import absolute_import, division, print_function, unicode_literals + +import json +from time import sleep +from mechanize import Request +from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe -class JournalOfAccountancyRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en' - version = 1 +def absolutize(url): + if url.startswith('/'): + url = ('https://www.journalofaccountancy.com' + url).partition('#')[0] + return url + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict( + attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)} + ) + + +class JournalOfAccountancy(BasicNewsRecipe): + __author__ = 'Jose Ortiz' + language = 'en_US' title = u'Journal of Accountancy' - publisher = u'AICPA' - category = u'News, Accountancy' - description = u'Publication of the American Institute of Certified Public Accountants' - - use_embedded_content = False - remove_empty_feeds = True - oldest_article = 30 - max_articles_per_feed = 100 - + description = ( + 'A monthly journal of tax, financial reporting, auditing and other' + ' topics of accountancy from American Institute of Certified Public' + ' Accountants (AICPA).' + ) + publication_type = 'magazine' + masthead_url = 'http://developmentprofits.com/images/JournalOfAccountancy.jpg' no_stylesheets = True remove_javascript = True - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif;} - div#Rubricname {font-size: small; color: #666666; margin-bottom: 1em;} - div#Headline {font-size: x-large; font-weight: bold; margin-bottom: 0.6em} - div#SubHeadline {font-size: medium; font-weight: bold; margin-bottom: 1em} - div#Authorname, div#Date {font-size: x-small; color: #696969;} - ''' + conversion_options = { + 'comments': description, + 'tags': 'News, Accountancy', + 'publisher': 'American Institute of Certified Public Accountants (AICPA)' + } - conversion_options = {'comments': description, 'tags': category, 'language': 'en', - 'publisher': publisher} + keep_only_tags = [classes('contentSectionArticlePage')] - keep_only_tags = [] - keep_only_tags.append(dict(name='div', attrs={'id': 'Rubricname'})) - keep_only_tags.append(dict(name='div', attrs={'id': 'Headline'})) - keep_only_tags.append(dict(name='div', attrs={'id': 'SubHeadline'})) - keep_only_tags.append(dict(name='div', attrs={'id': 'Authorname'})) - keep_only_tags.append(dict(name='div', attrs={'id': 'Date'})) - keep_only_tags.append(dict(name='div', attrs={'id': 'BodyContent'})) + def parse_index(self): + # ISSUES ###################### + issues_url = 'https://www.journalofaccountancy.com/issues.html' + with closing(self.browser.open(issues_url)): + pass + ############################### - remove_attributes = ['style'] + common_headers = { + 'X-Requested-With': 'XMLHttpRequest', + 'Accept': 'application/json, text/javascript, */*; q=0.01', + 'DNT': '1', + 'Pragma': 'no-cache', + 'Cache-Control': 'no-cache' + } - feeds = [] - feeds.append((u'Journal of Accountancy', - u'http://feeds2.feedburner.com/JournalOfAccountancy')) + URL_TEMPLATE = 'https://www.journalofaccountancy.com/content/jofa-home/issues/jcr:content/main-content-section/issuelibrary.%s.service' + + # INIT ################################################# + init_url = URL_TEMPLATE % 'init' + init_headers = {'Referer': issues_url} + init_headers.update(common_headers) + + self.log('\nINIT URL at ', init_url) + with closing(self.browser.open(Request(init_url, None, init_headers))) as r: + issue_path = json.loads(r.read())[0]['page']['path'] + ######################################################## + + # FILTER ############################### + filter_url = URL_TEMPLATE % ('filter.' + issue_path.split('/')[-2]) + filter_headers = {'issues': issue_path} + filter_headers.update(init_headers) + + self.log('\nFILTER URL at ', filter_url) + with closing( + self.browser.open(Request(filter_url, None, filter_headers)) + ) as r: + issue_data = json.loads(r.read())[0] + ######################################## + + self.cover_url = absolutize(issue_data['issueCover']['src']) + self.log('cover_url at ', self.cover_url) + self.timefmt = ' ' + issue_data['issueName'] + + # INDEX #################################### + index_url = absolutize(issue_path + '.html') + self.log('INDEX URL at ', index_url) + self.log('3 second pause') + sleep(3) # mimicking human user behavior + with closing(self.browser.open(index_url)): + pass + ############################################ + + service_headers = {'Referer': index_url} + service_headers.update(common_headers) + + def get_data(service): + service_url = ( + 'https://www.journalofaccountancy.com' + issue_path + + '/jcr:content/main-content-section/' + service + '.en.service' + ) + self.log('\nSERIVICE URL at ', service_url) + req = Request(service_url, None, service_headers) + with closing(self.browser.open(req)) as r: + return json.loads(r.read()) + + def make_topic(category, articles): + topic = (category, []) + self.log(topic[0]) + for article in articles: + title = article['articleTitle'] + url = absolutize(article['page']['path'] + '.html') + desc = article.get('articleAbstract') + self.log('\t', title, ' at ', url) + topic[1].append({'title': title, 'url': url, 'description': desc}) + return topic + + ans = [ + make_topic('SPOTLIGHT', get_data('issuelanding/articles1')), + make_topic('FEATURES', get_data('issuelanding/articles2')) + ] + + for category, articles in get_data('articletypelist').iteritems(): + ans.append(make_topic(category, articles)) + + return ans