From d4efac58636bc1f96733ebf4da8d9a2051961cf6 Mon Sep 17 00:00:00 2001 From: Christoph Kisfeld Date: Sun, 28 Oct 2018 02:18:25 +0200 Subject: [PATCH 1/6] Fix range pagenumbers in Python 3, prevent long cache filenames --- pdfquery/pdfquery.py | 12 ++++++++++-- tests/samples/bug67.pdf | Bin 0 -> 71136 bytes tests/tests.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 tests/samples/bug67.pdf diff --git a/pdfquery/pdfquery.py b/pdfquery/pdfquery.py index aa7f584..3708a96 100644 --- a/pdfquery/pdfquery.py +++ b/pdfquery/pdfquery.py @@ -7,6 +7,8 @@ import numbers import re import chardet +import sys +import hashlib try: from collections import OrderedDict except ImportError: @@ -84,7 +86,11 @@ def _comp_bbox(el, el2): # assorted helpers -def _flatten(l, ltypes=(list, tuple)): +LTYPES = (list, tuple) +if sys.version_info.major > 2: + LTYPES = (list, tuple, range) + +def _flatten(l, ltypes=LTYPES): # via http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html ltype = type(l) l = list(l) @@ -458,7 +464,9 @@ def get_tree(self, *page_numbers): Return lxml.etree.ElementTree for entire document, or page numbers given if any. """ - cache_key = "_".join(map(str, _flatten(page_numbers))) + hasher = hashlib.md5() + hasher.update(str(page_numbers).encode('UTF-8')) + cache_key = "_{}".format(hasher.hexdigest()) tree = self._parse_tree_cacher.get(cache_key) if tree is None: # set up root diff --git a/tests/samples/bug67.pdf b/tests/samples/bug67.pdf new file mode 100644 index 0000000000000000000000000000000000000000..42e0717eac8d0093fb62fbde6b6a2c6b6e7e796d GIT binary patch literal 71136 zcmd752{=_>8#dmci8P#)lsFBPRL(p^%3LU-Nn{=~hC)OmO%y4W=17tTp;Qu*CM1=L zCPK>4sFd`rvv=pT_g>rkegEt4cTVqh{odoO_CD+0&wkc@ul208o~2^0Z#b1cjU}y8 zm{fSRFuU-uG=oZ``Z_L`*3!~2b@6cvbf+>wiG_xthgYDBzlNcgL!gVki<7Uji?+6O zK%l>igST|>fn_mtnSxmZH+}m#eBY)+8kJXUWsHtITIHv%$lIK_&~Hdn%}cKLM3tQy zw?o(UyZeRi+e;_qzSR4ZZC;-HCkDJI_pHdQk7~>qu-llvn%ywExu5E)_|^oeJq0Sf zc=cR&nUfpkZ@8qDAK5)_TGXI^io1W+yVFk_M#>%7t1@Agf62rOH)f~Nq=T;gqrc7^ zN|}DUtl1$+!!C51vb2kj^Bt2c69p{3}w%hMt=9|M1kz`^*KC8YVBG znbUe?m`7$ZfUADaexhMD?GwB)+sxuE1rvmsC76>mhS~|2gyT2Qe4W zSQh=~T%fbEWp3B_%f6O=BfX$UkM={9J|uVrNCY5xf8p{VxO}m|J&%KXiqe+^t^f%E zE}ahHr3?J*|K?Ty|LvQF1Ds9iM* zEO3+m95L$x&c+3l{$z-S9*aTDph1Wk0!R7p5nse1E~N}0L5x5lcm+d99e>G@A#jxc z9C7A39O7pbX)?q@pT*i(NFEr(0!R7p5oh8M*HdK35DR@4i&)4Xe-SeUj?#^Yo%@n; zu^+<^M{ml9C|gM|BVZ7};0pd#@Ru}`L^S_jRP+A>zc~+D;IKT^?bwjGV@FzDPujaD z4T&4!?exmo&1>K|C%vneZ1gVm%IP({$L^G2KgnPVU6)V$3tLDd7*lt%>~&>oyznW# zq^u^xEOcHT#?(S0!C>xY?duXV&OMagQ$oow3*8rknFY%qtDEJoOUyU{RQg5}Nsg)9zU3YDrzc>@7M@C^8K;9vrW8LC%m^5Qoq4bh^2DNpEDl}=m6lLe zkYN`3F4oRMF8S*lJhA8?z>L#DC1;918D^pHVlfN3mXk|I!FkFDorNU6a)+rvye+Lz9AkRq!Ve+ znNEX+kWLei64D7us7uPwC>nyP=`2`XtrE;k4o zB5W3kN+R)gaX1Nn;OI^+H=^i2)=5|-Dv5;L#Q>Y2l7ZxMBborf!6s2jmUxDbY=TOz zr38|xB!UKV5+S`{2oMRp|L4~BBScb_PgAy$!4^#eV6Dw2QAs5BE)Li@l~j%=orIx_ zW&&`qNmP<87M1iP##5U~$G&s~4be&1Br3@ki%J66_>gKld7lJ9LxjyCQAs2SF9s*U ztE6%ta=8&r31FRsL!y!#v8W_ylDNRC@)6Q`ema5%vNnlIBB6M3SQ{5xRUS&--xf^_ zV6Dw1QAs2kFAmr^l~f)>I%P)}%?;pSlc*#TkQW1NyhCLx>GrHdvBu-4|0s3a1Z7YA&dN-8yz z6QF3G00)~yC3#{|NfwhZmNX_O7g{`lAuZAR8py(Ty79FkhMuvk}saIB$F_fTuweHL(mXl zGib0%GLTTcIGhBhl1i&c2W2Q42sW&e3?x)92H1F&R9a6uC_~Xeuwj*C(8QvWEGA(r z8ADk~qLL^Y2sW&e3?x+l-&>nYP|0DWeG(K65w>WN``;lj0}0iO12$eIf08l@iUxuW zt0aRi9+l)0RB{2S+@NS6*sw}6=;BdHEgLIKaXd zEr7>Z8wu5m12$eIH#?0x!fRVh)t47qLN6cUJTYIsN@04 zDl(N6O&s8?O`?)WsQy2Pok=68 zY$3g1VT(m2Gid~sbR?G>me7rHut`*sB^H$gu<>I_A9A@t&=8xXXh}TA+DNEg3{FB& z$<^e%Et)vMS{qrSPf$s=SX2@;N!(acc{q8;6+r`8n?xm%P`x;;jaSJ?a=8&r9AK?2 zS{sjX5+qbF4%m2=j3bvD(Zm4`Hi=3ip?WdECaB~ga=8&r9N=IJ=>=HbEuB$mK>fae##_S}>2XHWI2A2W1P#%*kv04TaYsV+;;=SeCF9BEMl^ANwYF&G zJjO|oP`x-{<5ltqx!i~*4sftZR1yi*ivc!4B~O#fjcDQk2U|!lSZj+%C0PWOyhI5m zlN&ynN{W`y|Aoy&LiOEg?H*mE1R(4H(GRD5CjpM)K^i4r@Xvz3q?yQ)ete^3(z;tG zy9SO^N_|&K8!2$1EAz0x;qfGs*4;|kC2+h}>g7@1k^&bxGY&Ycl}uW9k0`qajvG_z z-K8{=0vC4ru)s+&NrY` zi;mWD_S!6;;MspMb@7p2J%%10Zc2(?=+}G}2ECATu;{y6LjR}fqnoj&zv?GvMG9W% z+c?t;c?S!=yJhr$f}e}?o2zT&mXd-O`ZpH5XuUlKe7C3D`QPC2mftF8O$J^x*MI{r zZ}$9E1hm-IDr0!I$9OCPB`I6ui*AvEW53@G;=K zJrmFW29LA+)g-xPWZ*@!4><5b3c`Z#ZfX6W;HTm(e_qa(6ui*Aao~j%gazN-^7_BQ z<1K$p&W;qk(7kcsg%pGV-|b0yy677a)AZ;aCLbz^<|43t1JSB{Eb#7@*RFx%2B&&a zlzK9Y|4r6|8JvpN zaG^V6fs5AWV}N&i;-0R7;|8a?-zm??fQ#lNaKMFhg9YB*YT6}m{NPk?7o~v|xKK%P zz=c$U1>W6C+9hzp;8coIMG9Q#&RF2075W(9-JZv%YvB07=@3da8F10e1P(Zff_8f{ zpDuwD2B(UY=cK@e?u-L2q#CTnyIVcG296t?>P@1&AO$XTXB==L)nI^kds?5afD^*H zdgtFVytI${Xxg1kAnsZc!%sQ{sPh`>qG{jj<{-A|Vm$0z*J zQz8{06b}R(R!(3D@dVOYgp@yelSu^##RCC{m6O%&DSx`OIDSG@k3)G$qMRrm2so^q ztZq;G(-m+++8@0uq?1J`9wKnj>U~V(?Dn)jT?5CDIggXG%HPC4e+7soTECA4E-vLH zr2f(EAQd1Ml86YhIINs3B-xHQ=#Y^7N3TD*0R2t=gR!`14L`=>-JblXOIIN%ryMzt zi)JXWk}q1tj|DESF()AbkltC+ZY}F?0wAo#NtCnO699E>alCR;$pz?d3Lp${(Mo=d z#k)NPP}jimW6mk$0wkK0z`BZPEk72xxRjGk7<2N-1?X=YAgsknl(XB@0Cj0`f^zCp zUXdy1-$X!I;3UfV|C9)5S+u51FZz&;KYtCkmXQzA^S`are{ZPVs(w9X&VTMXVtwWJ zZwG59yErIZ+vjj&QpDSmf?jo+zEg}+R6qG2uZXcrjIvf*Fu8riqW*(kude%*bb8PG zUkASI^p2Zn;-gE^m>(L}@8kwktNhHhF=?5evvpMRJ(tQ2&3!mNdUyTrIqFZo99%JT z*PN|i7;k=58#5z%`bDafEp=dtW-;jOzvq0gMEe0Sj@D%&p+Cod(W!J6kIBFu^7VXm zNZ9B{kKWE;EDi+U&JQczrl>y8F(dd`u*m^~E#4obHYYFn)GIrANye^o ztiF`~gFI}qDaQH9YEt_vZ%w!#|7o4c(KWv&^|5W#9a}W@L+yKY6JyJ3*I0XA?n{fE z8g>8Lw_h*pe4F<1zf}jc%&3~tP?}ZSdxOIin;l1Ih;)j6WZ|dCK0KM?=ofoIWJRC(LhRY2K7s{WZ7pt}B0h zFz)1#s)6de)Z?utXBj3QJYF$+d(H{1fNMod?GxRCc(;FlOX#589v}Vbk-pKfFZ>2a zvmxqy<6B>h96j6iRrA)o{+%pmm0H>R4c&jgqN;DKXL8Kynu#-_Ps_lb{`wE!WF#C8J9uE+%!ABrhrh&*p1&vMw)MG<9Ru}LbhS53 z@s|lvF#0ucMdCEB+^!8f=1H$|wSTGl(tc2^?DmnBMny_D3*X*0c{BFOy79&H%L zjAwS)^64$xVy!}&XDXv4y&3Imc0G_!_j&UNAAKV{gpYku4?5f8noRN z4Nv>%&s~~Qmb{{3ZbgL4hTGv*SG*2|Wi{=N;6&AUW@x`rtx%reYa00`v2MrFi!|m3 z*~Px{&z4&6zv%td_vuv+r;5n0(XwAxX1x#dY~RmLTbzFN*y^HBE(=X2Yt^|VZyPaU z_%y$=ApTU<4Ymop(&wjT#7pOFcUc?nKcN3Zl_$ET=E{MUIUAx~l5GwqU&%h5S-MPl zwx79)F70}x_W&g|O^u^(EaN){s=Uov=)9Y^DbeKW()abH z*W1#)`zxPWDPvRPB<*|b-097njPtMk$M)N89KR>c$G%Z_X9Z_Q=+8x@$xO z$?UhipZjg1rrPGEtHoaV`Yv0_a!J|BenU6pv3)9~6YtXWYJX?=tasI^J+f)pwDN&b zr5~Ipawvt?RTkbcO_?(cg3Y$A=<-Z!gaa+-`mNR(o~5|BjKeHd#hTG8K)pv>z6#yfJW}@aB&5^-Dvo48LT? zehzu}^}OuFNsN&>%ejNb$5=L;k(r&R_k4unT0K4Po}^w!lc(laj=SDZWyhoF#lbtr zkDL|t^`V8Es#c48!JD>{;Kg!jXHs&aGiG}YsJJXOAaJ(L0JAa67TY|ZeT1X3p0_8| zvug0>pw<3iYHpf!DkfXZ)hUUU4)MbuMQ3iBKCE=@)Q#`Y?DD*jL%F(EX+*WRgU&$B zO!JRP(S4(4IVC5jr#^k2xiYj$dd|)TQM?1E&$YxkRC{MXq>Ws=<#fT6GtuYI_oBQT zn*V70tG4t4H(jf}n=UHJ&O7h;>c^rL-uB}?mdlNwfA7&IPnGHCXSkW?jGXs=RK;<% z8>%rE&Xm_vY*N!s$LIUZ+dq<@p?{on;tFl9gVBha4VMkKQoKEeZaCx_5cPchl|+*d zot*jHa;=AV;^-$!RP3_udAN*}Dbcria?oen}7N`U|b|EufYCz&McezKKF7DC?&9#-5+18r7^9(Mg4|O^?SYXE%n(8 z<41kIU6MWeM$Y!60sEJRxg_s>7FO%SyR`B^)P|C(s_-MlYhLH?$xaz*|F}V(!f??hM z9qmXwq8u06dAkTY)Vk%&;il zbYoV|oVQ8?`qwy)cMX*7)a_O6d4FyTd(vjzMDr_!*EZg49`=zv?wabR#hZgwA}8(b z`82BhTc_0Kp?)h9WfCSV{=lr--04(q*-Lfz&|m#r+|S1~?$}%AH+cED0pUC5q?zyi z{P5=5BX?KS)0T548XZ(O8vd+L^tb!xIF6#dHVmeiF5t-Y8bt)!KIOX*2laOH;)hsR$G|F|r} zO#0L*>1o%3f0p$f(=fPBUQ5HraqW{6Gabun=2e-t)K+bwTdP_sENmYUa)w{2F{xy_ zLganUSZm+Mr#jYieDC$zHDd3%wJ&%pYTY|6-oAo$9vD*ELqE&F%B_ZGBDA z(`lTUFJH@hwXa^Z;Z8>V>!Kmc2Cgf5WjC_nS9}%qMfUxbb4AAj2f_YZ``n= zB>&0PpSS6MPAvanGQ+%d!PI5->G#S4G^Y5MEq=4_Ui+Xu?RDQH(jtx>^O{p}NPdU( zYtPAtM>hY6bX)Q0`s2rQ6I-9!)<&MbV%U6huy5L$rpV+~i@lA0%{4l3wk@P1>rtZ2 zV4@!^u|dfqcf z&t$sZ{GKY8KIO380vEr0Ii&D#9(V!w z21bU2>$v%i8adUIwsT6Bm6~2+L#$TbtS^N!N7sz~Ak%Z5XHfZUBTY-|(P_!<6CcGr zY`5^+yyz^eq5S%++R=N?tQ&Q5>DI@O*IikzFe!jK__*6s|KQsEv$CqYa(dTBSj}v^ z{yI|jXnni7QXl`S=4PYinv>S+WT?FnBMkJSJW0*hvtxLS6I?1AHRlkQHq!eJ_GHc45-(YvqS zY_(r*&jPt)b56ZIweHr%xX0(|C(mBov)ig}d-A|vSqi_t%AM)QHoSkKV`|@=$Xj!+ zsi+rgI5#Tnva`RJTDLu|%4{+BOj|%^`}AcUsmU|;pTFE5*mp^Le}i`BC*8>loj;{~ z(%mY1IN(CH%BkdGk#ZE~?VbnE201+0y6xU+o5|_)K<>{486QhtcD5a~``z+ou}_}2 z-?ynhqh2>Bjcrdbo7UN27xA;^!mid|RVJy`Q7@C4lmTrw{~ss;C@$I)(&_^-?OeP(C_DV1%^e^$o9sLw_jbK>0}g* zGX0U7wKhgR{V3~RU!CR?t;<#v&h35sz3+=>mw#oix&3}sbFZnRC#8438)Nxx+q(Qr z_Qk3ce}kQMG|!Brsa9pzR4&ABy3|X??F%m~FTI}rMdrW@^G5lBpXdFwzt`{E5Bn$mRw!=_yI-?msq&Ps z`Ms3)hEYG~Ck=k_C4b}Ko!>{QyqKB5GnfI{t` z_e^hFjwnUZj&)Sw3B=Lr)J zYzaQi9+9cNGH8xrucSHmcTUrm*}dfZvXN4OFZP|=ahh(P+x|K#qQlFfI^^*h_hl>A=zV%o zw2&)1vorA1f|rjpFNdz`ySGYdjcHI~{`r|NKAXQcw_CV)SqpoinRoE}{MzX&lbkaf z9evq(RErfZ6?5;sD)~96Y1EUvh#i+&=G88rPW8RfQE?+dZHvvBa!Ru8Z8fXmX>T4$ z)6;HBJudKE=k8!3 zOHQdT7cF$z^{8K3xJGI2=m5K3-$Dj4u2(*tI%4-a7w^fBZ`V1gZ5=SZX2O!A{pT-9 z{~RfmrW-qK*s*sG?BkXjmZqQkq&Q@dDt}}x^?FZE1tVlerLATy&E%AON$TQMe}iR3 zCsuj9UF-j4_XBUgx}1S`Uv2mu-1>R&p**tA8T@ZuVXK#x{CI&DF=hx zOhfjazBBTRp>J~RBW8=jk~^n-uLaacR{4(EJh(M)3!n3-|5*2RbBCCQYlcw5Xc{5f z?YVEZyg8g7?Vl4j`l(cXWmwI)e&5pv9lYm$@8ZeNk3K(dH{4aOvDfSA0lQnADsSVe zMLH)QUXV&ljR`6|H+u8vQ|i^U#>fHB23=ZLwsyr?)`hSeY&Okp{JsavOKmN-$J*SB zY)@U_q5FKq{^nU1CL}Rbr)}$ZCTfc7$Jbv|>^e)-uS_3a9M|Ea8uN7$f`cz0u5v$FGGfnLp@rReF!h>rZPfJM`Sa zAUAMP%5c5A%O8!3cFnVY=vimoCq3lENax1`@_KLX{rI~_(Mi7^4Lx#umnLt!d0fU~H-j;aHOSSxL%6f5Q?wI<%y-!G$?(BD|ZJEioS=p=YDlB$DO%u_R>4LNJ4XUiBaO27L!OXlg#@qNyn?J=&;E45jL*RS;XpxaYRmu@Y6 zB(f$&t&_dgE5%veB2dG;Uan!zo$MW-cxtNZwE>L}d{g*`=c&YPF|H4Ho-%cE&)(0B zw5?hE-e(r9`njR3sVTh1`gTd){?&D{!~A+`|FDa29QLyP-NSYL$|kO1UC^>I->ukD zbk-oz)GmJTvAGLIejFXLqgp>>?$Z&T;kH+HYRL3&KmFq2w70u^>iAArbn)mse@D=mFgA8tFsNyV-(=^Z=# z`N%yXmYZx-%`ZP0FK-vp_VD9Zy^D(tOscQBwdDr1ovJu1Gqyf8^w9VEZFxMu_c~YV zM$%MXMri%0NGRso&i$3Q>v4XxGo63ScbW5)Dfh~_)2$5V4Vu3*?twwhO&cfE4{vmR z&LpY~%I%%Cf>+(Le($SA8KX{bx#OU6+;Y9e?WOG&)MT2Kv{UksNqxM+&#RWctDCdt zUUFg8?B|ZB=e0VznmEN8Eg0px`{s?QXN;>G&TYxd`_{2*$wYZ2tIs7rjjpYFbL-5D z{MV;~Q>!ZJ&w9MnTwTeWc%-%Q@U}h`_o9_vMx-!26OHL%@5gX0tmQ_=l_~y+QEOeX zbf)}K<%xA!OJ)Y#;=I^3^=>bNLFcw_FK@i?hVNB+WNlT%{Yk^$>dY82!}Ohl>ENC- z?3wB6zZT5(zHfQCuI$FB$KNK;;tj5T}iJ2_v<>+Qvw_xov7tj-wDOwsLWJYvc2 zr9QEAE7Qk2{q#Gk_?--CTUmYCUCT#W>dlgUa$uPEf}^A647M?Fd?f#E@v+8;dZ$X$ z86h$itd+)w_Fqp#ymcx5U_CK6g0ZVT=|JE|_2#0!+&jCRc6xjoK5A3S`VzINlME8~ z?D1E%oNr1~TWDiyZq+VL9Zs|A=`?)E$sS&|8R>D$OPW09Pp;10{qWMr+cn`knMrN$ z0*{xvhTm~52(pd%dc7pC(<#H{QtPQ}i&YP;`Fg)3sAnEGW-H&D-x-NkyYH;kto3rso;bQzUDMjdC4Howad^+2 zeP}s7ZjP8`-_!o{Z`QS>+C-cAPUA;pKHXsAbS5~a?`KOl^VK)M72Ab_9p`#^?s=s(dW?Ra}DYjtFsekw6*4$Y41`rIb<5{)ETk;-IOI?2d3vae~K%P zc@b?g=s@GrAm+_I(O*7)UAk+)>AmxuKQ}$InG*PH^B%Pd$DQYrgxdc(g)R{naI&CraP_J`}dMev|Cmi-YT;=1e=k%OQN*oD*Xg zrP-bc;stySAImjMDLSY+zx1<=aqe)1y&dVjoK)wzf6!bPq`PStZ=!u@!GJ=!UA=nA zX*{w_y;pijGs&#hm8-HKPj}Rn?;$(wGDjsbmmY~>%H{Cu6Ruo4RQ>G=H+1~gsH2=i z9ib2SS0gG{IZ#8+hii7cp4s!$vVNu3ccyh*U)uMq_V2Xtm}9cT2Oz`mmoD?OMWK$+-1Wm~mlZd879R zxm7O~6r?THZus(aP|f*i&p&m%E;q^x0P)|rjP~Jd6AhbS6*bKp*;B3FhOS|Jdo=p< zvBBC8{XYgQw%TW+<#to?d(WHu4{F_QRZ-o=)|_bMvXFnEtyiA?L{+ah*1y(sW@;oj zyzwvO=$ugwD3}nfHX}M?x!3mTV~u-HVfH$h85)1n_LTe;+mr(XCT@3m#Z3Qb$5&vg zH5`gAnK3GDF(*7vbtR|mNlaCnir$sh^~Pmm#jz z-_i@tdS9sCZdWxuQ-9A3FUPsoeIA2vo(!{DW#t#j9_CN1FG1`dPiNEW$Bwu#ag#p zxQAD5@l4g%x--iv;ksoUduJvV#+~65eSN)RZ_k=jClfP$c9{f5$<7(?BZtjO+8j7C_e%U^0?DHY? z=wq4T@uhG2?32G-mOQ{LbZkqX7cwRu+_rX0md>5HPe#jx!mV_$9B^F>khPP=y9)k>K{-VD3Aa^K@uTP=I;l~URK zpmD{bgF*G%GS-w1cwSffVOm5u_wFH`_*nA_2lXQF>Hf?eZ4Xi|f4N>D&3v6$I4SQ- z(#30yJUg!Y%P&itb8>G#zH(Fg-Mi(V21f0=rk9@XeR0e@+niADn~ZgF>U$O?e2 zo5$546Q2Bo?=JKVi%!c7i?H!NnlCgz+D<5cIkxa$!CcK} z+~MoHKhU)9z1=sqzE`Mu+xDibVNo4(io%L|-)X-y!>W%}Xhz&A!$%t@r@lDesb6ud zeaor|RR^9YGADZ!#b#YfFl~PE^@iRVoma(8W6#;&K0_D9ea?_q;cc4|H*|60&qbcT^&?Bl^EUgG zBKYyj)HAzYDP}DA>gsK_*z9UUq~B(lKD9b$##5ea-miExreVZNxq#inO(tK8oSl@$ zy_ddip#J#Y`N3vMc{aH%78%AqqX(6b+@9e7{-@?0Zd=p96!)C^53|e~vi&OUef{jc zPmixwmLBZqQk^W9kbQRG;2Sp#T|x{D+T)(SFxe97XtQ(MU@5)V*Ir%!YLgUjhFU%A zQrg53g9lSHQVx4g^_|wxkso|%W@(?FdNy|l*6t}SJrqMTShcZVjn0Ia8&~E(eBZa< zYR9982Cc|m(!RZ~LcM!TR8&Fr341N27=s^c@<*0#ip%-}X33JPTUrjq`1l0TZf+_# zf9OnjQ*};4;@v_A-ytg-TXzh6y;JYCMf{Zjs^a^W#?~6gV|OTA`PM-<4U&Hrd|Y=Z z#@@Hp_KTIBT+XUvyHab-o#HNWjuiZ4pMw|(jJ^Y9e2E$g!z^^g0{SW~|4){x!j zr#z+)8T75zIp5sR&CJ(K(dhl4K~kFc9o@4J#f;4AKWhE9zCZ82>)UT!In8+W!or(Y zx^gs^k|jw&{oO{S{w#dGlQ(|r47nBq)dQEflpNcKDNM%N^^rBtT@>Pi41d(#8ufbp z(IJy#9&ZgC*tR8Hmswm<&_}cHxF_<>vNu=6Ut1Cqq@|QwQ2R7DG$_Q}e{R3kClrm^ zIxbxM{=$99r{c9dW4l(vz=*uJmv!FR&EHc0b#%_u`I%-jq>JZBCEHAACB$pQo|wgK z)2}`MX^EL{i1U%V7m~xrr>e{VQ*^u++?})m4y7`&)a(uYSMKb;B`()_t;Ndmd&X}Y zV!gf9*g|K|tCHBhNn7nBG(H75k16PVOGSG_V0Hh-VqZUYb9=+_h2t{k#vP0GPZ*b} z)UN5bG5XfUVWG_8y4lR4FZPFq^RL{Z-WjTYZz)CZ4DG?jNqvm&eCS;4)RESyt|ph* zn5v&M`9q3ga-L#q-l&{@#Wg3>Ycq}Fvv%Bxm{BwC*0-`L1!I@({h<*#!{M29`TU~u z*0IO-S+uLAxNqYfOh0UqW#f8tD$i-+ryz5?6Ka}opE#N(8OBhnr&@iTp837UZmNF! zgUdyJ^%_53eJU?EO&)zLz~@--`7y`WXnPe;$hF{j9<|UPqW;X0Ydo&BYVK=lmfL3k zZSx;%tZ2D$skXCG>%fiC@f{D$8b-z?$%PoJga$ou+g@6=BfxLLwfKaF(vta_eNrD; z9%?Ck!AowQ6{gYIwEMA@@=_Ick%Z z?giC>#<6$iznD}o^pwG~7?+3KW2z35Q~UR6eD-pSVs2ndyw`Kq^z{!fI^3>YaZ2Zz z?btb%^WRM>ijSQ>O1@_6U=vQRG_&aD-I%=@_O4!AwHuQ^-DKQsNqG?I8fa=|o%C+E ztFcG53G=~MW@ls4$2}SDn`0KWsc>48vbUIAFg&S0wqs6+)^O{gZ^k5b&Q4ERz$(tO z&!J}d>ep<0a8)kBx$*e>yA?Wr*X$J#`DBrZv4>&-6ArgImD!tZ>PIbZuWZ>!>-jcvTAay&A=Jd92Qv$n zW{e6B`~0M#@Y{5+ZLR)G=0?W)uxl#qLx&T$3$9k!0eQ%rH z%DZacxhFWWu|3>aA zr%$z9)6ah2fA=gr936hMqt531=Qla>rF3g;@84hEq<9sj9Qi4iajW>pj$g|Cv}3}x z+Q5MOXaCGMW%j|o{+m8Of8=Hzk@w@^BL5L=P3skpBUXL)syW>;TD+0Pr4OS^HSqo{Ot#Fh(r4(CBfK67%eS6Z)>{HN_-HD%-<^?MfLrb$_rn_G~R zd-Tb^_w4LD^Boi@@0LuoF7$C2-!!zX&!T-$AVbcue zjN4I(HoRw%ZzEsJHvRbF=;l-?opL7s=_`-m^w-ZL%|^>L%YNwJ`S|eZh@*$<-fx@n zH7vJq;mot;kC(UAXN{UwdFtD{r3$l*Lg%CzanqdwFRyy8Jtl2Dd!qtpgJS9Rx4|X> zbLLbi)P2c5p}*(%J1Ip|InFk-@b_&SYwXu&`s7{qR(zFKu&yv_OWBL~lU_b1Qcb#+ z<2Lrt=vCPNZF8+_%`nIF!2@!SBuTy235$!%r|7qAPCn;+;&A*8Y9xQM;l3mK6J}@i zJgnlVJWgMGR?8UrIPDvyadNwEjoB4>{9bCmK2tP1xody(mC2SO{XHd%&*up)5UkRa zW?>U@E(q~+(J=J&3DkE9aPs%?3-t9zzRq*-cG1w)5&Tcx)Wgxm-yzV$*T>Su-@{c} z!_dpYEr7}ae}Efxb$x@ar?SDCvZ;b3Lv#k0N9FSv%QWT$I(T_F>G-&Lxln1+8ae?^ zEE67wdhAb<${8ln@qqen1#lST#Rb%h2|4`C8_k#6}dgn-WEveVr zn6IXX4olo|tnK{|?HIfJ8x9)Cy~&~7|DkPG^1b@Y5R2v33VF{Z$|s|xtmNv(so&M=;n~@o%J%pE>Q)<;`XzaO`~m+m`p597(|d>e7aX9r zZm#XmSDB~s=}^urZCb)Dqk5T7TS9hp%sxHInpTnH=K3z;b-)&rQpf1+n}fdFp6Pt6 z`gG=;*r#?yx0=p=u%7T!{ghjT!Plm<_vdEhO{ZSSn?CMBgzliG!xqYIW~qaJro_Aa zjxSyItBe)@^=0wLSHId7uG|l5=(yDwSAKfMor8Toh7K)uxH0nAwVn~%F65HBB4*{D z$mcXHS2_j?9B_gEN*AP5*CD_~@YcI_JrDnYKs|Q{e}PMxI{f*Y3EXC}hjXBNfHfB! zF~kH%v9Lj=F2N@Qd}n~wX)NHNg1-gjUHu2Hk!FK4+PGl7xZty^>jdQ-ux~=}$z(E- zPr>hlx}}$)Zu_@60pCTIR!VCu@LlNRA#g}gge=U(yEA4PtRWp-f-LjKUxHawEwB=} zNN|&dw1&CAual)qptXj%zM;kfm*Bu<*c<&>&VecW`sjFn915G(py%Tmu#^0lq8! zom>JyiJ%$_jkzw)9u9)WEjXQo4fb&HSfEuK`TMT)11-)Hyfg=&06)R6IE85FS%OBr z%EQUU!bn$R4mA+GP;G4uJpd9^5KH`W zjX4s6gzf{vV#4gXkrXDRHDNIksYU$mjqcvTV#2KVm5`m#jX4-h*iA5!!i4Nr#bUx} zxTG*4Jd4GIVOdFGLiU(qFkyG4NC*?U2?mQv&=4g{;^^_#SWJ?qadhhn1`~Fhh=kr9 zx+?{XN%B07o;r=iBzYc3_mW^SNuI~iZ6X*<*i97@@+Wi;2Nsj$c^vI8V=+md$I)Y) zF_=VWA4sm_i=NSp#UyziM-O1eVv;jKl6!aPiN+W^iH-q~ASPmeT$JNqf|!W?aZv()31TAl$3@xsC5H(;MHu5x zqSX5m#6*m$i1O-75EC3%!5p_Mp$-%sJ>D1d_UK50e#s7}=;-OaI8>4+b2@rBFAkOD zyXAEB>|HD>QJ#9qIXfLaS{H{(@?CQ}dU7rfmE=3;bo5|cEGkj@cnQ8iBut0xyU?-6 z%u0%{kVG_iRFdza)6r9Fv2Re6(OrUX5IvL@hf4C@bUJ$GEDqH_!0qUfvN%-#0JkF{ zJDj(RlBP@0uaLAYcvSxYw<9O4;!*tr+>RdSiuEf|{&ESvLG-j%94g5NU+Cy$G!E52 z!0qTPG!~U8owx+wAbON44%I)v?dVCVI8^@twc|^eglj zQyi*)fZLICL2+*oEbfx@q#qqUgcNHlQD$ukzCrYiQ5>p&fZNd{L~*G80d7Z62*sij zCA^m48$=HP#i9BKxE(#;6Nl;_;C3Xihhqj&9%{+GK?YclhhJyLz#fGup|LXqJ$e)K z1{p|3Ystp?80fj0I8>77b_RNACKeU;gi1-hLG(mS94g6kI|DuL5{F9i+|EGHvc#go z9vmsDH;5ioi9;oMZf78oJuHhdM9GvT$f68^7vWJ!zTeJ3&yU2uK~WlH3EB#*{wCN; zl;c>Es02L(+#3`nFqR}LffwOWiLw(*5EXhvBi64(sfQ(ria3+X5akt?Bq~7<0rv() zd4DB{3f;kgeS@NnPm)9>=po>3CHa0k1Klu!v(-Pq?dZXUSX9_k0wvXFKu!U~y+P~| zfD%ImaXYe7O~RuIB(jII754BxN!bcLED-Bg$Z9nS+lmWuJF;F)(x@PA$DXMtsW*t8 z;fH;LynmqIjvmm5L-h}EI}hr&gZ+<^j@B~JqxG=1!k%L%DZfI_sl)LV_K-S>p@O)b z{}04ukjNg^R!r>iauTu?6Ftrj<5x`V*>Ms>MdWrSvU*Lj+|EP~qQlziAJ|;Vgm#xQ zk=1JwwiSAW9L`pf=XNHv!<31vUX!q`pgpF{e_)d-6Fu1t`v$Scs7Z<$kjNg6MUmBO zl7554m<;n@*lG&zHN~E8CaE_F?>0qNuSxg~LismCbaXEiL73ew5^E!c94ij;v+96wEL9#4{SeWLijRDic|~CTUc}m<;nD*o4Z2cA+wn)oT*A6|@hPiL73eG%ASO|ADQjOlU7E z^B>rZ%0wc2xE}I9upO1jgSh=4*pMp7Y=)m*V*UeLQkl@6ROUahDV2#t_Hcgn4{S?i zB9T2TDi*SOO|pE&g7&4d{(*g|ENEXU3t7D;XmS&c%7XT#vi^a6sVrz;DhpY?CgC>-?Mr3-1N%~0(7sd_ zvU*Lzwu1JhvY3*Ox3i#qsVt`C=TceFzEsvfurF1R`4-<7Ww9i`cZmhD!x=S z3j-BRdq73`9OOnu?m`)hic$m>ZIe{A5l~UDqoS{wirQ5geGn?TqZMRJMt%j|vk7t| zBj3!^(cM8*bUO|e-TxwuZa4u?glzEu zpNP|ePej7OCnB0uI=WOJe4>cx=yG(BmK?=|CR4Bwq$x)*($Qtbz(jw34hzW@0>bW)y*Zg8;xpMTyXI@L>Mpp&cM6a75~I)Mn@ zI*N~hPLhG|=Q&@@QJ<;lw>Ugq@?G&666=BvuT)_SQg?IShmx10LKLQx`A}$ zZb6A&ti7(r8?;ON_$@pTJZckinhH&_?p7JFj$d5njjAL$Be0Ymw{0 zbu4f{hx6w@a6g9${sZOU5*kzRX#hS2zlBr@fa`+;2qx)IXV5^~rZITnLhy6yG{!_4 zZK4{+Cc*wLuF@buJ%=t0e(KNvs2nyMG(lG?TE^sZ1%?*^f2@=Zq0wTthF?lS3y5P!aG`@gv@MSR z!Id$2TmiY`%4l?^pdk^;Sc1lkD+BR70WsssXrSp3ucHAyA=-e+rwK@va4$zdpoB7> zfV6OBAYvyVL|hs0SEvr)yaJ)XUB~1jWQHpP*9rOugfe8WA+8L>#1$4yuC&G>Ui~h^q;N ziEu9;s)Gjf3ci;i=w;#WWzwNK0Q*4C0U~FD4in*CCR7J#<k(vH7sAKpz-tgMy9((e@lV#N(hFCKxy2-w}r{=nNCen2-*#01v_UGU4ao zupx~H@hXyf*@8|jpgAH~E@(zX+k+Me*UM!H24Q&2Tx0}~D+A4(q+S-u?{T2^2D;H?*9itwxH@QD9@KAQ zfxbNaygWe%op3KW{fkfry6teiJd*w-=))0h&jL3?^#T(>?E^p|wl^?LAgLD_+~6_u z*n$Bju8akmAY3mSYBMw*S1@QH+{+`WmkYHS8aSSeXnPhL^lAvd2d;zK42@5NG!H~~ z;ClIVsLjy$48ed8kD1Mc>*a${2jMyz=tdB24`haUy+R2!F5oZp#yn?_=-=$S0I2u zY%^d)A_zRwHXl37X-X<^|EOse8r|iZHCTf z3MLr{_mc1xn+dfUkb6_G;)ZZ9316|KK5CE+XZdWp|Q=P(2lE%cLz-s0*2k|LNe z!IgnmPEs!kmvFd(AUxq-629VqU@74`ItYUhZO;Y|2V;iEjdU(D*+i(9gs-@u7Gk|z zWFHUVUXuP57n!iYT?eM71d~R%GSKp2J97mQYW#IzbS{{PAlwV{5S`0~+6*1Usfl)B zGf4OfG=0dgz>uA2do~E05q}Rvq@gxL=P{r-7YJLx&kKh8kexwD6|R?ruXxBQusFMb zE(l};HiLw(KsXkz7xpzkLnO}&9**!nK=c68K`_*SpBGGWK=txL(1`GRAbJ4T%ZK~u zU!4v zi2|vG-Vx{!5$gamC(!s3OxnTEOXol_8IUCpnhyZcHIjOHf{AIIO&N4P6#oOKRm1fP z9FTY~h@TSM2OGpONv;!2u;c3mYXYG6%ZB+1EG>X|oB`wg7OV4e;6^oneFUF60|vS_3KroCv*NFug!>9W-`kfY$*%FPDU` zSV#^)yibAvDqJrKUx8pLL0U^|0x z3RExXv_j9zBjGDBPJx~mbP%BD<&p3e5Ola+Hq2K%621Z)2G`4h`3ej_;5tCCp2#_D z9?Vxf624;dV7>z56!>{bY|5d*d7(~=~H1J)IZD^p!6sKMux literal 0 HcmV?d00001 diff --git a/tests/tests.py b/tests/tests.py index 044c3e7..7859587 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -8,6 +8,7 @@ import sys import pdfquery from pdfquery.cache import FileCache +import tempfile from .utils import BaseTestCase @@ -157,3 +158,34 @@ def test_annot_dereferencing(self): pdf.load() pdf = pdfquery.PDFQuery("tests/samples/bug42.pdf") pdf.load() + +class TestPageRange(BaseTestCase): + """ + Test various page numbers + """ + + @classmethod + def setUpClass(cls): + cache_dir = "{}/".format(tempfile.gettempdir()) + print(cache_dir) + cls.pdf = pdfquery.PDFQuery("tests/samples/bug67.pdf", parse_tree_cacher=FileCache(cache_dir)) + + def test_page_int(self): + self.pdf.load(3) + self.assertEqual(len(self.pdf.pq('LTPage')), 1) + self.pdf.load(0, 10, 25, 49) + self.assertEqual(len(self.pdf.pq('LTPage')), 4) + + def test_page_array(self): + self.pdf.load([0, 7, 11]) + self.assertEqual(len(self.pdf.pq('LTPage')), 3) + self.pdf.load([10], [0, 12], [30, 40]) + self.assertEqual(len(self.pdf.pq('LTPage')), 5) + + def test_page_mixed(self): + self.pdf.load([0, 7, 11], [0, 44], 1) + self.assertEqual(len(self.pdf.pq('LTPage')), 6) + + def test_page_range(self): + self.pdf.load(range(0, 150)) + self.assertEqual(len(self.pdf.pq('LTPage')), 150) From 4aba5f5ccdcde26f05259510a986727676dfbb92 Mon Sep 17 00:00:00 2001 From: Christoph Kisfeld Date: Sun, 28 Oct 2018 02:39:04 +0200 Subject: [PATCH 2/6] Add Python 3.7 to .travis.yml --- .travis.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 85e0d6b..d52ecd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,13 @@ language: python -python: - - "2.6" - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" +matrix: + include: + - python: 2.7 + - python: 3.4 + - python: 3.5 + - python: 3.6 + - python: 3.7 + dist: xenial + sudo: true env: CFLAGS="-O0" cache: From 6964f0695b4db22ade35e563da9fc94949371b6f Mon Sep 17 00:00:00 2001 From: Christoph Kisfeld Date: Sun, 28 Oct 2018 02:46:47 +0200 Subject: [PATCH 3/6] Remove a print statement --- tests/tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 7859587..1c4c5bb 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -161,13 +161,12 @@ def test_annot_dereferencing(self): class TestPageRange(BaseTestCase): """ - Test various page numbers + Test various page number parameters """ @classmethod def setUpClass(cls): cache_dir = "{}/".format(tempfile.gettempdir()) - print(cache_dir) cls.pdf = pdfquery.PDFQuery("tests/samples/bug67.pdf", parse_tree_cacher=FileCache(cache_dir)) def test_page_int(self): From 8d842747a3c8471b2e2c11a27fe104c14695aef4 Mon Sep 17 00:00:00 2001 From: Christoph Kisfeld Date: Sun, 28 Oct 2018 02:19:08 +0100 Subject: [PATCH 4/6] Fix lxml and add Python 3.5-3.7 for appveyor --- appveyor.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index df3faf9..4f542d7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,12 +1,14 @@ environment: + global: + PIP_ONLY_BINARY: "lxml" matrix: # http://www.appveyor.com/docs/installed-software#python - PYTHON: "C:\\Python27" - PYTHON: "C:\\Python33" - PYTHON: "C:\\Python34" - # Appveyor does not currently find the wheels for lxml, and cannot build lxml from source. Disable these for now. - # - PYTHON: "C:\\Python35" - # - PYTHON: "C:\\Python36" + - PYTHON: "C:\\Python35" + - PYTHON: "C:\\Python36" + - PYTHON: "C:\\Python37" build: off From f48f7fd7cdfc1da25318efaac795a3754fe335a1 Mon Sep 17 00:00:00 2001 From: Christoph Kisfeld Date: Sun, 28 Oct 2018 02:24:23 +0100 Subject: [PATCH 5/6] PIP_ONLY_BINARY didn't fix appveyor build --- appveyor.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 4f542d7..ce6fc56 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,4 @@ environment: - global: - PIP_ONLY_BINARY: "lxml" matrix: # http://www.appveyor.com/docs/installed-software#python - PYTHON: "C:\\Python27" @@ -12,5 +10,8 @@ environment: build: off +install: + - "%PYTHON%\\python.exe -m pip install lxml" + test_script: - "%PYTHON%\\python.exe setup.py test" \ No newline at end of file From 3354e187d585e4f708c37746e7009c775246602f Mon Sep 17 00:00:00 2001 From: Christoph Kisfeld Date: Sun, 28 Oct 2018 02:29:03 +0100 Subject: [PATCH 6/6] Revert install step --- appveyor.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index ce6fc56..095774d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,8 +10,5 @@ environment: build: off -install: - - "%PYTHON%\\python.exe -m pip install lxml" - test_script: - "%PYTHON%\\python.exe setup.py test" \ No newline at end of file