From 2d09ab55fdf6bd112acfb2bd6df532364f1d9d65 Mon Sep 17 00:00:00 2001 From: "Andrey Cunh@" Date: Mon, 2 Mar 2026 16:02:01 -0300 Subject: [PATCH] att --- .../debit_notes_list_test.cpython-313.pyc | Bin 0 -> 6485 bytes .../debit_notes_pdf_reader.cpython-313.pyc | Bin 28577 -> 40092 bytes debit_notes_pdf_reader.py | 526 +++++++++++++----- 3 files changed, 377 insertions(+), 149 deletions(-) create mode 100644 __pycache__/debit_notes_list_test.cpython-313.pyc diff --git a/__pycache__/debit_notes_list_test.cpython-313.pyc b/__pycache__/debit_notes_list_test.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2d41178fc3140472d76bde3cc3feab2b2383faa GIT binary patch literal 6485 zcmc&YYfKy2xij|I<2UAMlkgZ4@-W1ZgpdTXka`!AB>|H4#B{-A-HAPrVa<&9jLBnG zinu?jCF+lW+6u24?Xp#`%1V_=)k-&4t#nBr>Pqd8aUIp}6tydLrE33?CaTnb{mz)N zoe0uO_eyu9Ij`?K-}zqWJLfyk+-?T}-$%dwPvKWyg7^#W7@vKo@$4ZqzD=+MOI{`r zu1%N8OD1HxWJcyo6rz|S%z`WcGhen|q7i+`hHNlSAvB*slUoZ`v~zZw2#6C_q>gbYLd@@I&~lB|g}vlMITvWTV#5ymrL`l6IE z5HYb2=8>8jgpvn*cHw;?_mlP^eQKvB+p`tFM-Mb^l(VYPf`Vp#~cmdn9 zCpJurP`gRM+In|%AX+MDS^mY!7FfC3SdFzCG z>F!jL6QJGE-Ju*d><;5#Q9k{$$>Y*|*A1i``SJu+a|9N6h^@Ek0qnW3%KUI>TA-*|I`9cE*r zBkZ+UBs{8l5eE((n~_iq$fp-%jlx>f9D*!}Fe1izjl$jzoZyY+3NMJ5w_$`N6tE)L zlLW#(g|JC95^2;8DRCFoLQlp%&*-1#uiSo^UJUkOx+`^8tI~UANe~0Gupi19C>Dv& z8=F6F{jfFH*ioqIE!6ZEYF{ff9DM4uR#_J>ZhHv3_kr`i^GV&24d3R0C8uf|UJMl| zOP*@TQVmZif5GW`aQgn~Z=78m+M=tM+aFSSx;aZX7piKPoG*_&xO4wb#+s{YU81&} z^~;HeLwRRg*4b95^(|fAax|#Ek(}d_N?m#eyb*C+7#~E?;s*DFI=)Rrjd3De83uMn z4?sL+O?{+Dz~;t*@1?J}`B`h988+?i7<3e?SN~WWYiAvO5R3P~byncISobS&Jr%fB ztoN0;)ocx0%hv61`Kr)DM)k7>vl}i2mLm7KEJtJ7|@CG0cW<(h3Us1A@ufJ=bI;QaC#kO~@P z*k7J&v6bQ_U1C&Y^+eO zjnH2y+1NRPsE}xPt^6<76HH@bQFhrRS`6MrI@)UR#x_RVjCO=FFgygi501uNbPSOF z6*K7aj2^4e2LGoGNjVUmZ3-VW)+pBz8g44h6k*H~Z53R#L~I^nw{_U&{%^q@1gy)p zULjeZhv+dI6kuI4++JS6Xq9*&31YTV=VbV>fm5yxYT-b6%xD>QWEuS?^c4K7Bx#}% z>{-@p+g|$XDeWJ)IT5T&w8QU+(6CeBk|#Q&oyKYrQ^dSG6&O8cV3lha?JD<-R>av& zvIqLCqATLsb>|&4B!1bay9v=9Ek%GmX1DJ(n{^ow(ew64K{v=|k2or9>Nemy%u%7c zLlGzV+2M!>$|JkY0IOGc+t^1%AFrTrba%Z0y+8%M3h!k*B6fgwMjSx9bRohP0!q7h zNrXNJ-o%}F!RE(y#|65GZN+=j%AlW{}8C`wF9Lgnl2 zNP;<^XI&SGn?r z1gW0Hz{MKFFlpu}f2W_(!2w2EU_zs#Oo~GsbBjZ8t7g(t0<$bJ@s!RWmq46kOu)DK z1V6(qB;o3OlTW4;Iv9+nf6@^oJS5?U&K7w-Cp zAfO@~2^4EED8DPD5FA2;QjMFKT}WmHk&AN@hNroA`7hx>061>G2nV^b%7c=aysJ6n zw1oIr8ZGe3(NKyPK^EO0CHc#;@xW!AM~DNdtXv!`QRnD_Mq$}C0y1DO1sdcPCMt+C z()F_Kipeq)90YwqB0Zp3bpOBg z%W`8S=vs-%(ma=t29)a3?DEi#oFxVsrIQgkiQ&K*`8dC@OD0(KQ(bVy*|oCTOgi6A&|33L*QfLF8X96_p!c`B#Cnx`~Xjv?;0M&7A# zD;bIZfE40y3H)u%Ui1L)6wPJ0P7L=bjHprgu?n(pDK2Xk9aSR}GR`)O zsdc$T_!6j5^YGGvCZUPD@-RFTEfQPq`n{r+it; zSFqI=yuO0_oo$-1_@5G##k%b%K{NIrm^UJ7D6V4UM1gV?XJu^PlQ%-@YghE~t1phf zt`1z)$KNWCyVsLy=v{p@S#Vb8olRM1Q_k74IP`_5A@6DVou?(|VHPhI{LPCOpHTH% z&Z=el8v_8YZ(sXruD)k+{0~f5uI`=hdNL<|a&UwDFaAgTqYFQt&z~OIM7fifej8WA z6Z!CTHaz_sDSPtz>RX?97RS`OcecC@kFV!^%FnNFl20Zk)v0KHDwds!sb3RvQ*-&L zlq#lj6Yu3GZf7TM=O^yw{O2B;Gog=%KO9~QOxJ!YG5Uh8Ci?3q0D%$rgMqjvbj}n$NHJ|z|Z2FfuD+>q1@nGKf0iP zbyDq_Qa#Zpw&~Al=Lh#bxL2@wzMvgTrydO6AI#DF@-&mBnH+s!jnC1Yf2#6txvJH= z&}RFuJ05pzHmNn^Ph3|DuA0y5>X+;VpMS~q$GYaFp{?2@1%GRyzOm4-Z*^{E4qB^| zE0admdN4Ed)J@lUmWH;yM4f;2?8@29m0azyd@bk$rq}s0)W^0DZED+zod0Cre`bTr z)ty}$F1YHmu9l^dg3JH#jWy~!BU#s>rBR)y_O)wk7uJt#9NO?ba{RJqGyRLc%}(I& z_pU2X_Y?Ik&szw)r{HN@YyW=7Cmm}|>(QUpKAQY#i+c7=Q0NHRRA@SsIj0^z^T@Sv z4^-;DNPb;eFYQterjmx??v|U5EybsRa zKf8P-=WNeAJJwC`+T^NQrdDk$HnlO3^LFID-RoS=b$oGH_m{fmYpYW$Q_G`k&h?=m zj(<9?cAk558mtv~9N63syp56Gg12tfzG7c`KXYX5(3)@E@spm7^pE>CIXXe*KKOFvacq5oQ_7$}=1QcH<8w-v5KCb_;KJ$LA zu~YRQTOWUP;(vPoyLYoCckZg%{}xszOa2|};BRIi{ExqdOhkQ*{DcnE75~vN2>aoxE+PsulNb-Xy z24j#X5%LTaAQnmfk*NAJaUe?^_zO|>S7PAv22Levx2@IWWis=2zB!m}4sH{;w{fjl zKT2$#`poudaxE!2bK@T<+t)8_n+a;4&c{Ci%vh^t literal 0 HcmV?d00001 diff --git a/__pycache__/debit_notes_pdf_reader.cpython-313.pyc b/__pycache__/debit_notes_pdf_reader.cpython-313.pyc index 85a36a2f3a25326a36e4cd91269875ee565b0d83..f073ef220fc8630e47d481045c57d101da0b08bf 100644 GIT binary patch delta 17653 zcmdse3shX!b?CkG|9_YdGcdpaGr;iwe}xc;k3M_^mn$KIG#X@2U3-7Yynqyj4drZgbj_FzbF#~H5^2y^y)|id8 z$>V8k8YO4Vl!CQTO4drL*mO$G+H6D~L1{7wN;_t+6Jz@X!Ddjpu}n%o29O!k){3N% zSu3Fo8N>w%>ll*@m5fj+txhD=AnM{j1e+z4nV`&^C}Xp!M#?glO8)h)!jP7p#a{N+L2d_f?YwK~cr;1=xRhTTG0VXcfR5Xy?E z!m&bVBNJp+B;rsWVl zWPfmyXcgxWlccBlb=0VS4;j<5{)qtV9}jthp$R|BLY8vE7xIly`+`AuQDnlKKNOhp zdqgpL#{(gM@aRBHL50`=Gj(G`>>x!8N4b9zSK238A2WVF5cCHtrUSuHMe>~sNsf|s z?r$VCsprg6SGE?~W)axoFNmCfgosJNbcFmHC#9<-na=%-tiWc4oS4RY;r@_!&iBCd ztZ#xXhO8{ERsI3e!hKo3Ko)QfiW1VsjVnIiQI6;m>?~UWN$9a8=%1csE8!)Gw&ZlO z0!is5-*jLiW*DBm&(9q8ji2|I3ppj+kg_H$^h1vX$k(&+N6Q-EhczNdLtsLHt+8qV zF>x@&#`IpVk6~s*zEEJ633|Ok7h>W76Jl!+vmSvN0g{e0i=AOh6QBSR!Fqy|sG4L# zA4-8kJ2$2pCo8xO6FVsDR1P9u~wRipvo@ z>>-721W9aZw~YWJ3^3^OZX0_Uo!ADB<07$b$>GM9Bp+7^!vnGq_-_|zc0Vg(ZIJ5s zX!fhaU|b(!3f6yd-X9DFV{*zL41yN0hXFr6`c0TRh2S0pchiGbEI?4fxr}vj4QRdL zHI9&#SkWV9y@+uyfEal`h|39xG0vaDd*1H@`wg;FkbQ*vaoT}6fwmhS({rM1RO9Yv zNue^^2rtiqH7BYZ;eKi9Ast+^)me>0$7Uip1|TLGpPlf>2v1r}3g$iKXZtZ%MGX%O zbx~e=sF$7M9<$m?1f~S`aKvKL;I!XA7Zc9}81@8ah=cwR>w*;9#C^w_F@nk(XJ;6T z<-SnJKQkAKiD&0SXtaJNCJ8_mI}61@nBtIC;feEWL={QSFy6pKkPQP8eWN|yMwSRF zFCCwQ{PEpY7YYeN34i$h=6?tHDOkA~G1qI#BqiJ{wo(HnqQp%iNcDq%ys%V$^1meO7j;yu-Kf>E3Xi+ib;e3TAy(&T1Nzn4463nYl z$?N-Lf>0A!<^l!mX9XQ*13>%r0|3|y2&Mrn9N?}v?3}}@4#yWD=ftctIO!}3`5By! zFZrA-PH^lDfMJ%wWKzW!YKQ#6x!GXI=L|5A^0U4PpVL3%thiLQw}#!hrh#G19i3q3 zfK*H#^vxXxGx1p2@I|C8*odinsJ;L2k)ssYN$*i=IBqOh97GU8u-|IfdCUU=5rWMK z<`Po{f&m6P$Bg@95?ohfl8L~0h#f>U7!EKf1eXy!K!91X3y}1vh2!%PqRiVJjlo-(_H6ZnHDq;3WGoC`U;KAA~_&B%%tgJmHfrX`UF3Ca-dDD^1ItpLT62b2gPZ&m3C~ec|%6m)B*HinhNsZ@IcRUEP58gyM?gsx@M8J|g|0 zE`!e;jOd1V#n5Y(^u@#5YTcuCi=D3+EK3qdFNGez{K)02vWPxsOYdH(j;KouK9|4h zf37&9u6L9D-T+TaFwxs_5(J z7_p7qTaNDtY88{9md5m*T^&b=|?k7_^mZcUd!Gwj%CsymK7un%Xls zVc~l}auE3bQzR=G4TULT6h4BfB?M?vF=f}~asM1TbAqw$GpT0~>$3oy?4M!sI)Y~r zB<(CF4uPa;v?zxlC4(TPUEaO(>@@p%sPMv#HTTC!@>y`m76*GE1D1K5KjO=#e0-L{ zNenWDAxbu}fAvpGh}1hbG3h;rPCENVAo62QUsMCqKV0-}k`m@BQsq&`RDd-%JwI~} z{C%89Vv^Z8Kf}r}DaEl1#x$rVKG+oq`hrtROmblsR&$h@N6P*W_+kH)n<#eeGMB)| z4hVJr1~7icJyX2O4Hj8Nr6gBjuy!)!C@hBYhb9x#zX&GzQPg(fp@~F3BZ9p({7YV? z8dr=|k;KI3<|kQHu9#?=iAeGG|-md(7DTY0S! zP49Q}TCaAzXnWrFV#cHQEDb$A^2o?1?%B+1UF_mDy-~G(*|OBaORR6n2~~bJ(EQ(j zujvh@Lu-Ge=^(_jnMZnOVmFfcG5BegiT^%d`;lOzR_sXo0nP7G0y)_0(ShP?5kvSLvfb)CiOMwmJR`a+m^WJO~WB_0>- zQ3SisLHhDQa3U}j2nqX`n27bqWI@=7jGvE5r`Xx~Igc#P2#=(Y8*qaIQ|Ci5l8KQs zLA01uuhhoOL(gZuHt5;7f7%ycG6i#Z8Bi@GcfE4mX+dJBBjd1sCLf%hQPbZ1$&GN8 zo8-QI*2&$hE|4)&@aAOPYIQCt=YC%0;(lJOmS1L+v}{O;9urqp(;}jkpx}l1%7o=m zigHOJ?IilZ$kCAke^-TcMU)aKIl1kcLX(UKOPpG73}+>#W>shm_or+IBH` zblhXLWuzuqOG9h!RtqrJ`Ynliq>5Xra|>@@s?$qB;It@N4))0Xq|PIhX4DJ6<^hrd zH#bSx2o3fWjVib4{e_d?^A>a}4NEgMBmj$_Iaf*6IFApTI=3<7=Oy508#2Bj-k zfv>6WUoae=4#2$YZypO43wi@bA)w&Cso;X`aQ}&8kS!FAR+crDkCly8oGJ0l!vsXY zx_sm9`TsL>yONw49i%b6~i=x0n4Z%>Et%)N0lV0CrsSGqAD? zbCGh!n;GzpvXTH2nKz#G5L%)1XsHHO2!kupQs6iFt#<64N zKHmC}hLSM~_)})1hWHLyxblNw3>`M^P?=Ir^pddE+0EA`^DUUKqzMzzqdEesJ_BkE z%076|>`aV51J-~$yFQrcC>5>N5j7%46DK8A)0#RFH`Iglyo&eR{DciOk;*N!)GRv-Xes9=FngKieH=$A2a6 z_Klb}CzGUZyR+%vt66WK ze*9KD-zSMZy%!BA_gT3uG2gj!eqSVUH#@hdj(d{Zf!E_U?sopy;%(n3{#GkbPvt9V z<8iL`ul%igTA!Tr!C~)F_AS9_=-!I1r%2q&B-TMjN6V<(GVqibS)5*G9;}S8THpg4 z-=tTXSUJdiSa)r%3WCa|Tur#MT_lKksK6K)V~S&IjJpZx?nG0GUnY&-E#tm*kJ8DQ z=(KatQ`#i>Hcimmy))_rk-|h}+K}iQV~+RtR1vL9;m@4JOdHJ-m;gYV61jD|8+j+3 z+wSr$@B2`E1Ja}FJGpzND;dgxjlAQ=1D#ru^l%^T>Iubk6#NO#!ZuP?r%+B%6fJxYizKP7h!(yDd0)c@MGLQT?w(7= zl$yIeda|d1`+AQfj`Tf5+D9IXztv-p7k?Xz<4x?#{|e+kFQBlnAMB~OqY^v|do4Kf zW<%`v;f=-J6T8hd9C=*!93}_3Zys@SZyYh7*xO2hQWL@nKuqZuHsN?~@G@q89Y9Pr zK07lL2*DTA>DizkzSPZ5PoMLR-{-Ng{|Au-p{Ysk#L>(=(|KPIcQ*+Q@q*69G?9 zTjmoFU3rN6!0IrcUbUq8A=h>6Q1}OW^S0e_m0qcOa^$J3W!Vo*1z$K2HK#vu{K|1? z!S1-NA)x%&Q~GUN=GDWW>3Oner7z;B-f}dqor~C-*7Mh^dE3D)Tla=*594^mQMu)) zU+ak28rCK2Cf?S%W$W57Z`<4}Jg&>=EgWII6ZB4QoRYTjRQE zJ#UZ5T|44hui>fjJjp&D+mh_;)$D?_+Gk z?>)Hx+R)v>+)e>uvHhF1HvI2X+xNlQos8XWP;v}mVN}e8DK;mwJK&p;JqtDAcV%|T z{)ZEW_#dHndoD_5V1tOHL?zHuo;(!grm@{@>=* zAeyNumwcDHdN+;I^` z^AqYTYMxW~o)#?+audA^3ZIlP+jcr-+>d(QqEmA27gUbqo!h-~uClMPrV>`^4@su2 z#PgyHHPf}kPpzuyD)CR9n(0#UPfMkku64{9#6L5LXH3M;OnEbPH$K_-8VYJD`1m_?2S{(~XAkxtFt3`s20m~3phWyPaX?onD5PNSij=0Ai$84wqpMiXU#O{vOY zQweF<#o>nx*jkwrn@ArV4W#gwjto#XIt`$mHUrF{31CT)VA4qpnBM~*Fb(^;r!%+@ zcAB|AIjn*cV?!8ZokeFQTE-*G9`V2fvzaYX%1{2p+b_YJm{@E zAplAV92$|gbHrs4$OAF1S#9gxbslhi&+@7t!eMK&v1>1R$ zEt3bIE^>zo6N23PP0_AA96J$}%k7-XZ@`aB!b9iM(xkNWfvOAC*NudG$RlG&`c4u~ zDmNjc#4l4Y3hKE_aJQX2)M0gg0tEuHAfQcK_-;hk-4Mk^bUyYsOcx|5FdkZ*&^2EP z3~~XTj~~8#pg*+dT}S8nbdeCy4ABy_dsidfDROty1u(<~HR1$)__Fyw1%pIo6&_kn zhZVGhE`+9vl9pRMT9JZKoD^QkXjL-5Btdi@jDjg;%0_DvWmF-2Ei8NY*FvV8E>E%t zW1IWDGFAlNztaNxI&gCCOd8Q zy9>(`h2a`nO;-!^3Vb@FYlM`Bw(aKGX!~xSJ!On51hcOc%)W}QhVibZYXH{JHh{IX z9Ym)|u;3)RXpoeoe-i|!zc z(;!ZWhIi3u6uK7gP7_YjsS`78qRHkjLwDdba5TKPW;F|3wa_FqcA!SO*MB%JPp3u6 zDtFds#g0)2_th}c$?z7R9Jn^nX+nKiu?NIGl0G{$Ezk+$cv5Xb(^Z1_4hiB*SsMZU z{O);N@@uBA0|+!FbYT?jRe0#+%$n#vQ=1~4_7okf6)>=U5Kl6A6h^BSCO_yKQZCP*nPNCoL@qIku~otY*ByL7JCnA=^PI#EhK*EcPsbm@>r92SZ*z zb4iGI7a~d_Jfa=TNMDScj7bllfTP@?1ajb@bP>+dE_q8-pMbeJSPyx>=KkE9chbG$ zTkVQysv)jaZ&)5&x_?P0#9gw0wi`64Bq8$0I+~0WKLbI&f$@Nkfe2wCKxjehOcoxU zXk{fN_X}^JdO_kTFE3}G!XD`+b`O$+!x$d1U>XfDle1^`)mt!fi}yBLO`{k-cSlhM z_x!z0Mltkc_v{vXCnY(xuR6Rh1PI(^0#}_kO9AA=aDNJ1Mlcz0uNZI|X%da`67eZ1 zEPR@{kxY>dVz|#EiYdAwQm~_A_Hw^)y<~SI!ovEvF9IvR6-l4*1(@;!)3f1m-*oU` z`GF}v19v~zgCul8IKg}!g3kED{ui3phgMZ<=Uysme4+V&3B&QBG&8xN8NJ`fGG&v{ z<{9UL#94OG2_cGmEJ{)A($U%bpj18zVTK?sR)iA3^WBBC(cr~tl#*um$`D9?LBY-Y z3X4omADr6HPx~fjA$rOSq7Sw%+>wdrtlYPKg{5jI#7B-#&j%)a(2i4xUIFaUU}$#k zjzO?)nv zJ04N9r3mhaS5E_1Hr{Y=Hg){y32)a3gsyk>4|jR{x~RSZ@6o>Yp057(zOI_r4xsQ)mmB-Rhg%XnU zP6nnS_EZ*_xC}L7y5xX>c;Rl#quj>$nQFlt6N7?idg0i8ca$K5g#|di<7JDnD>lwE z5mY-d`B^Nv&Rw6#bqN8ipTiESFz(VTL~6#gc+8*d61O?wZh-4>po2n4GX&1!34c7T zn`Z(S_i$qp{I~=b1O|kj6np~6hwmDurbF=yD-tw07Q+`G!b$84uyp@=k$6pGT^KO6<&lSXL}Fw1wj*ana$Rvv!F!q_ z&gLy=+qy5}IJDfgBO}^6$qnCM2UaYr=4*~Max=GTZQ{Gm=9|OI+E-1^ZL>X^5iVP0 z__9{Mwg2Y)&8Z!NJV|<@p88e&+NJf0^^-ed(%wb>vsi4g-c}Ko4Bk<(rsFHx__h;p z^MF4A7YoktXV39x#`%niTW0@jrgR~Me;{Hi-7;0LiIzHFHKo67u)L;8TlPJ2e5pH{ zmAf+hiXRS-L{6N(`5=GK=#ErmKLe~vfK?5!8qIhYt+t!3H!Jwl_wb|V z`FjFD@&cK+o#$Sec-pg)x!R5tDiQ>Y)~MC9S`MQEc+Hj3%KFvIYvGNDH)eVI%&oIN z{@i5b>=Yk9bDlpR05MzcBX5hv##Hl4Jl#GfN^oOtRJ{(i)0Qh^}beKxpWj&N#oc?68NoZ@H6z2^h)hh zqY=AjS+<>%w^DVz?pobyRwTb6yp`X&ULDCfxRuknnbR4`>0Z`sXE|4l*Dcp9D-T9; zYqoNm*4Ri^^H$cO&8$O_td3<>w4i#cpnkKUe(h4E;2>|vf7MjAHo2jR)c5j3{;i?= z_`rSq+(KkH5lAC4^Z+5}(T8=P71s;8neCJF9xFY5ja8t7ln-nyk96yQW)xDB?V@+p?8=X#HX&s~zNO$Xz+TX(;Cnxlu#j%7slsC2z=!8nRbB zn}(8QUDS}X(!6OXTh?zIEKzIGYBpcgxUSf+ZYXYQ`IBCrz893uN9ITK%T`MH^7f5# zzI_m~hR~=R`JyIJQA^8hu>|x?Ljbkha73RUHQ*o_pbgh+g+;u<^+Sy@n&w_%cz4~} z$ok~^$i}IgUcUDXP&!LGqK;BNvuyP&f8Ye)GRO}O@zj0%&@||OhCJ#^ugO zdY3x4Q%H?sM}fvs(j9fz@UGgmp7n6odIQ$%+^wk~3|8J=1~-3zMc!7j+RvA?@GZkU z9pHyA@E0EBtqVvZ$BZL!7UwndjLqWJIo^IVVIG5#DFK)vfuCVHps;gChsD0I7duPWE zy?E!gld$E!Nsz|us3VK(!U4*D=k^G>o166tST5vcy>t6SBCjrK->bE29B=Q9+ccFQJu6(Z`HM1P8y{z z8Kv;@l3jDsBYmleL|Cdi*&=_bnM8O{b;>Q@QW#G;eyWPiC;U4UM}If6exAh?r)V~Jb;y&K)&l)^qf zeua4oOK|@tT(yn6oFBti!7D}gGA7--dnDYLVSCW+0)I0ehauee5W+O#u^D^br{Jcd zcND)`jh_SJTRAHM-c(F+U}kn=e%gPKT>u>TPXtY{Re`Z1$yW&ND}?$LLJ9EuM8Wro z-0u@vKOmgkKTa3PqE;VIn7Fi=PgP`-`jyLDg$xTet3jGJ+?V~ z<~Ek?bih@OnSXFf$@DeL4go*l;+QrjZq_^>*vQ^&IdL2Fb`0Fdn4{STN!it!9oPWN zR%%zptJJFE+QHlSy3+xdiMTfylZz3Prkkc6xNl6F_(7UKd72+R%bz^EIq1DDWbT}k zb49b&Jx9c(m)toiBP5!og^1YxvPAI*-8*7JlJV=Ei*`a`1hprvQKRL<$B=pJ+d()_ WA-*i>E+W2Kr0A{{f3=1L_nfqzP0ZTy^W+id$+fOLRAu`qcCvffk;8b5H|vU%SNrnCT6kky zaV6dNGJulxd#@^g-YKTF(2~`STdV)&_;tgK3Z-4Dm>9HfYPrw2>UXp0^L@@Q&f!ZKm1VOf|d{nOot6l)KtV54;R7^*HSJ~~2gfKBViLb1^3WGEVi7ahEWV-r(hsB4cu7Yj#M z{z~a(X}kI~n@w-21=d9WR$ZKLgw6y(HvFO}`tLAl6}YN|Un$otv#gV@X-nM>z@)k0 zChM{2UIS#=$>co^cCGaTTJ!({j7^N=)ZQn0?Ab<0jY#)K#^8O!;|Br z3SOe5=%sL#gbC1MLtR{w!-Ndj_w(^#B4&6H3j!+w z8v-1S=mDga(HKcvg27NEG93%WCZ;3NU{Devt(=I&NDWffBgjI4tYu|U7_u(|ilGv1 zWawWS4r?SC_JM{TT4fw%E9vKr!oCAQ5KyMssMIH|9!1Fm3Q~t0TCjIUNowP~Y!$8I zJWk}4HjPh{(QuH2$4NMPiMXh>+Osmtbt%|Jx@tZLPK#=U=VpMLq?cqr3uy(sEV+a| zMvq&{*g|^QQgSK}n+_wuDUl=!Xk}4O03ywjwCck2*mDxUv=Zi!oB@1P3&1ZI;-C*L zezunW-11Lal(Tn~rAtN!{a0(5Pf}JPTc;J;0<6S5%3^=$dCKOGABTDrJ?RFs;q=kr z1=Y(2$+DnEE1=0S00FlB4Fz#QtvA4Z)J&#zBz$!?9F0ZOT0R_&!u*jo>@CgPL97V~ zV7)v!gD{(tKapl^K~PCQv^B_+*Su1bHOL;oo&hBpL5lML((I)uMqsdj?2EeKrEq90 zOrj(V=yv)|`w^Kz%gST>R~7kQ#J<5wok=6SyrD>IGhN#0w_P1<4&}1_wPj93zELBM0f->>MNdnXFDf{cd)C{S>@MA&j6#QjyiNT@1&9<1q5*rHN=bI58F_ zp8z5rsV($d`8l-K{Y#_doa)gTXkR(zf1osh*fXcc2*yNmgW8Lm^CqC0*35)RXeuhp z(TygU#6RB<7g;LF86<0K&8$ACf_>00sI>@>Ywk{d_6lB);^f+rOwQN+a% zpd&%}2+zexAHJjd(ufcMcC~&Uv9f$v`y_(T0!V8!A*vtWxT(-wFcgc0r)YDj%l{nW zG}l8Dv0yZc0btu$8I#egr8(+Fz+YLpRr)KIwXb}pyiZyB3Sj2-)nn7wBU1E}oPk^f z?E6OmkV^8!7kLACeVhKIx(=qodEhOUj{qXg8Up&X0jOpsXQwWN ziKJ;-H9ZrK5cIpWdM0!+98GiRM4{1GQ1YfZY*AgFh9!lj45-Oh;6W&DtS$CR3CWtF zM**Zhe*zTWqt|Me>1@N>EpDWjC8!9fBra(!3C~Q1U=Gyd;2SaW8nRZ9|Bj@`07Q3^ z!6zFXnr{JSomqLcv5IY1K|QU5qy@7Dxe{f9iG<`W^I#<&nLvw;CRl#3h>;|Cfsj?e z{e)Su9NMR-MWM(m2+;W^EM?%Zn)2bX{efag2a@OHK|@+Q5gnVjI1wX<>6IprQIhgL zb~{FIHhEbG{llhw&G!KDzZohv-ScB)An1Lg81OTNg9=8^NBt|y%_Xd=nu(~Xr=@`Y z8yVa~NA&b- z%>}HEzTNDnJx9xI2El-t4NC|X8?UFK8enUn_hU}hK>z6IX{BD!(_bI0WR2UM4zdyv zy?r!WrhxZf9xH*muQgk3V;Dh!y_JRrmQJ@;1O85HjSJr7Vft8}8N%Rzg?^>A6bO#B zIY;z@dMI1Q0tR^`nw3wMF{S-Al=MP zL7K*yPwS_G8HRsxDsOm0YLCoLrB&C$k9qq*{FSJjxRMQzW+6|m$lQF0@#7<{A14i8WNpAO{ zmcHKkTh@F=VC+nXveVBn;BY!xV{*{e(*}10JYp2qT@Ne;r~mCRlK9YKwlZo|Hzr4OdG^5f`)fh!upJ8WX*E;Y)~1)EEGSpyB@b^;Q6x#n(DFos~O(Kdk!n?Oebs{W}(B( zKkZr27Mubh?ZPa*GB%(kpTgz{eH`-Sr?9#X;C*y`k_Lc#8mY|R zr`;FtD@YEtUuyIqPYhMC-5@dY7Q7J*PvkG?g-bUzuVJ;1{_`dOVR?J24bO!~XJg^C zZaf?ty)+#O%SV)hNVp0htq(&02@_0qm~F_L^r?wFvz`0{zWgJCUoiCJ6S?%869+s= z?4cNok@vBpgPw2>C8JS0{U0}73OIy#FXtMqEW;duU-XKS9>3hJ-Bg`r)%Fegzb?1N z-?cb4Jl;iNspj_Jt9iFHDXaYp#?Kiy6>6t@i({PbMFZ{IaAYqk?`UspmrO}_)w;Vr zQJ8czBt{cNbTq9yj^7*Gr7$Ml73=QWgd*vvOPoybqT}$oqwU_nhNEDq=*wky%a$vX zg@=-k!xAF?BqlQqmvmRIyXz8JNk@HxPh7|_Njb8=(DAtr(OZ*r)WS>3k@KO?YRlRz zW^4{wZjIBuX=a=rl<5C1di*~j`+rXKtpXd!O|9etjgfiXG|M>LoBayOz){h+tcu6*0B>~k@1 zy9)abt-KcfwZgfH&eDhg-F@Oyuq??p={K%*c`=20QK-$1#ij>plzaXjX<)O(*X$LN6SO#1% zU?VnoWc(Rw3V2Pkb7}qPWH=O&j$=v#lBf{O(`V)?SuOqaTp8@dzBX5)DFpN#b|p3U zI_osUP@o+1Xi?8FzXumW5r$XLFU*g}c^zISK{(Xx-XP5a>C9Y2&+8)w_!;5H30%Yk zJ1jHQE%1bR8?o_*NESTp`Aq)qriU4v8{`H(4RG^>CE^gwqoWK`?Cf;}M}H>d(atzW zZ}#TU11}ijW&zkY{TJA*K@J-HNhNgWjg?Rr*h+}FxVM6UYTe1TkS=%ADC91)nSn%Q z1?Uo3$lIDsK=d zw+jf}7fPN`+AUZ>U8~>*=n=A|IvOBn2!@x7;L4Z*@VRxy%o`Y`b&pYGPCGo}|3@Y} zTNYrF{~MV6W)ZZzB7l{11Q%pgP(P9jo3fn#B1vre-FdTV*JEXaFda02?4w7vq{DnQm3bb?9%N})hrhj0Xl6b#;}@v{)#BmAGe>b+EXz z#sk|P7tPCXh?EI=8H0r2%rR86BSqexsV=v|estG6%D30;*GKfQTeCqh(aXC$gJAp! zvcKCUi%W~yxW|Rdr4{6r!GB z-<~ylp8})f9^RD_?<%1TcvK7d0OJP)J3Op{@=1meT}&`%+@8)gQfFC!T8wpj}v%$Bdj*wUSl(3Kd zgC_<%C|Tx^WSKqFeUT|(hA{m|1-?kY4Qj+G*tTsHw+S{OON#kfzz)vWa^X243&b%A zkh*rgM{tG}dn{rWIH0izHh@-%-d49KwtQ4d8^XI9sr17FgCn~7h%LUeo%S<6yhd=g zIXhH6UWmt1eVomFHUU)tE0>**EBc5TKCUQxfBhS%z;lo<7z_f{N6`d<73|_8&ONG? zY%iWC)ZZhXqUUe^qj-VCy%V^4-p7^kKAw2VO#pQAcV6_OwvdK*3*I2%LG@9Dp>H%7 z(?8VfXv>SWy*qJ1F~|yvdL>*wD0`xTgcixHa!X(|41x40N-dP=d*_@0=!Hia27H!fk0Y) z;;G>2lQjS3g8|dYz7s;vsosI$z_U-CO6&Nu-N6%my}e{Va%#a|BLLC{`i+-s6eLa^ zuZ(cyj}ecXR-&e_y%IGVvHC?seu);`EYN=itFHr~<2Oqb(EuFCDr)45V%1a#E?WDb z!+rQgjSzcoFz<1;8>do6(~Xm<+&od`PWk+z%1di*Rm8U#wL1F)GgDCUnrTgyccbH0 zQ_AfTRnC+rM^w2|Mh-gJvu|{#I4kflnQyeG9PS%E;DR?gQ+6jbzy0+ zvSn>zcJ*Y^E-akf^fH|7rIt;F(v-8|&Rsltr}K8FSkjR6Hm-Y{?-eBD?jyI34PiMYW@RD&s zlk()<_QscOYwmpurc`06sPmKM&4c2$EzYj_>XPpIh103dv*Lj8ebdr& ziTbs&gZB#7%9>YI@06YthlWM2=v|{bWhq^=)Pk@nS7YK@;?lj4*!qlkY;codpJhu@ zCACX^iJHVnqHp!kpPvw)8u1Q^Ttn*p+=Srn5YsSzP~@)`kz%OqL6zb019n*6Ls~yLW+mZ(qfN9_L=?-ar-` zaGbZ@ao%<=#$L@$I!hMXH*)hAuilxxJ-alL^wq8V4kt)5w`o1MbuG6wnR|Sp8!iXp zi)Zf$w}qwVWM1`pUPGcb>1kZ|99i=mNqUYhbfpTb)(dOb3TqRM$-*X)^QA1#MQ_ql zBJMxBzW?NE;e(vyK0amjFSV^%OGK+5%)h8z(k#x2W!<8y=b^RthgSEd!ey%2@B~th zT=3nLqafw*!|6Wd$bS?!X>+n}X|}9PUiI?zdzz&8*e(5rt90qPq^stZ5;`pxB|TMm zci~yL`q!-f_wtKVWp$~N>JPMPr)yKkoh)fvFKJ&r zn=I&9<2pnxFU94*UbJ-i%aOa0#JOZ?>w4*_)wX0|`x@6Sa`_vaEtTa%otHk5IJe5J zHS~z}z2J3ytS{vYEbm+MHHi6*Vq?45F(|e_3oYl^!c<|sSkSN;S1W7>x0EWAd6Q$f zT*xm*Uy4I>awTh=|GlDOk@NplXG)p9i%&0hE_E#TiTjV->k>Pj5!+?g_NKfC#N3*M zO{_a3)^>wCn|j$TrBZL$G%$M8OWcBLLuV4L1xuw%ZZU9JY&s(z?taiA4m>OJ@pGbi z=%H>nWwa%Y0Z>JB)QR<-V%=%+^f@tW=pi?p0{x6Wv8Xv|Y?cTr#j19(vP0|`7PCek za_52IU8D8Z03Cta&me&sEnC-=jM^Y_`^AbDaFu(P#nVG#=dfrP`GM;EFSf2K8Efey zhSNLaDbBITzSOhOnX+UrRwON@8?M5Xt6-^ODPOGac)+ekMaP*(N=0_od)e+yu$E^_ z!YBXNXWAqdm>-iCQFW@|nw>mSpnjIfs ziY>P-*NLv`b?bqo^}u7Ktoc>+X-EwBb4^cWG50Hq&bBb$^#}T`itl;ba{3DuVwT}- zlT!4yWu0wMt~JEf@V3^%^_!IMa4bTrq2H%|C!a-F2zMCjcPebWN}W`xp`O+SgP34~ z!2mlz&cQ%|DuO3<>DwszHr6ngrj0xQ8UtSwm55d$S}`##e<{?&o|~D7TqJSGPYlr_ zkTNvfu`qJ#{;ycW987u;%pmwYf+Ylf2&5z~<#gO?%l|Ba$3yvF7s6P>O_%iPT~6#$ z?#I-fR?8nchjvr=n@IXC1gi+Xhai*Iw=+McRr!A?;DoW~zgW_`BU963vy==+)a!ZNZP3ydN_zdh2#M{nOjOM_>5jU(-*&dRjQbY8LA@;p$YgRKKiT=9hJM zk8a`X=JV{w;0ySY3Ia&ca&)=;?)f!;{U(FR2X^@>o)yHWpB0DBi%*|lI~&}R8aLaO zwCGE<9U}@h$Zoc(85OrMpHzB&sM3A9eN)M(a(=OSF^kdJz};9^%I5s-E^rvu^{Xfz W9sXE#s*d?io$k~z<#&#<0RIhx5m5*L diff --git a/debit_notes_pdf_reader.py b/debit_notes_pdf_reader.py index 964d486..38a2b6e 100644 --- a/debit_notes_pdf_reader.py +++ b/debit_notes_pdf_reader.py @@ -2,9 +2,10 @@ import base64 import json import re import time +from contextlib import redirect_stderr from dataclasses import dataclass from datetime import datetime -from io import BytesIO +from io import BytesIO, StringIO from typing import Any, Dict, List, Optional import requests @@ -18,6 +19,7 @@ TOKENS_URL = "https://api.grupoginseng.com.br/api/tokens" STORES_URL = "https://api-extranet.grupoboticario.digital/api/person-logged/stores" DEBIT_NOTES_URL = "https://sf-fiscal-api.grupoboticario.digital/v1/debit-notes/documents-list" HANDLE_IMAGES_URL = "https://sf-fiscal-api.grupoboticario.digital/v1/handle-images" +FRANCHISES_LIST_URL = "https://sf-fiscal-api.grupoboticario.digital/v1/franchises/list/franchise" class UnauthorizedTokenError(RuntimeError): @@ -104,6 +106,39 @@ def get_franchise_ids(session: requests.Session, auth: Auth) -> List[str]: return out +def get_franchise_ids_from_sf(session: requests.Session, auth: Auth, cp_id: int) -> List[str]: + r = None + for attempt in range(4): + r = session.get(FRANCHISES_LIST_URL, headers=_headers(auth, content_type=False), timeout=60) + if r.status_code not in (401, 403): + break + print(f"[warn] token invalido ao listar franchises do sf (tentativa {attempt + 1}/4), renovando token...") + auth.invalidate() + auth.get_bearer(force_refresh=True) + time.sleep(min(3, attempt + 1)) + + assert r is not None + r.raise_for_status() + body = r.json() + if isinstance(body, list): + items = body + elif isinstance(body, dict) and isinstance(body.get("data"), list): + items = body.get("data") + else: + raise RuntimeError("Resposta inesperada em /v1/franchises/list/franchise") + + out: List[str] = [] + seen = set() + for item in items: + if str(item.get("cpId") or "") != str(cp_id): + continue + code = str(item.get("sapCode") or "").strip() + if code and code not in seen: + seen.add(code) + out.append(code) + return out + + def get_debit_notes_page( session: requests.Session, auth: Auth, @@ -212,6 +247,14 @@ def extract_pdf_text(pdf_bytes: bytes) -> str: return "\n".join(text).strip() +def extract_pdf_text_with_diagnostics(pdf_bytes: bytes) -> tuple[str, List[str]]: + buf = StringIO() + with redirect_stderr(buf): + text = extract_pdf_text(pdf_bytes) + lines = [ln.strip() for ln in buf.getvalue().splitlines() if ln.strip()] + return text, lines + + def parse_money_br(value: Optional[str]) -> Optional[float]: if not value: return None @@ -252,6 +295,7 @@ def numero_parcela_from_text(value: Optional[str], fallback: int) -> int: def parse_pdf_fields(text: str) -> Dict[str, Any]: m_cliente = re.search(r"Cliente:\s*(\d+)", text, flags=re.IGNORECASE) + m_cnpj = re.search(r"CNPJ\s*:\s*([0-9.\-\/]+)", text, flags=re.IGNORECASE) m_nota = re.search(r"NOTA\s+DE\s+D[ÉE]BITO\s*:\s*([A-Z0-9-]+)", text, flags=re.IGNORECASE) m_emissao = re.search( r"Data\s+(?:de\s+)?emiss[aã]o\s*:\s*(\d{2}\.\d{2}\.\d{4})", @@ -302,6 +346,7 @@ def parse_pdf_fields(text: str) -> Dict[str, Any]: return { "cliente": m_cliente.group(1) if m_cliente else None, + "cnpj": m_cnpj.group(1).strip() if m_cnpj else None, "notaDebito": m_nota.group(1) if m_nota else None, "dataEmissao": m_emissao.group(1) if m_emissao else None, "valorTotalDebito": m_total.group(1) if m_total else None, @@ -327,8 +372,29 @@ def upsert_rows_sqlserver(rows: List[Dict[str, Any]], connection_string: str) -> cur = cn.cursor() docs = 0 pars = 0 + has_denominacao_col = False + has_cnpj_col = False try: + cur.execute( + """ +SELECT 1 +FROM sys.columns +WHERE object_id = OBJECT_ID('dbo.TrfDocumento') + AND name = 'Denominacao' + """ + ) + has_denominacao_col = cur.fetchone() is not None + cur.execute( + """ +SELECT 1 +FROM sys.columns +WHERE object_id = OBJECT_ID('dbo.TrfDocumento') + AND name = 'CNPJ' + """ + ) + has_cnpj_col = cur.fetchone() is not None + for row in rows: id_externo = row.get("id") if id_externo is None: @@ -339,42 +405,163 @@ def upsert_rows_sqlserver(rows: List[Dict[str, Any]], connection_string: str) -> found = cur.fetchone() if found: doc_id = int(found[0]) - cur.execute( - """ + if has_denominacao_col and has_cnpj_col: + cur.execute( + """ +UPDATE dbo.TrfDocumento +SET IdExterno=?, FranchiseId=?, ImageName=?, EmissionDate=?, + EmissaoNF=?, NotaFiscal=?, Denominacao=?, CNPJ=?, ValorNF=?, Encargos=?, AtualizadoEm=SYSUTCDATETIME() +WHERE id=? + """, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + str(row.get("denominacao") or "")[:255] or None, + str(row.get("cnpj") or "")[:20] or None, + row.get("valorTotalDebitoNum"), + 0.0, + doc_id, + ) + elif has_denominacao_col: + cur.execute( + """ +UPDATE dbo.TrfDocumento +SET IdExterno=?, FranchiseId=?, ImageName=?, EmissionDate=?, + EmissaoNF=?, NotaFiscal=?, Denominacao=?, ValorNF=?, Encargos=?, AtualizadoEm=SYSUTCDATETIME() +WHERE id=? + """, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + str(row.get("denominacao") or "")[:255] or None, + row.get("valorTotalDebitoNum"), + 0.0, + doc_id, + ) + elif has_cnpj_col: + cur.execute( + """ +UPDATE dbo.TrfDocumento +SET IdExterno=?, FranchiseId=?, ImageName=?, EmissionDate=?, + EmissaoNF=?, NotaFiscal=?, CNPJ=?, ValorNF=?, Encargos=?, AtualizadoEm=SYSUTCDATETIME() +WHERE id=? + """, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + str(row.get("cnpj") or "")[:20] or None, + row.get("valorTotalDebitoNum"), + 0.0, + doc_id, + ) + else: + cur.execute( + """ UPDATE dbo.TrfDocumento SET IdExterno=?, FranchiseId=?, ImageName=?, EmissionDate=?, EmissaoNF=?, NotaFiscal=?, ValorNF=?, Encargos=?, AtualizadoEm=SYSUTCDATETIME() WHERE id=? - """, - int(id_externo), - str(row.get("franchiseId") or "")[:20] or None, - str(row.get("imageName") or "")[:150] or None, - parse_date_br(row.get("dataEmissao")), - parse_date_br(row.get("dataEmissao")), - str(row.get("notaDebito") or "")[:40] or None, - row.get("valorTotalDebitoNum"), - 0.0, - doc_id, - ) + """, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + row.get("valorTotalDebitoNum"), + 0.0, + doc_id, + ) else: - cur.execute( - """ + if has_denominacao_col and has_cnpj_col: + cur.execute( + """ +INSERT INTO dbo.TrfDocumento ( + UUID, IdExterno, FranchiseId, ImageName, EmissionDate, + EmissaoNF, NotaFiscal, Denominacao, CNPJ, ValorNF, Encargos +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + uuid, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + str(row.get("denominacao") or "")[:255] or None, + str(row.get("cnpj") or "")[:20] or None, + row.get("valorTotalDebitoNum"), + 0.0, + ) + elif has_denominacao_col: + cur.execute( + """ +INSERT INTO dbo.TrfDocumento ( + UUID, IdExterno, FranchiseId, ImageName, EmissionDate, + EmissaoNF, NotaFiscal, Denominacao, ValorNF, Encargos +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + uuid, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + str(row.get("denominacao") or "")[:255] or None, + row.get("valorTotalDebitoNum"), + 0.0, + ) + elif has_cnpj_col: + cur.execute( + """ +INSERT INTO dbo.TrfDocumento ( + UUID, IdExterno, FranchiseId, ImageName, EmissionDate, + EmissaoNF, NotaFiscal, CNPJ, ValorNF, Encargos +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + uuid, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + str(row.get("cnpj") or "")[:20] or None, + row.get("valorTotalDebitoNum"), + 0.0, + ) + else: + cur.execute( + """ INSERT INTO dbo.TrfDocumento ( UUID, IdExterno, FranchiseId, ImageName, EmissionDate, EmissaoNF, NotaFiscal, ValorNF, Encargos ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - uuid, - int(id_externo), - str(row.get("franchiseId") or "")[:20] or None, - str(row.get("imageName") or "")[:150] or None, - parse_date_br(row.get("dataEmissao")), - parse_date_br(row.get("dataEmissao")), - str(row.get("notaDebito") or "")[:40] or None, - row.get("valorTotalDebitoNum"), - 0.0, - ) + """, + uuid, + int(id_externo), + str(row.get("franchiseId") or "")[:20] or None, + str(row.get("imageName") or "")[:150] or None, + parse_date_br(row.get("dataEmissao")), + parse_date_br(row.get("dataEmissao")), + str(row.get("notaDebito") or "")[:40] or None, + row.get("valorTotalDebitoNum"), + 0.0, + ) cur.execute("SELECT id FROM dbo.TrfDocumento WHERE UUID = ?", uuid) got = cur.fetchone() if not got: @@ -450,8 +637,10 @@ def main() -> None: # Fluxo fixo: leitura da API -> extracao do PDF em memoria -> upsert SQL. CP_ID = 10269 TAKE = 25 - MAX_PAGINAS_RECENTES = 50 + # Modo diario: varre apenas as paginas mais recentes por franquia. + MAX_PAGINAS_RECENTES = 5 MAX_PAGINAS_SEM_NOVIDADE = 5 + SKIP_EXISTENTE_MESMO_IMAGENAME = True DOCUMENT_TYPE = "NDEB" SQL_CONN = ( "DRIVER={ODBC Driver 17 for SQL Server};" @@ -466,147 +655,186 @@ def main() -> None: s = requests.Session() s.trust_env = False a = Auth(s) - f = get_franchise_ids(s, a) + f = get_franchise_ids_from_sf(s, a, CP_ID) return s, a, f - session, auth, franchises = _new_client() + session, auth, all_franchises = _new_client() + target_franchises = list(all_franchises) total_docs_upsert = 0 total_parcs_upsert = 0 - skip = 0 # para testes, pular os primeiros 900 registros (36 paginas) e ir direto para os mais recentes. Ajustar para 0 para rodar do inicio. O endpoint suporta skip alto, mas pode ser mais lento. O ideal é rodar periodicamente com skip=0 para pegar os novos registros. - skip_inicial = skip - total = None - pagina = 0 - total_paginas = None - relogins = 0 - max_relogins = 20 - paginas_sem_novidade = 0 - while True: - try: - page = get_debit_notes_page(session, auth, CP_ID, skip, TAKE, franchises) - except UnauthorizedTokenError as e: - relogins += 1 - if relogins > max_relogins: - raise RuntimeError( - f"Falha apos {max_relogins} relogins. Ultimo erro: {e}" - ) from e - print( - f"[relogin] 401 persistente em skip={skip}. " - f"Refazendo sessao/token ({relogins}/{max_relogins})..." - ) - session, auth, franchises = _new_client() - time.sleep(2) - continue - if total is None: + font_warning_files: List[str] = [] + + for idx_fr, franchise_code in enumerate(target_franchises, start=1): + print(f"[franchise] iniciando {idx_fr}/{len(target_franchises)} franchiseId={franchise_code}") + skip = 0 + skip_inicial = skip + total = None + pagina = 0 + total_paginas = None + relogins = 0 + max_relogins = 20 + paginas_sem_novidade = 0 + docs_upsert_fr = 0 + parcs_upsert_fr = 0 + + while True: try: - total = int(page.get("documentsTotal") or 0) - except Exception: - total = 0 - total_paginas = (total + TAKE - 1) // TAKE if total > 0 else None - if total_paginas: - print(f"[info] total_registros={total} total_paginas={total_paginas} take={TAKE}") - else: - print(f"[info] total_registros={total} take={TAKE}") - - docs = page.get("documentsList") or [] - if not docs: - break - - pagina += 1 - pagina_global = (skip // TAKE) + 1 - total_paginas_restantes = None - if total is not None and total > 0: - restantes = max(0, total - skip_inicial) - total_paginas_restantes = (restantes + TAKE - 1) // TAKE if restantes > 0 else 0 - if total_paginas: - if total_paginas_restantes is not None: + page = get_debit_notes_page(session, auth, CP_ID, skip, TAKE, [franchise_code]) + except UnauthorizedTokenError as e: + relogins += 1 + if relogins > max_relogins: + raise RuntimeError( + f"Falha apos {max_relogins} relogins. Ultimo erro: {e}" + ) from e print( - f"[page] baixando pagina_execucao={pagina}/{total_paginas_restantes} " - f"pagina_global={pagina_global}/{total_paginas} (itens={len(docs)})" + f"[relogin] 401 persistente em skip={skip} franchiseId={franchise_code}. " + f"Refazendo sessao/token ({relogins}/{max_relogins})..." ) + session, auth, _ = _new_client() + time.sleep(2) + continue + if total is None: + try: + total = int(page.get("documentsTotal") or 0) + except Exception: + total = 0 + total_paginas = (total + TAKE - 1) // TAKE if total > 0 else None + if total_paginas: + print( + f"[info] franchiseId={franchise_code} total_registros={total} " + f"total_paginas={total_paginas} take={TAKE}" + ) + else: + print(f"[info] franchiseId={franchise_code} total_registros={total} take={TAKE}") + + docs = page.get("documentsList") or [] + if not docs: + break + + pagina += 1 + pagina_global = (skip // TAKE) + 1 + total_paginas_restantes = None + if total is not None and total > 0: + restantes = max(0, total - skip_inicial) + total_paginas_restantes = (restantes + TAKE - 1) // TAKE if restantes > 0 else 0 + if total_paginas: + if total_paginas_restantes is not None: + print( + f"[page] franchiseId={franchise_code} " + f"baixando pagina_execucao={pagina}/{total_paginas_restantes} " + f"pagina_global={pagina_global}/{total_paginas} (itens={len(docs)})" + ) + else: + print( + f"[page] franchiseId={franchise_code} " + f"baixando pagina_execucao={pagina} pagina_global={pagina_global}/{total_paginas} " + f"(itens={len(docs)})" + ) else: - print(f"[page] baixando pagina_execucao={pagina} pagina_global={pagina_global}/{total_paginas} (itens={len(docs)})") - else: - print(f"[page] baixando pagina_execucao={pagina} pagina_global={pagina_global} (itens={len(docs)})") - - doc_ids_page: List[int] = [] - for d in docs: - try: - doc_ids_page.append(int(d.get("id"))) - except Exception: - continue - existing_map = get_existing_docs_map_sqlserver(doc_ids_page, SQL_CONN) - - page_rows: List[Dict[str, Any]] = [] - skipped_existing = 0 - for d in docs: - doc_id = int(d.get("id")) - franchise_id = str(d.get("franchiseId") or "").strip() - image_name = str(d.get("imageName") or "").strip() - existing_image_name = existing_map.get(doc_id) - if doc_id in existing_map and (existing_image_name or "") == image_name: - skipped_existing += 1 - print(f"[skip] {doc_id} ja existe no SQL com mesmo imageName") - continue - try: - dl_url = get_download_url( - session, - auth, - DOCUMENT_TYPE, - franchise_id, - doc_id, # para esse endpoint usa "id" do documento - image_name, + print( + f"[page] franchiseId={franchise_code} " + f"baixando pagina_execucao={pagina} pagina_global={pagina_global} (itens={len(docs)})" ) - pdf_bytes = download_pdf_bytes(session, dl_url) - txt = extract_pdf_text(pdf_bytes) - parsed = parse_pdf_fields(txt) - page_rows.append( - { - "id": doc_id, - "franchiseId": franchise_id, - "imageName": image_name, - **parsed, - } - ) - print(f"[ok] {doc_id} -> {image_name}") - except Exception as e: - print(f"[erro] {doc_id} -> {e}") - novos_na_pagina = len(page_rows) - if page_rows: - stats_page = upsert_rows_sqlserver(page_rows, SQL_CONN) - total_docs_upsert += int(stats_page.get("documentos") or 0) - total_parcs_upsert += int(stats_page.get("parcelas") or 0) + doc_ids_page: List[int] = [] + for d in docs: + try: + doc_ids_page.append(int(d.get("id"))) + except Exception: + continue + existing_map = get_existing_docs_map_sqlserver(doc_ids_page, SQL_CONN) + + page_rows: List[Dict[str, Any]] = [] + skipped_existing = 0 + for d in docs: + doc_id = int(d.get("id")) + franchise_id = str(d.get("franchiseId") or "").strip() + image_name = str(d.get("imageName") or "").strip() + existing_image_name = existing_map.get(doc_id) + if SKIP_EXISTENTE_MESMO_IMAGENAME and doc_id in existing_map and (existing_image_name or "") == image_name: + skipped_existing += 1 + print(f"[skip] {doc_id} ja existe no SQL com mesmo imageName") + continue + try: + dl_url = get_download_url( + session, + auth, + DOCUMENT_TYPE, + franchise_id, + doc_id, # para esse endpoint usa "id" do documento + image_name, + ) + pdf_bytes = download_pdf_bytes(session, dl_url) + txt, diag_lines = extract_pdf_text_with_diagnostics(pdf_bytes) + has_font_warn = any("FontBBox" in ln for ln in diag_lines) + if has_font_warn: + font_warning_files.append(image_name) + print(f"[warn-font] {doc_id} -> {image_name}") + parsed = parse_pdf_fields(txt) + page_rows.append( + { + "id": doc_id, + "franchiseId": franchise_id, + "imageName": image_name, + **parsed, + } + ) + print(f"[ok] {doc_id} -> {image_name}") + except Exception as e: + print(f"[erro] {doc_id} -> {e}") + + novos_na_pagina = len(page_rows) + if page_rows: + stats_page = upsert_rows_sqlserver(page_rows, SQL_CONN) + docs_page = int(stats_page.get("documentos") or 0) + parcs_page = int(stats_page.get("parcelas") or 0) + total_docs_upsert += docs_page + total_parcs_upsert += parcs_page + docs_upsert_fr += docs_page + parcs_upsert_fr += parcs_page + print( + f"[sql] franchiseId={franchise_code} pagina={pagina} docs_upsert={docs_page} " + f"parc_upsert={parcs_page} acumulado_fr_docs={docs_upsert_fr} " + f"acumulado_fr_parc={parcs_upsert_fr}" + ) + + if novos_na_pagina == 0: + paginas_sem_novidade += 1 + else: + paginas_sem_novidade = 0 + print( - f"[sql] pagina={pagina} docs_upsert={stats_page['documentos']} " - f"parc_upsert={stats_page['parcelas']} acumulado_docs={total_docs_upsert} " - f"acumulado_parc={total_parcs_upsert}" + f"[page] franchiseId={franchise_code} pagina={pagina} novos={novos_na_pagina} " + f"skip_sql={skipped_existing} sem_novidade={paginas_sem_novidade}/{MAX_PAGINAS_SEM_NOVIDADE}" ) - if novos_na_pagina == 0: - paginas_sem_novidade += 1 - else: - paginas_sem_novidade = 0 + skip += TAKE + print( + f"[page] franchiseId={franchise_code} concluida {pagina} " + f"acumulado_fr_docs={docs_upsert_fr} total={total}" + ) + if pagina >= MAX_PAGINAS_RECENTES: + print(f"[stop] franchiseId={franchise_code} limite diario atingido: {MAX_PAGINAS_RECENTES} paginas recentes") + break + if paginas_sem_novidade >= MAX_PAGINAS_SEM_NOVIDADE: + print(f"[stop] franchiseId={franchise_code} sem novidades por {MAX_PAGINAS_SEM_NOVIDADE} paginas consecutivas") + break + if total and skip >= total: + break print( - f"[page] pagina={pagina} novos={novos_na_pagina} skip_sql={skipped_existing} " - f"sem_novidade={paginas_sem_novidade}/{MAX_PAGINAS_SEM_NOVIDADE}" + f"[franchise] concluida franchiseId={franchise_code} " + f"docs_upsert={docs_upsert_fr} parcelas_upsert={parcs_upsert_fr}" ) - skip += TAKE - print(f"[page] concluida {pagina} acumulado_docs={total_docs_upsert} total={total}") - if pagina >= MAX_PAGINAS_RECENTES: - print(f"[stop] limite diario atingido: {MAX_PAGINAS_RECENTES} paginas recentes") - break - if paginas_sem_novidade >= MAX_PAGINAS_SEM_NOVIDADE: - print(f"[stop] sem novidades por {MAX_PAGINAS_SEM_NOVIDADE} paginas consecutivas") - break - if total and skip >= total: - break + if font_warning_files: + uniq_font_warn = sorted(set(font_warning_files)) + print(f"[warn-font] total_arquivos_com_warning={len(uniq_font_warn)}") + for name in uniq_font_warn: + print(f"[warn-font] arquivo={name}") print(f"SQL upsert final -> documentos={total_docs_upsert} parcelas={total_parcs_upsert}") if __name__ == "__main__": main() - -