From 0fb38d252c1596063ddad768d8a0a1e6ad07c1d4 Mon Sep 17 00:00:00 2001 From: Gerhardsa0 <113539440+Gerhardsa0@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:40:11 +0100 Subject: [PATCH] feat: added lag_plot (#548) Closes #519 ### Summary of Changes I added the visualization of the lag plot to the timeseries class --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- src/safeds/data/tabular/containers/_column.py | 2 +- .../data/tabular/containers/_time_series.py | 49 ++++++++++++++++++ .../test_should_return_table.png | Bin 0 -> 11563 bytes .../_time_series/test_plot_lag.py | 41 +++++++++++++++ 4 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/__snapshots__/test_plot_lag/test_should_return_table.png create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_plot_lag.py diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index e776d13c9..04b998294 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -1,7 +1,7 @@ from __future__ import annotations -import sys import io +import sys from collections.abc import Sequence from numbers import Number from typing import TYPE_CHECKING, Any, TypeVar, overload diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index 491b7d68b..fee39fd94 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -1,13 +1,19 @@ from __future__ import annotations +import io import sys from typing import TYPE_CHECKING +import matplotlib.pyplot as plt +import pandas as pd + +from safeds.data.image.containers import Image from safeds.data.tabular.containers import Column, Row, Table, TaggedTable from safeds.exceptions import ( ColumnIsTargetError, ColumnIsTimeError, IllegalSchemaModificationError, + NonNumericColumnError, UnknownColumnNameError, ) @@ -839,6 +845,13 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Time The original time series is not modified. + Parameters + ---------- + name: + The name of the column to be transformed. + transformer: + The transformer to the given column + Returns ------- result : TimeSeries @@ -857,3 +870,39 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Time ), time_name=self.time.name, ) + + def plot_lagplot(self, lag: int) -> Image: + """ + Plot a lagplot for the target column. + + Parameters + ---------- + lag: + The amount of lag used to plot + + Returns + ------- + plot: + The plot as an image. + + Raises + ------ + NonNumericColumnError + If the time series targets contains non-numerical values. + + Examples + -------- + >>> from safeds.data.tabular.containers import TimeSeries + >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], ) + >>> image = table.plot_lagplot(lag = 1) + + """ + if not self.target.type.is_numeric(): + raise NonNumericColumnError("This time series target contains non-numerical columns.") + ax = pd.plotting.lag_plot(self.target._data, lag=lag) + fig = ax.figure + buffer = io.BytesIO() + fig.savefig(buffer, format="png") + plt.close() # Prevents the figure from being displayed directly + buffer.seek(0) + return Image.from_bytes(buffer.read()) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/__snapshots__/test_plot_lag/test_should_return_table.png b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/__snapshots__/test_plot_lag/test_should_return_table.png new file mode 100644 index 0000000000000000000000000000000000000000..0f17b4726090b563dbe797d06ad0f652a2783536 GIT binary patch literal 11563 zcmeHtXHZmGyY6n12nZMeQIbj$i4v5IiZBfdNK&$hAQ{O)LXV25Bt??sBnV9qBsb6~ z0s`-n5&!aS1VQ+hu!q8j{amz1f8Sed;Z)$Rz0INUQSl_128W{E z$y=nBOypE;EN_f-g?Z)q?lu14EBE3oKlb+N^EvD5CH=fJ2{@&sSzX?tLye&%xBSVp zZrswx>_74Tg9pv0hwNC#G2gwU5!EXxN4TANmMcugzd1{}Ct&5;HRl4x%j8)i&GkqS z1SgU*Lkn+(YfVAmU!uUq{a za`FYf54E+}@`Zi8a&iM>VsP1M-COQeTACPp{Z&Ow1r<}2&+zc@X^5B0S?pk49YB!xPvviB7G@c{9tc(ma<;y0eLp4jQ>B%&^5Y)n>Ue2)K51!#hK2^G z^DO!VH@m%Eplf{`f_@DZUi4crxrlVTEN4PAkR1>v^~Tu8YG%;6dQzQ4`eVm zx!HUQv*XKL=wTH|QMAR#IFAL{S8Z7AY>&%3Ebk<4kMMZS7Im*|PUqRQB*;z;R1&Jz zD(5rv^F=;jF$m5jdAnIiFsi<9>4xF7UB~thCzYG@lKf2tGqhj6u+Ro{W$UG8YGtUD z3vWfm#bx_#X85%Yhsq0Lm%J*c`bzR5g-miM+cQkMT2?o_oyrX1YZ>aJ2o66)cjMUG z+Xnb0_ve>1!n-#h_w za&mZdvw@0=O28NyQhmhrK$JUG*Jwyhw`c)kocZmWp=ag5SWbF+-P~^oqWJK``8yi* z-=~@NkQZTLoY+L4pL&5i=>N<2?4CS%a?RV@I}qY#Sou72zY`(OdVWSOfU{)!y@2W6 z)gXRcW@aWls;w)>Aa-rTTW08CXsHA&*5@W4B{#q%>iV z1#lVaiCtAbTZImTa#$w8?Vb{c#1K|#?tq$Pe`=;Fep=ylmLN%$1nwou; zCgkhGt`9CHRCg@+8aka`o@*G@{{HF8)hq6O;rvUAGj=zN9R_pr&FYJL38||_33fuv#4J7?-5K}W&9E>1{-|k})li8RQCM(Yd7*ILSNSf6hI|*nAgvpB zw?l@wZbC~c4E}fQQqh8L`{3pMSpBLst#m4j2nmgRiX{=?js-%1^&e78&czaWU^tF-s28?~bXjftF{UdP3W zrG{J#Y_Va&~j;{p?S-pY?Se8j2Kp^V&VO{U7Exe zo~S0Syl(vZ`g%wZH6k1*f71$TI;U~)GA$M>nBgYt$To!q(tn|hzt#NP_&u@)(EMW?&fcY_e-C>)zlt`RoiH7%eQ z`N}4*QddMo1Z^i`W_2`|FBk2YsM_1-Rv0@h>uPBo{_z1pw7BXdiJq+{BD5Bw^=xAxBwzX--%ljFV?xVGJ_P%l@=*RG|fh{EYdx%zwws@0Hv-2awiK0UA>gCLX=|5!QX4-c48Ck@VbJGaPY{P=OJv#X0w zQc~~jot7^7y=D2P??XcSk%?>-KAA=nlhdIk`JRb=quR!9G2H0{yDJnA>CC8y!njnm zZ`^pza7Nd1ec^jA!98#6)5}v)O|JsjeS79S5ZoCJ&F^1e=IEJxHTh%uKz!23NY0ib zySr<|?(oxEVxCL4v28dEX`yu|UqobNZyXJBmunlNx-osmAOcNvNzT!{ypHuk&d3L* zGG;;~%;MJJ#rFMK5BAZig#DZ!%rhdmS+%8MXMNK%GqG}sd9@S>B^}g>K%@_`F>GW} zBC9Y<&LtTqY(7IXlyW(6WO+l(WkTb!vU1b>nhoSZV?%@0nkW=WV@nI2sR~j(nq^~2 z+%{@B&k@EfO}gG>va9Jp+$q-0HOyREa>}`pe{t`p!(KWpym0H~e@-9nhfgqw36IEU z<_<>%PEj|Cg(o}-t?)!KpG_BRTkyGo*Hcs03`dSw!q~O*<>uyAf`Us9+K-f8Ii+=Y z#7$CX+Rk=aMj;@Lv)kX`J(7;zW9IAIr%$anR#Q_`87RqYRmhOiV}DMea*dyYuD`|* z6Apn*wYYN!U60pVzq{+o#I+o~w@P^xOHb#@#@Ps|7$mp#zmA&`M&W8MfV4&8T5OBQ zjvZV1B-&!x93!5E$7=z?viKC8d?(RIx4+z#ke&E_d1J(>Y-H7oft`Ku9V|2|kmvWI z>ATw!r3eM79}Yn#ywCd{uOOoV+{gEC355Z4A_%?yF78!ZmRF~^7@x@JOh5=vL!5e8 zsTC-hjgWmbN0X+ergXRVzO(OMkl)*AvHOh-Va3Cc*j|kQ7!$h&oeltk)vfZry4%Rb zh6%2rf`YT2G6`dL%VW)9X;$s&Um?Zs=l%0s!BC2)Qr}FJ^9c_R@2Ifrd2`_b#ep_= z;+7EvP(fpZG;*o?dZYDlW#AV22fL*W8SXbT7feM*;@4o=X>Rt{<9e3 zFIegL;9n9|^}=KZlp=IH0|bvCeHzH#t=ZlpDc7lRigACqvhI;&g-S_ef#nB)cds z?CH~|;nC5r8(#4Av63TFihm9O!SG5|qhAwX2fTAc{|z6~&$VcK1O+K|!JI*R#Rw^2X5q?8MW{ z%k|G(&+S;5Y+qezSGO6c@RZ;rN07%fv`q#4(xlsqp?)22Olt&fW;T|`U1xek4jdDE z{Xolz?Qi(RgD?z>Z>nCDrisJ}m26`*yK{|d(zjuXxabx$RW~_~ePW*dR@RPkE#zRb z){*L(06LqQo_x1@_~J2T{I5C5;bBXpx)FBh(4nfIKEy%x6i&GKT!^3Qrk2)=v6jTV z;|>-1&1-l(J|Z!Z{f9Ps^)x@#PziCz4z}V~bF3oz?fIfRtMKe$tt9RRk9rqC-d-R1 z6DKav$t);LkibhR3immX^}lpHg+&l4_K-o-v!t-KxYqxHdvn`T4~;au7+v(334y!N zJHG#c<*@r&=A7|4UiywZ4MMTBj~~hepPPK`iKANZm8%*q;WcwN5PC6F1LV^8^~GVY zr3RiN$6=*-*?YP$hjg!9d#)68)YM%JkvRR|piP8Ky+;tE8dmegaqowm=Te$dC|gdh z5y2WrQdV|$N0SgSJ3;oIThBZh{Q!*J7!$*R6bM01$+hLcCLPUpR;Q7ZmwG-SN`TDx zyy2;loO+R50W~LJiwg%!ZZ{l)&3g_1JWZ^vty_KNaGz`0--HvZH0@-Mw}*{9L;r*V zxIs(NI_cj`b5lnrB7{XU<&<*B#0zz~Yqqx8zH5ZR$&b%ZHP4f`178ul^??!~*HxfQ zjUPX@7MM3x{S^HV0{1+*FsrKb)8H$9ESp0raJfrNREX4X@P;syiM2MseSCK|i+b7l zl|xvrnVKfE-J21SUG31~J9DPJhE6`)zQ5e+*C0$!N>bR^*kBFYhXa7$E97_mEG_b0 z5AO6xpMze*!Xn++d-1aQ?b{Rcd%N@E4g)fkyPMOqzTD@|T{q4v@x*6l?l--Ku%7-k zc@g^0?~BoQFoDXLeUmuvMgUyVb!!mP-v!N8`NM|~rve&hJdXP)d{+DdaDv(R{_$3S z{O`)?+z-z(A}){L2;dkP7*w#>$v27eUMrS9+l%!AKUrlw-qb|KVIG!D@CGau2yLe> z;JFrlOjpc^2_hK!gYaCpLS=RzZG!uJSD;YUYqUH@8zc0~T{0Q@Z`?%l<7M=mF3>!# zGJU~i?y@+(xR(t+2u6`6z+BI+f3=$OxP+|+pl(`LskDv`ZP}kaW-;d+2F(2i#+MsL zof@WB3s`6r=@ZCE6eU6iA2C(qYuTYp(DbJe!hl2){qz^ji+;n010>$TWa9nBdJm?A401p0WK+rve_+r5+Rr<(>d@2YVf4`4H+tkiEHzoXM6fplyT?n_!3d5g(0Qr zc?NTxW9i{#Oq`T!RvLB7-p-V>2W&8Uabh&er1%6gRO`t8K$4@BWSm&w&=6;>`GHD1 zH8Lo&viB*7F~|bu?(hls_|Dd44*d1Js+oLoadEG`oi%A;Amj=oZz~Z-Mn>zNH>R5n z>b@!RyNjVY18ZKDn}d5hCjIV%K8>F~*)B_n!26bZ7YkwLq0>%x<+S^4*Zc88I`k9V zuMbxFjJimnL$BA)JCOaRi78G-*h54ejvR7ZEuN{1^UgE5f86nQ}gxVuOcJDU$V6c6z5pbIdDHc6wRib{r}G zI+zu&O0>V{5_i2S*M9`c?80)ANFVB83<+*IA?)(G&~~Spi0l3o03l$GPWtVR`?>6H zdlH7xm^_4-wD~V1>`)`KgG7^iKWDxSy?fvaJ97qLd8pjxYsiS(?uxK^Y63G;GW`8u7G_BW*9{9#TW90vhf5hUtixq zpG%Kuz9jCh@WdX%d`F?xKcX>Qllk_c1sJW+`eRC!WI0I1RGUE$l_15mxRpnUa>qNiai9TmERLy2t1B!dSupe_4C$A8LAs(2oVdFn5`F z7WU$WIF#Aw$jC%bzS(Su-Cx8#oi``TL44W$v}#y=-|M!l%~pRB#PDD$^^jdjfBI-s z6dq`;zrR21U)yZSVD6(wk0k1ujvVLl)P`t<1&YBW6=xyd_A?p!dUe}eYV+4VH! zxmvu`tKFTg>4@vbUqXFXQg!FgiHk$!Wo{I;jBBZ=JgqR!0~UUXnSrv5PHy9(Xyoi} zog;qCM_KQImp{ir#E|*d_%~l!>|1w6ggR4|>H6>19FX!}zZ*MW|KS5KaFh?3xP?F4 zWYIvKL<_0QZz@hkrCc-!M+I{w*|rXIG)H5$1EY++u(Sb}sF?%{rKzOy!>B^*_`jxuYCQeCb4$$Zt9`Iu z!Bt3bl>M~^k?>|_XJ2usSbSt(KFLi;O*(ugbHUQru?(a!#x0x5V89$|`^k`p2LK;~ zs1xzxMXGsIWagcg1h@JNJfn0sP%Qu-ye2r;1b_G?fMV<0;y3|BmBefi!nqoT?X-b}Ok9N5piLDb(W~?-0(`5ZtLvsWOie`Kt~sv7X2IV*q5Pr=lM~0PrpZk?ks=)o=$#S z!`0Oloe=}$Ygl$?L!PSL1eN}qV1{*NuwBBwiD%&><#$yf zRc0(eBQSFBe+9GC5(Y_g-{XU)!eJjkiZ0^wtL}ah;>0T?eYMTMz&$fCZ@Hko6vi(H z|50huqfAU0D=RCETjlQaxyG=-UOK~&KP9Q%%|=G1hn!_`jZJE(*c3d8AnSWU0yqWb z=;*mF5I10;Ba}8rs#R?VtNQ0CA0yD_%nwYN0~|$q&1WQWkihBxwldUBwtp6(z)1rc z5E>sJKYs`G1uiTgrfx7t@Gv639_POgeh3O%*&NK4GsDlv~$AZSFzce>j4M-hMhAq@kFbeHWcD;qz;rh@LY5fnN)&Er= zxZqN{A)O()ivZ!|enkNAnDD{>{hj+O%}VO)Y9Q@Q+E8qnNs8tM}s z6IW4<P+GBceS5hj0@N5*X}5_G>ACf~#lP3Z z^;Nq)5GQ>x=yxyg#9%QT77vSe22*wgh+nJF$x;Kh6Zp(R<^QP;OdQk0=dN)P1+S zCz12|^>3iK@7A@rcbJdJ!05$=)Y#pmk>4k5NN|8c1DN&|r5K95&@f;Ft|P2Tq8I|nr9{_UjV23 z>C>~zw&yL7yo=h};hU?|-O0pLa&mGf_dt%-y)41x7}!eR<|a-7Dziy`_$+{rOU#RD~)z z|L|~oS-C6|?#)R`vV<`9wI*Mh4Pnr(4`uKE^is4Fe_g1yu5POCxI@Q4q2!;d0@#A<9by2G>^!0eJgqWD+_xJu&gGAyjGc$4P z4qj-_Us=zjEO0&(b_Ttz-+Q?^M%q2sci{nh4=c}v+e}X~IW^-1*qfbECcc>&30^9P zxvyxLo4*3!Veejn@A`8pmV-!Qt{17BeIxm_R{8{;-}-|`bnNhpBvgTc?wlzD*!P+M8{qvhqIW`|owNC_n-Ye}Yr+11{q!kSKC~<_FS1QpW?u= z+n~x4QpuV@_sc1B9xv`$d%(V34aQIY>1%gE#RE1KQY>NH(|XGf(6eNO>$Iaa!%| z?tuRr#eFsAwsXPI`cjk7=-L+GV1t>^;NVV>4sBH!JWt*+B8^w$pl^NePmKocRYpc1 zD~}~=WoKvei%UpM#n|M2i4nI6kBqc%&rbRid7t{}9oJ`1xY`sY)D075WYGg4cwV0a z(FkNOsE)VN13J=9Baf-5saJc+i z5!oL;DB8eQHE*4?+rN%iOOUaut*=-9bk-Nd>zwTDY{sqKMfSZfo1fjQ%(PNML`~n5 zO#}U`>KD0h-!z| zRnpgQt>lHuVXJw5cJTUe!XZb%pm-wQeIQ?2rB!zE!(shG%jfCpiGtYj#k%7&bmzx+?($N%sCsvC~|%u{R!v|=!isJ!oFfV)NAN}E@cRU zE6^p_oFBP_xwhX)_raN#sxi9{j}Jn%e1S7ACv&&4ZNLs!8+Z~w&bsxna`A$c-9Oc( zx{r=c8zNz+(joG9KZk4LbAl}X_3Oxhc~~SAV(cN;xVe5p6TA`>Pj8{si}Wimp@<*9 zqq!L^30`U7%STyRe6xKe>VVJ7=gOyTuQ0NpwbmGMW;J$jTglPUF)Jr0g0sv}5IS#f z+}sq{`_^Crqd)F|^B@stu3K;YoJH^5|KKIPsubLYD!9Zg0#VD#Dl<~hDC?DKR4=Q5C7_i*JgDr) z<^C;~;Wqje9{I3aQU}=O4E^A6!Aj$qJWDX*;ASx(pf?ooA7DPK7(mC~by!^{`9IDq z1hTm0T}XX3xHIP-nWo#9t@jp2#SuH7b6C_+fcL%y_t1qM%>h|f&u4QbA66$g4L(T@ zv=8mHcv25=ne^>T`lh;aVqM|jhW^^Xy*$_T1yh0AnK!a)muY!_^Rt$R2vxVqWOY@Q#`edEt)w>rRbzSV1UG6T+1fj~gsEabaLZ8KE; zyKaene>TWqHo){zCuSyu;TlUM!-W!BIGs~{?gtCtntwrHsrmu95{)L&?bCD~F?VKW zhMtQAHv|N?nhVZdWwgj`U1`rycNiGId%n~lOG%=b!I;R!WL4Fc`5QGl$g=TlorLPbmepqVpN1*TXqgM1dYiz|m6@ z61rf{Cqn+jtbtoQT|S-LKDgaH_!|5>^t4<4c3*}%J603g0O|m5>xWgX<~V@xpIKfm zyqsGTk1 z9s$*_9f#{qb8%g6^LK&+dU!D8vOpGU9tH=158%MmxHnj0*0#3VN=ix}3rH1&VRlxZ zN7pL1v!VqZ>k90s-9X~==gH{fcGd>7{2xSpru3Qx)1qatt?6`>IdtAjPJEN^$JF0Wq;yZ^Cm{<-#+vA2B$xk zBCRLTOgnqC8i|+j)I@ None: + table = TimeSeries( + { + "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "target": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + }, + target_name="target", + time_name="time", + feature_names=None, + ) + lag_plot = table.plot_lagplot(lag=1) + assert lag_plot == snapshot_png + + +def test_should_raise_if_column_contains_non_numerical_values() -> None: + table = TimeSeries( + { + "time": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "feature_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "target": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], + }, + target_name="target", + time_name="time", + feature_names=None, + ) + with pytest.raises( + NonNumericColumnError, + match=( + r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThis time series target" + r" contains" + r" non-numerical columns." + ), + ): + table.plot_lagplot(2)