From d74b5fa2c73c20e158bc0bb2acbb77bc0d49025c Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Fri, 27 Oct 2023 08:53:40 +0200
Subject: [PATCH 01/22] Add names of metrics as attribute to output of score()

---
 R/score.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/R/score.R b/R/score.R
index c43122cee..acf1dc4e8 100644
--- a/R/score.R
+++ b/R/score.R
@@ -129,6 +129,8 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) {
     return()
   }, ...)
 
+  setattr(data, "metrics", names(metrics))
+
   return(data[])
 }
 
@@ -156,6 +158,8 @@ score.scoringutils_point <- function(data, metrics = metrics_point, ...) {
     return()
   }, ...)
 
+  setattr(data, "metrics", names(metrics))
+
   return(data[])
 }
 
@@ -187,6 +191,7 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) {
     by = forecast_unit
   ]
 
+  setattr(data, "metrics", names(metrics))
   return(data[])
 }
 
@@ -206,5 +211,7 @@ score.scoringutils_quantile <- function(data, metrics = NULL, ...) {
     ...
   )
 
+  setattr(data, "metrics", metrics)
+
   return(scores[])
 }

From f841380bdc1854b61ec16b5e299837e69f28c89a Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Fri, 27 Oct 2023 09:31:00 +0200
Subject: [PATCH 02/22] fix small issue with stored available metrics file

---
 data/metrics_sample.rda | Bin 9653 -> 7745 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/data/metrics_sample.rda b/data/metrics_sample.rda
index 0f174907bb4585eaec3702a7802f02e936b2b670..0ab2f8f1429b4ae43a7e4d8e3cb172541a1803bb 100644
Binary files a/data/metrics_sample.rda and b/data/metrics_sample.rda differ
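As a quick illustration of what PATCH 01 enables -- this is an orientation sketch, not part of the patch series itself, and it assumes `example_quantile`, the example dataset shipped with scoringutils:

library(scoringutils)
scores <- score(example_quantile)
# the names of the metrics used for scoring now travel with the result
attr(scores, "metrics")
# (PATCH 03 below renames this attribute to "metric_names")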
zv-O`P4YpK%26o)e7M8!s<@LROk5<#;N~} zyQN4bIyI0q%?-o0g`Z4>A+`KjenE|bj1oEeAYc0 zZ0)b}X-^|cA|2h`^S3xnytOjr-S541uV9WTEk5NLw6NctMV1b3>1X6!QNk!%e(J}Pa=1@QkGz+?dY7hC`0u(-%Rce=YCmtCWwn~-R^H|*|?@?&6r03 z(iweu=KMR__HFUV-cI^G4f&Es{i)|8?{#RA`Zo~ia=!Kdh0T_E-){#wli+^m=j;?+ zhn|UpYM4&5bhRj2*sGclP*n17!Shpt5AHsowp zn3oVRjLO1;G7Ag>7{>{~h>HY>jLI^I$)g;zG02R@j2wzAix&WhsHBSslqLv(AskFz rqjte#8D%2~!q6 Date: Fri, 27 Oct 2023 13:47:42 +0200 Subject: [PATCH 03/22] set attributes in `score()`, add functionality to preserve attributes even if they get lost in data.table operations --- R/get_-functions.R | 24 ++++++++++++++++++++++++ R/score.R | 12 ++++++++---- R/summarise_scores.R | 17 +++++++++++++++++ R/utils.R | 17 +++++++++++++++++ man/assign_attributes.Rd | 25 +++++++++++++++++++++++++ man/get_scoringutils_attributes.Rd | 18 ++++++++++++++++++ 6 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 man/assign_attributes.Rd create mode 100644 man/get_scoringutils_attributes.Rd diff --git a/R/get_-functions.R b/R/get_-functions.R index f17f79f2f..dad5da1f2 100644 --- a/R/get_-functions.R +++ b/R/get_-functions.R @@ -215,3 +215,27 @@ get_duplicate_forecasts <- function(data, forecast_unit = NULL) { out[, scoringutils_InternalDuplicateCheck := NULL] return(out[]) } + + +#' @title Get a list of all attributes of a scoringutils object +#' +#' @param object A object of class `scoringutils_` +#' +#' @return A named list with the attributes of that object. +#' @keywords internal +get_scoringutils_attributes <- function(object) { + possible_attributes <- c( + "by", + "forecast_unit", + "forecast_type", + "metric_names", + "messages", + "warnings" + ) + + attr_list <- list() + for (attr_name in possible_attributes) { + attr_list[[attr_name]] <- attr(object, attr_name) + } + return(attr_list) +} diff --git a/R/score.R b/R/score.R index acf1dc4e8..4ba62e229 100644 --- a/R/score.R +++ b/R/score.R @@ -129,7 +129,7 @@ score.scoringutils_binary <- function(data, metrics = metrics_binary, ...) { return() }, ...) - setattr(data, "metrics", names(metrics)) + setattr(data, "metric_names", names(metrics)) return(data[]) @@ -158,7 +158,7 @@ score.scoringutils_point <- function(data, metrics = metrics_point, ...) { return() }, ...) - setattr(data, "metrics", names(metrics)) + setattr(data, "metric_names", names(metrics)) return(data[]) } @@ -191,7 +191,7 @@ score.scoringutils_sample <- function(data, metrics = metrics_sample, ...) { by = forecast_unit ] - setattr(data, "metrics", names(metrics)) + setattr(data, "metric_names", names(metrics)) return(data[]) } @@ -211,7 +211,11 @@ score.scoringutils_quantile <- function(data, metrics = NULL, ...) { ... ) - setattr(data, "metrics", metrics) + setattr(scores, "metric_names", metrics) + # manual hack to make sure that the correct attributes are there. + setattr(scores, "forecast_unit", forecast_unit) + setattr(scores, "forecast_type", "quantile") + scores <- new_scoringutils(scores, "scoringutils_quantile") return(scores[]) } diff --git a/R/summarise_scores.R b/R/summarise_scores.R index 5283a8061..9da825fe9 100644 --- a/R/summarise_scores.R +++ b/R/summarise_scores.R @@ -103,6 +103,14 @@ summarise_scores <- function(scores, stop("You cannot specify both 'across' and 'by'. 
Please choose one.") } + if (is.null(attr(scores, "metric_names"))) { + stop("`scores` needs to have an attribute `metric_names` with the names of + the metrics that were used for scoring.") + } + + # store attributes as they may be dropped in data.table operations + stored_attributes <- get_scoringutils_attributes(scores) + # preparations --------------------------------------------------------------- # get unit of a single forecast forecast_unit <- get_forecast_unit(scores) @@ -185,6 +193,8 @@ summarise_scores <- function(scores, scores[, "quantile_coverage" := NULL] } + scores <- assign_attributes(scores, stored_attributes) + return(scores[]) } @@ -289,6 +299,9 @@ check_summary_params <- function(scores, add_coverage <- function(scores, by, ranges = c(50, 90)) { + + stored_attributes <- get_scoringutils_attributes(scores) + summarised_scores <- summarise_scores( scores, by = c(by, "range") @@ -310,5 +323,9 @@ add_coverage <- function(scores, ) scores_with_coverage <- merge(scores, coverages, by = by) + scores_with_coverage <- assign_attributes( + scores_with_coverage, stored_attributes + ) return(scores_with_coverage[]) } + diff --git a/R/utils.R b/R/utils.R index d0e077c68..22c5dd7a9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -131,3 +131,20 @@ filter_function_args <- function(fun, args) { } } + +#' @title Assign attributes to an object from a named list +#' +#' Every list item will be made an attribute of the object. +#' @param object An object to assign attributes to +#' @param attribute_list A named list of attributes to assign to the object. +#' +#' @return The object with new attributes according to the contents of +#' `attribute_list` +#' @keywords internal +assign_attributes <- function(object, attribute_list) { + for (i in seq_along(attribute_list)) { + setattr(object, names(attribute_list)[i], attribute_list[[i]]) + } + return(object) +} + diff --git a/man/assign_attributes.Rd b/man/assign_attributes.Rd new file mode 100644 index 000000000..f9423bab9 --- /dev/null +++ b/man/assign_attributes.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{assign_attributes} +\alias{assign_attributes} +\title{Assign attributes to an object from a named list + +Every list item will be made an attribute of the object.} +\usage{ +assign_attributes(object, attribute_list) +} +\arguments{ +\item{object}{An object to assign attributes to} + +\item{attribute_list}{A named list of attributes to assign to the object.} +} +\value{ +The object with new attributes according to the contents of +\code{attribute_list} +} +\description{ +Assign attributes to an object from a named list + +Every list item will be made an attribute of the object. +} +\keyword{internal} diff --git a/man/get_scoringutils_attributes.Rd b/man/get_scoringutils_attributes.Rd new file mode 100644 index 000000000..82ce6dcbe --- /dev/null +++ b/man/get_scoringutils_attributes.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_-functions.R +\name{get_scoringutils_attributes} +\alias{get_scoringutils_attributes} +\title{Get a list of all attributes of a scoringutils object} +\usage{ +get_scoringutils_attributes(object) +} +\arguments{ +\item{object}{A object of class \code{scoringutils_}} +} +\value{ +A named list with the attributes of that object. 
+} +\description{ +Get a list of all attributes of a scoringutils object +} +\keyword{internal} From b753f322bac0d9d2ac3599b860678e03223df677 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 14:08:23 +0200 Subject: [PATCH 04/22] update check_columns_present to check for character input --- R/check-input-helpers.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R index 8d4aed213..335e8d34d 100644 --- a/R/check-input-helpers.R +++ b/R/check-input-helpers.R @@ -327,6 +327,10 @@ check_duplicates <- function(data, forecast_unit = NULL) { #' #' @keywords check-inputs check_columns_present <- function(data, columns) { + if (is.null(columns)) { + return(TRUE) + } + assert_character(columns, min.len = 1) colnames <- colnames(data) for (x in columns){ if (!(x %in% colnames)) { From 4e14cc671e444acc90598876dac37247475bc735 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 14:40:52 +0200 Subject: [PATCH 05/22] Update tests to check_columns_present --- tests/testthat/test-check_forecasts.R | 2 +- tests/testthat/test-summarise_scores.R | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-check_forecasts.R b/tests/testthat/test-check_forecasts.R index eeedb8936..ece7ff644 100644 --- a/tests/testthat/test-check_forecasts.R +++ b/tests/testthat/test-check_forecasts.R @@ -10,7 +10,7 @@ test_that("validate() function has an error for empty data.frame", { test_that("check_columns_present() works", { expect_equal( check_columns_present(example_quantile, c("observed", "predicted", "nop")), - "Data needs to have a column called 'nop'" + "Column 'nop' not found in data" ) expect_true( check_columns_present(example_quantile, c("observed", "predicted")) diff --git a/tests/testthat/test-summarise_scores.R b/tests/testthat/test-summarise_scores.R index 6e6f8b977..4b4f092db 100644 --- a/tests/testthat/test-summarise_scores.R +++ b/tests/testthat/test-summarise_scores.R @@ -17,13 +17,15 @@ test_that("summarise_scores() works without any arguments", { test_that("summarise_scores() handles wrong by argument well", { - expect_error(summarise_scores(scores, by = "not_present"), - "The following items in `by` are notvalid column names of the data: 'not_present'. Check and run `summarise_scores()` again", # nolint + expect_error( + summarise_scores(scores, by = "not_present"), + "Column 'not_present' not found in data.", # nolint fixed = TRUE ) - expect_error(summarise_scores(scores, by = "sample_id"), - "The following items in `by` are notvalid column names of the data: 'sample_id'. 
Check and run `summarise_scores()` again", # nolint + expect_error( + summarise_scores(scores, by = "sample_id"), + "Column 'sample_id' not found in data.", fixed = TRUE ) }) From a7004e09ceec9c4ba350b8d8f90530da720b94b9 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 14:41:10 +0200 Subject: [PATCH 06/22] Small fix to `assign_attributes()` --- R/utils.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/utils.R b/R/utils.R index 22c5dd7a9..52be63fd8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -142,6 +142,9 @@ filter_function_args <- function(fun, args) { #' `attribute_list` #' @keywords internal assign_attributes <- function(object, attribute_list) { + if (is.null(object)) { + return(NULL) + } for (i in seq_along(attribute_list)) { setattr(object, names(attribute_list)[i], attribute_list[[i]]) } From 66a41a1045b26be58cad84ce92715431afba85d0 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 14:41:56 +0200 Subject: [PATCH 07/22] Split out function `add_pairwise_comparison()` from `summarise_scores()` --- R/summarise_scores.R | 75 +++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/R/summarise_scores.R b/R/summarise_scores.R index 9da825fe9..bc5e3c782 100644 --- a/R/summarise_scores.R +++ b/R/summarise_scores.R @@ -134,13 +134,7 @@ summarise_scores <- function(scores, } # check input arguments and check whether relative skill can be computed - relative_skill <- check_summary_params( - scores = scores, - by = by, - relative_skill = relative_skill, - baseline = baseline, - metric = relative_skill_metric - ) + assert(check_columns_present(scores, by)) # get all available metrics to determine names of columns to summarise over cols_to_summarise <- paste0(available_metrics(), collapse = "|") @@ -155,26 +149,14 @@ summarise_scores <- function(scores, # do pairwise comparisons ---------------------------------------------------- if (relative_skill) { - pairwise <- pairwise_comparison( + + scores <- add_pairwise_comparison( scores = scores, - metric = relative_skill_metric, - baseline = baseline, - by = by + by = by, + relative_skill = relative_skill, + relative_skill_metric = relative_skill_metric, + baseline = baseline ) - - if (!is.null(pairwise)) { - # delete unnecessary columns - pairwise[, c( - "compare_against", "mean_scores_ratio", - "pval", "adj_pval" - ) := NULL] - pairwise <- unique(pairwise) - - # merge back - scores <- merge( - scores, pairwise, all.x = TRUE, by = get_forecast_unit(pairwise) - ) - } } # summarise scores ----------------------------------------------------------- @@ -204,6 +186,49 @@ summarise_scores <- function(scores, summarize_scores <- summarise_scores +add_pairwise_comparison <- function(scores, + by = NULL, + relative_skill = FALSE, + relative_skill_metric = "auto", + baseline = NULL) { + # check input arguments and check whether relative skill can be computed + relative_skill <- check_summary_params( + scores = scores, + by = by, + relative_skill = relative_skill, + baseline = baseline, + metric = relative_skill_metric + ) + + # do pairwise comparisons ---------------------------------------------------- + if (relative_skill) { + pairwise <- pairwise_comparison( + scores = scores, + metric = relative_skill_metric, + baseline = baseline, + by = by + ) + + if (!is.null(pairwise)) { + # delete unnecessary columns + pairwise[, c( + "compare_against", "mean_scores_ratio", + "pval", "adj_pval" + ) := NULL] + pairwise <- unique(pairwise) + + # merge back + scores <- merge( + scores, 
pairwise, all.x = TRUE, by = get_forecast_unit(pairwise) + ) + } + } + return(scores) +} + + + + #' @title Check input parameters for [summarise_scores()] #' #' @description A helper function to check the input parameters for From 6cd3cf9be66a5ca67011ae209f6e8be2e1ce9d06 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 14:42:29 +0200 Subject: [PATCH 08/22] Small documentation change, update error message in `check_columns_present()` --- NAMESPACE | 1 + NEWS.md | 2 +- R/check-input-helpers.R | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 750b23796..6408f66bc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -66,6 +66,7 @@ importFrom(Metrics,ae) importFrom(Metrics,ape) importFrom(Metrics,se) importFrom(checkmate,assert) +importFrom(checkmate,assert_character) importFrom(checkmate,assert_data_frame) importFrom(checkmate,assert_data_table) importFrom(checkmate,assert_factor) diff --git a/NEWS.md b/NEWS.md index 070e4ba82..d3487d687 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ This major update and addresses a variety of comments made by reviewers from the Journal of Statistical Software (see preprint of the manuscript [here](https://arxiv.org/abs/2205.07090)). -The update introduces a lot of breaking changes. If you want to keep using the older version, you can download it using `remotes::install_github("epiforecasts/scoringutils@v1.2")`. +The update introduces a lot of breaking changes. If you want to keep using the older version, you can download it using `remotes::install_github("epiforecasts/scoringutils@v1.2.0")`. ## Package updates - In `score()`, required columns "true_value" and "prediction" were renamed and replaced by required columns "observed" and "predicted". Scoring functions now also use the function arguments "observed" and "predicted" everywhere consistently. diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R index 335e8d34d..4baf430fa 100644 --- a/R/check-input-helpers.R +++ b/R/check-input-helpers.R @@ -324,7 +324,7 @@ check_duplicates <- function(data, forecast_unit = NULL) { #' @param columns names of columns to be checked #' @return Returns string with a message with the first issue encountered if #' any of the column names are not in data, otherwise returns TRUE -#' +#' @importFrom checkmate assert_character #' @keywords check-inputs check_columns_present <- function(data, columns) { if (is.null(columns)) { @@ -334,7 +334,7 @@ check_columns_present <- function(data, columns) { colnames <- colnames(data) for (x in columns){ if (!(x %in% colnames)) { - msg <- paste0("Data needs to have a column called '", x, "'") + msg <- paste0("Column '", x, "' not found in data") return(msg) } } From a6ed7c9ad363204b279594f0d9195250343a38ab Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 14:48:07 +0200 Subject: [PATCH 09/22] remove deprecated metric argument from `summarise_scores()` --- R/summarise_scores.R | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/R/summarise_scores.R b/R/summarise_scores.R index bc5e3c782..e6461ba88 100644 --- a/R/summarise_scores.R +++ b/R/summarise_scores.R @@ -32,8 +32,6 @@ #' a relative skill shall be computed. If equal to 'auto' (the default), then #' this will be either interval score, CRPS or Brier score (depending on which #' of these is available in the input data) -#' @param metric `r lifecycle::badge("deprecated")` Deprecated in 1.1.0. Use -#' `relative_skill_metric` instead. #' @param baseline character string with the name of a model. 
If a baseline is #' given, then a scaled relative skill with respect to the baseline will be #' returned. By default (`NULL`), relative skill will not be scaled with @@ -89,16 +87,8 @@ summarise_scores <- function(scores, fun = mean, relative_skill = FALSE, relative_skill_metric = "auto", - metric = deprecated(), baseline = NULL, ...) { - if (lifecycle::is_present(metric)) { - lifecycle::deprecate_warn( - "1.1.0", "summarise_scores(metric)", - "summarise_scores(relative_skill_metric)" - ) - } - if (!is.null(across) && !is.null(by)) { stop("You cannot specify both 'across' and 'by'. Please choose one.") } From c575a1cc0dde7f9199df9798b627cdfced3a5f49 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 27 Oct 2023 15:10:45 +0200 Subject: [PATCH 10/22] simplify code for arguments `across` and `by` in `summarise_scores()` and explicitly store `by` and unsummarised scores as attributes --- R/summarise_scores.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/summarise_scores.R b/R/summarise_scores.R index e6461ba88..87cdae692 100644 --- a/R/summarise_scores.R +++ b/R/summarise_scores.R @@ -98,12 +98,10 @@ summarise_scores <- function(scores, the metrics that were used for scoring.") } - # store attributes as they may be dropped in data.table operations - stored_attributes <- get_scoringutils_attributes(scores) - # preparations --------------------------------------------------------------- # get unit of a single forecast forecast_unit <- get_forecast_unit(scores) + check_attribute_conflict(scores, "forecast_unit", forecast_unit) # if by is not provided, set to the unit of a single forecast if (is.null(by)) { @@ -112,20 +110,27 @@ summarise_scores <- function(scores, # if across is provided, remove from by if (!is.null(across)) { - if (!all(across %in% by)) { + if (!all(across %in% forecast_unit)) { stop( "The columns specified in 'across' must be a subset of the columns ", "that define the forecast unit (possible options are ", - toString(by), + toString(forecast_unit), "). Please check your input and try again." 
       )
     }
-    by <- setdiff(by, across)
+    by <- setdiff(forecast_unit, across)
   }
 
   # check input arguments and check whether relative skill can be computed
   assert(check_columns_present(scores, by))
 
+  # store attributes as they may be dropped in data.table operations
+  stored_attributes <- c(
+    get_scoringutils_attributes(scores),
+    "scoringutils_by" = by,
+    "unsummarised_scores" = scores
+  )
+
   # get all available metrics to determine names of columns to summarise over
   cols_to_summarise <- paste0(available_metrics(), collapse = "|")

From 2ffb2dc071b1a88723f7c72f23f66faa55b7edc4 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 11:54:41 +0200
Subject: [PATCH 11/22] Reintroduce test functions to check whether an object
 has a certain attribute

---
 R/check-input-helpers.R    | 32 ++++++++++++++++++++++++++++++++
 man/check_has_attribute.Rd | 19 +++++++++++++++++++
 man/test_has_attribute.Rd  | 19 +++++++++++++++++++
 3 files changed, 70 insertions(+)
 create mode 100644 man/check_has_attribute.Rd
 create mode 100644 man/test_has_attribute.Rd

diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R
index 4baf430fa..7cae1d837 100644
--- a/R/check-input-helpers.R
+++ b/R/check-input-helpers.R
@@ -392,3 +392,35 @@ check_data_columns <- function(data) {
 }
 
 
+#' Check whether an attribute is present
+#' @description Checks whether an object has an attribute
+#' @param object An object to be checked
+#' @return Returns TRUE if attribute is there and an error message as
+#'   a string otherwise
+#' @keywords check-inputs
+check_has_attribute <- function(object, attribute) {
+  if (is.null(attr(object, attribute))) {
+    return(
+      paste0("Found no attribute `", attribute, "`")
+    )
+  } else {
+    return(TRUE)
+  }
+}
+
+#' Test whether an attribute is present
+#' @description Tests whether an object has an attribute
+#' @param object An object to be checked
+#' @return Returns TRUE if the attribute is present and
+#'   FALSE otherwise
+#' @keywords check-inputs
+test_has_attribute <- function(object, attribute) {
+  check <- check_has_attribute(object, attribute)
+  if (is.logical(check)) {
+    return(TRUE)
+  } else {
+    return(FALSE)
+  }
+}
+
+

diff --git a/man/check_has_attribute.Rd b/man/check_has_attribute.Rd
new file mode 100644
index 000000000..1372c1645
--- /dev/null
+++ b/man/check_has_attribute.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check-input-helpers.R
+\name{check_has_attribute}
+\alias{check_has_attribute}
+\title{Check whether an attribute is present}
+\usage{
+check_has_attribute(object, attribute)
+}
+\arguments{
+\item{object}{An object to be checked}
+}
+\value{
+Returns TRUE if attribute is there and an error message as
+a string otherwise
+}
+\description{
+Checks whether an object has an attribute
+}
+\keyword{check-inputs}

diff --git a/man/test_has_attribute.Rd b/man/test_has_attribute.Rd
new file mode 100644
index 000000000..17e96d59b
--- /dev/null
+++ b/man/test_has_attribute.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check-input-helpers.R
+\name{test_has_attribute}
+\alias{test_has_attribute}
+\title{Test whether an attribute is present}
+\usage{
+test_has_attribute(object, attribute)
+}
+\arguments{
+\item{object}{An object to be checked}
+}
+\value{
+Returns TRUE if the attribute is present and
+FALSE otherwise
+}
+\description{
+Tests whether an object has an attribute
+}
+\keyword{check-inputs}

From ea717ad6ab1f1224c277ea0af1f71df6938e6546 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 12:54:30 +0200
Subject: [PATCH 12/22] make add_pairwise_comparison independent of
 summarise_scores()

---
 NAMESPACE            |  1 +
 R/get_-functions.R   |  2 +-
 R/summarise_scores.R | 55 +++++++++++++++++++++++++++-----------------
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 6408f66bc..b5a893ac4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,6 +14,7 @@ S3method(validate,scoringutils_quantile)
 S3method(validate,scoringutils_sample)
 export(abs_error)
 export(add_coverage)
+export(add_pairwise_comparison)
 export(ae_median_quantile)
 export(ae_median_sample)
 export(avail_forecasts)

diff --git a/R/get_-functions.R b/R/get_-functions.R
index dad5da1f2..d8b3e2376 100644
--- a/R/get_-functions.R
+++ b/R/get_-functions.R
@@ -225,7 +225,7 @@ get_duplicate_forecasts <- function(data, forecast_unit = NULL) {
 #' @keywords internal
 get_scoringutils_attributes <- function(object) {
   possible_attributes <- c(
-    "by",
+    "scoringutils_by",
     "forecast_unit",

diff --git a/R/summarise_scores.R b/R/summarise_scores.R
index 87cdae692..fd530c845 100644
--- a/R/summarise_scores.R
+++ b/R/summarise_scores.R
@@ -85,9 +85,6 @@ summarise_scores <- function(scores,
                              by = NULL,
                              across = NULL,
                              fun = mean,
-                             relative_skill = FALSE,
-                             relative_skill_metric = "auto",
-                             baseline = NULL,
                              ...) {
   if (!is.null(across) && !is.null(by)) {
     stop("You cannot specify both 'across' and 'by'. Please choose one.")
@@ -127,8 +124,10 @@ summarise_scores <- function(scores,
   # store attributes as they may be dropped in data.table operations
   stored_attributes <- c(
     get_scoringutils_attributes(scores),
-    "scoringutils_by" = by,
-    "unsummarised_scores" = scores
+    list(
+      "scoringutils_by" = by,
+      "unsummarised_scores" = scores
+    )
   )
 
   # get all available metrics to determine names of columns to summarise over
@@ -142,18 +141,6 @@ summarise_scores <- function(scores,
     .SDcols = colnames(scores) %like% cols_to_summarise
   ]
 
-  # do pairwise comparisons ----------------------------------------------------
-  if (relative_skill) {
-
-    scores <- add_pairwise_comparison(
-      scores = scores,
-      by = by,
-      relative_skill = relative_skill,
-      relative_skill_metric = relative_skill_metric,
-      baseline = baseline
-    )
-  }
-
   # summarise scores -----------------------------------------------------------
   scores <- scores[, lapply(.SD, fun, ...),
     by = c(by),
@@ -171,16 +158,6 @@ summarise_scores <- function(scores,
   }
 
   scores <- assign_attributes(scores, stored_attributes)
-
-
   return(scores[])
 }
 
@@ -182,16 +167,31 @@ summarize_scores <- summarise_scores
 
+#' @export
 add_pairwise_comparison <- function(scores,
                                     by = NULL,
-                                    relative_skill = FALSE,
                                     relative_skill_metric = "auto",
                                     baseline = NULL) {
+
+  stored_attributes <- get_scoringutils_attributes(scores)
+
+  if (!is.null(attr(scores, "unsummarised_scores"))) {
+    scores <- attr(scores, "unsummarised_scores")
+  }
+
+  if (is.null(by) && !is.null(stored_attributes[["scoringutils_by"]])) {
+    by <- stored_attributes[["scoringutils_by"]]
+  } else if (is.null(by)) {
+    # This needs to be double checked, because getting the forecast unit is not
+    # so easy when you can name your own metrics.
+    by <- get_forecast_unit(scores)
+  }
+
   # check input arguments and check whether relative skill can be computed
   relative_skill <- check_summary_params(
     scores = scores,
     by = by,
-    relative_skill = relative_skill,
+    relative_skill = TRUE,
     baseline = baseline,
     metric = relative_skill_metric
   )
 
   }
+
+
+  # get all available metrics to determine names of columns to summarise over
+  cols_to_summarise <- paste0(available_metrics(), collapse = "|")
+  scores <- scores[, lapply(.SD, mean),
+    by = c(by),
+    .SDcols = colnames(scores) %like% cols_to_summarise
+  ]
+  # Maybe this should use summarise scores instead?
+  # scores <- summarise_scores(scores, by = by, fun = mean)
+
+  scores <- assign_attributes(scores, stored_attributes)
+
   return(scores)
 }

From a7756986bbc32a29a83636dbab0016bb524f0085 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 12:59:51 +0200
Subject: [PATCH 13/22] fix small error in list of available metrics for
 sample-based forecasts

---
 data/metrics_sample.rda                | Bin 7745 -> 11116 bytes
 inst/create-list-available-forecasts.R |   1 -
 2 files changed, 1 deletion(-)

diff --git a/data/metrics_sample.rda b/data/metrics_sample.rda
index 0ab2f8f1429b4ae43a7e4d8e3cb172541a1803bb..0040c4485bd8242eda678df7169cc9397a7ed3d3 100644
Binary files a/data/metrics_sample.rda and b/data/metrics_sample.rda differ
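For orientation before the test fixes that follow: the workflow PATCH 12 establishes looks roughly like this. A minimal sketch based on the updated tests, assuming the bundled `example_quantile` data and that "EuroCOVIDhub-baseline" is one of the models in it:

library(scoringutils)
scores <- score(example_quantile)
# summarise first; `by` and the unsummarised scores are stored as attributes
summarised <- summarise_scores(scores, by = "model")
# relative skill is now added in a separate, explicit step
with_skill <- add_pairwise_comparison(summarised)
# optionally scale relative skill against a baseline model
with_baseline <- add_pairwise_comparison(summarised, baseline = "EuroCOVIDhub-baseline")

Because `add_pairwise_comparison()` falls back to the stored `scoringutils_by` attribute, no `by` argument is needed in this two-step form.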
z?9u>gL=%N#7=ZAEKn&}Uog|A?E~P{4rA!E7Wo06ODhdD^NJ3>mm{FkwC<;|YKbN&1 z0R7YdO{ETZAncghgIh@Sx8KVJ$D#VQT9^a+oqhx;>8|#qaiqn`6&_x|v4@;{!=?No zZREGth2C-YEq^j2B17;a31r71y(QPAa#}vVv24#TPak8*?PSaGB*6Cmvc9gL+3fuf zmj`n?Wo36g5Mo=J2cl3ISEYJrbMNeYovdRhjt|`+gUkyB#&0dv%tVYRuX-65{P^CC ztio00n*+s%lhj6UR-*&@2Sq=Q^VGHu=lQ#!a|p#Illo+$cxD$wJb$V{;;#yggUFNB zZz>qkb8W_r^u{7#c?&i_>bU<;qJ1a%+1`lSR4T6Acn(M?iY@t%P-0xNEe%n#ec02> z=J)Fyz|fLD|2JnP_hEMXk)BsA{>Fn?$Z$x_g}`~}T-;tZ@}j zCcICKCoT-MwtcUM38#Zu1coV|2b!*NOZo0o*-cTXH5e!%F#{tqu*M9-2qCeWi7*<- z%FIkEgDhN}%vd2J!bu_pfJqV#`iatc Date: Sat, 28 Oct 2023 13:20:30 +0200 Subject: [PATCH 14/22] fix failing tests for add_pairwise_comparison (leaving aside an issue about how forecast units are handled) --- tests/testthat/test-pairwise_comparison.R | 61 +++++++++-------------- tests/testthat/test-summarise_scores.R | 47 ++++++----------- 2 files changed, 38 insertions(+), 70 deletions(-) diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R index 3435ba1e2..0c55cbce3 100644 --- a/tests/testthat/test-pairwise_comparison.R +++ b/tests/testthat/test-pairwise_comparison.R @@ -53,31 +53,25 @@ test_that("pairwise_comparison() works", { ) # evaluate the toy forecasts, once with and once without a baseline model specified - eval_without_baseline <- suppressMessages(score(data_formatted)) - - eval_without_baseline <- suppressMessages( - summarise_scores(eval_without_baseline, - relative_skill = TRUE, - by = c( - "model", "location", "target_end_date", - "target_variable" - ) + eval <- suppressMessages(score(data_formatted)) + + # check with relative skills + eval_without_rel_skill <- summarise_scores( + eval, + by = c( + "model", "location", "target_end_date", + "target_variable" ) ) - eval_with_baseline <- suppressMessages(score(data_formatted, - count_median_twice = FALSE - )) + eval_without_baseline <- suppressMessages( + add_pairwise_comparison(eval_without_rel_skill) + ) + eval_with_baseline <- suppressMessages( - summarise_scores(eval_with_baseline, - baseline = "m1", - relative_skill = TRUE, - by = c( - "model", "location", "target_end_date", - "target_variable" - ) - ) + add_pairwise_comparison(eval_without_rel_skill, baseline = "m1") ) + # extract the relative_skill values relative_skills_without <- eval_without_baseline[, .( model = unique(model), @@ -206,15 +200,9 @@ test_that("pairwise_comparison() works", { ratios_scaled <- geometric_mean_ratios / geometric_mean_ratios["m1"] names(ratios_scaled) <- NULL - eval_with_baseline <- suppressMessages( - suppressMessages(score(data_formatted, - count_median_twice = FALSE - ))) - eval_with_baseline <- summarise_scores(eval_with_baseline, - baseline = "m1", - relative_skill = TRUE, - by = c("model", "location") - ) + eval <- score(data_formatted) + eval_summarised <- summarise_scores(eval, by = c("model", "location")) + eval_with_baseline <- add_pairwise_comparison(eval_summarised, baseline = "m1") relative_skills_with <- eval_with_baseline[ location == "location_3", @@ -229,20 +217,16 @@ test_that("pairwise_comparison() works", { test_that("pairwise_comparison() work in score() with integer data", { eval <- suppressMessages(score(data = example_integer)) - eval <- suppressMessages( - summarise_scores(eval, by = "model", relative_skill = TRUE) - ) - + eval_summarised <- summarise_scores(eval, by = "model") + eval <- add_pairwise_comparison(eval_summarised) expect_true("relative_skill" %in% colnames(eval)) }) test_that("pairwise_comparison() work in score() with binary data", { eval <- 
suppressMessages(score(data = example_binary)) - eval <- suppressMessages( - summarise_scores(eval, by = "model", relative_skill = TRUE) - ) - + eval_summarised <- summarise_scores(eval, by = "model") + eval <- add_pairwise_comparison(eval_summarised) expect_true("relative_skill" %in% colnames(eval)) }) @@ -278,7 +262,8 @@ test_that("pairwise_comparison() works inside and outside of score()", { )) eval2 <- suppressMessages(score(data = example_continuous)) - eval2 <- summarise_scores(eval2, by = "model", relative_skill = TRUE) + eval2_summarised <- summarise_scores(eval2, by = "model") + eval2 <- add_pairwise_comparison(eval2_summarised) expect_equal( sort(unique(pairwise$relative_skill)), sort(eval2$relative_skill) diff --git a/tests/testthat/test-summarise_scores.R b/tests/testthat/test-summarise_scores.R index 4b4f092db..736264933 100644 --- a/tests/testthat/test-summarise_scores.R +++ b/tests/testthat/test-summarise_scores.R @@ -36,58 +36,41 @@ test_that("summarise_scores() works with point forecasts in a quantile format", scores <- suppressMessages(score(ex)) - summarise_scores(scores, by = "model", na.rm = TRUE) + scores_summarised <-summarise_scores(scores, by = "model", na.rm = TRUE) expect_warning( expect_warning( - summarise_scores( - scores, by = "model", relative_skill = TRUE, na.rm = TRUE) + add_pairwise_comparison(scores_summarised) ) ) scores_point <- suppressMessages(score(example_point)) + summarised_scores <- summarise_scores(scores_point, by = "model") - # expect_warning( - # expect_warning( - # summarise_scores( - # scores_point, by = "model", relative_skill = TRUE, na.rm = TRUE) - # ) - # ) + # this currently does not work and needs to be fixed + # add_pairwise_comparison(summarised_scores, relative_skill_metric = "se_point") }) test_that("summarise_scores() can compute relative measures", { ex <- data.table::copy(example_quantile) scores <- suppressMessages(score(ex)) - expect_equal( - summarise_scores( - scores, by = "model", relative_skill = TRUE - )[, relative_skill], - c(1.6, 0.81, 0.75, 1.03), tolerance = 0.01 + scores_with <- add_pairwise_comparison( + summarise_scores(scores, by = "model") ) expect_equal( - summarise_scores( - scores, by = "model", relative_skill = TRUE, - relative_skill_metric = "ae_median" - )[, relative_skill], - c(1.6, 0.78, 0.77, 1.04), tolerance = 0.01 + scores_with[, relative_skill], + c(1.6, 0.81, 0.75, 1.03), tolerance = 0.01 ) -}) -test_that("summarise_scores() metric is deprecated", { - ex <- data.table::copy(example_quantile) - scores <- suppressMessages(score(ex)) + scores_with <- add_pairwise_comparison( + summarise_scores(scores, by = "model"), + relative_skill_metric = "ae_median" + ) expect_equal( - suppressWarnings(summarise_scores( - scores, by = "model", metric = "auto", relative_skill = TRUE - ))[, relative_skill], - c(1.6, 0.81, 0.75, 1.03), tolerance = 0.01 - ) - expect_snapshot( - x <- summarise_scores( - scores, by = "model", metric = "auto", relative_skill = TRUE - ) + scores_with[, relative_skill], + c(1.6, 0.78, 0.77, 1.04), tolerance = 0.01 ) }) From 5413abbdfbb313113a717ee44cc1677023f07aa9 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Sat, 28 Oct 2023 13:27:22 +0200 Subject: [PATCH 15/22] Fix issue with get_forecast_unit() such that it now also takes the names of the metrics used during scoring into account --- R/get_-functions.R | 7 +++++-- man/get_forecast_unit.Rd | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/get_-functions.R b/R/get_-functions.R index d8b3e2376..8c826e9d4 100644 --- 
+++ b/R/get_-functions.R
@@ -133,7 +133,8 @@ get_target_type <- function(data) {
 #' the column names that define where a single forecast was made for.
 #' This just takes all columns that are available in the data and subtracts
 #' the columns that are protected, i.e. those returned by
-#' [get_protected_columns()].
+#' [get_protected_columns()] as well as the names of the metrics that were
+#' specified during scoring, if any.
 #'
 #' @inheritParams validate
 #'
@@ -144,7 +145,9 @@
 get_forecast_unit <- function(data) {
   protected_columns <- get_protected_columns(data)
-  forecast_unit <- setdiff(colnames(data), protected_columns)
+  protected_columns <- c(protected_columns, attr(data, "metric_names"))
+
+  forecast_unit <- setdiff(colnames(data), unique(protected_columns))
 
   return(forecast_unit)
 }
diff --git a/man/get_forecast_unit.Rd b/man/get_forecast_unit.Rd
index 82d3ce1ca..f17349853 100644
--- a/man/get_forecast_unit.Rd
+++ b/man/get_forecast_unit.Rd
@@ -33,6 +33,7 @@ Helper function to get the unit of a single forecast, i.e. the
 column names that define where a single forecast was made for.
 This just takes all columns that are available in the data and subtracts
 the columns that are protected, i.e. those returned by
-\code{\link[=get_protected_columns]{get_protected_columns()}}.
+\code{\link[=get_protected_columns]{get_protected_columns()}} as well as the names of the metrics that were
+specified during scoring, if any.
 }
 \keyword{internal}

From 9006f6a6c07487eb52ae90868012756517c5bc35 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 13:53:34 +0200
Subject: [PATCH 16/22] Fix remaining test related to pairwise comparisons and
 checking the forecast_unit, update usage of add_pairwise_comparison in
 vignette code.
---
 R/pairwise-comparisons.R               |  6 +++++-
 README.Rmd                             |  5 +++--
 tests/testthat/test-summarise_scores.R | 17 +++++++++++++++--
 vignettes/scoringutils.Rmd             | 10 ++++------
 4 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R
index 2c3dd5815..5b57c7337 100644
--- a/R/pairwise-comparisons.R
+++ b/R/pairwise-comparisons.R
@@ -69,7 +69,11 @@ pairwise_comparison <- function(scores,
 
   metric <- match.arg(metric, c("auto", available_metrics()))
 
-  scores <- data.table::as.data.table(scores)
+  if (!is.data.table(scores)) {
+    scores <- as.data.table(scores)
+  } else {
+    scores <- copy(scores)
+  }
 
   # determine metric automatically
   if (metric == "auto") {
diff --git a/README.Rmd b/README.Rmd
index b4a096a16..77584d145 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -94,8 +94,9 @@ example_quantile %>%
   score() %>%
   add_coverage(ranges = c(50, 90), by = c("model", "target_type")) %>%
   summarise_scores(
-    by = c("model", "target_type"),
-    relative_skill = TRUE,
+    by = c("model", "target_type")
+  ) %>%
+  add_pairwise_comparison(
     baseline = "EuroCOVIDhub-ensemble"
   ) %>%
   summarise_scores(
diff --git a/tests/testthat/test-summarise_scores.R b/tests/testthat/test-summarise_scores.R
index 736264933..3c5ab37fc 100644
--- a/tests/testthat/test-summarise_scores.R
+++ b/tests/testthat/test-summarise_scores.R
@@ -46,8 +46,21 @@ test_that("summarise_scores() works with point forecasts in a quantile format",
   scores_point <- suppressMessages(score(example_point))
   summarised_scores <- summarise_scores(scores_point, by = "model")
 
-  # this currently does not work and needs to be fixed
-  # add_pairwise_comparison(summarised_scores, relative_skill_metric = "se_point")
+  expect_no_condition(
+    pw_point <- add_pairwise_comparison(
+      summarised_scores,
+      relative_skill_metric = "se_point"
+    )
+  )
+
+  pw_manual <- pairwise_comparison(
+    scores_point, by = "model", metric = "se_point"
+  )
+
+  expect_equal(
+    pw_point$relative_skill,
+    unique(pw_manual$relative_skill)
+  )
 })
 
 test_that("summarise_scores() can compute relative measures", {
diff --git a/vignettes/scoringutils.Rmd b/vignettes/scoringutils.Rmd
index a6f5e30e5..6b55c25b5 100644
--- a/vignettes/scoringutils.Rmd
+++ b/vignettes/scoringutils.Rmd
@@ -234,9 +234,8 @@ In order to better compare models against each other we can use relative scores
 
 ```{r}
 score(example_quantile) %>%
-  summarise_scores(by = c("model", "target_type"),
-                   relative_skill = TRUE,
-                   baseline = "EuroCOVIDhub-ensemble")
+  summarise_scores(by = c("model", "target_type")) %>%
+  add_pairwise_comparison(baseline = "EuroCOVIDhub-ensemble")
 ```
 
@@ -340,9 +339,8 @@ example_quantile %>%
 
 ```{r}
 example_quantile %>%
   score() %>%
-  summarise_scores(
-    by = "model", relative_skill = TRUE, baseline = "EuroCOVIDhub-baseline"
-  )
+  summarise_scores(by = "model") %>%
+  add_pairwise_comparison(baseline = "EuroCOVIDhub-baseline")
 ```
 
 If using the `pairwise_comparison()` function, we can also visualise pairwise comparisons by showing the mean score ratios between models. By default, smaller values are better and the model we care about is shown on the y-axis on the left, while the model it is compared against is shown on the x-axis at the bottom. In the example above, the EuroCOVIDhub-ensemble performs best (it only has values smaller than 1), while the EuroCOVIDhub-baseline performs worst (and only has values larger than 1).
 For cases, the UMass-MechBayes model is of course excluded as there are no case forecasts available and therefore the set of overlapping forecasts is empty.

From 66bb3d3aa599680eb7100f02f1ac7725629744c0 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 14:06:03 +0200
Subject: [PATCH 17/22] Fix some documentation-related warnings

---
 R/check-input-helpers.R                   | 20 ++--------
 R/summarise_scores.R                      | 43 ++++++++++++++--------
 R/utils.R                                 |  1 +
 man/add_coverage.Rd                       |  3 +-
 man/add_pairwise_comparison.Rd            | 43 ++++++++++++++++++++++
 man/assign_attributes.Rd                  |  6 +--
 man/check_has_attribute.Rd                |  4 +-
 man/check_summary_params.Rd               | 16 --------
 man/metrics_sample.Rd                     |  2 +-
 man/summarise_scores.Rd                   | 45 +----------------------
 man/test_has_attribute.Rd                 | 19 ----------
 tests/testthat/_snaps/score.md            | 17 ---------
 tests/testthat/_snaps/summarise_scores.md |  8 ----
 13 files changed, 82 insertions(+), 145 deletions(-)
 create mode 100644 man/add_pairwise_comparison.Rd
 delete mode 100644 man/test_has_attribute.Rd
 delete mode 100644 tests/testthat/_snaps/score.md
 delete mode 100644 tests/testthat/_snaps/summarise_scores.md

diff --git a/R/check-input-helpers.R b/R/check-input-helpers.R
index 7cae1d837..d1fa62edc 100644
--- a/R/check-input-helpers.R
+++ b/R/check-input-helpers.R
@@ -394,33 +394,19 @@ check_data_columns <- function(data) {
 
 #' Check whether an attribute is present
 #' @description Checks whether an object has an attribute
-#' @param data An object to be checked
+#' @param object An object to be checked
+#' @param attribute name of an attribute to be checked
 #' @return Returns TRUE if attribute is there and an error message as
 #'   a string otherwise
 #' @keywords check-inputs
 check_has_attribute <- function(object, attribute) {
   if (is.null(attr(object, attribute))) {
     return(
-      paste0("Found no attribue `", attribute, "`")
+      paste0("Found no attribute `", attribute, "`")
     )
   } else {
     return(TRUE)
   }
 }
-
-#' Test whether an attribute is present
-#' @description Tests whether an object has an attribute
-#' @param data An object to be checked
-#' @return Returns TRUE if attribute is there and FALSE otherwise
-#'   a string otherwise
-#' @keywords check-inputs
-test_has_attribute <- function(object, attribute) {
-  check <- check_has_attribute(object, attribute)
-  if (is.logical(check)) {
-    return(TRUE)
-  } else {
-    return(FALSE)
-  }
-}
-
diff --git a/R/summarise_scores.R b/R/summarise_scores.R
index fd530c845..33c5f76fa 100644
--- a/R/summarise_scores.R
+++ b/R/summarise_scores.R
@@ -21,21 +21,6 @@
 #'   be used or inferred internally if also not specified. Only one of `across`
 #'   and `by` may be used at a time.
 #' @param fun a function used for summarising scores. Default is `mean`.
-#' @param relative_skill logical, whether or not to compute relative
-#'   performance between models based on pairwise comparisons.
-#'   If `TRUE` (default is `FALSE`), then a column called
-#'   'model' must be present in the input data. For more information on
-#'   the computation of relative skill, see [pairwise_comparison()].
-#'   Relative skill will be calculated for the aggregation level specified in
-#'   `by`.
-#' @param relative_skill_metric character with the name of the metric for which
-#'   a relative skill shall be computed. If equal to 'auto' (the default), then
-#'   this will be either interval score, CRPS or Brier score (depending on which
-#'   of these is available in the input data)
-#' @param baseline character string with the name of a model. If a baseline is
-#'   given, then a scaled relative skill with respect to the baseline will be
-#'   returned. By default (`NULL`), relative skill will not be scaled with
-#'   respect to a baseline model.
 #' @param ... additional parameters that can be passed to the summary function
 #'   provided to `fun`. For more information see the documentation of the
 #'   respective function.
@@ -167,6 +152,33 @@ summarise_scores <- function(scores,
 
 summarize_scores <- summarise_scores
 
+
+#' @title Add pairwise comparisons
+#' @description Adds a column with relative skill scores computed by running
+#' pairwise comparisons on the scores.
+#'
+#' A column called 'model' must be present in the input data. For more
+#' information on the computation of relative skill, see
+#' [pairwise_comparison()]. Relative skill will be calculated for the
+#' aggregation level specified in `by`.
+#'
+#' @param scores A data.table of scores as produced by [score()] and
+#' potentially summarised using [summarise_scores()].
+#' @param by character vector with column names to summarise scores by. Default
+#' is `NULL`, in which case the grouping used when the scores were summarised
+#' (or, failing that, the unit of a single forecast) is reused.
+#' @param relative_skill_metric character with the name of the metric for which
+#' a relative skill shall be computed. If equal to 'auto' (the default), then
+#' this will be either interval score, CRPS or Brier score (depending on which
+#' of these is available in the input data)
+#' @param baseline character string with the name of a model. If a baseline is
+#' given, then a scaled relative skill with respect to the baseline will be
+#' returned. By default (`NULL`), relative skill will not be scaled with
+#' respect to a baseline model.
 #' @export
 add_pairwise_comparison <- function(scores,
                                     by = NULL,
@@ -306,7 +318,6 @@ check_summary_params <- function(scores,
 #' @description Adds a column with the coverage of central prediction intervals
 #'   to unsummarised scores as produced by [score()]
 #'
-#' @details
 #' The coverage values that are added are computed according to the values
 #' specified in `by`. If, for example, `by = "model"`, then there will be one
 #' coverage value for every model and [add_coverage()] will compute the coverage
diff --git a/R/utils.R b/R/utils.R
index 52be63fd8..1301aa8fe 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -134,6 +134,7 @@ filter_function_args <- function(fun, args) {
 
 #' @title Assign attributes to an object from a named list
 #'
+#' @description
 #' Every list item will be made an attribute of the object.
 #' @param object An object to assign attributes to
 #' @param attribute_list A named list of attributes to assign to the object.
diff --git a/man/add_coverage.Rd b/man/add_coverage.Rd
index ad658432e..e0abf8161 100644
--- a/man/add_coverage.Rd
+++ b/man/add_coverage.Rd
@@ -23,8 +23,7 @@ summary is present according to the value specified in \code{by}.
 \description{
 Adds a column with the coverage of central prediction intervals
 to unsummarised scores as produced by \code{\link[=score]{score()}}
-}
-\details{
+
 The coverage values that are added are computed according to the values
 specified in \code{by}. If, for example, \code{by = "model"}, then there will be one
 coverage value for every model and \code{\link[=add_coverage]{add_coverage()}} will compute the coverage
diff --git a/man/add_pairwise_comparison.Rd b/man/add_pairwise_comparison.Rd
new file mode 100644
index 000000000..31777dbfc
--- /dev/null
+++ b/man/add_pairwise_comparison.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summarise_scores.R
+\name{add_pairwise_comparison}
+\alias{add_pairwise_comparison}
+\title{Add pairwise comparisons}
+\usage{
+add_pairwise_comparison(
+  scores,
+  by = NULL,
+  relative_skill_metric = "auto",
+  baseline = NULL
+)
+}
+\arguments{
+\item{scores}{A data.table of scores as produced by \code{\link[=score]{score()}} and
+potentially summarised using \code{\link[=summarise_scores]{summarise_scores()}}.}
+
+\item{by}{character vector with column names to summarise scores by. Default
+is \code{NULL}, in which case the grouping used when the scores were summarised
+(or, failing that, the unit of a single forecast) is reused.}
+
+\item{relative_skill_metric}{character with the name of the metric for which
+a relative skill shall be computed. If equal to 'auto' (the default), then
+this will be either interval score, CRPS or Brier score (depending on which
+of these is available in the input data)}
+
+\item{baseline}{character string with the name of a model. If a baseline is
+given, then a scaled relative skill with respect to the baseline will be
+returned. By default (\code{NULL}), relative skill will not be scaled with
+respect to a baseline model.}
+}
+\description{
+Adds a column with relative skill scores computed by running
+pairwise comparisons on the scores.
+
+A column called 'model' must be present in the input data. For more
+information on the computation of relative skill, see
+\code{\link[=pairwise_comparison]{pairwise_comparison()}}. Relative skill will be calculated for the
+aggregation level specified in \code{by}.
+}
diff --git a/man/assign_attributes.Rd b/man/assign_attributes.Rd
index f9423bab9..f6dfdeadb 100644
--- a/man/assign_attributes.Rd
+++ b/man/assign_attributes.Rd
@@ -2,9 +2,7 @@
 % Please edit documentation in R/utils.R
 \name{assign_attributes}
 \alias{assign_attributes}
-\title{Assign attributes to an object from a named list
-
-Every list item will be made an attribute of the object.}
+\title{Assign attributes to an object from a named list}
 \usage{
 assign_attributes(object, attribute_list)
 }
@@ -18,8 +16,6 @@ The object with new attributes according to the contents of
 \code{attribute_list}
 }
 \description{
-Assign attributes to an object from a named list
-
 Every list item will be made an attribute of the object.
 }
 \keyword{internal}
diff --git a/man/check_has_attribute.Rd b/man/check_has_attribute.Rd
index 1372c1645..48b49c208 100644
--- a/man/check_has_attribute.Rd
+++ b/man/check_has_attribute.Rd
@@ -7,7 +7,9 @@
 check_has_attribute(object, attribute)
 }
 \arguments{
-\item{data}{An object to be checked}
+\item{object}{An object to be checked}
+
+\item{attribute}{name of an attribute to be checked}
 }
 \value{
 Returns TRUE if attribute is there and an error message as
diff --git a/man/check_summary_params.Rd b/man/check_summary_params.Rd
index 042d565e9..9b605f999 100644
--- a/man/check_summary_params.Rd
+++ b/man/check_summary_params.Rd
@@ -19,22 +19,6 @@ input data that do not correspond to a metric produced by \code{\link[=score]{sc
 indicate a grouping of forecasts (for example there may be one
 forecast per day, location and model). Adding additional, unrelated,
 columns may alter results in an unpredictable way.}
-
-\item{relative_skill}{logical, whether or not to compute relative
-performance between models based on pairwise comparisons.
-If \code{TRUE} (default is \code{FALSE}), then a column called
-'model' must be present in the input data. For more information on
-the computation of relative skill, see \code{\link[=pairwise_comparison]{pairwise_comparison()}}.
-Relative skill will be calculated for the aggregation level specified in
-\code{by}.}
-
-\item{baseline}{character string with the name of a model. If a baseline is
-given, then a scaled relative skill with respect to the baseline will be
-returned. By default (\code{NULL}), relative skill will not be scaled with
-respect to a baseline model.}
-
-\item{metric}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Deprecated in 1.1.0. Use
-\code{relative_skill_metric} instead.}
 }
 \description{
 A helper function to check the input parameters for
diff --git a/man/metrics_sample.Rd b/man/metrics_sample.Rd
index ab1444158..5231f4ae7 100644
--- a/man/metrics_sample.Rd
+++ b/man/metrics_sample.Rd
@@ -5,7 +5,7 @@
 \alias{metrics_sample}
 \title{Default metrics for sample-based forecasts.}
 \format{
-An object of class \code{list} of length 8.
+An object of class \code{list} of length 7.
 }
 \usage{
 metrics_sample
diff --git a/man/summarise_scores.Rd b/man/summarise_scores.Rd
index ed63cf1af..33e43985d 100644
--- a/man/summarise_scores.Rd
+++ b/man/summarise_scores.Rd
@@ -5,29 +5,9 @@
 \alias{summarize_scores}
 \title{Summarise scores as produced by \code{\link[=score]{score()}}}
 \usage{
-summarise_scores(
-  scores,
-  by = NULL,
-  across = NULL,
-  fun = mean,
-  relative_skill = FALSE,
-  relative_skill_metric = "auto",
-  metric = deprecated(),
-  baseline = NULL,
-  ...
-)
+summarise_scores(scores, by = NULL, across = NULL, fun = mean, ...)
 
-summarize_scores(
-  scores,
-  by = NULL,
-  across = NULL,
-  fun = mean,
-  relative_skill = FALSE,
-  relative_skill_metric = "auto",
-  metric = deprecated(),
-  baseline = NULL,
-  ...
-)
+summarize_scores(scores, by = NULL, across = NULL, fun = mean, ...)
 }
 \arguments{
 \item{scores}{A data.table of scores as produced by \code{\link[=score]{score()}}.}
@@ -52,27 +32,6 @@ and \code{by} may be used at a time.}
 
 \item{fun}{a function used for summarising scores. Default is \code{mean}.}
 
-\item{relative_skill}{logical, whether or not to compute relative
-performance between models based on pairwise comparisons.
-If \code{TRUE} (default is \code{FALSE}), then a column called
-'model' must be present in the input data. For more information on
-the computation of relative skill, see \code{\link[=pairwise_comparison]{pairwise_comparison()}}.
-Relative skill will be calculated for the aggregation level specified in
-\code{by}.}
-
-\item{relative_skill_metric}{character with the name of the metric for which
-a relative skill shall be computed. If equal to 'auto' (the default), then
-this will be either interval score, CRPS or Brier score (depending on which
-of these is available in the input data)}
-
-\item{metric}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Deprecated in 1.1.0. Use
-\code{relative_skill_metric} instead.}
-
-\item{baseline}{character string with the name of a model. If a baseline is
-given, then a scaled relative skill with respect to the baseline will be
-returned. By default (\code{NULL}), relative skill will not be scaled with
-respect to a baseline model.}
-
 \item{...}{additional parameters that can be passed to the summary function
 provided to \code{fun}. For more information see the documentation of the
 respective function.}
diff --git a/man/test_has_attribute.Rd b/man/test_has_attribute.Rd
deleted file mode 100644
index 17e96d59b..000000000
--- a/man/test_has_attribute.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/check-input-helpers.R
-\name{test_has_attribute}
-\alias{test_has_attribute}
-\title{Test whether an attribute is present}
-\usage{
-test_has_attribute(object, attribute)
-}
-\arguments{
-\item{data}{An object to be checked}
-}
-\value{
-Returns TRUE if attribute is there and FALSE otherwise
-a string otherwise
-}
-\description{
-Tests whether an object has an attribute
-}
-\keyword{check-inputs}
diff --git a/tests/testthat/_snaps/score.md b/tests/testthat/_snaps/score.md
deleted file mode 100644
index 78c7810cd..000000000
--- a/tests/testthat/_snaps/score.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# score() can support a sample column when a quantile forecast is used
-
-    Code
-      summarise_scores(summarise_scores(scores, by = "model"), by = "model", fun = signif,
-      digits = 2)
-    Output
-                          model interval_score dispersion underprediction
-      1: EuroCOVIDhub-baseline           8500        850               0
-      2: EuroCOVIDhub-ensemble             NA         NA              NA
-      3:  epiforecasts-EpiNow2          13000       4100               0
-      4:       UMass-MechBayes            120         77              39
-         overprediction coverage_deviation  bias ae_median
-      1:           7600             -0.081  0.62     13000
-      2:          11000                 NA  0.58     21000
-      3:           8600              0.050  0.50     22000
-      4:              0              0.050 -0.50       210
-
diff --git a/tests/testthat/_snaps/summarise_scores.md b/tests/testthat/_snaps/summarise_scores.md
deleted file mode 100644
index fdc138864..000000000
--- a/tests/testthat/_snaps/summarise_scores.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# summarise_scores() metric is deprecated
-
-    Code
-      x <- summarise_scores(scores, by = "model", metric = "auto", relative_skill = TRUE)
-    Warning
-      The `metric` argument of `summarise_scores()` is deprecated as of scoringutils 1.1.0.
-      i Please use the `relative_skill_metric` argument instead.
-

From 0347ca1087531f8f3884259c302b4774b8054f15 Mon Sep 17 00:00:00 2001
From: GitHub Action
Date: Sat, 28 Oct 2023 12:09:05 +0000
Subject: [PATCH 18/22] Automatic readme update

---
 README.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 49bbb6270..2b0e1d1f9 100644
--- a/README.md
+++ b/README.md
@@ -132,8 +132,9 @@ example_quantile %>%
   score() %>%
   add_coverage(ranges = c(50, 90), by = c("model", "target_type")) %>%
   summarise_scores(
-    by = c("model", "target_type"),
-    relative_skill = TRUE,
+    by = c("model", "target_type")
+  ) %>%
+  add_pairwise_comparison(
     baseline = "EuroCOVIDhub-ensemble"
   ) %>%
   summarise_scores(
@@ -181,8 +182,8 @@ example_quantile %>%
 #> 5: epiforecasts-EpiNow2 Cases     log 6.005778e-01    0.1066329
 #> 6: epiforecasts-EpiNow2 Cases natural 1.443844e+04 5664.3779484
 #>    underprediction overprediction coverage_deviation        bias    ae_median
-#> 1:    3.521964e-01      0.3804607        -0.10940217  0.09726563 1.185905e+00
-#> 2:    1.028497e+04   7702.9836957        -0.10940217  0.09726563 3.208048e+04
+#> 1:    3.521964e-01      0.3804607        -0.10940217  0.09726562 1.185905e+00
+#> 2:    1.028497e+04   7702.9836957        -0.10940217  0.09726562 3.208048e+04
 #> 3:    1.356563e-01      0.3132561        -0.09785326 -0.05640625 7.410484e-01
 #> 4:    4.237177e+03   3650.0047554        -0.09785326 -0.05640625 1.770795e+04
 #> 5:    1.858699e-01      0.3080750        -0.06660326 -0.07890625 7.656591e-01

From 5a4a32d6dc09ff6dc9f3ca68a480a9477fcb88fa Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 15:05:41 +0200
Subject: [PATCH 19/22] Let summarise_scores() use the list of actual metrics
 that were used during scoring

---
 R/summarise_scores.R        | 32 +++++++++++++++-----------------
 tests/testthat/test-score.R |  3 ++-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/R/summarise_scores.R b/R/summarise_scores.R
index 33c5f76fa..5d5a80b4b 100644
--- a/R/summarise_scores.R
+++ b/R/summarise_scores.R
@@ -75,7 +75,8 @@ summarise_scores <- function(scores,
     stop("You cannot specify both 'across' and 'by'. Please choose one.")
   }
 
-  if (is.null(attr(scores, "metric_names"))) {
+  metric_names <- attr(scores, "metric_names")
+  if (is.null(metric_names)) {
     stop("`scores` needs to have an attribute `metric_names` with the names of
          the metrics that were used for scoring.")
   }
@@ -115,21 +116,18 @@ summarise_scores <- function(scores,
     )
   )
 
-  # get all available metrics to determine names of columns to summarise over
-  cols_to_summarise <- paste0(available_metrics(), collapse = "|")
-
   # takes the mean over ranges and quantiles first, if neither range nor
   # quantile are in `by`. Reason to do this is that summaries may be
   # inaccurate if we treat individual quantiles as independent forecasts
   scores <- scores[, lapply(.SD, base::mean, ...),
     by = c(unique(c(forecast_unit, by))),
-    .SDcols = colnames(scores) %like% cols_to_summarise
+    .SDcols = colnames(scores) %like% paste(metric_names, collapse = "|")
   ]
 
   # summarise scores -----------------------------------------------------------
   scores <- scores[, lapply(.SD, fun, ...),
     by = c(by),
-    .SDcols = colnames(scores) %like% cols_to_summarise
+    .SDcols = colnames(scores) %like% paste(metric_names, collapse = "|")
   ]
 
   # remove unnecessary columns -------------------------------------------------
@@ -187,6 +185,11 @@ add_pairwise_comparison <- function(scores,
 
   stored_attributes <- get_scoringutils_attributes(scores)
 
+  if (is.null(stored_attributes[["metric_names"]])) {
+    stop("`scores` needs to have an attribute `metric_names` with the names of
+         the metrics that were used for scoring.")
+  }
+
   if (!is.null(attr(scores, "unsummarised_scores"))) {
     scores <- attr(scores, "unsummarised_scores")
   }
@@ -232,18 +235,13 @@ add_pairwise_comparison <- function(scores,
     }
   }
 
-
-  # get all available metrics to determine names of columns to summarise over
-  cols_to_summarise <- paste0(available_metrics(), collapse = "|")
-  scores <- scores[, lapply(.SD, mean),
-    by = c(by),
-    .SDcols = colnames(scores) %like% cols_to_summarise
-  ]
-  # Maybe this should use summarise scores instead?
-  # scores <- summarise_scores(scores, by = by, fun = mean)
-
+  # add relative skill to list of metric names
+  stored_attributes[["metric_names"]] <- c(
+    stored_attributes[["metric_names"]],
+    "relative_skill", "scaled_rel_skill"
+  )
   scores <- assign_attributes(scores, stored_attributes)
-
+  scores <- summarise_scores(scores, by = by)
   return(scores)
 }
diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R
index ff37eb28b..988bd08b9 100644
--- a/tests/testthat/test-score.R
+++ b/tests/testthat/test-score.R
@@ -159,7 +159,8 @@ test_that("function produces output for a point case", {
     c(
       "model", "target_type",
       "ae_point",
-      "se_point"
+      "se_point",
+      "ape"
     )
   )
 })

From 19245819ce46a0015f4e83401235405375c157d9 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 16:59:29 +0200
Subject: [PATCH 20/22] Try to get rid of `available_metrics()`; it is still
 needed in some places for now

---
 R/get_-functions.R                   | 28 +++++++++++++++++++++++++++-
 R/pairwise-comparisons.R             |  2 +-
 R/plot.R                             |  4 +---
 R/score.R                            |  6 ++----
 man/compare_two_models.Rd            |  3 +--
 man/get_metrics.Rd                   | 19 +++++++++++++++++++
 man/pairwise_comparison.Rd           |  3 +--
 man/pairwise_comparison_one_group.Rd |  3 +--
 man/score.Rd                         |  3 +--
 man/score_quantile.Rd                |  3 +--
 tests/testthat/setup.R               |  2 +-
 tests/testthat/test-score.R          | 19 +++++++++++++------
 12 files changed, 69 insertions(+), 26 deletions(-)
 create mode 100644 man/get_metrics.Rd

diff --git a/R/get_-functions.R b/R/get_-functions.R
index 8c826e9d4..095a77e48 100644
--- a/R/get_-functions.R
+++ b/R/get_-functions.R
@@ -126,6 +126,31 @@ get_target_type <- function(data) {
 }
 
 
+#' @title Get metrics that were used for scoring
+#'
+#' @description Internal helper function to get the metrics that were used
+#' to score forecasts.
+#' @param scores A data.table with an attribute `metric_names`
+#'
+#' @return Character vector with the metrics that were used for scoring.
+#'
+#' @keywords internal
+
+get_metrics <- function(scores) {
+  metric_names <- attr(scores, "metric_names")
+  if (is.null(metric_names)) {
+    stop("The data needs to have an attribute `metric_names` with the names ",
+         "of the metrics that were used for scoring. This should be the case ",
+         "if the data was produced using `score()`. Either run `score()` ",
+         "again, or set the attribute manually using ",
+         "`attr(data, 'metric_names') <- names_of_the_scoring_metrics`")
+  }
+  return(metric_names)
+}
+
+
+
+
 #' @title Get unit of a single forecast
 #'
@@ -169,7 +194,8 @@ get_protected_columns <- function(data = NULL) {
 
   protected_columns <- c(
     "predicted", "observed", "sample_id", "quantile", "upper", "lower",
-    "pit_value", "range", "boundary", available_metrics(),
+    "pit_value", "range", "boundary", "relative_skill", "scaled_rel_skill",
+    available_metrics(),
     grep("coverage_", names(data), fixed = TRUE, value = TRUE)
   )
diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R
index 5b57c7337..d842ede72 100644
--- a/R/pairwise-comparisons.R
+++ b/R/pairwise-comparisons.R
@@ -30,7 +30,6 @@
 #' @param metric A character vector of length one with the metric to do the
 #'   comparison on. The default is "auto", meaning that either "interval_score",
 #'   "crps", or "brier_score" will be selected where available.
-#'   See [available_metrics()] for available metrics.
 #' @param by character vector with names of columns present in the input
 #'   data.frame. `by` determines how pairwise comparisons will be computed.
 #'   You will get a relative skill score for every grouping level determined in
@@ -67,6 +66,7 @@ pairwise_comparison <- function(scores,
                                 baseline = NULL,
                                 ...) {
 
+  # metric_names <- get_metrics(scores)
   metric <- match.arg(metric, c("auto", available_metrics()))
 
   if (!is.data.table(scores)) {
diff --git a/R/plot.R b/R/plot.R
index 1d7e4875b..b3e1a9539 100644
--- a/R/plot.R
+++ b/R/plot.R
@@ -45,9 +45,7 @@ plot_score_table <- function(scores,
 
   # identify metrics -----------------------------------------------------------
   id_vars <- get_forecast_unit(scores)
-  if (is.null(metrics)) {
-    metrics <- names(scores)[names(scores) %in% available_metrics()]
-  }
+  metrics <- get_metrics(scores)
 
   scores <- delete_columns(
     scores,
diff --git a/R/score.R b/R/score.R
index 4ba62e229..3e631de1c 100644
--- a/R/score.R
+++ b/R/score.R
@@ -49,9 +49,7 @@
 #'   [example_integer], [example_point()], and [example_binary]).
 #'
 #' @param metrics the metrics you want to have in the output. If `NULL` (the
-#'   default), all available metrics will be computed. For a list of available
-#'   metrics see [available_metrics()], or check the [metrics] data set.
-#'
+#'   default), all available metrics will be computed.
 #' @param ... additional parameters passed down to other functions.
 #'
 #' @return A data.table with unsummarised scores. There will be one score per
@@ -211,7 +209,7 @@ score.scoringutils_quantile <- function(data, metrics = NULL, ...) {
     ...
   )
 
-  setattr(scores, "metric_names", metrics)
+  setattr(scores, "metric_names", metrics[metrics %in% colnames(scores)])
   # manual hack to make sure that the correct attributes are there.
   setattr(scores, "forecast_unit", forecast_unit)
   setattr(scores, "forecast_type", "quantile")
diff --git a/man/compare_two_models.Rd b/man/compare_two_models.Rd
index 840658c23..39780292e 100644
--- a/man/compare_two_models.Rd
+++ b/man/compare_two_models.Rd
@@ -23,8 +23,7 @@ compare_two_models(
 
 \item{metric}{A character vector of length one with the metric to do the
 comparison on. The default is "auto", meaning that either "interval_score",
-"crps", or "brier_score" will be selected where available.
-See \code{\link[=available_metrics]{available_metrics()}} for available metrics.}
+"crps", or "brier_score" will be selected where available.}
 
 \item{one_sided}{Boolean, default is \code{FALSE}, whether to conduct a
 one-sided instead of a two-sided test to determine significance in a pairwise
diff --git a/man/get_metrics.Rd b/man/get_metrics.Rd
new file mode 100644
index 000000000..438e6b8f5
--- /dev/null
+++ b/man/get_metrics.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_-functions.R
+\name{get_metrics}
+\alias{get_metrics}
+\title{Get metrics that were used for scoring}
+\usage{
+get_metrics(scores)
+}
+\arguments{
+\item{scores}{A data.table with an attribute \code{metric_names}}
+}
+\value{
+Character vector with the metrics that were used for scoring.
+}
+\description{
+Internal helper function to get the metrics that were used
+to score forecasts.
+}
+\keyword{internal}
diff --git a/man/pairwise_comparison.Rd b/man/pairwise_comparison.Rd
index 4c14b975e..9288e77fb 100644
--- a/man/pairwise_comparison.Rd
+++ b/man/pairwise_comparison.Rd
@@ -26,8 +26,7 @@ split data.frames.}
 
 \item{metric}{A character vector of length one with the metric to do the
 comparison on. The default is "auto", meaning that either "interval_score",
-"crps", or "brier_score" will be selected where available.
-See \code{\link[=available_metrics]{available_metrics()}} for available metrics.}
+"crps", or "brier_score" will be selected where available.}
 
 \item{baseline}{character vector of length one that denotes the baseline
 model against which to compare other models.}
diff --git a/man/pairwise_comparison_one_group.Rd b/man/pairwise_comparison_one_group.Rd
index b176523bb..a7d902f15 100644
--- a/man/pairwise_comparison_one_group.Rd
+++ b/man/pairwise_comparison_one_group.Rd
@@ -11,8 +11,7 @@ pairwise_comparison_one_group(scores, metric, baseline, by, ...)
 
 \item{metric}{A character vector of length one with the metric to do the
 comparison on. The default is "auto", meaning that either "interval_score",
-"crps", or "brier_score" will be selected where available.
-See \code{\link[=available_metrics]{available_metrics()}} for available metrics.}
+"crps", or "brier_score" will be selected where available.}
 
 \item{baseline}{character vector of length one that denotes the baseline
 model against which to compare other models.}
diff --git a/man/score.Rd b/man/score.Rd
index e17c0166d..52a8c32a9 100644
--- a/man/score.Rd
+++ b/man/score.Rd
@@ -42,8 +42,7 @@ For more information see the vignettes and the example data
 \item{...}{additional parameters passed down to other functions.}
 
 \item{metrics}{the metrics you want to have in the output. If \code{NULL} (the
-default), all available metrics will be computed. For a list of available
-metrics see \code{\link[=available_metrics]{available_metrics()}}, or check the \link{metrics} data set.}
+default), all available metrics will be computed.}
 }
 \value{
 A data.table with unsummarised scores. There will be one score per
diff --git a/man/score_quantile.Rd b/man/score_quantile.Rd
index f309355d0..002eaa147 100644
--- a/man/score_quantile.Rd
+++ b/man/score_quantile.Rd
@@ -36,8 +36,7 @@ the unit of a single forecast, i.e. a forecast was made for a combination of
 the values in \code{forecast_unit}}
 
 \item{metrics}{the metrics you want to have in the output. If \code{NULL} (the
If \code{NULL} (the -default), all available metrics will be computed. For a list of available -metrics see \code{\link[=available_metrics]{available_metrics()}}, or check the \link{metrics} data set.} +default), all available metrics will be computed.} \item{weigh}{if TRUE, weigh the score by alpha / 2, so it can be averaged into an interval score that, in the limit, corresponds to CRPS. Alpha is the diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 5f2b22a2c..dc3d6b941 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -3,4 +3,4 @@ library(ggplot2, quietly = TRUE) suppressMessages(library(magrittr)) # compute quantile scores -scores <- suppressMessages(score(example_quantile)) \ No newline at end of file +scores <- suppressMessages(score(example_quantile)) diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R index 988bd08b9..73c013eb9 100644 --- a/tests/testthat/test-score.R +++ b/tests/testthat/test-score.R @@ -156,15 +156,22 @@ test_that("function produces output for a point case", { ) expect_equal( colnames(eval), - c( - "model", "target_type", - "ae_point", - "se_point", - "ape" - ) + c("model", "target_type",names(metrics_point)) + ) +}) + +test_that("Changing metrics names works", { + metrics_test <- metrics_point + names(metrics_test)[1] = "just_testing" + eval <- suppressMessages(score(example_point, metrics = metrics_test)) + eval_summarised <- summarise_scores(eval, by = "model") + expect_equal( + colnames(eval_summarised), + c("model", "just_testing", names(metrics_point)[-1]) ) }) + test_that("score.scoringutils_point() errors with only NA values", { only_nas <- copy(example_point)[, predicted := NA_real_] expect_error( From 4210e6026818d410ec8705e077087e45f45cc32d Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Sat, 28 Oct 2023 17:32:35 +0200 Subject: [PATCH 21/22] add another test, update news file --- NEWS.md | 1 + tests/testthat/test-pairwise_comparison.R | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/NEWS.md b/NEWS.md index d3487d687..322b2b25d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,7 @@ The update introduces a lot of breaking changes. If you want to keep using the o - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector) - `quantile`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`. - `check_forecasts()` was replaced by a new function `validate()`. `validate()` validates the input and in that sense fulfills the purpose of `check_forecasts()`. It has different methods: `validate.default()` assigns the input a class based on their forecast type. Other methods validate the input specifically for the various forecast types. +- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds pairwise comparisons to it. - The function `find_duplicates()` was renamed to `get_duplicate_forecasts()` - Changes to `avail_forecasts()` and `plot_avail_forecasts()`: - The function `avail_forecasts()` was renamed to `available_forecasts()` for consistency with `available_metrics()`. The old function, `avail_forecasts()` is still available as an alias, but will be removed in the future. 
diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R
index 0c55cbce3..f08a74a9a 100644
--- a/tests/testthat/test-pairwise_comparison.R
+++ b/tests/testthat/test-pairwise_comparison.R
@@ -278,3 +278,14 @@ test_that("pairwise_comparison() realises when there is no baseline model", {
     pairwise_comparison(scores, baseline = "missing_model"), "missing"
   )
 })
+
+test_that("You can run `add_pairwise_comparison()` on unsummarised data", {
+  pw1 <- suppressMessages(add_pairwise_comparison(scores))
+  pw1_sum <- summarise_scores(pw1, by = "model")
+
+  pw2 <- summarise_scores(scores, by = "model")
+  pw2 <- add_pairwise_comparison(pw2)
+
+  expect_true(all(pw1_sum == pw2, na.rm = TRUE))
+  expect_true(all(names(attributes(pw2)) == names(attributes(pw1_sum))))
+})

From 3122fbdd77996cfa6aa7abf12222f498215110c6 Mon Sep 17 00:00:00 2001
From: nikosbosse
Date: Sat, 28 Oct 2023 18:11:16 +0200
Subject: [PATCH 22/22] Allow `add_coverage()` to be run after
 `summarise_scores()`

---
 R/summarise_scores.R                      | 16 +++++++++++++---
 man/add_coverage.Rd                       |  2 +-
 tests/testthat/test-add_coverage.R        | 15 ++++++++++++++-
 tests/testthat/test-pairwise_comparison.R |  2 +-
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/R/summarise_scores.R b/R/summarise_scores.R
index 5d5a80b4b..7695a6b15 100644
--- a/R/summarise_scores.R
+++ b/R/summarise_scores.R
@@ -197,8 +197,6 @@ add_pairwise_comparison <- function(scores,
   if (is.null(by) && !is.null(stored_attributes[["scoringutils_by"]])) {
     by <- stored_attributes[["scoringutils_by"]]
   } else if (is.null(by)) {
-    # This needs to be double checked, because getting the forecast unit is not
-    # so when you can name your own metrics.
     by <- get_forecast_unit(scores)
   }
@@ -340,10 +338,21 @@ check_summary_params <- function(scores,
 #' @keywords scoring
 
 add_coverage <- function(scores,
-                         by,
+                         by = NULL,
                          ranges = c(50, 90)) {
   stored_attributes <- get_scoringutils_attributes(scores)
 
+  if (!is.null(attr(scores, "unsummarised_scores"))) {
+    scores <- attr(scores, "unsummarised_scores")
+  }
+
+  if (is.null(by) && !is.null(stored_attributes[["scoringutils_by"]])) {
+    by <- stored_attributes[["scoringutils_by"]]
+  } else if (is.null(by)) {
+    # Need to check this again.
+    # (mentioned in https://github.com/epiforecasts/scoringutils/issues/346)
+    by <- get_forecast_unit(scores)
+  }
 
   summarised_scores <- summarise_scores(
     scores,
@@ -369,6 +378,7 @@ add_coverage <- function(scores,
   scores_with_coverage <- assign_attributes(
     scores_with_coverage, stored_attributes
   )
+
   return(scores_with_coverage[])
 }
diff --git a/man/add_coverage.Rd b/man/add_coverage.Rd
index e0abf8161..33990a3bc 100644
--- a/man/add_coverage.Rd
+++ b/man/add_coverage.Rd
@@ -4,7 +4,7 @@
 \alias{add_coverage}
 \title{Add coverage of central prediction intervals}
 \usage{
-add_coverage(scores, by, ranges = c(50, 90))
+add_coverage(scores, by = NULL, ranges = c(50, 90))
 }
 \arguments{
 \item{scores}{A data.table of scores as produced by \code{\link[=score]{score()}}.}
diff --git a/tests/testthat/test-add_coverage.R b/tests/testthat/test-add_coverage.R
index 4ceb68d9a..43686ca80 100644
--- a/tests/testthat/test-add_coverage.R
+++ b/tests/testthat/test-add_coverage.R
@@ -1,5 +1,4 @@
 test_that("add_coverage() works as expected", {
-  expect_error(add_coverage(scores))
   expect_error(
     add_coverage(scores, by = c("model", "target_type"), range = c())
   )
@@ -14,3 +13,17 @@ test_that("add_coverage() works as expected", {
     c("coverage_deviation", "coverage_10", "coverage_50", "coverage_80")
   )
 })
+
+
+test_that("Order of `add_coverage()` and `summarise_scores()` doesn't matter", {
+  # Need to update test. Turns out the order does matter...
+  # see https://github.com/epiforecasts/scoringutils/issues/367
+  pw1 <- add_coverage(scores, by = "model")
+  pw1_sum <- summarise_scores(pw1, by = "model")
+
+  pw2 <- summarise_scores(scores, by = "model")
+  pw2 <- add_coverage(pw2)
+
+  # expect_true(all(pw1_sum == pw2, na.rm = TRUE))
+  # expect_true(all(names(attributes(pw2)) == names(attributes(pw1_sum))))
+})
diff --git a/tests/testthat/test-pairwise_comparison.R b/tests/testthat/test-pairwise_comparison.R
index f08a74a9a..1ff94ad99 100644
--- a/tests/testthat/test-pairwise_comparison.R
+++ b/tests/testthat/test-pairwise_comparison.R
@@ -279,7 +279,7 @@ test_that("pairwise_comparison() realises when there is no baseline model", {
   )
 })
 
-test_that("You can run `add_pairwise_comparison()` on unsummarised data", {
+test_that("Order of `add_pairwise_comparison()` and `summarise_scores()` doesn't matter", {
   pw1 <- suppressMessages(add_pairwise_comparison(scores))
   pw1_sum <- summarise_scores(pw1, by = "model")
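
Taken together, patches 14 to 22 replace the old `summarise_scores(..., relative_skill = TRUE)` interface with a composable pipeline. The sketch below is not part of the patch series itself; it is a minimal restatement of the updated README and vignette usage, assuming a scoringutils build with these patches applied (the dataset and the baseline model name come from the package's bundled `example_quantile` example):

```r
library(scoringutils)
library(magrittr)

example_quantile %>%
  score() %>%                                      # one score per forecast
  add_coverage(ranges = c(50, 90), by = c("model", "target_type")) %>%
  summarise_scores(by = c("model", "target_type")) %>%
  add_pairwise_comparison(                         # adds relative skill columns
    baseline = "EuroCOVIDhub-ensemble"
  )
```

Splitting the pairwise comparison out of `summarise_scores()` keeps each step single-purpose: `add_pairwise_comparison()` reuses the `by` grouping stored by `summarise_scores()` (falling back to the forecast unit) and, per patch 19, registers `relative_skill` and `scaled_rel_skill` as additional metric names.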