From dc6f3f55edbc4eb11732feb5a6ea1a55cc95dab2 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Mon, 5 Aug 2024 18:45:32 +0300 Subject: [PATCH] Added '--pmml-schema' (aka '--schema') command-line option --- .../resources/sklearn2pmml-1.0-SNAPSHOT.jar | Bin 4857 -> 7158 bytes src/main/java/com/sklearn2pmml/Main.java | 64 +++++++++++++++++- .../com/sklearn2pmml/VersionConverter.java | 45 ++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 src/main/java/com/sklearn2pmml/VersionConverter.java diff --git a/sklearn2pmml/resources/sklearn2pmml-1.0-SNAPSHOT.jar b/sklearn2pmml/resources/sklearn2pmml-1.0-SNAPSHOT.jar index d602c53b2730178c675653fabe0f3ea897aa0d95..d9827877ddb16b5cedf82bead604a08427267e88 100644 GIT binary patch delta 5998 zcma)=Ra6uXx5Wn(0V#5!0A z>i)mG)~)a9p7(uT&N^%V_Wr|sDV!pR-xp`65(WSO3R9)%U`b8Hy)P*Meq{IScYp{e z)nc7)UYfvMTUWZhT(J_X;OQ|kKK|y%7NNpbaZ8N&)~sY%lnUE8S59LJaLgpUhw9Dl zxB+#qP&*!OK)Rs)*mfAO;KE8x;FnduQG5Aog8neuBH$5t( zFNKS35E`ra*yBXMVjor#9h9IKR*3f2> z0{jZysQev^>b0H&HbpNO`+bfCI%P3mIJ^WJALF1uQ}c%1fy&wO-MiXZWgh+J);XtN zt}RFtlXuQ0UltRM)w^z1Fg9swfU}Ji@hgri@ts1iqkw?F1CY5)Kc|7?Uos%O|~X9mG1A-0`ua&pDF|W)0B74&`zufO zR1OF1Zx_NRYUqxjC!)ZO@?MWNMdBg@ccmUGJ+;0IsbKB#E0uBXKPR?-Hl%>$C&1x9 z`V5^FKiQVKqj5J+d_*O7(Q;B0n3~;p@^z?;3FRdblM1>XC zi-x}0>xwWIyC@+KN$h!E`LLJh6OPp2xre~U%F?&5zP>X5gakW3?s=>)4ESN;b z@XmN53t=d!`u7X^K&HZ^p;dy%*tKWXrV@^I@p-3n5lyTaDm&+<5(RuXVd_oZeA7;_ z%`sCO@hoM@Svwn0?Rj0HfTu#{&q9hU8+vkLdoi+gH3@5`pAe7>Jw(gVO}s1EXya$F zgmbh0$N|5oswvXW|mM6%i_S+(kj$v&5C$;SM(tY(Yi+$QQ& z?*{uoMDn`^P8lYJ)0%FTK#dc|g2_7PM3~QQo*OEDENq&DER?QApI=UoXQ#rN>Y+W! z4)#Kt@+;m4zMxFF??q9^^imnPe-!aCP@FU0;%%=lrebt_xz#)FnuBJsrl8$8laHT5 zjeIA8?4IduPRyZ2M5@iDXxES$juT&@@x0%vlg=^6ebto7m^Wh)KQyy1Zk0IA>!-v} zhLCN{d1T9&!u<2Vw79pwjR#NV>Q=J{~KJ9S8cm(N~m0d}gTWt~xTiET_s z0vQOX`f2f00HI~1^6wVt!5W?Cm+ zI9)IIBby@N;Qd^0DO&z~f>cW>UBrU{@0R{LD=Pi&E*vib{)EQtfQuv+M6P zyYPqoQWzQ97cJw=p7}`sM{Tk?)J`i<++n`97%nYC^nm_Ovm(R&o<_|+9I`LtP zN&?D~bK#FNBDNs8RnJ`2{UZ#>p!4^iR(MC$&=w-*kw`WoaSqu$xH{j)ow%j^%_qF@`J|? zKDt>5+DckfQOcP^>YHsL;O?{t*IbllHffFY)FbH55K2v5S{Z5=u`)Ne)6Op|SA_EO zs;SiHS`^NwuH78pHG&r+1rq6Ml{7T_uWPwxyu*}WsKPsCG*17LY5v7$Higyg1gm0! zh$pRX7lT3REW{wRwr|*jHVAyM+u1RWJv{Vd)DpOtwHkQ{SIt`b^u>82(R=SZA77#`XdDr$n$!|OgQjce1%mvcG zqV%LlH>b+*(u>SRqjaXob)VEcS86SHlCw1ls7)uP;0ie8L}hKC=4BvH8`TSRscm8H zY4h>bCl1_( z`e3P;S>C$Pyqm)!(R$Flr)T2@Iy(x+74ziunW<-0O8Cu(qxQx^Ya&F5DBVs^GONOx zi_2P)9mWd8wRX$v(i@qc63DR5Vaq6a;&K@FshV~q0XrDy@-&eMr;7iJwrCka?piS3 z{!`KDs54cQuo@hY_xwXqkIKl zV5!9AsR(JpZK}oH*eyNP_Unexa}`5Xa7qh96!}s&xYUk&A`&IDmFkrR;RGVgYY>a4 zJGn2Tu`7JnXFxiK%~ir{K7GWQq-SDQK6h4|^{i`>eik)fbHSDrJa4fZFYi2wk)J}l zw4dJg=l;!&avhEN5;WPyyLKcivX74|@go~z%jk`Fy1>p^^SA>m@*WT7pX_ZtXPr%d zp~M?dL(xpVlcKuUvz&@ zu7ua^?h07;Pi}GYnKhvW8I(xEs!){xRG@rdcP~POJ&`6OL{kTdiL`H!jtD?>Zwji! zFDI1`8aa}oX873FESF(52~BJYwt`^(+5@ez_;2R zjKuMSqSCJl$tK*T*j6aqJU)Ei@(idNOnSiXr4!b@q1;~`As6Bw@ENtOQ+Qs)PF-{` zHU2}S%vYRmw;QRn4wx3{GoN@Fy^}JCX^U9xD7gLdW0-p$L%j{lig!9BBlp0OR`n&{ z>JHf87P_puVbjD6oGL)V?yY*8eVkQWo&;QvB6KFdAcA9&NR zy$oLI`_*jGB;@5HqinF<`?Qs4d~T^e2vKQu6O7h9G%yr=UIGIXn`ILlm1lH_}s_ z_SW$i)-cKPF#DJIT4!X|zF3Ei_G8zIaI=Vq2QAe{k0}5E zJUqZ!&NrChW_Q%wFg5@nf&&1s{1X#T1QCe&&kDnT0_|o`YsZ>EL%R%o^ ztLvvk>2B#)$(<*0+^+?C{O-da(A1!DM_MgbrzP0vXwK+Xo||1V?1LD#J1&WUX-`a) zRDudG_>G<%p@$>0UV^h0Z;1@L{Ha`jdYlR2Na>dGJ((O2jVgZx^tWiwWPY=0leQ;c zZ_oh0P{2~!AC6DomOpN`ZxjA$Kr*7W`11S`eztP9?PyTZ4GK;7vKt5ZF&}3}?hv?z z%8bFF41}i+p7B|qExqOibUN)nZ_9GzT&aO}vIma;@F7hFpNnx3XvbyHkZneOAtY+S ze%yREYs;LgBQ@dLa3*WZ2mHnssJCoKjf}q-ZOK_AGb&;esNndv^dYL;>ZpSmwlJNhp51DUM2Y3P>sOj&Pw~dLiyc{N z^mioTaO}4wi$?YETR69Js=0Cp*$tD{KkUAPQUupPYB}~C6eBFxisEe@M5Wo$LolKn zMW>q9-pjzplW~}3E_`E40|DPXGQnCUXK$}%jQ-IdW_lFlmgp$PD%8IJtkQYrB@L|B zj~NLxAO3W>L@}dE>ZY;`w&pOlFQJk^6)D_l$SCgr(4cvgQp6UnvHC7WfHTpC|105i zg15*Az-4Lzz*vb5XfJH6ygv(QUZ-&c&+Dn)tMuzl=SAbyCA+E7jYWlhTvrky`x}xI ztK=VOwx@GRx*Up%H*<_4i^e-HHi9k1HLr%tUamwg9u1$`S8Kr9JMLv6Ta zcUgGC&uM${6!sbiPEvwvmGoW?KuYs&>35lSA6Gqr8qZI?ahLN~Bb5|bA^J@W3YKMJ zWfK4j>z_{-O{GguR+l!MiDR!mb2X~azHu(o(6)O`My`2S=5fp|4{2G!vI4zTi8b0^ zH~&9RVK`obb0CHb0FW{P00IDbF)269-R&Kh|2vm4;{&(vLeCx!Z-G&~ga?4wR(Pv( zMHRtcrAK{^ljS`;gm@0InK@d?ae7v6Ll2#xYFt}PQW>~b`th^m!Hwe=9pcj-E90_r zUzs759^R9*3wLH$$o35;$6$4dsmR3`hVu$prW?G->r8nejYeT+-~1=Lhi7=OUyJWQ zt9Tquh;V*BJ~}#Vf7>s7c!j$=F*Pj8LD=*L#O1z8YS_OgWzYv-K+A^uyMH`lOL~4K zIHeOTln`{JF^F$n)ToLqudSfdEBgh|O&KuqJZv|kTe^)R%~wD(5$1%)J-6tsE+#+3 zr=AT+z_-8Aa60Lc^(=EDbR#xcp2$hfk}Cy;r7}!_ZnYQl5Kp zR;K;YLVdBg1WTB;xr$4-oyuEpJU$MCGq~XY1)!@}PH=$9n zAw#3^yZV+sFsb-wlXB_^wpR`rDa(+g?$Db(^m-8PFt;NX5+x@(F@&cm^72(|gw?he zm(uPF?NbQtqskXY)USFNU`zCn&mDlZV#NCpJbdgKa)D$E=Kc}Hg8ub`-LYs@fDRYt zTY}^`>)9_6a%C`=gVIaMh*FVcIT6As}noeiD%d914iaxUa12t1kAvX4JHe~ zSxDoVpEAgI6M1!c@iX;!hWKmsaBs=6-r#*Dj8?x>SSN(onS4+e)`J>S>rRb$+X`Is zrgmc&PzbNa8(44s8UB*WG=r-X`OLmYBAV)2`xEXhSQB~| z*f#!ppizl~?a`jZ01A*x!>LU&#vkftx- z$%kwv{-4!fkf}N8u>9g0)Jm^jL*1xK5hTj0A^YX08W~<-Up5eRX4zBKNg$_wrzP>1B)BuOg)&!{4pA~ko-;gzE(0j$NV^9P11{`rVPy-H&nrP9aqmqoOrC7Ce_`R}(?$!v@I+)q=w!>6+Ih!&9PP@L3&KZ}1S4 zhvc}D#fAUaGIW_yx>3BupRCzE4pvwb0MJ(F+y>caI2P*uV#9SQy>y6lgN;=KrGg|g3LY2r*q7&X{Jz6TKEt5j_X#7 zfOX3T_Lz&c((f$`z;5hU>r_|Dv@kmYoj6UU@fGqFnZ$5T z?Tt;9NI-Q%PhFLxISaU zeiZe4bVs5^LaVkluC=vcnz>D#!4^-K>JmSZN4)>lXY*MLK_B%Ratfx5x!jQISf^Z1 zSx#wAR%mLg81E_Mau~dRU-Nc0z`Kq8HL`SB75|*|7}L2&aLS*2ZSP^|?@AQyK{bT# z!z+f3a9m|VNt2&9u|=^R>tj@78T(12b!fIH;FwJ=q%9_Vh0SkiM9i1e=+cr=KUAxu zD&k^vjT7y8zW#GkOzJKs?Q`}(z?b@!S{iKXQPwdwjvKyH4G8mHEA3> z3fT&C;sLQHf#;vL^1}&O`0l6$^gZ-}yw`{h;uZnm%sDKY#aEjNYyp zh*c{5*iVR+DRC)_PL;WdCe5g-`(3!|&hn`8wc~nndm7dWw0gD!k3CG2nzUspN1B-$ z3y1lp_>%`{sun7gaaMn;yB)5uX{G`py$_&@%*-C-S%Vs#DqudBdpH*Ig;p1&y zauLDrou1IkZu9at2+*KqQS(lGfbsbCGrh%FHeH% z%>+urtP~y=tLP);4^2MbaC|&)xEbw8QFQ9sE=$2Xv=04cQ%};m{`IfZPSohcW=`QO z(*oi#u0GxOEf1eBbr4GnjQacgf~(}gzY;G$_AZ`5>!A;_ljj0fw+#`{XCHMFIHbzb zH9EeGWW9YmY+g;#;L-IY`)E}fhOs(rcM0E`%vPtqJU`y{MA7J<#z4di^9Mj29o6h! z1oB3Sml2`5!I5TX+Z=4LpXV^M88kkDAX?u>N;Lyhkbx`*%S7 zhf)7?%0vB${@=K&rHX}3@#w#?_um};zg7YO<)Kl}008wjdNQ2K8VcO+EPdhiq_5y! eWW3m9)c*nj_$(PEF#&)8Ao>UZsHFav@;?A~Q*H79 delta 3656 zcmV-O4!804H~A%yH3=+-OA&$q005tnJ{*6Iu?oU46h-$!{^9*VLTVSyW}R9ZC|2Ab z&9j80F{G*B-)j~Z*ULHQY7F*4TjqC4n|%fawUDgp*63-akTHKpezVz=g2@epDI7+v z2Z%U3Tl&<6p;c| zACMZ6_4^9KQ)V~R1^@ss4U_r-B7axecN5nc|BWnZ$PH8ZZS5eT=fuaTzKWoD@W;9rs6~mkuN@6Ihur;iQqoIP+ z8eHTxc<@zJ6|`D*rRqOyl}rVzva`k+BW*cpvEQX2eyD2uR@t1iJS$%^hi%*O4c~HX zPr=q~!6~QnX4Ui4vs{)9yMJi9>2bp~%BF9+6pz?kl67q=RhcWLDptiTS+<#yCKa>= zCZSey_9EU z>ooG7Q>yyr!G<)q27me8W6}4J!w}ci5fQL4O4BHdC{4TI6oY(jZm?5JXL|4Qu}G&l zVY(z+u{@oYoa4D%c9)8CDxOqvUd7K<{DLSLDwJxa)h3*(TQF;$iP8)TXLKZx)Ug>` zbliq53c8z?ktx*i6n?4Vf{tI|*E)WKXLUS>9Xg)J3p$37RDbcJj+gMVir?z^9ez(& zbi9HjeO*cH;ThLCDMm{ zN{z`A{{KzIyE@*(`;1w|2Rc3!MsHn7W5Kb{m@aQodbm<4Sp|9N$Z-=m{lM`$b`-3> z8YlGcde~5~@qY&8RIuinD)NFiLkTNrn`d`dt&;Rit*`X`wr#p2CBySXcj!+fZ+1<# z*JKH%-v5)8kTHtIzp0hmPli!x({HRQu{&1~V1`Mkc$*VtofG5e@MF7c!Iuf58KV^VK9FeQdh_NiOl-b3EfaHDG1p2hdh9{M z5k3w2*`4s%Xl zLipGMBH2Ym`{$v~BbFQJn@4N^BDDSmv?-h~LK_ov>hh;lxPh+rQnW>y-ANDi`bWm#v?UzLp9o+q(!MOdO4#Cfu2TzwV4pY$5)agk#8wBygLX%4 zzL$yFjfb&c(lT=Y2|mLD_wV9kd|8|SL;C;l6~5;FOZ->vgHn&8Q;r%A@GMI$qkq(r z#T3SH3gdh-AHr!&;5^^i7cfb@9pw{Xids)l?@4N#qPCOx29HY!oM5p!cx&#*11x2X zk`Lk`mhwEh37jn~@CBlJH&+q922K$`dn9JRt>JkJAJk+|@u9Gnv4!ZvTX+Qf=&^v$ zC9GKziX?XSs~9DCB!aXhL?syw#D7Qlw}jP*Hqh$=Elc8F2a1FXcc?g^A}i1tT~;p$ zDssy*Z9?Hbsk95RCQNn4uuvXu8_7>_Z*%Y9%gf*&GiqqzKvER9Ba@V}TA?LeQXuip4G{(Q5onFGhm^dsEk|aKbQ-V!} zCZ47~ks8F4FB@#)Sct$79nFVeJ@ooLH%iCa!d zo=mbh<}R*wfQjUi&518z)#kA_fUPvB2ljL>&yO!I-+#Pv@tyTH&U~5(Dt~~xUa04u zn~<1#C+D9A$MZSO)4>T%XioUxqL=~S;MDLAzBxX*IQ_PA-QYm!q>pMLMb16T4PjDf z166OuB+R%2XHX;+dd05K1x(1C%?jlSFQ-}R;ucAv#SPs0S!ksKdPjMh0VK^>FO6&# zcz2BR0_8;^Vv-~-W`U*iXMeRh_^sZov;hc~AZuk_D*RY;`Dv0s$SRaTnubiO-_P)i ziWvg0PH&W}uYq-PAFL8!!rI31 zT(BIvkdTt_mVE^nO@GpTg~nMJ$#53$D?SKvDRk=+`wDRp&eBxKXNC6|KGm;hv`}x{ zgzpA;pr-^h-;8REewaLFAZu9_7F~f%9pgm^LyeJW7W%nu#d1YI@OfMK)B07E?)7T zMJPiLTe0`taetV`8Nkf93B|FVM%%I2sgd!IniF7DG0)|WBzXun(mUmIf0JomQthS& zY@_%qG~P7_(`@-v3WoY7AgN;o_7?fLE_sLCG;`M5U2<>r=pF|-r2KZJcDwKHdYkl0gVv~sLy+`|0r;!f60fQNPK>%MDB^e%nY5~eS7bCwW(4p;w&O%1rKtLjNX(p*_<4yLBMsg?w* z_pT|{kbm~N`x3~%VyiK5yk48OZ9zFz&841dIyw-z3JfK_tN{V=ozlT>`z8V@r;@HX zT@6lFTtNUH4Yy8 z5$elETi10}S#UJ0ePt=Ks|7kTwp3#cau;HSqJOkeBPgfyO)%9`&`d$)ETIUV2}D4e zKiEv>OU}{~p=RxF&@suXZ;iFlIFXVjW~XiQRc?snMgxmX{U~kDDZRhIx_ATZUJO|T zjin{3qx?N$8~oxJm4OoQ9#kIM4ZU@;6I*+DfAQK1{gOtc5GYP>(npGF^M%kc_4UcZ zI)6r!|1wChgh@s?`+KWd0t#{9lPF{#SO=Pd0c0g6HyMf+}7s@=y11Yx|`2Kx~QDJ+{xAY50kDz-}-0BC`fog zS1$#%6R)q!W7pT}o1EsgrbU41{rUA@MK~&dQp1y4TDuu1zWJ9xZ8obW9tq 0){ + throw new SkLearnException("The generated markup requires PMML schema version " + minVersion.getVersion() + " or newer"); + } + + Version maxVersion = versionInspector.getMaximum(); + if(maxVersion.compareTo(this.version) < 0){ + throw new SkLearnException("The generated markup requires PMML schema version " + maxVersion.getVersion() + " or older"); + } + } // End if + if(!this.outputFile.exists()){ File absoluteOutputFile = this.outputFile.getAbsoluteFile(); @@ -88,8 +122,34 @@ private void run() throws Exception { } } - try(OutputStream os = new FileOutputStream(this.outputFile)){ - MetroJAXBUtil.marshalPMML(pmml, os); + if(this.version != null && this.version.compareTo(Version.PMML_4_4) < 0){ + File tempFile = File.createTempFile("sklearn2pmml-", ".pmml"); + + try(OutputStream os = new FileOutputStream(tempFile)){ + MetroJAXBUtil.marshalPMML(pmml, os); + } + + SAXTransformerFactory transformerFactory = (SAXTransformerFactory)TransformerFactory.newInstance(); + + try(OutputStream os = new FileOutputStream(this.outputFile)){ + TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(); + transformerHandler.setResult(new StreamResult(os)); + + ExportFilter exportFilter = new ExportFilter(SAXUtil.createXMLReader(), this.version); + exportFilter.setContentHandler(transformerHandler); + + try(InputStream is = new FileInputStream(tempFile)){ + exportFilter.parse(new InputSource(is)); + } + } + + tempFile.delete(); + } else + + { + try(OutputStream os = new FileOutputStream(this.outputFile)){ + MetroJAXBUtil.marshalPMML(pmml, os); + } } } diff --git a/src/main/java/com/sklearn2pmml/VersionConverter.java b/src/main/java/com/sklearn2pmml/VersionConverter.java new file mode 100644 index 0000000..0ff016f --- /dev/null +++ b/src/main/java/com/sklearn2pmml/VersionConverter.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024 Villu Ruusmann + * + * This file is part of SkLearn2PMML + * + * SkLearn2PMML is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * SkLearn2PMML is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with SkLearn2PMML. If not, see . + */ +package com.sklearn2pmml; + +import java.util.Objects; + +import com.beust.jcommander.IStringConverter; +import org.dmg.pmml.Version; + +public class VersionConverter implements IStringConverter { + + @Override + public Version convert(String string){ + Version[] versions = Version.values(); + + for(Version version : versions){ + + if(!version.isStandard()){ + continue; + } // End if + + if(Objects.equals(version.getNamespaceURI(), string) || Objects.equals(version.getVersion(), string)){ + return version; + } + } + + throw new IllegalArgumentException(string); + } +} \ No newline at end of file