Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Segmentation Fault in computeLiveOut #76

Open
mill1000 opened this issue Dec 10, 2019 · 0 comments
Open

Segmentation Fault in computeLiveOut #76

mill1000 opened this issue Dec 10, 2019 · 0 comments

Comments

@mill1000
Copy link

I'm attempting to implement SHA-256 with QPULib and have encountered a segmentation fault. Here is the backtrace from GDB when compiled for emulation:

Program received signal SIGSEGV, Segmentation fault.
0x000000000800ddfe in computeLiveOut(Seq<SmallSeq<int> >*, Seq<SmallSeq<int> >*, int, SmallSeq<int>*) ()
(gdb) bt
#0  0x000000000800ddfe in computeLiveOut(Seq<SmallSeq<int> >*, Seq<SmallSeq<int> >*, int, SmallSeq<int>*) ()
#1  0x000000000800df9c in liveness(Seq<Instr>*, Seq<SmallSeq<int> >*, Seq<SmallSeq<int> >*) ()
#2  0x000000000800ea18 in regAlloc(Seq<SmallSeq<int> >*, Seq<Instr>*) ()
#3  0x0000000008002142 in compileKernel(Seq<Instr>*, Stmt*) ()
#4  0x0000000008001be6 in Kernel<Ptr<Int>, Ptr<Int> >::Kernel(void (*)(Ptr<Int>, Ptr<Int>)) ()

Here's a minimal example that reproduces the fault:

#include <iostream>
#include "QPULib.h"

// Message-schedule helper for the SHA-256 reproduction: XORs together
// ror(x, 7), ror(x, 18) and (x >> 3).
// NOTE(review): SHA-256's small-sigma0 uses a logical right shift for the last
// term; confirm that `>>` on Int is not an arithmetic (sign-extending) shift.
static Int smsigma0(Int x) {
    return ror(x, 7) ^ ror(x, 18) ^ (x >> 3);
}

// Message-schedule helper for the SHA-256 reproduction: XORs together
// ror(x, 17), ror(x, 19) and (x >> 10).
// NOTE(review): SHA-256's small-sigma1 uses a logical right shift for the last
// term; confirm that `>>` on Int is not an arithmetic (sign-extending) shift.
static Int smsigma1(Int x) {
    return ror(x, 17) ^ ror(x, 19) ^ (x >> 10);
}

// Kernel body used to reproduce the crash: builds only the 64-entry message
// schedule W and produces no output.
//
//   data - source words; entry i of the schedule is read from data[i*16].
//   hash - not used in this reduced version.
void execute_sha256_cpu(Ptr<Int> data, Ptr<Int> hash)
{
    Int W[64];
    // Not used in this reduced version.
    Int a, b, c, d, e, f, g, h;

    // These are ordinary C++ loops, so each iteration contributes its own
    // statements to the kernel (the DEBUG dump shows 16 separate loads).
    for (uint32_t i = 0; i < 16; i++)
        W[i] = data[i*16];
    
    // Expand the remaining 48 schedule entries from earlier ones; the DEBUG
    // dump shows this fully unrolled as well.
    for (uint32_t i = 16; i < 64; i++)
        W[i] = smsigma1(W[i-2]) + W[i-7]+ smsigma0(W[i-15]) + W[i-16];
}

// Driver for the reproduction: compiles the kernel, zero-fills two shared
// buffers and invokes the kernel once. argc/argv are not used.
int main(int argc, char **argv)
{
    // Compile the function to a QPU kernel k
    // NOTE(review): the backtrace places the fault inside the Kernel
    // constructor (compileKernel -> regAlloc -> liveness), so execution
    // presumably never gets past this line.
    auto k = compile(execute_sha256_cpu);

    k.setNumQPUs(1);

    // Allocate and initialise arrays shared between CPU and QPUs
    SharedArray<int> data(16*64), hash(16*64);
    for (uint32_t i = 0; i < 16*64; i++)
    {
        data[i] = 0;
        hash[i] = 0;
    }

    // Invoke the compiled kernel with the two shared arrays as arguments.
    k(&data,&hash);
}

Here's the output of the program when DEBUG is enabled:

Source code
===========

v0 = UNIFORM;
v1 = UNIFORM;
v4 = UNIFORM;
v5 = UNIFORM;
v6 = *(v5+(0 << 2));
v7 = *(v5+(16 << 2));
v8 = *(v5+(32 << 2));
v9 = *(v5+(48 << 2));
v10 = *(v5+(64 << 2));
v11 = *(v5+(80 << 2));
v12 = *(v5+(96 << 2));
v13 = *(v5+(112 << 2));
v14 = *(v5+(128 << 2));
v15 = *(v5+(144 << 2));
v16 = *(v5+(160 << 2));
v17 = *(v5+(176 << 2));
v18 = *(v5+(192 << 2));
v19 = *(v5+(208 << 2));
v20 = *(v5+(224 << 2));
v21 = *(v5+(240 << 2));
v78 = v7;
v79 = (((v78 ror 7) ^ (v78 ror 18)) ^ (v78 >> 3));
v80 = v20;
v81 = (((v80 ror 17) ^ (v80 ror 19)) ^ (v80 >> 10));
v22 = (((v81+v15)+v79)+v6);
v82 = v8;
v83 = (((v82 ror 7) ^ (v82 ror 18)) ^ (v82 >> 3));
v84 = v21;
v85 = (((v84 ror 17) ^ (v84 ror 19)) ^ (v84 >> 10));
v23 = (((v85+v16)+v83)+v7);
v86 = v9;
v87 = (((v86 ror 7) ^ (v86 ror 18)) ^ (v86 >> 3));
v88 = v22;
v89 = (((v88 ror 17) ^ (v88 ror 19)) ^ (v88 >> 10));
v24 = (((v89+v17)+v87)+v8);
v90 = v10;
v91 = (((v90 ror 7) ^ (v90 ror 18)) ^ (v90 >> 3));
v92 = v23;
v93 = (((v92 ror 17) ^ (v92 ror 19)) ^ (v92 >> 10));
v25 = (((v93+v18)+v91)+v9);
v94 = v11;
v95 = (((v94 ror 7) ^ (v94 ror 18)) ^ (v94 >> 3));
v96 = v24;
v97 = (((v96 ror 17) ^ (v96 ror 19)) ^ (v96 >> 10));
v26 = (((v97+v19)+v95)+v10);
v98 = v12;
v99 = (((v98 ror 7) ^ (v98 ror 18)) ^ (v98 >> 3));
v100 = v25;
v101 = (((v100 ror 17) ^ (v100 ror 19)) ^ (v100 >> 10));
v27 = (((v101+v20)+v99)+v11);
v102 = v13;
v103 = (((v102 ror 7) ^ (v102 ror 18)) ^ (v102 >> 3));
v104 = v26;
v105 = (((v104 ror 17) ^ (v104 ror 19)) ^ (v104 >> 10));
v28 = (((v105+v21)+v103)+v12);
v106 = v14;
v107 = (((v106 ror 7) ^ (v106 ror 18)) ^ (v106 >> 3));
v108 = v27;
v109 = (((v108 ror 17) ^ (v108 ror 19)) ^ (v108 >> 10));
v29 = (((v109+v22)+v107)+v13);
v110 = v15;
v111 = (((v110 ror 7) ^ (v110 ror 18)) ^ (v110 >> 3));
v112 = v28;
v113 = (((v112 ror 17) ^ (v112 ror 19)) ^ (v112 >> 10));
v30 = (((v113+v23)+v111)+v14);
v114 = v16;
v115 = (((v114 ror 7) ^ (v114 ror 18)) ^ (v114 >> 3));
v116 = v29;
v117 = (((v116 ror 17) ^ (v116 ror 19)) ^ (v116 >> 10));
v31 = (((v117+v24)+v115)+v15);
v118 = v17;
v119 = (((v118 ror 7) ^ (v118 ror 18)) ^ (v118 >> 3));
v120 = v30;
v121 = (((v120 ror 17) ^ (v120 ror 19)) ^ (v120 >> 10));
v32 = (((v121+v25)+v119)+v16);
v122 = v18;
v123 = (((v122 ror 7) ^ (v122 ror 18)) ^ (v122 >> 3));
v124 = v31;
v125 = (((v124 ror 17) ^ (v124 ror 19)) ^ (v124 >> 10));
v33 = (((v125+v26)+v123)+v17);
v126 = v19;
v127 = (((v126 ror 7) ^ (v126 ror 18)) ^ (v126 >> 3));
v128 = v32;
v129 = (((v128 ror 17) ^ (v128 ror 19)) ^ (v128 >> 10));
v34 = (((v129+v27)+v127)+v18);
v130 = v20;
v131 = (((v130 ror 7) ^ (v130 ror 18)) ^ (v130 >> 3));
v132 = v33;
v133 = (((v132 ror 17) ^ (v132 ror 19)) ^ (v132 >> 10));
v35 = (((v133+v28)+v131)+v19);
v134 = v21;
v135 = (((v134 ror 7) ^ (v134 ror 18)) ^ (v134 >> 3));
v136 = v34;
v137 = (((v136 ror 17) ^ (v136 ror 19)) ^ (v136 >> 10));
v36 = (((v137+v29)+v135)+v20);
v138 = v22;
v139 = (((v138 ror 7) ^ (v138 ror 18)) ^ (v138 >> 3));
v140 = v35;
v141 = (((v140 ror 17) ^ (v140 ror 19)) ^ (v140 >> 10));
v37 = (((v141+v30)+v139)+v21);
v142 = v23;
v143 = (((v142 ror 7) ^ (v142 ror 18)) ^ (v142 >> 3));
v144 = v36;
v145 = (((v144 ror 17) ^ (v144 ror 19)) ^ (v144 >> 10));
v38 = (((v145+v31)+v143)+v22);
v146 = v24;
v147 = (((v146 ror 7) ^ (v146 ror 18)) ^ (v146 >> 3));
v148 = v37;
v149 = (((v148 ror 17) ^ (v148 ror 19)) ^ (v148 >> 10));
v39 = (((v149+v32)+v147)+v23);
v150 = v25;
v151 = (((v150 ror 7) ^ (v150 ror 18)) ^ (v150 >> 3));
v152 = v38;
v153 = (((v152 ror 17) ^ (v152 ror 19)) ^ (v152 >> 10));
v40 = (((v153+v33)+v151)+v24);
v154 = v26;
v155 = (((v154 ror 7) ^ (v154 ror 18)) ^ (v154 >> 3));
v156 = v39;
v157 = (((v156 ror 17) ^ (v156 ror 19)) ^ (v156 >> 10));
v41 = (((v157+v34)+v155)+v25);
v158 = v27;
v159 = (((v158 ror 7) ^ (v158 ror 18)) ^ (v158 >> 3));
v160 = v40;
v161 = (((v160 ror 17) ^ (v160 ror 19)) ^ (v160 >> 10));
v42 = (((v161+v35)+v159)+v26);
v162 = v28;
v163 = (((v162 ror 7) ^ (v162 ror 18)) ^ (v162 >> 3));
v164 = v41;
v165 = (((v164 ror 17) ^ (v164 ror 19)) ^ (v164 >> 10));
v43 = (((v165+v36)+v163)+v27);
v166 = v29;
v167 = (((v166 ror 7) ^ (v166 ror 18)) ^ (v166 >> 3));
v168 = v42;
v169 = (((v168 ror 17) ^ (v168 ror 19)) ^ (v168 >> 10));
v44 = (((v169+v37)+v167)+v28);
v170 = v30;
v171 = (((v170 ror 7) ^ (v170 ror 18)) ^ (v170 >> 3));
v172 = v43;
v173 = (((v172 ror 17) ^ (v172 ror 19)) ^ (v172 >> 10));
v45 = (((v173+v38)+v171)+v29);
v174 = v31;
v175 = (((v174 ror 7) ^ (v174 ror 18)) ^ (v174 >> 3));
v176 = v44;
v177 = (((v176 ror 17) ^ (v176 ror 19)) ^ (v176 >> 10));
v46 = (((v177+v39)+v175)+v30);
v178 = v32;
v179 = (((v178 ror 7) ^ (v178 ror 18)) ^ (v178 >> 3));
v180 = v45;
v181 = (((v180 ror 17) ^ (v180 ror 19)) ^ (v180 >> 10));
v47 = (((v181+v40)+v179)+v31);
v182 = v33;
v183 = (((v182 ror 7) ^ (v182 ror 18)) ^ (v182 >> 3));
v184 = v46;
v185 = (((v184 ror 17) ^ (v184 ror 19)) ^ (v184 >> 10));
v48 = (((v185+v41)+v183)+v32);
v186 = v34;
v187 = (((v186 ror 7) ^ (v186 ror 18)) ^ (v186 >> 3));
v188 = v47;
v189 = (((v188 ror 17) ^ (v188 ror 19)) ^ (v188 >> 10));
v49 = (((v189+v42)+v187)+v33);
v190 = v35;
v191 = (((v190 ror 7) ^ (v190 ror 18)) ^ (v190 >> 3));
v192 = v48;
v193 = (((v192 ror 17) ^ (v192 ror 19)) ^ (v192 >> 10));
v50 = (((v193+v43)+v191)+v34);
v194 = v36;
v195 = (((v194 ror 7) ^ (v194 ror 18)) ^ (v194 >> 3));
v196 = v49;
v197 = (((v196 ror 17) ^ (v196 ror 19)) ^ (v196 >> 10));
v51 = (((v197+v44)+v195)+v35);
v198 = v37;
v199 = (((v198 ror 7) ^ (v198 ror 18)) ^ (v198 >> 3));
v200 = v50;
v201 = (((v200 ror 17) ^ (v200 ror 19)) ^ (v200 >> 10));
v52 = (((v201+v45)+v199)+v36);
v202 = v38;
v203 = (((v202 ror 7) ^ (v202 ror 18)) ^ (v202 >> 3));
v204 = v51;
v205 = (((v204 ror 17) ^ (v204 ror 19)) ^ (v204 >> 10));
v53 = (((v205+v46)+v203)+v37);
v206 = v39;
v207 = (((v206 ror 7) ^ (v206 ror 18)) ^ (v206 >> 3));
v208 = v52;
v209 = (((v208 ror 17) ^ (v208 ror 19)) ^ (v208 >> 10));
v54 = (((v209+v47)+v207)+v38);
v210 = v40;
v211 = (((v210 ror 7) ^ (v210 ror 18)) ^ (v210 >> 3));
v212 = v53;
v213 = (((v212 ror 17) ^ (v212 ror 19)) ^ (v212 >> 10));
v55 = (((v213+v48)+v211)+v39);
v214 = v41;
v215 = (((v214 ror 7) ^ (v214 ror 18)) ^ (v214 >> 3));
v216 = v54;
v217 = (((v216 ror 17) ^ (v216 ror 19)) ^ (v216 >> 10));
v56 = (((v217+v49)+v215)+v40);
v218 = v42;
v219 = (((v218 ror 7) ^ (v218 ror 18)) ^ (v218 >> 3));
v220 = v55;
v221 = (((v220 ror 17) ^ (v220 ror 19)) ^ (v220 >> 10));
v57 = (((v221+v50)+v219)+v41);
v222 = v43;
v223 = (((v222 ror 7) ^ (v222 ror 18)) ^ (v222 >> 3));
v224 = v56;
v225 = (((v224 ror 17) ^ (v224 ror 19)) ^ (v224 >> 10));
v58 = (((v225+v51)+v223)+v42);
v226 = v44;
v227 = (((v226 ror 7) ^ (v226 ror 18)) ^ (v226 >> 3));
v228 = v57;
v229 = (((v228 ror 17) ^ (v228 ror 19)) ^ (v228 >> 10));
v59 = (((v229+v52)+v227)+v43);
v230 = v45;
v231 = (((v230 ror 7) ^ (v230 ror 18)) ^ (v230 >> 3));
v232 = v58;
v233 = (((v232 ror 17) ^ (v232 ror 19)) ^ (v232 >> 10));
v60 = (((v233+v53)+v231)+v44);
v234 = v46;
v235 = (((v234 ror 7) ^ (v234 ror 18)) ^ (v234 >> 3));
v236 = v59;
v237 = (((v236 ror 17) ^ (v236 ror 19)) ^ (v236 >> 10));
v61 = (((v237+v54)+v235)+v45);
v238 = v47;
v239 = (((v238 ror 7) ^ (v238 ror 18)) ^ (v238 >> 3));
v240 = v60;
v241 = (((v240 ror 17) ^ (v240 ror 19)) ^ (v240 >> 10));
v62 = (((v241+v55)+v239)+v46);
v242 = v48;
v243 = (((v242 ror 7) ^ (v242 ror 18)) ^ (v242 >> 3));
v244 = v61;
v245 = (((v244 ror 17) ^ (v244 ror 19)) ^ (v244 >> 10));
v63 = (((v245+v56)+v243)+v47);
v246 = v49;
v247 = (((v246 ror 7) ^ (v246 ror 18)) ^ (v246 >> 3));
v248 = v62;
v249 = (((v248 ror 17) ^ (v248 ror 19)) ^ (v248 >> 10));
v64 = (((v249+v57)+v247)+v48);
v250 = v50;
v251 = (((v250 ror 7) ^ (v250 ror 18)) ^ (v250 >> 3));
v252 = v63;
v253 = (((v252 ror 17) ^ (v252 ror 19)) ^ (v252 >> 10));
v65 = (((v253+v58)+v251)+v49);
v254 = v51;
v255 = (((v254 ror 7) ^ (v254 ror 18)) ^ (v254 >> 3));
v256 = v64;
v257 = (((v256 ror 17) ^ (v256 ror 19)) ^ (v256 >> 10));
v66 = (((v257+v59)+v255)+v50);
v258 = v52;
v259 = (((v258 ror 7) ^ (v258 ror 18)) ^ (v258 >> 3));
v260 = v65;
v261 = (((v260 ror 17) ^ (v260 ror 19)) ^ (v260 >> 10));
v67 = (((v261+v60)+v259)+v51);
v262 = v53;
v263 = (((v262 ror 7) ^ (v262 ror 18)) ^ (v262 >> 3));
v264 = v66;
v265 = (((v264 ror 17) ^ (v264 ror 19)) ^ (v264 >> 10));
v68 = (((v265+v61)+v263)+v52);
v266 = v54;
v267 = (((v266 ror 7) ^ (v266 ror 18)) ^ (v266 >> 3));
v268 = v67;
v269 = (((v268 ror 17) ^ (v268 ror 19)) ^ (v268 >> 10));
v69 = (((v269+v62)+v267)+v53);
flush()
If (any(v0==0))
  v270 = (v1-1);
  v271 = 0;
  While (any(v271<v270))
    semaDec(15)
    v271 = (v271+1);
  End
  hostIRQ()
Else
  semaInc(15)
End

Segmentation fault (core dumped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant