/****************************************************************************** * Copyright © 2014-2017 The SuperNET Developers. * * * * See the AUTHORS, DEVELOPER-AGREEMENT and LICENSE files at * * the top-level directory of this distribution for the individual copyright * * holder information and the developer policies on copyright and licensing. * * * * Unless otherwise agreed in a custom licensing agreement, no part of the * * SuperNET software, including this file may be copied, modified, propagated * * or distributed except according to the terms contained in the LICENSE file * * * * Removal or modification of this copyright notice is prohibited. * * * ******************************************************************************/ /* requires *vals and *vars to be initialized external calls: OS_milliseconds(), get_jfp_features(), get_yval(), set_ocas_model(), update_ocas_preds(), ocasCL_glue() */ //#include typedef float svmtype; #ifdef WIN32 // fadedreamz@gmail.com - added for successful compilation, however, for MSVC probably require a particular OpenCL SDK // to work with it (e,g nvidia or amd SDK) typedef struct fake_opencl_double { double x; double y; }double2; #else typedef double double2 __attribute__((ext_vector_type(2))); #endif #define MAX_VECTORS (1440 * 365 * 5) #define MAIN_MAXCORES 16 #define c_to_refc(c) (c) #define refc_to_c(refc) (refc) #define CONDITION(feature) (feature) #define FEATURE_THRESHOLD 10. #define HWMPERC_THRESHOLD 101 #define HWMPERC_THRESHOLD0 HWMPERC_THRESHOLD #ifdef INSIDE_OPENCL #pragma OPENCL EXTENSION cl_khr_fp64: enable #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE) #define global_barrier() barrier(CLK_GLOBAL_MEM_FENCE) #else //double get_features(register struct ocas_ptrs *PTRS,register int c,register int weekind,register int numfeatures,register double *features,register double *model,register double *addptr,register struct baserel_info *BR,register double wt); svmtype *get_jfp_features(register int selector,register int numfeatures,register int c,register int weekind); #endif #define OCAS_INFINITY (-log(0.0)) #define OCAS_NEGINFINITY (log(0.0)) #define OCAS_DECAY .1 #define OCAS_BIAS 1. #define OCAS_INDEX(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW)) #define QPSOLVER_MAXITER 1000000 #define QPSOLVER_MINITER (QPSOLVER_MAXITER * .1) //#define NUM_CUDA_GROUPS 14 //#define NUM_BUNDLE_ANSWERS 64 //#define NUM_TOPCOEFFIS 558 //#define SMALLVAL .00000000000001 //#define NUM_PRIMARY_FEATURES 4096 #ifdef __APPLE__ #define MAX_OCAS_LHS 512 #else #define MAX_OCAS_LHS 2000 #endif #define MAX_OCAS_FEATURES (1024 * 1024) struct ptrhdr { long allocsize; void *ptr; int devid; char str[64]; }; struct ocas_lhsbuffers { double H[MAX_OCAS_LHS * MAX_OCAS_LHS]; double alpha[MAX_OCAS_LHS],b[MAX_OCAS_LHS],diag_H[MAX_OCAS_LHS],A0[MAX_OCAS_LHS],d[MAX_OCAS_LHS]; double full_A[]; }; struct ocas_CLtmpspace { double2 hpfb[MAX_VECTORS]; int poslist[MAX_VECTORS],neglist[MAX_VECTORS]; }; struct ocas_CLbuffers { double output_pred[MAX_VECTORS],old_output[MAX_VECTORS]; double W[MAX_OCAS_FEATURES+4],oldW[MAX_OCAS_FEATURES+4],new_a[MAX_OCAS_FEATURES+4]; }; struct ocas_vars { struct ptrhdr pH[256]; double Q_P[TRADEBOTS_NUMANSWERS],Q_D[TRADEBOTS_NUMANSWERS]; double netcuts[TRADEBOTS_NUMANSWERS],perc[TRADEBOTS_NUMANSWERS],hwmperc[TRADEBOTS_NUMANSWERS],lastmetrics[TRADEBOTS_NUMANSWERS][2]; double learningrates[TRADEBOTS_NUMANSWERS][2],maxiters[TRADEBOTS_NUMANSWERS],dot_prod_WoldW[TRADEBOTS_NUMANSWERS],cutsum[TRADEBOTS_NUMANSWERS]; double sq_norm_oldW[TRADEBOTS_NUMANSWERS],oldW0[TRADEBOTS_NUMANSWERS],W0[TRADEBOTS_NUMANSWERS],sq_norm_W[TRADEBOTS_NUMANSWERS]; double predabs[TRADEBOTS_NUMANSWERS],predsum[TRADEBOTS_NUMANSWERS],dist[TRADEBOTS_NUMANSWERS]; double xi[TRADEBOTS_NUMANSWERS],pratio[TRADEBOTS_NUMANSWERS],errperc[TRADEBOTS_NUMANSWERS],hwmdist[TRADEBOTS_NUMANSWERS]; double answerabsaves[TRADEBOTS_NUMANSWERS],answeraves[TRADEBOTS_NUMANSWERS]; int answercounts,firstweekinds[TRADEBOTS_NUMANSWERS]; int posA[TRADEBOTS_NUMANSWERS],negA[TRADEBOTS_NUMANSWERS]; int numIt[TRADEBOTS_NUMANSWERS],numlhs[TRADEBOTS_NUMANSWERS],nNZAlpha[TRADEBOTS_NUMANSWERS],trn_err[TRADEBOTS_NUMANSWERS]; int qp_exitflag[TRADEBOTS_NUMANSWERS],exitflag[TRADEBOTS_NUMANSWERS],len[TRADEBOTS_NUMANSWERS]; int have_pendingmodel[TRADEBOTS_NUMANSWERS],cutlen[TRADEBOTS_NUMANSWERS],good[TRADEBOTS_NUMANSWERS],bad[TRADEBOTS_NUMANSWERS]; int nohwm[TRADEBOTS_NUMANSWERS],numposcuts[TRADEBOTS_NUMANSWERS],numnegcuts[TRADEBOTS_NUMANSWERS]; struct ocas_CLbuffers *CLspaces[TRADEBOTS_NUMANSWERS]; struct ocas_lhsbuffers *lhs[TRADEBOTS_NUMANSWERS]; int *weekinds[TRADEBOTS_NUMANSWERS]; float *answers,**features; //unsigned long CLallocsize,tmpallocsize,tmpCLallocsize; double C,TolRel,TolAbs,MaxTime,QPBound,QPSolverTolAbs,QPSolverTolRel; double output_time,sort_time,add_time,w_time,qp_solver_time,ocas_time; // total time spent in svm_ocas_solver int selector,numthreads,starti,modelind,c,refc,maxlen,numfeatures,firstweekind,startweekind,endweekind,numptrs,maxlhs; }; /////////////////////////// Most of runtime is in the add/dot functions #ifdef INSIDE_OPENCL typedef double svmtype; // numCLthreads: NUM_CUDA_CORES, numgroups: (numdocs + NUM_CUDA_CORES-1)/NUM_CUDA_CORES __kernel void dot_featuresCL(__local void *lp,__global double *preds,int numfeatures,__global double *W,double W0,__global svmtype *matrix,int numdocs) { register int i,j,docid; register double sum; register __global svmtype *features; if ( (docid = (int)get_global_id(0)) < numdocs ) { sum = 0.; features = &matrix[docid * numfeatures]; for (i=0; i>= 1; if ( docid < numdocs ) { features = &matrix[docid * numfeatures]; if ( dir == 0 ) sum += CONDITION(features[j]); else sum -= CONDITION(features[j]); } } new_a[j] = sum; } } #else void ocas_purge(struct ocas_vars *vars) { int32_t i; free(vars->answers); for (i=0; imaxlen; i++) if ( vars->features[i] != 0 ) free(vars->features[i]); free(vars->features); for (i=0; iCLspaces[i] != 0 ) myaligned_free(vars->CLspaces[i],sizeof(*vars->CLspaces[i])); if ( vars->lhs[i] != 0 ) myaligned_free(vars->lhs[i],sizeof(*vars->lhs[i]) + vars->numfeatures*vars->maxlhs*sizeof(double)); if ( vars->weekinds[i] != 0 ) free(vars->weekinds[i]); } free(vars); } /*static inline double dot_expanded_features(register double *W,register int c,register int selector,register int numfeatures) { fatal("dot_expanded_features not implemented"); return(0); } static inline void add_expanded_features(register double *W,register double y,register int c,register int selector,register int numfeatures) { fatal("add_expanded_features not implemented"); }*/ static inline double calc_ocas_output(register struct ocas_vars *vars,register int selector,register int c,register int weekind,register int answerind,register double *W,register double W0,register int numfeatures) { register svmtype *features; register double feature,y,sum = 0.; register int coeffi; if ( (y= vars->answers[(weekind-vars->starti)*TRADEBOTS_NUMANSWERS + answerind]) != 0.f ) { if ( (features= vars->features[weekind-vars->starti]) != 0 )//get_jfp_features(vars->selector,numfeatures,c,weekind)) != 0 ) { #ifdef OCAS_USE_TOPCOEFFIS for (int i=0; iselector,numfeatures); sum = y * (W0 + sum); } // printf("%f ",sum); return(sum); } static inline void add_ocas_output(register double y,register struct ocas_vars *vars,register int selector,register int c,register int weekind,register int answerind,register double *W,register double *new_a,register int numfeatures) { register int coeffi; register svmtype *features,feature; if ( y != 0 ) { if ( (features= vars->features[weekind-vars->starti]) != 0 )//get_jfp_features(vars->selector,numfeatures,c,weekind)) != 0 ) { //features = get_jfp_features(vars->selector,numfeatures,c,weekind); #ifdef OCAS_USE_TOPCOEFFIS int32_t i; for (i=0; iselector,numfeatures); } } static inline void STocas_calc_outputs(register struct ocas_vars *vars,register int c,register int answerind,register double *output,register double *old_output,register double *W,register double W0,register int numfeatures,register int *weekinds,register int numdocs) { register int i,j; //vars->good[answerind] = vars->bad[answerind] = 0; //printf("start STocas_calc_outputs.(%p %s.A%d %p) %p %p %p\n",vars,CONTRACTS[c],answerind,weekinds,output,old_output,W); for (i=0; iselector,c,weekinds[i],answerind,W,W0,numfeatures); if ( 1 && isnan(output[i]) != 0 ) { svmtype *features = vars->features[weekinds[i]-vars->starti];//get_jfp_features(vars->selector,numfeatures,c,weekinds[i]); if ( features != 0 ) { for (j=0; janswers[(i-vars->starti)*TRADEBOTS_NUMANSWERS + answerind]*output[i] <= 0 ) { if ( vars->answers[(i-vars->starti)*TRADEBOTS_NUMANSWERS + answerind] != 0.f ) printf("(%f %f) ",vars->answers[(i-vars->starti)*TRADEBOTS_NUMANSWERS + answerind],output[i]); } //printf("[%f %f] ",vars->answers[(i-vars->starti)*TRADEBOTS_NUMANSWERS + answerind],output[i]); } } //printf("finish STocas_calc_outputs\n"); } static inline void STocas_add_newcuts(register struct ocas_vars *vars,register int answerind,register int numfeatures,register int *weekinds,register int *new_cut,register int numcuts,register double *W,register double *new_a) { register int weekind,dir,i,c = vars->c; memset(new_a,0,sizeof(*new_a) * numfeatures); //printf("STocas_add_newcuts numcuts.%d numfeatures.%d\n",numcuts,numfeatures); for (i=0; i>= 1; add_ocas_output(dir==0?1:-1,vars,vars->selector,c,weekind,answerind,W,new_a,numfeatures); } } //////////////////////////// end of add/dot functions static inline double _dbufave(register double *buf,register int len) { register int i,n; register double sum; sum = 0.; n = 0; for (i=0; i 0.0000000001 ) { n++; sum += buf[i]; } } if ( n != 0 ) sum /= n; if ( fabs(sum) <= 0.0000000001 ) sum = 0.; return(sum); } static inline void add_newcut_entry(register struct ocas_vars *vars,register int answerind,register int *new_cut,register int i,register int weekind,register double y) { weekind <<= 1; if ( y > 0 ) vars->numposcuts[answerind]++; else if ( y < 0 ) vars->numnegcuts[answerind]++, weekind |= 1; new_cut[vars->cutlen[answerind]++] = weekind; } static inline double validate_ocas_model(register struct ocas_vars *vars,register int answerind,register double *output_pred,register double *old_output,register int *weekinds,register int numdocs,register double *W,register double W0,register int numfeatures,register int paranoid) { register svmtype *features; register double y,pred,perc,answer=0.,feature; register int i,j,pos,neg,good,bad,oldcuts,training_errors,weekind,nonz,posA,negA; for (i=pos=neg=good=bad=oldcuts=training_errors=posA=negA=0; ianswers[(weekind-vars->starti)*TRADEBOTS_NUMANSWERS + answerind]) != 0. ) { if ( y > 0 ) posA++; else if ( y < 0 ) negA++; if ( paranoid != 0 ) { pred = 0.; if ( (features= vars->features[weekind-vars->starti]) != 0 )//get_jfp_features(vars->selector,numfeatures,vars->c,weekind)) != 0 ) { for (j=nonz=0; j SMALLVAL ) pred += W0; } else pred = 0;//dot_expanded_features(W,c,selector,numfeatures); if ( output_pred[i] != 0 && fabs(pred - output_pred[i]) > .000001 ) // if ( (rand() % 10000) == 0 ) printf("i.%d A %9.6f pred %9.6f != output_pred %9.6f [%14.10f]\n",i,answer,pred,output_pred[i],pred-output_pred[i]); } else pred = output_pred[i], nonz = numfeatures; if ( nonz != 0 ) { if ( pred > 0 ) pos++; else if ( pred < 0 ) neg++; if ( pred*y > 0 ) good++; else if ( pred*y < 0 ) bad++; } if ( old_output[i] <= 1. ) { oldcuts++; if ( old_output[i] <= 0. ) training_errors++; } } } nonz = 0; for (i=0; i>>>>> %d.A%02d.(+%-6d -%-6d oc.%-6d | good.%-6d bad.%-6d >>>>> %6.2f%% <<<<<).%-6d | W0 %9.6f W[%d] %9.6f | A +%-6d -%-6d | paranoid.%d numdocs.%d\n",c_to_refc(vars->c),answerind,pos,neg,oldcuts,good,bad,perc,training_errors,W0,nonz,_dbufave(W,numfeatures),posA,negA,paranoid,numdocs); return(perc); } static int _increasing_double(const void *a,const void *b) { #define double_a (*(double *)a) #define double_b (*(double *)b) if ( double_b > double_a ) return(-1); else if ( double_b < double_a ) return(1); return(0); #undef double_a #undef double_b } static inline void calc_ocas_strategy(register struct ocas_vars *vars,register int answerind,register double C,register int numfeatures,register int len,register int *weekinds,register int *new_cut,register double *W,register double *oldW,register double *output,register double *old_output,register double2 *hpfb) { double answermag; register int i,j,num_hp,good,bad,zero; register double Bval,Cval,newoutput,W0,oldW0,sq_norm_W,A0val,B0,dist,GradVal,t,t1,t2,t_new,val,GradVal_new,y,starttime,*preds = output; num_hp = 0; W0 = vars->W0[answerind]; oldW0 = vars->oldW0[answerind]; A0val = vars->sq_norm_W[answerind] - (2. * vars->dot_prod_WoldW[answerind]) + vars->sq_norm_oldW[answerind]; B0 = (vars->dot_prod_WoldW[answerind] - vars->sq_norm_oldW[answerind]); GradVal = B0; for (i=0; ianswers[(weekinds[i]-vars->starti)*TRADEBOTS_NUMANSWERS + answerind]) != 0.f ) { svmtype *features = vars->features[weekinds[i]-vars->starti];//get_jfp_features(vars->selector,numfeatures,vars->c,weekinds[i]); //printf("i.%d weekind.%d starti.%d y %f %p\n",i,weekinds[i],vars->starti,y,features); if ( 0 && features != 0 ) { double oldsum=oldW0,sum=W0; for (j=0; j .000001 ) { printf("A%d numIt.%d docid.%-6d sum %11.7f * y%2.0f %11.7f != %11.7f output [%11.7f] W0 %11.7f oldW0 %11.7f\n",answerind,vars->numIt[answerind],i,sum,y,sum*y,output[i],output[i]-sum*y,W0,oldW0); //output[i] = sum*y; } if ( fabs(oldsum*y - old_output[i]) > .000001 ) { if ( old_output[i] != 0 && oldW0 != 0 && (rand() % 1000) == 0 ) printf("A%d numIt.%d docid.%-6d oldsum %11.7f * y%2.0f %11.7f != %11.7f oldoutput [%11.7f] W0 %11.7f oldW0 %11.7f\n",answerind,vars->numIt[answerind],i,oldsum,y,oldsum*y,old_output[i],old_output[i]-oldsum*y,W0,oldW0); old_output[i] = oldsum*y; } } Cval = C * (1. - old_output[i]); Bval = C * (old_output[i] - output[i]); if ( Bval != 0 ) val = -(Cval / Bval); else val = OCAS_NEG_INF; if ( val > 0 ) { hpfb[num_hp].y = Bval; hpfb[num_hp].x = val; num_hp++; } if ( (Bval < 0 && val > 0) || (Bval > 0 && val <= 0) ) GradVal += Bval; } } //printf("num_hp.%d\n",num_hp); t = 0; if ( GradVal < 0 ) { starttime = OS_milliseconds(); qsort(hpfb,num_hp,sizeof(double2),_increasing_double); //ocas_sort(hpfb,num_hp); i = 0; while ( GradVal < 0 && i < num_hp ) { t_new = hpfb[i].x; GradVal_new = GradVal + fabs(hpfb[i].y) + A0val*(t_new - t); if ( GradVal_new >= 0 ) t = t + GradVal * (t - t_new) / (GradVal_new - GradVal); else t = t_new, i++; GradVal = GradVal_new; } vars->sort_time += OS_milliseconds() - starttime; } t = MAX(t,0.); // just sanity check; t < 0 should not occur t1 = t; // new (best so far) W t2 = t + OCAS_DECAY*(1. - t); // new cutting plane W0 = oldW0 * (1. - t) + (t * W0); sq_norm_W = W0 * W0; for (j=0; jW0[answerind] = W0; vars->sq_norm_W[answerind] = sq_norm_W; vars->trn_err[answerind] = 0; dist = 0.; for (i=good=bad=zero=0; ianswers[(weekinds[i]-vars->starti)*TRADEBOTS_NUMANSWERS + answerind]) != 0.f ) { answermag = fabs(y); // 1.; if ( (old_output[i] * (1. - t2) + t2*output[i]) <= answermag ) //1. add_newcut_entry(vars,answerind,new_cut,i,weekinds[i],y); newoutput = (old_output[i] * (1. - t1)) + (t1 * output[i]); if ( 0 ) // won't match unless old_output corresponds with features*oldW { double sum=W0; svmtype *features = vars->features[weekinds[i]-vars->starti];//get_jfp_features(vars->selector,numfeatures,vars->c,weekinds[i]); if ( features != 0 ) { for (j=0; j .0000001 ) printf("numIt.%d docid.%-6d w%-6d sum %11.7f * y%2.0f %11.7f != %11.7f newoutput [%11.7f] W0 %11.7f oldW0 %11.7f\n",vars->numIt[answerind],i,weekinds[i],sum,y,sum*y,newoutput,newoutput-sum*y,W0,oldW0); newoutput = sum*y; } } if ( newoutput <= answermag ) { vars->xi[answerind] += (answermag - newoutput); if ( newoutput <= 0. ) vars->trn_err[answerind]++; } preds[i] = y * newoutput; dist += fabs(preds[i] - y); old_output[i] = newoutput; if ( newoutput > 0. ) good++; else if ( newoutput < 0. ) { bad++; //printf("(%f %f) ",y,newoutput); } } else zero++;//,printf("i.%d -> w%d | zeroes.%d good.%d bad.%d of len.%d\n",i,weekinds[i],zero,good,bad,len); } //printf("finished strategy\n"); vars->good[answerind] = good; vars->bad[answerind] = bad; vars->dist[answerind] = dist / MAX(1,good+bad); vars->perc[answerind] = (100. * (double)vars->good[answerind]) / MAX(1,good+bad); if ( vars->perc[answerind] > vars->hwmperc[answerind] || (vars->perc[answerind] == vars->hwmperc[answerind] && (vars->hwmdist[answerind] == 0 || vars->dist[answerind] < vars->hwmdist[answerind])) ) { double set_ocas_model(int refc,int answerind,double *W,double W0,int numfeatures,int firstweekind,int len,int bad,double dist,double predabs,int posA,int negA,double answerabs,double aveanswer); vars->W0[answerind] = set_ocas_model(vars->refc,answerind,vars->CLspaces[answerind]->W,vars->W0[answerind],vars->numfeatures,vars->firstweekind,vars->len[answerind],vars->trn_err[answerind],vars->dist[answerind],vars->predabs[answerind],vars->posA[answerind],vars->negA[answerind],vars->answerabsaves[answerind],0.); vars->nohwm[answerind] = 0; vars->hwmperc[answerind] = vars->perc[answerind]; vars->hwmdist[answerind] = vars->dist[answerind]; } else vars->nohwm[answerind]++; //printf("good.%d bad.%d zero.%d errors.%d | selector.%d\n",good,bad,zero,vars->trn_err[answerind],vars->selector); } static inline double ocas_splx_solver(register int *nonzalphap,register int maxlhs,register double *d,register double *activeH,register double *diag_H,register double *f,register double C,register double *alpha,register int n,register int MaxIter,register double TolAbs,register double TolRel,register double QP_TH) { register double *col_u,*col_v; register double QP,QD,lastQD,tmp,diff,distA,distB,etaA,etaB,improv,tmp_num,delta,x_neq0,xval,dval,diagval,tmp_den,tau; register int u=0,v=0,i,j,iter,nonzalpha,unlikely = 0; QP = distA = distB = OCAS_PLUS_INF; lastQD = QD = OCAS_NEG_INF; x_neq0 = C; etaA = etaB = 0.; for (i=0; i 0. ) { col_u = &activeH[maxlhs * i]; for (j=0; j 10*(MaxIter-iter) && etaB > 10*(MaxIter-iter) ) unlikely++; else unlikely = 0; } else unlikely = 0; diff = (QP - QD); if ( 0 && (diff <= fabs(QP)*TolRel || diff <= TolAbs || QP <= QP_TH || unlikely > 100) ) { if ( 0 ) { if ( diff <= fabs(QP)*TolRel ) printf("caseA %f | ",diff - fabs(QP)*TolRel); else if ( diff <= TolAbs ) printf("caseB %f | ",diff - TolAbs); else if ( etaA > 2*(MaxIter-iter) && etaB > 2*(MaxIter-iter) ) printf("caseC etas %f %f | ",etaA,etaB); printf("%d: QP %f QD %f diff %f n.%d d0 %9.6f dA %9.6f %9.6f dB %9.6f %9.6f\n",iter,QP,QD,QP-QD,n,d[0],distA,etaA,distB,etaB); } break; } distA = (diff - fabs(QP)*TolRel); distB = (diff - TolAbs); lastQD = QD; if ( d[u] > 0 ) u = -1; else delta -= C * d[u]; // if satisfied then k-th block of variables needs update if ( delta > TolAbs && delta > (TolRel * fabs(QP)) ) { // for fixed u select v = argmax_{i in I_k} Improvement(i) improv = OCAS_NEG_INF; for (i=0; i<=n; i++) { if ( i == u || (xval = ((i 0 ) { tmp_num = xval * dval; if ( tmp_num < tmp_den ) tmp = tmp_num*tmp_num / tmp_den; else tmp = tmp_num - .5 * tmp_den; if ( 0 && i < n ) // jimbo tweak { tmp = alpha[i] * MIN(1.,tmp_num/tmp_den); alpha[i] -= tmp; if ( u == -1 ) x_neq0 += tmp; else alpha[u] += tmp; } if ( tmp > improv ) { improv = tmp; tau = MIN(1.,tmp_num/tmp_den); v = i; } } } // update d = H*x + f if ( v < n ) { tau *= alpha[v]; alpha[v] -= tau; if ( u != -1 ) { alpha[u] += tau; col_v = &activeH[maxlhs * v]; for (i=0; isq_norm_oldW[answerind] = vars->sq_norm_W[answerind]; oldW0 = vars->oldW0[answerind] = vars->W0[answerind]; W0 = 0.; for (i=0; ialpha[i]) > 0 ) { //printf("%9.6f ",alpha); for (j=0; jfull_A[OCAS_INDEX(j,i,numfeatures)]; W0 += lhs->A0[i] * alpha; } } vars->W0[answerind] = W0; sq_norm_W = W0 * W0; dot_prod_WoldW = W0 * oldW0; for (j=0; jalpha,numlhs),W0,sq_norm_W,answerind); vars->dot_prod_WoldW[answerind] = dot_prod_WoldW; vars->sq_norm_W[answerind] = sq_norm_W; } static inline void ocas_update_Lspace(register struct ocas_vars *vars,register int answerind,register double netcuts,register double cut_length,register int numfeatures,register double C,register double QPSolverTolAbs,register double QPSolverTolRel) { register struct ocas_CLbuffers *ptr = vars->CLspaces[answerind]; register struct ocas_lhsbuffers *lhs = vars->lhs[answerind]; register double *new_col_H; register double sq_norm_a,maxiters,metric,tmp; register int i,j,iters,numlhs,maxlhs = vars->maxlhs; numlhs = vars->numlhs[answerind]; new_col_H = &lhs->H[OCAS_INDEX(0,numlhs,maxlhs)]; lhs->A0[numlhs] = netcuts; lhs->b[numlhs] = -cut_length; sq_norm_a = lhs->A0[numlhs] * lhs->A0[numlhs]; for (j=0; jfull_A[OCAS_INDEX(j,numlhs,numfeatures)] = ptr->new_a[j]; if ( fabs(ptr->new_a[j]) > 1000 ) { //printf("(%d %9.6f %f) ",j,ptr->new_a[j],sq_norm_a); ptr->new_a[j] = 0.; } else sq_norm_a += ptr->new_a[j] * ptr->new_a[j]; ptr->oldW[j] = ptr->W[j]; ptr->W[j] = 0.; } new_col_H[numlhs] = sq_norm_a; //printf("QPsolver.A%02d: ABS %f Rel %.11f numlhs.%d cutlen.%f netcuts.%f sq_norm_a %f netcuts.%f\n",answerind,QPSolverTolAbs,QPSolverTolRel,vars->numlhs[answerind],cut_length,lhs->A0[numlhs],sq_norm_a,netcuts); for (i=0; iA0[numlhs] * lhs->A0[i]; for (j=0; jnew_a[j] * lhs->full_A[OCAS_INDEX(j,i,numfeatures)]; new_col_H[i] = tmp; } lhs->d[numlhs] = lhs->alpha[numlhs] = 0.; lhs->diag_H[numlhs] = lhs->H[OCAS_INDEX(numlhs,numlhs,maxlhs)]; for (i=0; i H[OCAS_INDEX(numlhs,i,maxlhs)] = lhs->H[OCAS_INDEX(i,numlhs,maxlhs)]; numlhs = ++vars->numlhs[answerind]; iters = vars->numIt[answerind]; if ( vars->nohwm[answerind] > 3 ) vars->maxiters[answerind] *= 1 + sqrt(vars->nohwm[answerind])/100; else if ( vars->nohwm[answerind] == 0 ) vars->maxiters[answerind] *= .5; if ( vars->maxiters[answerind] > QPSOLVER_MAXITER ) vars->maxiters[answerind] = QPSOLVER_MAXITER; if ( vars->maxiters[answerind] < QPSOLVER_MINITER ) vars->maxiters[answerind] = QPSOLVER_MINITER; maxiters = MAX(QPSOLVER_MINITER,vars->maxiters[answerind]); vars->Q_D[answerind] = ocas_splx_solver(&vars->nNZAlpha[answerind],maxlhs,lhs->d,lhs->H,lhs->diag_H,lhs->b,C,lhs->alpha,vars->numlhs[answerind],MIN(maxiters,QPSOLVER_MAXITER),QPSolverTolAbs,QPSolverTolRel,OCAS_NEG_INF); metric = ((double)vars->len[answerind] / MAX(1,vars->trn_err[answerind])) / 1.; vars->lastmetrics[answerind][iters & 1] = metric; update_ocas_model(ptr->W,ptr->oldW,vars,numfeatures,answerind,lhs,numlhs); } static inline void start_ocas_iter(register struct ocas_vars *vars,register int c,register int answerind) { if ( vars->pratio[answerind] == 0. ) vars->pratio[answerind] = vars->answerabsaves[answerind]; vars->good[answerind] = -1; vars->bad[answerind] = vars->trn_err[answerind] = vars->cutlen[answerind] = vars->numposcuts[answerind] = vars->numnegcuts[answerind] = 0; vars->xi[answerind] = vars->predsum[answerind] = vars->dist[answerind] = vars->cutsum[answerind] = vars->netcuts[answerind] = 0.; } static void ocas_print(struct ocas_vars *vars,int answerind,int ishwm,double C) { int i; double dispvals[4]; //printf("ocas_print.A%d\n",answerind); //printf("%s.A%02d %4d %8.2f | QP %9.3f QD %9.3f [%9.4f %7.3f] SV.%d %4d | M%9.6f (%9.6f max %8.1f %9.6f) %s.A%02d %9.6f%%\n", dispvals[0] = vars->Q_P[answerind]/1000000000.; dispvals[1] = (C * vars->Q_D[answerind])/1000000000.; dispvals[2] = (vars->Q_P[answerind]-C * vars->Q_D[answerind]) / 1000000000; dispvals[3] = (vars->Q_P[answerind]-C * vars->Q_D[answerind]) / MAX(1,fabs(vars->Q_P[answerind])); printf("%3d %d.A%02d +%d -%d",vars->nohwm[answerind],vars->refc,answerind,vars->good[answerind],vars->bad[answerind]); printf(" %4d %8.2f |QP %9.3f QD %10.2f [%11.2f %9.1f] SV.%3d %3d |M%9.3f errs.%-6d %-8.0f %5.2f%% errs %6.5f A%9.6f W0%9.6f D%11.9f\n",//[%7.4f%%]\n", vars->numIt[answerind],vars->ocas_time/1000,dispvals[0],dispvals[1],dispvals[2],dispvals[3], vars->nNZAlpha[answerind], vars->numlhs[answerind], // PTRS->lastmetrics[answerind],PTRS->learningrates[answerind][0],PTRS->maxiters[answerind],PTRS->learningrates[answerind][1], vars->lastmetrics[answerind][0],vars->trn_err[answerind],vars->maxiters[answerind],vars->perc[answerind], vars->errperc[answerind]/100,_dbufave(vars->CLspaces[answerind]->new_a,vars->numfeatures),vars->W0[answerind], vars->dist[answerind]/vars->answerabsaves[answerind]);//_dbufave(vars->hwmperc,81));//,vars->errperc+vars->perc); for (i=0; i<4; i++) if ( isnan(dispvals[i]) != 0 ) break; if ( vars->lastmetrics[answerind][0] > 10 ) usleep(vars->lastmetrics[answerind][0] * vars->lastmetrics[answerind][0]); /*if ( 0 && i < 4 )//|| (vars->answerind >= 32 && fabs(vars->W0) > .9) ) { int save_model(int refc,int answerind,double *W,int numfeatures,double W0,double perc,int posA,int negA); memset(&vars->CLspaces[answerind],0,sizeof(vars->CLspaces[answerind])); vars->W0[answerind] = vars->oldW0[answerind] = 0.; vars->numIt[answerind] = 0; vars->perc[answerind] = vars->hwmperc[answerind] = 0.; printf("reset model %s.A%02d\n",CONTRACTS[vars->selector!=3?vars->refc:NUM_COMBINED],answerind); save_model(vars->refc,answerind,vars->CLspaces[answerind]->W,vars->numfeatures,vars->W0[answerind],vars->perc[answerind]); }*/ } static inline void finish_ocasiter(register int answerind,register struct ocas_vars *vars,register double C) { register double den; vars->have_pendingmodel[answerind] = 0; if ( vars->good[answerind] == 0 && vars->bad[answerind] == 0 ) { vars->bad[answerind] = vars->trn_err[answerind]; vars->good[answerind] = (vars->len[answerind] - vars->trn_err[answerind]); } den = MAX(1.,vars->good[answerind]+vars->bad[answerind]); if ( (vars->predabs[answerind] = (vars->predsum[answerind] / den)) != 0. ) vars->pratio[answerind] = (vars->answerabsaves[answerind] / vars->predabs[answerind]); else vars->pratio[answerind] = 0.; vars->dist[answerind] = sqrt(vars->dist[answerind] / den); //printf("W0 %9.6f pred sum %f %f | pratio %f distsum %f (%f vs hwm %f)\n",vars->W0[answerind],vars->predsum[answerind],vars->predabs[answerind],vars->pratio[answerind],vars->dist[answerind],vars->perc[answerind],vars->hwmperc[answerind]); vars->errperc[answerind] = (100 * (double)vars->trn_err[answerind])/(double)MAX(1,vars->len[answerind]); vars->Q_P[answerind] = 0.5*vars->sq_norm_W[answerind] + (C * vars->xi[answerind]); vars->ocas_time = (vars->output_time + vars->w_time + vars->add_time + vars->sort_time + vars->qp_solver_time); ocas_print(vars,answerind,0,C); } static inline int ocas_iter(struct ocas_vars *vars,int max_nohwm) { int Method = 1; int min_nohwm = 1; int skipflags[84]; static int new_cut[MAX_VECTORS]; static double2 hpfb[MAX_VECTORS]; int inactives[81]; register struct ocas_CLbuffers *ptr; register double netcuts,startmilli,y,psum,pcount,nosum; register int i,numfeatures,cutlen,lastanswerind,lwm,numactive,numthreads,answerind,*weekinds; numactive = 0; if ( (numfeatures= vars->numfeatures) > MAX_OCAS_FEATURES ) { printf("numfeatures > MAX_OCAS_FEATURES\n"); exit(-1); } psum = pcount = nosum = 0; { //printf("c.%d mask %lx %p\n",c,contractmask,PTRS->ocas[c]); memset(inactives,0,sizeof(inactives)); lastanswerind = TRADEBOTS_NUMANSWERS; numfeatures = vars->numfeatures; answerind = 0; //printf("numIt.%d ocas iter.%s A.mask%lx len.%d CLspace.%p lhs.%p | vars.%p\n",vars->numIt[answerind],CONTRACTS[c_to_refc(c)],answerindmask,vars->len[answerind],vars->CLspaces[answerind],vars->lhs[answerind],vars); memset(skipflags,0,sizeof(skipflags)); for (answerind=0; answerindCLspaces[answerind],vars->weekinds[answerind],vars->nohwm[answerind],max_nohwm); if ( vars->hwmperc[answerind] != 0 ) { nosum += vars->nohwm[answerind]; pcount++, psum += vars->hwmperc[answerind]; } //printf("answerind.%d\n",answerind); if ( vars->len[answerind] == 0 || vars->CLspaces[answerind] == 0 || (vars->nohwm[answerind] > min_nohwm && vars->hwmperc[answerind] > ((answerind==0) ? HWMPERC_THRESHOLD0 : HWMPERC_THRESHOLD)) ) { inactives[answerind] = 1; continue; } if ( vars->nohwm[answerind] < max_nohwm ) { numactive++; if ( vars->numIt[answerind]++ == 0 ) { for (i=0; iCLspaces[answerind]->W[i] != 0 ) break; if ( i == numfeatures ) skipflags[answerind] = 1; } ptr = vars->CLspaces[answerind]; weekinds = vars->weekinds[answerind]; //printf("start iter %p %p\n",ptr,weekinds); start_ocas_iter(vars,vars->c,answerind); numthreads = vars->numthreads; if ( skipflags[answerind] != 0 ) { for (i=0; ilen[answerind]; i++) { if ( (y= vars->answers[(weekinds[i]-vars->starti)*TRADEBOTS_NUMANSWERS + answerind]) != 0.f ) { ptr->output_pred[i] = 0; add_newcut_entry(vars,answerind,new_cut,i,weekinds[i],y); } } fprintf(stderr,"skip %d.A%02d cuts +%d -%d, ",c_to_refc(vars->c),answerind,vars->numposcuts[answerind],vars->numnegcuts[answerind]); } else { startmilli = OS_milliseconds(); //printf("%s ocas_calc_outputs.A%d len.%d | numthreads.%d\n",CONTRACTS[c_to_refc(c)],answerind,vars->len[answerind],numthreads); STocas_calc_outputs(vars,vars->c,answerind,ptr->output_pred,ptr->old_output,ptr->W,vars->W0[answerind],numfeatures,weekinds,vars->len[answerind]); //ocas_calc_outputs(PTRS,numthreads,vars,c,answerind,ptr->output_pred,ptr->old_output,ptr->W,vars->W0[answerind],numfeatures,weekinds,vars->len[answerind]); vars->output_time += (OS_milliseconds() - startmilli); if ( Method != 0 ) { startmilli = OS_milliseconds(); //printf("%d calc_ocas_strategy.A%d len.%d | numthreads.%d\n",c_to_refc(vars->c),answerind,vars->len[answerind],numthreads); calc_ocas_strategy(vars,answerind,vars->C,numfeatures,vars->maxlen,weekinds,new_cut,ptr->W,ptr->oldW,ptr->output_pred,ptr->old_output,hpfb); vars->w_time += (OS_milliseconds() - startmilli); } finish_ocasiter(answerind,vars,vars->C); } //printf("%s calc ocas_add_newcuts.A%d poscuts.%d negcuts.%d | numthreads.%d\n",CONTRACTS[c_to_refc(c)],answerind,vars->numposcuts[answerind],vars->numnegcuts[answerind],vars->numthreads); startmilli = OS_milliseconds(); //if ( vars->nohwm[answerind] > 13 ) // numthreads = vars->numthreads;///MAIN_MAXCORES; memset(ptr->new_a,0,sizeof(ptr->new_a)); //ocas_add_newcuts(PTRS,numthreads,vars,answerind,numfeatures,weekinds,new_cut,vars->numposcuts[answerind]+vars->numnegcuts[answerind],ptr->W,ptr->new_a); STocas_add_newcuts(vars,answerind,numfeatures,weekinds,new_cut,vars->numposcuts[answerind]+vars->numnegcuts[answerind],ptr->W,ptr->new_a); vars->add_time += (OS_milliseconds() - startmilli); //printf("done %d calc ocas_add_newcuts.A%d poscuts.%d negcuts.%d | good.%d bad.%d\n",c_to_refc(vars->c),answerind,vars->numposcuts[answerind],vars->numnegcuts[answerind],vars->good[answerind],vars->bad[answerind]); } else inactives[answerind] = 1;//, printf("maxnohwm.%d\n",max_nohwm); } startmilli = OS_milliseconds(); for (answerind=0; answerindnumposcuts[answerind] - vars->numnegcuts[answerind]); cutlen = (vars->numposcuts[answerind] + vars->numnegcuts[answerind]); if ( vars->nohwm[answerind] < lwm ) lwm = vars->nohwm[answerind]; ocas_update_Lspace(vars,answerind,netcuts,cutlen,numfeatures,vars->C,0.,vars->QPSolverTolRel); } vars->qp_solver_time += (OS_milliseconds() - startmilli); } if ( pcount != 0 ) printf("numactive.%d %.0f | ave perf %f%% | ave nohwm %.1f\n",numactive,pcount,psum/pcount,nosum/pcount); return(numactive); } static inline int init_ocas_vars(int numthreads,int selector,long answerindmask,struct ocas_vars *vars,int c,double C,int numfeatures,int maxlhs,int maxlen,int len,double *answerabsave,int *posA,int *negA) { int answerind,lastanswerind,retval = 0; lastanswerind = TRADEBOTS_NUMANSWERS; vars->maxlen = maxlen; vars->numthreads = numthreads; vars->selector = selector; //printf("init_ocas_vars lastanswerind.%d\n",lastanswerind); for (answerind=0; answerindlen[answerind]); if ( vars->len[answerind] > 0 )//&& (answerindmask == -1L || ((1L<refc = c_to_refc(c); vars->c = c; vars->C = C; vars->numfeatures = numfeatures; vars->maxlhs = maxlhs; if ( vars->CLspaces[answerind] == 0 ) vars->CLspaces[answerind] = myaligned_alloc(sizeof(*vars->CLspaces[answerind])); vars->answerabsaves[answerind] = answerabsave[answerind]; vars->posA[answerind] = posA[answerind]; vars->negA[answerind] = negA[answerind]; if ( vars->lhs[answerind] == 0 ) vars->lhs[answerind] = myaligned_alloc(sizeof(*vars->lhs[answerind]) + numfeatures*maxlhs*sizeof(double)); vars->maxiters[answerind] = QPSOLVER_MINITER; vars->trn_err[answerind] = vars->len[answerind]; vars->Q_P[answerind] = 0.5*vars->sq_norm_W[answerind] + C*vars->len[answerind]; vars->perc[answerind] = vars->hwmperc[answerind] = vars->dist[answerind] = vars->hwmdist[answerind] = 0.; vars->numIt[answerind] = 0; //printf("init.A%d %d | %p %p weekinds.%p\n",answerind,vars->len[answerind],vars->CLspaces[answerind],vars->lhs[answerind],vars->weekinds[answerind]); } } //printf("mask.%lx init_ocas_vars selector.%d weekinds[0].%p\n",answerindmask,selector,vars->weekinds[0]); return(retval); } void ocas_init(struct ocas_vars *vars,int32_t c,int32_t numfeatures,int32_t starti,int32_t endi) { struct ocas_CLbuffers *ptr; struct ocas_lhsbuffers *lhs; int32_t nonz,weekind,answerind; double answer,y; if ( numfeatures < 0 ) return; vars->maxlhs = MAX_OCAS_LHS; vars->numfeatures = numfeatures; vars->maxlen = (endi - starti + 1); vars->C = 1.; vars->c = c; vars->TolRel = 0.01; vars->TolAbs = 0.0; vars->QPSolverTolRel = vars->TolRel*0.5; vars->QPSolverTolAbs = vars->TolAbs*0.5; vars->MaxTime = OCAS_INFINITY; vars->QPBound = 0.0; memset(vars->posA,0,sizeof(vars->posA)); memset(vars->negA,0,sizeof(vars->negA)); memset(vars->answeraves,0,sizeof(vars->answeraves)); memset(vars->answerabsaves,0,sizeof(vars->answerabsaves)); memset(vars->firstweekinds,0,sizeof(vars->firstweekinds)); vars->starti = starti; vars->answers = calloc(TRADEBOTS_NUMANSWERS,sizeof(vars->answers)*vars->maxlen); vars->features = calloc(vars->maxlen,sizeof(*vars->features)); for (answerind=0; answerindweekinds[answerind] = calloc(vars->maxlen,sizeof(*vars->weekinds[answerind])); for (weekind=starti; weekind<=endi; weekind++) { if ( (vars->features[weekind - starti]= get_features(numfeatures,c,weekind)) == 0 ) continue; for (answerind=0; answerindposA[answerind]+vars->negA[answerind]) >= vars->maxlen ) continue; if ( (y= get_yval(&answer,0,weekind,c,answerind)) != 0.f ) { vars->answers[(weekind-starti)*TRADEBOTS_NUMANSWERS + answerind] = y; vars->weekinds[answerind][vars->len[answerind]++] = weekind; if ( vars->posA[answerind]+vars->negA[answerind] == 0 ) vars->firstweekinds[answerind] = weekind; vars->answeraves[answerind] += answer; if ( answer > 0 ) { vars->posA[answerind]++; vars->answerabsaves[answerind] += answer; } else if ( answer < 0 ) { vars->negA[answerind]++; vars->answerabsaves[answerind] -= answer; } } } } for (answerind=0; answerindposA[answerind]+vars->negA[answerind])) != 0 ) { vars->answerabsaves[answerind] /= nonz; vars->answeraves[answerind] /= nonz; printf("A%02d.(%9.6f %d %d) ",answerind,vars->answerabsaves[answerind],vars->posA[answerind],vars->negA[answerind]); } init_ocas_vars(1,0,-1,vars,c,vars->C,numfeatures,MAX_OCAS_LHS,vars->maxlen,vars->maxlen,vars->answerabsaves,vars->posA,vars->negA); for (answerind=0; answerindlen[answerind]); lhs = vars->lhs[answerind]; ptr = vars->CLspaces[answerind]; //printf("%d.A%d call init ocas vars weekinds[0] %p numfeatures.%d (%p %p)\n",c_to_refc(vars->c),answerind,vars->weekinds[0],numfeatures,lhs,ptr); if ( lhs == 0 || ptr == 0 ) continue; vars->numlhs[answerind] = 0;//init_full_A(lhs->full_A,vars->numfeatures,c,answerind,models); memset(ptr->W,0,sizeof(*ptr->W) * numfeatures); memset(ptr->oldW,0,sizeof(*ptr->oldW) * numfeatures); vars->W0[answerind] = vars->oldW0[answerind] = 0; #ifndef DISABLE_EXISTINGMODEL double init_model(double *percp,double *W,double *oldW,int c,int answerind,int numfeatures); vars->W0[answerind] = init_model(&vars->hwmperc[answerind],ptr->W,ptr->oldW,c,answerind,vars->numfeatures); if ( _dbufave(ptr->W,numfeatures) != 0 ) validate_ocas_model(vars,answerind,ptr->output_pred,ptr->old_output,vars->weekinds[answerind],vars->len[answerind],ptr->W,vars->W0[answerind],numfeatures,1); #endif //printf("%s.A%d call init ocas vars weekinds[0] %p\n",CONTRACTS[c_to_refc(c)],answerind,vars->weekinds[0]); } vars->output_time = vars->sort_time = vars->w_time = vars->qp_solver_time = vars->ocas_time = vars->add_time = 0; vars->startweekind = starti; vars->endweekind = endi; } int32_t ocas_gen(int32_t c,int32_t numfeatures,int32_t starti,int32_t endi) { int32_t i; struct ocas_vars *vars = calloc(1,sizeof(*vars)); ocas_init(vars,c,numfeatures,starti,endi); for (i=0; i<10; i++) ocas_iter(vars,3); ocas_purge(vars); return(0); } #endif