************************************************************************; * SELC: A macro to perform the sequential elimination of level *; * combinations algorithm. *; * *; * Inputs: *; * dfile : name of the dataset that contains the initial design *; * ffile : name of the dataset that contains the forbidden array *; * farray : name of the dataset that contains the forbidden array *; * plus then design points specified by strength *; * numoff : number of offspring required *; * strength : strength of the forbidden array *; * order : order of the forbidden array *; * numfact : total number of factors in the design *; * dir : direction of desired response *; * 1 = maximum is desired *; * 0 = minimum is desired *; * seed : seed value for random operations. Must be a *; * nonnegative integer. Default value is set to *; * clock time. *; * *; * Outputs: *; * next_gen : a data set that contains the design points to be *; * created and evaluated in the next generation *; * *; * Example: *; * %include 'c:\SELC.sas'; *; * *; * data initial; *; * infile 'c:\initialdesign.txt' expandtabs; *; * input x1-x3 y; *; * run; *; * *; * data forbid; *; * infile 'c:\forbiddenarray.txt' expandtabs; *; * input x1-x3; *; * run; *; * *; * %let L1 = 5; *; * %let L2 = 34; *; * %let L3 = 241; *; * %global L1 L2 L3; *; * *; * %SELC(dfile = initial, *; * ffile = forbid, *; * farray = newforbid, *; * numoff = 5, *; * strength = 10, *; * order = 2, *; * numfact = 3, *; * dir = 1, *; * seed = 1); *; * *; * References: *; * Mandal, A., Wu, C.F.J., and Johnson, K. (2006). SELC: Sequential *; * elimination of level combinations by means of modified genetic *; * algorithms. Technometrics, 48(2), 273-283. *; * *; * Mandal, A., Johnson, K., Wu, C.F.J., and Bornemeier, D. (2007). *; * Identifying promising compounds in drug discovery: Genetic *; * algorithms and some new statistical techniques. Journal of *; * Chemical Information and Modeling, 47(3), 981-988. *; * (DOI: 10.1021/ci600556v) *; * Highlighted in Drug Discovery News Online: *; * http://www.drugdiscoverynews.com/index.php?newsarticle=1318 *; * *; * Written by Kjell Johnson, Abhyuday Mandal, and Tan Ding *; * March 2008 *; * Version 1.0 *; ************************************************************************; ************************************************************************; * MUTPROB: A macro that is called by SELC that calculates the *; * mutation probability for each level of each factor. *; * *; * Inputs: *; * inputds : dataset from which to calculate the mutation *; * probabilities *; * mutfac : factor number to be mutated *; * sigds : data set that contains the significant factors *; * *; * Output: *; * mutds : data set that contains the new level of mutfac factor *; ************************************************************************; %macro mutprob(inputds=,mutfac=,sigds=,mutds=,seed=); %if &seed > 0 %then %do; %let seed_sur = seed=&seed; %end; %else %do; %let seed_sur=; %end; data mutfac; parameter = "&mutfac"; run; proc sort data=&sigds; by parameter; run; data sig; merge mutfac(in=a) &sigds(in=b); by parameter; if a and b; run; proc sql noprint; select count(*) into : n from sig; quit; data _null_; set mutfac; length fac 8; fac = substr(parameter,2); call symput('facnum',fac); run; %let facnum = %cmpres(&facnum); *if n = 0, then all levels get equal probability; *if n = 1, then levels receive weighted probability; %if &n = 0 %then %do; data &mutds; &mutfac=ceil(&&L&facnum*ranuni(&seed)); run; %end; %else %do; data prob1; do i=1 to &&L&facnum; x&facnum = i; prob1 = 1/&&L&facnum; output; end; drop i; run; proc sql noprint; select sum (y_pps) into : sum_y from &inputds; quit; proc sort data=&inputds; by &mutfac; run; proc means data=&inputds sum noprint; by &mutfac; var y_pps; output out=outsum(drop=_TYPE_ _FREQ_) sum=sum; run; data prob2; set outsum; prob2 = sum/&sum_y; run; data merge_p; merge prob1 prob2; by &mutfac; if prob1 = . then prob1 = 0; if prob2 = . then prob2 = 0; if prob2 = 0 then prob = 0.25*prob1; if prob2 > 0 then prob = 0.25*prob1 + 0.75*prob2; run; proc surveyselect data=merge_p method=pps sampsize=1 out=&mutds(keep=&mutfac) &seed_sur noprint; size prob; run; %end; %mend mutprob; %macro valid(dfile=,ffile=,strength=,order=,numfact=,dir=); ***Check strength***; proc sql noprint; select count(*) into : nobs from &dfile; quit; %let strengthmax = %sysevalf(&nobs-2); %if &strength > &strengthmax %then %do; %put *********************************************************************; %put *** WARNING: Strength must be less than or equal to &strengthmax ***; %put *********************************************************************; %let proceed = 0; %end; ***Check direction***; %if &dir = 0 %then %do; %let proceed = 1; %end; %else %if &dir = 1 %then %do; %let proceed = 1; %end; %else %do; %let proceed = 0; %put ******************************************; %put *** WARNING: Direction must be 0 or 1 ***; %put ******************************************; %end; *** Check number of factors and order ***; ods listing close; ods output Contents.DataSet.Attributes=att_i; proc contents data=&dfile; run; ods output Contents.DataSet.Attributes=att_f; proc contents data=&ffile; run; ods listing; data icols; set att_i(where=(Label2="Variables")); keep nValue2; run; data _null_; set icols; call symput("ic",nValue2); run; data fcols; set att_f(where=(Label2="Variables")); keep nValue2; run; data _null_; set fcols; call symput("fc",nValue2); run; %if &ic = %sysevalf(&fc+1) %then %do; %let proceed = 1; %end; %else %do; %let proceed = 0; %put ***************************************************************; %put *** WARNING: Number of factors in forbidden array must ***; %put *** match number of factors in the initial design ***; %put ***************************************************************; %end; %if &order < 1 %then %do; %let proceed = 0; %put ******************************************; %put *** WARNING: Order must be at least 1 ***; %put ******************************************; %end; %let maxorder = %sysevalf(&ic-1); %if &order > &maxorder %then %do; %let proceed = 0; %put ********************************************************; %put *** WARNING: Order cannot be greater than &maxorder ***; %put ********************************************************; %end; %let counter = 0; %do iloop = 1 %to &numfact; proc sql noprint; select max(x&iloop) into : maxd from &dfile; quit; proc sql noprint; select max(x&iloop) into : maxf from &ffile; quit; %if &&L&iloop < &maxd %then %do; %let counter = %sysevalf(&counter+1); %end; %else %if &&L&iloop < &maxf %then %do; %let counter = %sysevalf(&counter+1); %end; %end; %if &counter > 0 %then %do; %let proceed = 0; %put *******************************************************************************; %put *** WARNING: There exist levels in initial design or forbidden array which ***; %put *** are greater than the levels specified in L1, L2, ..., Lp ***; %put *******************************************************************************; %end; %mend valid; %macro SELC(dfile=, ffile=, farray=, numoff=, strength=, order=, numfact=, dir=, seed=); %global proceed; %let proceed = 1; ***Check validity of inputs***; %valid(dfile=&dfile,ffile=&ffile,strength=&strength,order=&order,numfact=&numfact,dir=&dir); %if &proceed = 1 %then %do; ***p-value cutoff for determining significance of effects***; %let alpha = 0.05; ***Create working data sets for the initial design and forbidden array files***; proc sql noprint; select max(y) into : max_y from &dfile; select min(y) into : min_y from &dfile; quit; %if &dir=1 %then %do; data work_ds; set &dfile; y_pps = (y - &min_y) / (&max_y - &min_y); run; proc sort data=&dfile out=forbid1(keep=x1-x&numfact); by y; run; %end; %else %if &dir=0 %then %do; data work_ds; set &dfile; y_pps = (&max_y - y) / (&max_y - &min_y); run; proc sort data=&dfile out=forbid1(keep=x1-x&numfact); by descending y; run; %end; data forbid; set forbid1(obs=&strength) &ffile; run; data &farray; set forbid; run; ***Check for significance of univariate factors***; ods output GLM.ANOVA.y.ModelANOVA=GLMout; ods listing close; proc glm data=&dfile; class x1-x&numfact; model y = x1-x&numfact; run; quit; ods listing; data sigfac; set glmout; if HypothesisType = 1; if ((probF < &alpha) and (probF > 0)); parameter = source; keep parameter; run; data next_gen; run; ***Generate new offspring***; %let index = 0; %do %while (&index < &numoff); %put *** &index ***; %if &seed > 0 %then %do; %let seed = %sysevalf(&seed + 1,int); %let seed_sur = seed=&seed; %end; %else %do; %let seed = -1; %let seed_sur=; %end; ***get mutation location***; data _null_; loc=ceil(&numfact*ranuni(&seed)); call symput('ml',loc); run; %let ml=%cmpres(&ml); *** ml is randomely selected mutation location ***; %let mutfac=x&ml; *** mutfac represents mutation factor ***; ***get mutation probabilities for mutation location***; %mutprob(inputds=work_ds,mutfac=&mutfac,sigds=sigfac,mutds=mutval,seed=&seed); ***Choose 2 parents with probability proportional to y values***; proc surveyselect data=work_ds method=pps sampsize=2 out=parents(keep=x1-x&numfact) &seed_sur noprint; size y_pps; run; ***Crossover and mutation***; proc iml; use parents; read all into parents; n = &numfact; ***crossover***; c1 = int(&numfact*ranuni(&seed)); if c1 = 0 then offsp = parents[1,1:n]; else offsp = parents[1,1:c1]||parents[2,(c1+1):n]; ***mutation***; use mutval; read all into mutval; c2 = &ml; if c2 = 1 then offsp = mutval[1,1]||offsp[1,2:n]; if c2 > 1 then if c2 < n then offsp = offsp[1,1:(c2-1)]||mutval[1,1]||offsp[1,(c2+1):n]; else offsp = offsp[1,1:(c2-1)]||mutval[1,1]; varnames = 'x1':"x&numfact"; create offsp from offsp[colname=varnames]; append from offsp; ***Check eligibility of new offspring***; ***Make sure that the new offspring is not in the FA or in the original data set***; use forbid; read all into forbid; nf = nrow(forbid); offsp_n = j(nf,1,1)*offsp; comp = (forbid=offsp_n); max_same = max(comp[,+]); create max_same from max_same[colname='max_same']; append from max_same; use work_ds; read all var('x1':"x&numfact") into work_ds; use next_gen; read all into next_gen; allgen = work_ds//next_gen; ng = nrow(allgen); offsp_n = j(ng,1,1)*offsp; comp = (allgen=offsp_n); same = max(comp[,+]); create same from same[colname='same']; append from same; quit; data _null_; set max_same; call symput('max_same',max_same); run; data _null_; set same; call symput('same',same); run; %if &max_same < &order %then %do; %if &same < &numfact %then %do; %let index = %eval(&index + 1); data next_gen; set next_gen offsp; if x1 = . then delete; run; %end; %end; %end; data next_gen; set next_gen; run; %end; %else %if &proceed = 0 %then %do; %put *************************************************; %put *** WARNING: Macro stopped because of errors ***; %put *************************************************; %end; %mend SELC;