%%%%%%%%%%%%%%%%%%%%%%%% Simulation data generation %%%%%%%%%%%%%%%%%%%%
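% This script generates simulated training, validation and test data
% (X/y, valX/valY, tstX/tstY) together with the true coefficient vector
% betaTrue and intercept alphaTrue.
% It expects initN, valN, tstN, initD, dseq, bseq, bStrength, dataName and
% family to be defined in the workspace before it is run.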

% Sample sizes and dimension: n training rows, totN rows overall
% (training + validation + test), d predictors.
n = initN;
d = initD;
totN = n + valN + tstN;

% dseq holds the sizes of consecutive coefficient groups and bseq the value
% shared by each group. Former in-script example of these inputs:
%   dseq = [initNZ, initD-initNZ]; bseq = bStrength*[1 0];
% When the groups cover fewer than initD coefficients, one extra group with
% coefficient 0 fills the remainder and bseq is multiplied by bStrength.
% Any other mismatch between sum(dseq) and initD is rejected.
if sum(dseq) < initD
    bseq = bStrength*[bseq 0];
    dseq = [dseq, initD - sum(dseq)];
elseif sum(dseq) ~= initD
    error('wrong beta input')
end

    
% ---------------------------------------------------------------------
% Simulate the design matrix X and the response y, then split them into
% training (X, y), validation (valX, valY) and test (tstX, tstY) sets.
%
% Reads from the workspace : dataName, family, n, valN, tstN, totN, d,
%                            dseq, bseq.
% Leaves in the workspace  : X, y, valX, valY, tstX, tstY, betaTrue,
%                            alphaTrue, trnCenters, trnScales, centerX,
%                            scaleX (plus the intermediates used below).
%
% dataName selects the sampling scheme by its first two characters:
%   'my...' : draw totN rows and simulate y from the GLM given by family.
%   'CC...' : case-control sampling (binomial only); draw 2*totN rows,
%             simulate labels and keep totN/2 rows of each class.
% The remainder of dataName selects the distribution of X:
%   '..unif'            entries in {-1, 0, 1} with prob. .25 / .5 / .25
%   '..norm-iid'        Gaussian, correlation rou^|i-j| with rou = 0
%   '..norm-mildcorr'   same with rou = .1
%   '..norm-medcorr'    same with rou = .5
%   '..norm-highcorr'   same with rou = .9
%   'mynorm-allequal'   Gaussian, all pairwise correlations rou = .5
% ---------------------------------------------------------------------

% True coefficient vector (column): dseq(i) consecutive entries equal to
% bseq(i). It does not depend on the sampling scheme, so build it once.
betaTrue = [];
for i = 1:size(dseq, 2)
    betaTrue = [betaTrue, ones(1, dseq(i)) * bseq(i)]; %#ok<AGROW>
end
betaTrue = betaTrue';

if strncmp(dataName, 'my', 2)
    % ---- Design matrix ------------------------------------------------
    if strcmp(dataName, 'myunif')
        % Threshold uniforms; the order of the three assignments matters
        % because X is overwritten in place.
        X = rand(totN, d);
        X(X <= .25) = -1; X(X > .25 & X <= .75) = 0; X(X > .75) = 1;
    elseif strcmp(dataName, 'mynorm-allequal')
        % Equicorrelated Gaussian: 1 on the diagonal, rou elsewhere.
        rou = .5;
        X = randn(totN, d);
        corrMat = rou .^ ones(d) + (1 - rou) * eye(d);
        X = X * chol(corrMat);
    else
        switch dataName
            case 'mynorm-iid'
                rou = 0;
            case 'mynorm-highcorr'
                rou = .9;
            case 'mynorm-medcorr'
                rou = .5;
            case 'mynorm-mildcorr'
                rou = .1;
            otherwise
                error('Wrong data name')
        end
        % Gaussian rows with correlation rou^|i-j|; chol returns R with
        % R'*R = corrMat, so the rows of X*R have correlation corrMat.
        X = randn(totN, d);
        corrMat = rou .^ abs([1:d]' * ones(1, d) - ones(d, 1) * [1:d]);
        X = X * chol(corrMat);
    end

    % ---- Response -----------------------------------------------------
    switch lower(family)
        case 'binomial'
            y = binornd(1, GLM_mean(X, betaTrue, 'binomial'));
        case 'poisson'
            y = poissrnd(GLM_mean(X, betaTrue, 'poisson'));
        case 'gaussian'
            y = randn(size(X, 1), 1) + GLM_mean(X, betaTrue, 'gaussian');
        otherwise
            error('not implemented yet')
    end

    % ---- Split: first n rows train, next valN validation, rest test ---
    tstX = X((n+valN+1):end, :);
    tstY = y((n+valN+1):end);
    valX = X((n+1):(n+valN), :);
    valY = y((n+1):(n+valN));
    X = X(1:n, :);
    y = y(1:n);

elseif strncmp(dataName, 'CC', 2) % case-control
    % Fail fast on configurations this branch cannot handle.
    if ~strcmpi(family, 'binomial')
        error('Case-control only for binomial!!')
    end
    if mod(n, 2) ~= 0 || mod(valN, 2) ~= 0 || mod(tstN, 2) ~= 0
        % Each split takes half of its rows from each class; odd sizes
        % would give non-integer indices or silently drop samples.
        error('Case-control sampling needs even n, valN and tstN!')
    end

    halfN = totN/2;
    y = [zeros(halfN, 1); ones(halfN, 1)];
    tmpN = 2*totN; % oversample so that each class can supply halfN rows

    % ---- Candidate design matrix (tmpN rows) --------------------------
    if strcmp(dataName, 'CCunif')
        X = rand(tmpN, d);
        X(X <= .25) = -1; X(X > .25 & X <= .75) = 0; X(X > .75) = 1;
    else
        switch dataName
            case 'CCnorm-iid'
                rou = 0;
            case 'CCnorm-highcorr'
                rou = .9;
            case 'CCnorm-medcorr'
                rou = .5;
            case 'CCnorm-mildcorr'
                rou = .1;
            otherwise
                % Previously an unknown 'CC...' name left X undefined.
                error('Wrong data name')
        end
        X = randn(tmpN, d);
        corrMat = rou .^ abs([1:d]' * ones(1, d) - ones(d, 1) * [1:d]);
        X = X * chol(corrMat);
    end

    % ---- Simulate labels, keep the first halfN rows of each class -----
    psedy = binornd(1, GLM_mean(X, betaTrue, 'binomial'));
    psedX = X; X = ones(totN, d);
    cls0 = find(psedy==0);
    cls1 = find(psedy==1);
    if numel(cls0) < halfN || numel(cls1) < halfN
        error('Increase total sample size to get enough data!')
    end
    X(1:halfN, :) = psedX(cls0(1:halfN), :);       % controls (y = 0)
    X((halfN+1):end, :) = psedX(cls1(1:halfN), :); % cases    (y = 1)

    % ---- Split: each set takes the same row range from both classes ---
    tstX = X([(n/2+valN/2+1):(n/2+valN/2+tstN/2), halfN + ((n/2+valN/2+1):(n/2+valN/2+tstN/2))], :);
    tstY = y([(n/2+valN/2+1):(n/2+valN/2+tstN/2), halfN + ((n/2+valN/2+1):(n/2+valN/2+tstN/2))]);
    valX = X([(n/2+1):(n/2+valN/2), halfN + ((n/2+1):(n/2+valN/2))], :);
    valY = y([(n/2+1):(n/2+valN/2), halfN + ((n/2+1):(n/2+valN/2))]);
    X = X([(1):(n/2), halfN + ((1):(n/2))], :);
    y = y([(1):(n/2), halfN + ((1):(n/2))]);

else
    error('not implemented yet')
end

% ---- Optional standardisation (currently switched off) ---------------
% centerX / scaleX are passed to DataStand; when a flag is 1 the true
% parameters are adjusted using the centers / scales it returns.
% NOTE(review): with both flags on, alphaTrue is computed from the already
% rescaled betaTrue - confirm this matches DataStand's convention.
centerX = 0; scaleX = 0;

[X, valX, tstX, trnCenters, trnScales] = DataStand(centerX, scaleX, X, valX, tstX);
if scaleX == 1
    betaTrue = betaTrue .* trnScales';
end
if centerX == 1
    alphaTrue = sum(betaTrue'.*trnCenters);
else
    alphaTrue = 0;
end
% No intercept column is added to X here; an earlier, disabled variant
% prepended a column of ones to X/valX/tstX and a leading 0 to betaTrue.
