
function [alphaTrue,betaTrue,X,valX,tstX,trnCenters,trnScales,y,valY,tstY,trnCenterY,d,D,n,fd,newF,grps, betaTrue_rawscale]...
    =generateSpecData(initN, valN, tstN, initD, trueFreqs, trueAmps, truePhis, sigmasq, fs, family,...
    centerX, scaleX, centerY)
% Generate a real signal with spectral sparsity from given frequencies,
% amplitudes and phases, together with its cos/sin design matrix, and split
% both into train / validation / test parts.
%
% The noiseless signal is
%   sum_i A_i*cos(2*pi*f_i*t + phi_i)
%     = sum_i A_i*cos(phi_i)*cos(2*pi*f_i*t) - A_i*sin(phi_i)*sin(2*pi*f_i*t)
% with every f_i first snapped to the nearest frequency on the grid fd.
%
% Arguments:
%   initN, valN, tstN: sample sizes for train, validation and test
%   initD: number of frequency bins x 2
%   trueFreqs, trueAmps, truePhis: signal parameters (frequencies, amplitudes,
%          phases); all three must have the same number of elements
%   sigmasq: noise power (only used by the 'gaussian' family)
%   fs: sampling frequency
%   family: response type: 'binomial', 'poisson' or 'gaussian' (case-insensitive)
%   centerX, scaleX, centerY: flags (1 = on) saying whether X is centered,
%          X is scaled and Y is centered; passed on to AllDataStand
% Value:
%   alphaTrue, betaTrue: intercept and coefficients of the GLM model after
%          the adjustment for the standardization of X
%   X, valX, tstX: the cos-sin matrix for each signal (after AllDataStand);
%          columns are [cos(2*pi*t*fd), sin(2*pi*t*fd(1:end-1))]
%   trnCenters, trnScales, trnCenterY: as returned by AllDataStand
%          (presumably the training-set centers/scales of X and center of Y)
%   y, valY, tstY: signal for train, validation and test
%   D: number of frequency bins x 2 (= initD), d = D-1
%   n: sample size for train (= initN)
%   fd: frequency grid, multiples of fs/D up to fs/2
%   newF: trueFreqs snapped to the nearest grid frequency
%   grps: group index of every column of X (the cos and sin columns of the
%          same frequency share one index)
%   betaTrue_rawscale: coefficients before the rescaling by trnScales

numFreqs = numel(trueFreqs);
% Amplitudes AND phases must both match the number of frequencies.
if numel(trueAmps) ~= numFreqs || numel(truePhis) ~= numFreqs
    error('wrong Freq/Amp/Phi parameters')
end

n = initN;                  % sample size for train
totN = n + valN + tstN;     % total sample size
D = initD; d = D-1;         % number of frequency bins

resolution = fs/D;          % spacing of the frequency grid
Ts = 1/fs;                  % sampling interval

% Build both grids from integer multiples rather than a floating-point colon
% step, so that the number of elements never depends on rounding.
t = (1:totN)*Ts;
fd = (1:floor(D/2))*resolution;
nBins = numel(fd);

% The last bin has no sine column. For even D it sits at fs/2, where
% sin(2*pi*f*t) = sin(pi*k) is zero at every sample time.
tCol = t(:);
X = [cos(2*pi*tCol*fd),  sin(2*pi*tCol*fd(1:(end-1)))];
grps = [1:nBins, 1:(nBins-1)];

% Snap every true frequency to its nearest grid frequency (first one on ties)
% and accumulate the matching cos / sin coefficients.
newF = trueFreqs;
betaTrue = zeros(size(X, 2), 1);    % size(X,2) equals d when D is even
for i = 1:numFreqs
    [~, k] = min(abs(fd - trueFreqs(i)));
    newF(i) = fd(k);
    % Add instead of overwrite, so that two components landing in the same
    % bin are summed, as in the signal model.
    betaTrue(k) = betaTrue(k) + trueAmps(i)*cos(truePhis(i));
    if k < nBins
        betaTrue(nBins + k) = betaTrue(nBins + k) - trueAmps(i)*sin(truePhis(i));
    end
    % k == nBins: no sine column exists for the last bin, so only the cosine
    % coefficient can be set.
end
betaTrue_rawscale = betaTrue;

% Draw the response for all totN samples at once, before the split.
switch lower(family)
    case {'binomial'}
        y = binornd(1, GLM_mean(X, betaTrue, 'binomial'));
    case 'poisson'
        y = poissrnd(GLM_mean(X, betaTrue, 'poisson'));
    case 'gaussian'
        y = sqrt(sigmasq)*randn(size(X, 1), 1) + GLM_mean(X, betaTrue, 'gaussian');
    otherwise
        error('not implemented yet')
end

% Split rows into train (first n), validation (next valN) and test (the rest).
trnRows = 1:n;
valRows = (n+1):(n+valN);
tstRows = (n+valN+1):totN;
tstX = X(tstRows, :); valX = X(valRows, :); X = X(trnRows, :);
tstY = y(tstRows);    valY = y(valRows);    y = y(trnRows);

% normalize X (center + scale) and Y (assume the family is Gaussian)
[X, valX, tstX, trnCenters, trnScales, y, valY, tstY, trnCenterY] = ...
AllDataStand(centerX, scaleX, X, valX, tstX, centerY, y, valY, tstY);

% Express the true coefficients on the scale of the standardized X.
if scaleX == 1
    betaTrue = betaTrue .* trnScales';
end

% NOTE(review): the intercept uses the rescaled betaTrue; whether that is the
% right one depends on how AllDataStand defines trnCenters -- confirm.
if centerX == 1
    alphaTrue = sum(betaTrue'.*trnCenters);
else
    alphaTrue = 0;
end
if centerY == 1
    alphaTrue = 0;
end
