function [Y,P] = msalign(MZ,Y,P,varargin)
%MSALIGN aligns a mass spectrometry signal to a set of candidate peaks
%
%   YOUT = MSALIGN(MZ,Y,P) Aligns a mass spectrometry signal (Y) by scaling
%   and shifting the domain (MZ) such that the cross-correlation between
%   the input spectrum and a synthetic target signal is maximum. The
%   synthetic target signal is built with Gaussian pulses centered at the
%   locations specified by the vector P. After obtaining the new MZ
%   mass-charge domain, MSALIGN calculates YOUT by shape-preserving
%   piecewise cubic interpolation of the shifted input signal to the
%   original MZ vector. The algorithm works best with three to five marker
%   peaks that you know will appear in the spectrum at specific locations.
%
%   MZ is the mass-charge column vector with the same size as the vector of
%   ion intensity samples Y (or spectrum). Y can be a matrix with several 
%   spectra, all sharing the same MZ scale.
%
%   MSALIGN uses an iterative grid search until it finds the best scale and
%   shift factors for every spectrum.
%
%   MSALIGN(...,'WEIGHTS',W) sets the weighting for every peak in P. W is a
%   vector of the same size as P and it contains relative weights with
%   non-negative values. The default is W = ones(size(P)). Increase the
%   relative weightings if you have small reference peaks to emphasize them.
%
%   MSALIGN(...,'RANGE',R) sets the lower and upper limits for allowable
%   shifts in range for any of the peaks. Units are MZ, and the default is
%   [-100 100]. Use these values to tune the robustness of the algorithm.
%   Ideally you should only try to correct small shifts; therefore, these
%   bounds should be small. However, by increasing the limits you can try
%   to correct larger shifts at the expense of picking incorrect peaks to
%   be aligned.
%
%   MSALIGN(...,'WIDTHOFPULSES',WP) sets the width WP (in MZ units) of the
%   Gaussian pulses used to build the correlating synthetic signal. WP is
%   the point where the Gaussian pulse reaches 60.65% of its maximum. The
%   default is 10, which means a constant width for all Gaussian pulses is
%   set at 10. WP can also be a function handle. The referenced function is
%   evaluated at each MZ value to compute a variable width for the pulses.
%   Its evaluation should give reasonable values (i.e., WP < max(abs(R) and
%   WP >0); otherwise, the function errors out.
%
%   Note: Tuning the spread of the Gaussian pulses controls a tradeoff
%   between robustness (wider pulses) and precision (narrower pulses);
%   however, it is unrelated to the shape of the observed peaks in the
%   spectrum. 
%
%   MSALIGN(...,'WINDOWSIZERATIO',WSR) WSR is a scalar value that
%   determines the size of the windows around every putative peak; the
%   synthetic signal is correlated to the input spectrum only within these
%   regions, saving computation time. The size of the window is given by
%   WP * WSR (in MZ units). The default is 2.5; i.e., at the limits of the
%   window, the Gaussian pulses have a value of 4.39% of their maximum. 
%
%   MSALIGN(...,'ITERATIONS',I) sets the number of refining iterations. At
%   every iteration, the search grid is scaled down to improve the
%   estimates. The default is 5.
%
%   MSALIGN(...,'GRIDSTEPS',GS) sets the number of steps for the search
%   grid; i.e., at every iteration the search area is divided by GS^2.
%   The default is 20. 
% 
%   MSALIGN(...,'SEARCHSPACE',SS) sets the type of the search space. The
%   default is a 'regular' grid. 'latin' uses a random latin hypercube with
%   GS^2 samples.
%
%   [YOUT, POUT] = MSALIGN(...,'GROUP',true) updates the original peak
%   locations such that the actual movement of the peaks is minimum. POUT
%   contains the updated peak locations. The default is false.
%
%   MSALIGN(...,'SHOWPLOT',SP) plots the original and the aligned spectrum
%   over the P markers. When SP is TRUE, the first spectrogram in Y is
%   used. If MSALIGN is called without output arguments, a plot will be
%   shown unless SP is FALSE. SP can also contain an index to one of the
%   spectrograms in Y. 
%  
%   Example: 
%
%       load sample_lo_res
%       % Select some reference peaks.
%       P = [3991.4 4598 7964 9160];
%       msheatmap(MZ_lo_res,Y_lo_res,'markers',P,'limit',[3000 10000])
%       title('before alignment')
%
%       % Align all the spectrograms in Y_lo_res to the reference peaks.
%       YA = msalign(MZ_lo_res,Y_lo_res,P);
%
%       msheatmap(MZ_lo_res,YA,'markers',P,'limit',[3000 10000])
%       title('after alignment')
%
%       % Repeat the alignment now specifying weights for the reference
%       % peaks.
%       P = [3991.4 4598 7964 9160];
%       W = [60 100 60 100];
%       YA = msalign(MZ_lo_res,Y_lo_res,P,'weights',W);
%
%       msheatmap(MZ_lo_res,YA,'markers',P,'limit',[3000 10000])
%       title('after alignment with weights')
%
%   See also MSBACKADJ, MSHEATMAP, MSLOWESS, MSNORM, MSRESAMPLE, MSSGOLAY,
%   MSVIEWER. 

%   Copyright 2003-2004 The MathWorks, Inc.
%   $Revision: 1.1.12.1 $  $Date: 2004/12/24 20:43:24 $

% set defaults
gaussianWidth = 10;  % std dev of the Gaussian pulses (in mz)
gaussianRatio = 2.5; % sets the width of the windows use at every pulse
gaussianResol = 100; % resolution of every Gaussian pulse (number of points) 
iterations = 5;      % increase to improve accuracy
gridSteps = 20;      % size of grid for exhaustive search      
shiftRange = [-100 100];     % initial shift range
searchSpaceType = 'regular'; % type of search space
groupAlign = false;
if nargout == 0
    plotId = 1; 
else
    plotId = 0;
end

% validate required inputs 
if ~isnumeric(Y) || ~isreal(Y) 
   error('Bioinfo:msalign:IntensityNotNumericAndReal',...
       'Ion intensities must be numeric and real.') 
end
if ~isnumeric(MZ) || ~isreal(MZ) || ~isvector(MZ)
   error('Bioinfo:msalign:MZNotNumericAndReal',...
       'MZ scale must be a numeric and real vector.') 
end
if size(MZ,1) ~= size(Y,1)
   error('Bioinfo:msalign:NotEqualNumberOfSamples',...
       'The ion intensity vector(s) must have the same number\nof samples as the MZ scale.')
end
numSpectrograms = size(Y,2);
if ~isnumeric(P) || ~isreal(P) || ~isvector(P)
   error('Bioinfo:msalign:PValsNotNumericAndReal',...
       'P values must be numeric and real.') 
end

P=P(:);
numPeaks = numel(P);
W = ones(size(P));  %default weights


% get input arguments
if  nargin > 3
    if rem(nargin,2) == 0
        error('Bioinfo:msalign:IncorrectNumberOfArguments',...
            'Incorrect number of arguments to %s.',mfilename);
    end
    okargs = {'weights','range','widthofpulses',...
              'windowsizeratio','size','iterations','gridsteps',...
              'steps','searchspace','space','group','showplot'};
    for j=1:2:nargin-3
        pname = varargin{j};
        pval = varargin{j+1};
        k = strmatch(lower(pname), okargs); %#ok
        if isempty(k)
            error('Bioinfo:msalign:UnknownParameterName',...
                'Unknown parameter name: %s.',pname);
        elseif length(k)>1
            error('Bioinfo:msalign:AmbiguousParameterName',...
                'Ambiguous parameter name: %s.',pname);
        else
            switch(k)
                case 1  % 'weights'
                    W =  pval(:);
                    if numel(W)~=numPeaks
                        error('Bioinfo:msalign:IncorrectWeights', ...
                        'Weights vector should be same size as P.');
                    end
                case 2 % range
                    shiftRange = pval(:)';
                    if (numel(shiftRange)~=2) || (diff(shiftRange)<=0 )
                        error('Bioinfo:msalign:IncorrectShiftRange', ...
                        'Shift range should be an 1x2 increasing vector.');
                    end
                case 3 % width
                    gaussianWidth = pval;
                case {4,5} % window size
                    if (~isscalar(pval) || pval<=1)
                        error('Bioinfo:msalign:IncorrectWindowRatio', ...
                        'Window ratio should a scalar larger than 1')
                    end
                    gaussianRatio = pval;
                case 6 % iterations
                    if (~isscalar(pval) || pval<=0)
                        error('Bioinfo:msalign:IncorrectPrecision', ...
                        'Precision should a positive scalar.')
                    end
                    iterations = pval;
                case {7,8} %  grid steps
                    if (~isscalar(pval) || pval<=0)
                        error('Bioinfo:msalign:IncorrectGrid', ...
                        'Grid should a positive scalar.')
                    end
                    gridSteps = pval; 
                case {9,10} % search space
                    searchSpaceTypes = {'latinhypercube','regular'};
                    searchSpaceType = strmatch(lower(pval),searchSpaceTypes); %#ok
                    if isempty(searchSpaceType) 
                        error('Bioinfo:msalign:NotValidSearchSpaceType',...
                              'Not a valid search space type.')
                    end
                    searchSpaceType = searchSpaceTypes{searchSpaceType};
                case 11 % group
                    groupAlign = opttf(pval);
                case 12 % show
                    if opttf(pval) 
                        if isnumeric(pval)
                            if isscalar(pval)
                                plotId = double(pval); 
                            else
                                plotId = 1;
                                warning('Bioinfo:msalign:SPNoScalar',...
                                'SHOWPLOT must be a single index to one of the spectrograms in Y or\na logical. Plotting the first column in Y only.')
                            end
                        else
                            plotId = 1;
                        end
                    else
                        plotId = 0;
                    end
            end
        end
    end
end

if (plotId~=0) && ~any((1:numSpectrograms)==plotId)
    warning('Bioinfo:msalign:InvalidPlotIndex',...
    'SHOWPLOT is not a valid spectrogram index, no plot was generated.')
end

% change scalar to function handler
if isnumeric(gaussianWidth)   
    gaussianWidth = @(x) gaussianWidth;   
end

% check that values for GaussianWidth are valid
G = zeros(numPeaks,1);
for i = 1:numPeaks
    G(i) = gaussianWidth(P(i));
    if G(i)<=0 || G(i)>max(abs(shiftRange))
        error('Bioinfo:msalign:InvalidWidths',...
            'Gaussian pulse width must be greater than 0 and less than abs(shiftRange) for every peak in P.')
    end
end

% set the synthetic target signal
corr_sig_x = zeros(gaussianResol+1,numel(P)); 
corr_sig_y = zeros(gaussianResol+1,numel(P)); 
for i = 1:numPeaks
    leftL = P(i)-gaussianRatio*G(i);
    rightL = P(i)+gaussianRatio*G(i);
    corr_sig_x(:,i) = (leftL+(0:gaussianResol)*(rightL-leftL)/gaussianResol)';
    corr_sig_y(:,i) = W(i)*exp(-((corr_sig_x(:,i)-P(i))/G(i)).^2);
end

corr_sig_l = (gaussianResol+1)*numPeaks;
corr_sig_x = corr_sig_x(:);
corr_sig_y = corr_sig_y(:);

% set reduceRangeFactor to take 5 points of the previous ranges or half of
% the previous range if gridSteps<10
reduceRangeFactor = min(0.5,5/gridSteps);

% set scl such that the maximum peak can shift no more than the
% limits imposed by shft when scaling
scaleRange = 1 + shiftRange/max(P); 

% allocate space for vectors of optima
sclOpt = zeros(numSpectrograms,1); shftOpt = sclOpt;

% number of points in the search space
searchSize = (gridSteps)^2;

% create the meshgrid only once
switch searchSpaceType
    case 'regular'
        [A,B] = meshgrid((0:gridSteps-1)/(gridSteps-1),...
                         (0:gridSteps-1)/(gridSteps-1));
        srchSpc = repmat([A(:),B(:)],1,iterations);
    case 'latinhypercube'
        % Generate a latin hypercube sample for every iteration
        srchSpc = zeros(searchSize,iterations*2);
        for i = 1:iterations
            srchSpc(:,i*2-[1 0]) = lhsdesign(searchSize,2); 
        end
end

% iterate for every spectrogram
for i = 1:numSpectrograms
if nargout>0 || (i == plotId) || groupAlign
    
    % Main loop: searches for optimum values for the Scale and Shift
    % factors by exhaustive search over a multiresolution grid, getting
    % finer at every iteration.

    %set to back to the user input arguments (or default)
    shft = shiftRange;
    scl  = scaleRange;

    for t = 1:iterations % increase for better resolution
        % scale and shift search space
        A = scl(1) + srchSpc(:,t*2-1) * diff(scl);
        B = shft(1) + srchSpc(:,t*2) * diff(shft);
        temp = A*corr_sig_x'+repmat(B,1,corr_sig_l);
        temp(:) = interp1(MZ,Y(:,i),temp(:),'pchip',0);
        [dump,imax] = max(temp*corr_sig_y);%#ok
        % save optimum
        sclOpt(i)  = A(imax);
        shftOpt(i) = B(imax);
        % readjust grid for next iteration
        scl  = sclOpt(i)  + [-0.5 0.5]*diff(scl) *reduceRangeFactor;
        shft = shftOpt(i) + [-0.5 0.5]*diff(shft)*reduceRangeFactor;
    end

end % if nargout>0 || (i == plotId)    
end % for i = 1:numSpectrograms 

if groupAlign && numSpectrograms>1
    shftAll =  mean(mean(sclOpt*P'+repmat(shftOpt,1,numPeaks)-...
                                   repmat(P',numSpectrograms,1)));
    shftOpt = shftOpt - shftAll;
    P = P + shftAll;
end

% iterate for every spectrogram
for i = 1:numSpectrograms
if nargout>0 || (i == plotId)
    
    if (i == plotId)
        yo = Y(:,i);
    end
    
    % interpolation back to the original domain
    Y(:,i) = interp1((MZ-shftOpt(i))/sclOpt(i),Y(:,i),MZ,'pchip',NaN);
    
    if (i == plotId)
        figure
        plot(MZ,[yo Y(:,i)])
        hold on
        plot([P P]',[min(Y(:,i));max(Y(:,i))]*ones(1,numPeaks),'r--')
        title(sprintf('Spectrogram ID: %d',i));
        xlabel('Mass/Charge (M/Z)')
        ylabel('Relative Intensity')
        legend('Original spectrogram','Aligned spectrogram',...
               'Targeted peak locations')
        axis([min(MZ) max(MZ) min(Y(:,i)) max(Y(:,i))])
        grid on
        hold off
    end
    
end % if nargout>0 || (i == plotId)    
end % for i = 1:numSpectrograms 
