function [X,gnorm] = manorm(X,varargin)
%MANORM normalizes microarray data
%
%   XNORM = MANORM(X), where X is a vector or a matrix, scales the values
%   in each column of X by dividing by the mean column intensity.
%
%   XNORM = MANORM(MASTRUCT,FIELDNAME), where MASTRUCT is a microarray
%   structure, scales the data for field FIELDNAME for each block, or
%   print-tip, by dividing by the mean column intensity for each block. The
%   output is a matrix with each column corresponding to the normalized
%   data for each block.
%
%   [XNORM, COLVAL] = MANORM(...) returns the values used to normalize the
%   data.
%
%   Options are given as parameter name/value pairs. Parameter names are
%   not case sensitive and may be shortened to any unambiguous prefix. An
%   option name without a matching value is an error.
%
%   MANORM(...,'METHOD',METHOD) allows you to choose the method used for
%   scaling or centering the data. METHOD can be one of 'Mean' (default),
%   'Median', 'STD' (standard deviation), 'MAD' (median absolute
%   deviation), or a function handle. If you pass a function handle then
%   the function should ignore NaNs and must return a single value per
%   column of the input data.
%
%   MANORM(...,'EXTRA_ARGS',ARGS) allows you to pass extra arguments to the
%   'METHOD' function. ARGS must be a cell array; a value that is not a
%   cell array is treated as a single extra argument. When METHOD is 'MAD'
%   and no EXTRA_ARGS are given, the flag 1 is passed to MAD. EXTRA_ARGS
%   takes effect regardless of whether it appears before or after METHOD.
%
%   MANORM(...,'LOGDATA',TRUE) is used for working with log ratio data in
%   which case the mean (or METHOD value) of each column is subtracted
%   from the values in the columns, instead of dividing the column by the
%   normalizing value.
%
%   MANORM(...,'PRCTILE',PCT) computes the normalizing value using only
%   the data at or below the PCT percentile of each column, preventing
%   large outliers from skewing the normalization. If PCT is a vector
%   containing two values, then only data in the range from the PCT(1)
%   percentile to the PCT(2) percentile is used. The default value is 100,
%   that is, to use all the data in the data set. All values of X are
%   still normalized; PRCTILE only affects how COLVAL is calculated.
%
%   MANORM(...,'GLOBAL',TRUE) normalizes the values in the data set by the
%   global mean (or METHOD value) of the data, as opposed to normalizing
%   each column, or block, of the data independently.
%
%   MANORM(...,'STRUCTOUTPUT',true), when the input data is a structure,
%   returns the input structure with an additional data field for the
%   normalized data.
%
%   MANORM(...,'NEWCOLUMNNAME',COLNAME), when using STRUCTOUTPUT, allows
%   you to specify the name of the column that is appended to the list of
%   ColumnNames in the structure. The default behavior is to prefix 'Block
%   Normalized' to the FIELDNAME string.
%
%   Examples:
%
%       maStruct = gprread('mouse_a1wt.gpr');
%       % Extract some data of interest.
%       Red = maStruct.Data(:,4);
%       Green = maStruct.Data(:,13);
%       % Create a log-log plot.
%       maloglog(Red,Green,'factorlines',true)
%       % Center the data.
%       normRed = manorm(Red);
%       normGreen = manorm(Green);
%       % Create a log-log plot of the centered data.
%       figure
%       maloglog(normRed,normGreen,'title','Normalized','factorlines',true)
%
%       % Alternatively, you can work directly with the structure
%       normRedBs = manorm(maStruct,'F635 Median - B635');
%       normGreenBs = manorm(maStruct,'F532 Median - B532');
%       % Create a log-log plot of the centered data. This includes some
%       % zero values so turn off the warning.
%       figure
%       w = warning('off','Bioinfo:MaloglogZeroValues');
%       maloglog(normRedBs,normGreenBs,'title',...
%               'Normalized Background-Subtracted Median Values',...
%               'factorlines',true)
%       warning(w);
%
%   See also MABOXPLOT, MAIRPLOT, MALOGLOG, MALOWESS, QUANTILENORM.

% Reference:
%       Stekel, B., Microarray Bioinformatics  
%       Cambridge University Press 2003.

% Copyright 2004 The MathWorks, Inc.
% $Revision: 1.1.12.1 $   $Date: 2004/12/24 20:44:27 $

% Note that the structure handling is dealt with by @struct/manorm.m.

% set defaults
pct = [0,100];
globalFlag = false;
logFlag = false;
normfun = @nanmean;
optArgs = {};
userArgsGiven = false;  % becomes true once EXTRA_ARGS has been supplied
madSelected = false;    % true while the chosen METHOD is the named 'mad'

% deal with the various inputs
% Anything after X must come in name/value pairs, so the total argument
% count has to be odd. Checking from nargin > 1 (rather than > 2) makes a
% single dangling argument an error instead of silently ignoring it.
if nargin > 1
    if rem(nargin,2) == 0
        error('Bioinfo:IncorrectNumberOfArguments',...
            'Incorrect number of arguments to %s.',mfilename);
    end
    okargs = {'prctile','global','logdata','method',...
        'extra_args','structoutput','newcolumnname'};
    for j=1:2:nargin-2
        pname = varargin{j};
        pval = varargin{j+1};
        % case-insensitive prefix match against the known option names
        k = find(strncmpi(pname, okargs, numel(pname)));
        if isempty(k)
            error('Bioinfo:UnknownParameterName',...
                'Unknown parameter name: %s.',pname);
        elseif length(k)>1
            error('Bioinfo:AmbiguousParameterName',...
                'Ambiguous parameter name: %s.',pname);
        else
            switch(k)
                case 1  % prctile
                    if ~isnumeric(pval)
                        error('Bioinfo:PrctileMustBeNumeric',...
                            'PRCTILE must be a numeric value');
                    end
                    if numel(pval) == 1
                        % scalar: upper bound only, lower bound unchanged
                        pct(2) = pval;
                    elseif numel(pval) == 2  %range
                        pct = pval;
                        if pct(2)<=pct(1)
                            error('Bioinfo:PrctileInvalidRange',...
                                'The PRCTILE range is empty.');
                        end
                    else
                        error('Bioinfo:PrctileBadSize',...
                            'PRCTILE must be a scalar or two element vector in the range 0 to 100.');
                    end
                    if any(pct < 0) || any(pct > 100)
                        error('Bioinfo:PrctileMustBe0To100',...
                            'PRCTILE values must be in the range 0 to 100.');
                    end
                case 2 % global flag
                    globalFlag = opttf(pval);
                    if isempty(globalFlag)
                        error('Bioinfo:InputOptionNotLogical','%s must be a logical value, true or false.',...
                            upper(char(okargs(k))));
                    end
                case 3 % log data
                    logFlag = opttf(pval);
                    if isempty(logFlag)
                        error('Bioinfo:InputOptionNotLogical','%s must be a logical value, true or false.',...
                            upper(char(okargs(k))));
                    end
                case 4 % method
                    normmethod = pval;
                    if ischar(normmethod)
                        okmethods = {'mean','median','std','mad'};
                        nm = find(strncmpi(normmethod, okmethods, numel(normmethod)));
                        if isempty(nm)
                            error('Bioinfo:UnknownMethodName',...
                                'Unknown normalization method: %s.',pval);
                        elseif length(nm)>1
                            error('Bioinfo:AmbiguousMethodName',...
                                'Ambiguous normalization method: %s.',pval);
                        else
                            switch nm
                                case 1
                                    normfun = @nanmean;
                                case 2
                                    normfun = @nanmedian;
                                case 3
                                    normfun = @nanstd;
                                case 4
                                    normfun = @mad;
                            end
                            % The default flag for MAD is applied after all
                            % options are parsed so that it neither
                            % overwrites EXTRA_ARGS nor leaks into a
                            % different method chosen later.
                            madSelected = (nm == 4);
                        end

                    elseif isa(normmethod, 'function_handle')
                        normfun = normmethod;
                        madSelected = false;
                    else
                        error('Bioinfo:MethodNotFunctionHandle',...
                            'METHOD must be a string or a function handle.');
                    end
                case 5 % optional arguments
                    optArgs = pval;
                    % a bare value is treated as a single extra argument
                    if ~iscell(optArgs)
                        optArgs = {pval};
                    end
                    userArgsGiven = true;
                case {6,7}  % struct options
                    warning('Bioinfo:StructOnlyOptions',...
                        '%s option is only valid with structure input',pname);
            end
        end
    end
end

% The named 'mad' method defaults to flag 1 (median absolute deviation)
% unless the caller supplied their own EXTRA_ARGS.
if madSelected && ~userArgsGiven
    optArgs = {1};
end

% for global values, simply make the data into a single column and reshape
% later.
if globalFlag
    origSize = size(X);
    X = X(:);
end

% deal with prctile data
% Values outside the requested percentile range are replaced by NaN in a
% working copy so that they do not contribute to the normalizing value;
% X itself is left intact and is normalized in full below.
Xtemp = X;
if pct(1)>0 || pct(2) <100
    lowMask = X < repmat(prctile(X,pct(1)),size(X,1),1);
    highMask = X > repmat(prctile(X,pct(2)),size(X,1),1);
    Xtemp(highMask | lowMask) = NaN;
end
% calculate the function
gnorm = feval(normfun,Xtemp,optArgs{:});

% do we subtract or divide?
if logFlag
    X = X - repmat(gnorm,size(X,1),1);
else
    X = X./repmat(gnorm,size(X,1),1);
end
% reshape global data
if globalFlag
    X = reshape(X,origSize);
end
