Wednesday, August 17, 2011

Completed Program in MATLAB for error surface minimization

To fit maximum productivity to data.... that may have not met it's maximum productivity yet.
I wrote this code. Of course, in the current state it works for MY DATA. But you can make it work for yours, too!


% site_index.m


% this program will calculate maximum productivity for ws1 plots


% we base this on the idea that biomass follows a logistic function. the
% logistic function is defined as B(t) = K/(1+Ce^(-r*t)). We would like to
% fit a logistic function to our biomass curves and then solve for where
% the derivative of biomass (productivity) is maximized (or where the
% second derivative of biomass is 0). To do this we must fit a logistic
% function to the biomass data.


% Import biomass data. this data should be in a file with no headers and no
% plot ids, just literally columns of biomass in your interval years like
% so
% 0.007 | 0.01 | 0.100 |  etc.


file = importdata('biomat.csv',',');
data = file;


% Create a vector T representing the number of
% years after the first measurement


T =[0;4;8;12;16;22;28];


%dataT is transposed data. Now you should have an array that is number of plots
%columns and number of remeasurements rows.


dataT = data';


% now we will determine an appropriate range for "r" which is the rate of
% growth that goes into the logistic equation. To do this, following the
% methods I have outlined, we need to know the productivity rate at the
% point of inflection or at the end of the exponential growth period,
% whichever is applicable


%prod_rate is a vector that will hold the ANPP values between each interval


prod_rate = zeros(6, length(dataT));


%years is a vector that holds the interval length between periods.
%This can be calculated or just done in your brain, which is easier for
%this example.


years = [4 ; 4 ; 4 ; 4 ; 6 ; 6];


%calculate the difference in biomass for each interval, divide by the
%length of duration of the interval, store the results in the vector
%prod_rate


% sample below shows how to calculate ANPP for just one plot. recall that
% because we are fitting to the logistic we assume mortality is inclusive.


prod_rate(1,1) = (dataT(2,1)- dataT(1,1))/years(1);
prod_rate(2,1) = (dataT(3,1)- dataT(2,1))/years(2);
prod_rate(3,1) = (dataT(4,1)- dataT(3,1))/years(3);
prod_rate(4,1) = (dataT(5,1)- dataT(4,1))/years(4);
prod_rate(5,1) = (dataT(6,1)- dataT(5,1))/years(5);
prod_rate(6,1) = (dataT(7,1)- dataT(6,1))/years(6);


% loop through all plots, the number of plots is(length(dataT))
% and for each interval (i) and store the results in prod_rate
for j = 2:length(dataT)
    for i=1:6
        prod_rate(i,j) = (dataT(i+1,j)-dataT(i,j))/years(i);
    end
end


%create a vector telling us what the maximum productivity is:
inflection_value = zeros(length(prod_rate),1);


inflection_value(1,1) = max(prod_rate(:,1));
for j = 2:length(dataT)
    inflection_value(j,1) = max(prod_rate(:,j));
end


%indexOfMaxValue is an index that tells us the interval time when the
%maximum productivity occurs.


% we loop through the productivity rates to find when the rate is equal to
% the maximum rate


indexOfMaxValue = zeros(length(prod_rate),1);


indexOfMaxValue(1,1) = find(prod_rate(:,1) == max(prod_rate(:,1)));
for j = 2:length(prod_rate)
    indexOfMaxValue(j,1) = find(prod_rate(:,j)== max(prod_rate(:,j)));
end


% slopeAtInflection is a vector that we will use to hold point-slope values
% for "r" at the inflection point found in indexOfMaxValue


slopeAtInflection = zeros(length(prod_rate),1);


% oneAhead holds the biomass value one position ahead of the max.
% If the max occurs at the end of the interval, then the final value is
% the max value, and we will compute only the left sided slope


oneAhead = zeros(length(dataT),1);


oneAhead(1) = dataT((find(prod_rate(:,1) == max(prod_rate(:,1)))+1),1);
for j = 2:length(dataT)
    if indexOfMaxValue(j)~=6
        oneAhead(j) = dataT((find(prod_rate(:,j) == max(prod_rate(:,j)))+1),j);
    else
        oneAhead(j) = dataT((find(prod_rate(:,j) == max(prod_rate(:,j)))),j);
    end
end


% oneBehind finds the value one position behind the max. If the max occurs
% at the end of the interval the same procedure applies.


oneBehind = zeros(length(dataT),1);
oneBehind(1) = prod_rate((find(prod_rate(:,1) == max(prod_rate(:,1)))-1),1);
for j = 2:length(dataT)
    %if indexOfMaxValue(j)~=6
    oneBehind(j) = dataT((find(prod_rate(:,j) == max(prod_rate(:,j)))-1),j);
    % else
    %  oneBehind(j) = prod_rate((find(prod_rate(:,j) == max(prod_rate(:,j)))),j);
    % end
end


% find the slope at the inflection point. This will help us define an
% appropriate range for "r".


% Initially I attempted to do this using the indexes, which was not correct
% ...do not use! Keeping in here as a lesson.
%slopeAtInflection(1,1) = (oneAhead(1)-oneBehind(1))/((indexOfMaxValue(1) + 1) -(indexOfMaxValue(1) -1));


% slopeAtInflection is the point slope calculated by the change in Y (bio)
% over the change in X (years). When the inflection is at the end of the
% interval, it's indexOfMaxValue is 6. In these cases, the point slope is
% calculated between position 6 and position 5, rather than over two
% intervals (5 to 3, for example).


slopeAtInflection(1,1) = (oneAhead(1)-oneBehind(1))/(T((indexOfMaxValue(1) + 1)) - T((indexOfMaxValue(1) -1)));
for j = 2:length(dataT)
    if indexOfMaxValue(j) ~=6
        slopeAtInflection(j) = (oneAhead(j)-oneBehind(j))/ (T((indexOfMaxValue(j)+1))-(T(indexOfMaxValue(j)-1)));
    else
        slopeAtInflection(j) =(oneAhead(j)-oneBehind(j))/ (T((indexOfMaxValue(j)))-(T(indexOfMaxValue(j)-1)));
    end
end


% K is the absolute largest biomass on our watershed
% this servs as the asymptote for the initial guess
% because of the method of calculation we will be using, the purpose of K
% is only to determine a range for "r" and it does not affect the fitting


K = max(max(dataT));


% alternate values of K can be used to check the effects of K on the output
%K = 200;
%K = 400;
%K = 600;


% using the power of differential equations we solved the ANPP function at
% the inflection point to find that we can approximate r = 4*slopeAtInflectionPoint/K
% this is only to determine a safe range for r.


r = zeros(length(slopeAtInflection),1);
r = (4.*slopeAtInflection)/K;


% now we will conduct a safe fitting procedure for r. It is generally
% advised that we look at the "r" values generated by the previous code,
% find their maxes and mins, and select a range encompassing these. In this
% case we will say that "r" is not less than 0.01 nor greater than 0.90, and
% we will graph in 50 equal increments. Adding more increments slows the
% processing.


% we will say that the inflection point must be between 0 and 28. We could
% push it out further if we wished to change the output (see the second t0)
% which is worth testing.


% linspace is a command that defines linearly spaced vectors. In a sense
% this means it creates an algerbraic grid that can be bent. If you want to
% see the linspace, uncomment figure 1 below.


% I have added extra subroutines for calculating special linspaces for
% larger and smaller parameterizations. i is for 10 as a maximum
% inflection occurance. nothing is for 50. ii is for 75 as a maximum
% inflection occurance. and iii is for 100 as a maximum 


linR=linspace(0.01,1,100);
t0=linspace(0,75,50);
%t0=linspace(0,20,50);
[linR,t0]=meshgrid(linR,t0);


%-------------------------------------------------------------
% the fitting part. I still haven't figured out how to loop through the
%
% For the first plot, this is how you will fit the parameters


% if I ever get the loops right these matrices will hold all the outputs.
%rD1mat=zeros(length(dataT),1);
%rD2mat=zeros(length(dataT),1);
%Krepmat=zeros(length(dataT),1);


%for k=1:length(dataT)


% [m,n] are the dimensions of the the errorspace, which is the same size as 
% the linear space (linR) we just defined. e is what we will call the error
% space


% MYERROR NOTES: 
% you will need to define a function "myerror" or whatever you want to call
% it outside of this program. The following code should be used:
% (make sure to comment it back it with one comment, highlight and press
% control + e.
%-----------------------MY ERROR-------------------------------------
% %function [ e] = myerror( x,t,m )
% %%UNTITLED3 Summary of this function goes here
% %   Detailed explanation goes here
% %r=x(1);
% t0=x(2);
% h=1./(1+exp(-r*(t-t0)));
% e=m'*m-(h'*m)^2/(h'*h);

% end
%---------------------------------------------------------------------


[m,n] = size(linR);
e = zeros(size(linR));
for i=1:m
    for j=1:n
        e(i,j) = myerror([linR(i,j); t0(i,j)], T, dataT(:,3));
    end
end


% % images of the first plot-- this one shows you the 3d errorspace. it
% eats memory. be careful!


% figure(1)
% mesh(linR, t0, e);
% xlabel('r')
% ylabel('inflection point')
% zlabel('e')
% title('plotting error versus parameters for the first plot');


% this one shows a contour plot where the error is minimized. it is less
% hungry. it is good to check these to be sure you don't have multiple
% minima.


figure(2)
contour(linR, t0, e,50)
xlabel('r')
ylabel('inflection');
title('contour map of error surface for plot 103');


% min finds the minimum error of that plot using the matlab function fminsearch
% the first output is the value of "r" and the second output is the inflection
% point (t0) when it occurs.
min = fminsearch(@myerror, [0.15, 50],[],T,dataT(:,3));


% pull out the values for the "r" and "inflection point" from your
% minimized error function
rD1 = min(1);
%rD1mat(k) = rD1;
rD2 = min(2);
%rD2mat(k) = rD2;


% H is from the linear proof which makes K a function of r and t0.
% Now solve for maximum biomass,which is Krep.


H = 1./(1+exp(-rD1*(T-rD2)));
Krep=(H'*dataT(:,3))/(H'*H);
%Krepmat(k) = Krep;


% figure 3 is your best fit plot. This is the one you want to use for
% output


% figure(3)
% t = linspace(0,100);
% y = Krep./(1+exp(-rD1*(t-rD2)));
% plot(T, dataT(:,2), 'o',t, y);
% xlabel('years after 1980')
% ylabel('biomass in Mg/Ha')
% title ('MLE of logistic fit based on minimized error surface')
% %end




% % for the fourth plot this is what you do
% [m4,n4] = size(linR);
% e = zeros(size(linR));
% for i=1:m4
%     for j=1:n4
%         e(i,j) = myerror([linR(i,j); t0(i,j)], T, dataT(:,4));
%     end
% end
%
% % find the minimum error of that plot, the first output is the value of "r"
% % and the second output is the inflection point when it occurs.
% min4 = fminsearch(@myerror, [0.1, 4],[],T,dataT(:,4));
%
% % pull out the values for the "r" and "inflection point" from your
% % minimized error function
% rD1 = min4(1);
% rD2 = min4(2);
%
% % now solve for maximum biomass,which is Krep.
%
% H = 1./(1+exp(-rD1*(T-rD2)));
% Krep4=(H'*dataT(:,1))/(H'*H);

Friday, August 12, 2011

not finished code


% site_index.m

% this program will calculate maximum productivity for ws1 plots

% we base this on the idea that biomass follows a logistic function. the
% logistic function is defined as B(t) = K/(1+Ce^(-r*t)). We would like to
% fit a logistic function to our biomass curves and then solve for where
% the derivative of biomass (productivity) is maximized (or where the
% second derivative of biomass is 0). To do this we must fit a logistic
% function to the biomass data.

% import biomass data

file = importdata('biomat.csv',',');
data = file;

% years after the first measurement
T =[0;4;8;12;16;22;28];

%transpose data

dataT = data';
prod_rate = zeros(6, length(dataT));

%create a vector "years" that holds the interval length between periods
years = [4 ; 4 ; 4 ; 4 ; 6 ; 6];

%calculate the difference in biomass-- later just import the ANPP matrix
%but we're doing this for now... no MORTALITY included at the moment!

% sample below shows how to do it for just one plot
prod_rate(1,1) = (dataT(2,1)- dataT(1,1))/years(1);
prod_rate(2,1) = (dataT(3,1)- dataT(2,1))/years(2);
prod_rate(3,1) = (dataT(4,1)- dataT(3,1))/years(3);
prod_rate(4,1) = (dataT(5,1)- dataT(4,1))/years(4);
prod_rate(5,1) = (dataT(6,1)- dataT(5,1))/years(5);
prod_rate(6,1) = (dataT(7,1)- dataT(6,1))/years(6);

% loop through all plots (lengthdataT) and for each interval (i) and store
% the results in the array "prod_rate"
for j = 2:length(dataT)
    for i=1:6
        prod_rate(i,j) = (dataT(i+1,j)-dataT(i,j))/years(i);
    end
end

%create a vector telling us what the maximum productivity is:
inflection_value = zeros(length(prod_rate),1);

inflection_value(1,1) = max(prod_rate(:,1));
for j = 2:length(dataT)
    inflection_value(j,1) = max(prod_rate(:,j));
end

%find that maximum productivity and WHEN IT OCCURS by interval time
% these are our initial guesses for the inflection point
indexOfMaxValue = zeros(length(prod_rate),1);

indexOfMaxValue(1,1) = find(prod_rate(:,1) == max(prod_rate(:,1)));
for j = 2:length(prod_rate)
    indexOfMaxValue(j,1) = find(prod_rate(:,j)== max(prod_rate(:,j)));
end

% to find the slope at the inflection point, which will generate the best
% guess value for "r" we need to find

slopeAtInflection = zeros(length(prod_rate),1);

% find the value one position ahead of the max. If the max occurs at the
% end of the interval, then save that value as the "one ahead" value
oneAhead = zeros(length(dataT),1);

oneAhead(1) = dataT((find(prod_rate(:,1) == max(prod_rate(:,1)))+1),1);
for j = 2:length(dataT)
    if indexOfMaxValue(j)~=6
    oneAhead(j) = dataT((find(prod_rate(:,j) == max(prod_rate(:,j)))+1),j);
    else
        oneAhead(j) = dataT((find(prod_rate(:,j) == max(prod_rate(:,j)))),j);
    end
end

% find the value one position behind the max. If the max occurs at the end
% of the interval it's cool.
oneBehind = zeros(length(dataT),1);
oneBehind(1) = prod_rate((find(prod_rate(:,1) == max(prod_rate(:,1)))-1),1);
for j = 2:length(dataT)
    %if indexOfMaxValue(j)~=6
    oneBehind(j) = dataT((find(prod_rate(:,j) == max(prod_rate(:,j)))-1),j);
   % else
      %  oneBehind(j) = prod_rate((find(prod_rate(:,j) == max(prod_rate(:,j)))),j);
   % end
end

% find the slope at the inflection point. This will help us find R.
slopeAtInflection(1,1) = (oneAhead(1)-oneBehind(1))/((indexOfMaxValue(1) + 1) -(indexOfMaxValue(1) -1));

for j = 2:length(dataT)
    if indexOfMaxValue(j) ~=6
        slopeAtInflection(j) = (oneAhead(j)-oneBehind(j))/ ((indexOfMaxValue(j)+1)-(indexOfMaxValue(j)-1));
    else
        slopeAtInflection(j) =(oneAhead(j)-oneBehind(j))/ ((indexOfMaxValue(j))-(indexOfMaxValue(j)-1));
    end
end

% absolute largest biomass on our watershed serves as the resounding
% asymptote for the initial guess

K = max(max(dataT));

% using the power of differential equations we solve to find that r =
% 4*slopeAtInflectionPoint/K

r = zeros(length(slopeAtInflection),1);
r = (4.*slopeAtInflection)/K;

% we will say that "r" is not less than 0.01 nor greater than 0.90, and we
% will graph in 50 equal increments.
% we will say that the inflection point must be between (logically) 1 and 6

linR=linspace(0.01,0.9,50);
t0=linspace(0,28,50);
[linR,t0]=meshgrid(linR,t0);

% for the first plot this is what you do
[m,n] = size(linR);
e = zeros(size(linR));
for i=1:m
    for j=1:n
        e(i,j) = myerror([linR(i,j); t0(i,j)], T, dataT(:,1));
    end
end

% % images of the first plot
% figure(1)
% mesh(linR, t0, e);
% xlabel('r')
% ylabel('inflection point')
% zlabel('e')
% title('plotting error versus parameters for the first plot');
%
% figure(2)
% contour(linR, t0, e,50)
% xlabel('r')
% ylabel('inflection');
% title('contour map');

% find the minimum error of that plot, the first output is the value of "r"
% and the second output is the inflection point when it occurs.
min = fminsearch(@myerror, [0.1, 4],[],T,dataT(:,1));

% pull out the values for the "r" and "inflection point" from your
% minimized error function
rD1 = min(1);
rD2 = min(2);

% now solve for maximum biomass,which is Krep.

H = 1./(1+exp(-rD1*(T-rD2)));
Krep=(H'*dataT(:,1))/(H'*H);

% for the fourth plot this is what you do
[m4,n4] = size(linR);
e = zeros(size(linR));
for i=1:m4
    for j=1:n4
        e(i,j) = myerror([linR(i,j); t0(i,j)], T, dataT(:,4));
    end
end

% find the minimum error of that plot, the first output is the value of "r"
% and the second output is the inflection point when it occurs.
min4 = fminsearch(@myerror, [0.1, 4],[],T,dataT(:,4));

% pull out the values for the "r" and "inflection point" from your
% minimized error function
rD1 = min4(1);
rD2 = min4(2);

% now solve for maximum biomass,which is Krep.

H = 1./(1+exp(-rD1*(T-rD2)));
Krep4=(H'*dataT(:,1))/(H'*H);