%pcabio.m
% to compute and graph PCA for stand biometrics, as well as measure distances between points!
% this file has the 10 stand metrics currently measured. The first row is
% numeric. The first column is plot names.
%------------------------------------------------------------------------
% names of files to load-- comment in what you want
%load PCAEX80.dat;
load PCABIO1984.dat
%load PCABIO2007.dat;
format short g
% creation of the file headers and column numbers. Different columns in
% early file due to no ANPP
%For 2007
% fid = fopen('PCABIO2007.dat');
% C = textscan(fid,'%s %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32');
% fclose(fid);
%for 1984
fid = fopen('PCABIO1984.dat');
C = textscan(fid,'%s %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32');
fclose(fid);
%For files that are 80
% fid = fopen('PCAEX80.dat');
% C = textscan(fid,'%s %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32 %f32');
% fclose(fid);
% these are the headings that correspond to the stand metrics, in order,
%2007
%headers = {'HARDWOOD' ,'BIO' ,'PERHARD','ANPP', 'BASAL', 'NUMT', 'STEMD', 'SHANNON', 'ASSAYNO','ASSAYBIO','NUMHW','PERNUMHW','STEMDHW','PERSTEMDHW'};
%1984
headers = {'HARDWOOD' ,'BIO' ,'PERHARD','ANPP', 'BASAL', 'NUMT', 'STEMD', 'ASSAYNO','ASSAYBIO','NUMHW','PERNUMHW','STEMDHW','PERSTEMDHW','SHANNON'};
% 1980
%headers = {'HARDWOOD' ,'BIO' ,'PERHARD','BASAL', 'NUMT', 'STEMD', 'ASSAYNO','ASSAYBIO','SHANNON','NUMHW','PERHWNUM','STEMDHW','PERSTEMDHW'};
% create a matrix that does not include the first row of PCAEX07 because it
% is text for the plot numbers (see above string in %s)
%X = PCAEX80(:,2:14);
X = PCABIO1984(:,2:15);
%X = PCABIO2007(:,2:15);
%------------------------------------------CONDUCTING PCA!!!------------
% if there are no NaN's!
[PC, scores, latent] = princomp(zscore(X));
% if NaN's are included, you must centralize and standardize each row
% individually, so comment in this code!
% % subtract out the mean
% for i = 1:length(X)
% thisRow = X(i,:);
% thisRow(isnan(thisRow)) = [];
% stdX(i) = std(thisRow);
% meanX(i) = mean(thisRow);
% centerX = X - repmat(meanX,10,1);
% centerStandardX = centerX./repmat(stdX,10,1);
% end
% % conduct principle components analysis
% [PC, scores, latent] = princomp(centerStandardX);
%----------------DISPLAYING THE CUMULATIVE OUTPUT OF THE VARIANCE!---
account = cumsum(latent)./sum(latent);
disp(account);
% name eigenvectors to the vectors that you are about.
vect1 = scores(:,1);
vect2 = scores(:,2);
% plot the principle components (FACTOR ANALYSIS PLOT) with the plot
% numbers-- must make sure these are STRINGS and not FLOATS/INTS
figure(1)
plot(vect1,vect2,'b.')
xlabel('1st Principal Component')
ylabel('2nd Principal Component')
title('Principal Components of Stand Biometerics')
text(vect1+0.1, vect2, C{1}, 'FontSize',8);
% plot a scree plot of cumulative variance explained
figure(2)
pareto(latent)
xlabel('Principal Component')
ylabel('Variance Explained (%)')
title('Scree plot of variance explained and CDF')
% plot a biplot for the first two axes... can be done for 3 as well, if
% needed, but is hard to interpret in my opinion. if disp(account)'s second
% entry is bigger than about 0.55, I don't do it.
figure(3)
biplot(PC(:,1:2), 'scores',scores(:,1:2),'varlabels',headers')
% for finding the distance between the sets of points and printing them out
% by labels! Will be used later in geostats
b = zeros(length(vect1),2);
for i = 1: length(vect1)
b(i,1) = vect1(i);
b(i,2) = vect2(i);
end
k = 0;
for i = 1:length(b)
for j = 1:length(b)
if j~=i
k=k+1;
index(k,:) = [i j];
end;
end;
end;
dist = ((b(index(:,2),1)-b(index(:,1),1)).^2 + (b(index(:,2),2)-b(index(:,2),2)).^2).^0.5;
% if you want to print it to the scree-- WARNING IT WILL OVERLOAD YOUR
% MEMORY IF YOU ARE RUNNING MUCH ELSE!
% fprintf(' #1 #2 distance\n');
% fprintf('%3d %3d %10.6f\n', ...
% [index(:,1),index(:,2),dist].');
% fprintf('Total %11.6f\n',sum(dist));
m = [index(:,1),index(:,2),dist];
% output the distance matrix information to a file that saves in the Matlab
% directory. I think this format will be adaptable well to SPSS, but it
% would be better for Matlab to have in matrix form!
%dlmwrite('biometric_distances_80.csv',m,',');
dlmwrite('biometric_distances_804.csv',m,',');
%dlmwrite('biometric_distances_07.csv',m,',');
No comments:
Post a Comment