% Laboratory in Oceanography - Computing Basic Statistics % SMAST, UMass Dartmouth % % Examples of basic stats computations and plots % % Written by Miles A. Sundermeyer, 2/17/09 % set a breakpoint - use F10 to step line by line in debug mode keyboard %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% load some data to perform analysis on %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Import the file S = importdata('ONichols_2008.csv',',',1); header = S.textdata(1,:) [year month day] = datevec(S.textdata(2:end,1)); % notice there is no time of day record, but that there are % 96 data points per day, i.e.,15 minute data. Create decimal day array. decday = [0:95]'*ones(1,7); decday(1:(96-69)) = []; decday = day + decday'*15/24/60; S.data(1:7,:) = []; % clear out known bad data points decday(1:7) = []; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% figure(1) clf for n=1:11 subplot(6,2,n) plot(decday, S.data(:,n)); title(header{n+1}) end print -djpeg basic_stats_1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % parse data into individual variables temp = S.data(:,1); DOconc = S.data(:,3); depth = S.data(:,5); cond = S.data(:,7); salin = S.data(:,10); TDS = S.data(:,11); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Now do some stats on salinity figure(2) clf %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% subplot(2,1,1) plot(decday, DOconc,'b.-') %plot(decday, salin,'b.-') xlabel('day') ylabel('DO conc (\mu g/l)') %ylabel('salinity') set(gca,'ylim',[9 11.5]); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% subplot(2,2,3) bins = [8:0.1:11.5]; hist(DOconc,bins) %hist(salin,20) xlabel('DO conc (\mu g/l)') ylabel('# Occurances') set(gca,'xlim',[9 11.5]); set(gca,'ylim',[0 125]); print -djpeg basic_stats_2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % compute min, max [DOconc_min, DOconc_minind] = min(DOconc) [DOconc_max, DOconc_maxind] = max(DOconc) DOconc_range = range(DOconc) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% figure(2) subplot(2,1,1) hold on plot(decday(DOconc_minind),DOconc_min,'m^','markerfacecolor','m') plot(decday(DOconc_maxind),DOconc_max,'rv','markerfacecolor','r') subplot(2,2,3) hold on plot(DOconc_min,0,'m^','markerfacecolor','m') plot(DOconc_max,0,'rv','markerfacecolor','r') print -djpeg basic_stats_3 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % compute mean, median, mode DOconc_mean = mean(DOconc) DOconc_mode = mode(DOconc) DOconc_median = median(DOconc) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% figure(2) subplot(2,1,1) plot([min(day) max(day)+1], DOconc_mean*[1 1], 'k-', 'linewidth', 2) plot([min(day) max(day)+1], DOconc_mode*[1 1], 'c-') plot([min(day) max(day)+1], DOconc_median*[1 1], 'g-') subplot(2,2,3) plot(DOconc_mean*[1 1], [0 125], 'k-', 'linewidth', 2) plot(DOconc_mode*[1 1], [0 125], 'c-', 'linewidth', 2) plot(DOconc_median*[1 1], [0 125], 'g-', 'linewidth', 2) print -djpeg basic_stats_4 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % now do variance and std deviation DOconc_var = var(DOconc) DOconc_std = std(DOconc) DOconc_std_too = sqrt(DOconc_var) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% figure(2) subplot(2,1,1) plot([min(day) max(day)+1]', ... DOconc_mean*ones(2)+DOconc_std*[-1 -1; 1 1]', 'k--','linewidth',2) subplot(2,2,3) plot(DOconc_mean*ones(2)+DOconc_std*[-1 1; -1 1], ... [0 125], 'w', 'linewidth', 2) plot(DOconc_mean*ones(2)+DOconc_std*[-1 1; -1 1], ... [0 125], 'k--', 'linewidth', 2) print -djpeg basic_stats_5 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % compute percentiles Y=prctile(DOconc,[0 25 50 75 100]) figure(2) subplot(2,1,1) plot([min(day) max(day)+1]', ... [Y(2)*[1 1]; Y(4)*[1 1]]', 'w','linewidth',2) plot([min(day) max(day)+1]', ... [Y(2)*[1 1]; Y(4)*[1 1]]', 'k:','linewidth',2) subplot(2,2,3) plot([Y(2)*[1; 1] Y(4)*[1; 1]], ... [0 125], 'w', 'linewidth', 2) plot([Y(2)*[1; 1] Y(4)*[1; 1]], ... [0 125], 'k:', 'linewidth', 2) print -djpeg basic_stats_6 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % plot cumulative distribution function subplot(2,2,4) cdfplot(DOconc) xlabel('DO conc (\mu g/l)') set(gca,'xlim',[9 11]) print -djpeg basic_stats_7 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % tests for normality: H=0 indicates the null hypothsis "the data are % normally distributed" canot be rejected at 5% significance level DOconc_kstest = kstest((DOconc-DOconc_mean)/DOconc_std,[]) % Kolmogorov-Smirnov test DOconc_lillie = lillietest(DOconc) % Lilliefors test for normality DOconc_jb = jbtest(DOconc) % Jarque-Bera test for normality %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Create normal distribution with same number of points for comparison randdata = randn(size(DOconc)); % make the std dev and mean the same as well randdata = randdata*DOconc_std + DOconc_mean; figure(2) hold on subplot(2,1,1) plot(decday,randdata,'r-') subplot(2,2,3) % pass X, the bin centers of prev hist hold on hist(randdata,bins,'r'); h = findobj(gca,'Type','patch'); % change this one to red set(h(1),'FaceColor','none','EdgeColor','r','linewidth',2) subplot(2,2,4) hold on h = cdfplot(randdata); set(h,'color','r') print -djpeg basic_stats_8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % create a Gaussian PDF curve with same stats as above t = [9:0.01:11.5]; x = 1/(DOconc_std*sqrt(2*pi)) * exp(-(t-DOconc_mean).^2/(2*DOconc_var)); figure(3) subplot(2,2,1) plot(t,x,'r-') set(gca,'xlim',[9 11.5]); title('Gaussian') print -djpeg basic_stats_9