% 11th Nov 2022
% 06:03 am
function Regression
%REGRESSION Fit a price-vs-Michaelis-constant line and apply it to new enzymes.
%   REGRESSION takes no inputs and returns nothing. It:
%     1. reads 'Data_NaturalCatalysts_priceCatalog.csv' and fits an ordinary
%        least-squares line  price = b(1) + b(2)*Km  (Table 5),
%     2. plots the scatter and the fitted line, and prints RMSE and R^2,
%     3. reads 'Data_nextGen_KEtesting_allresults.csv' and, for each of five
%        enzyme columns, plots the price predicted by the fitted line
%        (Table 6).
%   Both CSV files must be reachable from the current folder or MATLAB path.

% ---- Table 5: catalogue data -------------------------------------------
M = readtable('Data_NaturalCatalysts_priceCatalog.csv');
x = M.MichaelisConstant_uM_;   % predictor
y = M.Price_USD____lb_;        % response

plot(x, y, '*', 'displayname', 'Scatterplot')
title('scatterplot')
% Axis labels follow the table column names (units presumably uM and USD/lb
% -- TODO confirm against the CSV header).
xlabel('Michaelis constant (uM)')
ylabel('Price (USD/lb)')

% Fit linear regression line with OLS: solve [1 x]*b = y in the
% least-squares sense. b(1) is the intercept, b(2) the slope.
X = [ones(size(x,1),1) x];
b = X\y;

% Use the estimated intercept and slope to evaluate the regression line.
RegressionLine = X*b;

% Overlay the fitted line on the scatter plot, with its equation in the legend.
hold on
plot(x, RegressionLine, 'displayname', ...
    sprintf('Regression line (y = %0.2f*x + %0.2f)', b(2), b(1)))
hold off
legend('location', 'northwest')

% Goodness of fit. R^2 is the coefficient of determination
% 1 - SSresid/SStotal; for an OLS fit with intercept this equals the
% *squared* correlation between fitted and observed values.
yresid    = y - RegressionLine;
SSresid   = sum(yresid.^2);
SStotal   = sum((y - mean(y)).^2);
RMSE      = sqrt(mean(yresid.^2));
R_squared = 1 - SSresid/SStotal;
fprintf('RMSE: %0.2f | R2: %0.2f\n', RMSE, R_squared)

% ---- Same fit via polyfit; p is reused by polyval for prediction --------
% p(1) is the slope, p(2) the intercept (descending powers).
p  = polyfit(x, y, 1);
y2 = polyval(p, x);

figure
plot(x, y2)
% Build the title from the fitted coefficients instead of hard-coding them.
title(sprintf('Regression line (y = %0.2fx + %0.1f)', p(1), p(2)))

RMSE      = sqrt(mean((y - y2).^2));
R_squared = 1 - sum((y - y2).^2)/sum((y - mean(y)).^2);
fprintf('polyfit: slope = %0.4f, intercept = %0.4f | RMSE: %0.2f | R2: %0.2f\n', ...
    p(1), p(2), RMSE, R_squared)

% ---- Table 6: price prediction for the next-gen enzymes -----------------
N = readtable('Data_nextGen_KEtesting_allresults.csv');

% Table columns holding each enzyme's values, paired with the plot titles.
enzymeColumns = {'EnzymeNextGen_A', 'Var2', 'Var3', 'Var4', 'Var5'};
enzymeTitles  = {'Enzyme A', 'Enzyme B', 'Enzyme C', 'Enzyme D', 'Enzyme E'};

for k = 1:numel(enzymeColumns)
    values = N.(enzymeColumns{k});
    % The columns are converted from text with str2double; if readtable
    % already parsed a column as numeric, str2double would return NaN, so
    % numeric columns are used as-is.
    if ~isnumeric(values)
        values = str2double(values);
    end
    predictedPrice = polyval(p, values);

    figure
    plot(values, predictedPrice)
    title(enzymeTitles{k})
end
end