From 74227459bd0613927053cd73d829e96d72871b39 Mon Sep 17 00:00:00 2001 From: Maxim Slipenko Date: Thu, 29 Feb 2024 14:45:48 +0300 Subject: [PATCH 1/3] wip --- statapp/calculations.py | 94 ++++++++--------------- statapp/models/regression_result_model.py | 2 +- 2 files changed, 35 insertions(+), 61 deletions(-) diff --git a/statapp/calculations.py b/statapp/calculations.py index a0f9064..d26f2c7 100644 --- a/statapp/calculations.py +++ b/statapp/calculations.py @@ -81,93 +81,60 @@ class RegressionResult: monomials: list -def linearPolynom(data): +def _prepareDataAndFeatures(data, degree): y = data[:, 0] x = data[:, 1:] - - polyFeatures = PolynomialFeatures(degree=1, include_bias=False) + polyFeatures = PolynomialFeatures(degree=degree, include_bias=False) xPoly = polyFeatures.fit_transform(x) + return y, x, xPoly, polyFeatures + +def _trainModelAndPredict(y, xPoly): model = LinearRegression(fit_intercept=True) model.fit(xPoly, y) + predictions = model.predict(xPoly) + return model, predictions - params = np.hstack([model.intercept_, model.coef_]) - predictions = model.predict(xPoly) - residuals = y - predictions +def _calculateStatistics(y, x, xPoly, predictions, model, polyFeatures): mse = mean_squared_error(y, predictions) - rSquared = model.score(xPoly, y) - n = xPoly.shape[0] k = xPoly.shape[1] + 1 - fStatistic = (rSquared / (k - 1)) / ((1 - rSquared) / (n - k)) - - xWithIntercept = np.hstack([np.ones((n, 1)), xPoly]) - varB = mse * np.linalg.inv(xWithIntercept.T @ xWithIntercept).diagonal() - seB = np.sqrt(varB) - - tStats = params / seB - - monomials = ['c'] + ['x' + str(i) for i in range(1, x.shape[1] + 1)] - - residualVariance = np.var(residuals, ddof=k) - scaledResidualVariance = residualVariance / (n - k) - - paramsAndTStats = np.vstack((params, tStats)).T - - return RegressionResult( - paramsAndTStats, - residualVariance, - scaledResidualVariance, - rSquared, - fStatistic, - monomials - ) - - -def squaredPolynom(data): - y = data[:, 0] - x = data[:, 1:] - - polyFeatures = PolynomialFeatures(degree=2, include_bias=False) - xPoly = polyFeatures.fit_transform(x) - - model = LinearRegression(fit_intercept=True) - model.fit(xPoly, y) - params = np.hstack([model.intercept_, model.coef_]) - - predictions = model.predict(xPoly) residuals = y - predictions - mse = mean_squared_error(y, predictions) - - rSquared = model.score(xPoly, y) - - n = xPoly.shape[0] - k = xPoly.shape[1] + 1 - - fStatistic = (rSquared / (k - 1)) / ((1 - rSquared) / (n - k)) - xWithIntercept = np.hstack([np.ones((n, 1)), xPoly]) varB = mse * np.linalg.pinv(xWithIntercept.T @ xWithIntercept).diagonal() seB = np.sqrt(np.maximum(varB, 0)) - tStats = params / seB - + residualVariance = np.var(residuals, ddof=k) + scaledResidualVariance = residualVariance / (n - k) monomials = ['c'] + list( polyFeatures.get_feature_names_out(['x' + str(i) for i in range(1, x.shape[1] + 1)]) ) monomials = [monomial.replace(' ', '*') for monomial in monomials] + return params, tStats, residualVariance, scaledResidualVariance, rSquared, fStatistic, monomials - residualVariance = np.var(residuals, ddof=k) - scaledResidualVariance = residualVariance / (n - k) - paramsAndTStats = np.vstack((params, tStats)).T +def _regressionAnalysis(data, degree): + y, x, xPoly, polyFeatures = _prepareDataAndFeatures( + data, degree + ) + model, predictions = _trainModelAndPredict(y, xPoly) + (params, tStats, residualVariance, + scaledResidualVariance, rSquared, fStatistic, monomials) = ( + _calculateStatistics( + y, + x, + xPoly, + predictions, + model, + polyFeatures + )) return RegressionResult( - paramsAndTStats, + np.vstack((params, tStats)).T, residualVariance, scaledResidualVariance, rSquared, @@ -175,6 +142,13 @@ def squaredPolynom(data): monomials ) +def linearPolynom(data): + return _regressionAnalysis(data, 1) + + +def squaredPolynom(data): + return _regressionAnalysis(data, 2) + def prediction(inputData, result: RegressionResult): inputs = inputData[:, 1:] diff --git a/statapp/models/regression_result_model.py b/statapp/models/regression_result_model.py index 059e6ea..17af473 100644 --- a/statapp/models/regression_result_model.py +++ b/statapp/models/regression_result_model.py @@ -28,7 +28,7 @@ def __init__(self, result: RegressionResult): self._monomials = result.monomials def getHorizontalHeader(self): - return ['Коэффициент регрессии', 'Коэффициент значимости'] + return ['Коэффициент регрессии', 'Коэффициент значимости', 'Весовые коэффициенты'] def getVerticalHeader(self): return self._monomials From 4e09bc28f50c908e835fd6241a20b7bb7af50621 Mon Sep 17 00:00:00 2001 From: Maxim Slipenko Date: Thu, 29 Feb 2024 18:55:16 +0300 Subject: [PATCH 2/3] wip --- statapp/calculations.py | 24 ++++++++++++++++++-- statapp/polynoms/transform_polynom_window.py | 2 +- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/statapp/calculations.py b/statapp/calculations.py index d26f2c7..8c1c759 100644 --- a/statapp/calculations.py +++ b/statapp/calculations.py @@ -25,6 +25,7 @@ from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error +from sklearn.metrics import r2_score DIRECT_LINK = 0 @@ -97,26 +98,45 @@ def _trainModelAndPredict(y, xPoly): def _calculateStatistics(y, x, xPoly, predictions, model, polyFeatures): + # Рассчитываем Среднеквадратическую ошибку (MSE) между фактическими и прогнозируемыми значениями mse = mean_squared_error(y, predictions) - rSquared = model.score(xPoly, y) + # Рассчитываем коэффициент детерминации R^2, который + # показывает долю вариации зависимой переменной, объясненную моделью + rSquared = r2_score(y, predictions) + # Определяем количество наблюдений n = xPoly.shape[0] + # Определяем количество предикторов (признаков) плюс один для свободного члена k = xPoly.shape[1] + 1 + # Рассчитываем F-статистику для оценки значимости всей регрессионной модели fStatistic = (rSquared / (k - 1)) / ((1 - rSquared) / (n - k)) + # Собираем параметры модели, включая свободный член и коэффициенты перед переменными params = np.hstack([model.intercept_, model.coef_]) + # Вычисляем остатки модели как разницу между фактическими и прогнозируемыми значениями residuals = y - predictions + # Добавляем столбец единиц к матрице признаков для учета свободного члена в регрессионной модели xWithIntercept = np.hstack([np.ones((n, 1)), xPoly]) + # Рассчитываем дисперсии коэффициентов модели varB = mse * np.linalg.pinv(xWithIntercept.T @ xWithIntercept).diagonal() + # Вычисляем стандартные ошибки коэффициентов, берем корень из дисперсий seB = np.sqrt(np.maximum(varB, 0)) + # Рассчитываем t-статистики для каждого коэффициента tStats = params / seB + # Рассчитываем дисперсию остатков с поправкой на количество параметров residualVariance = np.var(residuals, ddof=k) - scaledResidualVariance = residualVariance / (n - k) + # Рассчитываем скорректированную дисперсию остатков + scaledResidualVariance = 1 - rSquared + # Генерируем список мономов (названий признаков после + # полиномиализации), добавляя константу для свободного члена monomials = ['c'] + list( polyFeatures.get_feature_names_out(['x' + str(i) for i in range(1, x.shape[1] + 1)]) ) + # Заменяем пробелы на звездочки для представления умножения в названиях мономов monomials = [monomial.replace(' ', '*') for monomial in monomials] + # Возвращаем рассчитанные статистики и названия мономов return params, tStats, residualVariance, scaledResidualVariance, rSquared, fStatistic, monomials + def _regressionAnalysis(data, degree): y, x, xPoly, polyFeatures = _prepareDataAndFeatures( data, degree diff --git a/statapp/polynoms/transform_polynom_window.py b/statapp/polynoms/transform_polynom_window.py index 9d98d9e..fb10fd3 100644 --- a/statapp/polynoms/transform_polynom_window.py +++ b/statapp/polynoms/transform_polynom_window.py @@ -116,4 +116,4 @@ def rebuildData(self, data): self.ui.residualVarianceValueLabel.setText(str(result.residualVariance)) self.ui.scaledResidualVarianceValueLabel.setText(str(result.scaledResidualVariance)) self.ui.fStatisticValueLabel.setText(str(result.fStatistic)) - self.ui.rSquaredValueLabel.setText(str(result.scaledResidualVariance)) + self.ui.rSquaredValueLabel.setText(str(result.rSquared)) From 69d8a0ff73e89970848d3c1cc99bdf486cae33c3 Mon Sep 17 00:00:00 2001 From: Maxim Slipenko Date: Thu, 29 Feb 2024 19:03:14 +0300 Subject: [PATCH 3/3] wip --- statapp/models/regression_result_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statapp/models/regression_result_model.py b/statapp/models/regression_result_model.py index 17af473..059e6ea 100644 --- a/statapp/models/regression_result_model.py +++ b/statapp/models/regression_result_model.py @@ -28,7 +28,7 @@ def __init__(self, result: RegressionResult): self._monomials = result.monomials def getHorizontalHeader(self): - return ['Коэффициент регрессии', 'Коэффициент значимости', 'Весовые коэффициенты'] + return ['Коэффициент регрессии', 'Коэффициент значимости'] def getVerticalHeader(self): return self._monomials