Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: исправлена остаточная дисперсия (масштабированная) #124

Merged
merged 4 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 55 additions & 61 deletions statapp/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


DIRECT_LINK = 0
Expand Down Expand Up @@ -81,100 +82,93 @@ class RegressionResult:
monomials: list


def linearPolynom(data):
def _prepareDataAndFeatures(data, degree):
    """Split ``data`` into target and predictors and expand the predictors.

    Args:
        data: 2-D array. Column 0 is used as the dependent variable, all
            remaining columns as the predictors.
        degree: Degree passed to ``PolynomialFeatures``.

    Returns:
        Tuple ``(y, x, xPoly, polyFeatures)``: the target column, the raw
        predictor columns, the expanded feature matrix and the fitted
        ``PolynomialFeatures`` transformer.
    """
    y = data[:, 0]
    x = data[:, 1:]

    # include_bias=False: the constant column is not generated here; the
    # intercept is handled by the regression model itself.
    polyFeatures = PolynomialFeatures(degree=degree, include_bias=False)
    xPoly = polyFeatures.fit_transform(x)
    return y, x, xPoly, polyFeatures


def _trainModelAndPredict(y, xPoly):
    """Fit a linear regression with intercept and predict on the same data.

    Args:
        y: Target values.
        xPoly: Feature matrix the model is fitted on.

    Returns:
        Tuple ``(model, predictions)``: the fitted ``LinearRegression`` and
        its predictions for ``xPoly`` (in-sample predictions).
    """
    model = LinearRegression(fit_intercept=True)
    model.fit(xPoly, y)

    predictions = model.predict(xPoly)
    return model, predictions

rSquared = model.score(xPoly, y)

def _calculateStatistics(y, x, xPoly, predictions, model, polyFeatures):
    """Compute fit statistics for an already fitted regression model.

    Args:
        y: Observed target values.
        x: Raw predictor columns; only the column count is used, to name
            the monomials ``x1``, ``x2``, ...
        xPoly: Expanded feature matrix the model was fitted on.
        predictions: Model predictions for ``xPoly``.
        model: Fitted model exposing ``intercept_`` and ``coef_``.
        polyFeatures: Fitted ``PolynomialFeatures`` used to build ``xPoly``.

    Returns:
        Tuple ``(params, tStats, residualVariance, scaledResidualVariance,
        rSquared, fStatistic, monomials)``.
    """
    # Mean squared error between observed and predicted values.
    mse = mean_squared_error(y, predictions)
    # Coefficient of determination R^2.
    rSquared = r2_score(y, predictions)
    # Number of observations.
    n = xPoly.shape[0]
    # Number of features plus one for the intercept.
    k = xPoly.shape[1] + 1

    # F-statistic for the significance of the regression as a whole.
    # NOTE(review): divides by (1 - rSquared) and (n - k); a perfect fit or
    # n == k is not guarded against - confirm callers never hit that case.
    fStatistic = (rSquared / (k - 1)) / ((1 - rSquared) / (n - k))

    # Model parameters: intercept first, then the feature coefficients.
    params = np.hstack([model.intercept_, model.coef_])

    # Residuals: observed minus predicted values.
    residuals = y - predictions

    # Prepend a column of ones so the intercept is part of the design matrix.
    xWithIntercept = np.hstack([np.ones((n, 1)), xPoly])
    # Coefficient variances; the pseudo-inverse is used for X'X.
    # NOTE(review): the scale factor is the MSE as returned by
    # mean_squared_error, not a degrees-of-freedom corrected estimate -
    # confirm this is intended.
    varB = mse * np.linalg.pinv(xWithIntercept.T @ xWithIntercept).diagonal()
    # Standard errors; negative variances are clamped to 0 before the root.
    seB = np.sqrt(np.maximum(varB, 0))

    # t-statistic of every coefficient.
    tStats = params / seB

    # Residual variance with ddof equal to the number of parameters.
    residualVariance = np.var(residuals, ddof=k)
    # Scaled residual variance, reported as 1 - R^2.
    scaledResidualVariance = 1 - rSquared

    # Monomial names: 'c' for the intercept followed by the names of the
    # generated polynomial features.
    monomials = ['c'] + list(
        polyFeatures.get_feature_names_out(
            ['x' + str(i) for i in range(1, x.shape[1] + 1)]
        )
    )
    # Feature names separate factors with a space; show them as products.
    monomials = [monomial.replace(' ', '*') for monomial in monomials]

    return (
        params,
        tStats,
        residualVariance,
        scaledResidualVariance,
        rSquared,
        fStatistic,
        monomials,
    )


residualVariance = np.var(residuals, ddof=k)
scaledResidualVariance = residualVariance / (n - k)

paramsAndTStats = np.vstack((params, tStats)).T
def _regressionAnalysis(data, degree):
    """Run the regression pipeline for a polynomial of the given degree.

    Prepares the features, fits the model, computes the statistics and
    packs everything into a ``RegressionResult``.

    Args:
        data: 2-D array passed to ``_prepareDataAndFeatures``.
        degree: Polynomial degree of the feature expansion.

    Returns:
        ``RegressionResult`` whose first argument is a two-column array with
        one row per monomial: the parameter and its t-statistic.
    """
    y, x, xPoly, polyFeatures = _prepareDataAndFeatures(data, degree)
    model, predictions = _trainModelAndPredict(y, xPoly)
    (
        params,
        tStats,
        residualVariance,
        scaledResidualVariance,
        rSquared,
        fStatistic,
        monomials,
    ) = _calculateStatistics(y, x, xPoly, predictions, model, polyFeatures)

    return RegressionResult(
        # Stack parameters and t-statistics as columns.
        np.vstack((params, tStats)).T,
        residualVariance,
        scaledResidualVariance,
        rSquared,
        fStatistic,
        monomials,
    )

def linearPolynom(data):
    """Return the regression result for a degree-1 polynomial of ``data``."""
    return _regressionAnalysis(data, 1)


def squaredPolynom(data):
    """Return the regression result for a degree-2 polynomial of ``data``."""
    return _regressionAnalysis(data, 2)


def prediction(inputData, result: RegressionResult):
inputs = inputData[:, 1:]
Expand Down
2 changes: 1 addition & 1 deletion statapp/polynoms/transform_polynom_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,4 @@ def rebuildData(self, data):
self.ui.residualVarianceValueLabel.setText(str(result.residualVariance))
self.ui.scaledResidualVarianceValueLabel.setText(str(result.scaledResidualVariance))
self.ui.fStatisticValueLabel.setText(str(result.fStatistic))
self.ui.rSquaredValueLabel.setText(str(result.scaledResidualVariance))
self.ui.rSquaredValueLabel.setText(str(result.rSquared))
Loading