Проект готов
This commit is contained in:
parent
da3e12f42e
commit
7cd2770d8d
4
.gitignore
vendored
4
.gitignore
vendored
@ -1 +1,3 @@
|
||||
.venv/
|
||||
.venv/
|
||||
data
|
||||
.ipynb_checkpoints/
|
309
Untitled.ipynb
Normal file
309
Untitled.ipynb
Normal file
File diff suppressed because one or more lines are too long
237
Untitled1.ipynb
Normal file
237
Untitled1.ipynb
Normal file
File diff suppressed because one or more lines are too long
108
plot_sgd_comparison copy.py
Normal file
108
plot_sgd_comparison copy.py
Normal file
@ -0,0 +1,108 @@
|
||||
import torch
|
||||
from torchvision import datasets, transforms
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from sklearn.linear_model import (
|
||||
LogisticRegression,
|
||||
PassiveAggressiveClassifier,
|
||||
Perceptron,
|
||||
SGDClassifier,
|
||||
)
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# -----------------------------
# 1. Load and prepare the data
# -----------------------------

transform = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor
    lambda x: x.view(-1).numpy(),  # flatten the image into a 1-D NumPy array
])

# Load EMNIST (Letters): handwritten letters A-Z.
train_dataset = datasets.EMNIST(
    root='./data', split='letters', train=True, download=True, transform=transform
)
test_dataset = datasets.EMNIST(
    root='./data', split='letters', train=False, download=True, transform=transform
)

# Cap the data at ~1800 samples (roughly the size of the digits dataset).
SAMPLE_LIMIT = 1800

# Materialise only the first SAMPLE_LIMIT samples, taking the train split
# first and falling back to the test split if the train split is too small.
# Indexing the datasets directly avoids running the transform over every
# image of both splits just to throw almost all of them away afterwards.
samples = []
for dataset in (train_dataset, test_dataset):
    remaining = SAMPLE_LIMIT - len(samples)
    if remaining <= 0:
        break
    samples.extend(dataset[i] for i in range(min(remaining, len(dataset))))

X = np.array([features for features, _ in samples])
# Labels run from 1 to 26; shift them to 0..25.
y = np.array([label - 1 for _, label in samples])

print("Данные загружены:", X.shape, y.shape)
|
||||
|
||||
# -----------------------------
# 2. Model configuration
# -----------------------------

# Fractions of the data held out as the test set.
heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
# Number of fit/evaluate repetitions averaged for each held-out fraction.
rounds = 10
# Iteration cap shared by every estimator below.
MAX_ITER = 110

classifiers = [
    ("SGD", SGDClassifier(max_iter=MAX_ITER)),
    ("ASGD", SGDClassifier(max_iter=MAX_ITER, average=True)),
    ("Perceptron", Perceptron(max_iter=MAX_ITER)),
    (
        "Passive-Aggressive I",
        PassiveAggressiveClassifier(
            max_iter=MAX_ITER, loss="hinge", C=1.0, tol=1e-4
        ),
    ),
    (
        "Passive-Aggressive II",
        PassiveAggressiveClassifier(
            max_iter=MAX_ITER, loss="squared_hinge", C=1.0, tol=1e-4
        ),
    ),
    (
        "SAG",
        LogisticRegression(
            max_iter=MAX_ITER, solver="sag", tol=1e-1, C=1.0e4 / X.shape[0]
        ),
    ),
]

# x-axis values: the fraction of the data used for training.
xx = 1.0 - np.array(heldout)

# -----------------------------
# 3. Training and evaluation
# -----------------------------


def _split_error(model, test_fraction, random_state):
    """Fit ``model`` on one random split of X, y and return its test error rate."""
    train_x, test_x, train_y, test_y = train_test_split(
        X, y, test_size=test_fraction, random_state=random_state
    )
    model.fit(train_x, train_y)
    return 1 - np.mean(model.predict(test_x) == test_y)


for name, clf in classifiers:
    print(f"Обучение: {name}")
    # A fresh generator with the same seed for each classifier, so every
    # model draws from an identically seeded sequence of splits.
    rng = np.random.RandomState(42)
    # Mean error over `rounds` splits, one value per held-out fraction.
    yy = [
        np.mean([_split_error(clf, test_size, rng) for _ in range(rounds)])
        for test_size in heldout
    ]
    plt.plot(xx, yy, label=name)

# -----------------------------
# 4. Plot the results
# -----------------------------

plt.legend(loc="upper right")
plt.xlabel("Пропорция обучающей выборки")
plt.ylabel("Ошибка на тесте")
plt.title("Сравнение онлайн-алгоритмов на уменьшенном EMNIST Letters")
plt.grid(True)
plt.show()
|
70
plot_sgd_comparison.py
Normal file
70
plot_sgd_comparison.py
Normal file
@ -0,0 +1,70 @@
|
||||
"""
|
||||
==================================
|
||||
Comparing various online solvers
|
||||
==================================
|
||||
An example showing how different online solvers perform
|
||||
on the hand-written digits dataset.
|
||||
"""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn.linear_model import (
|
||||
LogisticRegression,
|
||||
PassiveAggressiveClassifier,
|
||||
Perceptron,
|
||||
SGDClassifier,
|
||||
)
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Fractions of the data held out as the test set.
heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
# Number of rounds to fit and evaluate an estimator.
rounds = 10
X, y = datasets.load_digits(return_X_y=True)

classifiers = [
    ("SGD", SGDClassifier(max_iter=110)),
    ("ASGD", SGDClassifier(max_iter=110, average=True)),
    ("Perceptron", Perceptron(max_iter=110)),
    (
        "Passive-Aggressive I",
        PassiveAggressiveClassifier(max_iter=110, loss="hinge", C=1.0, tol=1e-4),
    ),
    (
        "Passive-Aggressive II",
        PassiveAggressiveClassifier(
            max_iter=110, loss="squared_hinge", C=1.0, tol=1e-4
        ),
    ),
    (
        "SAG",
        LogisticRegression(
            max_iter=110, solver="sag", tol=1e-1, C=1.0e4 / X.shape[0]
        ),
    ),
]

# x-axis values: the fraction of the data used for training.
xx = 1.0 - np.array(heldout)


def _mean_test_error(model, test_fraction, random_state):
    """Return the test error rate of ``model`` averaged over ``rounds`` splits."""
    round_errors = []
    for _ in range(rounds):
        train_x, test_x, train_y, test_y = train_test_split(
            X, y, test_size=test_fraction, random_state=random_state
        )
        model.fit(train_x, train_y)
        predicted = model.predict(test_x)
        round_errors.append(1 - np.mean(predicted == test_y))
    return np.mean(round_errors)


for name, clf in classifiers:
    print(f"training {name}")
    # A fresh generator with the same seed for each classifier, so every
    # model draws from an identically seeded sequence of splits.
    rng = np.random.RandomState(42)
    yy = [_mean_test_error(clf, fraction, rng) for fraction in heldout]
    plt.plot(xx, yy, label=name)

plt.legend(loc="upper right")
plt.xlabel("Proportion train")
plt.ylabel("Test Error Rate")
plt.show()
|
Loading…
Reference in New Issue
Block a user