# Listing metadata: 108 lines, 3.5 KiB, Python
from itertools import chain, islice

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.linear_model import (
    LogisticRegression,
    PassiveAggressiveClassifier,
    Perceptron,
    SGDClassifier,
)
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms

# -----------------------------
# 1. Data loading and preparation
# -----------------------------

transform = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor
    lambda x: x.view(-1).numpy(),  # flatten the image into a 1-D NumPy array
])

# Load EMNIST (Letters): contains the letters A-Z
train_dataset = datasets.EMNIST(
    root='./data', split='letters', train=True, download=True, transform=transform
)
test_dataset = datasets.EMNIST(
    root='./data', split='letters', train=False, download=True, transform=transform
)

# Cap the data at ~1800 samples (as in digits())
SAMPLE_LIMIT = 1800

# Walk train followed by test lazily and stop as soon as SAMPLE_LIMIT samples
# have been collected.  This yields exactly the first SAMPLE_LIMIT rows of the
# train+test concatenation, but only transforms the images that are actually
# kept instead of materialising both datasets in full (twice) and slicing
# afterwards.  The test split is only reached if the train split runs out.
samples = list(islice(chain(train_dataset, test_dataset), SAMPLE_LIMIT))

X = np.array([features for features, _ in samples])
y = np.array([label - 1 for _, label in samples])  # labels 1..26 -> 0..25

print("Данные загружены:", X.shape, y.shape)

# -----------------------------
# 2. Model configuration
# -----------------------------

# Fractions of the data held out for testing, and the matching fractions
# left for training (used as the x-axis of the final plot).
heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
xx = 1.0 - np.asarray(heldout)

# Number of random splits averaged for every held-out fraction.
rounds = 10

# Iteration cap shared by every estimator below.
_MAX_ITER = 110

# (display name, estimator) pairs, in the order they are trained and plotted.
classifiers = [
    ("SGD", SGDClassifier(max_iter=_MAX_ITER)),
    ("ASGD", SGDClassifier(max_iter=_MAX_ITER, average=True)),
    ("Perceptron", Perceptron(max_iter=_MAX_ITER)),
    (
        "Passive-Aggressive I",
        PassiveAggressiveClassifier(
            max_iter=_MAX_ITER, loss="hinge", C=1.0, tol=1e-4
        ),
    ),
    (
        "Passive-Aggressive II",
        PassiveAggressiveClassifier(
            max_iter=_MAX_ITER, loss="squared_hinge", C=1.0, tol=1e-4
        ),
    ),
    (
        "SAG",
        # C is scaled by the number of samples in X.
        LogisticRegression(
            max_iter=_MAX_ITER, solver="sag", tol=1e-1, C=1.0e4 / X.shape[0]
        ),
    ),
]

# -----------------------------
# 3. Model training and evaluation
# -----------------------------


def _holdout_error(clf, test_size, rng):
    """Fit *clf* on one random split of (X, y) and return its test error rate.

    The split is drawn from *rng*, so consecutive calls sharing the same
    RandomState produce different splits.
    """
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        X, y, test_size=test_size, random_state=rng
    )
    clf.fit(X_fit, y_fit)
    predictions = clf.predict(X_eval)
    return 1 - np.mean(predictions == y_eval)


for name, clf in classifiers:
    print(f"Обучение: {name}")
    # Fresh, identically seeded generator per classifier, so every model is
    # evaluated on the same sequence of random splits.
    rng = np.random.RandomState(42)
    yy = []
    for test_size in heldout:
        # Average the error over `rounds` random splits of this size.
        round_errors = [_holdout_error(clf, test_size, rng) for _ in range(rounds)]
        yy.append(np.mean(round_errors))
    # One curve per classifier: mean test error vs. training proportion.
    plt.plot(xx, yy, label=name)

# -----------------------------
# 4. Result visualisation
# -----------------------------

# Decorate the axes that the training loop drew its curves on.
ax = plt.gca()
ax.legend(loc="upper right")
ax.set_xlabel("Пропорция обучающей выборки")
ax.set_ylabel("Ошибка на тесте")
ax.set_title("Сравнение онлайн-алгоритмов на уменьшенном EMNIST Letters")
ax.grid(True)

plt.show()