Незначительные правки в нескольких Markdown-ячейках. Чтобы корректно просмотреть Markdown-ячейку с пошаговым описанием алгоритма работы блоков кода, нужно дважды щёлкнуть по выбранной ячейке.

This commit is contained in:
Иван Кузнецов 2025-05-13 22:17:19 +03:00
parent ddffe931ea
commit 241c01c017
2 changed files with 46 additions and 78 deletions

View File

@ -45,15 +45,15 @@
"from sklearn.neural_network import MLPClassifier\n", "from sklearn.neural_network import MLPClassifier\n",
"from sklearn.metrics import classification_report\n", "from sklearn.metrics import classification_report\n",
"\n", "\n",
"# Загрузка и разбиение данных\n", "#Загрузка и разбиение данных\n",
"X, y = load_iris(return_X_y=True)\n", "X, y = load_iris(return_X_y=True)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n", "\n",
"# Модель MLP — многослойный перцептрон\n", "#Модель MLP — многослойный перцептрон\n",
"clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=500)\n", "clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=500)\n",
"clf.fit(X_train, y_train)\n", "clf.fit(X_train, y_train)\n",
"\n", "\n",
"# Отчёт о точности\n", "#Отчёт о точности\n",
"print(classification_report(y_test, clf.predict(X_test)))" "print(classification_report(y_test, clf.predict(X_test)))"
] ]
}, },
@ -65,28 +65,12 @@
"--------------------------------------------------" "--------------------------------------------------"
] ]
}, },
{
"cell_type": "markdown",
"id": "d0e92ee0-5dc3-4770-bef6-9128528698a7",
"metadata": {},
"source": [
"# Был выбран пример \"Inductive Clustering\" из раздела \"Clustering\". Ниже реализация алгоритма со scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe13ea46-9ab7-4d59-a5c8-319d75de55c8",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "e0cdc91f-bac2-47a5-8e77-2c6aa79483bb", "id": "e0cdc91f-bac2-47a5-8e77-2c6aa79483bb",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Для самостоятельного задания выбран \"Inductive Clustering\" из Clustering. Ниже непосредственный пример его реализации со scikit-learn" "# Для самостоятельного задания выбран \"Inductive Clustering\" из Clustering. Ниже непосредственный пример его реализации со scikit-learn\n"
] ]
}, },
{ {
@ -260,11 +244,11 @@
"from sklearn.utils.metaestimators import available_if\n", "from sklearn.utils.metaestimators import available_if\n",
"from sklearn.utils.validation import check_is_fitted\n", "from sklearn.utils.validation import check_is_fitted\n",
"\n", "\n",
"# === Константы ===\n", "# Константы \n",
"RANDOM_STATE = 42\n", "RANDOM_STATE = 42\n",
"N_SAMPLES = 1000\n", "N_SAMPLES = 1000\n",
"\n", "\n",
"# === Вспомогательная функция для делегирования методов классификатора ===\n", "# Вспомогательная функция для делегирования методов классификатора\n",
"def _classifier_has(attr):\n", "def _classifier_has(attr):\n",
" return lambda estimator: (\n", " return lambda estimator: (\n",
" hasattr(estimator.classifier_, attr)\n", " hasattr(estimator.classifier_, attr)\n",
@ -272,7 +256,7 @@
" else hasattr(estimator.classifier, attr)\n", " else hasattr(estimator.classifier, attr)\n",
" )\n", " )\n",
"\n", "\n",
"# === Класс индуктивного кластеризатора ===\n", "# Класс индуктивного кластеризатора \n",
"class InductiveClusterer(BaseEstimator):\n", "class InductiveClusterer(BaseEstimator):\n",
" def __init__(self, clusterer, classifier):\n", " def __init__(self, clusterer, classifier):\n",
" self.clusterer = clusterer\n", " self.clusterer = clusterer\n",
@ -295,11 +279,11 @@
" check_is_fitted(self)\n", " check_is_fitted(self)\n",
" return self.classifier_.decision_function(X)\n", " return self.classifier_.decision_function(X)\n",
"\n", "\n",
"# === Функция для отрисовки точек ===\n", "# Функция для отрисовки точек \n",
"def plot_scatter(X, color, alpha=0.5):\n", "def plot_scatter(X, color, alpha=0.5):\n",
" return plt.scatter(X[:, 0], X[:, 1], c=color, alpha=alpha, edgecolor=\"k\")\n", " return plt.scatter(X[:, 0], X[:, 1], c=color, alpha=alpha, edgecolor=\"k\")\n",
"\n", "\n",
"# === Генерация обучающих данных ===\n", "#Генерация обучающих данных \n",
"X, y = make_classification(\n", "X, y = make_classification(\n",
" n_samples=N_SAMPLES,\n", " n_samples=N_SAMPLES,\n",
" n_features=2,\n", " n_features=2,\n",
@ -310,17 +294,17 @@
" random_state=RANDOM_STATE,\n", " random_state=RANDOM_STATE,\n",
")\n", ")\n",
"\n", "\n",
"# === Кластеризация ===\n", "# Кластеризация\n",
"clusterer = AgglomerativeClustering(n_clusters=3)\n", "clusterer = AgglomerativeClustering(n_clusters=3)\n",
"cluster_labels = clusterer.fit_predict(X)\n", "cluster_labels = clusterer.fit_predict(X)\n",
"\n", "\n",
"# === Визуализация кластеров ===\n", "# Визуализация кластеров \n",
"plt.figure(figsize=(12, 4))\n", "plt.figure(figsize=(12, 4))\n",
"plt.subplot(131)\n", "plt.subplot(131)\n",
"plot_scatter(X, cluster_labels)\n", "plot_scatter(X, cluster_labels)\n",
"plt.title(\"Ward Linkage (Classification Data)\")\n", "plt.title(\"Ward Linkage (Classification Data)\")\n",
"\n", "\n",
"# === Генерация новых (неизвестных) точек ===\n", "# Генерация новых (неизвестных) точек\n",
"X_new, _ = make_classification(\n", "X_new, _ = make_classification(\n",
" n_samples=10,\n", " n_samples=10,\n",
" n_features=2,\n", " n_features=2,\n",
@ -331,20 +315,20 @@
" random_state=RANDOM_STATE + 1,\n", " random_state=RANDOM_STATE + 1,\n",
")\n", ")\n",
"\n", "\n",
"# === Визуализация известных и новых точек ===\n", "# Визуализация известных и новых точек \n",
"plt.subplot(132)\n", "plt.subplot(132)\n",
"plot_scatter(X, cluster_labels)\n", "plot_scatter(X, cluster_labels)\n",
"plot_scatter(X_new, \"black\", 1)\n", "plot_scatter(X_new, \"black\", 1)\n",
"plt.title(\"Unknown instances\")\n", "plt.title(\"Unknown instances\")\n",
"\n", "\n",
"# === Индуктивное обучение ===\n", "# Индуктивное обучение \n",
"classifier = RandomForestClassifier(random_state=RANDOM_STATE)\n", "classifier = RandomForestClassifier(random_state=RANDOM_STATE)\n",
"inductive_learner = InductiveClusterer(clusterer, classifier).fit(X)\n", "inductive_learner = InductiveClusterer(clusterer, classifier).fit(X)\n",
"\n", "\n",
"# === Предсказание кластера для новых точек ===\n", "# Предсказание кластера для новых точек \n",
"predicted_clusters = inductive_learner.predict(X_new)\n", "predicted_clusters = inductive_learner.predict(X_new)\n",
"\n", "\n",
"# === Визуализация предсказаний и границ ===\n", "#Визуализация предсказаний и границ \n",
"ax = plt.subplot(133)\n", "ax = plt.subplot(133)\n",
"plot_scatter(X, cluster_labels)\n", "plot_scatter(X, cluster_labels)\n",
"plot_scatter(X_new, predicted_clusters)\n", "plot_scatter(X_new, predicted_clusters)\n",
@ -562,23 +546,23 @@
"from sklearn.decomposition import PCA\n", "from sklearn.decomposition import PCA\n",
"from sklearn.datasets import fetch_openml\n", "from sklearn.datasets import fetch_openml\n",
"\n", "\n",
"# 1. Загрузка данных через fetch_openml\n", "# Загрузка данных через fetch_openml\n",
"data = fetch_openml(data_id=43618, as_frame=True)\n", "data = fetch_openml(data_id=43618, as_frame=True)\n",
"df = data.frame\n", "df = data.frame\n",
"\n", "\n",
"# 2. Предобработка данных\n", "# Предобработка данных\n",
"\n", "\n",
"df.fillna(df.median(), inplace=True)\n", "df.fillna(df.median(), inplace=True)\n",
"\n", "\n",
"# 3. Масштабирование признаков\n", "# Масштабирование признаков\n",
"scaler = StandardScaler()\n", "scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(df)\n", "X_scaled = scaler.fit_transform(df)\n",
"\n", "\n",
"# 4. Кластеризация\n", "# Кластеризация\n",
"clusterer = AgglomerativeClustering(n_clusters=4)\n", "clusterer = AgglomerativeClustering(n_clusters=4)\n",
"cluster_labels = clusterer.fit_predict(X_scaled)\n", "cluster_labels = clusterer.fit_predict(X_scaled)\n",
"\n", "\n",
"# 5. Обучение классификатора\n", "# Обучение классификатора\n",
"class InductiveClusterer(BaseEstimator):\n", "class InductiveClusterer(BaseEstimator):\n",
" def __init__(self, clusterer, classifier):\n", " def __init__(self, clusterer, classifier):\n",
" self.clusterer = clusterer\n", " self.clusterer = clusterer\n",
@ -599,10 +583,10 @@
"classifier = RandomForestClassifier(random_state=42)\n", "classifier = RandomForestClassifier(random_state=42)\n",
"inductive_learner = InductiveClusterer(clusterer, classifier).fit(X_scaled)\n", "inductive_learner = InductiveClusterer(clusterer, classifier).fit(X_scaled)\n",
"\n", "\n",
"# 6. Предсказание сегментов для новых клиентов\n", "# Предсказание сегментов для новых клиентов\n",
"predicted_labels = inductive_learner.predict(X_scaled)\n", "predicted_labels = inductive_learner.predict(X_scaled)\n",
"\n", "\n",
"# 7. Визуализация результатов с помощью PCA\n", "# Визуализация результатов с помощью PCA\n",
"pca = PCA(n_components=2)\n", "pca = PCA(n_components=2)\n",
"X_pca = pca.fit_transform(X_scaled)\n", "X_pca = pca.fit_transform(X_scaled)\n",
"\n", "\n",

View File

@ -45,15 +45,15 @@
"from sklearn.neural_network import MLPClassifier\n", "from sklearn.neural_network import MLPClassifier\n",
"from sklearn.metrics import classification_report\n", "from sklearn.metrics import classification_report\n",
"\n", "\n",
"# Загрузка и разбиение данных\n", "#Загрузка и разбиение данных\n",
"X, y = load_iris(return_X_y=True)\n", "X, y = load_iris(return_X_y=True)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n", "\n",
"# Модель MLP — многослойный перцептрон\n", "#Модель MLP — многослойный перцептрон\n",
"clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=500)\n", "clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=500)\n",
"clf.fit(X_train, y_train)\n", "clf.fit(X_train, y_train)\n",
"\n", "\n",
"# Отчёт о точности\n", "#Отчёт о точности\n",
"print(classification_report(y_test, clf.predict(X_test)))" "print(classification_report(y_test, clf.predict(X_test)))"
] ]
}, },
@ -65,28 +65,12 @@
"--------------------------------------------------" "--------------------------------------------------"
] ]
}, },
{
"cell_type": "markdown",
"id": "d0e92ee0-5dc3-4770-bef6-9128528698a7",
"metadata": {},
"source": [
"# Был выбран пример \"Inductive Clustering\" из раздела \"Clustering\". Ниже реализация алгоритма со scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe13ea46-9ab7-4d59-a5c8-319d75de55c8",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "e0cdc91f-bac2-47a5-8e77-2c6aa79483bb", "id": "e0cdc91f-bac2-47a5-8e77-2c6aa79483bb",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Для самостоятельного задания выбран \"Inductive Clustering\" из Clustering. Ниже непосредственный пример его реализации со scikit-learn" "# Для самостоятельного задания выбран \"Inductive Clustering\" из Clustering. Ниже непосредственный пример его реализации со scikit-learn\n"
] ]
}, },
{ {
@ -260,11 +244,11 @@
"from sklearn.utils.metaestimators import available_if\n", "from sklearn.utils.metaestimators import available_if\n",
"from sklearn.utils.validation import check_is_fitted\n", "from sklearn.utils.validation import check_is_fitted\n",
"\n", "\n",
"# === Константы ===\n", "# Константы \n",
"RANDOM_STATE = 42\n", "RANDOM_STATE = 42\n",
"N_SAMPLES = 1000\n", "N_SAMPLES = 1000\n",
"\n", "\n",
"# === Вспомогательная функция для делегирования методов классификатора ===\n", "# Вспомогательная функция для делегирования методов классификатора\n",
"def _classifier_has(attr):\n", "def _classifier_has(attr):\n",
" return lambda estimator: (\n", " return lambda estimator: (\n",
" hasattr(estimator.classifier_, attr)\n", " hasattr(estimator.classifier_, attr)\n",
@ -272,7 +256,7 @@
" else hasattr(estimator.classifier, attr)\n", " else hasattr(estimator.classifier, attr)\n",
" )\n", " )\n",
"\n", "\n",
"# === Класс индуктивного кластеризатора ===\n", "# Класс индуктивного кластеризатора \n",
"class InductiveClusterer(BaseEstimator):\n", "class InductiveClusterer(BaseEstimator):\n",
" def __init__(self, clusterer, classifier):\n", " def __init__(self, clusterer, classifier):\n",
" self.clusterer = clusterer\n", " self.clusterer = clusterer\n",
@ -295,11 +279,11 @@
" check_is_fitted(self)\n", " check_is_fitted(self)\n",
" return self.classifier_.decision_function(X)\n", " return self.classifier_.decision_function(X)\n",
"\n", "\n",
"# === Функция для отрисовки точек ===\n", "# Функция для отрисовки точек \n",
"def plot_scatter(X, color, alpha=0.5):\n", "def plot_scatter(X, color, alpha=0.5):\n",
" return plt.scatter(X[:, 0], X[:, 1], c=color, alpha=alpha, edgecolor=\"k\")\n", " return plt.scatter(X[:, 0], X[:, 1], c=color, alpha=alpha, edgecolor=\"k\")\n",
"\n", "\n",
"# === Генерация обучающих данных ===\n", "#Генерация обучающих данных \n",
"X, y = make_classification(\n", "X, y = make_classification(\n",
" n_samples=N_SAMPLES,\n", " n_samples=N_SAMPLES,\n",
" n_features=2,\n", " n_features=2,\n",
@ -310,17 +294,17 @@
" random_state=RANDOM_STATE,\n", " random_state=RANDOM_STATE,\n",
")\n", ")\n",
"\n", "\n",
"# === Кластеризация ===\n", "# Кластеризация\n",
"clusterer = AgglomerativeClustering(n_clusters=3)\n", "clusterer = AgglomerativeClustering(n_clusters=3)\n",
"cluster_labels = clusterer.fit_predict(X)\n", "cluster_labels = clusterer.fit_predict(X)\n",
"\n", "\n",
"# === Визуализация кластеров ===\n", "# Визуализация кластеров \n",
"plt.figure(figsize=(12, 4))\n", "plt.figure(figsize=(12, 4))\n",
"plt.subplot(131)\n", "plt.subplot(131)\n",
"plot_scatter(X, cluster_labels)\n", "plot_scatter(X, cluster_labels)\n",
"plt.title(\"Ward Linkage (Classification Data)\")\n", "plt.title(\"Ward Linkage (Classification Data)\")\n",
"\n", "\n",
"# === Генерация новых (неизвестных) точек ===\n", "# Генерация новых (неизвестных) точек\n",
"X_new, _ = make_classification(\n", "X_new, _ = make_classification(\n",
" n_samples=10,\n", " n_samples=10,\n",
" n_features=2,\n", " n_features=2,\n",
@ -331,20 +315,20 @@
" random_state=RANDOM_STATE + 1,\n", " random_state=RANDOM_STATE + 1,\n",
")\n", ")\n",
"\n", "\n",
"# === Визуализация известных и новых точек ===\n", "# Визуализация известных и новых точек \n",
"plt.subplot(132)\n", "plt.subplot(132)\n",
"plot_scatter(X, cluster_labels)\n", "plot_scatter(X, cluster_labels)\n",
"plot_scatter(X_new, \"black\", 1)\n", "plot_scatter(X_new, \"black\", 1)\n",
"plt.title(\"Unknown instances\")\n", "plt.title(\"Unknown instances\")\n",
"\n", "\n",
"# === Индуктивное обучение ===\n", "# Индуктивное обучение \n",
"classifier = RandomForestClassifier(random_state=RANDOM_STATE)\n", "classifier = RandomForestClassifier(random_state=RANDOM_STATE)\n",
"inductive_learner = InductiveClusterer(clusterer, classifier).fit(X)\n", "inductive_learner = InductiveClusterer(clusterer, classifier).fit(X)\n",
"\n", "\n",
"# === Предсказание кластера для новых точек ===\n", "# Предсказание кластера для новых точек \n",
"predicted_clusters = inductive_learner.predict(X_new)\n", "predicted_clusters = inductive_learner.predict(X_new)\n",
"\n", "\n",
"# === Визуализация предсказаний и границ ===\n", "#Визуализация предсказаний и границ \n",
"ax = plt.subplot(133)\n", "ax = plt.subplot(133)\n",
"plot_scatter(X, cluster_labels)\n", "plot_scatter(X, cluster_labels)\n",
"plot_scatter(X_new, predicted_clusters)\n", "plot_scatter(X_new, predicted_clusters)\n",
@ -562,23 +546,23 @@
"from sklearn.decomposition import PCA\n", "from sklearn.decomposition import PCA\n",
"from sklearn.datasets import fetch_openml\n", "from sklearn.datasets import fetch_openml\n",
"\n", "\n",
"# 1. Загрузка данных через fetch_openml\n", "# Загрузка данных через fetch_openml\n",
"data = fetch_openml(data_id=43618, as_frame=True)\n", "data = fetch_openml(data_id=43618, as_frame=True)\n",
"df = data.frame\n", "df = data.frame\n",
"\n", "\n",
"# 2. Предобработка данных\n", "# Предобработка данных\n",
"\n", "\n",
"df.fillna(df.median(), inplace=True)\n", "df.fillna(df.median(), inplace=True)\n",
"\n", "\n",
"# 3. Масштабирование признаков\n", "# Масштабирование признаков\n",
"scaler = StandardScaler()\n", "scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(df)\n", "X_scaled = scaler.fit_transform(df)\n",
"\n", "\n",
"# 4. Кластеризация\n", "# Кластеризация\n",
"clusterer = AgglomerativeClustering(n_clusters=4)\n", "clusterer = AgglomerativeClustering(n_clusters=4)\n",
"cluster_labels = clusterer.fit_predict(X_scaled)\n", "cluster_labels = clusterer.fit_predict(X_scaled)\n",
"\n", "\n",
"# 5. Обучение классификатора\n", "# Обучение классификатора\n",
"class InductiveClusterer(BaseEstimator):\n", "class InductiveClusterer(BaseEstimator):\n",
" def __init__(self, clusterer, classifier):\n", " def __init__(self, clusterer, classifier):\n",
" self.clusterer = clusterer\n", " self.clusterer = clusterer\n",
@ -599,10 +583,10 @@
"classifier = RandomForestClassifier(random_state=42)\n", "classifier = RandomForestClassifier(random_state=42)\n",
"inductive_learner = InductiveClusterer(clusterer, classifier).fit(X_scaled)\n", "inductive_learner = InductiveClusterer(clusterer, classifier).fit(X_scaled)\n",
"\n", "\n",
"# 6. Предсказание сегментов для новых клиентов\n", "# Предсказание сегментов для новых клиентов\n",
"predicted_labels = inductive_learner.predict(X_scaled)\n", "predicted_labels = inductive_learner.predict(X_scaled)\n",
"\n", "\n",
"# 7. Визуализация результатов с помощью PCA\n", "# Визуализация результатов с помощью PCA\n",
"pca = PCA(n_components=2)\n", "pca = PCA(n_components=2)\n",
"X_pca = pca.fit_transform(X_scaled)\n", "X_pca = pca.fit_transform(X_scaled)\n",
"\n", "\n",