гитигноре

2026-05-05 19:22:10 +03:00 · 2026-05-05 19:22:10 +03:00 · eb7671247d
commit eb7671247d
4 changed files with 680 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,164 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+.idea
+.venv
+.ipynb_checkpoints
--- a/4444/Untitled.ipynb
+++ b/4444/Untitled.ipynb
@@ -0,0 +1,161 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "42259e56-f030-4f9a-b2f7-696736dc4439",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'StandardScaler' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 14\u001b[39m\n\u001b[32m     10\u001b[39m y_real = y_real.astype(int)\n\u001b[32m     11\u001b[39m \n\u001b[32m     12\u001b[39m \u001b[38;5;66;03m# Разбиение (теперь функция импортирована и сработает без ошибок)\u001b[39;00m\n\u001b[32m     13\u001b[39m X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X_real, y_real, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m scaler_r = StandardScaler()\n\u001b[32m     15\u001b[39m X_train_r_scaled = scaler_r.fit_transform(X_train_r)\n\u001b[32m     16\u001b[39m X_test_r_scaled = scaler_r.transform(X_test_r)\n\u001b[32m     17\u001b[39m \u001b[38;5;66;03m# Обучение с L1\u001b[39;00m\n",
+      "\u001b[31mNameError\u001b[39m: name 'StandardScaler' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "import openml\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "\n",
+    "# Загрузка датасета Spambase (ID 44)\n",
+    "dataset = openml.datasets.get_dataset(44)\n",
+    "X_real, y_real, _, _ = dataset.get_data(target=dataset.default_target_attribute)\n",
+    "\n",
+    "# Преобразование целевой переменной в числовой формат (0 и 1)\n",
+    "y_real = y_real.astype(int)\n",
+    "\n",
+    "# Разбиение (теперь функция импортирована и сработает без ошибок)\n",
+    "X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X_real, y_real, test_size=0.2, random_state=42)\n",
+    "scaler_r = StandardScaler()\n",
+    "X_train_r_scaled = scaler_r.fit_transform(X_train_r)\n",
+    "X_test_r_scaled = scaler_r.transform(X_test_r)\n",
+    "# Обучение с L1\n",
+    "sgd_real_l1 = SGDClassifier(loss='log_loss', penalty='l1', alpha=0.01, random_state=42)\n",
+    "sgd_real_l1.fit(X_train_r_scaled, y_train_r)\n",
+    "\n",
+    "# Обучение с L2\n",
+    "sgd_real_l2 = SGDClassifier(loss='log_loss', penalty='l2', alpha=0.01, random_state=42)\n",
+    "sgd_real_l2.fit(X_train_r_scaled, y_train_r)\n",
+    "\n",
+    "# Точность моделей\n",
+    "acc_l1 = accuracy_score(y_test_r, sgd_real_l1.predict(X_test_r_scaled))\n",
+    "acc_l2 = accuracy_score(y_test_r, sgd_real_l2.predict(X_test_r_scaled))\n",
+    "\n",
+    "print(f\"Точность модели с L1-регуляризацией: {acc_l1:.4f}\")\n",
+    "print(f\"Точность модели с L2-регуляризацией: {acc_l2:.4f}\")\n",
+    "print(f\"Количество ненулевых весов (L1): {np.sum(sgd_real_l1.coef_ != 0)} из {X_real.shape[1]}\")\n",
+    "print(f\"Количество ненулевых весов (L2): {np.sum(sgd_real_l2.coef_ != 0)} из {X_real.shape[1]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c186361c-aa9b-4f87-8df5-34b2caff07a7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--- Результаты L1 (Lasso) ---\n",
+      "Accuracy: 0.8686\n",
+      "Полезных признаков (вес != 0): 22 из 57\n",
+      "\n",
+      "--- Результаты L2 (Ridge) ---\n",
+      "Accuracy: 0.9055\n",
+      "Полезных признаков (вес != 0): 57 из 57\n"
+     ]
+    }
+   ],
+   "source": [
+    "import openml\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.linear_model import SGDClassifier\n",
+    "from sklearn.metrics import accuracy_score, classification_report\n",
+    "\n",
+    "# 1. Загрузка датасета Spambase (ID 44) — классификация спам-писем\n",
+    "dataset = openml.datasets.get_dataset(44)\n",
+    "X_real, y_real, _, _ = dataset.get_data(target=dataset.default_target_attribute)\n",
+    "\n",
+    "# Преобразование целевой переменной в числа (0 - не спам, 1 - спам)\n",
+    "y_real = y_real.astype(int)\n",
+    "\n",
+    "# 2. Разбиение на обучающую и тестовую выборки\n",
+    "X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(\n",
+    "    X_real, y_real, test_size=0.2, random_state=42\n",
+    ")\n",
+    "# Инициализация и обучение скалера\n",
+    "scaler_r = StandardScaler()\n",
+    "X_train_r_scaled = scaler_r.fit_transform(X_train_r)\n",
+    "X_test_r_scaled = scaler_r.transform(X_test_r)\n",
+    "# Параметры: loss='log_loss' делает из SGD логистическую регрессию\n",
+    "# alpha — сила регуляризации (чем выше, тем сильнее сжимаем веса)\n",
+    "\n",
+    "# Модель с L1 (Lasso) — должна занулить часть признаков\n",
+    "sgd_l1 = SGDClassifier(loss='log_loss', penalty='l1', alpha=0.02, random_state=42)\n",
+    "sgd_l1.fit(X_train_r_scaled, y_train_r)\n",
+    "\n",
+    "# Модель с L2 (Ridge) — просто уменьшает веса\n",
+    "sgd_l2 = SGDClassifier(loss='log_loss', penalty='l2', alpha=0.02, random_state=42)\n",
+    "sgd_l2.fit(X_train_r_scaled, y_train_r)\n",
+    "\n",
+    "# Предсказания\n",
+    "y_pred_l1 = sgd_l1.predict(X_test_r_scaled)\n",
+    "y_pred_l2 = sgd_l2.predict(X_test_r_scaled)\n",
+    "print(\"--- Результаты L1 (Lasso) ---\")\n",
+    "print(f\"Accuracy: {accuracy_score(y_test_r, y_pred_l1):.4f}\")\n",
+    "print(f\"Полезных признаков (вес != 0): {np.sum(sgd_l1.coef_ != 0)} из {X_real.shape[1]}\")\n",
+    "\n",
+    "print(\"\\n--- Результаты L2 (Ridge) ---\")\n",
+    "print(f\"Accuracy: {accuracy_score(y_test_r, y_pred_l2):.4f}\")\n",
+    "print(f\"Полезных признаков (вес != 0): {np.sum(sgd_l2.coef_ != 0)} из {X_real.shape[1]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52c23789-f6f5-4ef4-9efc-0dda66562e71",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "559b4093-337c-4897-9e51-45d8e24c1be3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/4444/Untitled1.ipynb
+++ b/4444/Untitled1.ipynb
--- a/4444/week4_scikit_learn.ipynb
+++ b/4444/week4_scikit_learn.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6c3666d4-5898-4253-90ed-a4750c2e1306",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       1.00      1.00      1.00        12\n",
+      "           1       1.00      0.89      0.94         9\n",
+      "           2       0.90      1.00      0.95         9\n",
+      "\n",
+      "    accuracy                           0.97        30\n",
+      "   macro avg       0.97      0.96      0.96        30\n",
+      "weighted avg       0.97      0.97      0.97        30\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\timsh\\PycharmProjects\\4444\\.venv\\Lib\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:785: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.datasets import load_iris\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.neural_network import MLPClassifier\n",
+    "from sklearn.metrics import classification_report\n",
+    "\n",
+    "# Загрузка и разбиение данных\n",
+    "X, y = load_iris(return_X_y=True)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
+    "\n",
+    "# Модель MLP — многослойный перцептрон\n",
+    "clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=500)\n",
+    "clf.fit(X_train, y_train)\n",
+    "\n",
+    "# Отчёт о точности\n",
+    "print(classification_report(y_test, clf.predict(X_test)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "182f69f2-a498-4adc-b06d-f8cbc6979c1d",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'sd' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m sd\n",
+      "\u001b[31mNameError\u001b[39m: name 'sd' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "sd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "96a73d41-6a38-4608-9f77-bd442dfcafd1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Once deleted, variables cannot be recovered. Proceed (y/[n])?  y\n"
+     ]
+    }
+   ],
+   "source": [
+    "reset\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "51de2ae0-7584-47c2-a5a3-27f546bc6f1d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       1.00      1.00      1.00         9\n",
+      "           1       1.00      0.91      0.95        11\n",
+      "           2       0.91      1.00      0.95        10\n",
+      "\n",
+      "    accuracy                           0.97        30\n",
+      "   macro avg       0.97      0.97      0.97        30\n",
+      "weighted avg       0.97      0.97      0.97        30\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.datasets import load_iris\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.neural_network import MLPClassifier\n",
+    "from sklearn.metrics import classification_report\n",
+    "\n",
+    "# Загрузка и разбиение данных\n",
+    "X, y = load_iris(return_X_y=True)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
+    "\n",
+    "# Модель MLP — многослойный перцептрон\n",
+    "clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=2500)\n",
+    "clf.fit(X_train, y_train)\n",
+    "\n",
+    "# Отчёт о точности\n",
+    "print(classification_report(y_test, clf.predict(X_test)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f58322b-0064-4173-88fd-1c173a9bf3a9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}