From 63bfdd075974d12740a53a5708332970c8016f02 Mon Sep 17 00:00:00 2001
From: Timur
Date: Sat, 30 May 2026 11:56:33 +0300
Subject: [PATCH] Add JupyterLab analysis notebook

---
 02_jupyter_analysis/week2_analysis.ipynb | 144 +++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 02_jupyter_analysis/week2_analysis.ipynb

diff --git a/02_jupyter_analysis/week2_analysis.ipynb b/02_jupyter_analysis/week2_analysis.ipynb
new file mode 100644
index 0000000..1705ef3
--- /dev/null
+++ b/02_jupyter_analysis/week2_analysis.ipynb
@@ -0,0 +1,144 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e33cfdf4",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 1: Import libraries\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "from tqdm import tqdm\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc6236ff",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 2: Load the data (the first CSV column is used as the index)\n",
+    "df = pd.read_csv(\"kinopoisk-top250.csv\", index_col=0)\n",
+    "print(f'Размер таблицы: {df.shape}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e616e786",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 3: Analysis - column summary via info(), then describe() statistics\n",
+    "df.info()\n",
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9df3843",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 4: Histogram of the 'year' column with a KDE overlay\n",
+    "plt.figure(figsize=(8, 5))\n",
+    "sns.histplot(df['year'], kde=True, bins=20, color='blue')\n",
+    "plt.title('Распределение фильмов по годам')\n",
+    "plt.xlabel('Год')\n",
+    "plt.ylabel('Количество фильмов')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e82506e0",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 5: Scatter plot of director vs. year for the first 30 rows, coloured by rating\n",
+    "df_top30 = df.head(30).copy()\n",
+    "plt.figure(figsize=(10, 6))\n",
+    "sns.scatterplot(x='director', y='year', hue='rating_ball',\n",
+    "                data=df_top30, palette='viridis')\n",
+    "plt.title('Режиссеры, год выпуска и рейтинг')\n",
+    "plt.xlabel('Режиссер')\n",
+    "plt.ylabel('Год выпуска')\n",
+    "plt.xticks(rotation=90)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f51c0053",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 6: Boxplot of ratings per year (> 2000); hue mirrors x because palette without hue is deprecated in seaborn >= 0.13\n",
+    "filtered_df = df[df['year'] > 2000].copy()\n",
+    "plt.figure(figsize=(10, 6))\n",
+    "sns.boxplot(x='year', y='rating_ball', hue='year', data=filtered_df, palette='pastel', legend=False)\n",
+    "plt.title('Распределение рейтингов (>2000)')\n",
+    "plt.xlabel('Год')\n",
+    "plt.ylabel('Рейтинг')\n",
+    "plt.xticks(rotation=90)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc5173ad",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 7: tqdm progress-bar demo over 100 short sleeps\n",
+    "for i in tqdm(range(100), desc=\"Обработка\"):\n",
+    "    time.sleep(0.01)\n",
+    "print(\"Готово\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}