diff --git a/movies.csv b/movies.csv new file mode 100644 index 0000000..66e6f34 --- /dev/null +++ b/movies.csv @@ -0,0 +1,21 @@ +title,year,country,rating,duration,genre +The Shawshank Redemption,1994,USA,9.3,142,Drama +The Godfather,1972,USA,9.2,175,Crime +The Dark Knight,2008,USA,9.0,152,Action +Pulp Fiction,1994,USA,8.9,154,Crime +Forrest Gump,1994,USA,8.8,142,Drama +Inception,2010,USA,8.8,148,Sci-Fi +Fight Club,1999,USA,8.8,139,Drama +Interstellar,2014,USA,8.7,169,Sci-Fi +The Matrix,1999,USA,8.7,136,Sci-Fi +Gladiator,2000,USA,8.5,155,Action +The Green Mile,1999,USA,8.6,189,Drama +Whiplash,2014,USA,8.5,106,Drama +Parasite,2019,South Korea,8.5,132,Thriller +Joker,2019,USA,8.4,122,Drama +Avengers Endgame,2019,USA,8.4,181,Action +Django Unchained,2012,USA,8.5,165,Western +The Departed,2006,USA,8.5,151,Crime +The Prestige,2006,USA,8.5,130,Drama +Coco,2017,USA,8.4,105,Animation +WALL-E,2008,USA,8.4,98,Animation \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index aea9dad..a8fbceb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,102 @@ +anyio==4.13.0 +argon2-cffi==25.1.0 +argon2-cffi-bindings==25.1.0 +arrow==1.4.0 +asttokens==3.0.1 +async-lru==2.3.0 +attrs==26.1.0 +babel==2.18.0 +beautifulsoup4==4.14.3 +bleach==6.3.0 +certifi==2026.5.20 +cffi==2.0.0 +charset-normalizer==3.4.7 +colorama==0.4.6 +comm==0.2.3 +contourpy==1.3.3 +cycler==0.12.1 +debugpy==1.8.20 +decorator==5.3.1 +defusedxml==0.7.1 +executing==2.2.1 +fastjsonschema==2.21.2 +fonttools==4.63.0 +fqdn==1.5.1 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +idna==3.16 +ipykernel==7.2.0 +ipython==9.13.0 +ipython_pygments_lexers==1.1.1 +isoduration==20.11.0 +jedi==0.20.0 +Jinja2==3.1.6 +json5==0.14.0 +jsonpointer==3.1.1 +jsonschema==4.26.0 +jsonschema-specifications==2025.9.1 +jupyter-events==0.12.1 +jupyter-lsp==2.3.1 +jupyter_client==8.8.0 +jupyter_core==5.9.1 +jupyter_server==2.18.2 +jupyter_server_terminals==0.5.4 +jupyterlab==4.5.7 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.28.0 +kiwisolver==1.5.0 +lark==1.3.1 +MarkupSafe==3.0.3 +matplotlib==3.10.9 +matplotlib-inline==0.2.2 +mistune==3.2.1 +nbclient==0.10.4 +nbconvert==7.17.1 +nbformat==5.10.4 +nest-asyncio==1.6.0 +notebook_shim==0.2.4 numpy==2.4.6 +packaging==26.2 pandas==3.0.3 +pandocfilters==1.5.1 +parso==0.8.7 +pillow==12.2.0 +platformdirs==4.9.6 +prometheus_client==0.25.0 +prompt_toolkit==3.0.52 +psutil==7.2.2 +pure_eval==0.2.3 +pycparser==3.0 +Pygments==2.20.0 +pyparsing==3.3.2 python-dateutil==2.9.0.post0 +python-json-logger==4.1.0 +pywinpty==3.0.3 +PyYAML==6.0.3 +pyzmq==27.1.0 +referencing==0.37.0 +requests==2.34.2 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rfc3987-syntax==1.1.0 +rpds-py==0.30.0 +seaborn==0.13.2 +Send2Trash==2.1.0 +setuptools==82.0.1 six==1.17.0 +soupsieve==2.8.4 +stack-data==0.6.3 +terminado==0.18.1 +tinycss2==1.4.0 +tornado==6.5.6 +tqdm==4.67.3 +traitlets==5.15.0 +typing_extensions==4.15.0 tzdata==2026.2 +uri-template==1.3.0 +urllib3==2.7.0 +wcwidth==0.7.0 +webcolors==25.10.0 +webencodings==0.5.1 +websocket-client==1.9.0 diff --git a/week2_analysis.ipynb b/week2_analysis.ipynb new file mode 100644 index 0000000..eb2b716 --- /dev/null +++ b/week2_analysis.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e7297b19-5736-4a17-911d-645849a628a6", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from tqdm import tqdm\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "711ef140-61f6-4b64-8fcf-598247617d65", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"movies.csv\")\n", + "\n", + "print(\"Размер таблицы:\", df.shape)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d269a73-b428-421b-a9e7-e1158368d099", + "metadata": {}, + "outputs": [], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "965d5583-2cc3-409c-8db4-a7a00494a6ed", + "metadata": {}, + "outputs": [], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c74ac668-6c80-49c8-8b72-eea13fdf5ded", + "metadata": {}, + "outputs": [], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4940a598-773c-4f6e-8bce-e841404eebc3", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 5))\n", + "sns.histplot(df[\"rating\"], bins=8, kde=True)\n", + "plt.title(\"Распределение рейтингов фильмов\")\n", + "plt.xlabel(\"Рейтинг\")\n", + "plt.ylabel(\"Количество фильмов\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "906ffd6f-e7a2-402a-9a33-40a075cc5685", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 5))\n", + "sns.scatterplot(data=df, x=\"year\", y=\"rating\", hue=\"genre\")\n", + "plt.title(\"Связь года выпуска и рейтинга фильма\")\n", + "plt.xlabel(\"Год выпуска\")\n", + "plt.ylabel(\"Рейтинг\")\n", + "plt.legend(title=\"Жанр\", bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a05e677a-74e7-4a3a-adce-02f8dc04153e", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 5))\n", + "sns.boxplot(data=df, x=\"genre\", y=\"rating\")\n", + "plt.title(\"Распределение рейтингов по жанрам\")\n", + "plt.xlabel(\"Жанр\")\n", + "plt.ylabel(\"Рейтинг\")\n", + "plt.xticks(rotation=45)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e5ea86-f072-419c-b937-d752f8fec4e3", + "metadata": {}, + "outputs": [], + "source": [ + "ratings = df[\"rating\"].to_numpy()\n", + "\n", + "print(\"Средний рейтинг:\", np.mean(ratings))\n", + "print(\"Минимальный рейтинг:\", np.min(ratings))\n", + "print(\"Максимальный рейтинг:\", np.max(ratings))\n", + "print(\"Стандартное отклонение:\", np.std(ratings))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86546195-d108-4c55-9401-9d7017d96277", + "metadata": {}, + "outputs": [], + "source": [ + "for i in tqdm(range(100)):\n", + " time.sleep(0.01)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}