"""Movie review analytics.

Loads pipe-delimited review records from a text file, computes aggregate
statistics, prints a summary to stdout and writes a plain-text report.
"""

import os
from collections import Counter


def parse_review_line(line):
    """Parse one pipe-delimited review record into a dict.

    Expected field order:
    ``user_id|movie_title|rating|review_text|watch_date|useful_votes|total_votes``.
    Fields beyond the seventh are ignored.

    Args:
        line: Raw text line; surrounding whitespace is stripped.

    Returns:
        Dict with keys ``user_id``, ``movie_title``, ``rating`` (float),
        ``review_text``, ``watch_date``, ``useful_votes`` (int) and
        ``total_votes`` (int).

    Raises:
        IndexError: The line has fewer than seven fields.
        ValueError: The rating or a vote count is not numeric.
    """
    parts = line.strip().split("|")
    return {
        "user_id": parts[0],
        "movie_title": parts[1],
        "rating": float(parts[2]),
        "review_text": parts[3],
        "watch_date": parts[4],
        "useful_votes": int(parts[5]),
        "total_votes": int(parts[6]),
    }


def load_reviews(file_path):
    """Read every non-blank line of *file_path* as a review record.

    Args:
        file_path: Path to a UTF-8 text file with one review per line.

    Returns:
        List of review dicts in file order. If the file does not exist a
        message is printed and an empty list is returned.

    Raises:
        IndexError, ValueError: Propagated from ``parse_review_line`` for a
            malformed line.
    """
    try:
        # The context manager closes the file even when a line fails to parse.
        with open(file_path, "r", encoding="utf-8") as source:
            return [
                parse_review_line(stripped)
                for stripped in map(str.strip, source)
                if stripped
            ]
    except FileNotFoundError:
        print("Файл не найден")
        return []


def filter_by_min_rating(reviews, min_rating):
    """Return the reviews whose rating is at least *min_rating*, in order."""
    return [review for review in reviews if review["rating"] >= min_rating]


def calculate_average_rating(reviews):
    """Return the mean rating rounded to 2 decimals, or 0 for no reviews."""
    if not reviews:
        return 0
    total = sum(review["rating"] for review in reviews)
    return round(total / len(reviews), 2)


def get_movie_statistics(reviews):
    """Aggregate per-movie statistics.

    Returns:
        Dict mapping movie title to a dict with ``reviews_count``,
        ``total_useful_votes`` and ``average_rating`` (rounded to 2
        decimals). Entries are ordered by ``reviews_count`` descending;
        movies with equal counts keep first-seen order.
    """
    totals = {}
    for review in reviews:
        entry = totals.setdefault(
            review["movie_title"],
            {"reviews_count": 0, "total_rating": 0, "total_useful_votes": 0},
        )
        entry["reviews_count"] += 1
        entry["total_rating"] += review["rating"]
        entry["total_useful_votes"] += review["useful_votes"]

    stats = {
        title: {
            "reviews_count": entry["reviews_count"],
            "total_useful_votes": entry["total_useful_votes"],
            "average_rating": round(
                entry["total_rating"] / entry["reviews_count"], 2
            ),
        }
        for title, entry in totals.items()
    }
    # sorted() is stable, so ties keep their first-seen order.
    ordered = sorted(
        stats.items(),
        key=lambda item: item[1]["reviews_count"],
        reverse=True,
    )
    return dict(ordered)


def _useful_ratio(review):
    """Return useful/total votes, or -1 for a review nobody voted on."""
    total_votes = review["total_votes"]
    if total_votes > 0:
        return review["useful_votes"] / total_votes
    return -1


def find_most_useful_review(reviews):
    """Return the review with the highest share of useful votes.

    Reviews without any votes rank below every voted review, so they are
    chosen only when no review has votes. Ties go to the earliest review.

    Returns:
        Dict with ``movie_title``, ``review_text`` and ``useful_percentage``
        (rounded to 2 decimals; 0 when the chosen review has no votes), or
        an empty dict when *reviews* is empty.
    """
    if not reviews:
        return {}
    # max() over the ratio; the -1 sentinel keeps unvoted reviews last.
    best = max(reviews, key=_useful_ratio)
    if best["total_votes"] > 0:
        percent = (best["useful_votes"] / best["total_votes"]) * 100
    else:
        percent = 0
    return {
        "movie_title": best["movie_title"],
        "review_text": best["review_text"],
        "useful_percentage": round(percent, 2),
    }


def group_reviews_by_month(reviews):
    """Count reviews per month.

    The month key is the first seven characters of ``watch_date``.

    Returns:
        Dict mapping month key to review count, sorted by key ascending.
    """
    counts = Counter(review["watch_date"][:7] for review in reviews)
    return dict(sorted(counts.items()))


def filter_by_keywords(reviews, keywords):
    """Return reviews whose text contains any keyword, case-insensitively."""
    # Lower-case once up front; this also makes a one-shot iterable of
    # keywords safe to reuse for every review.
    needles = [keyword.lower() for keyword in keywords]
    matched = []
    for review in reviews:
        text = review["review_text"].lower()
        if any(needle in text for needle in needles):
            matched.append(review)
    return matched


def get_top_movies_by_rating(reviews, n):
    """Return the top *n* movies by average rating.

    Only movies with at least two reviews are considered.

    Returns:
        List of ``(title, average_rating)`` tuples, rating rounded to 2
        decimals, sorted by rating descending (ties keep first-seen order).
    """
    totals = {}
    for review in reviews:
        entry = totals.setdefault(review["movie_title"], [0, 0])
        entry[0] += review["rating"]
        entry[1] += 1

    ratings = [
        (title, round(rating_sum / count, 2))
        for title, (rating_sum, count) in totals.items()
        if count >= 2
    ]
    ratings.sort(key=lambda pair: pair[1], reverse=True)
    return ratings[:n]


def generate_review_report(reviews, file_path):
    """Write a plain-text summary report of *reviews* to *file_path*.

    Nothing is written when *reviews* is empty. The parent directory of
    *file_path* is created if it does not exist yet. Usefulness percentages
    are rounded to 2 decimals.

    Args:
        reviews: List of review dicts as produced by ``parse_review_line``.
        file_path: Destination path; the file is overwritten.
    """
    if not reviews:
        return

    total = len(reviews)
    avg_rating = calculate_average_rating(reviews)
    movie_stats = get_movie_statistics(reviews)
    top_movies = get_top_movies_by_rating(reviews, 3)
    monthly = group_reviews_by_month(reviews)

    # Only reviews that received votes have a defined usefulness percentage.
    useful_list = [
        (review, (review["useful_votes"] / review["total_votes"]) * 100)
        for review in reviews
        if review["total_votes"] > 0
    ]
    useful_list.sort(key=lambda pair: pair[1], reverse=True)

    lines = [
        "ОТЧЕТ ПО РЕЦЕНЗИЯМ\n",
        "\n",
        f"Всего рецензий: {total}\n",
        f"Средний рейтинг: {avg_rating}\n",
        "\n",
        "ТОП 5 ФИЛЬМОВ ПО КОЛИЧЕСТВУ РЕЦЕНЗИЙ\n",
    ]
    for position, (title, stats) in enumerate(
        list(movie_stats.items())[:5], start=1
    ):
        lines.append(
            f"{position}. {title} - рецензий: {stats['reviews_count']}, "
            f"средний рейтинг: {stats['average_rating']}\n"
        )

    lines.append("\n")
    lines.append("ТОП 3 САМЫХ ПОЛЕЗНЫХ РЕЦЕНЗИЙ ()\n")
    for position, (review, percent) in enumerate(useful_list[:3], start=1):
        lines.append(
            f"{position}. {review['movie_title']} - {round(percent, 2)}%\n"
        )
        lines.append(f" {review['review_text'][:100]}\n")

    lines.append("\n")
    lines.append("ДИНАМИКА ПО МЕСЯЦАМ\n")
    for month, count in monthly.items():
        lines.append(f"{month}: {count} рецензий\n")

    lines.append("\n")
    lines.append("ТОП 3 ФИЛЬМА ПО СРЕДНЕМУ РЕЙТИНГУ (мин 2 рецензии)\n")
    for position, (title, rating) in enumerate(top_movies, start=1):
        lines.append(f"{position}. {title} - {rating}\n")

    # Create the target directory before opening the file for writing.
    parent = os.path.dirname(file_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(file_path, "w", encoding="utf-8") as report:
        report.writelines(lines)


# Import-time side effect kept from the original script: make sure the
# default data directory exists.
os.makedirs("data", exist_ok=True)


def main():
    """Load ``data/data.txt``, print a summary and save ``data/report.txt``."""
    reviews = load_reviews("data/data.txt")
    if not reviews:
        return

    print("Общее количество рецензий:", len(reviews))
    print()
    print("Средний рейтинг:", calculate_average_rating(reviews))
    print()

    print("Топ 5 фильмов по количеству рецензий:")
    stats = get_movie_statistics(reviews)
    for position, (title, entry) in enumerate(
        list(stats.items())[:5], start=1
    ):
        print(
            f"{position}.",
            title,
            "-",
            entry["reviews_count"],
            "рецензий, средний рейтинг:",
            entry["average_rating"],
        )
    print()

    print("Самая полезная рецензия:")
    useful = find_most_useful_review(reviews)
    print("Фильм:", useful["movie_title"])
    print("Полезность:", useful["useful_percentage"], "%")
    print("Текст:", useful["review_text"][:100], "...")
    print()

    print("Топ 3 фильма по среднему рейтингу (мин 2 рецензии):")
    top = get_top_movies_by_rating(reviews, 3)
    for position, (title, rating) in enumerate(top, start=1):
        print(f"{position}.", title, "-", rating)
    print()

    generate_review_report(reviews, "data/report.txt")
    print("Отчет сохранен в файл data/report.txt")


if __name__ == "__main__":
    main()