rewiews/reviews.py

217 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
def parse_review_line(line):
    """Turn one pipe-delimited text record into a review dict.

    The line is stripped of surrounding whitespace and split on "|".
    Fields are read positionally: user id, movie title, rating,
    review text, watch date, useful votes, total votes. Any fields
    after the seventh are ignored.

    Returns:
        dict with keys "user_id", "movie_title", "rating" (float),
        "review_text", "watch_date", "useful_votes" (int) and
        "total_votes" (int).

    Raises:
        IndexError: if a required field is missing.
        ValueError: if the rating or a vote count is not numeric.
    """
    fields = line.strip().split("|")

    # Fields are consumed strictly left to right so that, for a malformed
    # line, the first offending field determines which exception is raised.
    review = {}
    review["user_id"] = fields[0]
    review["movie_title"] = fields[1]
    review["rating"] = float(fields[2])
    review["review_text"] = fields[3]
    review["watch_date"] = fields[4]
    review["useful_votes"] = int(fields[5])
    review["total_votes"] = int(fields[6])
    return review
def load_reviews(file_path):
    """Read a UTF-8 review file and parse every non-blank line.

    Args:
        file_path: path to a text file with one pipe-delimited review
            per line.

    Returns:
        A list of review dicts as produced by ``parse_review_line``.
        If the path does not exist, a message is printed and an empty
        list is returned.

    Raises:
        Whatever ``parse_review_line`` raises for a malformed line.
    """
    if not os.path.exists(file_path):
        print("Файл не найден")
        return []

    # The context manager closes the handle even when a line fails to
    # parse; the previous explicit close() was skipped on exceptions.
    with open(file_path, "r", encoding="utf-8") as source:
        stripped_lines = (raw.strip() for raw in source)
        return [parse_review_line(text) for text in stripped_lines if text]
def filter_by_min_rating(reviews, min_rating):
    """Return the reviews whose rating is at least ``min_rating``.

    The input order is kept and the input list is not modified.
    """
    return [review for review in reviews if review["rating"] >= min_rating]
def calculate_average_rating(reviews):
    """Return the mean rating of ``reviews`` rounded to two decimals.

    An empty collection yields 0 instead of dividing by zero.
    """
    review_count = len(reviews)
    if review_count == 0:
        return 0

    ratings = [review["rating"] for review in reviews]
    mean = sum(ratings) / review_count
    return round(mean, 2)
def get_movie_statistics(reviews):
    """Aggregate per-movie review statistics.

    Returns:
        A dict keyed by movie title. Each value holds "reviews_count",
        "total_useful_votes" and "average_rating" (rounded to two
        decimals). Entries are ordered by review count, highest first;
        movies with equal counts keep first-seen order.
    """
    per_movie = {}
    for review in reviews:
        entry = per_movie.setdefault(
            review["movie_title"],
            {"reviews_count": 0, "total_rating": 0, "total_useful_votes": 0},
        )
        entry["reviews_count"] += 1
        entry["total_rating"] += review["rating"]
        entry["total_useful_votes"] += review["useful_votes"]

    # The running rating sum is only an intermediate value: swap it for
    # the rounded average before handing the data to callers.
    for entry in per_movie.values():
        rating_sum = entry.pop("total_rating")
        entry["average_rating"] = round(rating_sum / entry["reviews_count"], 2)

    ranked = sorted(
        per_movie.items(),
        key=lambda pair: pair[1]["reviews_count"],
        reverse=True,
    )
    return dict(ranked)
def find_most_useful_review(reviews):
    """Return the review with the highest share of "useful" votes.

    Usefulness is ``useful_votes / total_votes``. Reviews nobody voted on
    rank below every voted review (including 0 % ones), so they are only
    chosen when no review has any votes. Ties go to the earliest review.

    Returns:
        ``{}`` for empty input, otherwise a dict with "movie_title",
        "review_text" and "useful_percentage" (0-100, rounded to two
        decimals; 0 when the chosen review has no votes).
    """
    if not reviews:
        return {}

    def usefulness(review):
        votes = review["total_votes"]
        # -1 is below any real ratio (>= 0). The earlier min()/negated-key
        # form gave unvoted reviews the same key as a 100 % review, letting
        # them beat every review with a ratio below 1.
        return review["useful_votes"] / votes if votes > 0 else -1

    best = max(reviews, key=usefulness)
    if best["total_votes"] > 0:
        percent = (best["useful_votes"] / best["total_votes"]) * 100
    else:
        percent = 0

    return {
        "movie_title": best["movie_title"],
        "review_text": best["review_text"],
        "useful_percentage": round(percent, 2),
    }
def group_reviews_by_month(reviews):
    """Count reviews per month of the watch date.

    The month key is the first seven characters of "watch_date".

    Returns:
        A dict mapping month key to review count, with keys in ascending
        order.
    """
    counts = {}
    for review in reviews:
        month_key = review["watch_date"][:7]
        counts[month_key] = counts.get(month_key, 0) + 1

    return {month_key: counts[month_key] for month_key in sorted(counts)}
def filter_by_keywords(reviews, keywords):
    """Return reviews whose text contains at least one keyword.

    Matching is a case-insensitive substring test against "review_text".
    Input order is kept. With no keywords, nothing matches.

    Args:
        reviews: iterable of review dicts.
        keywords: iterable of strings; may be a one-shot iterator.
    """
    # Lowercase once up front: avoids redoing it for every review and, by
    # materialising the list, keeps a generator of keywords from being
    # exhausted after the first review.
    needles = [keyword.lower() for keyword in keywords]

    matches = []
    for review in reviews:
        text = review["review_text"].lower()
        if any(needle in text for needle in needles):
            matches.append(review)
    return matches
def get_top_movies_by_rating(reviews, n, min_reviews=2):
    """Return the best-rated movies that have enough reviews.

    Args:
        reviews: iterable of review dicts with "movie_title" and "rating".
        n: number of entries to keep; applied as the slice ``[:n]``.
        min_reviews: minimum number of reviews a movie needs to be
            ranked. Defaults to 2.

    Returns:
        A list of ``(title, average_rating)`` tuples, averages rounded to
        two decimals, sorted from highest to lowest. Movies with the same
        rounded average keep first-seen order.
    """
    # title -> [rating sum, review count]
    totals = {}
    for review in reviews:
        bucket = totals.setdefault(review["movie_title"], [0, 0])
        bucket[0] += review["rating"]
        bucket[1] += 1

    ranked = [
        (title, round(rating_sum / count, 2))
        for title, (rating_sum, count) in totals.items()
        if count >= min_reviews
    ]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def generate_review_report(reviews, file_path):
    """Write a plain-text summary report of ``reviews`` to ``file_path``.

    The report contains, in order: the review count and overall average
    rating, up to five movies in the order returned by
    ``get_movie_statistics``, up to three reviews with the highest share
    of useful votes (reviews without votes are skipped), review counts per
    month, and up to three movies from ``get_top_movies_by_rating``.

    Nothing is written when ``reviews`` is empty.

    Args:
        reviews: list of review dicts.
        file_path: destination file; overwritten, encoded as UTF-8.
    """
    if not reviews:
        return

    total = len(reviews)
    avg_rating = calculate_average_rating(reviews)
    movie_stats = get_movie_statistics(reviews)
    top_movies = get_top_movies_by_rating(reviews, 3)
    monthly = group_reviews_by_month(reviews)

    # (review, usefulness in percent) for every review that has votes.
    by_usefulness = [
        (review, (review["useful_votes"] / review["total_votes"]) * 100)
        for review in reviews
        if review["total_votes"] > 0
    ]
    by_usefulness.sort(key=lambda pair: pair[1], reverse=True)

    # Build the whole report in memory first so that a failure while
    # formatting cannot leave a truncated file behind.
    lines = [
        "ОТЧЕТ ПО РЕЦЕНЗИЯМ\n",
        "\n",
        f"Всего рецензий: {total}\n",
        f"Средний рейтинг: {avg_rating}\n",
        "\n",
        "ТОП 5 ФИЛЬМОВ ПО КОЛИЧЕСТВУ РЕЦЕНЗИЙ\n",
    ]
    for position, (title, stats) in enumerate(list(movie_stats.items())[:5], start=1):
        lines.append(
            f"{position}. {title} - рецензий: {stats['reviews_count']}"
            f", средний рейтинг: {stats['average_rating']}\n"
        )

    lines.append("\n")
    lines.append("ТОП 3 САМЫХ ПОЛЕЗНЫХ РЕЦЕНЗИЙ ()\n")
    for position, (review, percent) in enumerate(by_usefulness[:3], start=1):
        # Rounded to two decimals, matching find_most_useful_review;
        # previously values such as 66.66666666666667% were emitted.
        lines.append(f"{position}. {review['movie_title']} - {round(percent, 2)}%\n")
        lines.append(" " + review["review_text"][:100] + "\n")

    lines.append("\n")
    lines.append("ДИНАМИКА ПО МЕСЯЦАМ\n")
    for month, count in monthly.items():
        lines.append(f"{month}: {count} рецензий\n")

    lines.append("\n")
    lines.append("ТОП 3 ФИЛЬМА ПО СРЕДНЕМУ РЕЙТИНГУ (мин 2 рецензии)\n")
    for position, (title, rating) in enumerate(top_movies, start=1):
        lines.append(f"{position}. {title} - {rating}\n")

    # `with` closes the file even if a write fails.
    with open(file_path, "w", encoding="utf-8") as report:
        report.writelines(lines)
# Make sure the directory used for the input and report files exists.
# exist_ok=True replaces the separate exists() check, which could race
# with another process creating the directory in between. Note that this
# raises FileExistsError if "data" exists but is not a directory.
os.makedirs("data", exist_ok=True)
def main():
    """Print a console summary of data/data.txt and save the full report.

    Loads the reviews, prints the review count, the average rating, up to
    five movies from ``get_movie_statistics``, the most useful review and
    up to three top-rated movies, then writes data/report.txt. Returns
    without output when no reviews were loaded.
    """
    reviews = load_reviews("data/data.txt")
    if not reviews:
        return

    print("Общее количество рецензий:", len(reviews))
    print()

    print("Средний рейтинг:", calculate_average_rating(reviews))
    print()

    print("Топ 5 фильмов по количеству рецензий:")
    stats = get_movie_statistics(reviews)
    for position, (title, info) in enumerate(list(stats.items())[:5], start=1):
        print(
            f"{position}.",
            title,
            "-",
            info["reviews_count"],
            "рецензий, средний рейтинг:",
            info["average_rating"],
        )
    print()

    print("Самая полезная рецензия:")
    useful = find_most_useful_review(reviews)
    print("Фильм:", useful["movie_title"])
    print("Полезность:", useful["useful_percentage"], "%")
    # Only the first 100 characters of the text are shown.
    print("Текст:", useful["review_text"][:100], "...")
    print()

    print("Топ 3 фильма по среднему рейтингу (мин 2 рецензии):")
    for position, (title, rating) in enumerate(get_top_movies_by_rating(reviews, 3), start=1):
        print(f"{position}.", title, "-", rating)
    print()

    generate_review_report(reviews, "data/report.txt")
    print("Отчет сохранен в файл data/report.txt")


# Run the console summary only when executed as a script, not on import.
if __name__ == "__main__":
    main()