ReviewAnalyzer/reviews.py

181 lines
4.6 KiB
Python

import os.path
import string
import unicodedata
from collections import Counter
def load_reviews(filepath: str) -> list:
    """Load reviews from a pipe-delimited text file.

    Each non-empty line is expected to look like ``username|rating|text``.
    Only the first two ``|`` characters act as separators, so the review text
    itself may contain ``|``. Blank lines are ignored; lines with fewer than
    three fields or a non-integer rating are skipped with a console warning
    instead of aborting the whole load.

    Args:
        filepath: Path to a UTF-8 encoded reviews file.

    Returns:
        A list of dicts with keys ``"username"`` (str), ``"rating"`` (int) and
        ``"text"`` (str), in file order. Empty when the file does not exist.
    """
    reviews = []
    # Open first and handle the failure: checking os.path.exists() beforehand
    # leaves a window in which the file can disappear before open().
    try:
        file = open(filepath, "r", encoding="utf-8")
    except FileNotFoundError:
        print("Файл отзывов не найден!")
        return reviews

    with file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                # Both a wrong field count (unpacking) and a non-numeric
                # rating (int) surface as ValueError.
                username, rating, text = line.split("|", 2)
                rating_value = int(rating)
            except ValueError:
                print(f"Строка {line_number} пропущена: неверный формат")
                continue
            reviews.append({
                "username": username.strip(),
                "rating": rating_value,
                "text": text.strip(),
            })
    return reviews
def filter_by_rating(reviews: list, min_rating: int, max_rating: int) -> list:
    """Return the reviews whose rating lies in ``[min_rating, max_rating]``.

    Both bounds are inclusive and the input order is preserved.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key.
        min_rating: Lowest rating to keep.
        max_rating: Highest rating to keep.

    Returns:
        A new list containing the matching review dicts.
    """
    return [
        review
        for review in reviews
        if min_rating <= review["rating"] <= max_rating
    ]
def get_average_rating(reviews: list) -> float:
    """Return the mean rating of ``reviews`` rounded to two decimals.

    Args:
        reviews: Review dicts, each with a numeric ``"rating"`` key.

    Returns:
        The rounded average, or ``0.0`` when there are no reviews.
    """
    if reviews:
        ratings = [review["rating"] for review in reviews]
        return round(sum(ratings) / len(ratings), 2)
    return 0.0
def count_words_in_review(review: dict) -> int:
    """Count the whitespace-separated words in a review's text.

    Args:
        review: Review dict; a missing ``"text"`` key counts as empty text.

    Returns:
        The number of words in the review text.
    """
    return len(review.get("text", "").split())
def find_longest_review(reviews: list) -> dict:
    """Return the review with the most words.

    Word counts come from ``count_words_in_review``; when several reviews tie,
    the first one in ``reviews`` wins.

    Args:
        reviews: Review dicts to compare.

    Returns:
        The longest review dict, or an empty dict when ``reviews`` is empty.
    """
    return max(reviews, key=count_words_in_review) if reviews else {}
def _is_punctuation(char: str) -> bool:
    """Return True for ``string.punctuation`` characters and Unicode punctuation."""
    return char in string.punctuation or unicodedata.category(char).startswith("P")


def build_word_frequency(reviews: list) -> dict:
    """Count how often each word occurs across all review texts.

    Text is lower-cased and punctuation is removed before splitting on
    whitespace. Besides the ASCII characters in ``string.punctuation``, every
    character in a Unicode punctuation category is stripped, so quotes such as
    «» and dashes such as — neither stick to words nor count as words.

    Args:
        reviews: Review dicts; a missing ``"text"`` key counts as empty text.

    Returns:
        A dict mapping each word to its number of occurrences, ordered by
        first appearance.
    """
    word_counts = Counter()
    for review in reviews:
        text = review.get("text", "").lower()
        # Classify only the distinct characters, then let translate() strip
        # them in a single pass over the text.
        strip_table = {
            ord(char): None for char in set(text) if _is_punctuation(char)
        }
        word_counts.update(text.translate(strip_table).split())
    return dict(word_counts)
def get_top_words(freq_dict: dict, n: int) -> list:
    """Return the ``n`` most frequent words with their counts.

    Args:
        freq_dict: Mapping of word to occurrence count.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
        Words with equal counts keep their order from ``freq_dict``. The list
        is empty when ``n`` is zero or negative.
    """
    # A negative n would otherwise slice from the end and silently return
    # "everything except the last |n| words".
    if n <= 0:
        return []
    ranked = sorted(
        freq_dict.items(),
        key=lambda item: item[1],
        reverse=True,
    )
    return ranked[:n]
def group_reviews_by_rating(reviews: list) -> dict:
    """Group reviews into buckets keyed by their rating.

    Buckets for ratings 1 through 5 are always present, even when empty. A
    review whose rating falls outside that range gets its own bucket instead
    of raising ``KeyError``.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key.

    Returns:
        A dict mapping each rating to the list of reviews with that rating,
        in input order.
    """
    grouped = {rating: [] for rating in range(1, 6)}
    for review in reviews:
        grouped.setdefault(review["rating"], []).append(review)
    return grouped
def format_summary(reviews: list, top_words: list, avg_rating: float) -> str:
    """Build the plain-text summary report.

    The report has a header, the review count, the average rating, the top
    words with their counts, and an alphabetical list of unique usernames.

    Args:
        reviews: Review dicts, each with a ``"username"`` key.
        top_words: ``(word, count)`` pairs, listed in the given order.
        avg_rating: Average rating, printed as given.

    Returns:
        The report as one newline-joined string with no trailing newline.
    """
    unique_users = sorted({review["username"] for review in reviews})

    lines = [
        "=== Отчёт по отзывам ===",
        f"Общее количество отзывов: {len(reviews)}",
        f"Средний рейтинг: {avg_rating}",
        "",
        "Топ слов:",
    ]
    lines.extend(f"- {word}: {count}" for word, count in top_words)
    lines += ["", "Пользователи:"]
    lines.extend(f"- {name}" for name in unique_users)

    return "\n".join(lines)
def save_report(report: str, output_path: str) -> None:
    """Write the report to ``output_path`` as UTF-8, creating parent directories.

    An existing file at ``output_path`` is overwritten.

    Args:
        report: Text to write.
        output_path: Destination file path.
    """
    directory = os.path.dirname(output_path)
    if directory:
        # exist_ok avoids the race between checking for the directory and
        # creating it when something else creates it in between.
        os.makedirs(directory, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(report)
def main():
    """Run the review analysis: load, print statistics, and save the report.

    Reads reviews from ``data/data.txt``, prints the statistics to the
    console, and writes the formatted report to ``data/report.txt``.
    """
    source_path = "data/data.txt"

    # Load the reviews from the file.
    reviews = load_reviews(source_path)
    print(f"Загружено отзывов: {len(reviews)}")

    # Average rating.
    avg_rating = get_average_rating(reviews)
    print(f"Средний рейтинг: {avg_rating}")

    # Split into positive (4-5) and negative (1-2) reviews.
    positive = filter_by_rating(reviews, 4, 5)
    negative = filter_by_rating(reviews, 1, 2)
    print(f"\nПоложительных отзывов: {len(positive)}")
    print(f"Отрицательных отзывов: {len(negative)}")

    # Longest review; nothing is printed when there are no reviews.
    longest = find_longest_review(reviews)
    if longest:
        print("\nСамый длинный отзыв:")
        print(f"{longest['username']}: {longest['text']}")

    # Five most frequent words.
    top_words = get_top_words(build_word_frequency(reviews), 5)
    print("\nТоп-5 слов:")
    for word, count in top_words:
        print(f"{word}: {count}")

    # Review counts per rating.
    grouped = group_reviews_by_rating(reviews)
    print("\nКоличество отзывов по рейтингам:")
    for rating in range(1, 6):
        print(f"{rating}: {len(grouped[rating])}")

    # Full report, echoed to the console.
    report = format_summary(reviews, top_words, avg_rating)
    print(f"\n{report}")

    # Save the report to disk.
    save_report(report, "data/report.txt")
    print("\nПрограмма завершена!")
# Run the analysis only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()