ReviewAnalyzer/reviews.py

200 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os.path
import string
import unicodedata
from collections import Counter
def load_reviews(filepath: str) -> list:
    """Load reviews from a pipe-delimited text file.

    Every non-blank line must have the form ``username|rating|text`` where
    ``rating`` is an integer from 1 to 5. Lines with a different number of
    fields, a non-integer rating, or a rating outside 1..5 are reported on
    stdout and skipped. Because the line is split on every ``|``, a review
    whose text itself contains ``|`` is treated as malformed.

    Args:
        filepath: Path to the UTF-8 encoded reviews file.

    Returns:
        A list of dicts with the keys ``"username"`` (str), ``"rating"``
        (int) and ``"text"`` (str), in file order. The list is empty when
        ``filepath`` is not an existing regular file.
    """
    reviews = []
    # isfile() instead of exists(): a directory path would pass exists()
    # and then make open() raise instead of taking this "not found" path.
    if not os.path.isfile(filepath):
        print("Файл отзывов не найден!")
        return reviews
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
    # which would otherwise become part of the first username.
    with open(filepath, "r", encoding="utf-8-sig") as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            parts = line.split("|")
            if len(parts) != 3:
                print("Пропущена строка с неверным форматом:", line)
                continue
            username, rating, text = parts
            try:
                rating_int = int(rating.strip())
            except ValueError:
                print("Неверный рейтинг:", line)
                continue
            if not (1 <= rating_int <= 5):
                # The range used to be printed as "[15]" (separator lost).
                print("Рейтинг вне диапазона [1-5]:", line)
                continue
            reviews.append({
                "username": username.strip(),
                "rating": rating_int,
                "text": text.strip(),
            })
    return reviews
def filter_by_rating(reviews: list, min_rating: int, max_rating: int) -> list:
    """Return the reviews whose rating is within an inclusive range.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key.
        min_rating: Lowest rating to keep.
        max_rating: Highest rating to keep.

    Returns:
        A new list with the matching review dicts in their original order.
    """
    return [
        entry
        for entry in reviews
        if min_rating <= entry["rating"] <= max_rating
    ]
def get_average_rating(reviews: list) -> float:
    """Return the mean rating rounded to two decimal places.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key.

    Returns:
        The rounded mean of all ratings, or ``0.0`` when ``reviews`` is empty.
    """
    if reviews:
        ratings = [entry["rating"] for entry in reviews]
        return round(sum(ratings) / len(reviews), 2)
    return 0.0
def count_words_in_review(review: dict) -> int:
    """Return the number of whitespace-separated words in a review's text.

    Args:
        review: Review dict; a missing ``"text"`` key counts as empty text.

    Returns:
        The word count, ``0`` for empty or missing text.
    """
    return len(review.get("text", "").split())
def find_longest_review(reviews: list) -> dict:
    """Return the review with the most words.

    Word counts come from ``count_words_in_review``. When several reviews
    share the highest count, the one that appears first is returned.

    Args:
        reviews: Review dicts.

    Returns:
        The longest review dict, or ``{}`` when ``reviews`` is empty.
    """
    if not reviews:
        return {}
    # max() keeps the first of several equal maxima, so ties resolve to the
    # earliest review.
    return max(reviews, key=count_words_in_review)
def build_word_frequency(reviews: list) -> dict:
    """Count how often each word occurs across all review texts.

    Text is lower-cased and stripped of punctuation before it is split on
    whitespace. Both the ASCII characters in ``string.punctuation`` and any
    character in a Unicode punctuation category (dashes, guillemets,
    ellipsis, ...) are removed, so typographic marks in non-English text are
    neither counted as words nor left attached to them.

    Args:
        reviews: Review dicts, each with a ``"text"`` key.

    Returns:
        A plain dict mapping each word to its number of occurrences, with
        keys in order of first appearance.
    """
    ascii_table = str.maketrans("", "", string.punctuation)
    counts = Counter()
    for review in reviews:
        text = review["text"].lower().translate(ascii_table)
        # string.punctuation is ASCII-only; Unicode categories starting with
        # "P" cover the remaining punctuation such as "—", "«", "»" and "…".
        text = "".join(
            ch for ch in text
            if not unicodedata.category(ch).startswith("P")
        )
        counts.update(text.split())
    # Return a plain dict so missing keys still raise KeyError for callers.
    return dict(counts)
def get_top_words(freq_dict: dict, n: int) -> list:
    """Return the ``n`` most frequent words with their counts.

    Args:
        freq_dict: Mapping of word to occurrence count.
        n: Number of leading entries to keep; applied as a slice bound.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
        Words with equal counts keep their order from ``freq_dict``.
    """
    ranked = list(freq_dict.items())
    # The sort is stable, so equal counts stay in dict order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def group_reviews_by_rating(reviews: list) -> dict:
    """Group reviews into buckets keyed by rating.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key in 1..5.

    Returns:
        A dict with the keys 1 through 5, each mapping to the list of
        reviews with that rating (empty when there are none).

    Raises:
        KeyError: If a review's rating is not one of 1..5.
    """
    buckets = {}
    # Create all five buckets up front so ratings with no reviews still appear.
    for score in range(1, 6):
        buckets[score] = []
    for entry in reviews:
        buckets[entry["rating"]].append(entry)
    return buckets
def format_summary(reviews: list, top_words: list, avg_rating: float) -> str:
    """Build the plain-text summary report.

    The report has a header with the review count and average rating, a
    list of top words with their counts, and an alphabetically sorted list
    of the distinct usernames.

    Args:
        reviews: Review dicts, each with a ``"username"`` key.
        top_words: ``(word, count)`` pairs, listed in the given order.
        avg_rating: Average rating to show in the header.

    Returns:
        The report as one newline-joined string without a trailing newline.
    """
    unique_users = sorted(set(entry["username"] for entry in reviews))
    header = [
        "=== Отчёт по отзывам ===",
        f"Общее количество отзывов: {len(reviews)}",
        f"Средний рейтинг: {avg_rating!s}",
        "",
        "Топ слов:",
    ]
    word_lines = [
        "- " + word + ": " + str(count)
        for word, count in top_words
    ]
    user_lines = ["- " + name for name in unique_users]
    return "\n".join([*header, *word_lines, "", "Пользователи:", *user_lines])
def save_report(report: str, output_path: str) -> None:
    """Write the report text to a UTF-8 file, creating parent directories.

    An existing file at ``output_path`` is overwritten.

    Args:
        report: Text to write.
        output_path: Destination file path. A path with no directory part
            is written relative to the current working directory.
    """
    directory = os.path.dirname(output_path)
    if directory:
        # exist_ok=True avoids the check-then-create race: the directory may
        # appear between an exists() test and the makedirs() call.
        os.makedirs(directory, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(report)
def main(filepath: str = "data/data.txt",
         report_path: str = "data/report.txt") -> None:
    """Run the full review analysis and print the results.

    Loads the reviews, prints the average rating, the positive/negative
    counts, the longest review, the five most frequent words and the
    per-rating counts, then prints the summary report and saves it.

    Args:
        filepath: Reviews file to read.
        report_path: File the summary report is written to.
    """
    # Load the reviews from the input file.
    reviews = load_reviews(filepath)
    print("Загружено отзывов:", len(reviews))

    # Average rating.
    avg_rating = get_average_rating(reviews)
    print(f"Средний рейтинг: {avg_rating}")

    # Split by rating: 4-5 counts as positive, 1-2 as negative.
    positive = filter_by_rating(reviews, 4, 5)
    negative = filter_by_rating(reviews, 1, 2)
    print("\nПоложительных отзывов:", len(positive))
    print("Отрицательных отзывов:", len(negative))

    # Longest review; an empty dict means there were no reviews.
    longest = find_longest_review(reviews)
    if longest:
        print("\nСамый длинный отзыв:")
        print(f"{longest['username']}: {longest['text']}")

    # Top 5 words.
    freq = build_word_frequency(reviews)
    top_words = get_top_words(freq, 5)
    print("\nТоп-5 слов:")
    for word, count in top_words:
        print(f"{word}: {count}")

    # Number of reviews per rating.
    grouped = group_reviews_by_rating(reviews)
    print("\nКоличество отзывов по рейтингам:")
    for rating in range(1, 6):
        print(f"{rating}: {len(grouped[rating])}")

    # Summary report.
    report = format_summary(reviews, top_words, avg_rating)
    print("\n" + report)

    # Save the report to disk.
    save_report(report, report_path)
    print("\nПрограмма завершена!")


if __name__ == '__main__':
    main()