181 lines
4.6 KiB
Python
181 lines
4.6 KiB
Python
import os.path
import string
import unicodedata
from collections import Counter
|
|
|
|
|
|
def load_reviews(filepath: str) -> list:
    """Load reviews from a ``username|rating|text`` text file.

    Each non-empty line is split on the first two ``|`` characters only, so
    the review text itself may contain ``|``. Blank lines are ignored. Lines
    with fewer than three fields or a non-integer rating are skipped with a
    warning printed to stdout instead of aborting the whole load.

    Args:
        filepath: Path to the UTF-8 encoded reviews file.

    Returns:
        A list of dicts with keys ``"username"`` (str, stripped),
        ``"rating"`` (int) and ``"text"`` (str, stripped), in file order.
        The list is empty if the file does not exist (a message is printed
        in that case).
    """
    reviews = []

    # Open directly instead of checking os.path.exists() first: the file
    # could disappear between the check and the open.
    try:
        file = open(filepath, "r", encoding="utf-8")
    except FileNotFoundError:
        print("Файл отзывов не найден!")
        return reviews

    with file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line:
                continue

            # Both a wrong field count (unpacking) and a non-numeric rating
            # (int) raise ValueError, so one handler covers every malformed
            # line.
            try:
                username, rating, text = line.split("|", 2)
                rating_value = int(rating)
            except ValueError:
                print(
                    "Пропущена некорректная строка "
                    + str(line_number) + ": " + line
                )
                continue

            reviews.append({
                "username": username.strip(),
                "rating": rating_value,
                "text": text.strip(),
            })

    return reviews
|
|
|
|
|
|
def filter_by_rating(reviews: list, min_rating: int, max_rating: int) -> list:
    """Return the reviews whose rating lies in ``[min_rating, max_rating]``.

    Args:
        reviews: Review dicts, each with an integer ``"rating"`` key.
        min_rating: Lowest rating to keep (inclusive).
        max_rating: Highest rating to keep (inclusive).

    Returns:
        A new list holding the matching review dicts (the same objects, not
        copies) in their original order. Empty when nothing matches or when
        ``min_rating > max_rating``.
    """
    return [
        rev for rev in reviews
        if min_rating <= rev["rating"] <= max_rating
    ]
|
|
|
|
|
|
def get_average_rating(reviews: list) -> float:
    """Compute the mean rating of the reviews, rounded to two decimals.

    Args:
        reviews: Review dicts, each with a numeric ``"rating"`` key.

    Returns:
        The rounded average, or ``0.0`` when ``reviews`` is empty.
    """
    if not reviews:
        return 0.0

    rating_sum = 0
    for entry in reviews:
        rating_sum += entry["rating"]

    return round(rating_sum / len(reviews), 2)
|
|
|
|
|
|
def count_words_in_review(review: dict) -> int:
    """Count the whitespace-separated words in a review's text.

    Args:
        review: Review dict; a missing ``"text"`` key is treated as an
            empty string.

    Returns:
        The number of words in the text.
    """
    return len(review.get("text", "").split())
|
|
|
|
|
|
def find_longest_review(reviews: list) -> dict:
    """Pick the review with the most words.

    Word counts come from ``count_words_in_review``. When several reviews
    tie, ``max`` keeps the first one encountered.

    Args:
        reviews: Review dicts.

    Returns:
        The review dict with the highest word count, or ``{}`` when
        ``reviews`` is empty.
    """
    return max(reviews, key=count_words_in_review) if reviews else {}
|
|
|
|
|
|
def build_word_frequency(reviews: list) -> dict:
    """Count how often each word occurs across all review texts.

    Texts are lower-cased, punctuation characters are deleted, and the
    remainder is split on whitespace. Punctuation means every character in
    ``string.punctuation`` plus every character whose Unicode general
    category starts with ``"P"``, so typographic marks common in Russian
    text (``«»``, ``—``, ``…``) are removed as well instead of being counted
    as words or glued to them. Punctuation inside a word is deleted without
    splitting it (``"don't"`` becomes ``"dont"``).

    Args:
        reviews: Review dicts, each with a string ``"text"`` key.

    Returns:
        A plain dict mapping each word to its number of occurrences, keyed
        in order of first appearance.
    """
    ascii_punctuation = set(string.punctuation)
    counts = Counter()

    for review in reviews:
        cleaned = "".join(
            char
            for char in review["text"].lower()
            if char not in ascii_punctuation
            and not unicodedata.category(char).startswith("P")
        )
        counts.update(cleaned.split())

    # Callers are promised a dict; hand back a plain one rather than a Counter.
    return dict(counts)
|
|
|
|
|
|
def get_top_words(freq_dict: dict, n: int) -> list:
    """Return the ``n`` most frequent words with their counts.

    Args:
        freq_dict: Mapping of word to occurrence count.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
        Words with equal counts keep the order they have in ``freq_dict``
        (the sort is stable). Empty when ``n`` is zero or negative.
    """
    # A negative n would otherwise slice off the *last* |n| entries and
    # return nearly the whole ranking.
    if n <= 0:
        return []

    ranked = sorted(
        freq_dict.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:n]
|
|
|
|
|
|
def group_reviews_by_rating(reviews: list) -> dict:
    """Group reviews into lists keyed by their rating.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key.

    Returns:
        A dict mapping rating to the list of reviews with that rating, in
        input order. Keys 1 through 5 are always present, possibly with
        empty lists. A rating outside that range gets its own key instead
        of raising ``KeyError``.
    """
    grouped = {rating: [] for rating in range(1, 6)}

    for rev in reviews:
        # setdefault keeps out-of-range ratings instead of crashing on them.
        grouped.setdefault(rev["rating"], []).append(rev)

    return grouped
|
|
|
|
|
|
def format_summary(reviews: list, top_words: list, avg_rating: float) -> str:
    """Render the plain-text report for a set of reviews.

    The report contains a header, the review count, the average rating, the
    supplied top words with their counts, and the distinct usernames in
    sorted order.

    Args:
        reviews: Review dicts, each with a string ``"username"`` key.
        top_words: ``(word, count)`` pairs to list under the top-words
            heading, in the order given.
        avg_rating: Average rating to print as-is.

    Returns:
        The report as one string with lines joined by ``"\\n"`` and no
        trailing newline.
    """
    unique_names = sorted({entry["username"] for entry in reviews})

    lines = [
        "=== Отчёт по отзывам ===",
        "Общее количество отзывов: " + str(len(reviews)),
        "Средний рейтинг: " + str(avg_rating),
        "",
        "Топ слов:",
    ]
    lines.extend(
        "- " + word + ": " + str(count) for word, count in top_words
    )
    lines += ["", "Пользователи:"]
    lines.extend("- " + name for name in unique_names)

    return "\n".join(lines)
|
|
|
|
|
|
def save_report(report: str, output_path: str) -> None:
    """Write the report text to ``output_path`` as UTF-8.

    Missing parent directories are created first. An existing file at
    ``output_path`` is overwritten.

    Args:
        report: Text to write.
        output_path: Destination file path.
    """
    directory = os.path.dirname(output_path)

    # exist_ok=True closes the window in which another process could create
    # the directory between an exists() check and makedirs().
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as file:
        file.write(report)
|
|
|
|
|
|
def main():
    """Run the review-analysis pipeline and write the report.

    Loads reviews from ``data/data.txt``, prints summary statistics to
    stdout, and saves the formatted report to ``data/report.txt``.
    """
    source_path = "data/data.txt"

    # Load the reviews from the file.
    reviews = load_reviews(source_path)
    print("Загружено отзывов:", len(reviews))

    # Average rating.
    average = get_average_rating(reviews)
    print("Средний рейтинг: " + str(average))

    # Filter by rating.
    positive_reviews = filter_by_rating(reviews, 4, 5)
    negative_reviews = filter_by_rating(reviews, 1, 2)
    print("\nПоложительных отзывов:", len(positive_reviews))
    print("Отрицательных отзывов:", len(negative_reviews))

    # Longest review (an empty dict means there were no reviews).
    longest_review = find_longest_review(reviews)
    if longest_review:
        print("\nСамый длинный отзыв:")
        print(longest_review['username'] + ": " + longest_review['text'])

    # Top 5 words.
    top_words = get_top_words(build_word_frequency(reviews), 5)
    print("\nТоп-5 слов:")
    for word, count in top_words:
        print(word, count, sep=": ")

    # Grouping by rating.
    by_rating = group_reviews_by_rating(reviews)
    print("\nКоличество отзывов по рейтингам:")
    for rating in range(1, 6):
        print(rating, len(by_rating[rating]), sep=": ")

    # Report.
    report_text = format_summary(reviews, top_words, average)
    print("\n" + report_text)

    # Save the report.
    save_report(report_text, "data/report.txt")

    print("\nПрограмма завершена!")
|
|
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|