200 lines
5.4 KiB
Python
200 lines
5.4 KiB
Python
import os.path
import string
import unicodedata
|
||
|
||
|
||
def load_reviews(filepath: str) -> list:
    """Load and validate reviews from a ``username|rating|text`` file.

    Blank lines are ignored. A line is reported on stdout and skipped when
    it does not split into exactly three ``|``-separated fields, when the
    rating is not an integer, or when the rating is outside 1..5.

    Args:
        filepath: Path to the UTF-8 encoded reviews file (a leading
            byte-order mark, U+FEFF, is tolerated).

    Returns:
        A list of dicts with the keys ``"username"`` (str, stripped),
        ``"rating"`` (int in 1..5) and ``"text"`` (str, stripped), in file
        order. An empty list if ``filepath`` is not an existing regular file.
    """
    reviews = []

    # isfile() rather than exists(): a directory path "exists" but would
    # make open() raise instead of producing the "not found" message.
    if not os.path.isfile(filepath):
        print("Файл отзывов не найден!")
        return reviews

    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # strip() does not remove and which would otherwise end up in the first
    # username.
    with open(filepath, "r", encoding="utf-8-sig") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue

            parts = line.split("|")
            if len(parts) != 3:
                print("Пропущена строка с неверным форматом:", line)
                continue
            username, rating, text = parts

            try:
                rating_int = int(rating.strip())
            except ValueError:
                print("Неверный рейтинг:", line)
                continue

            if not (1 <= rating_int <= 5):
                print("Рейтинг вне диапазона [1–5]:", line)
                continue

            reviews.append({
                "username": username.strip(),
                "rating": rating_int,
                "text": text.strip(),
            })

    return reviews
|
||
|
||
|
||
def filter_by_rating(reviews: list, min_rating: int, max_rating: int) -> list:
    """Return the reviews whose rating lies within an inclusive range.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key.
        min_rating: Lowest rating to keep (inclusive).
        max_rating: Highest rating to keep (inclusive).

    Returns:
        A new list with the matching review dicts, in their original order.
    """
    return [
        review
        for review in reviews
        if min_rating <= review["rating"] <= max_rating
    ]
|
||
|
||
|
||
def get_average_rating(reviews: list) -> float:
    """Return the mean rating of the reviews, rounded to two decimals.

    Args:
        reviews: Review dicts, each with a numeric ``"rating"`` key.

    Returns:
        The rounded average, or ``0.0`` when ``reviews`` is empty.
    """
    if reviews:
        rating_sum = sum(item["rating"] for item in reviews)
        return round(rating_sum / len(reviews), 2)
    return 0.0
|
||
|
||
|
||
def count_words_in_review(review: dict) -> int:
    """Count the whitespace-separated words in a review's text.

    Args:
        review: Review dict; a missing ``"text"`` key counts as empty text.

    Returns:
        The number of whitespace-delimited tokens in the text.
    """
    return len(review.get("text", "").split())
|
||
|
||
|
||
def find_longest_review(reviews: list) -> dict:
    """Return the review with the most words.

    Word counts come from ``count_words_in_review``. When several reviews
    share the highest count, the earliest one in the list is returned.

    Args:
        reviews: Review dicts.

    Returns:
        The longest review dict, or an empty dict when ``reviews`` is empty.
    """
    if not reviews:
        return {}
    # max() keeps the first maximal item it encounters, so ties resolve to
    # the earliest review.
    return max(reviews, key=count_words_in_review)
|
||
|
||
|
||
# ASCII punctuation and symbols, as a set for O(1) membership tests.
_ASCII_PUNCTUATION = frozenset(string.punctuation)


def _is_punctuation(char: str) -> bool:
    """Return True for ASCII punctuation/symbols and any Unicode punctuation."""
    # Unicode general categories starting with "P" (Pd, Po, Pi, Pf, ...)
    # cover dashes, guillemets, ellipses and similar non-ASCII marks.
    return (
        char in _ASCII_PUNCTUATION
        or unicodedata.category(char).startswith("P")
    )


def build_word_frequency(reviews: list) -> dict:
    """Count how often each word occurs across all review texts.

    Each text is lower-cased, punctuation characters are deleted (not
    replaced by spaces), and the remainder is split on whitespace. Besides
    the ASCII set in ``string.punctuation``, every character in a Unicode
    punctuation category is removed, so marks such as «», — and … do not
    end up as separate "words" or stick to neighbouring words.

    Args:
        reviews: Review dicts, each with a ``"text"`` key holding a str.

    Returns:
        A dict mapping each word to its number of occurrences, with keys in
        order of first appearance.

    Raises:
        KeyError: If a review has no ``"text"`` key.
    """
    freq = {}

    for review in reviews:
        lowered = review["text"].lower()
        # Build a deletion table only for the punctuation actually present,
        # then remove it in a single translate() pass.
        deletions = {
            ord(char): None
            for char in set(lowered)
            if _is_punctuation(char)
        }
        for word in lowered.translate(deletions).split():
            freq[word] = freq.get(word, 0) + 1

    return freq
|
||
|
||
|
||
def get_top_words(freq_dict: dict, n: int) -> list:
    """Return the ``n`` most frequent words with their counts.

    Args:
        freq_dict: Mapping of word to occurrence count.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
        Words with equal counts keep the order they have in ``freq_dict``.
        An empty list when ``n`` is zero or negative.
    """
    # A negative n would otherwise slice to "all but the last |n|" entries,
    # which is not a top-n result.
    if n <= 0:
        return []

    ranked = sorted(
        freq_dict.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:n]
|
||
|
||
|
||
def group_reviews_by_rating(reviews: list) -> dict:
    """Group reviews into buckets keyed by rating 1 through 5.

    Args:
        reviews: Review dicts, each with a ``"rating"`` key in 1..5.

    Returns:
        A dict with the keys 1, 2, 3, 4 and 5, each mapping to the list of
        reviews with that rating (possibly empty), in input order.

    Raises:
        KeyError: If a review's rating is not one of 1..5.
    """
    buckets = {}
    for score in range(1, 6):
        buckets[score] = []

    for review in reviews:
        buckets[review["rating"]].append(review)

    return buckets
|
||
|
||
|
||
def format_summary(reviews: list, top_words: list, avg_rating: float) -> str:
    """Build the plain-text report for a set of reviews.

    The report lists the review count, the average rating, the given top
    words with their counts, and the distinct usernames in sorted order.

    Args:
        reviews: Review dicts, each with a ``"username"`` key.
        top_words: ``(word, count)`` pairs; ``word`` must be a str.
        avg_rating: Average rating to show in the report.

    Returns:
        The report as one string with lines joined by ``"\\n"`` and no
        trailing newline.
    """
    unique_users = sorted({item["username"] for item in reviews})

    lines = [
        "=== Отчёт по отзывам ===",
        "Общее количество отзывов: " + str(len(reviews)),
        "Средний рейтинг: " + str(avg_rating),
        "",
        "Топ слов:",
    ]
    lines.extend(
        "- " + word + ": " + str(count)
        for word, count in top_words
    )
    lines += ["", "Пользователи:"]
    lines.extend("- " + name for name in unique_users)

    return "\n".join(lines)
|
||
|
||
|
||
def save_report(report: str, output_path: str) -> None:
    """Write the report text to a UTF-8 file, creating parent directories.

    An existing file at ``output_path`` is overwritten.

    Args:
        report: Text to write.
        output_path: Destination file path. Its directory part, if any, is
            created when missing.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
    """
    directory = os.path.dirname(output_path)

    # dirname() is "" for a bare file name; makedirs("") would raise.
    if directory:
        # exist_ok avoids the check-then-create race of testing for the
        # directory first and calling makedirs afterwards.
        os.makedirs(directory, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as file:
        file.write(report)
|
||
|
||
|
||
def main():
    """Run the review analysis end to end.

    Loads reviews from ``data/data.txt``, prints the review count, the
    average rating, the numbers of positive (4-5) and negative (1-2)
    reviews, the longest review, the five most frequent words and the
    per-rating counts, then prints the formatted report and saves it to
    ``data/report.txt``.
    """
    source_path = "data/data.txt"

    # Load the reviews from the file.
    reviews = load_reviews(source_path)
    print("Загружено отзывов:", len(reviews))

    # Average rating.
    avg_rating = get_average_rating(reviews)
    print("Средний рейтинг: " + str(avg_rating))

    # Filter by rating range.
    positive_count = len(filter_by_rating(reviews, 4, 5))
    negative_count = len(filter_by_rating(reviews, 1, 2))
    print("\nПоложительных отзывов:", positive_count)
    print("Отрицательных отзывов:", negative_count)

    # Longest review (an empty dict means there were no reviews).
    longest = find_longest_review(reviews)
    if longest:
        print("\nСамый длинный отзыв:")
        print(longest['username'] + ": " + longest['text'])

    # Top 5 words.
    top_words = get_top_words(build_word_frequency(reviews), 5)
    print("\nТоп-5 слов:")
    for word, count in top_words:
        print(word, count, sep=": ")

    # Per-rating counts.
    grouped = group_reviews_by_rating(reviews)
    print("\nКоличество отзывов по рейтингам:")
    for rating in range(1, 6):
        print(rating, len(grouped[rating]), sep=": ")

    # Report.
    report = format_summary(reviews, top_words, avg_rating)
    print("\n" + report)

    # Save the report.
    save_report(report, "data/report.txt")

    print("\nПрограмма завершена!")
|
||
|
||
|
||
# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
|