diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/data/students_performance.csv b/data/students_performance.csv new file mode 100644 index 0000000..4fe7063 --- /dev/null +++ b/data/students_performance.csv @@ -0,0 +1,1001 @@ +"gender","race/ethnicity","parental level of education","lunch","test preparation course","math score","reading score","writing score" +"female","group B","bachelor's degree","standard","none","72","72","74" +"female","group C","some college","standard","completed","69","90","88" +"female","group B","master's degree","standard","none","90","95","93" +"male","group A","associate's degree","free/reduced","none","47","57","44" +"male","group C","some college","standard","none","76","78","75" +"female","group B","associate's degree","standard","none","71","83","78" +"female","group B","some college","standard","completed","88","95","92" +"male","group B","some college","free/reduced","none","40","43","39" +"male","group D","high school","free/reduced","completed","64","64","67" +"female","group B","high school","free/reduced","none","38","60","50" +"male","group C","associate's degree","standard","none","58","54","52" +"male","group D","associate's degree","standard","none","40","52","43" +"female","group B","high school","standard","none","65","81","73" +"male","group A","some college","standard","completed","78","72","70" +"female","group A","master's degree","standard","none","50","53","58" +"female","group C","some high school","standard","none","69","75","78" +"male","group C","high school","standard","none","88","89","86" +"female","group B","some high school","free/reduced","none","18","32","28" +"male","group C","master's degree","free/reduced","completed","46","42","46" +"female","group C","associate's degree","free/reduced","none","54","58","61" +"male","group D","high 
school","standard","none","66","69","63" +"female","group B","some college","free/reduced","completed","65","75","70" +"male","group D","some college","standard","none","44","54","53" +"female","group C","some high school","standard","none","69","73","73" +"male","group D","bachelor's degree","free/reduced","completed","74","71","80" +"male","group A","master's degree","free/reduced","none","73","74","72" +"male","group B","some college","standard","none","69","54","55" +"female","group C","bachelor's degree","standard","none","67","69","75" +"male","group C","high school","standard","none","70","70","65" +"female","group D","master's degree","standard","none","62","70","75" +"female","group D","some college","standard","none","69","74","74" +"female","group B","some college","standard","none","63","65","61" +"female","group E","master's degree","free/reduced","none","56","72","65" +"male","group D","some college","standard","none","40","42","38" +"male","group E","some college","standard","none","97","87","82" +"male","group E","associate's degree","standard","completed","81","81","79" +"female","group D","associate's degree","standard","none","74","81","83" +"female","group D","some high school","free/reduced","none","50","64","59" +"female","group D","associate's degree","free/reduced","completed","75","90","88" +"male","group B","associate's degree","free/reduced","none","57","56","57" +"male","group C","associate's degree","free/reduced","none","55","61","54" +"female","group C","associate's degree","standard","none","58","73","68" +"female","group B","associate's degree","standard","none","53","58","65" +"male","group B","some college","free/reduced","completed","59","65","66" +"female","group E","associate's degree","free/reduced","none","50","56","54" +"male","group B","associate's degree","standard","none","65","54","57" +"female","group A","associate's degree","standard","completed","55","65","62" +"female","group C","high 
school","standard","none","66","71","76" +"female","group D","associate's degree","free/reduced","completed","57","74","76" +"male","group C","high school","standard","completed","82","84","82" +"male","group E","some college","standard","none","53","55","48" +"male","group E","associate's degree","free/reduced","completed","77","69","68" +"male","group C","some college","standard","none","53","44","42" +"male","group D","high school","standard","none","88","78","75" +"female","group C","some high school","free/reduced","completed","71","84","87" +"female","group C","high school","free/reduced","none","33","41","43" +"female","group E","associate's degree","standard","completed","82","85","86" +"male","group D","associate's degree","standard","none","52","55","49" +"male","group D","some college","standard","completed","58","59","58" +"female","group C","some high school","free/reduced","none","0","17","10" +"male","group E","bachelor's degree","free/reduced","completed","79","74","72" +"male","group A","some high school","free/reduced","none","39","39","34" +"male","group A","associate's degree","free/reduced","none","62","61","55" +"female","group C","associate's degree","standard","none","69","80","71" +"female","group D","some high school","standard","none","59","58","59" +"male","group B","some high school","standard","none","67","64","61" +"male","group D","some high school","free/reduced","none","45","37","37" +"female","group C","some college","standard","none","60","72","74" +"male","group B","associate's degree","free/reduced","none","61","58","56" +"female","group C","associate's degree","standard","none","39","64","57" +"female","group D","some college","free/reduced","completed","58","63","73" +"male","group D","some college","standard","completed","63","55","63" +"female","group A","associate's degree","free/reduced","none","41","51","48" +"male","group C","some high school","free/reduced","none","61","57","56" +"male","group C","some high 
school","standard","none","49","49","41" +"male","group B","associate's degree","free/reduced","none","44","41","38" +"male","group E","some high school","standard","none","30","26","22" +"male","group A","bachelor's degree","standard","completed","80","78","81" +"female","group D","some high school","standard","completed","61","74","72" +"female","group E","master's degree","standard","none","62","68","68" +"female","group B","associate's degree","standard","none","47","49","50" +"male","group B","high school","free/reduced","none","49","45","45" +"male","group A","some college","free/reduced","completed","50","47","54" +"male","group E","associate's degree","standard","none","72","64","63" +"male","group D","high school","free/reduced","none","42","39","34" +"female","group C","some college","standard","none","73","80","82" +"female","group C","some college","free/reduced","none","76","83","88" +"female","group D","associate's degree","standard","none","71","71","74" +"female","group A","some college","standard","none","58","70","67" +"female","group D","some high school","standard","none","73","86","82" +"female","group C","bachelor's degree","standard","none","65","72","74" +"male","group C","high school","free/reduced","none","27","34","36" +"male","group C","high school","standard","none","71","79","71" +"male","group C","associate's degree","free/reduced","completed","43","45","50" +"female","group B","some college","standard","none","79","86","92" +"male","group C","associate's degree","free/reduced","completed","78","81","82" +"male","group B","some high school","standard","completed","65","66","62" +"female","group E","some college","standard","completed","63","72","70" +"female","group D","some college","free/reduced","none","58","67","62" +"female","group D","bachelor's degree","standard","none","65","67","62" +"male","group B","some college","standard","none","79","67","67" +"male","group D","bachelor's degree","standard","completed","68","74","74" 
+"female","group D","associate's degree","standard","none","85","91","89" +"male","group B","high school","standard","completed","60","44","47" +"male","group C","some college","standard","completed","98","86","90" +"female","group C","some college","standard","none","58","67","72" +"female","group D","master's degree","standard","none","87","100","100" +"male","group E","associate's degree","standard","completed","66","63","64" +"female","group B","associate's degree","free/reduced","none","52","76","70" +"female","group B","some high school","standard","none","70","64","72" +"female","group D","associate's degree","free/reduced","completed","77","89","98" +"male","group C","high school","standard","none","62","55","49" +"male","group A","associate's degree","standard","none","54","53","47" +"female","group D","some college","standard","none","51","58","54" +"female","group E","bachelor's degree","standard","completed","99","100","100" +"male","group C","high school","standard","none","84","77","74" +"female","group B","bachelor's degree","free/reduced","none","75","85","82" +"female","group D","bachelor's degree","standard","none","78","82","79" +"female","group D","some high school","standard","none","51","63","61" +"female","group C","some college","standard","none","55","69","65" +"female","group C","bachelor's degree","standard","completed","79","92","89" +"male","group B","associate's degree","standard","completed","91","89","92" +"female","group C","some college","standard","completed","88","93","93" +"male","group D","high school","free/reduced","none","63","57","56" +"male","group E","some college","standard","none","83","80","73" +"female","group B","high school","standard","none","87","95","86" +"male","group B","some high school","standard","none","72","68","67" +"male","group D","some college","standard","completed","65","77","74" +"male","group D","master's degree","standard","none","82","82","74" +"female","group A","bachelor's 
degree","standard","none","51","49","51" +"male","group D","master's degree","standard","none","89","84","82" +"male","group C","some high school","free/reduced","completed","53","37","40" +"male","group E","some college","free/reduced","completed","87","74","70" +"female","group C","some college","standard","completed","75","81","84" +"male","group D","bachelor's degree","free/reduced","completed","74","79","75" +"male","group C","bachelor's degree","standard","none","58","55","48" +"male","group B","some high school","standard","completed","51","54","41" +"male","group E","high school","standard","none","70","55","56" +"female","group C","associate's degree","standard","none","59","66","67" +"male","group D","some college","standard","completed","71","61","69" +"female","group D","some high school","standard","none","76","72","71" +"female","group C","some college","free/reduced","none","59","62","64" +"female","group E","some college","free/reduced","completed","42","55","54" +"male","group A","high school","standard","none","57","43","47" +"male","group D","some college","standard","none","88","73","78" +"female","group C","some college","free/reduced","none","22","39","33" +"male","group B","some high school","standard","none","88","84","75" +"male","group C","associate's degree","free/reduced","none","73","68","66" +"female","group D","bachelor's degree","standard","completed","68","75","81" +"male","group E","associate's degree","free/reduced","completed","100","100","93" +"male","group A","some high school","standard","completed","62","67","69" +"male","group A","bachelor's degree","standard","none","77","67","68" +"female","group B","associate's degree","standard","completed","59","70","66" +"male","group D","bachelor's degree","standard","none","54","49","47" +"male","group D","some high school","standard","none","62","67","61" +"female","group C","some college","standard","completed","70","89","88" +"female","group E","high 
school","free/reduced","completed","66","74","78" +"male","group B","some college","free/reduced","none","60","60","60" +"female","group B","associate's degree","standard","completed","61","86","87" +"male","group D","associate's degree","free/reduced","none","66","62","64" +"male","group B","associate's degree","free/reduced","completed","82","78","74" +"female","group E","some college","free/reduced","completed","75","88","85" +"male","group B","master's degree","free/reduced","none","49","53","52" +"male","group C","high school","standard","none","52","53","49" +"female","group E","master's degree","standard","none","81","92","91" +"female","group C","bachelor's degree","standard","completed","96","100","100" +"male","group C","high school","free/reduced","completed","53","51","51" +"female","group B","master's degree","free/reduced","completed","58","76","78" +"female","group B","high school","standard","completed","68","83","78" +"female","group C","some college","free/reduced","completed","67","75","70" +"male","group A","high school","standard","completed","72","73","74" +"male","group E","some high school","standard","none","94","88","78" +"female","group D","some college","standard","none","79","86","81" +"female","group C","associate's degree","standard","none","63","67","70" +"female","group C","bachelor's degree","free/reduced","completed","43","51","54" +"female","group C","master's degree","standard","completed","81","91","87" +"female","group B","high school","free/reduced","completed","46","54","58" +"female","group C","associate's degree","standard","completed","71","77","77" +"female","group B","master's degree","free/reduced","completed","52","70","62" +"female","group D","some high school","standard","completed","97","100","100" +"male","group C","master's degree","free/reduced","completed","62","68","75" +"female","group C","some college","free/reduced","none","46","64","66" +"female","group E","high school","standard","none","50","50","47" 
+"female","group D","associate's degree","standard","none","65","69","70" +"male","group C","some high school","free/reduced","completed","45","52","49" +"male","group C","associate's degree","free/reduced","completed","65","67","65" +"male","group E","high school","standard","none","80","76","65" +"male","group D","some high school","standard","completed","62","66","68" +"male","group B","some high school","free/reduced","none","48","52","45" +"female","group C","bachelor's degree","standard","none","77","88","87" +"female","group E","associate's degree","standard","none","66","65","69" +"male","group D","some college","standard","completed","76","83","79" +"female","group B","some high school","standard","none","62","64","66" +"male","group D","some college","standard","completed","77","62","62" +"female","group C","master's degree","standard","completed","69","84","85" +"male","group D","associate's degree","standard","none","61","55","52" +"male","group C","some high school","free/reduced","completed","59","69","65" +"male","group E","high school","free/reduced","none","55","56","51" +"female","group B","some college","free/reduced","none","45","53","55" +"female","group B","bachelor's degree","free/reduced","none","78","79","76" +"female","group C","associate's degree","standard","completed","67","84","86" +"female","group D","some college","free/reduced","none","65","81","77" +"male","group C","associate's degree","standard","none","69","77","69" +"female","group B","associate's degree","standard","none","57","69","68" +"male","group C","some college","standard","none","59","41","42" +"male","group D","some high school","standard","completed","74","71","78" +"male","group E","bachelor's degree","standard","none","82","62","62" +"male","group E","high school","standard","completed","81","80","76" +"female","group B","some college","free/reduced","none","74","81","76" +"female","group B","some college","free/reduced","none","58","61","66" +"male","group 
D","some high school","free/reduced","completed","80","79","79" +"male","group C","some college","free/reduced","none","35","28","27" +"female","group C","high school","free/reduced","none","42","62","60" +"male","group C","associate's degree","free/reduced","completed","60","51","56" +"male","group E","high school","standard","completed","87","91","81" +"male","group B","some high school","standard","completed","84","83","75" +"female","group E","associate's degree","free/reduced","completed","83","86","88" +"female","group C","high school","free/reduced","none","34","42","39" +"male","group B","high school","free/reduced","none","66","77","70" +"male","group B","some high school","standard","completed","61","56","56" +"female","group D","high school","standard","completed","56","68","74" +"male","group B","associate's degree","standard","none","87","85","73" +"female","group C","some high school","free/reduced","none","55","65","62" +"male","group D","some high school","standard","none","86","80","75" +"female","group B","associate's degree","standard","completed","52","66","73" +"female","group E","master's degree","free/reduced","none","45","56","54" +"female","group C","some college","standard","none","72","72","71" +"male","group D","high school","standard","none","57","50","54" +"male","group A","some high school","free/reduced","none","68","72","64" +"female","group C","some college","standard","completed","88","95","94" +"male","group D","some college","standard","none","76","64","66" +"male","group C","associate's degree","standard","none","46","43","42" +"female","group B","bachelor's degree","standard","none","67","86","83" +"male","group E","some high school","standard","none","92","87","78" +"male","group C","bachelor's degree","standard","completed","83","82","84" +"male","group D","associate's degree","standard","none","80","75","77" +"male","group D","bachelor's degree","free/reduced","none","63","66","67" +"female","group D","some high 
school","standard","completed","64","60","74" +"male","group B","some college","standard","none","54","52","51" +"male","group C","associate's degree","standard","none","84","80","80" +"male","group D","high school","free/reduced","completed","73","68","66" +"female","group E","bachelor's degree","standard","none","80","83","83" +"female","group D","high school","standard","none","56","52","55" +"male","group E","some college","standard","none","59","51","43" +"male","group D","some high school","standard","none","75","74","69" +"male","group C","associate's degree","standard","none","85","76","71" +"male","group E","associate's degree","standard","none","89","76","74" +"female","group B","high school","standard","completed","58","70","68" +"female","group B","high school","standard","none","65","64","62" +"male","group C","high school","standard","none","68","60","53" +"male","group A","some high school","standard","completed","47","49","49" +"female","group D","some college","free/reduced","none","71","83","83" +"female","group B","some high school","standard","completed","60","70","70" +"male","group D","master's degree","standard","none","80","80","72" +"male","group D","high school","standard","none","54","52","52" +"female","group E","some college","standard","none","62","73","70" +"female","group C","associate's degree","free/reduced","none","64","73","68" +"male","group C","associate's degree","standard","completed","78","77","77" +"female","group B","some college","standard","none","70","75","78" +"female","group C","master's degree","free/reduced","completed","65","81","81" +"female","group C","some high school","free/reduced","completed","64","79","77" +"male","group C","some college","standard","completed","79","79","78" +"female","group C","some high school","free/reduced","none","44","50","51" +"female","group E","high school","standard","none","99","93","90" +"male","group D","high school","standard","none","76","73","68" +"male","group D","some high 
school","free/reduced","none","59","42","41" +"female","group C","bachelor's degree","standard","none","63","75","81" +"female","group D","high school","standard","none","69","72","77" +"female","group D","associate's degree","standard","completed","88","92","95" +"female","group E","some college","free/reduced","none","71","76","70" +"male","group C","bachelor's degree","standard","none","69","63","61" +"male","group C","some college","standard","none","58","49","42" +"female","group D","associate's degree","free/reduced","none","47","53","58" +"female","group D","some college","standard","none","65","70","71" +"male","group B","some college","standard","completed","88","85","76" +"male","group C","bachelor's degree","standard","none","83","78","73" +"female","group C","some high school","standard","completed","85","92","93" +"female","group E","high school","standard","completed","59","63","75" +"female","group C","some high school","free/reduced","none","65","86","80" +"male","group B","bachelor's degree","free/reduced","none","73","56","57" +"male","group D","high school","standard","none","53","52","42" +"male","group D","high school","standard","none","45","48","46" +"female","group D","bachelor's degree","free/reduced","none","73","79","84" +"female","group D","some college","free/reduced","completed","70","78","78" +"female","group B","some high school","standard","none","37","46","46" +"male","group B","associate's degree","standard","completed","81","82","82" +"male","group E","associate's degree","standard","completed","97","82","88" +"female","group B","some high school","standard","none","67","89","82" +"male","group B","bachelor's degree","free/reduced","none","88","75","76" +"male","group E","some high school","standard","completed","77","76","77" +"male","group C","associate's degree","standard","none","76","70","68" +"male","group D","some high school","standard","none","86","73","70" +"male","group C","some high 
school","standard","completed","63","60","57" +"female","group E","bachelor's degree","standard","none","65","73","75" +"male","group D","high school","free/reduced","completed","78","77","80" +"male","group B","associate's degree","free/reduced","none","67","62","60" +"male","group A","some high school","standard","completed","46","41","43" +"male","group E","associate's degree","standard","completed","71","74","68" +"male","group C","high school","free/reduced","completed","40","46","50" +"male","group D","associate's degree","free/reduced","none","90","87","75" +"male","group A","some college","free/reduced","completed","81","78","81" +"male","group D","some high school","free/reduced","none","56","54","52" +"female","group C","associate's degree","standard","completed","67","84","81" +"male","group B","associate's degree","standard","none","80","76","64" +"female","group C","associate's degree","standard","completed","74","75","83" +"male","group A","some college","standard","none","69","67","69" +"male","group E","some college","standard","completed","99","87","81" +"male","group C","some high school","standard","none","51","52","44" +"female","group B","associate's degree","free/reduced","none","53","71","67" +"female","group D","high school","free/reduced","none","49","57","52" +"female","group B","associate's degree","standard","none","73","76","80" +"male","group B","bachelor's degree","standard","none","66","60","57" +"male","group D","bachelor's degree","standard","completed","67","61","68" +"female","group C","associate's degree","free/reduced","completed","68","67","69" +"female","group C","bachelor's degree","standard","completed","59","64","75" +"male","group C","high school","standard","none","71","66","65" +"female","group D","master's degree","standard","completed","77","82","91" +"male","group C","associate's degree","standard","none","83","72","78" +"male","group B","bachelor's degree","standard","none","63","71","69" +"female","group 
D","associate's degree","free/reduced","none","56","65","63" +"female","group C","high school","free/reduced","completed","67","79","84" +"female","group E","high school","standard","none","75","86","79" +"female","group C","some college","standard","none","71","81","80" +"female","group C","some high school","free/reduced","none","43","53","53" +"female","group C","high school","free/reduced","none","41","46","43" +"female","group C","some college","standard","none","82","90","94" +"male","group C","some college","standard","none","61","61","62" +"male","group A","some college","free/reduced","none","28","23","19" +"male","group C","associate's degree","standard","completed","82","75","77" +"female","group B","some high school","standard","none","41","55","51" +"male","group C","high school","standard","none","71","60","61" +"male","group C","associate's degree","standard","none","47","37","35" +"male","group E","associate's degree","standard","completed","62","56","53" +"male","group B","associate's degree","standard","none","90","78","81" +"female","group C","bachelor's degree","standard","none","83","93","95" +"female","group B","some college","free/reduced","none","61","68","66" +"male","group D","some high school","standard","completed","76","70","69" +"male","group C","associate's degree","standard","none","49","51","43" +"female","group B","some high school","free/reduced","none","24","38","27" +"female","group D","some high school","free/reduced","completed","35","55","60" +"male","group C","high school","free/reduced","none","58","61","52" +"female","group C","high school","standard","none","61","73","63" +"female","group B","high school","standard","completed","69","76","74" +"male","group D","associate's degree","standard","completed","67","72","67" +"male","group D","some college","standard","none","79","73","67" +"female","group C","high school","standard","none","72","80","75" +"male","group B","some college","standard","none","62","61","57" 
+"female","group C","bachelor's degree","standard","completed","77","94","95" +"male","group D","high school","free/reduced","none","75","74","66" +"male","group E","associate's degree","standard","none","87","74","76" +"female","group B","bachelor's degree","standard","none","52","65","69" +"male","group E","some college","standard","none","66","57","52" +"female","group C","some college","standard","completed","63","78","80" +"female","group C","associate's degree","standard","none","46","58","57" +"female","group C","some college","standard","none","59","71","70" +"female","group B","bachelor's degree","standard","none","61","72","70" +"male","group A","associate's degree","standard","none","63","61","61" +"female","group C","some college","free/reduced","completed","42","66","69" +"male","group D","some college","free/reduced","none","59","62","61" +"female","group D","some college","standard","none","80","90","89" +"female","group B","high school","standard","none","58","62","59" +"male","group B","some high school","standard","completed","85","84","78" +"female","group C","some college","standard","none","52","58","58" +"female","group D","some high school","free/reduced","none","27","34","32" +"male","group C","some college","standard","none","59","60","58" +"male","group A","bachelor's degree","free/reduced","completed","49","58","60" +"male","group C","high school","standard","completed","69","58","53" +"male","group C","bachelor's degree","free/reduced","none","61","66","61" +"female","group A","some high school","free/reduced","none","44","64","58" +"female","group D","some high school","standard","none","73","84","85" +"male","group E","some college","standard","none","84","77","71" +"female","group C","some college","free/reduced","completed","45","73","70" +"male","group D","some high school","standard","none","74","74","72" +"female","group D","some college","standard","completed","82","97","96" +"female","group D","bachelor's 
degree","standard","none","59","70","73" +"male","group E","associate's degree","free/reduced","none","46","43","41" +"female","group D","some high school","standard","none","80","90","82" +"female","group D","master's degree","free/reduced","completed","85","95","100" +"female","group A","some high school","standard","none","71","83","77" +"male","group A","bachelor's degree","standard","none","66","64","62" +"female","group B","associate's degree","standard","none","80","86","83" +"male","group C","associate's degree","standard","completed","87","100","95" +"male","group C","master's degree","free/reduced","none","79","81","71" +"female","group E","some high school","free/reduced","none","38","49","45" +"female","group A","some high school","free/reduced","none","38","43","43" +"female","group E","some college","standard","none","67","76","75" +"female","group E","bachelor's degree","standard","none","64","73","70" +"female","group C","associate's degree","free/reduced","none","57","78","67" +"female","group D","high school","standard","none","62","64","64" +"male","group D","master's degree","standard","none","73","70","75" +"male","group E","some high school","free/reduced","completed","73","67","59" +"female","group D","some college","standard","none","77","68","77" +"male","group E","some college","standard","none","76","67","67" +"male","group C","associate's degree","standard","completed","57","54","56" +"female","group C","some high school","standard","completed","65","74","77" +"male","group A","high school","free/reduced","none","48","45","41" +"female","group B","high school","free/reduced","none","50","67","63" +"female","group C","associate's degree","standard","none","85","89","95" +"male","group B","some high school","standard","none","74","63","57" +"male","group D","some high school","standard","none","60","59","54" +"female","group C","some high school","standard","completed","59","54","67" +"male","group A","some 
college","standard","none","53","43","43" +"female","group A","some college","free/reduced","none","49","65","55" +"female","group D","high school","standard","completed","88","99","100" +"female","group C","high school","standard","none","54","59","62" +"female","group C","some high school","standard","none","63","73","68" +"male","group B","associate's degree","standard","completed","65","65","63" +"female","group B","associate's degree","standard","none","82","80","77" +"female","group D","high school","free/reduced","completed","52","57","56" +"male","group D","associate's degree","standard","completed","87","84","85" +"female","group D","master's degree","standard","completed","70","71","74" +"male","group E","some college","standard","completed","84","83","78" +"male","group D","associate's degree","standard","none","71","66","60" +"male","group B","some high school","standard","completed","63","67","67" +"female","group C","bachelor's degree","free/reduced","completed","51","72","79" +"male","group E","high school","standard","none","84","73","69" +"male","group C","bachelor's degree","standard","completed","71","74","68" +"male","group C","associate's degree","standard","none","74","73","67" +"male","group D","some college","standard","none","68","59","62" +"male","group E","high school","free/reduced","completed","57","56","54" +"female","group C","associate's degree","free/reduced","completed","82","93","93" +"female","group D","high school","standard","completed","57","58","64" +"female","group D","master's degree","free/reduced","completed","47","58","67" +"female","group A","some high school","standard","completed","59","85","80" +"male","group B","some college","free/reduced","none","41","39","34" +"female","group C","some college","free/reduced","none","62","67","62" +"male","group C","bachelor's degree","standard","none","86","83","86" +"male","group C","some high school","free/reduced","none","69","71","65" +"male","group A","some high 
school","free/reduced","none","65","59","53" +"male","group C","some high school","free/reduced","none","68","63","54" +"male","group C","associate's degree","free/reduced","none","64","66","59" +"female","group C","high school","standard","none","61","72","70" +"male","group C","high school","standard","none","61","56","55" +"female","group A","some high school","free/reduced","none","47","59","50" +"male","group C","some high school","standard","none","73","66","66" +"male","group C","some college","free/reduced","completed","50","48","53" +"male","group D","associate's degree","standard","none","75","68","64" +"male","group D","associate's degree","free/reduced","none","75","66","73" +"male","group C","high school","standard","none","70","56","51" +"male","group D","some high school","standard","completed","89","88","82" +"female","group C","some college","standard","completed","67","81","79" +"female","group D","high school","standard","none","78","81","80" +"female","group A","some high school","free/reduced","none","59","73","69" +"female","group B","associate's degree","standard","none","73","83","76" +"male","group A","some high school","free/reduced","none","79","82","73" +"female","group C","some high school","standard","completed","67","74","77" +"male","group D","some college","free/reduced","none","69","66","60" +"male","group C","high school","standard","completed","86","81","80" +"male","group B","high school","standard","none","47","46","42" +"male","group B","associate's degree","standard","none","81","73","72" +"female","group C","some college","free/reduced","completed","64","85","85" +"female","group E","some college","standard","none","100","92","97" +"female","group C","associate's degree","free/reduced","none","65","77","74" +"male","group C","some college","free/reduced","none","65","58","49" +"female","group C","associate's degree","free/reduced","none","53","61","62" +"male","group C","bachelor's 
degree","free/reduced","none","37","56","47" +"female","group D","bachelor's degree","standard","none","79","89","89" +"male","group D","associate's degree","free/reduced","none","53","54","48" +"female","group E","bachelor's degree","standard","none","100","100","100" +"male","group B","high school","standard","completed","72","65","68" +"male","group C","bachelor's degree","free/reduced","none","53","58","55" +"male","group B","some college","free/reduced","none","54","54","45" +"female","group E","some college","standard","none","71","70","76" +"female","group C","some college","free/reduced","none","77","90","91" +"male","group A","bachelor's degree","standard","completed","75","58","62" +"female","group C","some college","standard","none","84","87","91" +"female","group D","associate's degree","free/reduced","none","26","31","38" +"male","group A","high school","free/reduced","completed","72","67","65" +"female","group A","high school","free/reduced","completed","77","88","85" +"male","group C","some college","standard","none","91","74","76" +"female","group C","associate's degree","standard","completed","83","85","90" +"female","group C","high school","standard","none","63","69","74" +"female","group C","associate's degree","standard","completed","68","86","84" +"female","group D","some high school","standard","none","59","67","61" +"female","group B","associate's degree","standard","completed","90","90","91" +"female","group D","bachelor's degree","standard","completed","71","76","83" +"male","group E","bachelor's degree","standard","completed","76","62","66" +"male","group D","associate's degree","standard","none","80","68","72" +"female","group D","master's degree","standard","none","55","64","70" +"male","group E","associate's degree","standard","none","76","71","67" +"male","group B","high school","standard","completed","73","71","68" +"female","group D","associate's degree","free/reduced","none","52","59","56" +"male","group C","some 
college","free/reduced","none","68","68","61" +"male","group A","high school","standard","none","59","52","46" +"female","group B","associate's degree","standard","none","49","52","54" +"male","group C","high school","standard","none","70","74","71" +"male","group D","some college","free/reduced","none","61","47","56" +"female","group C","associate's degree","free/reduced","none","60","75","74" +"male","group B","some high school","standard","completed","64","53","57" +"male","group A","associate's degree","free/reduced","completed","79","82","82" +"female","group A","associate's degree","free/reduced","none","65","85","76" +"female","group C","associate's degree","standard","none","64","64","70" +"female","group C","some college","standard","none","83","83","90" +"female","group C","bachelor's degree","standard","none","81","88","90" +"female","group B","high school","standard","none","54","64","68" +"male","group D","high school","standard","completed","68","64","66" +"female","group C","some college","standard","none","54","48","52" +"female","group D","some college","free/reduced","completed","59","78","76" +"female","group B","some high school","standard","none","66","69","68" +"male","group E","some college","standard","none","76","71","72" +"female","group D","master's degree","standard","none","74","79","82" +"female","group B","associate's degree","standard","completed","94","87","92" +"male","group C","some college","free/reduced","none","63","61","54" +"female","group E","associate's degree","standard","completed","95","89","92" +"female","group D","master's degree","free/reduced","none","40","59","54" +"female","group B","some high school","standard","none","82","82","80" +"male","group A","high school","standard","none","68","70","66" +"male","group B","bachelor's degree","free/reduced","none","55","59","54" +"male","group C","master's degree","standard","none","79","78","77" +"female","group C","bachelor's degree","standard","none","86","92","87" 
+"male","group D","some college","standard","none","76","71","73" +"male","group A","some high school","standard","none","64","50","43" +"male","group D","some high school","free/reduced","none","62","49","52" +"female","group B","some high school","standard","completed","54","61","62" +"female","group B","master's degree","free/reduced","completed","77","97","94" +"female","group C","some high school","standard","completed","76","87","85" +"female","group D","some college","standard","none","74","89","84" +"female","group E","some college","standard","completed","66","74","73" +"female","group D","some high school","standard","completed","66","78","78" +"female","group B","high school","free/reduced","completed","67","78","79" +"male","group D","some college","standard","none","71","49","52" +"female","group C","associate's degree","standard","none","91","86","84" +"male","group D","bachelor's degree","standard","none","69","58","57" +"male","group C","master's degree","free/reduced","none","54","59","50" +"male","group C","high school","standard","completed","53","52","49" +"male","group E","some college","standard","none","68","60","59" +"male","group C","some high school","free/reduced","completed","56","61","60" +"female","group C","high school","free/reduced","none","36","53","43" +"female","group D","bachelor's degree","free/reduced","none","29","41","47" +"female","group C","associate's degree","standard","none","62","74","70" +"female","group C","associate's degree","standard","completed","68","67","73" +"female","group C","some high school","standard","none","47","54","53" +"male","group E","associate's degree","standard","completed","62","61","58" +"female","group E","associate's degree","standard","completed","79","88","94" +"male","group B","high school","standard","completed","73","69","68" +"female","group C","bachelor's degree","free/reduced","completed","66","83","83" +"male","group C","associate's degree","standard","completed","51","60","58" 
+"female","group D","high school","standard","none","51","66","62" +"male","group E","bachelor's degree","standard","completed","85","66","71" +"male","group A","associate's degree","standard","completed","97","92","86" +"male","group C","high school","standard","completed","75","69","68" +"male","group D","associate's degree","free/reduced","completed","79","82","80" +"female","group C","associate's degree","standard","none","81","77","79" +"female","group D","associate's degree","standard","none","82","95","89" +"female","group D","master's degree","standard","none","64","63","66" +"male","group E","some high school","free/reduced","completed","78","83","80" +"female","group A","some high school","standard","completed","92","100","97" +"male","group C","high school","standard","completed","72","67","64" +"female","group C","high school","free/reduced","none","62","67","64" +"male","group C","master's degree","standard","none","79","72","69" +"male","group C","some high school","free/reduced","none","79","76","65" +"male","group B","bachelor's degree","free/reduced","completed","87","90","88" +"female","group B","associate's degree","standard","none","40","48","50" +"male","group D","some college","free/reduced","none","77","62","64" +"male","group E","associate's degree","standard","none","53","45","40" +"female","group C","some college","free/reduced","none","32","39","33" +"female","group C","associate's degree","standard","completed","55","72","79" +"male","group C","master's degree","free/reduced","none","61","67","66" +"female","group B","associate's degree","free/reduced","none","53","70","70" +"male","group D","some high school","standard","none","73","66","62" +"female","group D","some college","standard","completed","74","75","79" +"female","group C","some college","standard","none","63","74","74" +"male","group C","bachelor's degree","standard","completed","96","90","92" +"female","group D","some college","free/reduced","completed","63","80","80" 
+"male","group B","bachelor's degree","free/reduced","none","48","51","46" +"male","group B","associate's degree","standard","none","48","43","45" +"female","group E","bachelor's degree","free/reduced","completed","92","100","100" +"female","group D","master's degree","free/reduced","completed","61","71","78" +"male","group B","high school","free/reduced","none","63","48","47" +"male","group D","bachelor's degree","free/reduced","none","68","68","67" +"male","group B","some college","standard","completed","71","75","70" +"male","group A","bachelor's degree","standard","none","91","96","92" +"female","group C","some college","standard","none","53","62","56" +"female","group C","high school","free/reduced","completed","50","66","64" +"female","group E","high school","standard","none","74","81","71" +"male","group A","associate's degree","free/reduced","completed","40","55","53" +"male","group A","some college","standard","completed","61","51","52" +"female","group B","high school","standard","none","81","91","89" +"female","group B","some college","free/reduced","completed","48","56","58" +"female","group D","master's degree","standard","none","53","61","68" +"female","group D","some high school","standard","none","81","97","96" +"female","group E","some high school","standard","none","77","79","80" +"female","group D","bachelor's degree","free/reduced","none","63","73","78" +"female","group D","associate's degree","standard","completed","73","75","80" +"female","group D","some college","standard","none","69","77","77" +"female","group C","associate's degree","standard","none","65","76","76" +"female","group A","high school","standard","none","55","73","73" +"female","group C","bachelor's degree","free/reduced","none","44","63","62" +"female","group C","some college","standard","none","54","64","65" +"female","group A","some high school","standard","none","48","66","65" +"male","group C","some college","free/reduced","none","58","57","54" +"male","group A","some high 
school","standard","none","71","62","50" +"male","group E","bachelor's degree","standard","none","68","68","64" +"female","group E","high school","standard","none","74","76","73" +"female","group C","bachelor's degree","standard","completed","92","100","99" +"female","group C","bachelor's degree","standard","completed","56","79","72" +"male","group B","high school","free/reduced","none","30","24","15" +"male","group A","some high school","standard","none","53","54","48" +"female","group D","high school","standard","none","69","77","73" +"female","group D","some high school","standard","none","65","82","81" +"female","group D","master's degree","standard","none","54","60","63" +"female","group C","high school","standard","none","29","29","30" +"female","group E","some college","standard","none","76","78","80" +"male","group D","high school","free/reduced","none","60","57","51" +"male","group D","master's degree","free/reduced","completed","84","89","90" +"male","group C","some high school","standard","none","75","72","62" +"female","group C","associate's degree","standard","none","85","84","82" +"female","group C","master's degree","free/reduced","none","40","58","54" +"female","group E","some college","standard","none","61","64","62" +"female","group B","associate's degree","standard","none","58","63","65" +"male","group D","some college","free/reduced","completed","69","60","63" +"female","group C","some college","standard","none","58","59","66" +"male","group C","bachelor's degree","standard","completed","94","90","91" +"female","group C","associate's degree","standard","none","65","77","74" +"female","group A","associate's degree","standard","none","82","93","93" +"female","group C","high school","standard","none","60","68","72" +"female","group E","bachelor's degree","standard","none","37","45","38" +"male","group D","bachelor's degree","standard","none","88","78","83" +"male","group D","master's degree","standard","none","95","81","84" +"male","group 
C","associate's degree","free/reduced","completed","65","73","68" +"female","group C","high school","free/reduced","none","35","61","54" +"male","group B","bachelor's degree","free/reduced","none","62","63","56" +"male","group C","high school","free/reduced","completed","58","51","52" +"male","group A","some college","standard","completed","100","96","86" +"female","group E","bachelor's degree","free/reduced","none","61","58","62" +"male","group D","some college","standard","completed","100","97","99" +"male","group B","associate's degree","free/reduced","completed","69","70","63" +"male","group D","associate's degree","standard","none","61","48","46" +"male","group D","some college","free/reduced","none","49","57","46" +"female","group C","some high school","standard","completed","44","51","55" +"male","group D","some college","standard","none","67","64","70" +"male","group B","high school","standard","none","79","60","65" +"female","group B","bachelor's degree","standard","completed","66","74","81" +"female","group C","high school","standard","none","75","88","85" +"male","group D","some high school","standard","none","84","84","80" +"male","group A","high school","standard","none","71","74","64" +"female","group B","high school","free/reduced","completed","67","80","81" +"female","group D","some high school","standard","completed","80","92","88" +"male","group E","some college","standard","none","86","76","74" +"female","group D","associate's degree","standard","none","76","74","73" +"male","group D","high school","standard","none","41","52","51" +"female","group D","associate's degree","free/reduced","completed","74","88","90" +"female","group B","some high school","free/reduced","none","72","81","79" +"female","group E","high school","standard","completed","74","79","80" +"male","group B","high school","standard","none","70","65","60" +"female","group B","bachelor's degree","standard","completed","65","81","81" +"female","group D","associate's 
degree","standard","none","59","70","65" +"female","group E","high school","free/reduced","none","64","62","68" +"female","group B","high school","standard","none","50","53","55" +"female","group D","some college","standard","completed","69","79","81" +"male","group C","some high school","free/reduced","completed","51","56","53" +"female","group A","high school","standard","completed","68","80","76" +"female","group D","some college","standard","completed","85","86","98" +"female","group A","associate's degree","standard","completed","65","70","74" +"female","group B","some high school","standard","none","73","79","79" +"female","group B","some college","standard","none","62","67","67" +"male","group C","associate's degree","free/reduced","none","77","67","64" +"male","group D","some high school","standard","none","69","66","61" +"female","group D","associate's degree","free/reduced","none","43","60","58" +"male","group D","associate's degree","standard","none","90","87","85" +"male","group C","some college","free/reduced","none","74","77","73" +"male","group C","some high school","standard","none","73","66","63" +"female","group D","some college","free/reduced","none","55","71","69" +"female","group C","high school","standard","none","65","69","67" +"male","group D","associate's degree","standard","none","80","63","63" +"female","group C","some high school","free/reduced","completed","50","60","60" +"female","group C","some college","free/reduced","completed","63","73","71" +"female","group B","bachelor's degree","free/reduced","none","77","85","87" +"male","group C","some college","standard","none","73","74","61" +"male","group D","associate's degree","standard","completed","81","72","77" +"female","group C","high school","free/reduced","none","66","76","68" +"male","group D","associate's degree","free/reduced","none","52","57","50" +"female","group C","some college","standard","none","69","78","76" +"female","group C","associate's 
degree","standard","completed","65","84","84" +"female","group D","high school","standard","completed","69","77","78" +"female","group B","some college","standard","completed","50","64","66" +"female","group E","some college","standard","completed","73","78","76" +"female","group C","some high school","standard","completed","70","82","76" +"male","group D","associate's degree","free/reduced","none","81","75","78" +"male","group D","some college","free/reduced","none","63","61","60" +"female","group D","high school","standard","none","67","72","74" +"male","group B","high school","standard","none","60","68","60" +"male","group B","high school","standard","none","62","55","54" +"female","group C","some high school","free/reduced","completed","29","40","44" +"male","group B","some college","standard","completed","62","66","68" +"female","group E","master's degree","standard","completed","94","99","100" +"male","group E","some college","standard","completed","85","75","68" +"male","group D","associate's degree","free/reduced","none","77","78","73" +"male","group A","high school","free/reduced","none","53","58","44" +"male","group E","some college","free/reduced","none","93","90","83" +"female","group C","associate's degree","standard","none","49","53","53" +"female","group E","associate's degree","free/reduced","none","73","76","78" +"female","group C","bachelor's degree","free/reduced","completed","66","74","81" +"female","group D","associate's degree","standard","none","77","77","73" +"female","group C","some high school","standard","none","49","63","56" +"female","group D","some college","free/reduced","none","79","89","86" +"female","group C","associate's degree","standard","completed","75","82","90" +"female","group A","bachelor's degree","standard","none","59","72","70" +"female","group D","associate's degree","standard","completed","57","78","79" +"male","group C","high school","free/reduced","none","66","66","59" +"female","group E","bachelor's 
degree","standard","completed","79","81","82" +"female","group B","some high school","standard","none","57","67","72" +"male","group A","bachelor's degree","standard","completed","87","84","87" +"female","group D","some college","standard","none","63","64","67" +"female","group B","some high school","free/reduced","completed","59","63","64" +"male","group A","bachelor's degree","free/reduced","none","62","72","65" +"male","group D","high school","standard","none","46","34","36" +"male","group C","some college","standard","none","66","59","52" +"male","group D","high school","standard","none","89","87","79" +"female","group D","associate's degree","free/reduced","completed","42","61","58" +"male","group C","some college","standard","completed","93","84","90" +"female","group E","some high school","standard","completed","80","85","85" +"female","group D","some college","standard","none","98","100","99" +"male","group D","master's degree","standard","none","81","81","84" +"female","group B","some high school","standard","completed","60","70","74" +"female","group B","associate's degree","free/reduced","completed","76","94","87" +"male","group C","associate's degree","standard","completed","73","78","72" +"female","group C","associate's degree","standard","completed","96","96","99" +"female","group C","high school","standard","none","76","76","74" +"male","group E","associate's degree","free/reduced","completed","91","73","80" +"female","group C","some college","free/reduced","none","62","72","70" +"male","group D","some high school","free/reduced","completed","55","59","59" +"female","group B","some high school","free/reduced","completed","74","90","88" +"male","group C","high school","standard","none","50","48","42" +"male","group B","some college","standard","none","47","43","41" +"male","group E","some college","standard","completed","81","74","71" +"female","group E","associate's degree","standard","completed","65","75","77" +"male","group E","some high 
school","standard","completed","68","51","57" +"female","group D","high school","free/reduced","none","73","92","84" +"male","group C","some college","standard","none","53","39","37" +"female","group B","associate's degree","free/reduced","completed","68","77","80" +"male","group A","some high school","free/reduced","none","55","46","43" +"female","group C","some college","standard","completed","87","89","94" +"male","group D","some high school","standard","none","55","47","44" +"female","group E","some college","free/reduced","none","53","58","57" +"male","group C","master's degree","standard","none","67","57","59" +"male","group C","associate's degree","standard","none","92","79","84" +"female","group B","some college","free/reduced","completed","53","66","73" +"male","group D","associate's degree","standard","none","81","71","73" +"male","group C","high school","free/reduced","none","61","60","55" +"male","group D","bachelor's degree","standard","none","80","73","72" +"female","group A","associate's degree","free/reduced","none","37","57","56" +"female","group C","high school","standard","none","81","84","82" +"female","group C","associate's degree","standard","completed","59","73","72" +"male","group B","some college","free/reduced","none","55","55","47" +"male","group D","associate's degree","standard","none","72","79","74" +"male","group D","high school","standard","none","69","75","71" +"male","group C","some college","standard","none","69","64","68" +"female","group C","bachelor's degree","free/reduced","none","50","60","59" +"male","group B","some college","standard","completed","87","84","86" +"male","group D","some high school","standard","completed","71","69","68" +"male","group E","some college","standard","none","68","72","65" +"male","group C","master's degree","free/reduced","completed","79","77","75" +"female","group C","some high school","standard","completed","77","90","85" +"male","group C","associate's 
degree","free/reduced","none","58","55","53" +"female","group E","associate's degree","standard","none","84","95","92" +"male","group D","some college","standard","none","55","58","52" +"male","group E","bachelor's degree","free/reduced","completed","70","68","72" +"female","group D","some college","free/reduced","completed","52","59","65" +"male","group B","some college","standard","completed","69","77","77" +"female","group C","high school","free/reduced","none","53","72","64" +"female","group D","some high school","standard","none","48","58","54" +"male","group D","some high school","standard","completed","78","81","86" +"female","group B","high school","standard","none","62","62","63" +"male","group D","some college","standard","none","60","63","59" +"female","group B","high school","standard","none","74","72","72" +"female","group C","high school","standard","completed","58","75","77" +"male","group B","high school","standard","completed","76","62","60" +"female","group D","some high school","standard","none","68","71","75" +"male","group A","some college","free/reduced","none","58","60","57" +"male","group B","high school","standard","none","52","48","49" +"male","group D","bachelor's degree","standard","none","75","73","74" +"female","group B","some high school","free/reduced","completed","52","67","72" +"female","group C","bachelor's degree","free/reduced","none","62","78","79" +"male","group B","some college","standard","none","66","65","60" +"female","group B","some high school","free/reduced","none","49","58","55" +"female","group B","high school","standard","none","66","72","70" +"female","group C","some college","free/reduced","none","35","44","43" +"female","group A","some college","standard","completed","72","79","82" +"male","group E","associate's degree","standard","completed","94","85","82" +"female","group D","associate's degree","free/reduced","none","46","56","57" +"female","group B","master's degree","standard","none","77","90","84" 
+"female","group B","high school","free/reduced","completed","76","85","82" +"female","group C","associate's degree","standard","completed","52","59","62" +"male","group C","bachelor's degree","standard","completed","91","81","79" +"female","group B","some high school","standard","completed","32","51","44" +"female","group E","some high school","free/reduced","none","72","79","77" +"female","group B","some college","standard","none","19","38","32" +"male","group C","associate's degree","free/reduced","none","68","65","61" +"female","group C","master's degree","free/reduced","none","52","65","61" +"female","group B","high school","standard","none","48","62","60" +"female","group D","some college","free/reduced","none","60","66","70" +"male","group D","high school","free/reduced","none","66","74","69" +"male","group E","some high school","standard","completed","89","84","77" +"female","group B","high school","standard","none","42","52","51" +"female","group E","associate's degree","free/reduced","completed","57","68","73" +"male","group D","high school","standard","none","70","70","70" +"female","group E","associate's degree","free/reduced","none","70","84","81" +"male","group E","some college","standard","none","69","60","54" +"female","group C","associate's degree","standard","none","52","55","57" +"male","group C","some high school","standard","completed","67","73","68" +"male","group C","some high school","standard","completed","76","80","73" +"female","group E","associate's degree","standard","none","87","94","95" +"female","group B","some college","standard","none","82","85","87" +"female","group C","some college","standard","none","73","76","78" +"male","group A","some college","free/reduced","none","75","81","74" +"female","group D","some college","free/reduced","none","64","74","75" +"female","group E","high school","free/reduced","none","41","45","40" +"male","group C","high school","standard","none","90","75","69" +"male","group B","bachelor's 
degree","standard","none","59","54","51" +"male","group A","some high school","standard","none","51","31","36" +"male","group A","high school","free/reduced","none","45","47","49" +"female","group C","master's degree","standard","completed","54","64","67" +"male","group E","some high school","standard","completed","87","84","76" +"female","group C","high school","standard","none","72","80","83" +"male","group B","some high school","standard","completed","94","86","87" +"female","group A","bachelor's degree","standard","none","45","59","64" +"male","group D","bachelor's degree","free/reduced","completed","61","70","76" +"female","group B","high school","free/reduced","none","60","72","68" +"female","group C","some high school","standard","none","77","91","88" +"female","group A","some high school","standard","completed","85","90","92" +"female","group D","bachelor's degree","free/reduced","none","78","90","93" +"male","group E","some college","free/reduced","completed","49","52","51" +"female","group B","high school","free/reduced","none","71","87","82" +"female","group C","some high school","free/reduced","none","48","58","52" +"male","group C","high school","standard","none","62","67","58" +"female","group C","associate's degree","free/reduced","completed","56","68","70" +"female","group C","some high school","standard","none","65","69","76" +"female","group D","some high school","free/reduced","completed","69","86","81" +"male","group B","some high school","standard","none","68","54","53" +"female","group A","some college","free/reduced","none","61","60","57" +"female","group C","bachelor's degree","free/reduced","completed","74","86","89" +"male","group A","bachelor's degree","standard","none","64","60","58" +"female","group B","high school","standard","completed","77","82","89" +"male","group B","some college","standard","none","58","50","45" +"female","group C","high school","standard","completed","60","64","74" +"male","group E","high 
school","standard","none","73","64","57" +"female","group A","high school","standard","completed","75","82","79" +"male","group B","associate's degree","free/reduced","completed","58","57","53" +"female","group C","associate's degree","standard","none","66","77","73" +"female","group D","high school","free/reduced","none","39","52","46" +"male","group C","some high school","standard","none","64","58","51" +"female","group B","high school","free/reduced","completed","23","44","36" +"male","group B","some college","free/reduced","completed","74","77","76" +"female","group D","some high school","free/reduced","completed","40","65","64" +"male","group E","master's degree","standard","none","90","85","84" +"male","group C","master's degree","standard","completed","91","85","85" +"male","group D","high school","standard","none","64","54","50" +"female","group C","high school","standard","none","59","72","68" +"male","group D","associate's degree","standard","none","80","75","69" +"male","group C","master's degree","standard","none","71","67","67" +"female","group A","high school","standard","none","61","68","63" +"female","group E","some college","standard","none","87","85","93" +"male","group E","some high school","standard","none","82","67","61" +"male","group C","some high school","standard","none","62","64","55" +"female","group B","bachelor's degree","standard","none","97","97","96" +"male","group B","some college","free/reduced","none","75","68","65" +"female","group C","bachelor's degree","standard","none","65","79","81" +"male","group B","high school","standard","completed","52","49","46" +"male","group C","associate's degree","free/reduced","none","87","73","72" +"female","group C","associate's degree","standard","none","53","62","53" +"female","group E","master's degree","free/reduced","none","81","86","87" +"male","group D","bachelor's degree","free/reduced","completed","39","42","38" +"female","group C","some college","standard","completed","71","71","80" 
+"male","group C","associate's degree","standard","none","97","93","91" +"male","group D","some college","standard","completed","82","82","88" +"male","group C","high school","free/reduced","none","59","53","52" +"male","group B","associate's degree","standard","none","61","42","41" +"male","group E","associate's degree","free/reduced","completed","78","74","72" +"male","group C","associate's degree","free/reduced","none","49","51","51" +"male","group B","high school","standard","none","59","58","47" +"female","group C","some college","standard","completed","70","72","76" +"male","group B","associate's degree","standard","completed","82","84","78" +"male","group E","associate's degree","free/reduced","none","90","90","82" +"female","group C","bachelor's degree","free/reduced","none","43","62","61" +"male","group C","some college","free/reduced","none","80","64","66" +"male","group D","some college","standard","none","81","82","84" +"male","group C","some high school","standard","none","57","61","54" +"female","group D","some high school","standard","none","59","72","80" +"female","group D","associate's degree","standard","none","64","76","74" +"male","group C","bachelor's degree","standard","completed","63","64","66" +"female","group E","bachelor's degree","standard","completed","71","70","70" +"female","group B","high school","free/reduced","none","64","73","71" +"male","group D","bachelor's degree","free/reduced","none","55","46","44" +"female","group E","associate's degree","standard","none","51","51","54" +"female","group C","associate's degree","standard","completed","62","76","80" +"female","group E","associate's degree","standard","completed","93","100","95" +"male","group C","high school","free/reduced","none","54","72","59" +"female","group D","some college","free/reduced","none","69","65","74" +"male","group D","high school","free/reduced","none","44","51","48" +"female","group E","some college","standard","completed","86","85","91" +"female","group 
E","associate's degree","standard","none","85","92","85" +"female","group A","master's degree","free/reduced","none","50","67","73" +"male","group D","some high school","standard","completed","88","74","75" +"female","group E","associate's degree","standard","none","59","62","69" +"female","group E","some high school","free/reduced","none","32","34","38" +"male","group B","high school","free/reduced","none","36","29","27" +"female","group B","some high school","free/reduced","completed","63","78","79" +"male","group D","associate's degree","standard","completed","67","54","63" +"female","group D","some high school","standard","completed","65","78","82" +"male","group D","master's degree","standard","none","85","84","89" +"female","group C","master's degree","standard","none","73","78","74" +"female","group A","high school","free/reduced","completed","34","48","41" +"female","group D","bachelor's degree","free/reduced","completed","93","100","100" +"female","group D","some high school","free/reduced","none","67","84","84" +"male","group D","some college","standard","none","88","77","77" +"male","group B","high school","standard","none","57","48","51" +"female","group D","some college","standard","completed","79","84","91" +"female","group C","bachelor's degree","free/reduced","none","67","75","72" +"male","group E","bachelor's degree","standard","completed","70","64","70" +"male","group D","bachelor's degree","free/reduced","none","50","42","48" +"female","group A","some college","standard","none","69","84","82" +"female","group C","bachelor's degree","standard","completed","52","61","66" +"female","group C","bachelor's degree","free/reduced","completed","47","62","66" +"female","group B","associate's degree","free/reduced","none","46","61","55" +"female","group E","some college","standard","none","68","70","66" +"male","group E","bachelor's degree","standard","completed","100","100","100" +"female","group C","high school","standard","none","44","61","52" 
+"female","group C","associate's degree","standard","completed","57","77","80" +"male","group B","some college","standard","completed","91","96","91" +"male","group D","high school","free/reduced","none","69","70","67" +"female","group C","high school","free/reduced","none","35","53","46" +"male","group D","high school","standard","none","72","66","66" +"female","group B","associate's degree","free/reduced","none","54","65","65" +"male","group D","high school","free/reduced","none","74","70","69" +"male","group E","some high school","standard","completed","74","64","60" +"male","group E","associate's degree","free/reduced","none","64","56","52" +"female","group D","high school","free/reduced","completed","65","61","71" +"male","group E","associate's degree","free/reduced","completed","46","43","44" +"female","group C","some high school","free/reduced","none","48","56","51" +"male","group C","some college","free/reduced","completed","67","74","70" +"male","group D","some college","free/reduced","none","62","57","62" +"male","group D","associate's degree","free/reduced","completed","61","71","73" +"male","group C","bachelor's degree","free/reduced","completed","70","75","74" +"male","group C","associate's degree","standard","completed","98","87","90" +"male","group D","some college","free/reduced","none","70","63","58" +"male","group A","associate's degree","standard","none","67","57","53" +"female","group E","high school","free/reduced","none","57","58","57" +"male","group D","some college","standard","completed","85","81","85" +"male","group D","some high school","standard","completed","77","68","69" +"male","group C","master's degree","free/reduced","completed","72","66","72" +"female","group D","master's degree","standard","none","78","91","96" +"male","group C","high school","standard","none","81","66","64" +"male","group A","some high school","free/reduced","completed","61","62","61" +"female","group B","high school","standard","none","58","68","61" 
+"female","group C","associate's degree","standard","none","54","61","58" +"male","group B","high school","standard","none","82","82","80" +"female","group D","some college","free/reduced","none","49","58","60" +"male","group B","some high school","free/reduced","completed","49","50","52" +"female","group E","high school","free/reduced","completed","57","75","73" +"male","group E","high school","standard","none","94","73","71" +"female","group D","some college","standard","completed","75","77","83" +"female","group E","some high school","free/reduced","none","74","74","72" +"male","group C","high school","standard","completed","58","52","54" +"female","group C","some college","standard","none","62","69","69" +"male","group E","associate's degree","standard","none","72","57","62" +"male","group C","some college","standard","none","84","87","81" +"female","group D","master's degree","standard","none","92","100","100" +"female","group D","high school","standard","none","45","63","59" +"male","group C","high school","standard","none","75","81","71" +"female","group A","some college","standard","none","56","58","64" +"female","group D","some high school","free/reduced","none","48","54","53" +"female","group E","associate's degree","standard","none","100","100","100" +"female","group C","some high school","free/reduced","completed","65","76","75" +"male","group D","some college","standard","none","72","57","58" +"female","group D","some college","standard","none","62","70","72" +"male","group A","some high school","standard","completed","66","68","64" +"male","group C","some college","standard","none","63","63","60" +"female","group E","associate's degree","standard","none","68","76","67" +"female","group B","bachelor's degree","standard","none","75","84","80" +"female","group D","bachelor's degree","standard","none","89","100","100" +"male","group C","some high school","standard","completed","78","72","69" +"female","group A","high 
school","free/reduced","completed","53","50","60" +"female","group D","some college","free/reduced","none","49","65","61" +"female","group A","some college","standard","none","54","63","67" +"female","group C","some college","standard","completed","64","82","77" +"male","group B","some college","free/reduced","completed","60","62","60" +"male","group C","associate's degree","standard","none","62","65","58" +"male","group D","high school","standard","completed","55","41","48" +"female","group C","associate's degree","standard","none","91","95","94" +"female","group B","high school","free/reduced","none","8","24","23" +"male","group D","some high school","standard","none","81","78","78" +"male","group B","some high school","standard","completed","79","85","86" +"female","group A","some college","standard","completed","78","87","91" +"female","group C","some high school","standard","none","74","75","82" +"male","group A","high school","standard","none","57","51","54" +"female","group C","associate's degree","standard","none","40","59","51" +"male","group E","some high school","standard","completed","81","75","76" +"female","group A","some high school","free/reduced","none","44","45","45" +"female","group D","some college","free/reduced","completed","67","86","83" +"male","group E","high school","free/reduced","completed","86","81","75" +"female","group B","some high school","standard","completed","65","82","78" +"female","group D","associate's degree","free/reduced","none","55","76","76" +"female","group D","bachelor's degree","free/reduced","none","62","72","74" +"male","group A","high school","standard","none","63","63","62" +"female","group E","master's degree","standard","completed","88","99","95" +"male","group C","high school","free/reduced","none","62","55","55" +"female","group C","high school","free/reduced","completed","59","71","65" +"female","group D","some college","standard","completed","68","78","77" +"female","group D","some 
college","free/reduced","none","77","86","86" diff --git a/week4_scikit_learn.ipynb.ipynb b/week4_scikit_learn.ipynb.ipynb index 363fcab..7644e26 100644 --- a/week4_scikit_learn.ipynb.ipynb +++ b/week4_scikit_learn.ipynb.ipynb @@ -1,6 +1,574 @@ { - "cells": [], - "metadata": {}, + "cells": [ + { + "cell_type": "markdown", + "id": "9e9a4ad7-e5d5-420c-b8ec-0d2cd3b311c1", + "metadata": {}, + "source": [ + "# Искусственные нейронные сети: первые шаги\n", + "\n", + "Цель работы — изучить базовые принципы построения и обучения искусственных нейронных сетей на Python с использованием библиотеки scikit-learn." + ] + }, + { + "cell_type": "markdown", + "id": "2d680f8f-6874-48d1-bda8-0efd410c3559", + "metadata": {}, + "source": [ + "```python\nfrom sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.metrics import classification_report\n```" + ] + }, + { + "cell_type": "markdown", + "id": "607e372b-aa58-4fe9-9387-55160bd766c4", + "metadata": {}, + "source": [ + "## Базовая нейросеть на датасете Iris\n", + "\n", + "В качестве первого примера используется встроенный датасет Iris. Данные разделяются на обучающую и тестовую выборки." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "de6181a7-cec2-4e9e-bcfe-278838288c88", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.metrics import classification_report" + ] + }, + { + "cell_type": "markdown", + "id": "921bf426-ac10-4948-9a8c-61094ab9c78b", + "metadata": {}, + "source": [ + "## Обучение модели MLPClassifier\n", + "\n", + "Для классификации используется MLPClassifier — многослойный перцептрон. В модели задан один скрытый слой из 10 нейронов, функция активации ReLU и максимальное количество итераций 500."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4861d7a8-e701-48a4-831f-d20aefff3c46", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 10\n", + " 1 1.00 0.78 0.88 9\n", + " 2 0.85 1.00 0.92 11\n", + "\n", + " accuracy 0.93 30\n", + " macro avg 0.95 0.93 0.93 30\n", + "weighted avg 0.94 0.93 0.93 30\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Артем\\lab4_networks\\.venv\\Lib\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:785: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.metrics import classification_report\n", + "\n", + "X, y = load_iris(return_X_y=True)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X,\n", + " y,\n", + " test_size=0.2,\n", + " random_state=42\n", + ")\n", + "\n", + "clf = MLPClassifier(\n", + " hidden_layer_sizes=(10,),\n", + " activation=\"relu\",\n", + " max_iter=500,\n", + " random_state=42\n", + ")\n", + "\n", + "clf.fit(X_train, y_train)\n", + "\n", + "y_pred = clf.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "2e8b27a1-9760-4666-86db-976e3691759c", + "metadata": {}, + "source": [ + "## Изменение количества итераций\n", + "\n", + "Для проверки влияния количества итераций были обучены модели с max_iter=100 и max_iter=2500." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e780d1fe-7579-4327-8e8f-5e68b390db32", + "metadata": {}, + "outputs": [], + "source": [ + "for iterations in [100, 2500]:\n", + " model = MLPClassifier(\n", + " hidden_layer_sizes=(10,),\n", + " activation=\"relu\",\n", + " max_iter=iterations,\n", + " random_state=42\n", + " )\n", + "\n", + " model.fit(X_train, y_train)\n", + " prediction = model.predict(X_test)\n", + "\n", + " print(f\"Результат при max_iter={iterations}\")\n", + " print(classification_report(y_test, prediction))\n", + " print(\"-\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "a3dbf204-2324-4020-8236-b3b487939995", + "metadata": {}, + "source": [ + "## Вывод по базовой модели\n", + "\n", + "В результате была обучена базовая модель MLPClassifier на датасете Iris. При изменении параметра max_iter можно увидеть, как количество итераций влияет на процесс обучения модели. При малом числе итераций оптимизатор может не успеть сойтись — в этом случае scikit-learn выдаёт предупреждение ConvergenceWarning (оно появилось уже при max_iter=500). При большем числе итераций у оптимизатора больше шагов для достижения сходимости, поэтому результат обучения становится стабильнее.\n" + ] + }, + { + "cell_type": "markdown", + "id": "a45ef132-8d74-4415-b361-186a4ff703b5", + "metadata": {}, + "source": [ + "# Самостоятельное задание\n", + "\n", + "Для самостоятельного задания был выбран раздел Nearest Neighbors из примеров библиотеки scikit-learn. В качестве примера используется метод Local Outlier Factor (LOF), предназначенный для обнаружения выбросов в данных.\n", + "\n", + "Цель задачи — создать набор данных, добавить к нему выбросы, обучить модель LOF и визуально определить, какие точки были распознаны как аномальные."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caa5e6d2-9ad4-49d0-8b06-c525287a55d9", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from sklearn.datasets import make_moons\n", + "from sklearn.neighbors import LocalOutlierFactor\n", + "from sklearn.preprocessing import StandardScaler\n" + ] + }, + { + "cell_type": "markdown", + "id": "77af768d-ebb7-4094-a7bc-97716347dc79", + "metadata": {}, + "source": [ + "## Генерация данных\n", + "\n", + "Для эксперимента используется сгенерированный датасет make_moons. К нему добавляются случайные точки, которые будут выступать в роли выбросов." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c47d2cdc-33c7-48aa-ad7d-1a1536a047af", + "metadata": {}, + "outputs": [], + "source": [ + "X, y = make_moons(n_samples=300, noise=0.1, random_state=42)\n", + "\n", + "np.random.seed(42)\n", + "outliers = np.random.uniform(\n", + " low=[-2, -1],\n", + " high=[3, 2],\n", + " size=(20, 2)\n", + ")\n", + "\n", + "X = np.vstack([X, outliers])\n", + "y = np.hstack([y, -1 * np.ones(20)])\n", + "\n", + "print(\"Размер датасета:\", X.shape)\n", + "print(\"Количество выбросов:\", 20)\n" + ] + }, + { + "cell_type": "markdown", + "id": "4be0b6de-c584-456c-b396-433e8ae8f175", + "metadata": {}, + "source": [ + "## Препроцессинг данных\n", + "\n", + "Перед обучением модели данные были масштабированы с помощью StandardScaler. Это нужно для того, чтобы признаки имели сопоставимый масштаб." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43408f4f-0914-4f61-b4d2-0143e9e607af", + "metadata": {}, + "outputs": [], + "source": [ + "scaler = StandardScaler()\n", + "X_scaled = scaler.fit_transform(X)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72179dfd-a2ba-4960-afb2-2e5184f62620", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 5))\n", + "\n", + "plt.scatter(\n", + " X_scaled[y != -1, 0],\n", + " X_scaled[y != -1, 1],\n", + " c=y[y != -1],\n", + " cmap=\"coolwarm\",\n", + " label=\"Основные точки\"\n", + ")\n", + "\n", + "plt.scatter(\n", + " X_scaled[y == -1, 0],\n", + " X_scaled[y == -1, 1],\n", + " c=\"black\",\n", + " marker=\"x\",\n", + " label=\"Добавленные выбросы\"\n", + ")\n", + "\n", + "plt.title(\"Исходные данные с добавленными выбросами\")\n", + "plt.xlabel(\"Признак 1\")\n", + "plt.ylabel(\"Признак 2\")\n", + "plt.legend()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "5927bdcb-fc4b-472d-a8e5-03da8c48f30b", + "metadata": {}, + "source": [ + "## Обучение модели LOF\n", + "\n", + "Для поиска выбросов используется алгоритм Local Outlier Factor. Он сравнивает локальную плотность точек и определяет объекты, которые находятся в менее плотных областях." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06513802-bcf8-4e69-8597-1f2fad9280f5", + "metadata": {}, + "outputs": [], + "source": [ + "lof = LocalOutlierFactor(\n", + " n_neighbors=20,\n", + " contamination=0.1\n", + ")\n", + "\n", + "y_pred = lof.fit_predict(X_scaled)\n", + "\n", + "print(\"Метки модели:\")\n", + "print(np.unique(y_pred, return_counts=True))\n" + ] + }, + { + "cell_type": "markdown", + "id": "55b66101-388e-4b2a-a50a-b76262bf7704", + "metadata": {}, + "source": [ + "## Результаты\n", + "\n", + "После обучения модели был построен график, на котором отмечены точки, определённые алгоритмом как нормальные объекты и как выбросы."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1514d0c9-e332-4534-a269-c859eebda40d", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 5))\n", + "\n", + "plt.scatter(\n", + " X_scaled[y_pred != -1, 0],\n", + " X_scaled[y_pred != -1, 1],\n", + " c=\"blue\",\n", + " label=\"Нормальные точки\"\n", + ")\n", + "\n", + "plt.scatter(\n", + " X_scaled[y_pred == -1, 0],\n", + " X_scaled[y_pred == -1, 1],\n", + " c=\"red\",\n", + " marker=\"x\",\n", + " label=\"Выбросы по LOF\"\n", + ")\n", + "\n", + "plt.title(\"Результаты обнаружения выбросов методом LOF\")\n", + "plt.xlabel(\"Признак 1\")\n", + "plt.ylabel(\"Признак 2\")\n", + "plt.legend()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "dce51bf9-091c-4de1-86fd-e3dff52e4ae8", + "metadata": {}, + "source": [ + "## Интерпретация результатов\n", + "\n", + "Алгоритм Local Outlier Factor определил выбросы как точки, расположенные в областях с низкой локальной плотностью. Большая часть добавленных случайных точек была выделена как аномальная.\n", + "\n", + "Некоторые точки из основной выборки также могут быть определены как выбросы, если они находятся на границе распределения или отличаются от соседних объектов. Это показывает, что результат работы LOF зависит от параметров n_neighbors и contamination.\n", + "\n", + "Метод LOF подходит для обнаружения аномалий в данных, особенно когда выбросы отличаются от основной массы объектов по расположению относительно соседей." + ] + }, + { + "cell_type": "markdown", + "id": "ef335fdd-a5ba-40ee-a596-aeeb8dfe9497", + "metadata": {}, + "source": [ + "# Работа с внешним датасетом\n", + "\n", + "Для второй части самостоятельного задания был использован внешний датасет students_performance.csv. В нём содержатся данные о студентах и их результатах по математике, чтению и письму. Цель этапа — применить метод Local Outlier Factor к реальным табличным данным и определить аномальные значения." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e062d3fa-5f7b-48ae-81bc-f6192e41fbf9", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "students_df = pd.read_csv(\"data/students_performance.csv\")\n", + "students_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "237b4a11-4b73-49dd-bec9-15df519a4300", + "metadata": {}, + "source": [ + "## Первичный анализ данных\n", + "\n", + "Перед применением модели были изучены структура таблицы и статистическое описание признаков, а также проверено наличие пропущенных значений." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29ed515f-4a10-42b7-8d85-42de4bbe312c", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Информация о таблице:\")\n", + "students_df.info()\n", + "\n", + "print(\"Статистическое описание:\")\n", + "display(students_df.describe())\n", + "\n", + "print(\"Количество пропущенных значений:\")\n", + "display(students_df.isnull().sum())" + ] + }, + { + "cell_type": "markdown", + "id": "00aef9c1-805e-4692-9c9e-1af3c5a4580f", + "metadata": {}, + "source": [ + "## Выбор признаков\n", + "\n", + "Для поиска выбросов были выбраны числовые признаки: результаты по математике, чтению и письму." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7fa9808-a63c-4cf1-94bb-44abe2d34936", + "metadata": {}, + "outputs": [], + "source": [ + "features = students_df[[\"math score\", \"reading score\", \"writing score\"]]\n", + "features.head()" + ] + }, + { + "cell_type": "markdown", + "id": "3f1b701a-fdf8-453e-8dd1-eef6fdd999a1", + "metadata": {}, + "source": [ + "## Препроцессинг данных\n", + "\n", + "Перед применением LOF числовые признаки были масштабированы с помощью StandardScaler."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39558ba0-8f6c-41f4-9631-e7114906728e", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "features_scaled = scaler.fit_transform(features)\n", + "\n", + "features_scaled[:5]\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a19eb94-ff3c-4db5-aa25-78f084f6c7fa", + "metadata": {}, + "source": [ + "## Обучение модели LOF\n", + "\n", + "Для поиска аномальных объектов использовался метод Local Outlier Factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8657a361-cf92-4bf0-ae9a-bb0d6520bd0b", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import LocalOutlierFactor\n", + "\n", + "lof_students = LocalOutlierFactor(\n", + " n_neighbors=20,\n", + " contamination=0.05\n", + ")\n", + "\n", + "students_pred = lof_students.fit_predict(features_scaled)\n", + "\n", + "students_df[\"outlier\"] = students_pred\n", + "students_df[\"outlier\"].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59ee0a68-21c7-4f24-844e-1b4bdc7ae1bd", + "metadata": {}, + "outputs": [], + "source": [ + "outliers_df = students_df[students_df[\"outlier\"] == -1]\n", + "outliers_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "b85b8cad-a11e-43ee-959d-d47ea1d768fc", + "metadata": {}, + "source": [ + "## Визуализация результата\n", + "\n", + "На графике показана связь результатов по математике и чтению. Нормальные объекты и выбросы отмечены разными цветами." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "422b117c-611b-4dd3-bf1e-33fbabfdd16d", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(8, 5))\n", + "\n", + "plt.scatter(\n", + " students_df[students_df[\"outlier\"] == 1][\"math score\"],\n", + " students_df[students_df[\"outlier\"] == 1][\"reading score\"],\n", + " c=\"blue\",\n", + " label=\"Нормальные объекты\"\n", + ")\n", + "\n", + "plt.scatter(\n", + " students_df[students_df[\"outlier\"] == -1][\"math score\"],\n", + " students_df[students_df[\"outlier\"] == -1][\"reading score\"],\n", + " c=\"red\",\n", + " marker=\"x\",\n", + " label=\"Выбросы\"\n", + ")\n", + "\n", + "plt.title(\"LOF на датасете students_performance.csv\")\n", + "plt.xlabel(\"Балл по математике\")\n", + "plt.ylabel(\"Балл по чтению\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "2dc45ef7-5585-45b9-ab92-2b6aa2b27a55", + "metadata": {}, + "source": [ + "## Интерпретация результатов\n", + "\n", + "Метод Local Outlier Factor выделил часть студентов как выбросы. Такие объекты отличаются от основной массы данных по сочетанию результатов по математике, чтению и письму.\n", + "\n", + "Выбросами могут быть студенты с очень низкими или необычно высокими результатами относительно остальных наблюдений. LOF оценивает не один отдельный балл, а положение объекта относительно соседних точек в пространстве признаков.\n", + "\n", + "В результате метод был применён не только к сгенерированным данным, но и к внешнему табличному датасету, загруженному через pandas." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.4" + } + }, "nbformat": 4, "nbformat_minor": 5 }