Model Evaluation

Multiclass classification evaluator
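The loop below assumes three prediction DataFrames, lrpredictions, nbpredictions, and rfpredictions, each produced by calling transform() on a held-out test set so that they contain "prediction" and "label" columns. As a minimal sketch of how one of them might be built (the train/test splits and the "features" column are assumptions for illustration, not part of this page):

    from pyspark.ml.classification import LogisticRegression

    # Hypothetical setup: 'train' and 'test' are assumed, pre-vectorized splits
    # with "features" and "label" columns.
    lr = LogisticRegression(featuresCol="features", labelCol="label")
    lrModel = lr.fit(train)
    lrpredictions = lrModel.transform(test)   # adds a "prediction" column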

from pyspark.mllib.evaluation import MulticlassMetrics
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

evaluator = MulticlassClassificationEvaluator(predictionCol="prediction")

for model in ["lrpredictions", "nbpredictions", "rfpredictions"]:
    df = globals()[model]

    # Convert the predictions to an RDD of (prediction, label) pairs
    # for the RDD-based MulticlassMetrics API
    predictionAndLabels = df.select("prediction", "label").rdd

    # Instantiate the metrics object
    metrics = MulticlassMetrics(predictionAndLabels)

    # Overall statistics
    # Note: the no-argument precision()/recall()/fMeasure() calls are deprecated
    # in newer Spark releases in favor of accuracy and the weighted* properties below
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()

    print("Summary Stats for:", model)
    #print(metrics.confusionMatrix())
    print("Accuracy = %s" % evaluator.evaluate(df))
    print("Precision = %s" % precision)
    print("Recall = %s" % recall)
    print("F1 Score = %s" % f1Score)

    # Weighted stats
    #print("Weighted recall = %s" % metrics.weightedRecall)
    #print("Weighted precision = %s" % metrics.weightedPrecision)
    #print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
    #print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
    #print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
    print("\n")