import pandas
from holoviews import *
from holoviews.interface.seaborn import DFrame
import numpy as np
%load_ext holoviews.ipython
marking_dfa1 = pandas.read_csv('./Student_MarksA1.csv')[0:81]
marking_dfa1 = marking_dfa1.filter([c for c in marking_dfa1 if c.startswith('Question ') or c in ['Total', 'Index', 'Student Number']])
marking_dfa1['TotalA1'] = [float(t.split('%')[0]) for t in marking_dfa1['Total']]
marking_dfa1.rename(columns={'Tutorial Group': 'TG'}, inplace=True)
marking_df = pandas.read_csv('./Student_Marks.csv')[0:95]
marking_df['Total'] = [float(t.split('%')[0]) for t in marking_df.Total]
marking_df.rename(columns={'Total': 'TotalA2'}, inplace=True)
merged = marking_df.merge(marking_dfa1, on=['Student Number']).reset_index()
merged['Score Difference'] = merged.TotalA2 - merged.TotalA1
merged_df = DFrame(merged.filter(['TotalA1', 'TotalA2', 'Tutorial Group']), plot_type='violinplot')
%output size=200
First things first: we'll check whether the marking process suffered from any considerable bias over time, which is possible since the scores are indexed consecutively in the order I marked them. We'll show the results for both assignments to confirm consistency.
%%opts Regression [apply_databounds=True]
DFrame(merged).regression('index', 'TotalA1', group='By_Index', reduce_fn=np.mean, extents=(1,0,84,100)) *\
DFrame(merged).regression('index', 'TotalA2', group='By_Index', reduce_fn=np.mean, extents=(1,0,84,100)) +\
DFrame(merged).regression('Tutorial Group', 'TotalA1', group='By_Group', reduce_fn=np.mean, extents=(1,0,8,100)) *\
DFrame(merged).regression('Tutorial Group', 'TotalA2', group='By_Group', reduce_fn=np.mean, extents=(1,0,8,100))
As we can see, the data does not show any trend that isn't explained by the variability in performance between the Tutorial Groups. In particular, the trends for the first and second assignments are largely the same.
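To put a number on that, we can fit an ordinary least-squares line through the scores in marking order and check whether the slope is distinguishable from zero. A minimal sketch, assuming scipy is available (the column names are those of the merged DataFrame above):
from scipy import stats

# Regress each assignment's total score against marking order; a slope
# near zero with a large p-value indicates no drift over time.
for total in ['TotalA1', 'TotalA2']:
    slope, intercept, r, p, stderr = stats.linregress(merged['index'], merged[total])
    print("{}: slope={:+.3f} marks/script, r={:.2f}, p={:.3f}".format(total, slope, r, p))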
%%output dpi=120 size=100
%%opts Regression [apply_databounds=True]
DFrame(marking_df.reset_index()).regression('index', 'TotalA2', mdims=['Tutorial Group'],
                                            extents=(None, 0, None, 100)).layout('Tutorial Group')
We can break this down by Tutorial Group to check for any trends within each group that we may have missed. Trends that carry through multiple groups would be indicative of bias; that is not the case here.
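If we want those per-group trends as numbers rather than plots, a quick sketch (again assuming scipy):
from scipy import stats

# Slope of score against marking order within each Tutorial Group;
# the same sign across most groups would hint at a systematic bias.
for tg, group in marking_df.reset_index().groupby('Tutorial Group'):
    slope, _, _, p, _ = stats.linregress(group['index'], group['TotalA2'])
    print("Group {}: slope={:+.3f} (p={:.2f}, n={})".format(tg, slope, p, len(group)))
Since the plots already rule out any such pattern, let's move on to the distribution of the total scores, both overall and per Tutorial Group: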
%%opts Distribution (bins=20 hist_kws={'range':(0,100)} kde_kws={'cut':0} rug=True color='indianred') DFrame (cut=0)
Distribution(marking_df.TotalA2, key_dimensions=['Total Score']) +\
DFrame(marking_df, group='Total Score', plot_type='violinplot', x='Tutorial Group', y='TotalA2')
Next, the mean total score broken down by Tutorial Group:
%%opts DFrame (kind='bar' aspect=2)
DFrame(marking_df, plot_type='factorplot', x='Tutorial Group', y='TotalA2')
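For the record, the numbers behind the factorplot can be pulled out directly with a groupby; a one-line sketch:
# Mean, standard deviation and group size of the total score per Tutorial Group.
print(marking_df.groupby('Tutorial Group')['TotalA2'].agg(['mean', 'std', 'count']))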
Finally, a breakdown by degree classification:
first = sum(marking_df.TotalA2 >= 70)
twoone = sum((60 <= marking_df.TotalA2) & (marking_df.TotalA2 < 70))
twotwo = sum((50 <= marking_df.TotalA2) & (marking_df.TotalA2 < 60))
third = sum((40 <= marking_df.TotalA2) & (marking_df.TotalA2 < 50))
fail = sum(marking_df.TotalA2 < 40)
nstudents = float(len(marking_df.TotalA2))
print("Total with a First: {} ({:.1f} %)".format(first, 100 * first / nstudents))
print("Total with a 2:1:   {} ({:.1f} %)".format(twoone, 100 * twoone / nstudents))
print("Total with a 2:2:   {} ({:.1f} %)".format(twotwo, 100 * twotwo / nstudents))
print("Total with a Third: {} ({:.1f} %)".format(third, 100 * third / nstudents))
print("Total with a Fail:  {} ({:.1f} %)".format(fail, 100 * fail / nstudents))
And here are some basic stats:
marking_df.TotalA2.describe()
questions = ['Question %d' % q for q in range(1, 7)]
stacked = []
# Build a long-format table with one row per (student, question) pair.
for idx, (sn, group) in enumerate(marking_df.groupby('Student Number')):
    group_df = pandas.DataFrame(group.filter(questions).stack()).reset_index(level=1)
    group_df.insert(0, 'Student Number', sn)
    tg = marking_df[marking_df['Student Number'] == sn]['Tutorial Group'].iloc[0]
    group_df.insert(0, 'Tutorial Group', tg)
    group_df.insert(0, 'Index', idx)
    stacked.append(group_df)
stacked_df = pandas.concat(stacked)
stacked_df.columns = ['Index', 'Tutorial Group', 'Student Number', 'Question', 'Mark']
stacked_df['Question'] = [int(q.split()[-1]) for q in stacked_df['Question']]
stacked_df.index = list(range(len(stacked_df)))
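The same long-format table can be built in one step with pandas.melt instead of the explicit loop; a sketch, assuming each Student Number appears exactly once in marking_df (which the groupby above implies):
# Reshape the per-question columns into (Question, Mark) rows in one call.
melted = pandas.melt(marking_df, id_vars=['Tutorial Group', 'Student Number'],
                     value_vars=questions, var_name='Question', value_name='Mark')
melted['Question'] = [int(q.split()[-1]) for q in melted['Question']]
# Recreate the per-student Index used above; groupby sorts its keys, so
# enumerating the sorted unique student numbers matches the loop's idx.
sn_index = {sn: i for i, sn in enumerate(sorted(marking_df['Student Number'].unique()))}
melted.insert(0, 'Index', melted['Student Number'].map(sn_index))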
qdim = Dimension('Question', type=int)
print "The overall mean score per question was:", stacked_df.Mark.mean()
print "With a standard deviation of:", stacked_df.Mark.std()
%%output size=300
%%opts DFrame (kind='point' aspect=2 palette='Set2')
DFrame(stacked_df, plot_type='factorplot', x='Question', y='Mark', x2='Tutorial Group')
And the same thing broken down further, first per Tutorial Group and then per student:
%%output dpi=120
DFrame(stacked_df, plot_type='factorplot', x='Question', y='Mark')(style=dict(kind='point', hue='Tutorial Group', col='Tutorial Group', palette='Set2', col_wrap=4))
%%output dpi=120
%%opts DFrame (kind='point' hue='Index' col='Index' palette='Set2' col_wrap=4)
DFrame(stacked_df, plot_type='factorplot', x='Question', y='Mark')
Next, let's look at the marks for each student and question as a heatmap.
from matplotlib.ticker import FormatStrFormatter
Dimension.type_formatters[int] = FormatStrFormatter("%d")
Dimension.type_formatters[float] = FormatStrFormatter("%.3g")
Dimension.type_formatters[np.float32] = FormatStrFormatter("%.3g")
Dimension.type_formatters[np.float64] = FormatStrFormatter("%.3g")
%%output size=450 dpi=100
%%opts HeatMap [yticks=100 show_values=False] Histogram [xticks=6]
hm_df = stacked_df.filter(['Question', 'Index', 'Mark'])
DFrame(hm_df, dimensions={'Question': qdim}).heatmap(['Question', 'Index'], 'Mark').hist(num_bins=11)
Now let's have a look at how students performed from the first assignment to the second. The easiest way to do so is to show linked violin plots side by side:
%%opts DFrame (inner='points' join_rm=True cut=0 color='RdGy_r') NdLayout [figure_size=50]
merged_df.holomap(['Tutorial Group']).layout('Tutorial Group')
Next, let's look at the score difference between the first and second assignments.
DFrame(merged).distribution('Score Difference', group='Distribution of score difference b/w assignments')
That's probably the best approximation of a Gaussian we could expect from 90 samples.
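We can back that impression up with a quick normality test; a sketch using scipy.stats.normaltest, which combines skew and kurtosis into one statistic:
from scipy import stats

# D'Agostino-Pearson test on the score differences; a large p-value
# means we cannot reject the hypothesis that they are normally distributed.
stat, p = stats.normaltest(merged['Score Difference'])
print("normaltest: statistic={:.2f}, p={:.3f}".format(stat, p))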
Finally, we can run a simple linear regression between the scores on the first and second assignments, which should demonstrate at least some consistency:
%%opts Bivariate [joint=True apply_databounds=True] (kind='reg')
DFrame(merged).bivariate(['TotalA1', 'TotalA2'], [], extents=(0,0,100,100), group='Regression b/w Assignment 1 & 2 score', reduce_fn=np.mean)
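To attach a number to the relationship the joint plot shows, we can compute the correlation directly; a minimal sketch, again assuming scipy:
from scipy import stats

# Pearson correlation between the two assignment totals; an r well above
# zero confirms that performance carries over from one assignment to the next.
r, p = stats.pearsonr(merged.TotalA1, merged.TotalA2)
print("r={:.2f} (r^2={:.2f}), p={:.3g}".format(r, r ** 2, p))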