In [10]:
import pandas as pd
from glob import glob
pd.options.display.float_format = '{:,.6f}'.format
import pandas as pd
from critdd import Diagram

In [2]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib import ticker
%matplotlib inline
import matplotlib
import matplotlib.pylab as plt
# Global matplotlib styling for every figure created later in the notebook.
# The previous version assigned 'figure.figsize' three times with different
# values and resized a throw-away empty figure obtained via gcf(); only the
# final rcParams update below had any effect on later plots, so it is the
# only statement kept (effective default figure size: 10 x 10 inches).
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (10, 10),
          'axes.labelsize': 'x-large',
          'axes.titlesize': 'x-large',
          'xtick.labelsize': 'large',
          'ytick.labelsize': 'large'}
plt.rcParams.update(params)

<Figure size 1500x1000 with 0 Axes>

In [3]:
class Tex:
    """Summarise AutoML benchmark results as LaTeX tables and per-budget figures.

    The results frame must provide the columns ``automl``, ``project``,
    ``seed``, ``time_budget`` and ``score``; these are the columns the
    methods of this class read.
    """

    # Number of distinct projects / seeds a complete results frame contains.
    EXPECTED_PROJECTS = 31
    EXPECTED_SEEDS = 10

    def __init__(self, path):
        """Load the pickled results frame stored at ``path`` and validate it.

        Args:
            path: location of a pickle readable by ``pandas.read_pickle``.

        Raises:
            AssertionError: if the loaded frame fails ``check()``.
        """
        # NOTE(review): unpickling can execute arbitrary code; only load
        # result files that come from a trusted source.
        self.raw_df = pd.read_pickle(path)

        # Column order used by get_overall() and row order used by __str__().
        self.automls = ['Baseline',
                        'MLJAR',
                        'FLAML',
                        'AutoSKLearn',
                        'H2O',
                        'TPOT',
                        'AutoGluon',
                        'RSS']

        # Row order of the table produced by get_overall().
        self.time_budgets = [30,
                             60,
                             120,
                             180,
                             300]

        assert self.check()

    def __str__(self):
        """Return the first result row of every framework as markdown tables."""
        # Use the instance's own frame; the previous version read an undefined
        # local `df` and therefore depended on a notebook-level global.
        df = self.raw_df
        out = '\n'
        for automl in self.automls:
            out += f'{df[df.automl == automl].head(1).to_markdown()}\n'
        return out

    def check(self):
        """Validate that the results frame has the expected composition.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: on an unknown framework name or an unexpected
                number of frameworks, projects, seeds or time budgets.
        """
        df = self.raw_df
        found_automls = df.automl.unique()
        unknown = [a for a in found_automls if a not in self.automls]
        assert not unknown, f'unexpected automl values: {unknown}'
        # The comparison must sit outside len(); `len(x.unique() == 8)` only
        # measured the length of a boolean array and could never fail on
        # non-empty data.
        assert len(found_automls) == len(self.automls), \
            f'expected {len(self.automls)} automl frameworks, found {len(found_automls)}'
        assert len(df.project.unique()) == self.EXPECTED_PROJECTS, \
            f'expected {self.EXPECTED_PROJECTS} projects, found {len(df.project.unique())}'
        assert len(df.seed.unique()) == self.EXPECTED_SEEDS, \
            f'expected {self.EXPECTED_SEEDS} seeds, found {len(df.seed.unique())}'
        assert len(df.time_budget.unique()) == len(self.time_budgets), \
            f'expected {len(self.time_budgets)} time budgets, found {len(df.time_budget.unique())}'
        return True

    def get_overall(self):
        """Build the "mean +/- std of score" table per framework and time budget.

        Returns:
            (tex, out_df): ``out_df`` has one row per entry of
            ``self.time_budgets`` and one column per entry of
            ``self.automls``; each cell is a string holding the mean score
            (4 decimals) and its standard deviation (2 decimals). ``tex`` is
            the LaTeX rendering of ``out_df`` produced by ``_get_tex``.
        """
        df = self.raw_df
        columns = {}
        for automl in self.automls:
            # Aggregate the score column only: averaging the string column
            # `automl` raises on recent pandas versions.
            stats = (df.loc[df.automl == automl]
                       .groupby('time_budget')['score']
                       .agg(['mean', 'std']))
            cells = {}
            for budget in stats.index:
                score = stats.loc[budget, 'mean']
                std = stats.loc[budget, 'std']
                cells[budget] = f' {score.round(4)} $\\pm$ {std.round(2)} '
            columns[automl] = pd.Series(cells, dtype=object)
        # Align rows by time-budget label instead of assigning the index
        # positionally, so a budget missing for one framework becomes NaN
        # rather than shifting or breaking the table.
        out_df = pd.DataFrame(columns).reindex(self.time_budgets)
        tex = self._get_tex(out_df)
        return tex, out_df

    def _get_tex(self, df):
        """Render ``df`` with ``DataFrame.to_latex`` and undo LaTeX escaping.

        The replacements turn escaped ``$``, backslash and braces back into
        their raw form so math such as ``$\\pm$`` inside the cells survives.
        """
        latex = df.to_latex()
        # Raw strings avoid the invalid escape sequences ('\$', '\{', '\}')
        # of the previous version; the runtime values are unchanged.
        for escaped, plain in ((r'\$', '$'),
                               (r'\textbackslash ', '\\'),
                               (r'\{', '{'),
                               (r'\}', '}')):
            latex = latex.replace(escaped, plain)
        return latex

    def save_time_budget_figure(self):
        """Plot and save the mean score per project for every time budget.

        One figure is produced per time budget (largest budget first), with
        one line per framework, and written to ``fig_a_<time_budget>.jpg``.

        Returns:
            List of the matplotlib axes, in plotting order.
        """
        df = self.raw_df
        axs = []
        for time_budget in sorted(df.time_budget.unique(), reverse=True):
            # Rows: project, columns: framework, values: mean score.
            mean_scores = (df.loc[df.time_budget == time_budget]
                             .groupby(['project', 'automl'])['score']
                             .mean()
                             .unstack('automl'))

            ax = mean_scores.plot(rot=45, style=['r*-', 'b--', 'y^-',
                                                 'g*', 'y*', 'r--',
                                                 'bo-', 'g:'], linewidth=2.0)
            ax.set_title(f'Time Budget:{time_budget}')
            ax.set_xlabel('Dataset')
            ax.set_ylabel('$R^2$')
            # Label the lines from the plotted columns. The previous version
            # took labels from the (project, automl) row index, which matches
            # the lines only if the first project contains every framework.
            ax.legend(list(mean_scores.columns), fontsize=20)
            ax.grid(visible=True, which='major', color='0.75', linestyle='-')
            ax.figure.savefig(f'fig_a_{time_budget}.jpg')
            plt.show()
            axs.append(ax)
        return axs

In [7]:
# Preview the loaded results frame.
# NOTE(review): `tex` is not defined in any cell shown here; presumably it is
# a Tex instance created in another cell - confirm the notebook still runs
# top to bottom on a fresh kernel.
tex.raw_df

Unnamed: 0,automl,project,seed,time_budget,score
0,AutoGluon,361072,219,30,0.848995
1,AutoGluon,361072,219,60,0.986669
2,AutoGluon,361072,219,120,0.986655
3,AutoGluon,361072,219,180,0.987282
4,AutoGluon,361072,219,300,0.987274
...,...,...,...,...,...
12128,TPOT,361104,194,30,0.000000
12129,TPOT,361104,194,60,0.000000
12130,TPOT,361104,219,30,0.000000
12131,TPOT,361104,219,60,0.000000


In [17]:
# List the distinct project ids present in the results
# (Tex.check() expects 31 of them).
tex.raw_df.project.unique()

array([361072, 361073, 361074, 361075, 361076, 361077, 361078, 361079,
       361080, 361081, 361082, 361083, 361084, 361085, 361086, 361087,
       361088, 361089, 361090, 361092, 361093, 361094, 361095, 361096,
       361097, 361098, 361099, 361101, 361102, 361103, 361104])

In [8]:
# Build the project x framework score table used for the CD diagram.
# pivot_table aggregates with its default function (mean), so each cell is
# the mean score over all remaining dimensions of that (project, automl) pair.
df = tex.raw_df[['automl', 'project', 'score']]
df_pivot = df.pivot_table(values='score', index='project', columns='automl')

# Within each row, flag values that repeat an earlier column's value; a
# column is dropped only if it is flagged in every row.
repeated_in_row = df_pivot.apply(lambda row: row.duplicated(), axis=1)
redundant_columns = repeated_in_row.all()
df_pivot = df_pivot.loc[:, ~redundant_columns].copy()
df_pivot.columns

Index(['AutoGluon', 'AutoSKLearn', 'Baseline', 'FLAML', 'H2O', 'MLJAR', 'RSS',
       'TPOT'],
      dtype='object', name='automl')

In [13]:
# Build a critical-difference (CD) diagram from the pivoted score table
# (rows: projects, columns: AutoML frameworks; higher scores are better).
cd_alpha = .05
cd_adjustment = "holm"

diagram = Diagram(
    df_pivot.to_numpy(),
    treatment_names=df_pivot.columns,
    maximize_outcome=True,
)

# Average rank of each treatment and the groups of statistically
# indistinguishable treatments. Neither is the last expression of the cell,
# so the notebook does not display these values.
diagram.average_ranks
diagram.get_groups(alpha=cd_alpha, adjustment=cd_adjustment)

# Export the diagram as TikZ/LaTeX source.
diagram.to_file(
    "automl_cd.tex",
    alpha=cd_alpha,
    adjustment=cd_adjustment,
    reverse_x=True,
    axis_options={"title": "AutoML Critical Difference (CD) Plot"},
)

In [14]:
# Show the generated TikZ source; the context manager closes the file handle,
# which the bare open(...).read() call left open.
with open('automl_cd.tex') as cd_file:
    print(cd_file.read())

\begin{tikzpicture}[
  treatment line/.style={rounded corners=1.5pt, line cap=round, shorten >=1pt},
  treatment label/.style={font=\small},
  group line/.style={ultra thick},
]

\begin{axis}[
  clip={false},
  axis x line={center},
  axis y line={none},
  axis line style={-},
  xmin={1},
  ymax={0},
  scale only axis={true},
  width={\axisdefaultwidth},
  ticklabel style={anchor=south, yshift=1.3*\pgfkeysvalueof{/pgfplots/major tick length}, font=\small},
  every tick/.style={draw=black},
  major tick style={yshift=.5*\pgfkeysvalueof{/pgfplots/major tick length}},
  minor tick style={yshift=.5*\pgfkeysvalueof{/pgfplots/minor tick length}},
  title style={yshift=\baselineskip},
  xmax={8},
  ymin={-5.5},
  height={6\baselineskip},
  xtick={1,2,3,4,5,6,7,8},
  minor x tick num={1},
  x dir={reverse},
  title={AutoML Critical Difference (CD) Plot},
]

\draw[treatment line] ([yshift=-2pt] axis cs:1.3225806451612903, 0) |- (axis cs:0.6559139784946236, -2.0)
  node[treatment label, anchor=w