% main.tex -- forked from h2oai/driverlessai-recipes
\documentclass[11pt,
%10pt,
%hyperref={colorlinks},
aspectratio=169,
hyperref={colorlinks}
]{beamer}
\usetheme{Singapore}
\usecolortheme[snowy, cautious]{owl}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[american]{babel}
\usepackage{graphicx}
\usepackage{hyperref}
\hypersetup{
colorlinks=true,
urlcolor=[rgb]{1,0,1},
linkcolor=[rgb]{1,0,1}}
\usepackage[natbib=true,style=authoryear,backend=bibtex,useprefix=true]{biblatex}
\usepackage{listings}
\lstset{numbers=right,
numberstyle=\tiny,
% breaklines=true,
% backgroundcolor=\color{light-gray},
numbersep=5pt,
xleftmargin=\parindent,
xrightmargin=.25in}
%\setbeamercolor*{bibliography entry title}{fg=black}
%\setbeamercolor*{bibliography entry location}{fg=black}
%\setbeamercolor*{bibliography entry note}{fg=black}
\definecolor{OwlGreen}{RGB}{75,0,130} % easier to see
\setbeamertemplate{bibliography item}{}
\setbeamerfont{caption}{size=\footnotesize}
\setbeamertemplate{frametitle continuation}{}
\setcounter{tocdepth}{1}
\renewcommand*{\bibfont}{\scriptsize}
\addbibresource{bibliography.bib}
\renewcommand*{\thefootnote}{\fnsymbol{footnote}}
%\author{\copyright\hspace{1pt}Ashrith Barthur\footnote{\tiny{This material is shared under a \href{https://creativecommons.org/licenses/by/4.0/deed.ast}{CC By 4.0 license} which allows for editing and redistribution, even for commercial purposes. However, any derivative work should attribute the author and H2O.AI.}}}
\author{Ashrith Barthur}
\title{How to Write A Recipe?}
\subtitle{Automating Feature Engineering Using DriverlessAI}
\logo{\includegraphics[height=8pt]{img/h2o_logo.png}}
\institute{\href{https://www.h2o.ai}{H\textsubscript{2}O.ai}}
\date{\today}
\subject{How to Write A Recipe?}
\begin{document}
\maketitle
% \begin{frame}
% \frametitle{Contents}
% \tableofcontents{}
% \end{frame}
%-------------------------------------------------------------------------------
\subsection{Question}
%-------------------------------------------------------------------------------
\begin{frame}
\frametitle{Question}
\begin{enumerate}
\item How many of us have built variables, features, transformers, or feature transformers?
\item What are they?
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Answer}
%-------------------------------------------------------------------------------
\begin{frame}
\frametitle{Answer}
\begin{enumerate}
\item Variables, features, transformers, and feature transformers all refer to the same thing.
\item Each column in your data is considered a variable or a feature.
\item Each \textit{new} column created is also referred to as a variable or a feature.
\item The process of creating a new variable or feature is called a transformation.
\item The code processing an \textit{existing} column to a \textit{new} column is called a \textit{transformer}.
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Example Transformation}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Example Transformation}
\begin{enumerate}
\item \textit{height} -- Variable
\item New variable after transformation: \textit{log2(height)}
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Question}
%-------------------------------------------------------------------------------
\begin{frame}
\frametitle{Question}
\begin{enumerate}
\item How many of us are familiar with Custom Transformers in Driverless AI?
\item What are they?
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Answer}
%-------------------------------------------------------------------------------
\begin{frame}
\frametitle{Answer}
\begin{enumerate}
\item DriverlessAI already has a large, comprehensive set of transformers.
\item But there are always domains that require nuanced features.
\item And for this, DriverlessAI allows us to create custom transformers.
\item This is done by providing an extension class, \textit{CustomTransformer}.
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Recipe Introduction}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{How Did We Build A Custom Transformer?}
Driverless AI provides an extension. \\
This is a class \texttt{CustomTransformer}
\begin{verbatim}
class ExampleLogTransformer(CustomTransformer):
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
\section{Feature Recipe Structure}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{How Did We Build This?}
The class has:
\begin{enumerate}
\item Parameters that need to be provided.
\item These parameters are specific to the type of feature recipe that you are building.
\item It also has four methods which primarily handle your feature engineering transformation.
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Parameters - Basic}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Parameters - Basic}
\begin{verbatim}
class ExampleLogTransformer(CustomTransformer):
    _regression = True
    _binary = True
    _multiclass = True
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
\subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Parameters - Advanced}
\begin{verbatim}
class ExampleLogTransformer(CustomTransformer):
    _regression = True
    _binary = True
    _multiclass = True
    _numeric_output = True
    _is_reproducible = True
    _excluded_model_classes = ['tensorflow']
    _modules_needed_by_name = ["custom_package==1.0.0"]
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Acceptance Method}
\begin{verbatim}
class ExampleLogTransformer(CustomTransformer):
    _regression = True
    _binary = True
    _multiclass = True
    _numeric_output = True
    _is_reproducible = True
    _excluded_model_classes = ['tensorflow']
    _modules_needed_by_name = ["custom_package==1.0.0"]
    @staticmethod
    def do_acceptance_test():
        return True
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Input Data}
\begin{verbatim}
    ...
    @staticmethod
    def do_acceptance_test():
        return True
    @staticmethod
    def get_default_properties():
        return dict(col_type = "numeric", min_cols = 1, max_cols = 1,
                    relative_importance = 1)
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Input Data Types}
\begin{verbatim}
a. "all" - all column types
b. "any" - any column types
c. "numeric" - numeric int/float column
d. "categorical" - string/int/float column considered a categorical for
   feature engineering
e. "numcat" - allow both numeric or categorical
f. "datetime" - string or int column with raw datetime such as
   '%Y/%m/%d %H:%M:%S' or '%Y%m%d%H%M'
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Input Data Types}
\begin{verbatim}
g. "date" - string or int column with raw date such as
   '%Y/%m/%d' or '%Y%m%d'
h. "text" - string column containing text
   (and hence not treated as categorical)
i. "time_column" - the time column specified at the start of
   the experiment (unmodified)
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Fit Function}
\begin{verbatim}
    @staticmethod
    def get_default_properties():
        return dict(col_type = "numeric", min_cols = 1, max_cols = 1,
                    relative_importance = 1)
    def fit_transform(self, X: dt.Frame, y: np.array = None):
        X_pandas = X.to_pandas()
        X_p_log = np.log10(X_pandas)
        return X_p_log
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Transform Function}
\begin{verbatim}
    def fit_transform(self, X: dt.Frame, y: np.array = None):
        X_pandas = X.to_pandas()
        X_p_log = np.log10(X_pandas)
        return X_p_log
    def transform(self, X: dt.Frame):
        X_pandas = X.to_pandas()
        X_p_log = np.log10(X_pandas)
        return X_p_log
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Parameters - Advanced}
%-------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Library}
\begin{verbatim}
from h2oaicore.systemutils import segfault, loggerinfo, main_logger
from h2oaicore.transformer_utils import CustomTransformer
import datatable as dt
import numpy as np
import pandas as pd
import logging
\end{verbatim}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Question}
%-------------------------------------------------------------------------------
\begin{frame}
\frametitle{DEMO}
\end{frame}
%-------------------------------------------------------------------------------
% \subsection{Question}
%-------------------------------------------------------------------------------
\begin{frame}
\frametitle{Advantages}
\begin{enumerate}
\item Feature engineering process standardised by:
\begin{enumerate}
\item preset parameters
\item preset methods
\end{enumerate}
\item Effort minimisation leads to minimisation in time spent.
\item Build only once - Feature engineering is carried over from training/testing to production.
\item DAI automatically runs multiple models on various sets of features to get the best model.
\item All the requirements are handled internally by DAI.
\end{enumerate}
\end{frame}
%-------------------------------------------------------------------------------
% References
%-------------------------------------------------------------------------------
\begin{frame}[t, allowframebreaks]
\frametitle{References}
\textbf{How to build a recipe}\\
{\small\url{https://github.com/ashrith/how_to_write_a_recipe}}
\framebreak
\printbibliography
\end{frame}
%-------------------------------------------------------------------------------
\section{Questions}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{Thanks \& Questions}
\end{frame}
%-------------------------------------------------------------------------------
% \section{End}
%------------------------------------------------------------------------------
\end{document}