The Best Machine Learning Frameworks & Extensions for Scikit-learn

Data formats


import pandas as pd
import sklearn.preprocessing
from sklearn_pandas import DataFrameMapper

mapper = DataFrameMapper([
    ('Uni', sklearn.preprocessing.LabelBinarizer()),
    (['Age'], sklearn.preprocessing.StandardScaler())
])


import numpy as np 
import xarray as xr
data = np.random.rand(16, 4)
my_xarray = xr.DataArray(data)
from sklearn.preprocessing import StandardScaler

Xt = wrap(StandardScaler()).fit_transform(X)

pipeline = Pipeline([
    ('pca', wrap(PCA(n_components=50), reshapes='feature')),
    ('cls', wrap(LogisticRegression(), reshapes='feature'))
])
from sklearn_xarray.model_selection import CrossValidatorWrapper
from sklearn.model_selection import GridSearchCV, KFold

cv = CrossValidatorWrapper(KFold())
pipeline = Pipeline([
    ('pca', wrap(PCA(), reshapes='feature')),
    ('cls', wrap(LogisticRegression(), reshapes='feature'))
])
gridsearch = GridSearchCV(
    pipeline, cv=cv, param_grid={'pca__n_components': [20, 40, 60]}
)



$ curl https://raw.githubusercontent.com/automl/auto-sklearn/master/requirements.txt | xargs -n 1 -L 1 pip install

from autosklearn.classification import AutoSklearnClassifier

cls = AutoSklearnClassifier()
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)

Auto_ViML — “Automatic Variant Interpretable Machine Learning” (pronounced “Auto_Vimal”)

from sklearn.model_selection import train_test_split, cross_validate

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=54)
train, test = X_train.join(y_train), X_val.join(y_val)
model, features, train, test = Auto_ViML(train, "target", test, verbose=2)

TPOT — Tree-based Pipeline Optimization Tool

from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25, random_state=42)
tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=42)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))

Feature Tools

import featuretools as ft

entities = {
    "customers": (customers_df, "customer_id"),
    "sessions": (sessions_df, "session_id", "session_start"),
    "transactions": (transactions_df, "transaction_id", "transaction_time")
}
relationships = [("sessions", "session_id", "transactions", "session_id"),
                 ("customers", "customer_id", "sessions", "customer_id")]
feature_matrix, features_defs = ft.dfs(entities=entities,
                                       relationships=relationships,
                                       target_entity="customers")


Experimentation frameworks

SciKit-Learn Laboratory

$ run_experiment experiment.cfg


from neptunecontrib.monitoring.sklearn import log_regressor_summary

log_regressor_summary(rfr, X_train, X_test, y_train, y_test)



Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on Google Play', and if clicked it will lead you to the Google Play store