"""Gradient boosting classifier wrappers for the STREAMLINE modeling pipeline.

Each class pairs a boosting library's classifier with an Optuna objective that
samples hyperparameters from a shared parameter grid.
"""
from abc import ABC
from streamline.modeling.basemodel import BaseModel
from streamline.modeling.parameters import get_parameters
from sklearn.ensemble import GradientBoostingClassifier as GB
from xgboost import XGBClassifier as XGB
from lightgbm import LGBMClassifier as LGB
from catboost import CatBoostClassifier as CGB


class GBClassifier(BaseModel, ABC):
    """Scikit-learn GradientBoostingClassifier wrapper."""
    model_name = "Gradient Boosting"
    small_name = "GB"
    color = "cornflowerblue"

    def __init__(self, cv_folds=3, scoring_metric='balanced_accuracy',
                 metric_direction='maximize', random_state=None, cv=None, n_jobs=None):
        super().__init__(GB, self.model_name, cv_folds, scoring_metric, metric_direction, random_state, cv)
        self.param_grid = get_parameters(self.model_name)
        # Pin the seed so every Optuna trial samples the same random_state.
        self.param_grid['random_state'] = [random_state]
        self.small_name = "GB"
        self.color = "cornflowerblue"
        self.n_jobs = n_jobs

    def objective(self, trial, params=None):
        """Suggest hyperparameters for one Optuna trial and return the mean cross-validation score."""
        self.params = {
            'n_estimators': trial.suggest_int('n_estimators', self.param_grid['n_estimators'][0],
                                              self.param_grid['n_estimators'][1]),
            'loss': trial.suggest_categorical('loss', self.param_grid['loss']),
            'learning_rate': trial.suggest_float('learning_rate', self.param_grid['learning_rate'][0],
                                                 self.param_grid['learning_rate'][1], log=True),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', self.param_grid['min_samples_leaf'][0],
                                                  self.param_grid['min_samples_leaf'][1]),
            'min_samples_split': trial.suggest_int('min_samples_split', self.param_grid['min_samples_split'][0],
                                                   self.param_grid['min_samples_split'][1]),
            'max_depth': trial.suggest_int('max_depth', self.param_grid['max_depth'][0],
                                           self.param_grid['max_depth'][1]),
            'random_state': trial.suggest_categorical('random_state', self.param_grid['random_state']),
        }
        mean_cv_score = self.hyper_eval()
        return mean_cv_score
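

# A minimal usage sketch (an illustration, not part of the module): driving the
# objective above with an Optuna study. It assumes BaseModel attaches the
# training data that hyper_eval() scores against during some earlier setup step.
#
#     import optuna
#
#     model = GBClassifier(cv_folds=3, random_state=42)
#     study = optuna.create_study(direction='maximize')  # matches the metric_direction default
#     study.optimize(model.objective, n_trials=50)
#     print(study.best_params)

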
class XGBClassifier(BaseModel, ABC):
    """XGBoost XGBClassifier wrapper."""
    model_name = "Extreme Gradient Boosting"
    small_name = "XGB"
    color = "cyan"

    def __init__(self, cv_folds=3, scoring_metric='balanced_accuracy',
                 metric_direction='maximize', random_state=None, cv=None, n_jobs=None):
        super().__init__(XGB, self.model_name, cv_folds, scoring_metric, metric_direction, random_state, cv)
        self.param_grid = get_parameters(self.model_name)
        # Pin the seed so every Optuna trial samples the same random_state.
        self.param_grid['random_state'] = [random_state]
        self.small_name = "XGB"
        self.color = "cyan"
        self.n_jobs = n_jobs

    def objective(self, trial, params=None):
        """Suggest hyperparameters for one Optuna trial and return the mean cross-validation score."""
        class_weight = params['class_weight']
        param_grid = self.param_grid
        self.params = {
            'booster': trial.suggest_categorical('booster', param_grid['booster']),
            'objective': trial.suggest_categorical('objective', param_grid['objective']),
            'verbosity': trial.suggest_categorical('verbosity', param_grid['verbosity']),
            'reg_lambda': trial.suggest_float('reg_lambda', param_grid['reg_lambda'][0],
                                              param_grid['reg_lambda'][1], log=True),
            'alpha': trial.suggest_float('alpha', param_grid['alpha'][0], param_grid['alpha'][1], log=True),
            'eta': trial.suggest_float('eta', param_grid['eta'][0], param_grid['eta'][1], log=True),
            'gamma': trial.suggest_float('gamma', param_grid['gamma'][0], param_grid['gamma'][1], log=True),
            'max_depth': trial.suggest_int('max_depth', param_grid['max_depth'][0],
                                           param_grid['max_depth'][1]),
            'grow_policy': trial.suggest_categorical('grow_policy', param_grid['grow_policy']),
            'n_estimators': trial.suggest_int('n_estimators', param_grid['n_estimators'][0],
                                              param_grid['n_estimators'][1]),
            # Sklearn-style names retained from the shared grid; XGBoost does not
            # define these and will typically warn that they are unused.
            'min_samples_split': trial.suggest_int('min_samples_split', param_grid['min_samples_split'][0],
                                                   param_grid['min_samples_split'][1]),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', param_grid['min_samples_leaf'][0],
                                                  param_grid['min_samples_leaf'][1]),
            'subsample': trial.suggest_float('subsample', param_grid['subsample'][0],
                                             param_grid['subsample'][1]),
            'min_child_weight': trial.suggest_float('min_child_weight', param_grid['min_child_weight'][0],
                                                    param_grid['min_child_weight'][1], log=True),
            'colsample_bytree': trial.suggest_float('colsample_bytree', param_grid['colsample_bytree'][0],
                                                    param_grid['colsample_bytree'][1]),
            # Either no reweighting (1.0) or the caller-supplied class imbalance ratio.
            'scale_pos_weight': trial.suggest_categorical('scale_pos_weight', [1.0, class_weight]),
            'nthread': trial.suggest_categorical('nthread', param_grid['nthread']),
            'random_state': trial.suggest_categorical('random_state', param_grid['random_state']),
        }
        mean_cv_score = self.hyper_eval()
        return mean_cv_score
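

# A hypothetical sketch (not part of the module) of how the params['class_weight']
# value consumed above might be computed for a binary target, following the usual
# scale_pos_weight heuristic of negative over positive sample counts:
#
#     import numpy as np
#
#     def binary_class_weight(y):
#         y = np.asarray(y)
#         return (y == 0).sum() / max((y == 1).sum(), 1)
#
#     model = XGBClassifier(random_state=42)
#     study.optimize(lambda trial: model.objective(trial, params={'class_weight': binary_class_weight(y_train)}),
#                    n_trials=50)  # `study` and `y_train` as in the sketch above

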
class LGBClassifier(BaseModel, ABC):
    """LightGBM LGBMClassifier wrapper."""
    model_name = "Light Gradient Boosting"
    small_name = "LGB"
    color = "pink"

    def __init__(self, cv_folds=3, scoring_metric='balanced_accuracy',
                 metric_direction='maximize', random_state=None, cv=None, n_jobs=None):
        super().__init__(LGB, self.model_name, cv_folds, scoring_metric, metric_direction, random_state, cv)
        self.param_grid = get_parameters(self.model_name)
        # Pin the seed so every Optuna trial samples the same random_state.
        self.param_grid['random_state'] = [random_state]
        self.small_name = "LGB"
        self.color = "pink"
        self.n_jobs = n_jobs

    def objective(self, trial, params=None):
        """Suggest hyperparameters for one Optuna trial and return the mean cross-validation score."""
        class_weight = params['class_weight']
        param_grid = self.param_grid
        self.params = {
            'objective': trial.suggest_categorical('objective', param_grid['objective']),
            'metric': trial.suggest_categorical('metric', param_grid['metric']),
            'verbosity': trial.suggest_categorical('verbosity', param_grid['verbosity']),
            'boosting_type': trial.suggest_categorical('boosting_type', param_grid['boosting_type']),
            'num_leaves': trial.suggest_int('num_leaves', param_grid['num_leaves'][0],
                                            param_grid['num_leaves'][1]),
            'max_depth': trial.suggest_int('max_depth', param_grid['max_depth'][0],
                                           param_grid['max_depth'][1]),
            'reg_alpha': trial.suggest_float('reg_alpha', param_grid['reg_alpha'][0],
                                             param_grid['reg_alpha'][1], log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', param_grid['reg_lambda'][0],
                                              param_grid['reg_lambda'][1], log=True),
            'colsample_bytree': trial.suggest_float('colsample_bytree', param_grid['colsample_bytree'][0],
                                                    param_grid['colsample_bytree'][1]),
            'subsample': trial.suggest_float('subsample', param_grid['subsample'][0],
                                             param_grid['subsample'][1]),
            'subsample_freq': trial.suggest_int('subsample_freq', param_grid['subsample_freq'][0],
                                                param_grid['subsample_freq'][1]),
            'min_child_samples': trial.suggest_int('min_child_samples', param_grid['min_child_samples'][0],
                                                   param_grid['min_child_samples'][1]),
            'n_estimators': trial.suggest_int('n_estimators', param_grid['n_estimators'][0],
                                              param_grid['n_estimators'][1]),
            # Either no reweighting (1.0) or the caller-supplied class imbalance ratio.
            'scale_pos_weight': trial.suggest_categorical('scale_pos_weight', [1.0, class_weight]),
            'random_state': trial.suggest_categorical('random_state', param_grid['random_state']),
        }
        mean_cv_score = self.hyper_eval()
        return mean_cv_score


class CGBClassifier(BaseModel, ABC):
    """CatBoost CatBoostClassifier wrapper."""
    model_name = "Category Gradient Boosting"
    small_name = "CGB"
    color = "magenta"

    def __init__(self, cv_folds=3, scoring_metric='balanced_accuracy',
                 metric_direction='maximize', random_state=None, cv=None, n_jobs=None):
        super().__init__(CGB, self.model_name, cv_folds, scoring_metric, metric_direction, random_state, cv)
        self.param_grid = get_parameters(self.model_name)
        # Pin the seed so every Optuna trial samples the same random_state.
        self.param_grid['random_state'] = [random_state]
        self.small_name = "CGB"
        self.color = "magenta"
        self.n_jobs = n_jobs

    def objective(self, trial, params=None):
        """Suggest hyperparameters for one Optuna trial and return the mean cross-validation score."""
        self.params = {
            'learning_rate': trial.suggest_float('learning_rate', self.param_grid['learning_rate'][0],
                                                 self.param_grid['learning_rate'][1], log=True),
            'iterations': trial.suggest_int('iterations', self.param_grid['iterations'][0],
                                            self.param_grid['iterations'][1]),
            'depth': trial.suggest_int('depth', self.param_grid['depth'][0], self.param_grid['depth'][1]),
            'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', self.param_grid['l2_leaf_reg'][0],
                                             self.param_grid['l2_leaf_reg'][1]),
            'loss_function': trial.suggest_categorical('loss_function', self.param_grid['loss_function']),
            'random_state': trial.suggest_categorical('random_state', self.param_grid['random_state']),
            'verbose': trial.suggest_categorical('verbose', self.param_grid['verbose']),
        }
        mean_cv_score = self.hyper_eval()
        return mean_cv_score
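

# All four wrappers share the BaseModel interface (model_name, small_name,
# color, objective), so a pipeline can treat them uniformly. A minimal sketch:
#
#     for cls in (GBClassifier, XGBClassifier, LGBClassifier, CGBClassifier):
#         model = cls(cv_folds=3, random_state=42)
#         print(model.small_name, model.model_name, model.color)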