Source code for streamline.utils.runners

import os
import multiprocessing

from joblib import Parallel, delayed

num_cores = int(os.environ.get('SLURM_CPUS_PER_TASK', -1))
if num_cores == -1:
    num_cores = multiprocessing.cpu_count()


[docs] def check_if_single_phase(params): phase_list = [params['do_eda'], params['do_dataprep'], params['do_feat_imp'], params['do_feat_sel'], params['do_model'], params['do_stats'], params['do_compare_dataset'], params['do_report'], params['do_replicate'], params['do_rep_report'], params['do_cleanup']] phase_count = 0 for phase in phase_list: if phase: phase_count += 1 if phase_count == 1: return True else: return False
[docs] def parallel_eda_call(eda_job, params): """ Runner function for running eda job objects """ if params and 'top_features' in params: eda_job.run(params['top_features']) else: eda_job.run()
[docs] def model_runner_fn(job, model): """ Runner function for running model job objects """ job.run(model)
[docs] def runner_fn(job): """ Runner function for running job objects """ job.run()
[docs] def run_jobs(job_list): """ Function to start and join a list of job objects """ for i in range(0, len(job_list), num_cores): sub_jobs(job_list[i:i + num_cores])
[docs] def sub_jobs(job_list): Parallel(n_jobs=num_cores)( delayed(model_runner_fn)(job_obj, model ) for job_obj, model in job_list)