-
Notifications
You must be signed in to change notification settings - Fork 1
/
train_sklearn.sh
executable file
·26 lines (18 loc) · 1.24 KB
/
train_sklearn.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#Example
# Trains a tree-ensemble model by calling scripts/train_and_save_sklearn.py and
# saves it (JSON by default) for BLOCKSET to pack. Edit the TODO values below
# before running. The script path is relative, so it is resolved against the
# current working directory.
#
# Optional environment variable:
#   NUM_TEST - forwarded as --numtest when set and non-empty. This file never
#              assigned it, so it is treated as optional here.
#              NOTE(review): assumes train_and_save_sklearn.py has a usable
#              default for --numtest; confirm with --help.

#TODO: Replace with name of dataset
DATA_FILENAME='SUSY.csv'
#TODO: Replace with path to directory containing the dataset. The model will be saved in this directory as well.
DATA_DIR='/data/'
#TODO: Replace with label column (-1 indicates last column, 0 would indicate first column).
# Note: the data should be in a csv file with columns representing features
# (one of the columns holding the labels) and rows representing observations.
LAB_COL=0
#TODO: Replace with number of trees
NUM_TREES=100
#TODO: Algorithm: rf: random forest, gbt: gradient boosted tree
ALGORITHM='rf'
#TODO: Task: Classification or regression
TASK='classification'
#TODO: Change to joblib ONLY for the baseline experiments. Otherwise it should be json.
FORMAT='json'

#NOTE: RUN python3 scripts/train_and_save_sklearn.py --help for a list of all options
# Run this script to train the model and save it in a JSON format for BLOCKSET
# to pack. One can modify this script to work for XGBoost as well, i.e. the
# model needs to be stored in the JSON format detailed in the script.

# Build the argument list in the positional parameters (portable to plain sh,
# since this file has no shebang). Every expansion is quoted so paths with
# spaces or glob characters reach the Python script as a single argument.
set -- \
  --labelcol "$LAB_COL" \
  --datadir "$DATA_DIR" \
  --datafilename "$DATA_FILENAME" \
  --numtrees "$NUM_TREES" \
  --algorithm "$ALGORITHM" \
  --task "$TASK" \
  --saveformat "$FORMAT"

# Only pass --numtest when a value exists. Previously an unset NUM_TEST expanded
# to nothing, leaving "--numtest" with no value directly before "--task".
if [ -n "${NUM_TEST:-}" ]; then
  set -- "$@" --numtest "$NUM_TEST"
fi

python3 scripts/train_and_save_sklearn.py "$@"