CANDLE Compliance Example

See this in Singularity repo: https://github.com/JDACS4C-IMPROVE/Singularity/tree/master/examples

import os
from contextlib import suppress

# Build the example hyperparameters file (default_model.txt format) from scratch.
file_name = "example_default_model.txt"

# Remove any stale copy first. suppress(FileNotFoundError) avoids the noisy
# shell error ("rm: cannot remove ...") that `rm` prints when the file is absent.
with suppress(FileNotFoundError):
    os.remove(file_name)

# create a hyperparameters file; "w" is safe since any old copy was removed
with open(file_name, "w") as f:
    f.write("[global]\n# the only absolutely required parameter\nmodel_name=\"example\"\n")
    f.write("#demonstrating how a list is handled\ndense=[1000, 750, 500]")

os.system("cat -n example_default_model.txt")
     1	[global]
     2	# the only absolutely required parameter
     3	model_name="example"
     4	#demonstrating how a list is handled
     5	dense=[1000, 750, 500]
rm: cannot remove 'example_default_model.txt': No such file or directory
0
import os, json
import candle

# Just because the tensorflow warnings are a bit verbose
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# This should be set outside as a user environment variable.
# expanduser('~') is more portable than reading $HOME directly, which
# may be unset (e.g. in stripped-down containers); os.path.join avoids
# hand-written path separators.
os.environ['CANDLE_DATA_DIR'] = os.path.join(os.path.expanduser('~'), 'improve_data_dir')
Importing candle utils for Keras
Importing candle utils for pytorch
/home/docs/checkouts/readthedocs.org/user_builds/candle-lib/conda/latest/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
# Extra argument definitions: each entry becomes a command-line flag that
# the CANDLE parser recognizes in addition to the default parameters.
additional_definitions = [
    dict(
        name='new_keyword',
        type=str,
        nargs=1,
        help='helpful description',
    ),
]

# No parameters are strictly required beyond model_name from the config file.
required = None


# Extend candle.Benchmark to configure the args.
# (This class appeared twice verbatim in the original listing — a
# copy-paste artifact; a single definition is sufficient.)
class IBenchmark(candle.Benchmark):
    """Benchmark subclass that registers this example's custom arguments."""

    def set_locals(self):
        # Hook called by candle; attach the module-level `required` and
        # `additional_definitions` so the argument builder picks them up.
        if required is not None:
            self.required = set(required)
        if additional_definitions is not None:
            self.additional_definitions = additional_definitions
def initialize_parameters():
    """Build the benchmark, parse config and CLI, and return the param dict.

    Instantiates IBenchmark so the settings in default_model.txt — plus any
    matching command-line flags — are merged into one dictionary, which the
    caller then passes to run().
    """
    print("initializing params\n")
    benchmark = IBenchmark(
        "./",                          # path used to locate default_model.txt
        'example_default_model.txt',   # name of the default_model.txt file
        'keras',                       # framework, choice is keras or pytorch
        prog='example_baseline',       # basename of the model
        desc='IMPROVE Benchmark',
    )

    # finalize_parameters() returns the dictionary built from
    # default_model.txt, overwritten by any matching command-line parameters.
    return candle.finalize_parameters(benchmark)
def run(params):
    """Placeholder training step: print progress and persist validation scores.

    Parameters
    ----------
    params : dict
        Parameter dictionary from initialize_parameters(); must contain
        "output_dir", an existing directory where scores.json is written.

    Returns
    -------
    dict
        Training metrics; consumed by the supervisor when running HPO
        workflows (and possible future non-HPO workflows).
    """
    # A real model would: fetch data, preprocess, save preprocessed data,
    # define callbacks, build/compile, train, infer, etc.
    print("running third party code")
    print("returning training metrics")
    metrics = {"val_loss": 0.101, "pcc": 0.923, "scc": 0.777, "rmse": 0.036}

    # Workflow requirement: dump results to a JSON file. "key"/"value"
    # identify the primary metric (val_loss); all metrics are included too.
    val_scores = {"key": "val_loss", "value": metrics["val_loss"], **metrics}

    # Expected layout under CANDLE_DATA_DIR, e.g.:
    # ~/improve_data_dir/example
    # ├── Data
    # └── Output
    #     └── EXP000
    #         └── RUN000
    #             └── scores.json
    #
    # os.path.join is safer than string concatenation for building paths.
    scores_path = os.path.join(params["output_dir"], "scores.json")
    with open(scores_path, "w", encoding="utf-8") as f:
        json.dump(val_scores, f, ensure_ascii=False, indent=4)

    return metrics
def main():
    """Entry point: build the parameter dict, run the example, show results."""
    params = initialize_parameters()
    run(params)
    print(params['data_dir'])

    # demonstrating a list parameter parsed from default_model.txt
    for layer_index, node_count in enumerate(params['dense']):
        print("dense layer {} has {} nodes".format(layer_index, node_count))


if __name__ == "__main__":
    main()
initializing params

model name:  "example"
Configuration file:  ./example_default_model.txt
Params:
{'HistoryManager.hist_file=:memory': ':memory:',
 'ckpt_checksum': False,
 'ckpt_directory': None,
 'ckpt_keep_limit': 5,
 'ckpt_keep_mode': 'linear',
 'ckpt_restart_mode': 'auto',
 'ckpt_save_best': True,
 'ckpt_save_best_metric': 'val_loss',
 'ckpt_save_interval': 0,
 'ckpt_save_weights_only': False,
 'ckpt_skip_epochs': 0,
 'data_dir': '/home/docs/improve_data_dir/example/Data',
 'data_type': <class 'numpy.float32'>,
 'dense': [1000, 750, 500],
 'experiment_id': 'EXP000',
 'jupyter': '/tmp/tmpi2lrf30k.json',
 'logfile': None,
 'model_name': 'example',
 'output_dir': '/home/docs/improve_data_dir/example/Output/EXP000/RUN000',
 'profiling': False,
 'rng_seed': 7102,
 'run_id': 'RUN000',
 'shuffle': False,
 'timeout': -1,
 'train_bool': True,
 'verbose': False}
running third party code
returning training metrics
/home/docs/improve_data_dir/example/Data
dense layer 0 has 1000 nodes
dense layer 1 has 750 nodes
dense layer 2 has 500 nodes