Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/main/apex/tests/L1/common/run_test.sh
Views: 794
#!/bin/bash
#
# Runs main_amp.py over every opt-level / loss-scale / keep-batchnorm
# combination twice -- first with Apex installed with its C++/CUDA extensions
# (passing --has-ext), then with the Python-only install -- and finally runs
# compare.py over the same combinations.
#
# Usage: run_test.sh <single_gpu|distributed> <datadir> [flag]
#   $1  launch mode; selects the command line used to start main_amp.py
#   $2  dataset directory, passed as the last argument to main_amp.py
#   $3  optional; when non-empty, USE_BASELINE is set to the empty string
#
# The script uses relative paths (../../.., main_amp.py, compare.py), so it
# must be started from the directory that contains those files.

#######################################
# Prints a message as a black-on-green banner surrounded by blank lines.
# Arguments: $1 - message text
# Outputs:   banner on stdout
#######################################
print_banner() {
  # The message is passed through %s so that a '%' or '\' inside it is never
  # interpreted as a printf directive.
  printf '\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n' "$1"
}

#######################################
# Succeeds when a combination has to be skipped: opt level O1 together with
# an explicit --keep-batchnorm-fp32 setting.
# Arguments: $1 - opt level, $2 - keep-batchnorm argument string (may be empty)
#######################################
is_skipped_combination() {
  [[ "$1" == "O1" && -n "$2" ]]
}

#######################################
# Installs Apex from the directory three levels up.
# Arguments: extra options forwarded to `pip install` (may be none)
#######################################
install_apex() {
  pushd ../../..
  pip install -v --no-cache-dir "$@" .
  popd
}

#######################################
# Runs BASE_CMD for every non-skipped combination.
# Globals:   BASE_CMD, DATADIR, opt_levels, loss_scales, keep_batchnorms (read)
# Arguments: $1 - extra flag inserted before the data directory
#                 ("--has-ext" or empty)
#######################################
run_training_sweep() {
  local ext_flag=$1
  local opt_level loss_scale keep_batchnorm

  for opt_level in "${opt_levels[@]}"; do
    for loss_scale in "${loss_scales[@]}"; do
      for keep_batchnorm in "${keep_batchnorms[@]}"; do
        if is_skipped_combination "${opt_level}" "${keep_batchnorm}"; then
          print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
          continue
        fi
        print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} ${ext_flag:+${ext_flag} }${DATADIR}"
        set -x
        # Word splitting is intentional: each of these holds zero or more
        # space-separated arguments.
        # shellcheck disable=SC2086
        ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} ${ext_flag} "${DATADIR}"
        set +x
      done
    done
  done
}

print_banner "Distributed status: $1"

printf '%s\n' "$2"
DATADIR=$2

# Fail before the (slow) installs instead of running with an empty data path.
if [[ -z "${DATADIR}" ]]; then
  printf 'usage: %s <single_gpu|distributed> <datadir> [flag]\n' "${0##*/}" >&2
  exit 2
fi

# NOTE(review): USE_BASELINE is not read anywhere in this script; compare.py
# below is always invoked with a hard-coded --use_baseline. Confirm whether
# ${USE_BASELINE} was meant to be passed there.
# shellcheck disable=SC2034
if [[ -n "$3" ]]; then
  USE_BASELINE=""
else
  USE_BASELINE="--use_baseline"
fi

case "$1" in
  single_gpu)
    BASE_CMD="python main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
    ;;
  distributed)
    BASE_CMD="python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
    ;;
  *)
    # Without this check BASE_CMD would stay empty and the first test
    # invocation would try to execute "--opt-level" as a command.
    printf 'error: unknown mode "%s" (expected single_gpu or distributed)\n' "$1" >&2
    exit 2
    ;;
esac

ADAM_ARGS="--opt-level O2 --keep-batchnorm-fp32 False --fused-adam"

# An empty entry means "do not pass this option at all".
keep_batchnorms=(
  ""
  "--keep-batchnorm-fp32 True"
  "--keep-batchnorm-fp32 False"
)

loss_scales=(
  ""
  "--loss-scale 1.0"
  "--loss-scale 128.0"
  "--loss-scale dynamic"
)

opt_levels=(
  "O0"
  "O1"
  "O2"
  "O3"
)

# Remove files named True*/False* left in the working directory; -f keeps
# this quiet when there is nothing to delete.
rm -f -- True* False*

set -e

print_banner "Installing Apex with --cuda_ext and --cpp_ext"

install_apex --global-option="--cpp_ext" --global-option="--cuda_ext"

run_training_sweep "--has-ext"

# Handle FusedAdam separately due to limited support.
# FusedAdam will not be tested for bitwise accuracy against the Python implementation.
# The L0 tests already do so. These tests are here to ensure that it actually runs,
# and get an idea of performance.
for loss_scale in "${loss_scales[@]}"; do
  print_banner "${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR"
  set -x
  # shellcheck disable=SC2086
  ${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext "${DATADIR}"
  set +x
done

print_banner "Reinstalling apex without extensions"

install_apex

run_training_sweep ""

print_banner "Checking for bitwise accuracy between Python-only and cpp/cuda extension installs"

for opt_level in "${opt_levels[@]}"; do
  for loss_scale in "${loss_scales[@]}"; do
    for keep_batchnorm in "${keep_batchnorms[@]}"; do
      echo ""
      if is_skipped_combination "${opt_level}" "${keep_batchnorm}"; then
        echo "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
      set -x
      # shellcheck disable=SC2086
      python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --use_baseline
      set +x
    done
  done
done

print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"

install_apex --global-option="--cpp_ext" --global-option="--cuda_ext"