CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ai-forever

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: ai-forever/sber-swap
Path: blob/main/apex/tests/L1/common/run_test.sh
Views: 794
1
#!/bin/bash
2
3
# Print the message in $1 as a black-on-green banner, padded with blank lines.
# Fix: pass the message as a printf ARGUMENT via %s instead of splicing it into
# the format string, so messages containing '%' or backslashes print verbatim.
print_banner() {
  printf '\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n' "$1"
}
6
7
print_banner "Distributed status: $1"

# $2 is the data directory passed through to main_amp.py below.
# Fix: quote the expansion so paths with spaces/globs echo correctly.
echo "$2"
DATADIR=$2

# A non-empty third argument clears the baseline flag.
# NOTE(review): verify that the compare.py invocation later in this file
# consumes $USE_BASELINE — as written here it is only assigned.
if [ -n "$3" ]
then
  USE_BASELINE=""
else
  USE_BASELINE="--use_baseline"
fi
18
19
# Select the training command for the requested mode ($1). Any other value
# leaves BASE_CMD unset, exactly as the original pair of if-tests did.
case "$1" in
  single_gpu)
    BASE_CMD="python main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
    ;;
  distributed)
    BASE_CMD="python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
    ;;
esac
28
29
# Extra flags for the FusedAdam runs (exercised separately below).
ADAM_ARGS="--opt-level O2 --keep-batchnorm-fp32 False --fused-adam"

# The test matrix: every combination of these three lists is run.
# Empty entries mean "flag omitted"; order is preserved from the original.
keep_batchnorms=("" "--keep-batchnorm-fp32 True" "--keep-batchnorm-fp32 False")
loss_scales=("" "--loss-scale 1.0" "--loss-scale 128.0" "--loss-scale dynamic")
opt_levels=("O0" "O1" "O2" "O3")
50
51
# Remove result files from previous runs (names starting True/False).
# Fix: -f and a single rm so a clean tree does not print "No such file" errors.
rm -f True* False*

# Abort on the first failing command from here on.
set -e

print_banner "Installing Apex with --cuda_ext and --cpp_ext"

# Build and install apex WITH the cpp/cuda extensions from the repo root.
pushd ../../..
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
popd
61
62
# Run the full opt-level x loss-scale x batchnorm matrix against the
# extension install (--has-ext). Expansions are deliberately unquoted so
# multi-word flag strings split into separate arguments.
for opt_level in "${opt_levels[@]}"; do
  for loss_scale in "${loss_scales[@]}"; do
    for keep_batchnorm in "${keep_batchnorms[@]}"; do
      # O1 combined with an explicit keep-batchnorm flag is skipped.
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]; then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR"
      set -x
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR
      set +x
    done
  done
done
80
81
# FusedAdam is exercised separately because of its limited support. It is not
# checked bitwise against the Python implementation here (the L0 tests already
# do that); these runs only confirm it executes and give a feel for speed.
for loss_scale in "${loss_scales[@]}"; do
  print_banner "${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR"
  set -x
  ${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR
  set +x
done
92
93
print_banner "Reinstalling apex without extensions"
94
95
pushd ../../..
96
pip install -v --no-cache-dir .
97
popd
98
99
# Re-run the full matrix against the Python-only install (note: no --has-ext
# flag this time). Expansions stay unquoted so multi-word flags split.
for opt_level in "${opt_levels[@]}"; do
  for loss_scale in "${loss_scales[@]}"; do
    for keep_batchnorm in "${keep_batchnorms[@]}"; do
      # O1 combined with an explicit keep-batchnorm flag is skipped.
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]; then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR"
      set -x
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR
      set +x
    done
  done
done
117
118
print_banner "Checking for bitwise accuracy between Python-only and cpp/cuda extension installs"

# Compare the result files produced by the two installs for each matrix cell.
for opt_level in "${opt_levels[@]}"; do
  for loss_scale in "${loss_scales[@]}"; do
    for keep_batchnorm in "${keep_batchnorms[@]}"; do
      echo ""
      # O1 combined with an explicit keep-batchnorm flag is skipped.
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]; then
        echo "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
      set -x
      # Fix: honor $USE_BASELINE (computed from $3 at the top of the script)
      # instead of a hard-coded --use_baseline, so that passing a third
      # argument actually disables the baseline comparison as intended.
      python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} ${USE_BASELINE}
      set +x
    done
  done
done
139
140
print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"
141
142
pushd ../../..
143
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
144
popd
145
146