1. Ghostnet on MS1mV3
    1. Constant lr decay
      1. ReLU, with_pointwise, E
        1. TT_ghostnet_pointwise_E_arc_emb512_dr04_wd5e4_bs512_ms1m_hist
          1. 0.995833 | 0.953571 | 0.959
      2. PReLU, GDC, dropout 0.4
        1. TT_ghostnet_prelu_GDC_arc_emb512_dr04_wd5e4_bs512_ms1m_hist
          1. 0.995333 | 0.957714 | 0.956
    2. Cosine lr decay, PReLU
      1. on batch, first_decay_step 16
        1. sgdw 5e-4
          1. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgdw_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_hist
          2. 0.996167 | 0.961286 | 0.966833
        2. sgd, l2 5e-4, apply_to_batch_normal=False
          1. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_hist
          2. 0.997167 | 0.959429 | 0.969333
        3. sgd, l2 5e-4, restarts 3
          1. use_bias True, scale True
          2. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_restart_3_hist
          3. 0.997 | 0.959714 | 0.962833
          4. use_bias False, scale True
          5. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_restart_3_bias_false_hist
          6. 0.9965 | 0.960571 | 0.97
          7. arc + triplet 64
          8. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_restart_3_bias_false_E48_arc_trip_hist
          9. 0.997333 | 0.970857 | 0.9705
      2. first_decay_step 7
        1. on epoch
          1. sgd, l2 1e-3, apply_to_batch_normal=False
          2. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_1e3_bs1024_ms1m_bnm09_bne1e5_cos7_epoch_hist
          3. 0.9965 | 0.965 | 0.97
          4. sgd, l2 5e-4, apply_to_batch_normal=False
          5. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos7_epoch_hist
          6. 0.996833 | 0.962429 | 0.969
        2. on batch, sgd, l2 1e-3
          1. image_per_class=0
          2. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_1e3_bs1024_ms1m_bnm09_bne1e5_cos7_batch_hist
          3. 0.997167 | 0.959857 | 0.968667
          4. image_per_class=4
          5. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_1e3_bs1024_ms1m_bnm09_bne1e5_cos7_batch_image_4_hist
          6. 0.996333 | 0.959714 | 0.968000
    3. float16
      1. PReLU
        1. init 0
          1. TT_ghostnet_prelu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_restart_3_bias_false_hist
          2. 0.995333 | 0.957714 | 0.969
        2. init 0.25
          1. TT_ghostnet_prelu_25_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_float16_hist
          2. 0.996667 | 0.960429 | 0.966833
      2. swish
        1. image_per_class 10
          1. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_ipc10_float16_hist
          2. 0.9955 | 0.961857 | 0.971167
        2. Randaug
          1. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_randaug_float16_hist
          2. 0.9965 | 0.958 | 0.960667
        3. keep_lr_as_min 0
          1. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_kam0_float16_hist
          2. 0.9965 | 0.962 | 0.968333
        4. SGD LookAhead
          1. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_LH_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_float16_hist
          2. 0.995833 | 0.948429 | 0.956167
        5. Basic
          1. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_float16_hist
          2. 0.996833 | 0.959857 | 0.969
          3. E50, lr 0.025
          4. SGD
          5. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_E50_sgd_lr25e3_float16_hist
          6. 0.997667 | 0.962571 | 0.9705
          7. LookAhead
          8. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_E50_LH_sgd_lr25e3_float16_hist
          9. 0.997833 | 0.964857 | 0.970833
          10. SAM
          11. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_E50_SAM_sgd_lr25e3_float16_hist
          12. 0.997833 | 0.963571 | 0.97
          13. SAM + LookAhead
          14. TT_ghostnet_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_E50_SAM_LH_sgd_lr25e3_float16_hist
          15. 0.997667 | 0.963857 | 0.970667
    4. first_strides=1
      1. PReLU, se use PReLU
        1. TT_ghostnet_strides_1_prelu_25_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_float16_hist
          1. 0.997833 | 0.978286 | 0.98
      2. PReLU, se use ReLU
        1. TT_ghostnet_strides_1_prelu_25_se_relu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_float16_hist
          1. 0.9975 | 0.978429 | 0.976333
  2. Botnet50 on MS1mV3
    1. relu
      1. Conv use_bias=True, shortcut act relu
        1. TT_botnet50_relu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_restart_3_bias_false_hist
          1. 0.998 | 0.978 | 0.978167
      2. Conv use_bias=False, shortcut act relu
        1. TT_botnet50_relu_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_restart_3_bias_false_conv_no_bias_hist
          1. 0.997833 | 0.981143 | 0.978833
      3. Conv use_bias=False, shortcut act none
        1. random 0
          1. TT_botnet50_relu_shortcut_act_none_GDC_arc_emb512_cos16_batch_restart_3_bias_false_conv_no_bias_tmul_2_hist
          2. 0.9985 | 0.980286 | 0.979667
        2. randaug 100
          1. TT_botnet50_relu_shortcut_act_none_GDC_arc_emb512_cos16_batch_restart_3_bias_false_conv_no_bias_tmul_2_randaug_hist
          2. 0.997667 | 0.981857 | 0.979333
    2. PReLU, init 0
      1. Conv use_bias=False, shortcut act none, random 0
        1. TT_botnet50_prelu_shortcut_act_none_GDC_arc_emb512_bs768_cos16_batch_restart_2_bias_false_conv_no_bias_tmul_2_random0_hist
          1. 0.997833 | 0.978571 | 0.978
    3. swish
      1. Conv use_bias=False, shortcut act none, random 0
        1. GDC
          1. use_bias False
          2. TT_botnet50_swish_shortcut_act_none_GDC_arc_emb512_cos16_batch_restart_2_bias_false_conv_no_bias_tmul_2_random0_hist
          3. 0.9985 | 0.984571 | 0.979833
        2. E, dropout 0.4
          1. use_bias False, E17
          2. TT_botnet50_swish_shortcut_act_none_E_dr04__arc_emb512_cos16_batch_restart_2_bias_false_conv_no_bias_tmul_2_random0_hist
          3. 0.998 | 0.981571 | 0.979
          4. use_bias True, E17
          5. TT_botnet50_swish_shortcut_act_none_E_dr04__arc_emb512_cos16_batch_restart_2_bias_true_conv_no_bias_tmul_2_random0_hist
          6. 0.997667 | 0.983143 | 0.9785
  3. Resnet on MS1mV3
    1. resnet50v2
      1. pad_same_conv_no_bias
        1. GDC
          1. relu
          2. TT_resnet50v2_pad_same_conv_no_bias_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e4_cos16_float16_hist
          3. 0.998 | 0.980143 | 0.976667
          4. swish
          5. TT_resnet50v2_swish_pad_same_conv_no_bias_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e4_cos16_float16_hist
          6. 0.997333 | 0.977286 | 0.977
        2. swish, E
          1. first_conv_k7_stride_2
          2. TT_resnet50v2_swish_pad_same_conv_no_bias_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e4_cos16_float16_hist
          3. 0.998 | 0.979 | 0.977667
          4. first_conv_k3_stride_1
          5. TT_resnet50v2_swish_pad_same_first_conv_k3_stride_1_conv_no_bias_E_arc_emb512_dr04_sgd_l2_5e4_bs384_ms1m_bnm09_bne1e4_cos16_hist
          6. 0.9985 | 0.988571 | 0.9835
    2. resnet101v2
      1. basic, relu, GDC
        1. TT_resnet101v2_pad_same_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e5_cos16_batch_fixed_float16_hist
          1. 0.998167 | 0.984 | 0.9785
      2. pad_same_conv_no_bias
        1. relu, GDC
          1. TT_resnet101v2_pad_same_conv_no_bias_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e4_cos16_float16_hist
          2. 0.997833 | 0.983429 | 0.979167
        2. swish, E
          1. first_conv_k7_stride_2
          2. TT_resnet101v2_swish_pad_same_conv_no_bias_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e4_cos16_float16_hist
          3. 0.998167 | 0.982714 | 0.980333
          4. first_conv_k3_stride_1
          5. TT_resnet101v2_swish_pad_same_first_conv_k3_stride_1_conv_no_bias_E_arc_emb512_dr04_sgd_l2_5e4_bs384_ms1m_bnm09_bne1e4_cos16_hist
          6. 0.9985 | 0.989143 | 0.9845
    3. r50
      1. swish, E, ms1m
        1. TT_r50_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_bnm09_bne1e4_cos16_hist
          1. 0.998333 | 0.989714 | 0.984167
      2. swish, E, ms1m_cleaned
        1. basic
          1. TT_r50_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_cleaned_bnm09_bne1e4_cos16_hist
          2. 0.998333 | 0.989571 | 0.984333
        2. SD (1, 0.8)
          1. TT_r50_SD_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_cleaned_bnm09_bne1e4_cos16_hist
          2. 0.9985 | 0.989714 | 0.983667
        3. se_r50
          1. random 0
          2. TT_se_r50_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_cleaned_bnm09_bne1e4_cos16_hist
          3. 0.998333 | 0.989714 | 0.984
          4. randaug 100
          5. TT_se_r50_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_cleaned_randaug_100_bnm09_bne1e4_cos16_hist
        4. se_r50, SD (1, 0.8)
          1. TT_se_r50_SD_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs1024_ms1m_cleaned_bnm09_bne1e4_cos16_hist
  4. efficientnetV2 on MS1mV3
    1. efv2_s
      1. early defined one
        1. Stochastic Depth 0
          1. TT_early_efv2_s_add_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_cos16_hist
          2. 0.997833 | 0.960429 | 0.9785
        2. Stochastic Depth 0.8
          1. TT_early_efv2_s_sd08_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_cos16_hist
          2. 0.997667 | 0.981 | 0.981
        3. Stochastic Depth (1, 0.8)
          1. TT_early_efv2_s_sd_1_08_GDC_arc_emb512_dr0_sgd_l2_5e4_bs1024_ms1m_cos16_hist
          2. 0.998167 | 0.974857 | 0.982167
      2. Official imagenet21k, SD 0, E17
        1. TT_efv2_s_swish_E_arc_emb512_dr04_sgd_l2_5e4_bs512_ms1m_bnm09_bne1e4_cos16_hist
          1. 0.997333 | 0.975571 | 0.977833
    2. efv2_b0
      1. sgdw 5e-4
        1. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgdw_wd_5e4_bs512_ms1m_cos16_batch_float16_hist
          1. 0.995833 | 0.953429 | 0.964
      2. SGD, l2 5e-4
        1. bnm 0.99, bne 1e-3, MS1mV3
          1. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_cos16_batch_float16_hist
          2. 0.997167 | 0.974857 | 0.976
        2. bnm 0.9, bne 1e-4
          1. ms1m_cleaned
          2. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_cleaned_bnm09_bne1e4_cos16_batch_float16_hist
          3. 0.9975 | 0.975714 | 0.976333
          4. MS1mV3
          5. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_bnm09_bne1e4_cos16_batch_float16_hist
          6. 0.997167 | 0.975429 | 0.975
    3. finetune efv2_b0, E50 --> E17
      1. MS1mV3
        1. arcface
          1. SD 0
          2. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_bnm09_bne1e4_cos16_batch_float16_E50_arc_base_hist
          3. 0.997333 | 0.972 | 0.9755
          4. SD (1, 0.8)
          5. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_bnm09_bne1e4_cos16_batch_float16_E50_arc_SD_hist
          6. 0.997167 | 0.974 | 0.976
          7. random 2
          8. randaug 100, no shear
          9. randaug 100, no shear, cutout
          10. cutout only
        2. arc + triplet 64
          1. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_bnm09_bne1e4_cos16_batch_float16_E50_arc_trip64_hist
          2. 0.997667 | 0.981143 | 0.9785
        3. curricular
          1. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_bnm09_bne1e4_cos16_batch_float16_E50_curr_hist
          2. 0.997 | 0.973429 | 0.976333
      2. ms1m_cleaned
        1. arc + triplet 64
          1. random 0
          2. TT__efv2_b0_swish_*_E50_arc_trip64_hist
          3. 0.998 | 0.982714 | 0.977833
          4. randaug 100
          5. TT__efv2_b0_swish_*_E50_arc_trip64_randaug_100_hist
          6. 0.997833 | 0.981714 | 0.974167
          7. random 2
          8. TT__efv2_b0_swish_*_E50_arc_trip64_random_2_hist
          9. 0.997833 | 0.983429 | 0.977833
          10. randaug 100, no shear
          11. TT__efv2_b0_swish_*_E50_arc_trip64_randaug_100_no_shear_hist
          12. randaug 100, no shear, cutout
          13. TT__efv2_b0_swish_*_E50_arc_trip64_randaug_100_no_shear_cutout_hist
        2. curricular + triplet 64
          1. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_cleaned_bnm09_bne1e4_cos16_batch_float16_E50_curr_trip64_hist
          2. 0.997833 | 0.983286 | 0.977333
        3. curricular
          1. SD (1, 0.8)
          2. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_cleaned_bnm09_bne1e4_cos16_batch_float16_E50_curr_SD_hist
          3. 0.9975 | 0.973857 | 0.975167
          4. SD 0
          5. TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_cleaned_bnm09_bne1e4_cos16_batch_float16_E50_curr_hist
          6. 0.998167 | 0.975857 | 0.976167