From f4e03076654ea0264755dbc0bd8d727b2d90efc4 Mon Sep 17 00:00:00 2001 From: s572915912 <54531516+s572915912@users.noreply.github.com> Date: Fri, 11 Jul 2025 18:32:16 +0800 Subject: [PATCH] Update train_deepspeed.sh --- hy3dshape/scripts/train_deepspeed.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hy3dshape/scripts/train_deepspeed.sh b/hy3dshape/scripts/train_deepspeed.sh index 278cf73..444e5cc 100644 --- a/hy3dshape/scripts/train_deepspeed.sh +++ b/hy3dshape/scripts/train_deepspeed.sh @@ -29,10 +29,9 @@ else export NCCL_IB_SL=3 export NCCL_CHECK_DISABLE=1 export NCCL_P2P_DISABLE=0 - export NCCL_IB_DISABLE=1 + export NCCL_IB_DISABLE=0 export NCCL_LL_THRESHOLD=16384 export NCCL_IB_CUDA_SUPPORT=1 - # DELETED: The hardcoded export NCCL_SOCKET_IFNAME and UCX_NET_DEVICES lines export NCCL_COLLNET_ENABLE=0 export SHARP_COLL_ENABLE_SAT=0 export NCCL_NET_GDR_LEVEL=2