Commit 17e9425

rename distributed

a710128 committed Mar 25, 2022
1 parent ed6d54c commit 17e9425
Showing 8 changed files with 13 additions and 10 deletions.
3 changes: 2 additions & 1 deletion .dockerignore
@@ -143,4 +143,5 @@ cython_debug/
 **/.DS_Store
 
 **/log
-**/*.qdrep
+**/*.qdrep
+!bmtrain/dist
5 changes: 3 additions & 2 deletions Dockerfile
@@ -12,8 +12,9 @@ RUN apt install iputils-ping opensm libopensm-dev libibverbs1 libibverbs-dev -y
 ENV TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5
 ENV BMP_AVX512=1
 ADD other_requirements.txt other_requirements.txt
-RUN pip3 install -r other_requirements.txt
-RUN pip3 install bmtrain
+RUN pip3 install --upgrade pip && pip3 install -r other_requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+ADD . .
+RUN python3 setup.py install
 
 WORKDIR /root
 ADD example example
2 changes: 1 addition & 1 deletion bmtrain/__init__.py
@@ -19,4 +19,4 @@
 from . import inspect
 from . import lr_scheduler
 from . import loss
-from . import dist
+from . import distributed
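Since the submodule itself is renamed, downstream imports need the matching one-word change; a minimal migration sketch (the old spelling is shown only in comments):

    import bmtrain as bmt
    from bmtrain import distributed   # formerly: from bmtrain import dist

    # attribute access moves the same way: bmt.distributed replaces bmt.dist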
File renamed without changes.
5 changes: 2 additions & 3 deletions bmtrain/dist/ops.py → bmtrain/distributed/ops.py
@@ -1,4 +1,3 @@
-from typing import Literal
 import torch
 from ..global_var import config
 from ..nccl import allGather as ncclAllGather
@@ -30,7 +29,7 @@ def all_gather(x : torch.Tensor):
 
 class OpAllReduce(torch.autograd.Function):
     @staticmethod
-    def forward(ctx, input : torch.Tensor, op: Literal['sum', 'prod', 'max', 'min', 'avg']):
+    def forward(ctx, input : torch.Tensor, op : str):
         if not input.contiguous():
             input = input.contiguous()
         output = torch.empty( input.size(), dtype=input.dtype, device=input.device)
@@ -64,7 +63,7 @@ def backward(ctx, grad_output):
         else:
             return grad_output * ctx.saved_tensors[0], None
 
-def all_reduce(x : torch.Tensor, op: Literal['sum', 'prod', 'max', 'min', 'avg']):
+def all_reduce(x : torch.Tensor, op : str = "sum"):
     assert x.is_cuda
     return OpAllReduce.apply(x, op)
 
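With the Literal annotation dropped, op is a plain string that now defaults to "sum". A short usage sketch under the new module name; it assumes all_gather is re-exported from bmtrain.distributed alongside all_reduce and that the script runs under a multi-process launch with one GPU per rank:

    import torch
    import bmtrain as bmt

    bmt.init_distributed()                      # sets up the communicator; one process per GPU

    x = torch.randn(4, device="cuda")
    s = bmt.distributed.all_reduce(x)           # op defaults to "sum" after this change
    p = bmt.distributed.all_reduce(x, "prod")   # "prod", "max", "min", "avg" remain valid strings
    g = bmt.distributed.all_gather(x)           # gathers x from every rank (assumed re-export)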
5 changes: 4 additions & 1 deletion other_requirements.txt
@@ -1,3 +1,6 @@
 tqdm
 cpm_kernels>=1.0.11
-jieba
+jieba
+tensorboard
+setuptools_rust
+transformers
1 change: 0 additions & 1 deletion setup.py
@@ -32,7 +32,6 @@ def get_avx_flags():
     install_requires=[
         "torch>=1.10",
         "numpy",
-        "tensorboard"
     ],
     ext_modules=[
         CUDAExtension('bmtrain.nccl._C', [
2 changes: 1 addition & 1 deletion tests/test_dist.py
@@ -4,7 +4,7 @@
 def main():
     bmt.init_distributed()
     x = torch.full((1,), bmt.rank() + 1, dtype=torch.half, device="cuda").requires_grad_(True)
-    y = bmt.dist.all_reduce(x, "prod").view(-1)
+    y = bmt.distributed.all_reduce(x, "prod").view(-1)
     bmt.print_rank(y)
     loss = (y * y).sum() / 2
     loss.backward()
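As a sanity check on what the renamed call should produce, the same arithmetic can be done locally; world_size = 2 is an assumed launch configuration, not something the test pins down:

    import math

    world_size = 2                                  # assumption: two ranks
    xs = [rank + 1 for rank in range(world_size)]   # each rank holds rank + 1
    y = math.prod(xs)                               # the "prod" all-reduce result, identical on every rank
    grads = [y * y / x for x in xs]                 # d/dx_i of (y*y)/2 = y * (y / x_i) = y**2 / x_i
    print(y, grads)                                 # 2 [4.0, 2.0]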
