
Fix memory estimation #25

Merged · 12 commits · May 28, 2024

Changes from all commits
5 changes: 3 additions & 2 deletions .github/workflows/coverage.yml
@@ -7,7 +7,8 @@ jobs:
       matrix:
         python-version: ['3.8']
     steps:
-      - uses: actions/checkout@v3
+      - name: Checkout repository
+        uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -25,7 +26,7 @@ jobs:
           poetry run pytest --cov=./ --cov-report=xml
 
       - name: Upload Coverage to Codecov
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
           fail_ci_if_error: true # optional (default = false)
15 changes: 13 additions & 2 deletions .pre-commit-config.yaml
@@ -18,15 +18,26 @@ repos:
       - id: fix-encoding-pragma
         args: [--remove]
   - repo: https://github.com/pycqa/flake8
-    rev: 4.0.1
+    rev: 7.0.0
     hooks:
       - id: flake8
         args: ["--config=.flake8"]
   - repo: https://github.com/google/yapf
     rev: v0.32.0
     hooks:
       - id: yapf
-        additional_dependencies: [toml]
+        name: yapf
+        description: "A formatter for Python files."
+        entry: yapf
+        args: [-i, -vv, -p] # inplace
+        language: python
+        types: [python]
+        additional_dependencies:
+          - "toml"
+  - repo: https://github.com/pycqa/isort
+    hooks:
+      - id: isort
+    rev: 5.12.0
   - repo: https://github.com/codespell-project/codespell
     rev: v2.1.0
     hooks:
12 changes: 5 additions & 7 deletions examples/llama2/run_infer_cursor.py
@@ -1,12 +1,10 @@
-from llm_analysis.config import (
-    ParallelismConfig,
-    get_dtype_config_by_name,
-    get_gpu_config_by_name,
-    get_model_config_by_name,
-)
-from llm_analysis.analysis import LLMAnalysis
 import csv
 
+from llm_analysis.analysis import LLMAnalysis
+from llm_analysis.config import (ParallelismConfig, get_dtype_config_by_name,
+                                 get_gpu_config_by_name,
+                                 get_model_config_by_name)
+
 gpu_name = "a100-sxm-80gb"
 dtype_name = "w16a16e16"
 model_name = "upstage/Llama-2-70b-instruct-v2"
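For context, a minimal sketch of how these imports are typically combined in the example script. Hedged: the LLMAnalysis constructor arguments, the inference() method, and the parallelism degrees below are assumptions inferred from the names in the diff, not taken from this PR.

from llm_analysis.analysis import LLMAnalysis
from llm_analysis.config import (ParallelismConfig, get_dtype_config_by_name,
                                 get_gpu_config_by_name,
                                 get_model_config_by_name)

# Look up the bundled configs by the names used in the script above.
model_config = get_model_config_by_name("upstage/Llama-2-70b-instruct-v2")
gpu_config = get_gpu_config_by_name("a100-sxm-80gb")
dtype_config = get_dtype_config_by_name("w16a16e16")
parallelism_config = ParallelismConfig(tp_size=8, pp_size=1)  # illustrative degrees

analysis = LLMAnalysis(model_config, gpu_config, dtype_config, parallelism_config)
summary = analysis.inference(batch_size_per_gpu=1, seq_len=2048,
                             num_tokens_to_generate=512)  # assumed signature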
338 changes: 227 additions & 111 deletions llm_analysis/analysis.py

Large diffs are not rendered by default.
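Because the analysis.py diff is not rendered, here is a generic, hedged sketch of the kind of per-GPU model-state accounting a memory-estimation fix like this touches, consistent with the ParallelismConfig changes below (sharded dp_size for FSDP/ZeRO, replicated rdp_size for HSDP). The function name, byte counts, and sharding rules are illustrative assumptions, not the file's actual code.

def estimate_model_states_memory_per_gpu(num_params: int,
                                         tp_size: int = 1,
                                         pp_size: int = 1,
                                         dp_size: int = 1,
                                         rdp_size: int = 1) -> float:
    # Tensor and pipeline parallelism partition the parameters themselves.
    params_per_rank = num_params / (tp_size * pp_size)
    weight_bytes = 2 * params_per_rank      # fp16/bf16 weights
    grad_bytes = 2 * params_per_rank        # fp16/bf16 gradients
    optimizer_bytes = 12 * params_per_rank  # Adam: fp32 master weights + two moments
    # Full sharding (FSDP / ZeRO stage 3) divides all three states across
    # dp_size; replication (rdp_size, HSDP) copies them across groups and
    # saves no per-GPU memory, so it does not appear in the divisor.
    return (weight_bytes + grad_bytes + optimizer_bytes) / dp_size

# e.g. 70e9 params with tp_size=8, dp_size=4 -> 35e9 bytes (about 33 GiB) per GPU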

14 changes: 6 additions & 8 deletions llm_analysis/config.py
@@ -21,11 +21,8 @@
 
 import fire
 
-from llm_analysis.constant import (
-    DTYPE_CONFIG_DIR_NAME,
-    GPU_CONFIG_DIR_NAME,
-    MODEL_CONFIG_DIR_NAME,
-)
+from llm_analysis.constant import (DTYPE_CONFIG_DIR_NAME, GPU_CONFIG_DIR_NAME,
+                                   MODEL_CONFIG_DIR_NAME)
 from llm_analysis.logger import logger
 
 try:
@@ -116,8 +113,9 @@ class ParallelismConfig:
     tp_size: int = 1  # tensor parallelism size, Megatron-LM tensor parallelism implementation
     pp_size: int = 1  # pipeline parallelism size, Megatron-LM pipeline parallelism implementation
     dp_size: int = (
-        1  # data parallelism size, DeepSpeed Zero parallelism implementation
+        1  # sharded data parallelism size, PyTorch FSDP or DeepSpeed Zero parallelism implementation
     )
+    rdp_size: int = 1  # replicated data parallelism size, PyTorch HSDP implementation
     ep_size: int = 1  # expert parallelism size
     sp_size: int = None  # sequence parallelism size, Megatron-LM sequence parallelism implementation
@@ -357,10 +355,10 @@ def get_model_config_by_name(name_or_path: str) -> ModelConfig:
             model_configs[config.name] = config
             return config
         except Exception as e:
-            raise ValueError(f"unknown gpu config name: {e}")
+            raise ValueError(f"unknown model config name: {e}")
     model_config = get_model_config_from_hf(name_or_path)
     if model_config is None:
-        raise (
+        raise ValueError(
             f"unknown model config name: {name_or_path}, and none is found on HuggingFace Hub"
         )
     return model_config
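A short usage sketch of the corrected error path (behavior inferred from the diff: the old bare raise (...) would itself fail with a TypeError, since a string is not an exception):

from llm_analysis.config import get_model_config_by_name

try:
    config = get_model_config_by_name("no-such-model")  # hypothetical name
except ValueError as err:
    # e.g. "unknown model config name: no-such-model, and none is found on HuggingFace Hub"
    print(err)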
2 changes: 2 additions & 0 deletions llm_analysis/utils.py
@@ -14,6 +14,8 @@
 
 
 def _num_to_string(num, precision=2, divisor=1024):
+    if num is None:
+        return None
     if num < 0:
         sign = '-'
         num = -num
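For reference, a hedged sketch of the full helper with the new guard in place; everything past the sign handling is an assumed reconstruction (the diff shows only the first lines), and the unit suffixes are illustrative.

def _num_to_string(num, precision=2, divisor=1024):
    # New guard: propagate None instead of raising TypeError on `num < 0`.
    if num is None:
        return None
    sign = ''
    if num < 0:
        sign = '-'
        num = -num
    # Assumed tail: repeatedly scale by `divisor` and attach a unit suffix.
    for unit in ['', 'K', 'M', 'G', 'T']:
        if num < divisor:
            return f"{sign}{round(num, precision)} {unit}".strip()
        num /= divisor
    return f"{sign}{round(num, precision)} P"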
16 changes: 8 additions & 8 deletions pyproject.toml
@@ -8,17 +8,17 @@ readme = "README.md"
 packages = [{ include = "llm_analysis" }]
 
 [tool.poetry.dependencies]
-python = "^3.8"
+python = ">=3.8"
 fire = "^0.5.0"
-huggingface-hub = "^0.14.1"
-transformers = "^4.28.1"
+# huggingface-hub = "^0.14.1"
+# transformers = "^4.28.1"
 
 [tool.poetry.group.dev.dependencies]
-pytest = "^7.3.1"
-coverage = { extras = ["toml"], version = "^7.2.5" }
-sphinx = "^7.0.0"
-sphinx-autodoc-typehints = "^1.23.0"
-pytest-cov = "^4.0.0"
+pytest = ">=7.3.1"
+coverage = { extras = ["toml"], version = ">=7.2.5" }
+sphinx = ">=7.0.0"
+sphinx-autodoc-typehints = ">=1.23.0"
+pytest-cov = ">=4.0.0"
 
 [tool.coverage.run]
 omit = [".*", "*/site-packages/*"]
14 changes: 5 additions & 9 deletions tests/test_config.py
@@ -12,18 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from llm_analysis.config import (
-    ModelConfig,
-    GPUConfig,
-    DtypeConfig,
-    get_dtype_config_by_name,
-    get_gpu_config_by_name,
-    get_model_config_by_name,
-)
+from llm_analysis.config import (DtypeConfig, GPUConfig, ModelConfig,
+                                 get_dtype_config_by_name,
+                                 get_gpu_config_by_name,
+                                 get_model_config_by_name)
 
 
 def test_get_model_config_by_name():
-    model_name = "facebook/opt-125m"
+    model_name = "facebook_opt-125m"
     model_config = get_model_config_by_name(model_name)
     assert isinstance(model_config, ModelConfig)
     assert model_config.num_layers == 12
13 changes: 5 additions & 8 deletions tests/test_inference.py
@@ -12,14 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from llm_analysis.utils import within_range
 from llm_analysis.analysis import LLMAnalysis
-from llm_analysis.config import (
-    ParallelismConfig,
-    get_dtype_config_by_name,
-    get_gpu_config_by_name,
-    get_model_config_by_name,
-)
+from llm_analysis.config import (ParallelismConfig, get_dtype_config_by_name,
+                                 get_gpu_config_by_name,
+                                 get_model_config_by_name)
+from llm_analysis.utils import within_range
 
 TOLERANCE = 0.1
@@ -55,7 +52,7 @@ def test_fastertransformer_13b_tp1():
 
 
 def test_llama2_70b():
-    model_name = "upstage/Llama-2-70b-instruct-v2"
+    model_name = "upstage_Llama-2-70b-instruct-v2"
     dtype_name = "w16a16e16"
     gpu_name = "a100-sxm-80gb"
9 changes: 3 additions & 6 deletions tests/test_training.py
@@ -13,12 +13,9 @@
 # limitations under the License.
 
 from llm_analysis.analysis import ActivationRecomputation, DSZeRO, LLMAnalysis
-from llm_analysis.config import (
-    ParallelismConfig,
-    get_dtype_config_by_name,
-    get_gpu_config_by_name,
-    get_model_config_by_name,
-)
+from llm_analysis.config import (ParallelismConfig, get_dtype_config_by_name,
+                                 get_gpu_config_by_name,
+                                 get_model_config_by_name)
 from llm_analysis.utils import _latency_to_string, _num_to_string, within_range
 
 TOLERANCE = 0.05