Skip to content

Commit

Permalink
Update code
Browse files Browse the repository at this point in the history
  • Loading branch information
Aetf committed Jan 13, 2020
1 parent b111dc9 commit 01a36d6
Show file tree
Hide file tree
Showing 21 changed files with 593 additions and 75 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ if(WITH_TIMEOUT_WARNING)
set(SALUS_ENABLE_TIMEOUT_WARNING 1)
endif(WITH_TIMEOUT_WARNING)

# When USE_TENSORFLOW is set, define SALUS_ENABLE_TENSORFLOW; presumably this
# is consumed by src/config.h.in via the configure_file() call below (confirm).
if(USE_TENSORFLOW)
set(SALUS_ENABLE_TENSORFLOW 1)
endif(USE_TENSORFLOW)

configure_file(src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR})

Expand Down
7 changes: 5 additions & 2 deletions benchmarks/driver/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ def __call__(self, executor, output_file):
else:
output_file.parent.mkdir(exist_ok=True, parents=True)
with output_file.open('w') as f:
return execute(cmd, cwd=str(cwd), env=env, stdout=f, stderr=sp.STDOUT)
# return execute(cmd, cwd=str(cwd), env=env, stdout=f, stderr=sp.STDOUT)
return execute(cmd, cwd=str(cwd), env=env, stdout=f, stderr=None)

def _construct_test_name(self, executor):
# type: (Executor) -> Tuple[str, str]
Expand All @@ -239,7 +240,7 @@ def _construct_test_name(self, executor):
})
}

variable_batch_size_models = {'vae', 'superres'}
variable_batch_size_models = {'vae', 'superres', 'seq2seq', 'mnistsf', 'mnistcv', 'mnistlg'}
if remove_suffix(self.wl.name, 'eval') not in variable_batch_size_models:
if self.wl.batch_size not in self.wl.wtl.available_batch_sizes():
raise ValueError(f"Batch size `{self.wl.batch_size}' is not supported for {self.wl.name},"
Expand Down Expand Up @@ -273,6 +274,8 @@ def _construct_test_name(self, executor):
}

postfix = names.get(self.wl.batch_size, '0')
if model_name == 'seq2seq' and postfix == '0':
postfix = '2_large'

method = f'{cls}.{prefix}{postfix}'
return pkg, method
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/driver/server/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from builtins import super

from absl import flags
from copy import copy
from copy import deepcopy

from ...utils import maybe_path
from ...utils.compatiblity import pathlib
Expand Down Expand Up @@ -77,7 +77,7 @@ def __setattr__(self, key, value):
def copy(self, **kwargs):
    # type: (...) -> SalusConfig
    """Return an independent copy of this config, optionally with overrides.

    The copy is deep, so mutable attributes (lists, nested containers) are
    not shared with the original object.

    :param kwargs: forwarded unchanged to ``update`` on the new copy.
    :return: whatever ``update`` returns for the copied object.
    """
    # ``deepcopy`` rather than a shallow copy: a shallow copy would alias
    # mutable attributes between the original and the copy, so an in-place
    # change made through one would leak into the other.
    return deepcopy(self).update(**kwargs)

def update(self, d=None, **kwargs):
# type: (...) -> SalusConfig
Expand Down
7 changes: 7 additions & 0 deletions benchmarks/exps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import time
import re
import logging
import string
import random
from absl import flags
from typing import Union, Iterable, List, TypeVar, Callable, Optional

Expand Down Expand Up @@ -379,3 +381,8 @@ def release_on_pipe(pipe):
def sync_on_pipe(pipe):
    """Wait on ``pipe`` and then release it, in that order."""
    # Step 1: wait on the pipe.
    wait_on_pipe(pipe)
    # Step 2: release the pipe.
    release_on_pipe(pipe)


def random_id(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    :param size: number of characters to generate.
    :param chars: sequence of candidate characters; by default the ASCII
        uppercase letters followed by the decimal digits.
    :return: the generated identifier as a ``str``.
    """
    picked = []
    for _ in range(size):
        # One independent draw per output character.
        picked.append(random.choice(chars))
    return ''.join(picked)
27 changes: 26 additions & 1 deletion benchmarks/exps/smtracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,31 @@ def tfmps(argv):
)


def train_alone(argv):
    """Run the training workload by itself, once for each SM factor.

    Every element of ``argv`` is parsed as a float SM factor; when ``argv``
    is empty a built-in list of factors is used instead. Each factor gets
    its own temporary directory and its own output directory under
    ``FLAGS.save_dir / "alone"``.
    """
    # Fall back to the default sweep when no factors were given.
    sm_factors = [float(arg) for arg in argv] or [1.0, 1.5, 2.0, 2.5, 3.0]

    logger.info(f"Running Salus with sm factors: {sm_factors}")

    # One Salus run per factor.
    for factor in sm_factors:
        # The same two-decimal rendering is used for the flag value and
        # for the output directory name.
        factor_text = f'{factor:.2f}'
        with tempfile.TemporaryDirectory() as workdir:
            base_cfg = maybe_forced_preset(presets.OpTracing)
            base_cfg.logconf = 'smtracing'
            base_cfg.extra_args += [
                '--sm-factor', factor_text
            ]
            logger.info(f"Running Salus with sm factor: {factor}")

            # The training job, plus the pipe used to wait on/release it.
            workload, pipe = create_train(Executor.Salus, 0, workdir)
            run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir / "alone" / factor_text)
            run_seq(
                run_cfg,
                workload,
                RunFn(lambda *args, **kwargs: wait_on_pipe(pipe)),
                RunFn(lambda *args, **kwargs: release_on_pipe(pipe)),
            )


@case_switch_main
def main():
    """Return the experiment case functions passed to ``case_switch_main``.

    :return: tuple of case callables, in the order listed.
    """
    # NOTE(review): ``salus_factor`` is not defined in the visible part of
    # this file -- confirm it exists before relying on this case.
    return salus, tfmps, train_alone, salus_factor
Loading

0 comments on commit 01a36d6

Please sign in to comment.