-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement fully parallel upload processing
This adds another feature/rollout flag which prefers the parallel upload processing pipeline in favor of running it as an experiment.

Upload Processing can run in essentially 4 modes:
- Completely serial processing
- Serial processing, but running "experiment" code (`is_experiment_serial`):
  - In this mode, each `UploadProcessor` task saves a copy of the raw upload, as well as a copy of the final report (`is_final`) for later verification.
- Parallel processing, but running "experiment" code (`is_experiment_parallel`):
  - In this mode, another parallel set of `UploadProcessor` tasks runs *after* the main set of tasks.
  - These tasks are using the copied-over raw uploads that were prepared by the `is_experiment_serial` tasks to do their processing.
  - These tasks are not persisting any of their results in the database, instead the final `UploadFinisher` task will launch the `ParallelVerification` task.
- Fully parallel processing (`is_fully_parallel`):
  - In this mode, the final `UploadFinisher` task is responsible for merging the final report and persisting it.

An example Task chain might look like this, in "experiment" mode:
- Upload
  - UploadProcessor (`is_experiment_serial`)
  - UploadProcessor (`is_experiment_serial`)
  - UploadProcessor (`is_experiment_serial`, `is_final`)
  - UploadFinisher
    - UploadProcessor (`is_experiment_parallel`)
    - UploadProcessor (`is_experiment_parallel`)
    - UploadProcessor (`is_experiment_parallel`)
    - UploadFinisher (`is_experiment_parallel`)
      - ParallelVerification

Once implemented, `is_fully_parallel` will look like this:
- Upload
  - UploadProcessor (`is_fully_parallel`)
  - UploadProcessor (`is_fully_parallel`)
  - UploadProcessor (`is_fully_parallel`)
  - UploadFinisher (`is_fully_parallel`)
- Loading branch information
Showing
8 changed files
with
329 additions
and
184 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import dataclasses | ||
from typing import Self | ||
|
||
from rollouts import ( | ||
FULLY_PARALLEL_UPLOAD_PROCESSING_BY_REPO, | ||
PARALLEL_UPLOAD_PROCESSING_BY_REPO, | ||
) | ||
|
||
|
||
@dataclasses.dataclass(frozen=True) | ||
class ParallelProcessing: | ||
""" | ||
This encapsulates Parallel Upload Processing logic | ||
Upload Processing can run in essentially 4 modes: | ||
- Completely serial processing | ||
- Serial processing, but running "experiment" code (`is_experiment_serial`): | ||
- In this mode, each `UploadProcessor` task saves a copy of the raw upload, | ||
as well as a copy of the final report (`is_final`) for later verification. | ||
- Parallel processing, but running "experiment" code (`is_experiment_parallel`): | ||
- In this mode, another parallel set of `UploadProcessor` tasks runs *after* | ||
the main set up tasks. | ||
- These tasks are using the copied-over raw uploads that were prepared by | ||
the `is_experiment_serial` tasks to do their processing. | ||
- These tasks are not persisting any of their results in the database, | ||
instead the final `UploadFinisher` task will launch the `ParallelVerification` task. | ||
- Fully parallel processing (`is_fully_parallel`): | ||
- In this mode, the final `UploadFinisher` task is responsible for merging | ||
the final report and persisting it. | ||
An example Task chain might look like this, in "experiment" mode: | ||
- Upload | ||
- UploadProcessor (`is_experiment_serial`) | ||
- UploadProcessor (`is_experiment_serial`) | ||
- UploadProcessor (`is_experiment_serial`, `is_final`) | ||
- UploadFinisher | ||
- UploadProcessor (`is_experiment_parallel`) | ||
- UploadProcessor (`is_experiment_parallel`) | ||
- UploadProcessor (`is_experiment_parallel`) | ||
- UploadFinisher (`is_experiment_parallel`) | ||
- ParallelVerification | ||
The `is_fully_parallel` mode looks like this: | ||
- Upload | ||
- UploadProcessor (`is_fully_parallel`) | ||
- UploadProcessor (`is_fully_parallel`) | ||
- UploadProcessor (`is_fully_parallel`) | ||
- UploadFinisher (`is_fully_parallel`) | ||
""" | ||
|
||
run_experiment: bool = False | ||
run_fully_parallel: bool = False | ||
|
||
is_fully_parallel: bool = False | ||
is_experiment_parallel: bool = False | ||
is_experiment_serial: bool = False | ||
is_final: bool = False | ||
parallel_idx: int | None = None | ||
|
||
def initial(repoid: int) -> Self: | ||
run_fully_parallel = FULLY_PARALLEL_UPLOAD_PROCESSING_BY_REPO.check_value( | ||
identifier=repoid, default=False | ||
) | ||
run_experiment = ( | ||
False | ||
if run_fully_parallel | ||
else PARALLEL_UPLOAD_PROCESSING_BY_REPO.check_value( | ||
identifier=repoid, default=False | ||
) | ||
) | ||
|
||
return ParallelProcessing( | ||
run_fully_parallel=run_fully_parallel, | ||
run_experiment=run_experiment, | ||
is_fully_parallel=run_fully_parallel, | ||
) | ||
|
||
def from_task_args( | ||
repoid: int, | ||
in_parallel: bool = False, | ||
fully_parallel: bool = False, | ||
is_final: bool = False, | ||
parallel_idx: bool | None = None, | ||
**kwargs, | ||
) -> Self: | ||
slf = ParallelProcessing.initial(repoid) | ||
|
||
if fully_parallel: | ||
return dataclasses.replace(slf, is_fully_parallel=True) | ||
|
||
is_experiment_parallel = slf.run_experiment and in_parallel | ||
is_experiment_serial = slf.run_experiment and not in_parallel | ||
|
||
return dataclasses.replace( | ||
slf, | ||
is_experiment_parallel=is_experiment_parallel, | ||
is_experiment_serial=is_experiment_serial, | ||
is_final=is_final, | ||
parallel_idx=parallel_idx, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.