Skip to content

Commit

Permalink
scheduler: if a job is BUDA, we need to return usage info (CPU, GPU)
Browse files Browse the repository at this point in the history
with the workunit rather than the app version.
This commit lays the groundwork for this.
  • Loading branch information
davidpanderson committed Dec 13, 2024
1 parent fc745ae commit b10221b
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 43 deletions.
4 changes: 4 additions & 0 deletions db/boinc_db_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.

// structures corresponding to various DB tables.
// In some cases the structures have extra fields,
// used by the server code but not stored in the DB

#ifndef _BOINC_DB_TYPES_
#define _BOINC_DB_TYPES_

Expand Down
10 changes: 7 additions & 3 deletions sched/sched_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,15 @@ static bool scan_work_array() {
//
result.id = wu_result.resultid;
if (result_still_sendable(result, wu)) {
bool is_buda, is_ok;
HOST_USAGE hu;
if (!handle_wu_plan_class(wu, bavp, hu)) {
continue;
check_buda_plan_class(wu, hu, is_buda, is_ok);
if (is_buda) {
if (!is_ok) continue;
} else {
hu = bavp->host_usage;
}
add_result_to_reply(result, wu, bavp, hu, false);
add_result_to_reply(result, wu, bavp, hu, is_buda, false);

// add_result_to_reply() fails only in pathological cases -
// e.g. we couldn't update the DB record or modify XML fields.
Expand Down
10 changes: 9 additions & 1 deletion sched/sched_assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,15 @@ static int send_assigned_job(ASSIGNMENT& asg) {
DB_ID_TYPE result_id = boinc_db.insert_id();
SCHED_DB_RESULT result;
retval = result.lookup_id(result_id);
add_result_to_reply(result, wu, bavp, bavp->host_usage, false);
bool is_buda, is_ok;
HOST_USAGE hu;
check_buda_plan_class(wu, hu, is_buda, is_ok);
if (is_buda) {
if (!is_ok) return -1;
} else {
hu = bavp->host_usage;
}
add_result_to_reply(result, wu, bavp, hu, is_buda, false);

if (config.debug_assignment) {
log_messages.printf(MSG_NORMAL,
Expand Down
10 changes: 7 additions & 3 deletions sched/sched_locality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,11 +332,15 @@ static int possibly_send_result(SCHED_DB_RESULT& result) {
if (count > 0) return ERR_WU_USER_RULE;
}

bool is_buda, is_ok;
HOST_USAGE hu;
if (!handle_wu_plan_class(wu, bavp, hu)) {
return false;
check_buda_plan_class(wu, hu, is_buda, is_ok);
if (is_buda) {
if (!is_ok) ERR_NO_APP_VERSION;
} else {
hu = bavp->host_usage;
}
return add_result_to_reply(result, wu, bavp, hu, true);
return add_result_to_reply(result, wu, bavp, hu, is_buda, false);
}

// Retrieves and returns a trigger instance identified by the given
Expand Down
2 changes: 1 addition & 1 deletion sched/sched_nci.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ static int send_job_for_app(APP& app) {
"Sending non-CPU-intensive job: %s\n", wu.name
);
}
add_result_to_reply(result, wu, bavp, bavp->host_usage, false);
add_result_to_reply(result, wu, bavp, bavp->host_usage, false, false);
return 0;
}
log_messages.printf(MSG_NORMAL,
Expand Down
14 changes: 8 additions & 6 deletions sched/sched_resend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,15 @@ bool resend_lost_work() {
);
g_reply->insert_message(warning_msg, "low");
} else {
HOST_USAGE host_usage;
if (!handle_wu_plan_class(wu, bavp, host_usage)) {
continue;
bool is_buda, is_ok;
HOST_USAGE hu;
check_buda_plan_class(wu, hu, is_buda, is_ok);
if (is_buda) {
if (!is_ok) continue;
} else {
hu = bavp->host_usage;
}
retval = add_result_to_reply(
result, wu, bavp, host_usage, false
);
retval = add_result_to_reply(result, wu, bavp, hu, is_buda, false);
if (retval) {
log_messages.printf(MSG_CRITICAL,
"[HOST#%lu] failed to send [RESULT#%lu]\n",
Expand Down
21 changes: 18 additions & 3 deletions sched/sched_score.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ static int get_size_class(APP& app, double es) {
return app.n_size_classes - 1;
}

// Construct a JOB with every byte of the object set to zero.
// NOTE(review): this runs after member construction, so it also overwrites
// anything a member's own constructor may have set - confirm that is intended.
//
JOB::JOB() {
    memset(static_cast<void*>(this), 0, sizeof(*this));
}

// Assign a score to this job,
// representing the value of sending the job to this host.
// Also do some initial screening,
Expand Down Expand Up @@ -226,8 +230,15 @@ void send_work_score_type(int rt) {

// check WU plan class (for BUDA jobs)
//
if (!handle_wu_plan_class(wu, job.bavp, job.host_usage)) {
continue;
bool is_buda, is_ok;
HOST_USAGE hu;
check_buda_plan_class(wu, hu, is_buda, is_ok);
if (is_buda) {
if (!is_ok) continue;
job.host_usage = hu;
job.is_buda = true;
} else {
job.host_usage = job.bavp->host_usage;
}

job.index = i;
Expand Down Expand Up @@ -357,7 +368,11 @@ void send_work_score_type(int rt) {
SCHED_DB_RESULT result;
result.id = wu_result.resultid;
if (result_still_sendable(result, wu)) {
add_result_to_reply(result, wu, job.bavp, job.host_usage, false);
add_result_to_reply(
result, wu, job.bavp, job.host_usage,
job.is_buda,
false // locality scheduling
);

// add_result_to_reply() fails only in pathological cases -
// e.g. we couldn't update the DB record or modify XML fields.
Expand Down
4 changes: 4 additions & 0 deletions sched/sched_score.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@ struct JOB {
double score;
APP* app;
BEST_APP_VERSION* bavp;
bool is_buda;
HOST_USAGE host_usage;
// if is_buda, usage returned by WU plan class
// else a copy of bavp->host_usage

JOB();
bool get_score(int);
};

Expand Down
52 changes: 29 additions & 23 deletions sched/sched_send.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -896,44 +896,50 @@ static bool wu_has_plan_class(WORKUNIT &wu, char* buf) {
return true;
}

// If workunit has a plan class (e.g. BUDA), check it.
// In any case, fill in the HOST_USAGE
// If workunit has a plan class (e.g. BUDA)
// is_buda = true
// is_ok = false if host not capable, else true
// if is_ok, hu = host usage computed by the plan class
// else
// is_buda = false; is_ok and hu are not set
// (the caller takes host usage from the app version)
//
bool handle_wu_plan_class(
WORKUNIT &wu, BEST_APP_VERSION *bavp, HOST_USAGE &hu
void check_buda_plan_class(
WORKUNIT &wu, HOST_USAGE &hu, bool &is_buda, bool &is_ok
) {
char plan_class[256];
if (wu_has_plan_class(wu, plan_class)) {
if (!wu_has_plan_class(wu, plan_class)) {
is_buda = false;
return;
}
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] plan class: %s\n", plan_class
);
}
is_buda = true;
is_ok = true;
if (!strlen(plan_class)) {
hu.sequential_app(g_reply->host.p_fpops);
return;
}
if (!app_plan(*g_request, plan_class, hu, &wu)) {
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] plan class: %s\n", plan_class
"[version] app_plan(%s) returned false\n", plan_class
);
}
if (strlen(plan_class)) {
if (!app_plan(*g_request, plan_class, hu, &wu)) {
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] [AV#%lu] app_plan(%s) returned false\n",
bavp->avp->id, plan_class
);
}
// can't send this job
return false;
}
} else {
hu.sequential_app(g_reply->host.p_fpops);
}
} else {
hu = bavp->host_usage;
// can't send this job
is_ok = false;
}
return true;
}

int add_result_to_reply(
SCHED_DB_RESULT& result,
WORKUNIT& wu,
BEST_APP_VERSION* bavp,
HOST_USAGE &host_usage,
bool is_buda,
bool locality_scheduling
) {
int retval;
Expand Down
7 changes: 4 additions & 3 deletions sched/sched_send.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,14 @@ extern void send_work();
// Add a result to the scheduler reply (presumably; the definition is
// in sched_send.cpp - confirm details there).
// Returns nonzero on failure.
// Arguments, as used by the callers:
// HOST_USAGE&: for a BUDA job, the usage filled in by
//   check_buda_plan_class(); otherwise bavp->host_usage
// is_buda: true if the workunit has a plan class (BUDA job)
// locality_scheduling: true if called from locality scheduling
//   (presumably - confirm against the definition)
//
extern int add_result_to_reply(
SCHED_DB_RESULT& result, WORKUNIT& wu, BEST_APP_VERSION* bavp,
HOST_USAGE&,
bool is_buda,
bool locality_scheduling
);

// if WU has plan class, get corresponding host_usage
// if WU has plan class, check host, and get corresponding host_usage
//
extern bool handle_wu_plan_class(
WORKUNIT &wu, BEST_APP_VERSION *bavp, HOST_USAGE &host_usage
extern void check_buda_plan_class(
WORKUNIT &wu, HOST_USAGE &host_usage, bool &is_buda, bool &is_ok
);

inline bool is_anonymous(PLATFORM* platform) {
Expand Down
5 changes: 5 additions & 0 deletions sched/sched_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ struct USER_MESSAGE {
USER_MESSAGE(const char* m, const char*p);
};

// The resource usage (CPU, GPU, RAM) of a job,
// and estimates of its speed
// Populated by plan-class functions if there is a plan class,
// else by HOST_USAGE::sequential_app()
//
struct HOST_USAGE {
int proc_type;
double gpu_usage;
Expand Down

0 comments on commit b10221b

Please sign in to comment.