From e0f0434078900f772f4e11e5ca6d9ae837a9584d Mon Sep 17 00:00:00 2001 From: Erika Hunhoff <54562339+hunhoffe@users.noreply.github.com> Date: Sun, 4 Sep 2022 19:31:58 -0600 Subject: [PATCH] Prepare to integrate new scheduler into apache/openwhisk-deploy-kube (#5278) * Kubernetes Akka bootstrap for controller * Update cluster management for the scheduler to help with k8s deployment * Made changes to tools to try to integrate scheduler into travis/CI build process * Added scheduler Dockerfile.cov * Use consistent ordering of components * remove canonical.port setting in scheduler, controller * Remove unneeded dependency from controller, scheduler * Remove cluster creation from ShardingContainerPoolBalancer * Remove trailing whitespace Signed-off-by: Erika Hunhoff * Revert akka cluster changes to controller Signed-off-by: Erika Hunhoff Signed-off-by: Erika Hunhoff --- Jenkinsfile | 2 +- build.gradle | 2 +- .../ShardingContainerPoolBalancer.scala | 2 +- core/scheduler/Dockerfile.cov | 30 +++++++++++++++++++ core/scheduler/build.gradle | 2 ++ .../src/main/resources/application.conf | 11 +++++++ .../openwhisk/core/scheduler/Scheduler.scala | 8 +++++ tools/admin/wskadmin | 2 +- tools/build/checkLogs.py | 1 + tools/build/citool | 2 ++ tools/build/redo | 5 ++++ tools/jenkins/apache/dockerhub.groovy | 2 +- tools/travis/distDocker.sh | 1 + 13 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 core/scheduler/Dockerfile.cov diff --git a/Jenkinsfile b/Jenkinsfile index 3e54c99c490..7f3863514c4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -49,7 +49,7 @@ timeout(time: 12, unit: 'HOURS') { sh "docker run -d --restart=always --name registry -v \"$HOME\"/certs:/certs \ -e REGISTRY_HTTP_ADDR=0.0.0.0:${port} -e REGISTRY_HTTP_TLS_CERTIFICATE=/certs/${cert} \ -e REGISTRY_HTTP_TLS_KEY=/certs/${key} -p ${port}:${port} registry:2" - // Build the controller and invoker images. + // Build the controller, scheduler, and invoker images. 
sh "./gradlew distDocker -PdockerRegistry=${domainName}:${port}" //Install the various modules like standalone sh "./gradlew install" diff --git a/build.gradle b/build.gradle index a0a49e8a7d2..cebe54c7bc7 100644 --- a/build.gradle +++ b/build.gradle @@ -42,7 +42,7 @@ subprojects { def cons = project.getDependencies().getConstraints() def akka = ['akka-actor', 'akka-cluster', 'akka-cluster-metrics', 'akka-cluster-tools', 'akka-coordination', 'akka-discovery', 'akka-distributed-data', 'akka-protobuf', 'akka-remote', 'akka-slf4j', - 'akka-stream', 'akka-stream-testkit', 'akka-testkit'] + 'akka-stream', 'akka-stream-testkit', 'akka-testkit', 'akka-persistence', 'akka-cluster-sharding'] def akkaHttp = ['akka-http', 'akka-http-core', 'akka-http-spray-json', 'akka-http-testkit', 'akka-http-xml', 'akka-parsing', 'akka-http2-support'] diff --git a/core/controller/src/main/scala/org/apache/openwhisk/core/loadBalancer/ShardingContainerPoolBalancer.scala b/core/controller/src/main/scala/org/apache/openwhisk/core/loadBalancer/ShardingContainerPoolBalancer.scala index 5f7b9f05cd6..20f1581de7f 100644 --- a/core/controller/src/main/scala/org/apache/openwhisk/core/loadBalancer/ShardingContainerPoolBalancer.scala +++ b/core/controller/src/main/scala/org/apache/openwhisk/core/loadBalancer/ShardingContainerPoolBalancer.scala @@ -158,7 +158,7 @@ class ShardingContainerPoolBalancer( AkkaManagement(actorSystem).start() ClusterBootstrap(actorSystem).start() Some(Cluster(actorSystem)) - } else if (loadConfigOrThrow[Seq[String]]("akka.cluster.seed-nodes").nonEmpty) { + } else if (loadConfigOrThrow[Seq[String]]("akka.cluster.seed-nodes").nonEmpty) { Some(Cluster(actorSystem)) } else { None diff --git a/core/scheduler/Dockerfile.cov b/core/scheduler/Dockerfile.cov new file mode 100644 index 00000000000..ef3a09471a4 --- /dev/null +++ b/core/scheduler/Dockerfile.cov @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM scheduler + +ARG OW_ROOT_DIR + +USER root +RUN mkdir -p /coverage/common && \ + mkdir -p /coverage/scheduler && \ + mkdir -p "${OW_ROOT_DIR}/common/scala/build" && \ + mkdir -p "${OW_ROOT_DIR}/core/scheduler/build" && \ + ln -s /coverage/common "${OW_ROOT_DIR}/common/scala/build/scoverage" && \ + ln -s /coverage/scheduler "${OW_ROOT_DIR}/core/scheduler/build/scoverage" + +COPY build/tmp/docker-coverage /scheduler/ diff --git a/core/scheduler/build.gradle b/core/scheduler/build.gradle index 02c4d2be23f..ae5068ce914 100644 --- a/core/scheduler/build.gradle +++ b/core/scheduler/build.gradle @@ -65,6 +65,8 @@ dependencies { implementation "org.scala-lang:scala-library:${gradle.scala.version}" implementation project(':common:scala') implementation "io.altoo:akka-kryo-serialization_${gradle.scala.depVersion}:1.0.0" + implementation "com.lightbend.akka.management:akka-management-cluster-bootstrap_${gradle.scala.depVersion}:${gradle.akka_management.version}" + implementation "com.lightbend.akka.discovery:akka-discovery-kubernetes-api_${gradle.scala.depVersion}:${gradle.akka_management.version}" } // workaround for akka-grpc diff --git a/core/scheduler/src/main/resources/application.conf b/core/scheduler/src/main/resources/application.conf index 23ca734bb79..1f696fc43b7 100644 --- 
a/core/scheduler/src/main/resources/application.conf +++ b/core/scheduler/src/main/resources/application.conf @@ -44,9 +44,20 @@ akka { transport = tcp } } + + cluster { + shutdown-after-unsuccessful-join-seed-nodes = 60s + + # Disable legacy metrics in akka-cluster. + metrics.enabled = off + } } whisk { + cluster { + use-cluster-bootstrap: false + } + # tracing configuration tracing { component = "Scheduler" diff --git a/core/scheduler/src/main/scala/org/apache/openwhisk/core/scheduler/Scheduler.scala b/core/scheduler/src/main/scala/org/apache/openwhisk/core/scheduler/Scheduler.scala index 128c004ef53..f32520cc756 100644 --- a/core/scheduler/src/main/scala/org/apache/openwhisk/core/scheduler/Scheduler.scala +++ b/core/scheduler/src/main/scala/org/apache/openwhisk/core/scheduler/Scheduler.scala @@ -21,6 +21,8 @@ import akka.Done import akka.actor.{ActorRef, ActorRefFactory, ActorSelection, ActorSystem, CoordinatedShutdown, Props} import akka.http.scaladsl.Http import akka.http.scaladsl.model.{HttpRequest, HttpResponse} +import akka.management.scaladsl.AkkaManagement +import akka.management.cluster.bootstrap.ClusterBootstrap import akka.pattern.ask import akka.util.Timeout import com.typesafe.config.ConfigValueFactory @@ -258,6 +260,7 @@ trait SchedulerCore { object Scheduler { protected val protocol = loadConfigOrThrow[String]("whisk.scheduler.protocol") + protected val useClusterBootstrap = loadConfigOrThrow[Boolean]("whisk.cluster.use-cluster-bootstrap") val topicPrefix = loadConfigOrThrow[String](ConfigKeys.kafkaTopicsPrefix) @@ -289,6 +292,11 @@ object Scheduler { implicit val logger = new AkkaLogging(akka.event.Logging.getLogger(actorSystem, this)) + if (useClusterBootstrap) { + AkkaManagement(actorSystem).start() + ClusterBootstrap(actorSystem).start() + } + // Prepare Kamon shutdown CoordinatedShutdown(actorSystem).addTask(CoordinatedShutdown.PhaseActorSystemTerminate, "shutdownKamon") { () => logger.info(this, s"Shutting down Kamon with coordinated 
shutdown") diff --git a/tools/admin/wskadmin b/tools/admin/wskadmin index f65d7c29b32..126e7847d57 100755 --- a/tools/admin/wskadmin +++ b/tools/admin/wskadmin @@ -169,7 +169,7 @@ def parseArgs(): subparser.required = True subcmd = subparser.add_parser('get', help='get logs') - subcmd.add_argument('components', help='components, one or more of [controllerN, invokerN] where N is the instance', nargs='*', default=['controller0', 'invoker0']) + subcmd.add_argument('components', help='components, one or more of [controllerN, schedulerN, invokerN] where N is the instance', nargs='*', default=['controller0', 'scheduler0', 'invoker0']) subcmd.add_argument('-t', '--tid', help='retrieve logs for the transaction id') subcmd.add_argument('-g', '--grep', help='retrieve logs that match grep expression') diff --git a/tools/build/checkLogs.py b/tools/build/checkLogs.py index b436042a3cf..049156b32b8 100755 --- a/tools/build/checkLogs.py +++ b/tools/build/checkLogs.py @@ -96,6 +96,7 @@ def colorize(code, string): ("db-triggers.log", {"db"}, [ partial(database_has_at_most_x_entries, 0) ]), # Assert that stdout of the container is correctly piped and empty ("controller0.log", {"system"}, [ partial(file_has_at_most_x_bytes, 0) ]), + ("scheduler0.log", {"system"}, [ partial(file_has_at_most_x_bytes, 0) ]), ("invoker0.log", {"system"}, [ partial(file_has_at_most_x_bytes, 0) ]) ] diff --git a/tools/build/citool b/tools/build/citool index 8a5403d3fae..1202e2f8470 100755 --- a/tools/build/citool +++ b/tools/build/citool @@ -87,6 +87,7 @@ def parseArgs(): subparser.add_argument('-s', '--sort', help='sort logs by timestamp', action='store_true') subparser.add_argument('-n', '--invokers', help='number of invokers', type=int, default=3) subparser.add_argument('-c', '--controllers', help='number of controllers', type=int, default=1) + subparser.add_argument('--schedulers', help='number of schedulers', type=int, default=1) return parser.parse_args() @@ -312,6 +313,7 @@ def cat(args):
else: components = { 'controller': args.controllers, + 'scheduler': args.schedulers, 'invoker': args.invokers } logs = map(getComponentLogs, getComponentList(components)) diff --git a/tools/build/redo b/tools/build/redo index b149c1aa3fa..b2ec0881289 100755 --- a/tools/build/redo +++ b/tools/build/redo @@ -272,6 +272,11 @@ Components = [ modes = 'clean', gradle = 'core:controller'), + makeComponent('scheduler', + 'build/deploy scheduler', + modes = 'clean', + gradle = 'core:scheduler'), + makeComponent('invoker', 'build/deploy invoker', modes = 'clean', diff --git a/tools/jenkins/apache/dockerhub.groovy b/tools/jenkins/apache/dockerhub.groovy index 6ff1c591d9d..65734952294 100644 --- a/tools/jenkins/apache/dockerhub.groovy +++ b/tools/jenkins/apache/dockerhub.groovy @@ -30,7 +30,7 @@ node('ubuntu') { withCredentials([usernamePassword(credentialsId: 'openwhisk_dockerhub', passwordVariable: 'DOCKER_PASSWORD', usernameVariable: 'DOCKER_USER')]) { sh 'HOME="$WORKSPACE/local-docker-cfg" docker login -u ${DOCKER_USER} -p ${DOCKER_PASSWORD}' } - def PUSH_CMD = "./gradlew :core:controller:distDocker :core:invoker:distDocker :core:standalone:distDocker :core:monitoring:user-events:distDocker :tools:ow-utils:distDocker :core:cosmos:cache-invalidator:distDocker -PdockerRegistry=docker.io -PdockerImagePrefix=openwhisk" + def PUSH_CMD = "./gradlew :core:controller:distDocker :core:scheduler:distDocker :core:invoker:distDocker :core:standalone:distDocker :core:monitoring:user-events:distDocker :tools:ow-utils:distDocker :core:cosmos:cache-invalidator:distDocker -PdockerRegistry=docker.io -PdockerImagePrefix=openwhisk" def gitCommit = sh(returnStdout: true, script: 'git rev-parse HEAD').trim() def shortCommit = gitCommit.take(7) sh "./gradlew clean" diff --git a/tools/travis/distDocker.sh b/tools/travis/distDocker.sh index a859beca289..13c8450d98b 100755 --- a/tools/travis/distDocker.sh +++ b/tools/travis/distDocker.sh @@ -29,6 +29,7 @@ TERM=dumb ./gradlew clean # Run a clean 
step before build TERM=dumb ./gradlew distDocker -PdockerImagePrefix=testing $GRADLE_PROJS_SKIP TERM=dumb ./gradlew :core:controller:distDockerCoverage -PdockerImagePrefix=testing +TERM=dumb ./gradlew :core:scheduler:distDockerCoverage -PdockerImagePrefix=testing TERM=dumb ./gradlew :core:invoker:distDockerCoverage -PdockerImagePrefix=testing TERM=dumb ./gradlew :core:standalone:build