From 517e5e6305db4c94195f443345abee3bd824d31f Mon Sep 17 00:00:00 2001 From: Chirag Pandya Date: Wed, 5 Feb 2025 17:00:51 -0800 Subject: [PATCH] Back out "fbcode/gloo/examples" (#404) Summary: Put back example files that were deleted by codemod because they didn't have a TARGET Reviewed By: fduwjj Differential Revision: D69182638 --- .github/config/lint/.clang-format | 3 + gloo/examples/example1.cc | 5 +- gloo/examples/example_allreduce.cc | 153 +++++++++++++++++++++ gloo/examples/example_reduce.cc | 149 +++++++++++++++++++++ gloo/examples/looks_like_mpi.cc | 206 +++++++++++++++++++++++++++++ 5 files changed, 515 insertions(+), 1 deletion(-) create mode 100644 .github/config/lint/.clang-format create mode 100644 gloo/examples/example_allreduce.cc create mode 100644 gloo/examples/example_reduce.cc create mode 100644 gloo/examples/looks_like_mpi.cc diff --git a/.github/config/lint/.clang-format b/.github/config/lint/.clang-format new file mode 100644 index 000000000..7c5159a0f --- /dev/null +++ b/.github/config/lint/.clang-format @@ -0,0 +1,3 @@ +--- +Language: Cpp +PointerAlignment: Left diff --git a/gloo/examples/example1.cc b/gloo/examples/example1.cc index 8bdd5e63c..782c5479d 100644 --- a/gloo/examples/example1.cc +++ b/gloo/examples/example1.cc @@ -4,6 +4,8 @@ #include #include +#include +#include #include "gloo/allreduce_ring.h" #include "gloo/rendezvous/context.h" @@ -55,7 +57,8 @@ int main(void) { // attr.ai_family = AF_INET; // Force IPv4 // attr.ai_family = AF_INET6; // Force IPv6 - attr.ai_family = AF_UNSPEC; // Use either (default) + // Use either (default) + attr.ai_family = AF_UNSPEC; // A string is implicitly converted to an "attr" struct with its // hostname field populated. This will try to resolve the interface diff --git a/gloo/examples/example_allreduce.cc b/gloo/examples/example_allreduce.cc new file mode 100644 index 000000000..db3c3297b --- /dev/null +++ b/gloo/examples/example_allreduce.cc @@ -0,0 +1,153 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gloo/allreduce.h" + +#include "gloo/rendezvous/context.h" +#include "gloo/rendezvous/file_store.h" +#include "gloo/rendezvous/prefix_store.h" +#include "gloo/transport/uv/device.h" + +// Usage: +// +// Open two terminals. Run the same program in both terminals, using +// a different RANK in each. For example: +// +// A: PREFIX=test1 SIZE=2 RANK=0 example_allreduce +// B: PREFIX=test1 SIZE=2 RANK=1 example_allreduce +// +// Expected output: +// +// data[0] = 18 +// data[1] = 18 +// data[2] = 18 +// data[3] = 18 +// + +void mysum(void* c_, const void* a_, const void* b_, int n) { + printf("n=%d\r\n", n); + int* c = static_cast(c_); + const int* a = static_cast(a_); + const int* b = static_cast(b_); + for (auto i = 0; i < n; i++) { + printf("a[%d]=%d\r\n", i, a[i]); + printf("b[%d]=%d\r\n", i, b[i]); + c[i] = a[i] + b[i]; + printf("c[%d]=%d\r\n", i, c[i]); + } +} + +int main(void) { + if (getenv("PREFIX") == nullptr || getenv("SIZE") == nullptr || + getenv("RANK") == nullptr) { + std::cerr << "Please set environment variables PREFIX, SIZE, and RANK." + << std::endl; + return 1; + } + + // The following statement creates a TCP "device" for Gloo to use. + // See "gloo/transport/device.h" for more information. For the + // purposes of this example, it is sufficient to see the device as + // a factory for every communication pair. + // + // The argument to gloo::transport::tcp::CreateDevice is used to + // find the network interface to bind connection to. The attr struct + // can be populated to specify exactly which interface should be + // used, as shown below. This is useful if you have identical + // multi-homed machines that all share the same network interface + // name, for example. + // + gloo::transport::uv::attr attr; + // attr.iface = "eth0"; + // attr.iface = "ib0"; + // attr.iface = "Wi-Fi"; + + // attr.ai_family = AF_INET; // Force IPv4 + // attr.ai_family = AF_INET6; // Force IPv6 + // Use either (default) + attr.ai_family = AF_UNSPEC; + + // A string is implicitly converted to an "attr" struct with its + // hostname field populated. This will try to resolve the interface + // to use by resolving the hostname or IP address, and finding the + // corresponding network interface. + // + // Hostname "localhost" should resolve to 127.0.0.1, so using this + // implies that all connections will be local. This can be useful + // for single machine operation. + // + // auto dev = gloo::transport::tcp::CreateDevice("localhost"); + // + + auto dev = gloo::transport::uv::CreateDevice(attr); + + // Now that we have a device, we can connect all participating + // processes. We call this process "rendezvous". It can be performed + // using a shared filesystem, a Redis instance, or something else by + // extending it yourself. + // + // See "gloo/rendezvous/store.h" for the functionality you need to + // implement to create your own store for performing rendezvous. + // + // Below, we instantiate rendezvous using the filesystem, given that + // this example uses multiple processes on a single machine. + // + auto fileStore = gloo::rendezvous::FileStore("/libtmp"); + + // To be able to reuse the same store over and over again and not have + // interference between runs, we scope it to a unique prefix with the + // PrefixStore. This wraps another store and prefixes every key before + // forwarding the call to the underlying store. + std::string prefix = getenv("PREFIX"); + auto prefixStore = gloo::rendezvous::PrefixStore(prefix, fileStore); + + // Using this store, we can now create a Gloo context. The context + // holds a reference to every communication pair involving this + // process. It is used by every collective algorithm to find the + // current process's rank in the collective, the collective size, + // and setup of send/receive buffer pairs. + const int rank = atoi(getenv("RANK")); + const int size = atoi(getenv("SIZE")); + auto context = std::make_shared(rank, size); + context->connectFullMesh(prefixStore, dev); + + // All connections are now established. We can now initialize some + // test data, instantiate the collective algorithm, and run it. + size_t elements = 4; + std::vector inputPointers; + std::vector outputPointers; + for (size_t i = 0; i < elements; i++) { + int* value = reinterpret_cast(malloc(sizeof(int))); + *value = i * (rank + 1); + inputPointers.push_back(value); + int* value1 = reinterpret_cast(malloc(sizeof(int))); + *value1 = 0; + outputPointers.push_back(value1); + } + + // Configure AllreduceOptions struct + gloo::AllreduceOptions opts_(context); + opts_.setInputs(inputPointers, 1); + opts_.setOutputs(outputPointers, 1); + opts_.setAlgorithm(gloo::AllreduceOptions::Algorithm::RING); + void (*fn)(void*, const void*, const void*, int) = &mysum; + opts_.setReduceFunction(fn); + gloo::allreduce(opts_); + + // Print the result. + std::cout << "Output: " << std::endl; + for (int i = 0; i < outputPointers.size(); i++) { + std::cout << "data[" << i << "] = " << *outputPointers[i] << std::endl; + } + + return 0; +} diff --git a/gloo/examples/example_reduce.cc b/gloo/examples/example_reduce.cc new file mode 100644 index 000000000..fa01787cc --- /dev/null +++ b/gloo/examples/example_reduce.cc @@ -0,0 +1,149 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + */ + +#include +#include +#include +#include + +#include "gloo/reduce.h" +#include "gloo/rendezvous/context.h" +#include "gloo/rendezvous/file_store.h" +#include "gloo/rendezvous/prefix_store.h" +#include "gloo/transport/uv/device.h" + +// Usage: +// +// Open two terminals. Run the same program in both terminals, using +// a different RANK in each. For example: +// +// A: PREFIX=test1 SIZE=2 RANK=0 example_reduce +// B: PREFIX=test1 SIZE=2 RANK=1 example_reduce +// +// Expected output: +// +// data[0] = 0 +// data[1] = 3 +// data[2] = 6 +// data[3] = 9 +// + +void mysum(void* c_, const void* a_, const void* b_, int n) { + printf("n=%d\r\n", n); + int* c = static_cast(c_); + const int* a = static_cast(a_); + const int* b = static_cast(b_); + for (auto i = 0; i < n; i++) { + printf("a[%d]=%d\r\n", i, a[i]); + printf("b[%d]=%d\r\n", i, b[i]); + c[i] = a[i] + b[i]; + printf("c[%d]=%d\r\n", i, c[i]); + } +} + +int main(void) { + // Unrelated to the example: perform some sanity checks. + if (getenv("PREFIX") == nullptr || getenv("SIZE") == nullptr || + getenv("RANK") == nullptr) { + std::cerr << "Please set environment variables PREFIX, SIZE, and RANK." + << std::endl; + return 1; + } + + // The following statement creates a TCP "device" for Gloo to use. + // See "gloo/transport/device.h" for more information. For the + // purposes of this example, it is sufficient to see the device as + // a factory for every communication pair. + // + // The argument to gloo::transport::tcp::CreateDevice is used to + // find the network interface to bind connection to. The attr struct + // can be populated to specify exactly which interface should be + // used, as shown below. This is useful if you have identical + // multi-homed machines that all share the same network interface + // name, for example. + // + gloo::transport::uv::attr attr; + // attr.iface = "eth0"; + // attr.iface = "ib0"; + // attr.iface = "Wi-Fi"; + + // attr.ai_family = AF_INET; // Force IPv4 + // attr.ai_family = AF_INET6; // Force IPv6 + // Use either (default) + attr.ai_family = AF_UNSPEC; + + // A string is implicitly converted to an "attr" struct with its + // hostname field populated. This will try to resolve the interface + // to use by resolving the hostname or IP address, and finding the + // corresponding network interface. + // + // Hostname "localhost" should resolve to 127.0.0.1, so using this + // implies that all connections will be local. This can be useful + // for single machine operation. + // + // auto dev = gloo::transport::tcp::CreateDevice("localhost"); + // + + auto dev = gloo::transport::uv::CreateDevice(attr); + + // Now that we have a device, we can connect all participating + // processes. We call this process "rendezvous". It can be performed + // using a shared filesystem, a Redis instance, or something else by + // extending it yourself. + // + // See "gloo/rendezvous/store.h" for the functionality you need to + // implement to create your own store for performing rendezvous. + // + // Below, we instantiate rendezvous using the filesystem, given that + // this example uses multiple processes on a single machine. + // + auto fileStore = gloo::rendezvous::FileStore("/libtmp"); + + // To be able to reuse the same store over and over again and not have + // interference between runs, we scope it to a unique prefix with the + // PrefixStore. This wraps another store and prefixes every key before + // forwarding the call to the underlying store. + std::string prefix = getenv("PREFIX"); + auto prefixStore = gloo::rendezvous::PrefixStore(prefix, fileStore); + + // Using this store, we can now create a Gloo context. The context + // holds a reference to every communication pair involving this + // process. It is used by every collective algorithm to find the + // current process's rank in the collective, the collective size, + // and setup of send/receive buffer pairs. + const int rank = atoi(getenv("RANK")); + const int size = atoi(getenv("SIZE")); + auto context = std::make_shared(rank, size); + context->connectFullMesh(prefixStore, dev); + + // All connections are now established. We can now initialize some + // test data, instantiate the collective algorithm, and run it. + int* inputPointers = reinterpret_cast(malloc(sizeof(int) * 4)); + int* outputPointers = reinterpret_cast(malloc(sizeof(int) * 4)); + gloo::ReduceOptions opts(context); + opts.setInput(inputPointers, 4); + opts.setOutput(outputPointers, 4); + for (int i = 0; i < 4; i++) { + inputPointers[i] = i * (rank + 1); + outputPointers[i] = 0; + } + + void (*fn)(void*, const void*, const void*, int) = &mysum; + opts.setReduceFunction(fn); + + // A small maximum segment size triggers code paths where we'll + // have a number of segments larger than the lower bound of + // twice the context size. + opts.setMaxSegmentSize(128); + opts.setRoot(size - 1); + reduce(opts); + + // Print the result. + std::cout << "Output: " << std::endl; + for (int i = 0; i < 4; i++) { + std::cout << "data = " << outputPointers[i] << std::endl; + } + + return 0; +} diff --git a/gloo/examples/looks_like_mpi.cc b/gloo/examples/looks_like_mpi.cc new file mode 100644 index 000000000..a9621c454 --- /dev/null +++ b/gloo/examples/looks_like_mpi.cc @@ -0,0 +1,206 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + */ + +#include +#include + +#include +#include +#include +#include + +#include "gloo/allreduce.h" +#include "gloo/barrier.h" +#include "gloo/rendezvous/context.h" +#include "gloo/rendezvous/file_store.h" +#include "gloo/rendezvous/prefix_store.h" +#include "gloo/transport/tcp/device.h" + +// clang-format off +#define ASSERT(expr) \ + do { \ + if (!(expr)) { \ + throw std::runtime_error("Assertion failed: " #expr); \ + } \ + } while (0); +// clang-format on + +// Global context +std::shared_ptr kContext; + +// Make this example look like MPI code +using MPI_Comm = int; +const MPI_Comm MPI_COMM_WORLD = 0; + +enum MPI_Datatype { + MPI_INT, +}; + +enum MPI_Op { + MPI_SUM, +}; + +// Same prototype as MPI API. +int MPI_Comm_rank(MPI_Comm comm, int* rank) { + ASSERT(comm == MPI_COMM_WORLD); + if (rank) { + *rank = kContext->rank; + } + return 0; +} + +// Same prototype as MPI API. +int MPI_Comm_size(MPI_Comm comm, int* size) { + ASSERT(comm == MPI_COMM_WORLD); + if (size) { + *size = kContext->size; + } + return 0; +} + +// Same prototype as MPI API. +int MPI_Barrier(MPI_Comm comm) { + ASSERT(comm == MPI_COMM_WORLD); + gloo::BarrierOptions opts(kContext); + gloo::barrier(opts); + return 0; +} + +// Same prototype +int MPI_Allreduce(const void* sendbuf, void* recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { + ASSERT(datatype == MPI_INT); + ASSERT(op == MPI_SUM); + ASSERT(comm == MPI_COMM_WORLD); + gloo::AllreduceOptions opts(kContext); + opts.setInput(const_cast(reinterpret_cast(sendbuf)), count); + opts.setOutput(static_cast(recvbuf), count); + typedef void (*ReduceFunction)(void*, const void*, const void*, size_t); + opts.setReduceFunction(static_cast(&gloo::sum)); + gloo::allreduce(opts); + return 0; +} + +// Actual prototype: +// +// int MPI_Recv( +// void *buf, +// int count, +// MPI_Datatype datatype, +// int source, +// int tag, +// MPI_Comm comm, +// MPI_Status *status); +// +// Implementation below: +// - doesn't use MPI_Datatype +// - doesn't take MPI_Comm argument but uses global +// - doesn't return an MPI_Status object +// +int MPI_Recv(void* buf, ssize_t bytes, int source, int tag, + MPI_Comm comm) { // NOLINT unused parameter + auto ubuf = kContext->createUnboundBuffer(buf, bytes); + ubuf->recv(source, tag); + ubuf->waitRecv(); + return 0; +} + +// Actual prototype: +// +// int MPI_Send( +// const void *buf, +// int count, +// MPI_Datatype datatype, +// int dest, +// int tag, +// MPI_Comm comm); +// +// Implementation below: +// - doesn't use MPI_Datatype +// +int MPI_Send(const void* cbuf, ssize_t bytes, int dest, int tag, + MPI_Comm comm) { // NOLINT unused parameter + // Argument is logically const if we're only sending. + auto ubuf = kContext->createUnboundBuffer(const_cast(cbuf), bytes); + ubuf->send(dest, tag); + ubuf->waitSend(); + return 0; +} + +// Entrypoint of this example. +int run() { + int rank; + int size; + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + // Send on rank 0 + if (rank == 0) { + const int dst = 1; + const int tag = 1234; + int pid = getpid(); + MPI_Send(&pid, sizeof(pid), dst, tag, MPI_COMM_WORLD); + std::cout << "Sent to rank " << dst << ": " << pid << std::endl; + } + + // Recv on rank 1 + if (rank == 1) { + const int src = 0; + const int tag = 1234; + int pid = -1; + MPI_Recv(&pid, sizeof(pid), src, tag, MPI_COMM_WORLD); + std::cout << "Received from rank " << src << ": " << pid << std::endl; + } + + // Run allreduce on the number 1 + { + const int input = 1; + int output = -1; + MPI_Allreduce(&input, &output, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + std::cout << "Result of allreduce on " << input << ": " << output + << std::endl; + } + + // Barrier before exit + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} + +// See example1.cc in this directory for a walkthrough of initialization. +void init(const std::string& path) { + if (getenv("PREFIX") == nullptr || getenv("SIZE") == nullptr || + getenv("RANK") == nullptr) { + std::cerr << "Please set environment variables PREFIX, SIZE, and RANK." + << std::endl; + exit(1); + } + + const std::string prefix = getenv("PREFIX"); + const int rank = atoi(getenv("RANK")); + const int size = atoi(getenv("SIZE")); + + // Initialize store + auto fileStore = gloo::rendezvous::FileStore(path); + auto prefixStore = gloo::rendezvous::PrefixStore(prefix, fileStore); + + // Initialize device + gloo::transport::tcp::attr attr; + attr.iface = "eth0"; + auto dev = gloo::transport::tcp::CreateDevice("localhost"); + + // Initialize global context + auto context = std::make_shared(rank, size); + context->connectFullMesh(prefixStore, dev); + kContext = std::move(context); +} + +int main(int argc, char** argv) { + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " PATH" << std::endl; + exit(1); + } + init(argv[1]); + return run(); +}