From de886e5b9b591f87977c8721f2ad81594ef638c9 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Sat, 23 Dec 2023 15:09:33 +0100 Subject: [PATCH 01/17] draft --- lib/mixer.ex | 72 +++++++++++++++++++++++++++++++++++----------- lib/splitter.ex | 76 ++++++++++++++++++++++++++++++------------------- 2 files changed, 102 insertions(+), 46 deletions(-) diff --git a/lib/mixer.ex b/lib/mixer.ex index 33689f0..5925f4c 100644 --- a/lib/mixer.ex +++ b/lib/mixer.ex @@ -8,7 +8,10 @@ defmodule Strom.Mixer do running: false, data: %{}, buffer: @buffer, - no_data_counter: 0 + no_data_counter: 0, + tasks: %{}, + consumer: nil + def start(opts \\ []) when is_list(opts) do state = %__MODULE__{ @@ -42,11 +45,20 @@ defmodule Strom.Mixer do new_stream = Stream.resource( - fn -> mixer end, + fn -> GenServer.call(mixer.pid, {:register_consumer, self()}) end, fn mixer -> + case GenServer.call(mixer.pid, :get_data) do {:ok, {data, no_data_counter}} -> - maybe_wait(no_data_counter, 0) +## maybe_wait(no_data_counter, 0) +# if rem(no_data_counter, 10) == 9 do + if length(data) == 0 do + IO.inspect(no_data_counter, label: "no_data_counter_mixer: #{name}") + receive do + :continue -> + flush() + end + end {data, mixer} {:error, :done} -> @@ -66,24 +78,37 @@ defmodule Strom.Mixer do def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) defp run_streams(streams, pid, buffer) do - Enum.map(streams, fn {{name, fun}, stream} -> - async_run_stream({name, fun}, stream, buffer, pid) + Enum.reduce(streams, %{}, fn {{name, fun}, stream}, acc -> + task = async_run_stream({name, fun}, stream, buffer, pid) + Map.put(acc, {name, fun}, task) end) end defp async_run_stream({name, fun}, stream, buffer, pid) do Task.async(fn -> stream - |> Stream.each(fn el -> - if fun.(el) do - data_length = GenServer.call(pid, {:new_data, {name, fun}, el}) - maybe_wait(data_length, buffer) - end + |> Stream.chunk_every(buffer) + |> Stream.each(fn chunk -> + {chunk, _} = Enum.split_with(chunk, fun) + GenServer.cast(pid, {:new_data, {name, fun}, chunk}) + receive do + :continue -> + flush() + end end) |> Stream.run() GenServer.call(pid, {:done, {name, fun}}) end) + |> IO.inspect(label: "mixer task") + end + + defp flush do + receive do + _ -> flush() + after + 0 -> :ok + end end defp maybe_wait(current, allowed) do @@ -95,18 +120,20 @@ defmodule Strom.Mixer do end end - def handle_call({:new_data, {name, fun}, datum}, _from, %__MODULE__{data: prev_data} = mixer) do + def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{data: prev_data} = mixer) do + if mixer.consumer, do: send(mixer.consumer, :continue) prev_data_from_stream = Map.get(prev_data, {name, fun}, []) - data_from_stream = [datum | prev_data_from_stream] - data = Map.put(prev_data, {name, fun}, data_from_stream) + data_from_stream = prev_data_from_stream ++ chunk - {:reply, length(data_from_stream), %{mixer | data: data}} + data = Map.put(prev_data, {name, fun}, data_from_stream) + + {:noreply, %{mixer | data: data}} end def handle_call({:run_streams, streams_to_mix}, _from, %__MODULE__{} = mixer) do - run_streams(streams_to_mix, mixer.pid, mixer.buffer) + tasks = run_streams(streams_to_mix, mixer.pid, mixer.buffer) - {:reply, :ok, %{mixer | running: true, streams: streams_to_mix}} + {:reply, :ok, %{mixer | running: true, streams: streams_to_mix, tasks: tasks}} end def handle_call({:done, {name, fun}}, _from, %__MODULE__{streams: streams} = mixer) do @@ -115,7 +142,13 @@ defmodule Strom.Mixer do end def handle_call(:get_data, _from, %__MODULE__{data: data, streams: streams} = mixer) do - all_data = Enum.reduce(data, [], fn {_, d}, acc -> acc ++ Enum.reverse(d) end) + all_data = Enum.reduce(data, [], fn {_, d}, acc -> acc ++ d end) + + mixer.tasks + |> Enum.shuffle() + |> Enum.each(fn {{name, fun}, task} -> + send(task.pid, :continue) + end) if length(all_data) == 0 && map_size(streams) == 0 do {:reply, {:error, :done}, mixer} @@ -133,6 +166,11 @@ defmodule Strom.Mixer do end end + def handle_call({:register_consumer, pid},_from,%__MODULE__{consumer: consumer} = mixer) do + mixer = %{mixer | consumer: pid} + {:reply, mixer, mixer} + end + def handle_call(:stop, _from, %__MODULE__{} = mixer) do {:stop, :normal, :ok, %{mixer | running: false}} end diff --git a/lib/splitter.ex b/lib/splitter.ex index 6bccf82..1cbc389 100644 --- a/lib/splitter.ex +++ b/lib/splitter.ex @@ -8,7 +8,9 @@ defmodule Strom.Splitter do partitions: %{}, running: false, buffer: @buffer, - no_data_counter: 0 + no_data_counter: 0, + task: nil, + consumers: [] def start(opts \\ []) when is_list(opts) do state = %__MODULE__{ @@ -36,19 +38,29 @@ defmodule Strom.Splitter do GenServer.call(splitter.pid, {:set_partitions, partitions}) stream_to_run = Map.fetch!(flow, name) - :ok = GenServer.call(splitter.pid, {:run_stream, stream_to_run}) + task = GenServer.call(splitter.pid, {:run_stream, stream_to_run}) sub_flow = partitions |> Enum.reduce(%{}, fn {name, fun}, flow -> stream = Stream.resource( - fn -> splitter end, + fn -> + GenServer.call(splitter.pid, {:register_consumer, self()}) + |> IO.inspect + end, fn splitter -> case GenServer.call(splitter.pid, {:get_data, {name, fun}}) do {:ok, {data, no_data_counter}} -> - maybe_wait(no_data_counter, 0) - +# if rem(no_data_counter, 10) == 9 do + if length(data) == 0 do +# Process.sleep(1) + IO.inspect(no_data_counter, label: "no_data_counter_splitter: #{name}") + receive do + :continue -> + flush() + end + end {data, splitter} {:error, :done} -> @@ -73,44 +85,47 @@ defmodule Strom.Splitter do defp async_run_stream(stream, buffer, pid) do Task.async(fn -> stream - |> Stream.each(fn el -> - data_size = GenServer.call(pid, {:new_data, el}) - maybe_wait(data_size, buffer) + |> Stream.chunk_every(buffer) + |> Stream.each(fn chunk -> + GenServer.cast(pid, {:new_data, chunk}) + receive do + :continue -> + flush() + end end) |> Stream.run() GenServer.call(pid, :done) end) + |> IO.inspect(label: "slitter task") end - defp maybe_wait(current, allowed) do - if current > allowed do - diff = current - allowed - to_sleep = trunc(:math.pow(2, diff)) - Process.sleep(to_sleep) - to_sleep + defp flush do + receive do + _ -> flush() + after + 0 -> :ok end end - def handle_call({:new_data, datum}, _from, %__MODULE__{} = splitter) do + def handle_cast({:new_data, data}, %__MODULE__{} = splitter) do new_partitions = Enum.reduce(splitter.partitions, %{}, fn {{name, fun}, prev_data}, acc -> - if fun.(datum) do - Map.put(acc, {name, fun}, [datum | prev_data]) - else - Map.put(acc, {name, fun}, prev_data) - end + {valid_data, _} = Enum.split_with(data, fun) + new_data = prev_data ++ valid_data + Map.put(acc, {name, fun}, new_data) end) - data_size = - Enum.reduce(new_partitions, 0, fn {_key, data}, acc -> acc + length(data) end) + splitter.consumers + |> Enum.shuffle() + |> Enum.each(&send(&1, :continue)) - {:reply, data_size, %{splitter | partitions: new_partitions}} + {:noreply, %{splitter | partitions: new_partitions}} end def handle_call({:run_stream, stream}, _from, %__MODULE__{} = splitter) do - async_run_stream(stream, splitter.buffer, splitter.pid) - {:reply, :ok, %{splitter | running: true}} + task = async_run_stream(stream, splitter.buffer, splitter.pid) + {:reply, :ok, %{splitter | running: true, task: task}} end def handle_call({:set_partitions, partitions}, _from, %__MODULE__{} = splitter) do @@ -125,16 +140,19 @@ defmodule Strom.Splitter do {:reply, :ok, %{splitter | running: false}} end + def handle_call({:register_consumer, pid},_from,%__MODULE__{consumers: consumers} = splitter) do + splitter = %{splitter | consumers: [pid | consumers]} + {:reply, splitter, splitter} + end + def handle_call( {:get_data, partition_fun}, _from, %__MODULE__{partitions: partitions, running: running} = splitter ) do - data = - partitions - |> Map.get(partition_fun) - |> Enum.reverse() + send(splitter.task.pid, :continue) + data = Map.get(partitions, partition_fun) if length(data) == 0 && !running do {:reply, {:error, :done}, splitter} else From 2364ff82bd0f8b4f65a58de316c351e8c7d2cbd7 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Mon, 25 Dec 2023 13:26:29 +0100 Subject: [PATCH 02/17] Generic distributor draft --- lib/cons.ex | 103 +++++++++++++++++++++++++++ lib/distributor.ex | 143 ++++++++++++++++++++++++++++++++++++++ test/distributor_test.exs | 57 +++++++++++++++ 3 files changed, 303 insertions(+) create mode 100644 lib/cons.ex create mode 100644 lib/distributor.ex create mode 100644 test/distributor_test.exs diff --git a/lib/cons.ex b/lib/cons.ex new file mode 100644 index 0000000..83108d9 --- /dev/null +++ b/lib/cons.ex @@ -0,0 +1,103 @@ +defmodule Strom.Cons do + use GenServer + + @buffer 2 + + defstruct pid: nil, + distributor_pid: nil, + running: false, + client: nil, + name: nil, + fun: nil, + data: [] + + def start({name, fun}, distributor_pid, opts \\ []) when is_list(opts) do + state = %__MODULE__{distributor_pid: distributor_pid, name: name, fun: fun, running: true} + + {:ok, pid} = GenServer.start_link(__MODULE__, state) + __state__(pid) + end + + def init(%__MODULE__{} = cons) do + {:ok, %{cons | pid: self()}} + end + + def call(cons) do + Stream.resource( + fn -> + GenServer.call(cons.pid, :register_client) + end, + fn cons -> + case GenServer.call(cons.pid, :get_data) do + {:ok, data} -> + if length(data) == 0 do + receive do + :continue -> + flush() + end + end + {data, cons} + + {:error, :done} -> + {:halt, cons} + end + end, + fn cons -> cons end + ) + end + + def stop(cons) do + GenServer.call(cons.pid, :stop) + end + + defp flush do + receive do + _ -> flush() + after + 0 -> :ok + end + end + + def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) + + def handle_call(:get_data, _from, cons) do + if length(cons.data) == 0 and !cons.running do + {:reply, {:error, :done}, cons} + else + data = cons.data + cons = %{cons | data: []} + GenServer.cast(cons.distributor_pid, :continue) + {:reply, {:ok, data}, cons} + end + end + + def handle_call(:register_client, {pid, ref}, cons) do + cons = %{cons | client: pid} + + {:reply, cons, cons} + end + + def handle_call({:put_data, new_data}, _from, cons) do + {new_data, _} = Enum.split_with(new_data, cons.fun) + cons = %{cons | data: cons.data ++ new_data} + if cons.client do + send(cons.client, :continue) + end + {:reply, cons, cons} + end + + def handle_cast(:continue, cons) do + if cons.client do + send(cons.client, :continue) + end + {:noreply, cons} + end + + def handle_call(:stop, _from, cons) do + cons = %{cons | running: false} + + {:reply, cons, cons} + end + + def handle_call(:__state__, _from, cons), do: {:reply, cons, cons} +end diff --git a/lib/distributor.ex b/lib/distributor.ex new file mode 100644 index 0000000..0b6cd05 --- /dev/null +++ b/lib/distributor.ex @@ -0,0 +1,143 @@ +defmodule Strom.Distributor do + use GenServer + + @buffer 2 + + defstruct streams: %{}, + pid: nil, + running: false, + data: %{}, + buffer: @buffer, + no_data_counter: 0, + in_tasks: %{}, + consumers: %{} + + def start(opts \\ []) when is_list(opts) do + state = %__MODULE__{ + buffer: Keyword.get(opts, :buffer, @buffer) + } + + {:ok, pid} = GenServer.start_link(__MODULE__, state) + __state__(pid) + end + + def init(%__MODULE__{} = distributor) do + {:ok, %{distributor | pid: self()}} + end + + def call(flow, %__MODULE__{} = distributor, inputs, outputs) + when is_map(flow) and is_map(inputs) and is_map(outputs) do + input_streams = + Enum.reduce(inputs, %{}, fn {name, fun}, acc -> + Map.put(acc, {name, fun}, Map.fetch!(flow, name)) + end) + + sub_flow = + outputs + |> Enum.reduce(%{}, fn {name, fun}, flow -> + cons = Strom.Cons.start({name, fun}, distributor.pid) + :ok = GenServer.call(distributor.pid, {:register_consumer, {{name, fun}, cons}}) + stream = Strom.Cons.call(cons) + Map.put(flow, name, stream) + end) + + :ok = GenServer.call(distributor.pid, {:run_inputs, input_streams}) + + flow + |> Map.drop(Map.keys(inputs)) + |> Map.merge(sub_flow) + end + + def stop(%__MODULE__{pid: pid}), do: GenServer.call(pid, :stop) + + def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) + + defp run_inputs(streams, pid, buffer) do + Enum.reduce(streams, %{}, fn {{name, fun}, stream}, acc -> + task = async_run_stream({name, fun}, stream, buffer, pid) + Map.put(acc, {name, fun}, task) + end) + end + + defp async_run_stream({name, fun}, stream, buffer, pid) do + Task.async(fn -> + stream + |> Stream.chunk_every(buffer) + |> Stream.each(fn chunk -> + {chunk, _} = Enum.split_with(chunk, fun) + GenServer.cast(pid, {:new_data, {name, fun}, chunk}) + + receive do + :continue -> + flush() + end + end) + |> Stream.run() + + GenServer.call(pid, {:done, {name, fun}}) + end) + end + + defp flush do + receive do + _ -> flush() + after + 0 -> :ok + end + end + + def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = distributor) do + Enum.each(distributor.consumers, fn {{name, fun}, cons}-> + GenServer.call(cons.pid, {:put_data, chunk}) + GenServer.cast(cons.pid, :continue) + end) + + {:noreply, distributor} + end + + def handle_call({:run_inputs, streams_to_mix}, _from, %__MODULE__{} = mixer) do + in_tasks = run_inputs(streams_to_mix, mixer.pid, mixer.buffer) + + {:reply, :ok, %{mixer | running: true, streams: streams_to_mix, in_tasks: in_tasks}} + end + + def handle_call({:register_consumer, {{name, fun}, cons}}, _from, %__MODULE__{} = distributor) do + distributor = %{distributor | consumers: Map.put(distributor.consumers, {name, fun}, cons)} + {:reply, :ok, distributor} + end + + def handle_call(:stop, _from, %__MODULE__{} = mixer) do + {:stop, :normal, :ok, %{mixer | running: false}} + end + + def handle_call({:done, {name, fun}}, _from, %__MODULE__{} = distributor) do + in_tasks = Map.delete(distributor.in_tasks, {name, fun}) + distributor = %{distributor | in_tasks: in_tasks} + if map_size(distributor.in_tasks) == 0 do + Enum.each(distributor.consumers, fn {{name, fun}, cons}-> + GenServer.cast(cons.pid, :continue) + GenServer.call(cons.pid, :stop) + end) + end + {:reply, :ok, distributor} + end + + def handle_cast(:continue, %__MODULE__{} = distributor) do + Enum.each(distributor.in_tasks, fn {{name, fun}, task}-> + send(task.pid, :continue) + end) + {:noreply, distributor} + end + + def handle_call(:__state__, _from, mixer), do: {:reply, mixer, mixer} + + def handle_info({_task_ref, :ok}, mixer) do + # do nothing for now + {:noreply, mixer} + end + + def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, mixer) do + # do nothing for now + {:noreply, mixer} + end +end diff --git a/test/distributor_test.exs b/test/distributor_test.exs new file mode 100644 index 0000000..de03873 --- /dev/null +++ b/test/distributor_test.exs @@ -0,0 +1,57 @@ +defmodule Strom.DistributorTest do + use ExUnit.Case, async: false + + alias Strom.Distributor + + # test "start and stop" do + # mixer = Mixer.start() + # assert Process.alive?(mixer.pid) + # :ok = Mixer.stop(mixer) + # refute Process.alive?(mixer.pid) + # end + + test "call" do + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} + + distributor = Distributor.start() + + inputs = %{ + numbers1: fn el -> el < 5 end, + numbers2: fn el -> el > 6 end + } + + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } + + flow = Distributor.call(flow, distributor, inputs, outputs) + + assert Enum.sort(Enum.to_list(flow[:odd])) == [1,3,7,9] + assert Enum.sort(Enum.to_list(flow[:even])) == [2,4,8,10] + end + + test "massive call" do + :observer.start() + flow = %{numbers1: Enum.to_list(1..100_000), numbers2: Enum.to_list(200_000..300_000)} + + distributor = Distributor.start() + + inputs = %{ + numbers1: fn el -> rem(el, 3) == 0 end, + numbers2: fn el -> rem(el, 5) == 0 end + } + + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } + + flow = Distributor.call(flow, distributor, inputs, outputs) + + Enum.to_list(flow[:odd]) + |> IO.inspect + Enum.to_list(flow[:even]) + |> IO.inspect + end +end From f592036e9874b33f21bcbfd84d0ee6d20512d1b0 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Mon, 25 Dec 2023 13:55:30 +0100 Subject: [PATCH 03/17] Generic distributor draft --- lib/cons.ex | 14 +++++---- lib/distributor.ex | 21 +++++++------ test/distributor_test.exs | 64 +++++++++++++++++++++++++++++++++------ 3 files changed, 75 insertions(+), 24 deletions(-) diff --git a/lib/cons.ex b/lib/cons.ex index 83108d9..281eddf 100644 --- a/lib/cons.ex +++ b/lib/cons.ex @@ -1,8 +1,6 @@ defmodule Strom.Cons do use GenServer - @buffer 2 - defstruct pid: nil, distributor_pid: nil, running: false, @@ -36,6 +34,7 @@ defmodule Strom.Cons do flush() end end + {data, cons} {:error, :done} -> @@ -77,26 +76,29 @@ defmodule Strom.Cons do {:reply, cons, cons} end - def handle_call({:put_data, new_data}, _from, cons) do + def handle_cast({:put_data, new_data}, cons) do {new_data, _} = Enum.split_with(new_data, cons.fun) cons = %{cons | data: cons.data ++ new_data} + if cons.client do send(cons.client, :continue) end - {:reply, cons, cons} + + {:noreply, cons} end def handle_cast(:continue, cons) do if cons.client do send(cons.client, :continue) end + {:noreply, cons} end - def handle_call(:stop, _from, cons) do + def handle_cast(:stop, cons) do cons = %{cons | running: false} - {:reply, cons, cons} + {:noreply, cons} end def handle_call(:__state__, _from, cons), do: {:reply, cons, cons} diff --git a/lib/distributor.ex b/lib/distributor.ex index 0b6cd05..e1c415d 100644 --- a/lib/distributor.ex +++ b/lib/distributor.ex @@ -1,7 +1,7 @@ defmodule Strom.Distributor do use GenServer - @buffer 2 + @buffer 1000 defstruct streams: %{}, pid: nil, @@ -74,7 +74,7 @@ defmodule Strom.Distributor do end) |> Stream.run() - GenServer.call(pid, {:done, {name, fun}}) + GenServer.cast(pid, {:done, {name, fun}}) end) end @@ -87,8 +87,8 @@ defmodule Strom.Distributor do end def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = distributor) do - Enum.each(distributor.consumers, fn {{name, fun}, cons}-> - GenServer.call(cons.pid, {:put_data, chunk}) + Enum.each(distributor.consumers, fn {{name, fun}, cons} -> + GenServer.cast(cons.pid, {:put_data, chunk}) GenServer.cast(cons.pid, :continue) end) @@ -110,22 +110,25 @@ defmodule Strom.Distributor do {:stop, :normal, :ok, %{mixer | running: false}} end - def handle_call({:done, {name, fun}}, _from, %__MODULE__{} = distributor) do + def handle_cast({:done, {name, fun}}, %__MODULE__{} = distributor) do in_tasks = Map.delete(distributor.in_tasks, {name, fun}) distributor = %{distributor | in_tasks: in_tasks} + if map_size(distributor.in_tasks) == 0 do - Enum.each(distributor.consumers, fn {{name, fun}, cons}-> + Enum.each(distributor.consumers, fn {{name, fun}, cons} -> GenServer.cast(cons.pid, :continue) - GenServer.call(cons.pid, :stop) + GenServer.cast(cons.pid, :stop) end) end - {:reply, :ok, distributor} + + {:noreply, distributor} end def handle_cast(:continue, %__MODULE__{} = distributor) do - Enum.each(distributor.in_tasks, fn {{name, fun}, task}-> + Enum.each(distributor.in_tasks, fn {{name, fun}, task} -> send(task.pid, :continue) end) + {:noreply, distributor} end diff --git a/test/distributor_test.exs b/test/distributor_test.exs index de03873..ee36299 100644 --- a/test/distributor_test.exs +++ b/test/distributor_test.exs @@ -27,19 +27,24 @@ defmodule Strom.DistributorTest do flow = Distributor.call(flow, distributor, inputs, outputs) - assert Enum.sort(Enum.to_list(flow[:odd])) == [1,3,7,9] - assert Enum.sort(Enum.to_list(flow[:even])) == [2,4,8,10] + assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] + assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] end test "massive call" do - :observer.start() - flow = %{numbers1: Enum.to_list(1..100_000), numbers2: Enum.to_list(200_000..300_000)} + # :observer.start() + flow = %{ + numbers1: Enum.to_list(1..1_000_000), + numbers2: Enum.to_list(1..1_000_000), + numbers3: Enum.to_list(1..1_000_000) + } distributor = Distributor.start() inputs = %{ numbers1: fn el -> rem(el, 3) == 0 end, - numbers2: fn el -> rem(el, 5) == 0 end + numbers2: fn el -> rem(el, 4) == 0 end, + numbers3: fn el -> rem(el, 5) == 0 end } outputs = %{ @@ -49,9 +54,50 @@ defmodule Strom.DistributorTest do flow = Distributor.call(flow, distributor, inputs, outputs) - Enum.to_list(flow[:odd]) - |> IO.inspect - Enum.to_list(flow[:even]) - |> IO.inspect + task1 = + Task.async(fn -> + Enum.to_list(flow[:odd]) + |> IO.inspect() + end) + + task2 = + Task.async(fn -> + Enum.to_list(flow[:even]) + |> IO.inspect() + end) + + Task.await(task1, :infinity) + Task.await(task2, :infinity) + end + + test "huge files" do + :observer.start() + source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) + source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) + + sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) + sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) + + flow = + %{} + |> Strom.Source.call(source1, :source1) + |> Strom.Source.call(source2, :source2) + + distributor = Distributor.start() + + inputs = %{ + source1: fn el -> el end, + source2: fn el -> el end + } + + outputs = %{ + odd: fn el -> String.contains?(el, "ORDER_CREATED") end, + even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end + } + + flow + |> Distributor.call(distributor, inputs, outputs) + |> Strom.Sink.call(sink1, [:odd]) + |> Strom.Sink.call(sink2, [:even], true) end end From bf71a0ebbe173bde1ee464440052f1020d469d99 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Mon, 25 Dec 2023 16:57:49 +0100 Subject: [PATCH 04/17] Generic distributor draft --- lib/distributor.ex | 18 ++++- test/distributor_test.exs | 142 +++++++++++++++++++++++--------------- 2 files changed, 100 insertions(+), 60 deletions(-) diff --git a/lib/distributor.ex b/lib/distributor.ex index e1c415d..67d78c6 100644 --- a/lib/distributor.ex +++ b/lib/distributor.ex @@ -10,11 +10,13 @@ defmodule Strom.Distributor do buffer: @buffer, no_data_counter: 0, in_tasks: %{}, - consumers: %{} + consumers: %{}, + function: nil def start(opts \\ []) when is_list(opts) do state = %__MODULE__{ - buffer: Keyword.get(opts, :buffer, @buffer) + buffer: Keyword.get(opts, :buffer, @buffer), + function: Keyword.get(opts, :function, {fn el, nil -> {[el], nil} end, nil}) } {:ok, pid} = GenServer.start_link(__MODULE__, state) @@ -87,11 +89,21 @@ defmodule Strom.Distributor do end def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = distributor) do + {fun, acc} = distributor.function + + {new_chunk, new_acc} = + Enum.reduce(chunk, {[], acc}, fn el, {events, acc} -> + {new_events, acc} = fun.(el, acc) + {events ++ new_events, acc} + end) + Enum.each(distributor.consumers, fn {{name, fun}, cons} -> - GenServer.cast(cons.pid, {:put_data, chunk}) + GenServer.cast(cons.pid, {:put_data, new_chunk}) GenServer.cast(cons.pid, :continue) end) + distributor = %{distributor | function: {fun, new_acc}} + {:noreply, distributor} end diff --git a/test/distributor_test.exs b/test/distributor_test.exs index ee36299..ee58de8 100644 --- a/test/distributor_test.exs +++ b/test/distributor_test.exs @@ -3,40 +3,68 @@ defmodule Strom.DistributorTest do alias Strom.Distributor - # test "start and stop" do - # mixer = Mixer.start() - # assert Process.alive?(mixer.pid) - # :ok = Mixer.stop(mixer) - # refute Process.alive?(mixer.pid) - # end + test "start and stop" do + distributor = Distributor.start() + assert Process.alive?(distributor.pid) + :ok = Distributor.stop(distributor) + refute Process.alive?(distributor.pid) + end - test "call" do - flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} + describe "call" do + setup do + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} - distributor = Distributor.start() + inputs = %{ + numbers1: fn el -> el < 5 end, + numbers2: fn el -> el > 6 end + } - inputs = %{ - numbers1: fn el -> el < 5 end, - numbers2: fn el -> el > 6 end - } + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } - outputs = %{ - odd: fn el -> rem(el, 2) == 1 end, - even: fn el -> rem(el, 2) == 0 end - } + %{flow: flow, inputs: inputs, outputs: outputs} + end - flow = Distributor.call(flow, distributor, inputs, outputs) + test "call", %{flow: flow, inputs: inputs, outputs: outputs} do + distributor = Distributor.start() + flow = Distributor.call(flow, distributor, inputs, outputs) + + assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] + assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] + end + + test "call with function", %{flow: flow, inputs: inputs, outputs: outputs} do + function = {fn el, nil -> {[el * el], nil} end, nil} + distributor = Distributor.start(function: function) + + flow = Distributor.call(flow, distributor, inputs, outputs) + + assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 9, 49, 81] + assert Enum.sort(Enum.to_list(flow[:even])) == [4, 16, 64, 100] + end + + test "call with function/2 and accumulator", %{flow: flow, inputs: inputs, outputs: outputs} do + function = fn el, acc -> + {[el, acc], acc + 1} + end - assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] - assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] + distributor = Distributor.start(function: {function, 1000}) + + flow = Distributor.call(flow, distributor, inputs, outputs) + + assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9, 1001, 1003, 1005, 1007] + assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10, 1000, 1002, 1004, 1006] + end end test "massive call" do # :observer.start() flow = %{ - numbers1: Enum.to_list(1..1_000_000), - numbers2: Enum.to_list(1..1_000_000), - numbers3: Enum.to_list(1..1_000_000) + numbers1: Enum.to_list(1..100_000), + numbers2: Enum.to_list(1..100_000), + numbers3: Enum.to_list(1..100_000) } distributor = Distributor.start() @@ -56,48 +84,48 @@ defmodule Strom.DistributorTest do task1 = Task.async(fn -> - Enum.to_list(flow[:odd]) - |> IO.inspect() + list = Enum.to_list(flow[:odd]) + assert length(list) == 26667 end) task2 = Task.async(fn -> - Enum.to_list(flow[:even]) - |> IO.inspect() + list = Enum.to_list(flow[:even]) + assert length(list) == 51666 end) Task.await(task1, :infinity) Task.await(task2, :infinity) end - test "huge files" do - :observer.start() - source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) - source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) - - sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) - sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) - - flow = - %{} - |> Strom.Source.call(source1, :source1) - |> Strom.Source.call(source2, :source2) - - distributor = Distributor.start() - - inputs = %{ - source1: fn el -> el end, - source2: fn el -> el end - } - - outputs = %{ - odd: fn el -> String.contains?(el, "ORDER_CREATED") end, - even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end - } - - flow - |> Distributor.call(distributor, inputs, outputs) - |> Strom.Sink.call(sink1, [:odd]) - |> Strom.Sink.call(sink2, [:even], true) - end + # test "huge files" do + # :observer.start() + # source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) + # source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) + # + # sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) + # sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) + # + # flow = + # %{} + # |> Strom.Source.call(source1, :source1) + # |> Strom.Source.call(source2, :source2) + # + # distributor = Distributor.start() + # + # inputs = %{ + # source1: fn el -> el end, + # source2: fn el -> el end + # } + # + # outputs = %{ + # odd: fn el -> String.contains?(el, "ORDER_CREATED") end, + # even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end + # } + # + # flow + # |> Distributor.call(distributor, inputs, outputs) + # |> Strom.Sink.call(sink1, [:odd]) + # |> Strom.Sink.call(sink2, [:even], true) + # end end From bfee253b85b40a9b6d3d0766efdac2279975912f Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Tue, 26 Dec 2023 08:56:12 +0100 Subject: [PATCH 05/17] Revert "Generic distributor draft" This reverts commit bf71a0ebbe173bde1ee464440052f1020d469d99. --- lib/distributor.ex | 18 +---- test/distributor_test.exs | 142 +++++++++++++++----------------------- 2 files changed, 60 insertions(+), 100 deletions(-) diff --git a/lib/distributor.ex b/lib/distributor.ex index 67d78c6..e1c415d 100644 --- a/lib/distributor.ex +++ b/lib/distributor.ex @@ -10,13 +10,11 @@ defmodule Strom.Distributor do buffer: @buffer, no_data_counter: 0, in_tasks: %{}, - consumers: %{}, - function: nil + consumers: %{} def start(opts \\ []) when is_list(opts) do state = %__MODULE__{ - buffer: Keyword.get(opts, :buffer, @buffer), - function: Keyword.get(opts, :function, {fn el, nil -> {[el], nil} end, nil}) + buffer: Keyword.get(opts, :buffer, @buffer) } {:ok, pid} = GenServer.start_link(__MODULE__, state) @@ -89,21 +87,11 @@ defmodule Strom.Distributor do end def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = distributor) do - {fun, acc} = distributor.function - - {new_chunk, new_acc} = - Enum.reduce(chunk, {[], acc}, fn el, {events, acc} -> - {new_events, acc} = fun.(el, acc) - {events ++ new_events, acc} - end) - Enum.each(distributor.consumers, fn {{name, fun}, cons} -> - GenServer.cast(cons.pid, {:put_data, new_chunk}) + GenServer.cast(cons.pid, {:put_data, chunk}) GenServer.cast(cons.pid, :continue) end) - distributor = %{distributor | function: {fun, new_acc}} - {:noreply, distributor} end diff --git a/test/distributor_test.exs b/test/distributor_test.exs index ee58de8..ee36299 100644 --- a/test/distributor_test.exs +++ b/test/distributor_test.exs @@ -3,68 +3,40 @@ defmodule Strom.DistributorTest do alias Strom.Distributor - test "start and stop" do - distributor = Distributor.start() - assert Process.alive?(distributor.pid) - :ok = Distributor.stop(distributor) - refute Process.alive?(distributor.pid) - end - - describe "call" do - setup do - flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} - - inputs = %{ - numbers1: fn el -> el < 5 end, - numbers2: fn el -> el > 6 end - } - - outputs = %{ - odd: fn el -> rem(el, 2) == 1 end, - even: fn el -> rem(el, 2) == 0 end - } - - %{flow: flow, inputs: inputs, outputs: outputs} - end - - test "call", %{flow: flow, inputs: inputs, outputs: outputs} do - distributor = Distributor.start() - flow = Distributor.call(flow, distributor, inputs, outputs) - - assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] - assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] - end - - test "call with function", %{flow: flow, inputs: inputs, outputs: outputs} do - function = {fn el, nil -> {[el * el], nil} end, nil} - distributor = Distributor.start(function: function) + # test "start and stop" do + # mixer = Mixer.start() + # assert Process.alive?(mixer.pid) + # :ok = Mixer.stop(mixer) + # refute Process.alive?(mixer.pid) + # end - flow = Distributor.call(flow, distributor, inputs, outputs) + test "call" do + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} - assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 9, 49, 81] - assert Enum.sort(Enum.to_list(flow[:even])) == [4, 16, 64, 100] - end + distributor = Distributor.start() - test "call with function/2 and accumulator", %{flow: flow, inputs: inputs, outputs: outputs} do - function = fn el, acc -> - {[el, acc], acc + 1} - end + inputs = %{ + numbers1: fn el -> el < 5 end, + numbers2: fn el -> el > 6 end + } - distributor = Distributor.start(function: {function, 1000}) + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } - flow = Distributor.call(flow, distributor, inputs, outputs) + flow = Distributor.call(flow, distributor, inputs, outputs) - assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9, 1001, 1003, 1005, 1007] - assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10, 1000, 1002, 1004, 1006] - end + assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] + assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] end test "massive call" do # :observer.start() flow = %{ - numbers1: Enum.to_list(1..100_000), - numbers2: Enum.to_list(1..100_000), - numbers3: Enum.to_list(1..100_000) + numbers1: Enum.to_list(1..1_000_000), + numbers2: Enum.to_list(1..1_000_000), + numbers3: Enum.to_list(1..1_000_000) } distributor = Distributor.start() @@ -84,48 +56,48 @@ defmodule Strom.DistributorTest do task1 = Task.async(fn -> - list = Enum.to_list(flow[:odd]) - assert length(list) == 26667 + Enum.to_list(flow[:odd]) + |> IO.inspect() end) task2 = Task.async(fn -> - list = Enum.to_list(flow[:even]) - assert length(list) == 51666 + Enum.to_list(flow[:even]) + |> IO.inspect() end) Task.await(task1, :infinity) Task.await(task2, :infinity) end - # test "huge files" do - # :observer.start() - # source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) - # source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) - # - # sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) - # sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) - # - # flow = - # %{} - # |> Strom.Source.call(source1, :source1) - # |> Strom.Source.call(source2, :source2) - # - # distributor = Distributor.start() - # - # inputs = %{ - # source1: fn el -> el end, - # source2: fn el -> el end - # } - # - # outputs = %{ - # odd: fn el -> String.contains?(el, "ORDER_CREATED") end, - # even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end - # } - # - # flow - # |> Distributor.call(distributor, inputs, outputs) - # |> Strom.Sink.call(sink1, [:odd]) - # |> Strom.Sink.call(sink2, [:even], true) - # end + test "huge files" do + :observer.start() + source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) + source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) + + sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) + sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) + + flow = + %{} + |> Strom.Source.call(source1, :source1) + |> Strom.Source.call(source2, :source2) + + distributor = Distributor.start() + + inputs = %{ + source1: fn el -> el end, + source2: fn el -> el end + } + + outputs = %{ + odd: fn el -> String.contains?(el, "ORDER_CREATED") end, + even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end + } + + flow + |> Distributor.call(distributor, inputs, outputs) + |> Strom.Sink.call(sink1, [:odd]) + |> Strom.Sink.call(sink2, [:even], true) + end end From 53d8becba92439799100701e81b23b82bbd3e765 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Tue, 26 Dec 2023 09:23:35 +0100 Subject: [PATCH 06/17] GenMix and its consumer --- lib/{distributor.ex => gen_mix.ex} | 75 ++++++++++--------- lib/{cons.ex => gen_mix/consumer.ex} | 14 ++-- test/distributor_test.exs | 103 --------------------------- test/gen_mix_test.exs | 103 +++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 150 deletions(-) rename lib/{distributor.ex => gen_mix.ex} (56%) rename lib/{cons.ex => gen_mix/consumer.ex} (84%) delete mode 100644 test/distributor_test.exs create mode 100644 test/gen_mix_test.exs diff --git a/lib/distributor.ex b/lib/gen_mix.ex similarity index 56% rename from lib/distributor.ex rename to lib/gen_mix.ex index e1c415d..4275c14 100644 --- a/lib/distributor.ex +++ b/lib/gen_mix.ex @@ -1,17 +1,17 @@ -defmodule Strom.Distributor do +defmodule Strom.GenMix do use GenServer @buffer 1000 - defstruct streams: %{}, - pid: nil, + defstruct pid: nil, running: false, - data: %{}, buffer: @buffer, - no_data_counter: 0, - in_tasks: %{}, + producers: %{}, consumers: %{} + alias Strom.GenMix.Consumer + + # TODO supervisor def start(opts \\ []) when is_list(opts) do state = %__MODULE__{ buffer: Keyword.get(opts, :buffer, @buffer) @@ -21,11 +21,11 @@ defmodule Strom.Distributor do __state__(pid) end - def init(%__MODULE__{} = distributor) do - {:ok, %{distributor | pid: self()}} + def init(%__MODULE__{} = mix) do + {:ok, %{mix | pid: self()}} end - def call(flow, %__MODULE__{} = distributor, inputs, outputs) + def call(flow, %__MODULE__{} = mix, inputs, outputs) when is_map(flow) and is_map(inputs) and is_map(outputs) do input_streams = Enum.reduce(inputs, %{}, fn {name, fun}, acc -> @@ -35,13 +35,13 @@ defmodule Strom.Distributor do sub_flow = outputs |> Enum.reduce(%{}, fn {name, fun}, flow -> - cons = Strom.Cons.start({name, fun}, distributor.pid) - :ok = GenServer.call(distributor.pid, {:register_consumer, {{name, fun}, cons}}) - stream = Strom.Cons.call(cons) + consumer = Consumer.start({name, fun}, mix.pid) + :ok = GenServer.call(mix.pid, {:register_consumer, {{name, fun}, consumer}}) + stream = Consumer.call(consumer) Map.put(flow, name, stream) end) - :ok = GenServer.call(distributor.pid, {:run_inputs, input_streams}) + :ok = GenServer.call(mix.pid, {:run_inputs, input_streams}) flow |> Map.drop(Map.keys(inputs)) @@ -86,61 +86,60 @@ defmodule Strom.Distributor do end end - def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = distributor) do - Enum.each(distributor.consumers, fn {{name, fun}, cons} -> + def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = mix) do + Enum.each(mix.consumers, fn {{name, fun}, cons} -> GenServer.cast(cons.pid, {:put_data, chunk}) GenServer.cast(cons.pid, :continue) end) - {:noreply, distributor} + {:noreply, mix} end - def handle_call({:run_inputs, streams_to_mix}, _from, %__MODULE__{} = mixer) do - in_tasks = run_inputs(streams_to_mix, mixer.pid, mixer.buffer) + def handle_call({:run_inputs, streams_to_mix}, _from, %__MODULE__{} = mix) do + producers = run_inputs(streams_to_mix, mix.pid, mix.buffer) - {:reply, :ok, %{mixer | running: true, streams: streams_to_mix, in_tasks: in_tasks}} + {:reply, :ok, %{mix | running: true, producers: producers}} end - def handle_call({:register_consumer, {{name, fun}, cons}}, _from, %__MODULE__{} = distributor) do - distributor = %{distributor | consumers: Map.put(distributor.consumers, {name, fun}, cons)} - {:reply, :ok, distributor} + def handle_call({:register_consumer, {{name, fun}, cons}}, _from, %__MODULE__{} = mix) do + mix = %{mix | consumers: Map.put(mix.consumers, {name, fun}, cons)} + {:reply, :ok, mix} end - def handle_call(:stop, _from, %__MODULE__{} = mixer) do - {:stop, :normal, :ok, %{mixer | running: false}} + def handle_call(:stop, _from, %__MODULE__{} = mix) do + {:stop, :normal, :ok, %{mix | running: false}} end - def handle_cast({:done, {name, fun}}, %__MODULE__{} = distributor) do - in_tasks = Map.delete(distributor.in_tasks, {name, fun}) - distributor = %{distributor | in_tasks: in_tasks} + def handle_cast({:done, {name, fun}}, %__MODULE__{} = mix) do + mix = %{mix | producers: Map.delete(mix.producers, {name, fun})} - if map_size(distributor.in_tasks) == 0 do - Enum.each(distributor.consumers, fn {{name, fun}, cons} -> + if map_size(mix.producers) == 0 do + Enum.each(mix.consumers, fn {{name, fun}, cons} -> GenServer.cast(cons.pid, :continue) GenServer.cast(cons.pid, :stop) end) end - {:noreply, distributor} + {:noreply, mix} end - def handle_cast(:continue, %__MODULE__{} = distributor) do - Enum.each(distributor.in_tasks, fn {{name, fun}, task} -> + def handle_cast({:consumer_got_data, {name, fun}}, %__MODULE__{} = mix) do + Enum.each(mix.producers, fn {{name, fun}, task} -> send(task.pid, :continue) end) - {:noreply, distributor} + {:noreply, mix} end - def handle_call(:__state__, _from, mixer), do: {:reply, mixer, mixer} + def handle_call(:__state__, _from, mix), do: {:reply, mix, mix} - def handle_info({_task_ref, :ok}, mixer) do + def handle_info({_task_ref, :ok}, mix) do # do nothing for now - {:noreply, mixer} + {:noreply, mix} end - def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, mixer) do + def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, mix) do # do nothing for now - {:noreply, mixer} + {:noreply, mix} end end diff --git a/lib/cons.ex b/lib/gen_mix/consumer.ex similarity index 84% rename from lib/cons.ex rename to lib/gen_mix/consumer.ex index 281eddf..5586cdc 100644 --- a/lib/cons.ex +++ b/lib/gen_mix/consumer.ex @@ -1,16 +1,16 @@ -defmodule Strom.Cons do +defmodule Strom.GenMix.Consumer do use GenServer defstruct pid: nil, - distributor_pid: nil, + mix_pid: nil, running: false, client: nil, name: nil, fun: nil, data: [] - def start({name, fun}, distributor_pid, opts \\ []) when is_list(opts) do - state = %__MODULE__{distributor_pid: distributor_pid, name: name, fun: fun, running: true} + def start({name, fun}, mix_pid, opts \\ []) when is_list(opts) do + state = %__MODULE__{mix_pid: mix_pid, name: name, fun: fun, running: true} {:ok, pid} = GenServer.start_link(__MODULE__, state) __state__(pid) @@ -65,7 +65,7 @@ defmodule Strom.Cons do else data = cons.data cons = %{cons | data: []} - GenServer.cast(cons.distributor_pid, :continue) + GenServer.cast(cons.mix_pid, {:consumer_got_data, {cons.name, cons.fun}}) {:reply, {:ok, data}, cons} end end @@ -80,10 +80,6 @@ defmodule Strom.Cons do {new_data, _} = Enum.split_with(new_data, cons.fun) cons = %{cons | data: cons.data ++ new_data} - if cons.client do - send(cons.client, :continue) - end - {:noreply, cons} end diff --git a/test/distributor_test.exs b/test/distributor_test.exs deleted file mode 100644 index ee36299..0000000 --- a/test/distributor_test.exs +++ /dev/null @@ -1,103 +0,0 @@ -defmodule Strom.DistributorTest do - use ExUnit.Case, async: false - - alias Strom.Distributor - - # test "start and stop" do - # mixer = Mixer.start() - # assert Process.alive?(mixer.pid) - # :ok = Mixer.stop(mixer) - # refute Process.alive?(mixer.pid) - # end - - test "call" do - flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} - - distributor = Distributor.start() - - inputs = %{ - numbers1: fn el -> el < 5 end, - numbers2: fn el -> el > 6 end - } - - outputs = %{ - odd: fn el -> rem(el, 2) == 1 end, - even: fn el -> rem(el, 2) == 0 end - } - - flow = Distributor.call(flow, distributor, inputs, outputs) - - assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] - assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] - end - - test "massive call" do - # :observer.start() - flow = %{ - numbers1: Enum.to_list(1..1_000_000), - numbers2: Enum.to_list(1..1_000_000), - numbers3: Enum.to_list(1..1_000_000) - } - - distributor = Distributor.start() - - inputs = %{ - numbers1: fn el -> rem(el, 3) == 0 end, - numbers2: fn el -> rem(el, 4) == 0 end, - numbers3: fn el -> rem(el, 5) == 0 end - } - - outputs = %{ - odd: fn el -> rem(el, 2) == 1 end, - even: fn el -> rem(el, 2) == 0 end - } - - flow = Distributor.call(flow, distributor, inputs, outputs) - - task1 = - Task.async(fn -> - Enum.to_list(flow[:odd]) - |> IO.inspect() - end) - - task2 = - Task.async(fn -> - Enum.to_list(flow[:even]) - |> IO.inspect() - end) - - Task.await(task1, :infinity) - Task.await(task2, :infinity) - end - - test "huge files" do - :observer.start() - source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) - source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) - - sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) - sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) - - flow = - %{} - |> Strom.Source.call(source1, :source1) - |> Strom.Source.call(source2, :source2) - - distributor = Distributor.start() - - inputs = %{ - source1: fn el -> el end, - source2: fn el -> el end - } - - outputs = %{ - odd: fn el -> String.contains?(el, "ORDER_CREATED") end, - even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end - } - - flow - |> Distributor.call(distributor, inputs, outputs) - |> Strom.Sink.call(sink1, [:odd]) - |> Strom.Sink.call(sink2, [:even], true) - end -end diff --git a/test/gen_mix_test.exs b/test/gen_mix_test.exs new file mode 100644 index 0000000..a6c8148 --- /dev/null +++ b/test/gen_mix_test.exs @@ -0,0 +1,103 @@ +defmodule Strom.GenMixTest do + use ExUnit.Case, async: false + + alias Strom.GenMix + + test "start and stop" do + mix = GenMix.start() + assert Process.alive?(mix.pid) + :ok = GenMix.stop(mix) + refute Process.alive?(mix.pid) + end + + test "call" do + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} + + mix = GenMix.start() + + inputs = %{ + numbers1: fn el -> el < 5 end, + numbers2: fn el -> el > 6 end + } + + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } + + flow = GenMix.call(flow, mix, inputs, outputs) + + assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] + assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] + end + + test "massive call" do + # :observer.start() + flow = %{ + numbers1: Enum.to_list(1..100_000), + numbers2: Enum.to_list(1..100_000), + numbers3: Enum.to_list(1..100_000) + } + + mix = GenMix.start() + + inputs = %{ + numbers1: fn el -> rem(el, 3) == 0 end, + numbers2: fn el -> rem(el, 4) == 0 end, + numbers3: fn el -> rem(el, 5) == 0 end + } + + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } + + flow = GenMix.call(flow, mix, inputs, outputs) + + task1 = + Task.async(fn -> + list = Enum.to_list(flow[:odd]) + assert length(list) == 26667 + end) + + task2 = + Task.async(fn -> + list = Enum.to_list(flow[:even]) + assert length(list) == 51666 + end) + + Task.await(task1, :infinity) + Task.await(task2, :infinity) + end + +# test "huge files" do +# :observer.start() +# source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) +# source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) +# +# sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) +# sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) +# +# flow = +# %{} +# |> Strom.Source.call(source1, :source1) +# |> Strom.Source.call(source2, :source2) +# +# mix = GenMix.start() +# +# inputs = %{ +# source1: fn el -> el end, +# source2: fn el -> el end +# } +# +# outputs = %{ +# odd: fn el -> String.contains?(el, "ORDER_CREATED") end, +# even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end +# } +# +# flow +# |> GenMix.call(mix, inputs, outputs) +# |> Strom.Sink.call(sink1, [:odd]) +# |> Strom.Sink.call(sink2, [:even], true) +# end +end From a280dc40383cd9f2cb9d45151a236810c970159f Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Tue, 26 Dec 2023 10:32:02 +0100 Subject: [PATCH 07/17] Mixer and Splitter use GenMix --- lib/gen_mix.ex | 34 ++++---- lib/gen_mix/consumer.ex | 9 +- lib/mixer.ex | 184 +++------------------------------------- lib/splitter.ex | 181 +++------------------------------------ test/gen_mix_test.exs | 75 ++++++++-------- 5 files changed, 83 insertions(+), 400 deletions(-) diff --git a/lib/gen_mix.ex b/lib/gen_mix.ex index 4275c14..0d3b3cf 100644 --- a/lib/gen_mix.ex +++ b/lib/gen_mix.ex @@ -21,17 +21,18 @@ defmodule Strom.GenMix do __state__(pid) end + @impl true def init(%__MODULE__{} = mix) do {:ok, %{mix | pid: self()}} end def call(flow, %__MODULE__{} = mix, inputs, outputs) when is_map(flow) and is_map(inputs) and is_map(outputs) do + input_streams = Enum.reduce(inputs, %{}, fn {name, fun}, acc -> Map.put(acc, {name, fun}, Map.fetch!(flow, name)) end) - sub_flow = outputs |> Enum.reduce(%{}, fn {name, fun}, flow -> @@ -86,15 +87,7 @@ defmodule Strom.GenMix do end end - def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{} = mix) do - Enum.each(mix.consumers, fn {{name, fun}, cons} -> - GenServer.cast(cons.pid, {:put_data, chunk}) - GenServer.cast(cons.pid, :continue) - end) - - {:noreply, mix} - end - + @impl true def handle_call({:run_inputs, streams_to_mix}, _from, %__MODULE__{} = mix) do producers = run_inputs(streams_to_mix, mix.pid, mix.buffer) @@ -110,11 +103,23 @@ defmodule Strom.GenMix do {:stop, :normal, :ok, %{mix | running: false}} end + def handle_call(:__state__, _from, mix), do: {:reply, mix, mix} + + @impl true + def handle_cast({:new_data, {_name, _fun}, chunk}, %__MODULE__{} = mix) do + Enum.each(mix.consumers, fn {_, cons} -> + GenServer.cast(cons.pid, {:put_data, chunk}) + GenServer.cast(cons.pid, :continue) + end) + + {:noreply, mix} + end + def handle_cast({:done, {name, fun}}, %__MODULE__{} = mix) do mix = %{mix | producers: Map.delete(mix.producers, {name, fun})} if map_size(mix.producers) == 0 do - Enum.each(mix.consumers, fn {{name, fun}, cons} -> + Enum.each(mix.consumers, fn {_, cons} -> GenServer.cast(cons.pid, :continue) GenServer.cast(cons.pid, :stop) end) @@ -123,16 +128,15 @@ defmodule Strom.GenMix do {:noreply, mix} end - def handle_cast({:consumer_got_data, {name, fun}}, %__MODULE__{} = mix) do - Enum.each(mix.producers, fn {{name, fun}, task} -> + def handle_cast({:consumer_got_data, {_name, _fun}}, %__MODULE__{} = mix) do + Enum.each(mix.producers, fn {_, task} -> send(task.pid, :continue) end) {:noreply, mix} end - def handle_call(:__state__, _from, mix), do: {:reply, mix, mix} - + @impl true def handle_info({_task_ref, :ok}, mix) do # do nothing for now {:noreply, mix} diff --git a/lib/gen_mix/consumer.ex b/lib/gen_mix/consumer.ex index 5586cdc..2018c73 100644 --- a/lib/gen_mix/consumer.ex +++ b/lib/gen_mix/consumer.ex @@ -16,6 +16,7 @@ defmodule Strom.GenMix.Consumer do __state__(pid) end + @impl true def init(%__MODULE__{} = cons) do {:ok, %{cons | pid: self()}} end @@ -59,6 +60,7 @@ defmodule Strom.GenMix.Consumer do def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) + @impl true def handle_call(:get_data, _from, cons) do if length(cons.data) == 0 and !cons.running do {:reply, {:error, :done}, cons} @@ -70,12 +72,15 @@ defmodule Strom.GenMix.Consumer do end end - def handle_call(:register_client, {pid, ref}, cons) do + def handle_call(:register_client, {pid, _ref}, cons) do cons = %{cons | client: pid} {:reply, cons, cons} end + def handle_call(:__state__, _from, cons), do: {:reply, cons, cons} + + @impl true def handle_cast({:put_data, new_data}, cons) do {new_data, _} = Enum.split_with(new_data, cons.fun) cons = %{cons | data: cons.data ++ new_data} @@ -96,6 +101,4 @@ defmodule Strom.GenMix.Consumer do {:noreply, cons} end - - def handle_call(:__state__, _from, cons), do: {:reply, cons, cons} end diff --git a/lib/mixer.ex b/lib/mixer.ex index 5925f4c..e6ed8a0 100644 --- a/lib/mixer.ex +++ b/lib/mixer.ex @@ -1,189 +1,25 @@ defmodule Strom.Mixer do - use GenServer - - @buffer 1000 - - defstruct streams: %{}, - pid: nil, - running: false, - data: %{}, - buffer: @buffer, - no_data_counter: 0, - tasks: %{}, - consumer: nil - + alias Strom.GenMix def start(opts \\ []) when is_list(opts) do - state = %__MODULE__{ - buffer: Keyword.get(opts, :buffer, @buffer) - } - - {:ok, pid} = GenServer.start_link(__MODULE__, state) - __state__(pid) - end - - def init(%__MODULE__{} = mixer) do - {:ok, %{mixer | pid: self()}} + GenMix.start(opts) end - def call(flow, %__MODULE__{} = mixer, to_mix, name) when is_map(flow) and is_list(to_mix) do - to_mix = + def call(flow, %GenMix{} = mix, to_mix, name) when is_map(flow) and is_list(to_mix) do + inputs = Enum.reduce(to_mix, %{}, fn name, acc -> Map.put(acc, name, fn _el -> true end) end) - call(flow, mixer, to_mix, name) - end - - def call(flow, %__MODULE__{} = mixer, to_mix, name) when is_map(flow) and is_map(to_mix) do - streams_to_mix = - Enum.reduce(to_mix, %{}, fn {name, fun}, acc -> - Map.put(acc, {name, fun}, Map.fetch!(flow, name)) - end) - - :ok = GenServer.call(mixer.pid, {:run_streams, streams_to_mix}) - - new_stream = - Stream.resource( - fn -> GenServer.call(mixer.pid, {:register_consumer, self()}) end, - fn mixer -> - - case GenServer.call(mixer.pid, :get_data) do - {:ok, {data, no_data_counter}} -> -## maybe_wait(no_data_counter, 0) -# if rem(no_data_counter, 10) == 9 do - if length(data) == 0 do - IO.inspect(no_data_counter, label: "no_data_counter_mixer: #{name}") - receive do - :continue -> - flush() - end - end - {data, mixer} - - {:error, :done} -> - {:halt, mixer} - end - end, - fn mixer -> mixer end - ) - - flow - |> Map.drop(Map.keys(to_mix)) - |> Map.put(name, new_stream) - end - - def stop(%__MODULE__{pid: pid}), do: GenServer.call(pid, :stop) - - def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) - - defp run_streams(streams, pid, buffer) do - Enum.reduce(streams, %{}, fn {{name, fun}, stream}, acc -> - task = async_run_stream({name, fun}, stream, buffer, pid) - Map.put(acc, {name, fun}, task) - end) - end - - defp async_run_stream({name, fun}, stream, buffer, pid) do - Task.async(fn -> - stream - |> Stream.chunk_every(buffer) - |> Stream.each(fn chunk -> - {chunk, _} = Enum.split_with(chunk, fun) - GenServer.cast(pid, {:new_data, {name, fun}, chunk}) - receive do - :continue -> - flush() - end - end) - |> Stream.run() - - GenServer.call(pid, {:done, {name, fun}}) - end) - |> IO.inspect(label: "mixer task") - end - - defp flush do - receive do - _ -> flush() - after - 0 -> :ok - end - end - - defp maybe_wait(current, allowed) do - if current > allowed do - diff = current - allowed - to_sleep = trunc(:math.pow(2, diff)) - Process.sleep(to_sleep) - to_sleep - end - end - - def handle_cast({:new_data, {name, fun}, chunk}, %__MODULE__{data: prev_data} = mixer) do - if mixer.consumer, do: send(mixer.consumer, :continue) - prev_data_from_stream = Map.get(prev_data, {name, fun}, []) - data_from_stream = prev_data_from_stream ++ chunk - - data = Map.put(prev_data, {name, fun}, data_from_stream) + outputs = %{name => fn _el -> true end} - {:noreply, %{mixer | data: data}} + GenMix.call(flow, mix, inputs, outputs) end - def handle_call({:run_streams, streams_to_mix}, _from, %__MODULE__{} = mixer) do - tasks = run_streams(streams_to_mix, mixer.pid, mixer.buffer) - - {:reply, :ok, %{mixer | running: true, streams: streams_to_mix, tasks: tasks}} - end - - def handle_call({:done, {name, fun}}, _from, %__MODULE__{streams: streams} = mixer) do - streams = Map.delete(streams, {name, fun}) - {:reply, :ok, %{mixer | streams: streams, running: false}} + def call(flow, %GenMix{} = mix, to_mix, name) when is_map(flow) and is_map(to_mix) do + outputs = %{name => fn _el -> true end} + GenMix.call(flow, mix, to_mix, outputs) end - def handle_call(:get_data, _from, %__MODULE__{data: data, streams: streams} = mixer) do - all_data = Enum.reduce(data, [], fn {_, d}, acc -> acc ++ d end) - - mixer.tasks - |> Enum.shuffle() - |> Enum.each(fn {{name, fun}, task} -> - send(task.pid, :continue) - end) - - if length(all_data) == 0 && map_size(streams) == 0 do - {:reply, {:error, :done}, mixer} - else - data = Enum.reduce(data, %{}, fn {name, _}, acc -> Map.put(acc, name, []) end) - no_data_counter = if length(all_data) == 0, do: mixer.no_data_counter + 1, else: 0 - - mixer = %{ - mixer - | data: data, - no_data_counter: no_data_counter - } - - {:reply, {:ok, {all_data, no_data_counter}}, mixer} - end - end - - def handle_call({:register_consumer, pid},_from,%__MODULE__{consumer: consumer} = mixer) do - mixer = %{mixer | consumer: pid} - {:reply, mixer, mixer} - end - - def handle_call(:stop, _from, %__MODULE__{} = mixer) do - {:stop, :normal, :ok, %{mixer | running: false}} - end - - def handle_call(:__state__, _from, mixer), do: {:reply, mixer, mixer} - - def handle_info({_task_ref, :ok}, mixer) do - # do nothing for now - {:noreply, mixer} - end - - def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, mixer) do - # do nothing for now - {:noreply, mixer} - end + def stop(%GenMix{} = mix), do: GenMix.stop(mix) end diff --git a/lib/splitter.ex b/lib/splitter.ex index 1cbc389..88cda6d 100644 --- a/lib/splitter.ex +++ b/lib/splitter.ex @@ -1,186 +1,25 @@ defmodule Strom.Splitter do - use GenServer - - @buffer 1000 - - defstruct pid: nil, - stream: nil, - partitions: %{}, - running: false, - buffer: @buffer, - no_data_counter: 0, - task: nil, - consumers: [] + alias Strom.GenMix def start(opts \\ []) when is_list(opts) do - state = %__MODULE__{ - buffer: Keyword.get(opts, :buffer, @buffer) - } - - {:ok, pid} = GenServer.start_link(__MODULE__, state) - __state__(pid) + GenMix.start(opts) end - def init(%__MODULE__{} = splitter) do - {:ok, %{splitter | pid: self()}} - end + def call(flow, %GenMix{} = mix, name, partitions) when is_list(partitions) do + inputs = %{name => fn _el -> true end} - def call(flow, %__MODULE__{} = splitter, name, partitions) when is_list(partitions) do - partitions = + outputs = Enum.reduce(partitions, %{}, fn name, acc -> Map.put(acc, name, fn _el -> true end) end) - call(flow, splitter, name, partitions) - end - - def call(flow, %__MODULE__{} = splitter, name, partitions) when is_map(partitions) do - GenServer.call(splitter.pid, {:set_partitions, partitions}) - stream_to_run = Map.fetch!(flow, name) - - task = GenServer.call(splitter.pid, {:run_stream, stream_to_run}) - - sub_flow = - partitions - |> Enum.reduce(%{}, fn {name, fun}, flow -> - stream = - Stream.resource( - fn -> - GenServer.call(splitter.pid, {:register_consumer, self()}) - |> IO.inspect - end, - fn splitter -> - case GenServer.call(splitter.pid, {:get_data, {name, fun}}) do - {:ok, {data, no_data_counter}} -> -# if rem(no_data_counter, 10) == 9 do - if length(data) == 0 do -# Process.sleep(1) - IO.inspect(no_data_counter, label: "no_data_counter_splitter: #{name}") - receive do - :continue -> - flush() - end - end - {data, splitter} - - {:error, :done} -> - {:halt, splitter} - end - end, - fn splitter -> splitter end - ) - - Map.put(flow, name, stream) - end) - - flow - |> Map.delete(name) - |> Map.merge(sub_flow) + GenMix.call(flow, mix, inputs, outputs) end - def stop(%__MODULE__{pid: pid}), do: GenServer.call(pid, :stop) - - def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) - - defp async_run_stream(stream, buffer, pid) do - Task.async(fn -> - stream - |> Stream.chunk_every(buffer) - |> Stream.each(fn chunk -> - GenServer.cast(pid, {:new_data, chunk}) - receive do - :continue -> - flush() - end - end) - |> Stream.run() - - GenServer.call(pid, :done) - end) - |> IO.inspect(label: "slitter task") + def call(flow, %GenMix{} = mix, name, partitions) when is_map(partitions) do + inputs = %{name => fn _el -> true end} + GenMix.call(flow, mix, inputs, partitions) end - defp flush do - receive do - _ -> flush() - after - 0 -> :ok - end - end - - def handle_cast({:new_data, data}, %__MODULE__{} = splitter) do - new_partitions = - Enum.reduce(splitter.partitions, %{}, fn {{name, fun}, prev_data}, acc -> - {valid_data, _} = Enum.split_with(data, fun) - new_data = prev_data ++ valid_data - Map.put(acc, {name, fun}, new_data) - end) - - splitter.consumers - |> Enum.shuffle() - |> Enum.each(&send(&1, :continue)) - - {:noreply, %{splitter | partitions: new_partitions}} - end - - def handle_call({:run_stream, stream}, _from, %__MODULE__{} = splitter) do - task = async_run_stream(stream, splitter.buffer, splitter.pid) - {:reply, :ok, %{splitter | running: true, task: task}} - end - - def handle_call({:set_partitions, partitions}, _from, %__MODULE__{} = splitter) do - partitions = - Enum.reduce(partitions, %{}, fn {name, fun}, acc -> Map.put(acc, {name, fun}, []) end) - - splitter = %{splitter | partitions: partitions} - {:reply, splitter, splitter} - end - - def handle_call(:done, _from, %__MODULE__{} = splitter) do - {:reply, :ok, %{splitter | running: false}} - end - - def handle_call({:register_consumer, pid},_from,%__MODULE__{consumers: consumers} = splitter) do - splitter = %{splitter | consumers: [pid | consumers]} - {:reply, splitter, splitter} - end - - def handle_call( - {:get_data, partition_fun}, - _from, - %__MODULE__{partitions: partitions, running: running} = splitter - ) do - send(splitter.task.pid, :continue) - - data = Map.get(partitions, partition_fun) - if length(data) == 0 && !running do - {:reply, {:error, :done}, splitter} - else - no_data_counter = if length(data) == 0, do: splitter.no_data_counter + 1, else: 0 - - splitter = %{ - splitter - | partitions: Map.put(partitions, partition_fun, []), - no_data_counter: no_data_counter - } - - {:reply, {:ok, {data, no_data_counter}}, splitter} - end - end - - def handle_call(:stop, _from, %__MODULE__{} = splitter) do - {:stop, :normal, :ok, %{splitter | running: false, partitions: %{}}} - end - - def handle_call(:__state__, _from, splitter), do: {:reply, splitter, splitter} - - def handle_info({_task_ref, :ok}, splitter) do - # do nothing for now - {:noreply, splitter} - end - - def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, splitter) do - # do nothing for now - {:noreply, splitter} - end + def stop(%GenMix{} = mix), do: GenMix.stop(mix) end diff --git a/test/gen_mix_test.exs b/test/gen_mix_test.exs index a6c8148..ea30077 100644 --- a/test/gen_mix_test.exs +++ b/test/gen_mix_test.exs @@ -3,15 +3,15 @@ defmodule Strom.GenMixTest do alias Strom.GenMix - test "start and stop" do - mix = GenMix.start() - assert Process.alive?(mix.pid) - :ok = GenMix.stop(mix) - refute Process.alive?(mix.pid) - end + test "start and stop" do + mix = GenMix.start() + assert Process.alive?(mix.pid) + :ok = GenMix.stop(mix) + refute Process.alive?(mix.pid) + end test "call" do - flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} mix = GenMix.start() @@ -29,6 +29,7 @@ defmodule Strom.GenMixTest do assert Enum.sort(Enum.to_list(flow[:odd])) == [1, 3, 7, 9] assert Enum.sort(Enum.to_list(flow[:even])) == [2, 4, 8, 10] + assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] end test "massive call" do @@ -70,34 +71,34 @@ defmodule Strom.GenMixTest do Task.await(task2, :infinity) end -# test "huge files" do -# :observer.start() -# source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) -# source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) -# -# sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) -# sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) -# -# flow = -# %{} -# |> Strom.Source.call(source1, :source1) -# |> Strom.Source.call(source2, :source2) -# -# mix = GenMix.start() -# -# inputs = %{ -# source1: fn el -> el end, -# source2: fn el -> el end -# } -# -# outputs = %{ -# odd: fn el -> String.contains?(el, "ORDER_CREATED") end, -# even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end -# } -# -# flow -# |> GenMix.call(mix, inputs, outputs) -# |> Strom.Sink.call(sink1, [:odd]) -# |> Strom.Sink.call(sink2, [:even], true) -# end + # test "huge files" do + # :observer.start() + # source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) + # source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) + # + # sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) + # sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) + # + # flow = + # %{} + # |> Strom.Source.call(source1, :source1) + # |> Strom.Source.call(source2, :source2) + # + # mix = GenMix.start() + # + # inputs = %{ + # source1: fn el -> el end, + # source2: fn el -> el end + # } + # + # outputs = %{ + # odd: fn el -> String.contains?(el, "ORDER_CREATED") end, + # even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end + # } + # + # flow + # |> GenMix.call(mix, inputs, outputs) + # |> Strom.Sink.call(sink1, [:odd]) + # |> Strom.Sink.call(sink2, [:even], true) + # end end From ca2f4146a2bebc08518192c41bcb1f49142740f8 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Tue, 26 Dec 2023 14:26:07 +0100 Subject: [PATCH 08/17] GenCall draft --- lib/gen_call.ex | 163 +++++++++++++++++++++++++++++++++++++++++ test/gen_call_test.exs | 40 ++++++++++ 2 files changed, 203 insertions(+) create mode 100644 lib/gen_call.ex create mode 100644 test/gen_call_test.exs diff --git a/lib/gen_call.ex b/lib/gen_call.ex new file mode 100644 index 0000000..c4bdc68 --- /dev/null +++ b/lib/gen_call.ex @@ -0,0 +1,163 @@ +defmodule Strom.GenCall do + use GenServer + + @buffer 2 + + defstruct pid: nil, + running: false, + buffer: @buffer, + function: nil, + tasks: %{}, + data: %{} + + # TODO supervisor + def start(opts \\ []) when is_list(opts) do + state = %__MODULE__{ + buffer: Keyword.get(opts, :buffer, @buffer) + } + + {:ok, pid} = GenServer.start_link(__MODULE__, state) + __state__(pid) + end + + @impl true + def init(%__MODULE__{} = call) do + {:ok, %{call | pid: self()}} + end + + def call(flow, %__MODULE__{} = call, names, function) + when is_map(flow) and is_list(names) and is_function(function) do + + input_streams = + Enum.reduce(names, %{}, fn name, acc -> + Map.put(acc, {name, function}, Map.fetch!(flow, name)) + end) + + :ok = GenServer.call(call.pid, {:run_inputs, input_streams}) + + sub_flow = + names + |> Enum.reduce(%{}, fn name, flow -> + stream = Stream.resource( + fn -> + nil + end, + fn nil -> + case GenServer.call(call.pid, {:get_data, name}) do + {:ok, data} -> + if length(data) == 0 do + receive do + :continue -> + flush() + end + end + + {data, nil} + + {:error, :done} -> + {:halt, nil} + end + end, + fn nil -> nil end + ) + Map.put(flow, name, stream) + end) + + + flow + |> Map.drop(names) + |> Map.merge(sub_flow) + end + + def stop(%__MODULE__{pid: pid}), do: GenServer.call(pid, :stop) + + def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) + + defp run_inputs(streams, pid, buffer) do + Enum.reduce(streams, %{}, fn {{name, fun}, stream}, acc -> + task = async_run_stream({name, fun}, stream, buffer, pid) + Map.put(acc, name, task) + end) + end + + defp async_run_stream({name, fun}, stream, buffer, pid) do + Task.async(fn -> + stream + |> Stream.chunk_every(buffer) + |> Stream.each(fn chunk -> + chunk = Enum.map(chunk, &fun.(&1)) + GenServer.cast(pid, {:new_data, name, chunk}) + + receive do + :continue -> + flush() + end + end) + |> Stream.run() + + GenServer.cast(pid, {:done, name}) + end) + end + + defp flush do + receive do + _ -> flush() + after + 0 -> :ok + end + end + + @impl true + def handle_call({:run_inputs, streams_to_call}, _from, %__MODULE__{} = call) do + tasks = run_inputs(streams_to_call, call.pid, call.buffer) + + {:reply, :ok, %{call | running: true, tasks: tasks}} + end + + def handle_call({:get_data, name}, {pid, _ref}, call) do + send(pid, :continue) + + data = Map.get(call.data, name, []) + if length(data) == 0 and !call.running do + {:reply, {:error, :done}, call} + else + call = %{call | data: Map.put(call.data, name, [])} + {:reply, {:ok, data}, call} + end + end + + def handle_call(:stop, _from, %__MODULE__{} = call) do + {:stop, :normal, :ok, %{call | running: false}} + end + + def handle_call(:__state__, _from, call), do: {:reply, call, call} + + @impl true + def handle_cast({:new_data, name, chunk}, %__MODULE__{} = call) do + task = Map.fetch!(call.tasks, name) + send(task.pid, :continue) + + prev_data = Map.get(call.data, name, []) + new_data = Map.put(call.data, name, prev_data ++ chunk) + call = %{call | data: new_data} + + {:noreply, call} + end + + def handle_cast({:done, name}, %__MODULE__{} = call) do + call = %{call | tasks: Map.delete(call.tasks, name)} + running = map_size(call.tasks) > 0 + {:noreply, %{call | running: running}} + end + + @impl true + def handle_info({_task_ref, :ok}, call) do + # do nothing for now + {:noreply, call} + end + + def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, call) do + # do nothing for now + {:noreply, call} + end +end diff --git a/test/gen_call_test.exs b/test/gen_call_test.exs new file mode 100644 index 0000000..8836a61 --- /dev/null +++ b/test/gen_call_test.exs @@ -0,0 +1,40 @@ +defmodule Strom.GenCallTest do + use ExUnit.Case, async: false + + alias Strom.GenCall + + test "start and stop" do + call = GenCall.start() + assert Process.alive?(call.pid) + :ok = GenCall.stop(call) + refute Process.alive?(call.pid) + end + + test "call" do + call = GenCall.start() + + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} + fun = fn el -> el * el end + flow = GenCall.call(flow, call, [:numbers1, :numbers2], fun) + + assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] + assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] + assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] + end + + test "call with accumulator" do + call = GenCall.start() + + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} + + fun = fn el, acc -> + {[el, acc], acc + 1} + end + + flow = GenCall.call(flow, call, [:numbers1, :numbers2], {fun, 0}) + + assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] +# assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] +# assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] + end +end From 1c6520f47e257eb15c639284d7d091b6b47f6183 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Tue, 26 Dec 2023 14:49:20 +0100 Subject: [PATCH 09/17] GenCall with accumulator --- lib/gen_call.ex | 25 +++++++++++++++---------- test/gen_call_test.exs | 15 +++++++++------ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/lib/gen_call.ex b/lib/gen_call.ex index c4bdc68..83fc645 100644 --- a/lib/gen_call.ex +++ b/lib/gen_call.ex @@ -25,12 +25,12 @@ defmodule Strom.GenCall do {:ok, %{call | pid: self()}} end - def call(flow, %__MODULE__{} = call, names, function) + def call(flow, %__MODULE__{} = call, names, {function, acc}) when is_map(flow) and is_list(names) and is_function(function) do input_streams = - Enum.reduce(names, %{}, fn name, acc -> - Map.put(acc, {name, function}, Map.fetch!(flow, name)) + Enum.reduce(names, %{}, fn name, streams -> + Map.put(streams, {name, function, acc}, Map.fetch!(flow, name)) end) :ok = GenServer.call(call.pid, {:run_inputs, input_streams}) @@ -74,24 +74,29 @@ defmodule Strom.GenCall do def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) defp run_inputs(streams, pid, buffer) do - Enum.reduce(streams, %{}, fn {{name, fun}, stream}, acc -> - task = async_run_stream({name, fun}, stream, buffer, pid) - Map.put(acc, name, task) + Enum.reduce(streams, %{}, fn {{name, fun, acc}, stream}, streams_acc -> + task = async_run_stream({name, fun, acc}, stream, buffer, pid) + Map.put(streams_acc, name, task) end) end - defp async_run_stream({name, fun}, stream, buffer, pid) do + defp async_run_stream({name, fun, acc}, stream, buffer, pid) do Task.async(fn -> stream |> Stream.chunk_every(buffer) - |> Stream.each(fn chunk -> - chunk = Enum.map(chunk, &fun.(&1)) - GenServer.cast(pid, {:new_data, name, chunk}) + |> Stream.transform(acc, fn chunk, acc -> + {chunk, new_acc} = Enum.reduce(chunk, {[], acc}, fn el, {events, acc} -> + {new_events, acc} = fun.(el, acc) + {events ++ new_events, acc} + end) + GenServer.cast(pid, {:new_data, name, chunk}) receive do :continue -> flush() end + + {[], new_acc} end) |> Stream.run() diff --git a/test/gen_call_test.exs b/test/gen_call_test.exs index 8836a61..f1dd804 100644 --- a/test/gen_call_test.exs +++ b/test/gen_call_test.exs @@ -14,8 +14,11 @@ defmodule Strom.GenCallTest do call = GenCall.start() flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} - fun = fn el -> el * el end - flow = GenCall.call(flow, call, [:numbers1, :numbers2], fun) +# fun = fn el -> el * el end + + function = fn el, nil -> {[el * el], nil} end + + flow = GenCall.call(flow, call, [:numbers1, :numbers2], {function, nil}) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] @@ -31,10 +34,10 @@ defmodule Strom.GenCallTest do {[el, acc], acc + 1} end - flow = GenCall.call(flow, call, [:numbers1, :numbers2], {fun, 0}) + flow = GenCall.call(flow, call, [:numbers1, :numbers2], {fun, 100}) - assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] -# assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] -# assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] + assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 2, 3, 4, 5, 100, 101, 102, 103, 104] + assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] + assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] end end From caa1d31cbe39a8170e70e75cbdd25108a8a63f0c Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Tue, 26 Dec 2023 15:51:53 +0100 Subject: [PATCH 10/17] GenCall with accumulator --- lib/gen_call.ex | 8 +++++++- test/gen_call_test.exs | 8 ++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/gen_call.ex b/lib/gen_call.ex index 83fc645..165c253 100644 --- a/lib/gen_call.ex +++ b/lib/gen_call.ex @@ -1,7 +1,7 @@ defmodule Strom.GenCall do use GenServer - @buffer 2 + @buffer 1000 defstruct pid: nil, running: false, @@ -69,6 +69,12 @@ defmodule Strom.GenCall do |> Map.merge(sub_flow) end + def call(flow, %__MODULE__{} = call, names, function) + when is_map(flow) and is_list(names) and is_function(function) do + fun = fn el, nil -> {[function.(el)], nil} end + call(flow, %__MODULE__{} = call, names, {fun, nil}) + end + def stop(%__MODULE__{pid: pid}), do: GenServer.call(pid, :stop) def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) diff --git a/test/gen_call_test.exs b/test/gen_call_test.exs index f1dd804..7d8cc1f 100644 --- a/test/gen_call_test.exs +++ b/test/gen_call_test.exs @@ -12,13 +12,9 @@ defmodule Strom.GenCallTest do test "call" do call = GenCall.start() - flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} -# fun = fn el -> el * el end - - function = fn el, nil -> {[el * el], nil} end - - flow = GenCall.call(flow, call, [:numbers1, :numbers2], {function, nil}) + fun = &(&1*&1) + flow = GenCall.call(flow, call, [:numbers1, :numbers2], fun) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] From aecbc24f7afcc0037aeb69ef87ccd2be1e7c13be Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Wed, 27 Dec 2023 15:37:15 +0100 Subject: [PATCH 11/17] Transform draft --- lib/dsl.ex | 14 ++ lib/flow.ex | 9 ++ lib/gen_call.ex | 53 +++---- lib/gen_mix.ex | 2 +- test/examples/parcels_data_test.exs | 11 +- test/examples/parcels_test.exs | 224 +++++++++++----------------- test/gen_call_test.exs | 2 +- test/gen_mix_test.exs | 69 +++++---- 8 files changed, 187 insertions(+), 197 deletions(-) diff --git a/lib/dsl.ex b/lib/dsl.ex index ea3b322..c389786 100644 --- a/lib/dsl.ex +++ b/lib/dsl.ex @@ -19,6 +19,10 @@ defmodule Strom.DSL do defstruct function: nil, opts: [], inputs: [] end + defmodule Transform do + defstruct function: nil, acc: nil, inputs: [], call: nil + end + defmodule Module do defstruct module: nil, opts: [], inputs: [], state: nil end @@ -81,6 +85,16 @@ defmodule Strom.DSL do end end + defmacro transform(inputs, function, acc \\ nil) do + quote do + %Strom.DSL.Transform{ + function: unquote(function), + acc: unquote(acc), + inputs: unquote(inputs) + } + end + end + defmacro module(inputs, module, opts \\ []) do quote do %Strom.DSL.Module{ diff --git a/lib/flow.ex b/lib/flow.ex index b521b7e..ba0cb05 100644 --- a/lib/flow.ex +++ b/lib/flow.ex @@ -49,6 +49,9 @@ defmodule Strom.Flow do %DSL.Function{function: function, opts: opts} = fun -> %{fun | function: Strom.Function.start(function, opts)} + %DSL.Transform{} = fun -> + %{fun | call: Strom.GenCall.start()} + %DSL.Module{module: module, opts: opts} = mod -> module = Strom.Module.start(module, opts) %{mod | module: module} @@ -93,6 +96,9 @@ defmodule Strom.Flow do %DSL.Function{function: function, inputs: inputs} -> Strom.Function.call(flow, function, inputs) + %DSL.Transform{call: call, function: function, acc: acc, inputs: inputs} -> + Strom.GenCall.call(flow, call, inputs, {function, acc}) + %DSL.Module{module: module, inputs: inputs} -> Strom.Module.call(flow, module, inputs) @@ -123,6 +129,9 @@ defmodule Strom.Flow do %DSL.Function{function: function} -> Strom.Function.stop(function) + %DSL.Transform{call: call} -> + Strom.GenCall.stop(call) + %DSL.Module{module: module} -> Strom.Module.stop(module) end diff --git a/lib/gen_call.ex b/lib/gen_call.ex index 165c253..e10c64f 100644 --- a/lib/gen_call.ex +++ b/lib/gen_call.ex @@ -27,7 +27,6 @@ defmodule Strom.GenCall do def call(flow, %__MODULE__{} = call, names, {function, acc}) when is_map(flow) and is_list(names) and is_function(function) do - input_streams = Enum.reduce(names, %{}, fn name, streams -> Map.put(streams, {name, function, acc}, Map.fetch!(flow, name)) @@ -38,32 +37,33 @@ defmodule Strom.GenCall do sub_flow = names |> Enum.reduce(%{}, fn name, flow -> - stream = Stream.resource( - fn -> - nil - end, - fn nil -> - case GenServer.call(call.pid, {:get_data, name}) do - {:ok, data} -> - if length(data) == 0 do - receive do - :continue -> - flush() + stream = + Stream.resource( + fn -> + nil + end, + fn nil -> + case GenServer.call(call.pid, {:get_data, name}) do + {:ok, data} -> + if length(data) == 0 do + receive do + :continue -> + flush() + end end - end - {data, nil} + {data, nil} + + {:error, :done} -> + {:halt, nil} + end + end, + fn nil -> nil end + ) - {:error, :done} -> - {:halt, nil} - end - end, - fn nil -> nil end - ) Map.put(flow, name, stream) end) - flow |> Map.drop(names) |> Map.merge(sub_flow) @@ -91,12 +91,14 @@ defmodule Strom.GenCall do stream |> Stream.chunk_every(buffer) |> Stream.transform(acc, fn chunk, acc -> - {chunk, new_acc} = Enum.reduce(chunk, {[], acc}, fn el, {events, acc} -> - {new_events, acc} = fun.(el, acc) - {events ++ new_events, acc} - end) + {chunk, new_acc} = + Enum.reduce(chunk, {[], acc}, fn el, {events, acc} -> + {new_events, acc} = fun.(el, acc) + {events ++ new_events, acc} + end) GenServer.cast(pid, {:new_data, name, chunk}) + receive do :continue -> flush() @@ -129,6 +131,7 @@ defmodule Strom.GenCall do send(pid, :continue) data = Map.get(call.data, name, []) + if length(data) == 0 and !call.running do {:reply, {:error, :done}, call} else diff --git a/lib/gen_mix.ex b/lib/gen_mix.ex index 0d3b3cf..8af44bd 100644 --- a/lib/gen_mix.ex +++ b/lib/gen_mix.ex @@ -28,11 +28,11 @@ defmodule Strom.GenMix do def call(flow, %__MODULE__{} = mix, inputs, outputs) when is_map(flow) and is_map(inputs) and is_map(outputs) do - input_streams = Enum.reduce(inputs, %{}, fn {name, fun}, acc -> Map.put(acc, {name, fun}, Map.fetch!(flow, name)) end) + sub_flow = outputs |> Enum.reduce(%{}, fn {name, fun}, flow -> diff --git a/test/examples/parcels_data_test.exs b/test/examples/parcels_data_test.exs index 6a6b622..bb42aa5 100644 --- a/test/examples/parcels_data_test.exs +++ b/test/examples/parcels_data_test.exs @@ -19,6 +19,11 @@ defmodule Strom.Examples.ParcelsDataTest do to_ship = :rand.uniform(5) order_number = last_order[:order_number] + 1 + if order_number > 10_010 do + Process.sleep(5000) + raise("done") + end + order = %{ type: "ORDER_CREATED", occurred_at: occurred_at, @@ -28,7 +33,7 @@ defmodule Strom.Examples.ParcelsDataTest do {parcels, _} = Enum.reduce(1..to_ship, {[], order[:occurred_at]}, fn _i, {acc, occurred_at} -> - occurred_at = DateTime.add(occurred_at, :rand.uniform(2) * 24 * 3600, :second) + occurred_at = DateTime.add(occurred_at, :rand.uniform(2 * 24 * 3600), :second) parcel = %{ type: "PARCEL_SHIPPED", @@ -69,7 +74,7 @@ defmodule Strom.Examples.ParcelsDataTest do end test "test" do - # GenData.start() - # GenData.call(%{stream: Stream.cycle([:tick])}) + GenData.start() + GenData.call(%{stream: Stream.cycle([:tick])}) end end diff --git a/test/examples/parcels_test.exs b/test/examples/parcels_test.exs index d5472b0..786e133 100644 --- a/test/examples/parcels_test.exs +++ b/test/examples/parcels_test.exs @@ -31,145 +31,96 @@ defmodule Strom.Examples.ParcelsTest do %{type: type, occurred_at: occurred_at, order_number: order_number} end - defmodule ForceOrder do - def start(_opts), do: {%{}, 0, 0} - def stop(_, _opts), do: :ok - - def call(event, memo, _) do - {memo, last_order_number, last_parcel_number} = memo - - IO.inspect(length(Map.keys(memo)), label: "-----------------> memo length") - order_number = event[:order_number] - - case event[:type] do - "PARCEL_SHIPPED" -> - IO.inspect({last_order_number, last_parcel_number}, label: "PARCEL_SHIPPED > ") - - case Map.get(memo, order_number) do - nil -> - memo = Map.put(memo, order_number, [event]) - {[], {memo, last_order_number, order_number}} - - parcels -> - {parcels ++ [event], - {Map.put(memo, order_number, []), last_order_number, order_number}} - end - - "ORDER_CREATED" -> - IO.inspect({last_order_number, last_parcel_number}, label: "ORDER_CREATED > ") - - case Map.get(memo, order_number) do - nil -> - {[event], {Map.put(memo, order_number, []), order_number, last_parcel_number}} - - parcels -> - {[event | parcels], - {Map.put(memo, order_number, []), order_number, last_parcel_number}} - end - end + def force_order(event, memo) do + order_number = event[:order_number] + + case event[:type] do + "PARCEL_SHIPPED" -> + case Map.get(memo, order_number) do + nil -> + memo = Map.put(memo, order_number, [event]) + {[], memo} + + true -> + {[event], memo} + + parcels -> + {[], Map.put(memo, order_number, parcels ++ [event])} + end + + "ORDER_CREATED" -> + case Map.get(memo, order_number) do + nil -> + {[event], Map.put(memo, order_number, true)} + + parcels -> + {[event | parcels], Map.put(memo, order_number, true)} + end end end - defmodule CheckExpired do - @seconds_in_week 3600 * 24 * 7 - - def start(_opts), do: [] - def stop(_, _opts), do: :ok - - def call(event, memo, _) do - order_number = event[:order_number] - - case event[:type] do - "ORDER_CREATED" -> - {expired_events, still_valid} = check_expired(event, memo) - - memo = [{order_number, event[:occurred_at]} | still_valid] - {expired_events ++ [event], memo} - - "PARCEL_SHIPPED" -> - {expired_events, still_valid} = check_expired(event, memo) - - {expired_events ++ [event], still_valid} - end - end - - def check_expired(event, memo) do - {expired, still_valid} = - Enum.split_while(Enum.reverse(memo), fn {_, order_time} -> - DateTime.diff(event[:occurred_at], order_time, :second) > @seconds_in_week - end) - - expired_events = - Enum.map(expired, fn {order_number, time} -> - %{type: "THRESHOLD_EXCEEDED", order_number: order_number, occurred_at: time} - end) - - {expired_events, still_valid} - end - end - - defmodule CheckCount do - def start(_opts), do: %{} - def stop(_, _opts), do: :ok - - def call(event, memo, _) do - order_number = event[:order_number] - - case event[:type] do - "ORDER_CREATED" -> - # putting order time here, it's always less than parcels time - memo = Map.put(memo, order_number, {event[:to_ship], event[:occurred_at]}) - {[], memo} - - "PARCEL_SHIPPED" -> - case Map.get(memo, order_number) do - # was deleted in THRESHOLD_EXCEEDED - nil -> - {[], memo} - - {1, last_occurred_at} -> - last_occurred_at = latest_occurred_at(event[:occurred_at], last_occurred_at) - - ok_event = %{ - type: "ALL_PARCELS_SHIPPED", + @seconds_in_week 3600 * 24 * 7 + + def decide(event, memo) do + order_number = event[:order_number] + + case event[:type] do + "ORDER_CREATED" -> + memo = Map.put(memo, order_number, {event[:to_ship], event[:occurred_at]}) + {[], memo} + + "PARCEL_SHIPPED" -> + case Map.get(memo, order_number) do + # THRESHOLD_EXCEEDED was sent already + nil -> + {[], memo} + + {1, order_occurred_at} -> + good_or_bad = + if DateTime.diff(event[:occurred_at], order_occurred_at, :second) > + @seconds_in_week do + %{ + type: "THRESHOLD_EXCEEDED", + order_number: order_number, + occurred_at: event[:occurred_at] + } + else + %{ + type: "ALL_PARCELS_SHIPPED", + order_number: order_number, + occurred_at: event[:occurred_at] + } + end + + memo = Map.delete(memo, order_number) + {[good_or_bad], memo} + + {amount, order_occurred_at} when amount > 1 -> + if DateTime.diff(event[:occurred_at], order_occurred_at, :second) > + @seconds_in_week do + bad = %{ + type: "THRESHOLD_EXCEEDED", order_number: order_number, - occurred_at: last_occurred_at + occurred_at: event[:occurred_at] } - memo = Map.put(memo, order_number, :all_parcels_shipped) - {[ok_event], memo} + memo = Map.delete(memo, order_number) + {[bad], memo} + else + memo = Map.put(memo, order_number, {amount - 1, order_occurred_at}) - {amount, last_occurred_at} when amount > 1 -> - last_occurred_at = latest_occurred_at(event[:occurred_at], last_occurred_at) - memo = Map.put(memo, order_number, {amount - 1, last_occurred_at}) {[], memo} - end - - "THRESHOLD_EXCEEDED" -> - case Map.get(memo, order_number) do - :all_parcels_shipped -> - {[], Map.delete(memo, order_number)} - - _count -> - {[event], Map.delete(memo, order_number)} - end - end - end - - def latest_occurred_at(occurred_at, last_occurred_at) do - case DateTime.compare(occurred_at, last_occurred_at) do - :gt -> - occurred_at + end + end - _ -> - last_occurred_at - end + :end -> + IO.inspect(memo, limit: :infinity, label: ":end") + {[], memo} end end def to_string(event) do "#{event[:type]},#{event[:order_number]},#{event[:occurred_at]}" - |> IO.inspect() end def buffer(event) do @@ -187,12 +138,11 @@ defmodule Strom.Examples.ParcelsTest do function(:orders, &__MODULE__.build_order/1), source(:parcels, %ReadLines{path: "test_data/parcels.csv"}), function(:parcels, &__MODULE__.build_parcel/1), - mixer([:orders, :parcels], :mixed, buffer: [100, 300]), - # mixer([:orders, :parcels], :mixed, buffer: &__MODULE__.buffer/1), - module(:mixed, ForceOrder), - module(:mixed, CheckExpired), - module(:mixed, CheckCount), - splitter(:mixed, partitions, buffer: 1000), + mixer([:orders, :parcels], :mixed), + transform([:mixed], &ParcelsFlow.force_order/2, %{}), + source(:mixed, [%{type: :end}]), + transform([:mixed], &ParcelsFlow.decide/2, %{}), + splitter(:mixed, partitions), function([:threshold_exceeded, :all_parcels_shipped], &__MODULE__.to_string/1), sink(:threshold_exceeded, %WriteLines{path: "test_data/threshold_exceeded.csv"}), sink(:all_parcels_shipped, %WriteLines{path: "test_data/all_parcels_shipped.csv"}, true) @@ -210,12 +160,12 @@ defmodule Strom.Examples.ParcelsTest do %{ order_number: 222, type: "THRESHOLD_EXCEEDED", - occurred_at: ~U[2017-04-20 09:00:00.000Z] + occurred_at: ~U[2017-04-30 08:00:00.000Z] }, %{ order_number: 333, type: "THRESHOLD_EXCEEDED", - occurred_at: ~U[2017-04-21 09:00:00.000Z] + occurred_at: ~U[2017-05-01 08:00:00.000Z] } ] end @@ -223,9 +173,9 @@ defmodule Strom.Examples.ParcelsTest do @tag timeout: 3000_000 test "flow" do # :observer.start() - # ParcelsFlow.start() - # ParcelsFlow.call(%{}) - - # assert Enum.sort(Enum.to_list(mixed)) == Enum.sort(expected_results()) + ParcelsFlow.start() + ParcelsFlow.call(%{}) + # + # assert Enum.sort(Enum.to_list(mixed)) == Enum.sort(expected_results()) end end diff --git a/test/gen_call_test.exs b/test/gen_call_test.exs index 7d8cc1f..80ba8f7 100644 --- a/test/gen_call_test.exs +++ b/test/gen_call_test.exs @@ -13,7 +13,7 @@ defmodule Strom.GenCallTest do test "call" do call = GenCall.start() flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} - fun = &(&1*&1) + fun = &(&1 * &1) flow = GenCall.call(flow, call, [:numbers1, :numbers2], fun) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] diff --git a/test/gen_mix_test.exs b/test/gen_mix_test.exs index ea30077..4a85626 100644 --- a/test/gen_mix_test.exs +++ b/test/gen_mix_test.exs @@ -71,34 +71,43 @@ defmodule Strom.GenMixTest do Task.await(task2, :infinity) end - # test "huge files" do - # :observer.start() - # source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) - # source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) - # - # sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) - # sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) - # - # flow = - # %{} - # |> Strom.Source.call(source1, :source1) - # |> Strom.Source.call(source2, :source2) - # - # mix = GenMix.start() - # - # inputs = %{ - # source1: fn el -> el end, - # source2: fn el -> el end - # } - # - # outputs = %{ - # odd: fn el -> String.contains?(el, "ORDER_CREATED") end, - # even: fn el -> String.contains?(el, "PARCEL_SHIPPED") end - # } - # - # flow - # |> GenMix.call(mix, inputs, outputs) - # |> Strom.Sink.call(sink1, [:odd]) - # |> Strom.Sink.call(sink2, [:even], true) - # end + test "huge files" do + :observer.start() + source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) + source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) + + sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) + sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) + + flow = + %{} + |> Strom.Source.call(source1, :source1) + |> Strom.Source.call(source2, :source2) + + mix1 = GenMix.start() + mix2 = GenMix.start() + call1 = Strom.GenCall.start() + call2 = Strom.GenCall.start() + + inputs = %{ + source1: fn el -> el end, + source2: fn el -> el end + } + + outputs = %{ + odd: fn el -> rem(el, 2) == 1 end, + even: fn el -> rem(el, 2) == 0 end + } + + function1 = fn el -> String.length(el) end + function2 = fn el -> "#{el}" end + + flow + |> GenMix.call(mix1, inputs, inputs) + |> Strom.GenCall.call(call1, [:source1, :source2], function1) + |> GenMix.call(mix2, inputs, outputs) + |> Strom.GenCall.call(call2, [:odd, :even], function2) + |> Strom.Sink.call(sink1, [:odd]) + |> Strom.Sink.call(sink2, [:even], true) + end end From b7c1fa3653a6c3cbc5a25a76f9f174c7f2fba19d Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Wed, 27 Dec 2023 16:23:04 +0100 Subject: [PATCH 12/17] Transform draft --- lib/dsl.ex | 12 +++++++++++- lib/flow.ex | 6 +++++- test/examples/parcels_test.exs | 6 +++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/dsl.ex b/lib/dsl.ex index c389786..a7c7f87 100644 --- a/lib/dsl.ex +++ b/lib/dsl.ex @@ -85,7 +85,7 @@ defmodule Strom.DSL do end end - defmacro transform(inputs, function, acc \\ nil) do + defmacro transform(inputs, function, acc) do quote do %Strom.DSL.Transform{ function: unquote(function), @@ -95,6 +95,16 @@ defmodule Strom.DSL do end end + defmacro transform(inputs, function) do + quote do + %Strom.DSL.Transform{ + function: unquote(function), + acc: nil, + inputs: unquote(inputs) + } + end + end + defmacro module(inputs, module, opts \\ []) do quote do %Strom.DSL.Module{ diff --git a/lib/flow.ex b/lib/flow.ex index ba0cb05..af742e5 100644 --- a/lib/flow.ex +++ b/lib/flow.ex @@ -97,7 +97,11 @@ defmodule Strom.Flow do Strom.Function.call(flow, function, inputs) %DSL.Transform{call: call, function: function, acc: acc, inputs: inputs} -> - Strom.GenCall.call(flow, call, inputs, {function, acc}) + if is_function(function, 2) do + Strom.GenCall.call(flow, call, inputs, {function, acc}) + else + Strom.GenCall.call(flow, call, inputs, function) + end %DSL.Module{module: module, inputs: inputs} -> Strom.Module.call(flow, module, inputs) diff --git a/test/examples/parcels_test.exs b/test/examples/parcels_test.exs index 786e133..9dd1377 100644 --- a/test/examples/parcels_test.exs +++ b/test/examples/parcels_test.exs @@ -135,15 +135,15 @@ defmodule Strom.Examples.ParcelsTest do [ source(:orders, %ReadLines{path: "test_data/orders.csv"}), - function(:orders, &__MODULE__.build_order/1), + transform([:orders], &__MODULE__.build_order/1), source(:parcels, %ReadLines{path: "test_data/parcels.csv"}), - function(:parcels, &__MODULE__.build_parcel/1), + transform([:parcels], &__MODULE__.build_parcel/1), mixer([:orders, :parcels], :mixed), transform([:mixed], &ParcelsFlow.force_order/2, %{}), source(:mixed, [%{type: :end}]), transform([:mixed], &ParcelsFlow.decide/2, %{}), splitter(:mixed, partitions), - function([:threshold_exceeded, :all_parcels_shipped], &__MODULE__.to_string/1), + transform([:threshold_exceeded, :all_parcels_shipped], &__MODULE__.to_string/1), sink(:threshold_exceeded, %WriteLines{path: "test_data/threshold_exceeded.csv"}), sink(:all_parcels_shipped, %WriteLines{path: "test_data/all_parcels_shipped.csv"}, true) ] From 317e36eff807345335398d8a0048b344a7566fba Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Fri, 5 Jan 2024 12:34:56 +0100 Subject: [PATCH 13/17] GenCall with opts --- lib/gen_call.ex | 28 ++-- mix.exs | 2 +- test/data/even.txt | 3 - test/data/odd.txt | 7 - test/data/output.csv | 214 ---------------------------- test/examples/parcels_data_test.exs | 8 +- test/examples/parcels_test.exs | 56 ++------ test/gen_call_test.exs | 16 +++ 8 files changed, 53 insertions(+), 281 deletions(-) diff --git a/lib/gen_call.ex b/lib/gen_call.ex index e10c64f..41e98c8 100644 --- a/lib/gen_call.ex +++ b/lib/gen_call.ex @@ -7,13 +7,15 @@ defmodule Strom.GenCall do running: false, buffer: @buffer, function: nil, + opts: nil, tasks: %{}, data: %{} # TODO supervisor def start(opts \\ []) when is_list(opts) do state = %__MODULE__{ - buffer: Keyword.get(opts, :buffer, @buffer) + buffer: Keyword.get(opts, :buffer, @buffer), + opts: Keyword.get(opts, :opts, nil) } {:ok, pid} = GenServer.start_link(__MODULE__, state) @@ -26,7 +28,7 @@ defmodule Strom.GenCall do end def call(flow, %__MODULE__{} = call, names, {function, acc}) - when is_map(flow) and is_list(names) and is_function(function) do + when is_map(flow) and is_list(names) and is_function(function, 3) do input_streams = Enum.reduce(names, %{}, fn name, streams -> Map.put(streams, {name, function, acc}, Map.fetch!(flow, name)) @@ -69,9 +71,15 @@ defmodule Strom.GenCall do |> Map.merge(sub_flow) end + def call(flow, %__MODULE__{} = call, names, {function, acc}) + when is_map(flow) and is_list(names) and is_function(function, 2) do + fun = fn el, acc, nil -> function.(el, acc) end + call(flow, %__MODULE__{} = call, names, {fun, acc}) + end + def call(flow, %__MODULE__{} = call, names, function) - when is_map(flow) and is_list(names) and is_function(function) do - fun = fn el, nil -> {[function.(el)], nil} end + when is_map(flow) and is_list(names) and is_function(function, 1) do + fun = fn el, nil, nil -> {[function.(el)], nil} end call(flow, %__MODULE__{} = call, names, {fun, nil}) end @@ -79,21 +87,21 @@ defmodule Strom.GenCall do def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) - defp run_inputs(streams, pid, buffer) do + defp run_inputs(streams, pid, buffer, opts) do Enum.reduce(streams, %{}, fn {{name, fun, acc}, stream}, streams_acc -> - task = async_run_stream({name, fun, acc}, stream, buffer, pid) + task = async_run_stream({name, fun, acc, opts}, stream, buffer, pid) Map.put(streams_acc, name, task) end) end - defp async_run_stream({name, fun, acc}, stream, buffer, pid) do + defp async_run_stream({name, fun, acc, opts}, stream, buffer, pid) do Task.async(fn -> stream |> Stream.chunk_every(buffer) |> Stream.transform(acc, fn chunk, acc -> {chunk, new_acc} = Enum.reduce(chunk, {[], acc}, fn el, {events, acc} -> - {new_events, acc} = fun.(el, acc) + {new_events, acc} = fun.(el, acc, opts) {events ++ new_events, acc} end) @@ -121,8 +129,8 @@ defmodule Strom.GenCall do end @impl true - def handle_call({:run_inputs, streams_to_call}, _from, %__MODULE__{} = call) do - tasks = run_inputs(streams_to_call, call.pid, call.buffer) + def handle_call({:run_inputs, streams_to_call}, _from, %__MODULE__{opts: opts} = call) do + tasks = run_inputs(streams_to_call, call.pid, call.buffer, opts) {:reply, :ok, %{call | running: true, tasks: tasks}} end diff --git a/mix.exs b/mix.exs index 5ed1e43..2c138ad 100644 --- a/mix.exs +++ b/mix.exs @@ -16,7 +16,7 @@ defmodule Strom.MixProject do def application do [ - extra_applications: [:logger], + extra_applications: [:logger, :observer, :runtime_tools, :wx], mod: {Strom.Application, []} ] end diff --git a/test/data/even.txt b/test/data/even.txt index e2ba1ef..e69de29 100644 --- a/test/data/even.txt +++ b/test/data/even.txt @@ -1,3 +0,0 @@ -2 -4 -6 diff --git a/test/data/odd.txt b/test/data/odd.txt index 5330516..e69de29 100644 --- a/test/data/odd.txt +++ b/test/data/odd.txt @@ -1,7 +0,0 @@ -3 -5 -11 -21 -31 -41 -51 diff --git a/test/data/output.csv b/test/data/output.csv index e4ffa55..e69de29 100644 --- a/test/data/output.csv +++ b/test/data/output.csv @@ -1,214 +0,0 @@ -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 -ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 -ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 -ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 diff --git a/test/examples/parcels_data_test.exs b/test/examples/parcels_data_test.exs index bb42aa5..025a6e6 100644 --- a/test/examples/parcels_data_test.exs +++ b/test/examples/parcels_data_test.exs @@ -73,8 +73,8 @@ defmodule Strom.Examples.ParcelsDataTest do end end - test "test" do - GenData.start() - GenData.call(%{stream: Stream.cycle([:tick])}) - end + # test "test" do + # GenData.start() + # GenData.call(%{stream: Stream.cycle([:tick])}) + # end end diff --git a/test/examples/parcels_test.exs b/test/examples/parcels_test.exs index 9dd1377..9965dba 100644 --- a/test/examples/parcels_test.exs +++ b/test/examples/parcels_test.exs @@ -7,6 +7,8 @@ defmodule Strom.Examples.ParcelsTest do use Strom.DSL + @seconds_in_week 3600 * 24 * 7 + def build_order(event) do list = String.split(event, ",") type = Enum.at(list, 0) @@ -38,8 +40,7 @@ defmodule Strom.Examples.ParcelsTest do "PARCEL_SHIPPED" -> case Map.get(memo, order_number) do nil -> - memo = Map.put(memo, order_number, [event]) - {[], memo} + {[], Map.put(memo, order_number, [event])} true -> {[event], memo} @@ -59,8 +60,6 @@ defmodule Strom.Examples.ParcelsTest do end end - @seconds_in_week 3600 * 24 * 7 - def decide(event, memo) do order_number = event[:order_number] @@ -112,10 +111,6 @@ defmodule Strom.Examples.ParcelsTest do {[], memo} end end - - :end -> - IO.inspect(memo, limit: :infinity, label: ":end") - {[], memo} end end @@ -123,10 +118,6 @@ defmodule Strom.Examples.ParcelsTest do "#{event[:type]},#{event[:order_number]},#{event[:occurred_at]}" end - def buffer(event) do - {event[:order_number], 1000} - end - def topology(_opts) do partitions = %{ threshold_exceeded: &(&1[:type] == "THRESHOLD_EXCEEDED"), @@ -134,48 +125,29 @@ defmodule Strom.Examples.ParcelsTest do } [ - source(:orders, %ReadLines{path: "test_data/orders.csv"}), + source(:orders, %ReadLines{path: "test/examples/parcels/orders.csv"}), transform([:orders], &__MODULE__.build_order/1), - source(:parcels, %ReadLines{path: "test_data/parcels.csv"}), + source(:parcels, %ReadLines{path: "test/examples/parcels/parcels.csv"}), transform([:parcels], &__MODULE__.build_parcel/1), mixer([:orders, :parcels], :mixed), transform([:mixed], &ParcelsFlow.force_order/2, %{}), - source(:mixed, [%{type: :end}]), transform([:mixed], &ParcelsFlow.decide/2, %{}), splitter(:mixed, partitions), - transform([:threshold_exceeded, :all_parcels_shipped], &__MODULE__.to_string/1), - sink(:threshold_exceeded, %WriteLines{path: "test_data/threshold_exceeded.csv"}), - sink(:all_parcels_shipped, %WriteLines{path: "test_data/all_parcels_shipped.csv"}, true) + transform([:threshold_exceeded, :all_parcels_shipped], &__MODULE__.to_string/1) + # sink(:threshold_exceeded, %WriteLines{path: "test_data/threshold_exceeded.csv"}), + # sink(:all_parcels_shipped, %WriteLines{path: "test_data/all_parcels_shipped.csv"}, true) ] end end - def expected_results do - [ - %{ - order_number: 111, - type: "ALL_PARCELS_SHIPPED", - occurred_at: ~U[2017-04-21T08:00:00.000Z] - }, - %{ - order_number: 222, - type: "THRESHOLD_EXCEEDED", - occurred_at: ~U[2017-04-30 08:00:00.000Z] - }, - %{ - order_number: 333, - type: "THRESHOLD_EXCEEDED", - occurred_at: ~U[2017-05-01 08:00:00.000Z] - } - ] - end - - @tag timeout: 3000_000 test "flow" do # :observer.start() ParcelsFlow.start() - ParcelsFlow.call(%{}) - # - # assert Enum.sort(Enum.to_list(mixed)) == Enum.sort(expected_results()) + + %{threshold_exceeded: threshold_exceeded, all_parcels_shipped: all_parcels_shipped} = + ParcelsFlow.call(%{}) + + assert length(Enum.to_list(threshold_exceeded)) == 2 + assert length(Enum.to_list(all_parcels_shipped)) == 1 end end diff --git a/test/gen_call_test.exs b/test/gen_call_test.exs index 80ba8f7..7c291a1 100644 --- a/test/gen_call_test.exs +++ b/test/gen_call_test.exs @@ -36,4 +36,20 @@ defmodule Strom.GenCallTest do assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] end + + test "call with opts and accumulator" do + call = GenCall.start(opts: %{add: 1}) + + flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} + + fun = fn el, acc, opts -> + {[el, acc], acc + opts[:add]} + end + + flow = GenCall.call(flow, call, [:numbers1, :numbers2], {fun, 100}) + + assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 2, 3, 4, 5, 100, 101, 102, 103, 104] + assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] + assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] + end end From db3ec5902bf3e2452cad9d9b42139676a8188276 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Fri, 5 Jan 2024 12:41:11 +0100 Subject: [PATCH 14/17] Rename GenCall to transformer --- lib/flow.ex | 8 ++++---- lib/{gen_call.ex => transformer.ex} | 2 +- ...gen_call_test.exs => transformer_test.exs} | 20 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) rename lib/{gen_call.ex => transformer.ex} (99%) rename test/{gen_call_test.exs => transformer_test.exs} (76%) diff --git a/lib/flow.ex b/lib/flow.ex index af742e5..af2d9c8 100644 --- a/lib/flow.ex +++ b/lib/flow.ex @@ -50,7 +50,7 @@ defmodule Strom.Flow do %{fun | function: Strom.Function.start(function, opts)} %DSL.Transform{} = fun -> - %{fun | call: Strom.GenCall.start()} + %{fun | call: Strom.Transformer.start()} %DSL.Module{module: module, opts: opts} = mod -> module = Strom.Module.start(module, opts) @@ -98,9 +98,9 @@ defmodule Strom.Flow do %DSL.Transform{call: call, function: function, acc: acc, inputs: inputs} -> if is_function(function, 2) do - Strom.GenCall.call(flow, call, inputs, {function, acc}) + Strom.Transformer.call(flow, call, inputs, {function, acc}) else - Strom.GenCall.call(flow, call, inputs, function) + Strom.Transformer.call(flow, call, inputs, function) end %DSL.Module{module: module, inputs: inputs} -> @@ -134,7 +134,7 @@ defmodule Strom.Flow do Strom.Function.stop(function) %DSL.Transform{call: call} -> - Strom.GenCall.stop(call) + Strom.Transformer.stop(call) %DSL.Module{module: module} -> Strom.Module.stop(module) diff --git a/lib/gen_call.ex b/lib/transformer.ex similarity index 99% rename from lib/gen_call.ex rename to lib/transformer.ex index 41e98c8..ea9a8ac 100644 --- a/lib/gen_call.ex +++ b/lib/transformer.ex @@ -1,4 +1,4 @@ -defmodule Strom.GenCall do +defmodule Strom.Transformer do use GenServer @buffer 1000 diff --git a/test/gen_call_test.exs b/test/transformer_test.exs similarity index 76% rename from test/gen_call_test.exs rename to test/transformer_test.exs index 7c291a1..aacc7e8 100644 --- a/test/gen_call_test.exs +++ b/test/transformer_test.exs @@ -1,20 +1,20 @@ -defmodule Strom.GenCallTest do +defmodule Strom.TransformerTest do use ExUnit.Case, async: false - alias Strom.GenCall + alias Strom.Transformer test "start and stop" do - call = GenCall.start() + call = Transformer.start() assert Process.alive?(call.pid) - :ok = GenCall.stop(call) + :ok = Transformer.stop(call) refute Process.alive?(call.pid) end test "call" do - call = GenCall.start() + call = Transformer.start() flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} fun = &(&1 * &1) - flow = GenCall.call(flow, call, [:numbers1, :numbers2], fun) + flow = Transformer.call(flow, call, [:numbers1, :numbers2], fun) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] @@ -22,7 +22,7 @@ defmodule Strom.GenCallTest do end test "call with accumulator" do - call = GenCall.start() + call = Transformer.start() flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} @@ -30,7 +30,7 @@ defmodule Strom.GenCallTest do {[el, acc], acc + 1} end - flow = GenCall.call(flow, call, [:numbers1, :numbers2], {fun, 100}) + flow = Transformer.call(flow, call, [:numbers1, :numbers2], {fun, 100}) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 2, 3, 4, 5, 100, 101, 102, 103, 104] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] @@ -38,7 +38,7 @@ defmodule Strom.GenCallTest do end test "call with opts and accumulator" do - call = GenCall.start(opts: %{add: 1}) + call = Transformer.start(opts: %{add: 1}) flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} @@ -46,7 +46,7 @@ defmodule Strom.GenCallTest do {[el, acc], acc + opts[:add]} end - flow = GenCall.call(flow, call, [:numbers1, :numbers2], {fun, 100}) + flow = Transformer.call(flow, call, [:numbers1, :numbers2], {fun, 100}) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 2, 3, 4, 5, 100, 101, 102, 103, 104] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] From 3047edb5fb8adeb5773ef0ed5b70bf8f19df6b18 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Fri, 5 Jan 2024 12:45:52 +0100 Subject: [PATCH 15/17] Rename Rename to Renamer --- lib/flow.ex | 4 ++-- lib/{rename.ex => renamer.ex} | 2 +- test/{rename_test.exs => renamer_test.exs} | 16 ++++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) rename lib/{rename.ex => renamer.ex} (96%) rename test/{rename_test.exs => renamer_test.exs} (67%) diff --git a/lib/flow.ex b/lib/flow.ex index af2d9c8..fa4e24d 100644 --- a/lib/flow.ex +++ b/lib/flow.ex @@ -57,7 +57,7 @@ defmodule Strom.Flow do %{mod | module: module} %DSL.Rename{names: names} = ren -> - rename = Strom.Rename.start(names) + rename = Strom.Renamer.start(names) %{ren | rename: rename} end end) @@ -107,7 +107,7 @@ defmodule Strom.Flow do Strom.Module.call(flow, module, inputs) %DSL.Rename{rename: rename, names: names} -> - Strom.Rename.call(flow, rename, names) + Strom.Renamer.call(flow, rename, names) end end) diff --git a/lib/rename.ex b/lib/renamer.ex similarity index 96% rename from lib/rename.ex rename to lib/renamer.ex index 58e5b1f..921a9ee 100644 --- a/lib/rename.ex +++ b/lib/renamer.ex @@ -1,4 +1,4 @@ -defmodule Strom.Rename do +defmodule Strom.Renamer do use GenServer defstruct names: nil, pid: nil diff --git a/test/rename_test.exs b/test/renamer_test.exs similarity index 67% rename from test/rename_test.exs rename to test/renamer_test.exs index b86172c..5abc49b 100644 --- a/test/rename_test.exs +++ b/test/renamer_test.exs @@ -1,22 +1,22 @@ -defmodule Strom.RenameTest do +defmodule Strom.RenamerTest do use ExUnit.Case, async: true - alias Strom.Rename + alias Strom.Renamer test "start and stop" do - rename = Rename.start(%{s1: :s2}) + rename = Renamer.start(%{s1: :s2}) assert Process.alive?(rename.pid) - :ok = Rename.stop(rename) + :ok = Renamer.stop(rename) refute Process.alive?(rename.pid) end test "rename" do names = %{s1: :foo1, s2: :foo2} - rename = Rename.start(names) + rename = Renamer.start(names) flow = %{s1: [1], s2: [2], s3: [3]} - new_flow = Rename.call(flow, rename, names) + new_flow = Renamer.call(flow, rename, names) refute new_flow[:s1] refute new_flow[:s2] @@ -28,11 +28,11 @@ defmodule Strom.RenameTest do test "raise when there is no such name" do names = %{s2: :foo2} - rename = Rename.start(names) + rename = Renamer.start(names) flow = %{s1: [1]} assert_raise KeyError, fn -> - Rename.call(flow, rename, names) + Renamer.call(flow, rename, names) end end end From 5747e17553b0d0c99739fb37024f348853e4aba2 Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Fri, 5 Jan 2024 12:58:51 +0100 Subject: [PATCH 16/17] Delete unused components --- lib/dsl.ex | 28 ------------- lib/flow.ex | 19 --------- lib/function.ex | 59 -------------------------- lib/loop.ex | 67 ------------------------------ lib/module.ex | 73 --------------------------------- lib/transformer.ex | 8 ++-- test/function_test.exs | 65 ----------------------------- test/integration/sleep_test.exs | 26 ------------ test/loop_test.exs | 30 -------------- test/module_test.exs | 70 ------------------------------- test/transformer_test.exs | 27 +++++++----- 11 files changed, 22 insertions(+), 450 deletions(-) delete mode 100644 lib/function.ex delete mode 100644 lib/loop.ex delete mode 100644 lib/module.ex delete mode 100644 test/function_test.exs delete mode 100644 test/integration/sleep_test.exs delete mode 100644 test/loop_test.exs delete mode 100644 test/module_test.exs diff --git a/lib/dsl.ex b/lib/dsl.ex index a7c7f87..89e4dfc 100644 --- a/lib/dsl.ex +++ b/lib/dsl.ex @@ -15,18 +15,10 @@ defmodule Strom.DSL do defstruct splitter: nil, opts: [], input: nil, partitions: %{} end - defmodule Function do - defstruct function: nil, opts: [], inputs: [] - end - defmodule Transform do defstruct function: nil, acc: nil, inputs: [], call: nil end - defmodule Module do - defstruct module: nil, opts: [], inputs: [], state: nil - end - defmodule Rename do defstruct names: nil, rename: nil end @@ -75,16 +67,6 @@ defmodule Strom.DSL do end end - defmacro function(inputs, function, opts \\ []) do - quote do - %Strom.DSL.Function{ - function: unquote(function), - opts: unquote(opts), - inputs: unquote(inputs) - } - end - end - defmacro transform(inputs, function, acc) do quote do %Strom.DSL.Transform{ @@ -105,16 +87,6 @@ defmodule Strom.DSL do end end - defmacro module(inputs, module, opts \\ []) do - quote do - %Strom.DSL.Module{ - module: unquote(module), - opts: unquote(opts), - inputs: unquote(inputs) - } - end - end - defmacro from(module, opts \\ []) do quote do unless is_atom(unquote(module)) do diff --git a/lib/flow.ex b/lib/flow.ex index fa4e24d..6348f4e 100644 --- a/lib/flow.ex +++ b/lib/flow.ex @@ -46,16 +46,9 @@ defmodule Strom.Flow do %DSL.Splitter{opts: opts} = splitter -> %{splitter | splitter: Strom.Splitter.start(opts)} - %DSL.Function{function: function, opts: opts} = fun -> - %{fun | function: Strom.Function.start(function, opts)} - %DSL.Transform{} = fun -> %{fun | call: Strom.Transformer.start()} - %DSL.Module{module: module, opts: opts} = mod -> - module = Strom.Module.start(module, opts) - %{mod | module: module} - %DSL.Rename{names: names} = ren -> rename = Strom.Renamer.start(names) %{ren | rename: rename} @@ -93,9 +86,6 @@ defmodule Strom.Flow do %DSL.Splitter{splitter: splitter, input: input, partitions: partitions} -> Strom.Splitter.call(flow, splitter, input, partitions) - %DSL.Function{function: function, inputs: inputs} -> - Strom.Function.call(flow, function, inputs) - %DSL.Transform{call: call, function: function, acc: acc, inputs: inputs} -> if is_function(function, 2) do Strom.Transformer.call(flow, call, inputs, {function, acc}) @@ -103,9 +93,6 @@ defmodule Strom.Flow do Strom.Transformer.call(flow, call, inputs, function) end - %DSL.Module{module: module, inputs: inputs} -> - Strom.Module.call(flow, module, inputs) - %DSL.Rename{rename: rename, names: names} -> Strom.Renamer.call(flow, rename, names) end @@ -130,14 +117,8 @@ defmodule Strom.Flow do %DSL.Splitter{splitter: splitter} -> Strom.Splitter.stop(splitter) - %DSL.Function{function: function} -> - Strom.Function.stop(function) - %DSL.Transform{call: call} -> Strom.Transformer.stop(call) - - %DSL.Module{module: module} -> - Strom.Module.stop(module) end end) diff --git a/lib/function.ex b/lib/function.ex deleted file mode 100644 index 51e0312..0000000 --- a/lib/function.ex +++ /dev/null @@ -1,59 +0,0 @@ -defmodule Strom.Function do - use GenServer - - defstruct function: nil, opts: nil, pid: nil - - def start(function, opts \\ nil) do - state = %__MODULE__{function: function, opts: opts} - - {:ok, pid} = GenServer.start_link(__MODULE__, state) - __state__(pid) - end - - @impl true - def init(%__MODULE__{} = state), do: {:ok, %{state | pid: self()}} - - def call(flow, %__MODULE__{function: function, pid: pid}, names) - when is_map(flow) and is_function(function) and is_list(names) do - streams = - Enum.reduce(names, %{}, fn name, acc -> - Map.put(acc, name, Map.fetch!(flow, name)) - end) - - sub_flow = - Enum.reduce(streams, %{}, fn {name, stream}, acc -> - stream = - Stream.map(stream, fn event -> - GenServer.call(pid, {:call, event}, :infinity) - end) - - Map.put(acc, name, stream) - end) - - Map.merge(flow, sub_flow) - end - - def call(flow, function, name) when is_map(flow), do: call(flow, function, [name]) - - def stop(%__MODULE__{pid: pid}), do: GenServer.call(pid, :stop) - - def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) - - @impl true - def handle_call({:call, event}, _from, state) do - new_event = - if is_function(state.function, 1) do - state.function.(event) - else - state.function.(event, state.opts) - end - - {:reply, new_event, state} - end - - def handle_call(:stop, _from, state) do - {:stop, :normal, :ok, state} - end - - def handle_call(:__state__, _from, state), do: {:reply, state, state} -end diff --git a/lib/loop.ex b/lib/loop.ex deleted file mode 100644 index 659ff84..0000000 --- a/lib/loop.ex +++ /dev/null @@ -1,67 +0,0 @@ -defmodule Strom.Loop do - use GenServer - - @default_timeout 5_000 - defstruct data: [], pid: nil, infinite: false, last_data_at: nil, timeout: @default_timeout - - def start, do: start([]) - - def start(%__MODULE__{} = loop), do: loop - - def start(opts) do - loop = %__MODULE__{ - timeout: Keyword.get(opts, :timeout, @default_timeout) - } - - {:ok, pid} = GenServer.start_link(__MODULE__, loop) - __state__(pid) - end - - @impl true - def init(%__MODULE__{} = loop), do: {:ok, %{loop | pid: self()}} - - def call(%__MODULE__{} = loop), do: GenServer.call(loop.pid, :get_data) - - def call(%__MODULE__{} = loop, data), do: GenServer.call(loop.pid, {:put_data, data}) - - def stop(%__MODULE__{} = loop), do: GenServer.call(loop.pid, :stop) - - def infinite?(%__MODULE__{infinite: infinite}), do: infinite - - def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) - - @impl true - def handle_call(:get_data, _from, %__MODULE__{data: data} = loop) do - last_data_at = if is_nil(loop.last_data_at), do: time_now(), else: loop.last_data_at - loop = %{loop | data: [], last_data_at: last_data_at} - - case data do - [] -> - if time_now() - last_data_at > loop.timeout do - {:reply, {:error, {:halt, loop}}, loop} - else - {:reply, {:ok, {[], loop}}, loop} - end - - data -> - {:reply, {:ok, {data, loop}}, loop} - end - end - - def handle_call({:put_data, data}, _from, %__MODULE__{} = loop) do - loop = %{loop | data: loop.data ++ [data], last_data_at: time_now()} - {:reply, {:ok, {[], loop}}, loop} - end - - def handle_call(:stop, _from, %__MODULE__{} = loop) do - {:stop, :normal, :ok, loop} - end - - def handle_call(:__state__, _from, state), do: {:reply, state, state} - - defp time_now do - "Etc/UTC" - |> DateTime.now!() - |> DateTime.to_unix(:millisecond) - end -end diff --git a/lib/module.ex b/lib/module.ex deleted file mode 100644 index de74e02..0000000 --- a/lib/module.ex +++ /dev/null @@ -1,73 +0,0 @@ -defmodule Strom.Module do - # TODO define behaviour - use GenServer - - defstruct module: nil, pid: nil, opts: [], state: nil - - def start(module, opts \\ []) when is_atom(module) do - state = apply(module, :start, [opts]) - state = %__MODULE__{module: module, opts: opts, state: state} - {:ok, pid} = GenServer.start_link(__MODULE__, state) - __state__(pid) - end - - @impl true - def init(%__MODULE__{} = state), do: {:ok, %{state | pid: self()}} - - def call(flow, %__MODULE__{pid: pid} = state, names) - when is_map(flow) and is_list(names) do - streams = - Enum.reduce(names, %{}, fn name, acc -> - Map.put(acc, name, Map.fetch!(flow, name)) - end) - - sub_flow = - Enum.reduce(streams, %{}, fn {name, stream}, acc -> - stream = - if is_pipeline_module?(state.module) do - apply(state.module, :stream, [stream]) - else - Stream.transform(stream, state.state, fn event, acc -> - GenServer.call(pid, {:call, event, acc}, :infinity) - end) - end - - Map.put(acc, name, stream) - end) - - Map.merge(flow, sub_flow) - end - - def call(flow, state, name) when is_map(flow), do: call(flow, state, [name]) - - def stop(%__MODULE__{module: module, state: state, opts: opts, pid: pid}) do - if is_pipeline_module?(module) do - apply(module, :stop, []) - else - apply(module, :stop, [state, opts]) - end - - GenServer.call(pid, :stop) - end - - def __state__(pid) when is_pid(pid), do: GenServer.call(pid, :__state__) - - @impl true - def handle_call({:call, event, acc}, _from, state) do - {events, acc} = apply(state.module, :call, [event, acc, state.opts]) - - {:reply, {events, acc}, state} - end - - def handle_call(:stop, _from, state) do - {:stop, :normal, :ok, state} - end - - def handle_call(:__state__, _from, state), do: {:reply, state, state} - - defp is_pipeline_module?(module) when is_atom(module) do - is_list(module.alf_components()) - rescue - _error -> false - end -end diff --git a/lib/transformer.ex b/lib/transformer.ex index ea9a8ac..3bdf3f8 100644 --- a/lib/transformer.ex +++ b/lib/transformer.ex @@ -28,7 +28,9 @@ defmodule Strom.Transformer do end def call(flow, %__MODULE__{} = call, names, {function, acc}) - when is_map(flow) and is_list(names) and is_function(function, 3) do + when is_map(flow) and is_function(function, 3) do + names = if is_list(names), do: names, else: [names] + input_streams = Enum.reduce(names, %{}, fn name, streams -> Map.put(streams, {name, function, acc}, Map.fetch!(flow, name)) @@ -72,13 +74,13 @@ defmodule Strom.Transformer do end def call(flow, %__MODULE__{} = call, names, {function, acc}) - when is_map(flow) and is_list(names) and is_function(function, 2) do + when is_map(flow) and is_function(function, 2) do fun = fn el, acc, nil -> function.(el, acc) end call(flow, %__MODULE__{} = call, names, {fun, acc}) end def call(flow, %__MODULE__{} = call, names, function) - when is_map(flow) and is_list(names) and is_function(function, 1) do + when is_map(flow) and is_function(function, 1) do fun = fn el, nil, nil -> {[function.(el)], nil} end call(flow, %__MODULE__{} = call, names, {fun, nil}) end diff --git a/test/function_test.exs b/test/function_test.exs deleted file mode 100644 index 5d801a6..0000000 --- a/test/function_test.exs +++ /dev/null @@ -1,65 +0,0 @@ -defmodule Strom.FunctionTest do - use ExUnit.Case, async: true - - alias Strom.Function - alias Strom.Source - alias Strom.Source.ReadLines - - setup do - path = "test/data/orders.csv" - source = Source.start(%ReadLines{path: path}) - flow = Source.call(%{}, source, :orders) - %{flow: flow} - end - - test "start and stop" do - function = Function.start(&"foo-#{&1}") - assert Process.alive?(function.pid) - :ok = Function.stop(function) - refute Process.alive?(function.pid) - end - - test "function", %{flow: flow} do - function = Function.start(&"foo-#{&1}") - - %{orders: orders} = Function.call(flow, function, [:orders]) - orders = Enum.to_list(orders) - Enum.each(orders, fn line -> assert String.starts_with?(line, "foo-") end) - assert length(orders) == length(String.split(File.read!("test/data/orders.csv"), "\n")) - end - - test "with several streams", %{flow: flow} do - path = "test/data/parcels.csv" - source2 = Source.start(%ReadLines{path: path}) - - function = - Function.start(&"foo-#{&1}") - - %{orders: orders, parcels: parcels} = - flow - |> Source.call(source2, :parcels) - |> Function.call(function, [:parcels]) - - parcels = Enum.to_list(parcels) - Enum.each(parcels, fn line -> assert String.starts_with?(line, "foo-") end) - assert length(parcels) == length(String.split(File.read!("test/data/parcels.csv"), "\n")) - - orders = Enum.to_list(orders) - assert Enum.join(orders, "\n") == File.read!("test/data/orders.csv") - end - - test "when applied to empty flow" do - function = Function.start(&"foo-#{&1}") - - assert_raise KeyError, fn -> - Function.call(%{}, function, [:orders]) - end - end - - test "with extra argument" do - function = Function.start(fn event, extra -> "#{extra}-#{event}" end, "foo") - - %{events: stream} = Function.call(%{events: [1, 2, 3]}, function, :events) - assert Enum.to_list(stream) == ["foo-1", "foo-2", "foo-3"] - end -end diff --git a/test/integration/sleep_test.exs b/test/integration/sleep_test.exs deleted file mode 100644 index a52182b..0000000 --- a/test/integration/sleep_test.exs +++ /dev/null @@ -1,26 +0,0 @@ -defmodule Strom.Integration.SleepTest do - use ExUnit.Case, async: false - - # test "sleep in mixer" do - # flow = %{s1: Stream.cycle([1, 2, 3]), s2: Stream.cycle([10, 20, 30])} - # - # sleep_fun = - # Strom.Function.start( - # &Stream.map(&1, fn el -> - # Process.sleep(100) - # el - # end) - # ) - # - # to_string = Strom.Function.start(&Stream.map(&1, fn el -> "#{el}" end)) - # - # sink = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/sleep.txt"}) - # - # flow - # |> Strom.Function.call(sleep_fun, :s1) - # |> Strom.Mixer.call(Strom.Mixer.start(), [:s1, :s2], :stream) - # |> Strom.Function.call(to_string, :stream) - # |> Strom.Function.call(sleep_fun, :stream) - # |> Strom.Sink.call(sink, :stream, true) - # end -end diff --git a/test/loop_test.exs b/test/loop_test.exs deleted file mode 100644 index 28a2948..0000000 --- a/test/loop_test.exs +++ /dev/null @@ -1,30 +0,0 @@ -defmodule Strom.LoopTest do - use ExUnit.Case - - test "loop" do - flow = %{stream: [1, 2, 3]} - - plus_one = - Strom.Function.start(&(&1 + 1)) - - mixer = Strom.Mixer.start() - splitter = Strom.Splitter.start() - - loop = Strom.Loop.start(timeout: 100) - source_loop = Strom.Source.start(loop) - sink_loop = Strom.Sink.start(loop) - - flow = - flow - |> Strom.Source.call(source_loop, :looped) - |> Strom.Mixer.call(mixer, [:looped, :stream], :merged) - |> Strom.Function.call(plus_one, :merged) - |> Strom.Splitter.call(splitter, :merged, %{ - ok: fn el -> el >= 10 end, - not_ok: fn el -> el < 10 end - }) - |> Strom.Sink.call(sink_loop, :not_ok, true) - - assert Enum.to_list(flow[:ok]) == [10, 10, 10] - end -end diff --git a/test/module_test.exs b/test/module_test.exs deleted file mode 100644 index 426a944..0000000 --- a/test/module_test.exs +++ /dev/null @@ -1,70 +0,0 @@ -defmodule Strom.ModuleTest do - use ExUnit.Case, async: true - - alias Strom.Module - alias Strom.Source - alias Strom.Source.ReadLines - - defmodule MyModule do - defstruct state: nil - - def start(_opts) do - :memo - end - - def call(event, memo, opts) do - {["#{opts[:prefix]}-#{event}"], memo} - end - - def stop(:memo, opts), do: opts - end - - setup do - path = "test/data/orders.csv" - source = Source.start(%ReadLines{path: path}) - flow = Source.call(%{}, source, :orders) - %{flow: flow} - end - - test "start and stop" do - module = Module.start(MyModule, prefix: "foo") - assert Process.alive?(module.pid) - :ok = Module.stop(module) - refute Process.alive?(module.pid) - end - - test "module", %{flow: flow} do - module = Module.start(MyModule, prefix: "foo") - %{orders: orders} = Module.call(flow, module, [:orders]) - orders = Enum.to_list(orders) - Enum.each(orders, fn line -> assert String.starts_with?(line, "foo-") end) - assert length(orders) == length(String.split(File.read!("test/data/orders.csv"), "\n")) - end - - test "with several streams", %{flow: flow} do - path = "test/data/parcels.csv" - source2 = Source.start(%ReadLines{path: path}) - - module = Module.start(MyModule, prefix: "foo") - - %{orders: orders, parcels: parcels} = - flow - |> Source.call(source2, :parcels) - |> Module.call(module, [:parcels]) - - parcels = Enum.to_list(parcels) - Enum.each(parcels, fn line -> assert String.starts_with?(line, "foo-") end) - assert length(parcels) == length(String.split(File.read!("test/data/parcels.csv"), "\n")) - - orders = Enum.to_list(orders) - assert Enum.join(orders, "\n") == File.read!("test/data/orders.csv") - end - - test "when applied to empty flow" do - module = Module.start(MyModule, prefix: "foo") - - assert_raise KeyError, fn -> - Module.call(%{}, module, [:orders]) - end - end -end diff --git a/test/transformer_test.exs b/test/transformer_test.exs index aacc7e8..0187917 100644 --- a/test/transformer_test.exs +++ b/test/transformer_test.exs @@ -4,25 +4,32 @@ defmodule Strom.TransformerTest do alias Strom.Transformer test "start and stop" do - call = Transformer.start() - assert Process.alive?(call.pid) - :ok = Transformer.stop(call) - refute Process.alive?(call.pid) + transformer = Transformer.start() + assert Process.alive?(transformer.pid) + :ok = Transformer.stop(transformer) + refute Process.alive?(transformer.pid) end test "call" do - call = Transformer.start() + transformer = Transformer.start() flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} fun = &(&1 * &1) - flow = Transformer.call(flow, call, [:numbers1, :numbers2], fun) + flow = Transformer.call(flow, transformer, [:numbers1, :numbers2], fun) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [36, 49, 64, 81, 100] assert Enum.sort(Enum.to_list(flow[:numbers3])) == [0, 0, 0, 0, 0] end + test "call with one stream" do + transformer = Transformer.start() + flow = %{numbers1: [1, 2, 3, 4, 5]} + flow = Transformer.call(flow, transformer, :numbers1, &(&1 * &1)) + assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 4, 9, 16, 25] + end + test "call with accumulator" do - call = Transformer.start() + transformer = Transformer.start() flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} @@ -30,7 +37,7 @@ defmodule Strom.TransformerTest do {[el, acc], acc + 1} end - flow = Transformer.call(flow, call, [:numbers1, :numbers2], {fun, 100}) + flow = Transformer.call(flow, transformer, [:numbers1, :numbers2], {fun, 100}) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 2, 3, 4, 5, 100, 101, 102, 103, 104] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] @@ -38,7 +45,7 @@ defmodule Strom.TransformerTest do end test "call with opts and accumulator" do - call = Transformer.start(opts: %{add: 1}) + transformer = Transformer.start(opts: %{add: 1}) flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10], numbers3: [0, 0, 0, 0, 0]} @@ -46,7 +53,7 @@ defmodule Strom.TransformerTest do {[el, acc], acc + opts[:add]} end - flow = Transformer.call(flow, call, [:numbers1, :numbers2], {fun, 100}) + flow = Transformer.call(flow, transformer, [:numbers1, :numbers2], {fun, 100}) assert Enum.sort(Enum.to_list(flow[:numbers1])) == [1, 2, 3, 4, 5, 100, 101, 102, 103, 104] assert Enum.sort(Enum.to_list(flow[:numbers2])) == [6, 7, 8, 9, 10, 100, 101, 102, 103, 104] From ede8090305d02ba2c26498d9acaeb303d725bc1d Mon Sep 17 00:00:00 2001 From: Anton Mishchuk Date: Fri, 5 Jan 2024 14:06:48 +0100 Subject: [PATCH 17/17] Fix tests --- lib/dsl.ex | 13 +- lib/flow.ex | 23 +- mix.exs | 2 +- test/data/even.txt | 3 + test/data/odd.txt | 7 + test/data/output.csv | 107 +++++++++ test/dsl_test.exs | 72 +++--- test/examples/parcels/old_test.exs | 209 ------------------ .../{ => parcels}/parcels_data_test.exs | 36 ++- test/examples/{ => parcels}/parcels_test.exs | 1 - test/examples/simple_numbers_test.exs | 78 +++---- test/examples/telegram_test.exs | 16 +- test/examples/words_count_test.exs | 20 +- test/gen_mix_test.exs | 78 +++---- test/integration/split_and_mix_test.exs | 6 +- 15 files changed, 281 insertions(+), 390 deletions(-) delete mode 100644 test/examples/parcels/old_test.exs rename test/examples/{ => parcels}/parcels_data_test.exs (72%) rename test/examples/{ => parcels}/parcels_test.exs (99%) diff --git a/lib/dsl.ex b/lib/dsl.ex index 89e4dfc..d80d13d 100644 --- a/lib/dsl.ex +++ b/lib/dsl.ex @@ -16,7 +16,7 @@ defmodule Strom.DSL do end defmodule Transform do - defstruct function: nil, acc: nil, inputs: [], call: nil + defstruct function: nil, acc: nil, opts: nil, inputs: [], call: nil end defmodule Rename do @@ -67,6 +67,17 @@ defmodule Strom.DSL do end end + defmacro transform(inputs, function, acc, opts) do + quote do + %Strom.DSL.Transform{ + function: unquote(function), + acc: unquote(acc), + opts: unquote(opts), + inputs: unquote(inputs) + } + end + end + defmacro transform(inputs, function, acc) do quote do %Strom.DSL.Transform{ diff --git a/lib/flow.ex b/lib/flow.ex index 6348f4e..ef91ad2 100644 --- a/lib/flow.ex +++ b/lib/flow.ex @@ -46,9 +46,12 @@ defmodule Strom.Flow do %DSL.Splitter{opts: opts} = splitter -> %{splitter | splitter: Strom.Splitter.start(opts)} - %DSL.Transform{} = fun -> + %DSL.Transform{opts: nil} = fun -> %{fun | call: Strom.Transformer.start()} + %DSL.Transform{opts: opts} = fun when is_list(opts) -> + %{fun | call: Strom.Transformer.start(opts)} + %DSL.Rename{names: names} = ren -> rename = Strom.Renamer.start(names) %{ren | rename: rename} @@ -87,10 +90,10 @@ defmodule Strom.Flow do Strom.Splitter.call(flow, splitter, input, partitions) %DSL.Transform{call: call, function: function, acc: acc, inputs: inputs} -> - if is_function(function, 2) do - Strom.Transformer.call(flow, call, inputs, {function, acc}) - else + if is_function(function, 1) do Strom.Transformer.call(flow, call, inputs, function) + else + Strom.Transformer.call(flow, call, inputs, {function, acc}) end %DSL.Rename{rename: rename, names: names} -> @@ -126,13 +129,17 @@ defmodule Strom.Flow do end @impl true - def handle_info({_task_ref, :ok}, mixer) do + def handle_info(:continue, flow) do + {:noreply, flow} + end + + def handle_info({_task_ref, :ok}, flow) do # do nothing for now - {:noreply, mixer} + {:noreply, flow} end - def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, mixer) do + def handle_info({:DOWN, _task_ref, :process, _task_pid, :normal}, flow) do # do nothing for now - {:noreply, mixer} + {:noreply, flow} end end diff --git a/mix.exs b/mix.exs index 2c138ad..5ed1e43 100644 --- a/mix.exs +++ b/mix.exs @@ -16,7 +16,7 @@ defmodule Strom.MixProject do def application do [ - extra_applications: [:logger, :observer, :runtime_tools, :wx], + extra_applications: [:logger], mod: {Strom.Application, []} ] end diff --git a/test/data/even.txt b/test/data/even.txt index e69de29..e2ba1ef 100644 --- a/test/data/even.txt +++ b/test/data/even.txt @@ -0,0 +1,3 @@ +2 +4 +6 diff --git a/test/data/odd.txt b/test/data/odd.txt index e69de29..443b0bc 100644 --- a/test/data/odd.txt +++ b/test/data/odd.txt @@ -0,0 +1,7 @@ +11 +21 +31 +41 +51 +3 +5 diff --git a/test/data/output.csv b/test/data/output.csv index e69de29..f65c9b7 100644 --- a/test/data/output.csv +++ b/test/data/output.csv @@ -0,0 +1,107 @@ +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 +ORDER_CREATED,2017-04-18T20:00:00.000Z,111,3 +ORDER_CREATED,2017-04-20T09:00:00.000Z,222,2 +ORDER_CREATED,2017-04-21T09:00:00.000Z,333,2 diff --git a/test/dsl_test.exs b/test/dsl_test.exs index cdc465a..6753b41 100644 --- a/test/dsl_test.exs +++ b/test/dsl_test.exs @@ -4,56 +4,35 @@ defmodule Strom.DSLTest do alias Strom.Source.ReadLines alias Strom.Sink.WriteLines - defmodule Pipeline do - use ALF.DSL - - @components [ - stage(:to_integer), - stage(:add_one) - ] - - def to_integer(event, _), do: String.to_integer(event) - def add_one(event, _), do: event + 1 - end - - defmodule ToString do - use ALF.DSL - - @components [ - stage(:to_string) - ] - - def to_string(event, _), do: "#{event}" - end - defmodule MyFlow do use Strom.DSL def odd_fun(event), do: rem(event, 2) == 1 + def even_fun(event), do: rem(event, 2) == 0 def to_string(el), do: "#{el}" - def topology(opts) do - source1 = %ReadLines{path: "test/data/numbers1.txt"} - source2 = %ReadLines{path: "test/data/numbers2.txt"} - sink_odd = %WriteLines{path: "test/data/odd.txt"} - sink_even = %WriteLines{path: "test/data/even.txt"} + def to_integer(event), do: String.to_integer(event) + def add_one(event), do: event + 1 + + def topology(opts) do partitions = %{ odd: &__MODULE__.odd_fun/1, even: &__MODULE__.even_fun/1 } [ - source(:numbers1, source1), - source(:numbers2, source2), + source(:numbers1, %ReadLines{path: "test/data/numbers1.txt"}), + source(:numbers2, %ReadLines{path: "test/data/numbers2.txt"}), mixer([:numbers1, :numbers2], :mixed), - module(:mixed, Pipeline, sync: true), + transform(:mixed, &__MODULE__.to_integer/1), + transform(:mixed, &__MODULE__.add_one/1), splitter(:mixed, partitions), - function([:odd, :even], opts[:to_string_fun]), - sink(:odd, sink_odd, true), - sink(:even, sink_even, true) + transform([:odd, :even], opts[:to_string_fun]), + sink(:odd, %WriteLines{path: "test/data/odd.txt"}, true), + sink(:even, %WriteLines{path: "test/data/even.txt"}, true) ] end end @@ -83,6 +62,29 @@ defmodule Strom.DSLTest do MyFlow.stop() end + describe "transform with options" do + defmodule FlowTransform do + use Strom.DSL + + def fun(event, acc, opts) do + {[event + acc + opts[:add]], acc + opts[:add]} + end + + def topology(_opts) do + [ + source(:s1, [1, 2, 3]), + transform(:s1, &__MODULE__.fun/3, 1000, opts: %{add: 1}) + ] + end + end + + test "transform with options" do + FlowTransform.start() + %{s1: stream} = FlowTransform.call(%{}) + assert Enum.to_list(stream) == [1002, 1004, 1006] + end + end + describe "combining several flows" do defmodule Flow1 do use Strom.DSL @@ -105,8 +107,8 @@ defmodule Strom.DSLTest do def topology(_) do [ - function(:stream1, &__MODULE__.add_one/1), - function(:stream2, &__MODULE__.add_one/1) + transform(:stream1, &__MODULE__.add_one/1), + transform(:stream2, &__MODULE__.add_one/1) ] end end diff --git a/test/examples/parcels/old_test.exs b/test/examples/parcels/old_test.exs deleted file mode 100644 index af9040d..0000000 --- a/test/examples/parcels/old_test.exs +++ /dev/null @@ -1,209 +0,0 @@ -defmodule Strom.Examples.Parcels.BuildPipeline do - use ALF.DSL - - @components [ - stage(:build_event) - ] - - def build_event(event, _) do - list = String.split(event, ",") - type = Enum.at(list, 0) - {:ok, occurred_at, _} = DateTime.from_iso8601(Enum.at(list, 1)) - order_number = String.to_integer(Enum.at(list, 2)) - - case type do - "ORDER_CREATED" -> - %{ - type: type, - occurred_at: occurred_at, - order_number: order_number, - to_ship: String.to_integer(Enum.at(list, 3)) - } - - "PARCEL_SHIPPED" -> - %{type: type, occurred_at: occurred_at, order_number: order_number} - end - end -end - -defmodule Strom.Examples.Parcels.OrderingPipeline do - use ALF.DSL - - @components [ - composer(:check_order, memo: MapSet.new()), - composer(:wait, memo: %{}) - ] - - def check_order(event, order_numbers, _) do - order_number = event[:order_number] - - case event[:type] do - "ORDER_CREATED" -> - {[event], MapSet.put(order_numbers, order_number)} - - "PARCEL_SHIPPED" -> - if MapSet.member?(order_numbers, order_number) do - {[event], order_numbers} - else - {[Map.put(event, :wait, order_number)], order_numbers} - end - end - end - - def wait(event, waiting, _) do - case event[:type] do - "ORDER_CREATED" -> - order_number = event[:order_number] - {[event | Map.get(waiting, order_number, [])], Map.delete(waiting, order_number)} - - "PARCEL_SHIPPED" -> - if event[:wait] do - other_waiting = Map.get(waiting, event[:wait], []) - {[], Map.put(waiting, event[:wait], [event | other_waiting])} - else - {[event], waiting} - end - end - end -end - -defmodule Strom.Examples.Parcels.Pipeline do - use ALF.DSL - - @components [ - composer(:check_expired, memo: []), - composer(:check_count, memo: %{}) - ] - - @seconds_in_week 3600 * 24 * 7 - - def check_expired(event, memo, _) do - order_number = event[:order_number] - - case event[:type] do - "ORDER_CREATED" -> - memo = [{order_number, event[:occurred_at]} | memo] - {[event], memo} - - "PARCEL_SHIPPED" -> - {expired, still_valid} = - Enum.split_while(Enum.reverse(memo), fn {_, order_time} -> - DateTime.diff(event[:occurred_at], order_time, :second) > @seconds_in_week - end) - - expired_events = - Enum.map(expired, fn {order_number, time} -> - %{type: "THRESHOLD_EXCEEDED", order_number: order_number, occurred_at: time} - end) - - {expired_events ++ [event], still_valid} - end - end - - def check_count(event, memo, _) do - order_number = event[:order_number] - - case event[:type] do - "ORDER_CREATED" -> - # putting order time here, it's always less than parcels time - memo = Map.put(memo, order_number, {event[:to_ship], event[:occurred_at]}) - {[], memo} - - "PARCEL_SHIPPED" -> - case Map.get(memo, order_number) do - # was deleted in THRESHOLD_EXCEEDED - nil -> - {[], memo} - - {1, last_occurred_at} -> - last_occurred_at = latest_occurred_at(event[:occurred_at], last_occurred_at) - - ok_event = %{ - type: "ALL_PARCELS_SHIPPED", - order_number: order_number, - occurred_at: last_occurred_at - } - - memo = Map.put(memo, order_number, :all_parcels_shipped) - {[ok_event], memo} - - {amount, last_occurred_at} when amount > 1 -> - last_occurred_at = latest_occurred_at(event[:occurred_at], last_occurred_at) - memo = Map.put(memo, order_number, {amount - 1, last_occurred_at}) - {[], memo} - end - - "THRESHOLD_EXCEEDED" -> - case Map.get(memo, order_number) do - :all_parcels_shipped -> - {[], Map.delete(memo, order_number)} - - _count -> - {[event], Map.delete(memo, order_number)} - end - end - end - - def latest_occurred_at(occurred_at, last_occurred_at) do - case DateTime.compare(occurred_at, last_occurred_at) do - :gt -> - occurred_at - - _ -> - last_occurred_at - end - end -end - -defmodule Strom.Examples.Parcels.OldParcelsTest do - use ExUnit.Case, async: true - - alias Strom.Examples.Parcels.BuildPipeline - alias Strom.Examples.Parcels.OrderingPipeline - alias Strom.Examples.Parcels.Pipeline - alias Strom.Source.ReadLines - - def expected_results do - [ - %{ - order_number: 111, - type: "ALL_PARCELS_SHIPPED", - occurred_at: ~U[2017-04-21T08:00:00.000Z] - }, - %{ - order_number: 222, - type: "THRESHOLD_EXCEEDED", - occurred_at: ~U[2017-04-20 09:00:00.000Z] - }, - %{ - order_number: 333, - type: "THRESHOLD_EXCEEDED", - occurred_at: ~U[2017-04-21 09:00:00.000Z] - } - ] - end - - describe "with several pipelines" do - defmodule SeveralPipelinesFlow do - use Strom.DSL - - def topology(_opts) do - [ - source(:parcels, %ReadLines{path: "test/examples/parcels/parcels.csv"}), - source(:orders, %ReadLines{path: "test/examples/parcels/orders.csv"}), - mixer([:orders, :parcels], :mixed), - module(:mixed, BuildPipeline), - module(:mixed, OrderingPipeline), - module(:mixed, Pipeline) - ] - end - end - - test "with several pipelines" do - SeveralPipelinesFlow.start() - %{mixed: mixed} = SeveralPipelinesFlow.call(%{}) - - assert Enum.sort(Enum.to_list(mixed)) == Enum.sort(expected_results()) - end - end -end diff --git a/test/examples/parcels_data_test.exs b/test/examples/parcels/parcels_data_test.exs similarity index 72% rename from test/examples/parcels_data_test.exs rename to test/examples/parcels/parcels_data_test.exs index 025a6e6..0d9ae46 100644 --- a/test/examples/parcels_data_test.exs +++ b/test/examples/parcels/parcels_data_test.exs @@ -5,25 +5,11 @@ defmodule Strom.Examples.ParcelsDataTest do use Strom.DSL defmodule BuildEvent do - def start(_opts) do - %{ - occurred_at: DateTime.add(DateTime.now!("Etc/UTC"), -(3600 * 24 * 30), :second), - order_number: 0 - } - end - - def stop(_opts, _acc), do: :ok - - def call(:tick, last_order, _opts) do + def call(:tick, last_order) do occurred_at = DateTime.add(last_order[:occurred_at], :rand.uniform(10), :second) to_ship = :rand.uniform(5) order_number = last_order[:order_number] + 1 - if order_number > 10_010 do - Process.sleep(5000) - raise("done") - end - order = %{ type: "ORDER_CREATED", occurred_at: occurred_at, @@ -62,19 +48,25 @@ defmodule Strom.Examples.ParcelsDataTest do parcels: &(&1[:type] == "PARCEL_SHIPPED") } + acc = %{ + occurred_at: DateTime.add(DateTime.now!("Etc/UTC"), -(3600 * 24 * 30), :second), + order_number: 0 + } + [ - module(:stream, BuildEvent), + transform(:stream, &BuildEvent.call/2, acc), splitter(:stream, partitions), - function(:orders, &__MODULE__.order_to_string/1), - function(:parcels, &__MODULE__.parcel_to_string/1), + transform(:orders, &__MODULE__.order_to_string/1), + transform(:parcels, &__MODULE__.parcel_to_string/1), sink(:orders, %Strom.Sink.WriteLines{path: "test_data/orders.csv"}), sink(:parcels, %Strom.Sink.WriteLines{path: "test_data/parcels.csv"}, true) ] end end - # test "test" do - # GenData.start() - # GenData.call(%{stream: Stream.cycle([:tick])}) - # end +# test "test" do +# GenData.start() +# GenData.call(%{stream: List.duplicate(:tick, 10_000)}) +# GenData.stop() +# end end diff --git a/test/examples/parcels_test.exs b/test/examples/parcels/parcels_test.exs similarity index 99% rename from test/examples/parcels_test.exs rename to test/examples/parcels/parcels_test.exs index 9965dba..d62cbe1 100644 --- a/test/examples/parcels_test.exs +++ b/test/examples/parcels/parcels_test.exs @@ -3,7 +3,6 @@ defmodule Strom.Examples.ParcelsTest do defmodule ParcelsFlow do alias Strom.Source.ReadLines - alias Strom.Sink.WriteLines use Strom.DSL diff --git a/test/examples/simple_numbers_test.exs b/test/examples/simple_numbers_test.exs index 33210d3..48395ae 100644 --- a/test/examples/simple_numbers_test.exs +++ b/test/examples/simple_numbers_test.exs @@ -1,7 +1,7 @@ defmodule Strom.Examples.SimpleNumbersTest do use ExUnit.Case - alias Strom.{Mixer, Splitter, Function} + alias Strom.{Mixer, Splitter, Transformer} test "simple numbers" do flow = %{numbers1: [1, 2, 3, 4, 5], numbers2: [6, 7, 8, 9, 10]} @@ -14,12 +14,12 @@ defmodule Strom.Examples.SimpleNumbersTest do even: fn el -> rem(el, 2) == 0 end } - function = Function.start(&(&1 + 1)) + transformer = Transformer.start() %{odd: odd, even: even} = flow |> Mixer.call(mixer, [:numbers1, :numbers2], :number) - |> Function.call(function, :number) + |> Transformer.call(transformer, :number, &(&1 + 1)) |> Splitter.call(splitter, :number, partitions) assert Enum.sort(Enum.to_list(odd)) == [3, 5, 7, 9, 11] @@ -30,35 +30,29 @@ defmodule Strom.Examples.SimpleNumbersTest do defmodule RoundRobin do use Strom.DSL - def add_label(event, label), do: {event, label} - - defmodule DoMix do - def start(names) do - Enum.reduce(names, %{}, &Map.put(&2, &1, [])) - end + def add_label(event, label) do + {[{event, label}], label} + end - def call({number, label}, acc, names) do - [another] = Enum.reject(names, &(&1 == label)) + def call({number, label}, acc) do + [another] = Enum.reject(Map.keys(acc), &(&1 == label)) - case Map.fetch!(acc, another) do - [hd | tl] -> - {[hd, number], Map.put(acc, another, tl)} + case Map.fetch!(acc, another) do + [hd | tl] -> + {[hd, number], Map.put(acc, another, tl)} - [] -> - numbers = Map.fetch!(acc, label) - {[], Map.put(acc, label, numbers ++ [number])} - end + [] -> + numbers = Map.fetch!(acc, label) + {[], Map.put(acc, label, numbers ++ [number])} end - - def stop(_acc, _opts), do: :ok end def topology(_opts) do [ - function(:first, &__MODULE__.add_label/2, :first), - function(:second, &__MODULE__.add_label/2, :second), + transform(:first, &__MODULE__.add_label/2, :first), + transform(:second, &__MODULE__.add_label/2, :second), mixer([:first, :second], :mixed), - module(:mixed, DoMix, [:first, :second]) + transform(:mixed, &__MODULE__.call/2, %{first: [], second: []}) ] end end @@ -84,37 +78,31 @@ defmodule Strom.Examples.SimpleNumbersTest do defmodule RoundRobinMany do use Strom.DSL - def add_label(event, label), do: {event, label} - - defmodule DoMix do - def start(names) do - Enum.reduce(names, %{}, &Map.put(&2, &1, [])) - end - - def call({number, label}, acc, names) do - others = Enum.reject(names, &(&1 == label)) + def add_label(event, label) do + {[{event, label}], label} + end - if Enum.all?(others, &(length(Map.fetch!(acc, &1)) > 0)) do - Enum.reduce(others, {[number], acc}, fn other, {nums, acc} -> - [hd | tl] = Map.fetch!(acc, other) - {[hd | nums], Map.put(acc, other, tl)} - end) - else - numbers = Map.fetch!(acc, label) - {[], Map.put(acc, label, numbers ++ [number])} - end + def call({number, label}, acc) do + others = Enum.reject(Map.keys(acc), &(&1 == label)) + + if Enum.all?(others, &(length(Map.fetch!(acc, &1)) > 0)) do + Enum.reduce(others, {[number], acc}, fn other, {nums, acc} -> + [hd | tl] = Map.fetch!(acc, other) + {[hd | nums], Map.put(acc, other, tl)} + end) + else + numbers = Map.fetch!(acc, label) + {[], Map.put(acc, label, numbers ++ [number])} end - - def stop(_acc, _opts), do: :ok end def topology(names) do Enum.map(names, fn name -> - function(name, &__MODULE__.add_label/2, name) + transform(name, &__MODULE__.add_label/2, name) end) ++ [ mixer(names, :mixed), - module(:mixed, DoMix, names) + transform(:mixed, &__MODULE__.call/2, Enum.reduce(names, %{}, &Map.put(&2, &1, []))) ] end end diff --git a/test/examples/telegram_test.exs b/test/examples/telegram_test.exs index d5dea5a..f9241dc 100644 --- a/test/examples/telegram_test.exs +++ b/test/examples/telegram_test.exs @@ -8,21 +8,15 @@ defmodule Strom.Integration.TelegramTest do alias Strom.Sink.WriteLines defmodule Decompose do - def start([]), do: nil - - def call(event, nil, []) do + def call(event, nil) do {String.split(event, ","), nil} end - - def stop(nil, []), do: :ok end defmodule Recompose do @length 100 - def start([]), do: [] - - def call(event, words, []) do + def call(event, words) do line = Enum.join(words, " ") new_line = line <> " " <> event @@ -32,15 +26,13 @@ defmodule Strom.Integration.TelegramTest do {[], words ++ [event]} end end - - def stop(_acc, []), do: :ok end def topology(_opts) do [ source(:input, %ReadLines{path: "test/data/orders.csv"}), - module(:input, Decompose), - module(:input, Recompose), + transform(:input, &Decompose.call/2, nil), + transform(:input, &Recompose.call/2, []), sink(:input, %WriteLines{path: "test/data/telegram.txt"}, true) ] end diff --git a/test/examples/words_count_test.exs b/test/examples/words_count_test.exs index bac4cec..24d7ebf 100644 --- a/test/examples/words_count_test.exs +++ b/test/examples/words_count_test.exs @@ -7,11 +7,9 @@ defmodule Strom.Examples.WordsCountTest do alias Strom.Source.ReadLines defmodule DoCount do - def start(_opts), do: %{} + def call(:done, acc), do: {[acc], %{}} - def call(:done, acc, _), do: {[acc], %{}} - - def call(string, acc, _) do + def call(string, acc) do acc = string |> String.downcase() @@ -23,16 +21,12 @@ defmodule Strom.Examples.WordsCountTest do {[], acc} end - - def stop(_acc, _opts), do: :ok end defmodule SumAll do - def start(_opts), do: %{} + def call(:done, acc), do: {[acc], %{}} - def call(:done, acc, _), do: {[acc], %{}} - - def call(sums, acc, _) do + def call(sums, acc) do acc = sums |> Enum.reduce(acc, fn {word, count}, acc -> @@ -42,8 +36,6 @@ defmodule Strom.Examples.WordsCountTest do {[], acc} end - - def stop(_acc, _opts), do: :ok end def topology({file_name, count}) do @@ -59,10 +51,10 @@ defmodule Strom.Examples.WordsCountTest do ] ++ dones ++ [ - module(all_names, DoCount), + transform(all_names, &DoCount.call/2, %{}), mixer(all_names, :mixed), source(:mixed, [:done]), - module(:mixed, SumAll) + transform(:mixed, &SumAll.call/2, %{}) ] end end diff --git a/test/gen_mix_test.exs b/test/gen_mix_test.exs index 4a85626..20c13f2 100644 --- a/test/gen_mix_test.exs +++ b/test/gen_mix_test.exs @@ -71,43 +71,43 @@ defmodule Strom.GenMixTest do Task.await(task2, :infinity) end - test "huge files" do - :observer.start() - source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) - source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) - - sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) - sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) - - flow = - %{} - |> Strom.Source.call(source1, :source1) - |> Strom.Source.call(source2, :source2) - - mix1 = GenMix.start() - mix2 = GenMix.start() - call1 = Strom.GenCall.start() - call2 = Strom.GenCall.start() - - inputs = %{ - source1: fn el -> el end, - source2: fn el -> el end - } - - outputs = %{ - odd: fn el -> rem(el, 2) == 1 end, - even: fn el -> rem(el, 2) == 0 end - } - - function1 = fn el -> String.length(el) end - function2 = fn el -> "#{el}" end - - flow - |> GenMix.call(mix1, inputs, inputs) - |> Strom.GenCall.call(call1, [:source1, :source2], function1) - |> GenMix.call(mix2, inputs, outputs) - |> Strom.GenCall.call(call2, [:odd, :even], function2) - |> Strom.Sink.call(sink1, [:odd]) - |> Strom.Sink.call(sink2, [:even], true) - end +# test "huge files" do +# :observer.start() +# source1 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/orders.csv"}) +# source2 = Strom.Source.start(%Strom.Source.ReadLines{path: "test_data/parcels.csv"}) +# +# sink1 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/odd.csv"}) +# sink2 = Strom.Sink.start(%Strom.Sink.WriteLines{path: "test_data/even.csv"}) +# +# flow = +# %{} +# |> Strom.Source.call(source1, :source1) +# |> Strom.Source.call(source2, :source2) +# +# mix1 = GenMix.start() +# mix2 = GenMix.start() +# call1 = Strom.Transformer.start() +# call2 = Strom.Transformer.start() +# +# inputs = %{ +# source1: fn el -> el end, +# source2: fn el -> el end +# } +# +# outputs = %{ +# odd: fn el -> rem(el, 2) == 1 end, +# even: fn el -> rem(el, 2) == 0 end +# } +# +# function1 = fn el -> String.length(el) end +# function2 = fn el -> "#{el}" end +# +# flow +# |> GenMix.call(mix1, inputs, inputs) +# |> Strom.Transformer.call(call1, [:source1, :source2], function1) +# |> GenMix.call(mix2, inputs, outputs) +# |> Strom.Transformer.call(call2, [:odd, :even], function2) +# |> Strom.Sink.call(sink1, [:odd]) +# |> Strom.Sink.call(sink2, [:even], true) +# end end diff --git a/test/integration/split_and_mix_test.exs b/test/integration/split_and_mix_test.exs index 77afa28..ddf315c 100644 --- a/test/integration/split_and_mix_test.exs +++ b/test/integration/split_and_mix_test.exs @@ -1,7 +1,7 @@ defmodule Strom.Integration.SplitAndMixTest do use ExUnit.Case - alias Strom.{Source, Mixer, Splitter, Function} + alias Strom.{Source, Mixer, Splitter, Transformer} alias Strom.Source.ReadLines setup do @@ -13,7 +13,7 @@ defmodule Strom.Integration.SplitAndMixTest do splitter = Splitter.start() mixer = Mixer.start() - function = Function.start(&"foo-#{&1}") + transformer = Transformer.start() partitions = %{ "111" => fn el -> String.contains?(el, ",111,") end, @@ -25,7 +25,7 @@ defmodule Strom.Integration.SplitAndMixTest do %{} |> Source.call(orders_source, :orders) |> Splitter.call(splitter, :orders, partitions) - |> Function.call(function, ["111", "222", "333"]) + |> Transformer.call(transformer, ["111", "222", "333"], &"foo-#{&1}") |> Mixer.call(mixer, ["111", "222", "333"], :modified) modified = Enum.to_list(stream)