diff --git a/Cargo.lock b/Cargo.lock index 4dfd13e25..8d3d5e2c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ dependencies = [ [[package]] name = "arbitrary" -version = "1.3.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2e1373abdaa212b704512ec2bd8b26bd0b7d5c3f70117411a5d9a451383c859" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" dependencies = [ "derive_arbitrary", ] @@ -351,9 +351,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.6" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d04704f56c2cde07f43e8e2c154b43f216dc5c92fc98ada720177362f953b956" +checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" dependencies = [ "clap_builder", "clap_derive", @@ -361,9 +361,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.6" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e231faeaca65ebd1ea3c737966bf858971cd38c3849107aa3ea7de90a804e45" +checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" dependencies = [ "anstream", "anstyle", @@ -373,21 +373,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.2" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" dependencies = [ "heck", "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] name = "clap_lex" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" [[package]] name = "colorchoice" @@ -450,7 +450,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.4.6", + "clap 4.4.7", "criterion-plot", "is-terminal", "itertools 0.10.5", @@ -533,13 +533,13 @@ dependencies = [ [[package]] name = "derive_arbitrary" -version = "1.3.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e0efad4403bfc52dc201159c4b842a246a14b98c64b55dfd0f2d89729dfeb8" +checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -590,6 +590,18 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "enum_dispatch" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f33313078bb8d4d05a2733a94ac4c2d8a0df9a2b84424ebf4f33bfc224a890e" +dependencies = [ + "once_cell", + "proc-macro2 1.0.69", + "quote 1.0.33", + "syn 2.0.39", +] + [[package]] name = "env_logger" version = "0.8.4" @@ -635,9 +647,9 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e" dependencies = [ "libc", "windows-sys", @@ -722,36 +734,36 @@ checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" dependencies = [ "futures-core", ] [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" dependencies = [ "futures-core", "futures-task", @@ -761,9 +773,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -1001,9 +1013,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", "hashbrown 0.14.2", @@ -1058,9 +1070,9 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "54c0c35952f67de54bb584e9fd912b3023117cbafc0a77d8f3dee1fb5f572fe8" dependencies = [ "wasm-bindgen", ] @@ -1079,9 +1091,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libloading" @@ -1101,9 +1113,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" [[package]] name = "lock_api" @@ -1136,7 +1148,7 @@ version = "0.0.1" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -1242,7 +1254,7 @@ dependencies = [ name = "nemo-benches" version = "0.3.1-dev" dependencies = [ - "clap 4.4.6", + "clap 4.4.7", "colored", "criterion", "env_logger 0.10.0", @@ -1260,7 +1272,7 @@ version = "0.3.1-dev" dependencies = [ "assert_cmd", "assert_fs", - "clap 4.4.6", + "clap 4.4.7", "colored", "dir-test", "env_logger 0.10.0", @@ -1277,6 +1289,7 @@ dependencies = [ "arbitrary", "ascii_tree", "bytesize", + "enum_dispatch", "env_logger 0.10.0", "howlong", "linked-hash-map", @@ -1313,7 +1326,7 @@ dependencies = [ "nemo-physical", "thiserror", "wasm-bindgen", - "wasm-bindgen-futures 0.4.37", + "wasm-bindgen-futures 0.4.38", "wasm-bindgen-test", "web-sys", ] @@ -1454,9 +1467,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "7a257ad03cd8fb16ad4172fedf8094451e1af1c4b70097636ef2eac9a5f0cc33" dependencies = [ "bitflags 2.4.1", "cfg-if 1.0.0", @@ -1475,7 +1488,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -1486,9 +1499,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.93" +version = "0.9.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "40a4130519a360279579c2053038317e40eff64d13fd3f004f9e1b72b8a6aaf9" dependencies = [ "cc", "libc", @@ -1526,7 +1539,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if 1.0.0", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "smallvec", "windows-targets", ] @@ -1556,7 +1569,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.0.2", + "indexmap 2.1.0", ] [[package]] @@ -1706,7 +1719,7 @@ dependencies = [ "proc-macro2 1.0.69", "pyo3-macros-backend", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -1718,7 +1731,7 @@ dependencies = [ "heck", "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -1829,15 +1842,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -1909,7 +1913,7 @@ dependencies = [ "tower-service", "url", "wasm-bindgen", - "wasm-bindgen-futures 0.4.37", + "wasm-bindgen-futures 0.4.38", "web-sys", "winreg", ] @@ -1957,12 +1961,12 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.20" +version = "0.38.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ce50cb2e16c2903e30d1cbccfd8387a74b9d4c938b6a4c5ec6cc7556f7a8a0" +checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" dependencies = [ "bitflags 2.4.1", - "errno 0.3.5", + "errno 0.3.6", "libc", "linux-raw-sys", "windows-sys", @@ -2035,29 +2039,29 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.189" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" +checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.189" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" +checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] name = "serde_json" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -2142,9 +2146,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", @@ -2180,13 +2184,13 @@ checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" [[package]] name = "tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if 1.0.0", "fastrand", - "redox_syscall 0.3.5", + "redox_syscall", "rustix", "windows-sys", ] @@ -2214,7 +2218,7 @@ checksum = "f66edd6b6cd810743c0c71e1d085e92b01ce6a72782032e3f794c8284fe4bcdd" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2243,7 +2247,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2308,9 +2312,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -2461,9 +2465,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "7daec296f25a1bae309c0cd5c29c4b260e510e6d813c286b19eaadf409d40fce" dependencies = [ "cfg-if 1.0.0", "wasm-bindgen-macro", @@ -2471,16 +2475,16 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "e397f4664c0e4e428e8313a469aaa58310d302159845980fd23b0f22a847f217" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", "wasm-bindgen-shared", ] @@ -2499,9 +2503,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "9afec9963e3d0994cac82455b2b3502b81a7f40f9a0d32181f7528d9f4b43e02" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -2511,9 +2515,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "5961017b3b08ad5f3fe39f1e79877f8ee7c23c5e5fd5eb80de95abc41f1f16b2" dependencies = [ "quote 1.0.33", "wasm-bindgen-macro-support", @@ -2521,22 +2525,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "0d046c5d029ba91a1ed14da14dca44b68bf2f124cfbaf741c54151fdb3e0750b" [[package]] name = "wasm-bindgen-test" @@ -2565,9 +2569,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "5db499c5f66323272151db0e666cd34f78617522fb0c1604d31a27c50c206a85" dependencies = [ "js-sys", "wasm-bindgen", @@ -2691,20 +2695,20 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.21" +version = "0.7.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686b7e407015242119c33dab17b8f61ba6843534de936d94368856528eae4dcc" +checksum = "8cd369a67c0edfef15010f980c3cbe45d7f651deac2cd67ce097cd801de16557" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.21" +version = "0.7.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020f3dfe25dfc38dfea49ce62d5d45ecdd7f0d8a724fa63eb36b6eba4ec76806" +checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" dependencies = [ "proc-macro2 1.0.69", "quote 1.0.33", - "syn 2.0.38", + "syn 2.0.39", ] diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 529e40363..0b3ff50f4 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -117,13 +117,6 @@ fn run(mut cli: CliApp) -> Result<(), Error> { log::info!("Rules parsed"); log::trace!("{:?}", program); - for atom in program.rules().iter().flat_map(|rule| rule.head()) { - if atom.aggregates().next().is_some() { - log::warn!("Program is using the experimental aggregates feature and currently depends on the internally chosen variable orders for predicates.",); - break; - } - } - let parsed_fact = cli.trace_fact.map(parse_fact).transpose()?; if cli.write_all_idb_predicates { diff --git a/nemo-physical/Cargo.toml b/nemo-physical/Cargo.toml index 0bb478377..2bd79c92d 100644 --- a/nemo-physical/Cargo.toml +++ b/nemo-physical/Cargo.toml @@ -16,6 +16,7 @@ default = ["timing"] timing = ["dep:howlong"] [dependencies] +enum_dispatch = "0.3.12" log = "0.4" bytesize = "1.2" thiserror = "1.0" diff --git a/nemo-physical/src/aggregates/operation.rs b/nemo-physical/src/aggregates/operation.rs index cd1e46e60..cd25520ff 100644 --- a/nemo-physical/src/aggregates/operation.rs +++ b/nemo-physical/src/aggregates/operation.rs @@ -3,9 +3,12 @@ use crate::datatypes::DataTypeName; use super::processors::{ - aggregate::Aggregate, count_aggregate::CountAggregateProcessor, - max_aggregate::MaxAggregateProcessor, min_aggregate::MinAggregateProcessor, - processor::AggregateProcessor, sum_aggregate::SumAggregateProcessor, + aggregate::Aggregate, + count_aggregate::CountAggregateProcessor, + max_aggregate::MaxAggregateProcessor, + min_aggregate::MinAggregateProcessor, + processor::{AggregateProcessor, AggregateProcessorT}, + sum_aggregate::SumAggregateProcessor, }; #[derive(Clone, Copy, Debug, PartialEq)] @@ -23,14 +26,23 @@ pub enum AggregateOperation { impl AggregateOperation { /// Creates a new aggregate processor for the given aggregate operation. - /// TODO: This is currently implemented using dynamic dispatch, but this may change in the future. - pub fn create_processor(&self) -> Box> { - match self { - AggregateOperation::Count => Box::new(CountAggregateProcessor::new()), - AggregateOperation::Max => Box::new(MaxAggregateProcessor::new()), - AggregateOperation::Min => Box::new(MinAggregateProcessor::new()), - AggregateOperation::Sum => Box::new(SumAggregateProcessor::new()), - } + pub(crate) fn create_processor(&self) -> AggregateProcessorT { + let aggregate_processor: AggregateProcessorT = match self { + AggregateOperation::Count => CountAggregateProcessor::new().into(), + AggregateOperation::Max => MaxAggregateProcessor::new().into(), + AggregateOperation::Min => MinAggregateProcessor::new().into(), + AggregateOperation::Sum => SumAggregateProcessor::new().into(), + }; + + aggregate_processor + } + + /// Returns whether the aggregate processor is invariant to being called with the same aggregated value multiple times in a row. + /// This function has to return the same value independent of the aggregated value type. + /// + /// If `true` is returned this allows for additional optimizations when creating the execution plan. In particular, peripheral variables (not group-by, aggregate or distinct variables) can be converted to distinct variables in an idempotent aggregate processor without changing the semantics of the aggregate. + pub fn idempotent(&self) -> bool { + self.create_processor::().idempotent() } /// Returns whether the aggregate operation always produces an aggregate output column of the same type. diff --git a/nemo-physical/src/aggregates/processors/count_aggregate.rs b/nemo-physical/src/aggregates/processors/count_aggregate.rs index efda6a0e6..a1a58b3c2 100644 --- a/nemo-physical/src/aggregates/processors/count_aggregate.rs +++ b/nemo-physical/src/aggregates/processors/count_aggregate.rs @@ -9,6 +9,7 @@ use super::{ processor::{AggregateGroupProcessor, AggregateProcessor}, }; +#[derive(Debug)] pub(crate) struct CountAggregateProcessor where A: Aggregate, @@ -34,6 +35,7 @@ impl AggregateProcessor for CountAggregateProcessor { } } +#[derive(Debug)] pub(crate) struct CountAggregateGroupProcessor where A: Aggregate, diff --git a/nemo-physical/src/aggregates/processors/max_aggregate.rs b/nemo-physical/src/aggregates/processors/max_aggregate.rs index fe8156f55..ad58a1d44 100644 --- a/nemo-physical/src/aggregates/processors/max_aggregate.rs +++ b/nemo-physical/src/aggregates/processors/max_aggregate.rs @@ -9,6 +9,7 @@ use super::{ processor::{AggregateGroupProcessor, AggregateProcessor}, }; +#[derive(Debug)] pub(crate) struct MaxAggregateProcessor where A: PartialEq + PartialOrd + 'static, @@ -34,6 +35,7 @@ impl AggregateProcessor for MaxAggregateProcessor { } } +#[derive(Debug)] pub(crate) struct MaxAggregateGroupProcessor where A: Aggregate, diff --git a/nemo-physical/src/aggregates/processors/min_aggregate.rs b/nemo-physical/src/aggregates/processors/min_aggregate.rs index cfe87b438..abfdea167 100644 --- a/nemo-physical/src/aggregates/processors/min_aggregate.rs +++ b/nemo-physical/src/aggregates/processors/min_aggregate.rs @@ -9,6 +9,7 @@ use super::{ processor::{AggregateGroupProcessor, AggregateProcessor}, }; +#[derive(Debug)] pub(crate) struct MinAggregateProcessor where A: Aggregate, @@ -34,6 +35,7 @@ impl AggregateProcessor for MinAggregateProcessor { } } +#[derive(Debug)] pub(crate) struct MinAggregateGroupProcessor where A: Aggregate, diff --git a/nemo-physical/src/aggregates/processors/processor.rs b/nemo-physical/src/aggregates/processors/processor.rs index 5774a35e4..b439447bc 100644 --- a/nemo-physical/src/aggregates/processors/processor.rs +++ b/nemo-physical/src/aggregates/processors/processor.rs @@ -2,22 +2,42 @@ use crate::datatypes::StorageValueT; -use super::aggregate::Aggregate; +use super::{ + aggregate::Aggregate, + count_aggregate::{CountAggregateGroupProcessor, CountAggregateProcessor}, + max_aggregate::{MaxAggregateGroupProcessor, MaxAggregateProcessor}, + min_aggregate::{MinAggregateGroupProcessor, MinAggregateProcessor}, + sum_aggregate::{SumAggregateGroupProcessor, SumAggregateProcessor}, +}; + +use enum_dispatch::enum_dispatch; /// Allows for aggregation of a column, by providing [`AggregateGroupProcessor`] for every group in the input trie scan. -pub trait AggregateProcessor { +#[enum_dispatch] +pub(crate) trait AggregateProcessor { /// Returns whether the aggregate processor is invariant to being called with the same aggregated value multiple times in a row. /// This function has to return the same value independent of the aggregated value type. /// - /// If `true` is returned this allows for additional optimizations when creating the execution plan (e.g. not needing to reorder if the distinct variables are in the wrong variable order). + /// If `true` is returned this allows for additional optimizations when creating the execution plan. In particular, peripheral variables (not group-by, aggregate or distinct variables) can be converted to distinct variables in an idempotent aggregate processor without changing the semantics of the aggregate. + /// + /// See [`super::super::operation::AggregateOperation::idempotent`] fn idempotent(&self) -> bool; /// Creates a [`AggregateGroupProcessor`] for aggregating values with the same values in group-by columns. fn group(&self) -> Box>; } +#[enum_dispatch(AggregateProcessor)] +#[derive(Debug)] +pub(crate) enum AggregateProcessorT { + Count(CountAggregateProcessor), + Max(MaxAggregateProcessor), + Min(MinAggregateProcessor), + Sum(SumAggregateProcessor), +} + /// Allows aggregation of multiple rows (all with the same group-by values) to produce a single aggregate value. -pub trait AggregateGroupProcessor +pub(crate) trait AggregateGroupProcessor where A: Aggregate, { @@ -27,3 +47,12 @@ where /// Returns the resulting aggregated value of all the processed input values. fn finish(&self) -> Option; } + +#[enum_dispatch(AggregateGroupProcessor)] +#[derive(Debug)] +pub(crate) enum AggregateGroupProcessorT { + Count(CountAggregateGroupProcessor), + Max(MaxAggregateGroupProcessor), + Min(MinAggregateGroupProcessor), + Sum(SumAggregateGroupProcessor), +} diff --git a/nemo-physical/src/aggregates/processors/sum_aggregate.rs b/nemo-physical/src/aggregates/processors/sum_aggregate.rs index e42cccc72..af0f12c08 100644 --- a/nemo-physical/src/aggregates/processors/sum_aggregate.rs +++ b/nemo-physical/src/aggregates/processors/sum_aggregate.rs @@ -9,6 +9,7 @@ use super::{ processor::{AggregateGroupProcessor, AggregateProcessor}, }; +#[derive(Debug)] pub(crate) struct SumAggregateProcessor where A: Aggregate, @@ -34,6 +35,7 @@ impl AggregateProcessor for SumAggregateProcessor { } } +#[derive(Debug)] pub(crate) struct SumAggregateGroupProcessor where A: Aggregate, diff --git a/nemo-physical/src/tabular/operations/triescan_aggregate.rs b/nemo-physical/src/tabular/operations/triescan_aggregate.rs index f82a1e5a2..ceb16cf72 100644 --- a/nemo-physical/src/tabular/operations/triescan_aggregate.rs +++ b/nemo-physical/src/tabular/operations/triescan_aggregate.rs @@ -1,7 +1,10 @@ use std::{cell::UnsafeCell, fmt::Debug}; use crate::{ - aggregates::{operation::AggregateOperation, processors::processor::AggregateGroupProcessor}, + aggregates::{ + operation::AggregateOperation, + processors::processor::{AggregateGroupProcessor, AggregateProcessor}, + }, columnar::traits::columnscan::ColumnScanT, datatypes::{Double, Float, StorageTypeName, StorageValueT}, tabular::traits::{partial_trie_scan::PartialTrieScan, trie_scan::TrieScan}, @@ -22,7 +25,7 @@ enum AggregatedOutputValue { /// Describes which columns of the input trie scan will be group-by, distinct and aggregate columns and other information about the aggregation. #[derive(Debug, Clone, Copy)] pub struct AggregationInstructions { - /// Name of the aggregate operation, which determines the [`AggregateGroupProcessor`] that will be used + /// Type of the aggregate operation, which determines the aggregate processor that will be used pub aggregate_operation: AggregateOperation, /// Number of group-by columns /// @@ -74,12 +77,13 @@ impl AggregationInstructions { /// [`TrieScan`] which performs an aggregate operation. /// /// Input columns: -/// * Possibly group-by columns -/// * A single aggregated column possibly mixed with additional distinct columns +/// * Zero or more group-by columns, followed by +/// * a single aggregated column possibly mixed with additional distinct columns, followed by +/// * zero or more peripheral columns, that do not impact the result of the aggregate at all and are not used during aggregation. /// /// Output columns: -/// * Group-by columns -/// * Aggregate output column +/// * Zero or more group-by columns, followed by +/// * one aggregate output column #[derive(Debug)] pub struct TrieScanAggregate { aggregated_input_column_storage_type: StorageTypeName, diff --git a/nemo-python/README.md b/nemo-python/README.md index 94670a8d7..6619bcab7 100644 --- a/nemo-python/README.md +++ b/nemo-python/README.md @@ -3,7 +3,7 @@ This crate provide python bindings for the `nemo` crate. > **Note** -> These bindings are currently in an experimental state and likely subject to change. +> These bindings are currently in an experimental state and subject to change. ## Building diff --git a/nemo-wasm/README.md b/nemo-wasm/README.md index a92346705..fee98744c 100644 --- a/nemo-wasm/README.md +++ b/nemo-wasm/README.md @@ -3,7 +3,7 @@ This crate provides a Web Assembly build and JavaScript/TypeScript bindings for the `nemo` crate. > **Note** -> These bindings are currently in an experimental state and likely subject to change. +> These bindings are currently in an experimental state and subject to change. ## Building diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs index 82421ac69..ade181276 100644 --- a/nemo-wasm/src/lib.rs +++ b/nemo-wasm/src/lib.rs @@ -164,8 +164,8 @@ struct SyncAccessHandleWriter(web_sys::FileSystemSyncAccessHandle); #[cfg(web_sys_unstable_apis)] impl std::io::Write for SyncAccessHandleWriter { fn write(&mut self, buf: &[u8]) -> Result { - let mut buf: Vec<_> = buf.into(); - let bytes_written = self.0.write_with_u8_array(&mut buf).map_err(|js_value| { + let buf: Vec<_> = buf.into(); + let bytes_written = self.0.write_with_u8_array(&buf).map_err(|js_value| { std_io_error_from_js_value( js_value, "Error while writing to FileSystemSyncAccessHandle", diff --git a/nemo/src/execution/planning.rs b/nemo/src/execution/planning.rs index 2aa834558..09a85f34e 100644 --- a/nemo/src/execution/planning.rs +++ b/nemo/src/execution/planning.rs @@ -19,4 +19,6 @@ pub mod plan_util; pub mod negation; +mod aggregates; + pub mod arithmetic; diff --git a/nemo/src/execution/planning/aggregates.rs b/nemo/src/execution/planning/aggregates.rs new file mode 100644 index 000000000..ff06dc9b1 --- /dev/null +++ b/nemo/src/execution/planning/aggregates.rs @@ -0,0 +1,211 @@ +use std::collections::HashSet; + +use nemo_physical::{ + management::execution_plan::ExecutionNodeRef, + tabular::operations::{ + triescan_aggregate::AggregationInstructions, triescan_project::ProjectReordering, + }, +}; + +use crate::{ + model::{chase_model::ChaseAggregate, Variable}, + program_analysis::variable_order::VariableOrder, + table_manager::SubtableExecutionPlan, +}; + +pub(super) fn generate_node_aggregate( + current_plan: &mut SubtableExecutionPlan, + mut variable_order: VariableOrder, + mut node: ExecutionNodeRef, + aggregates: &Vec, + aggregate_group_by_variables: &HashSet, +) -> (ExecutionNodeRef, VariableOrder) { + assert!( + aggregates.len() <= 1, + "currently only one aggregate term per rule is supported" + ); + for aggregate in aggregates { + if reorder_required(&variable_order, aggregate_group_by_variables, aggregate) { + // Perform reordering of input trie scan and materialize into temporary trie + + // Build variable order + // Start with group-by variables, followed by the aggregate input variables + // Try to preserve as much of the relative ordering as possible, to reduce the amount of swap that need to be done during the projecting/reordering + let input_variable_set = HashSet::from_iter(aggregate.input_variables.iter().cloned()); + let mut variable_order_after_reordering = + variable_order.restrict_to(aggregate_group_by_variables); + for input_variable in variable_order + .restrict_to(&input_variable_set) + .as_ordered_list() + { + variable_order_after_reordering.push(input_variable); + } + + let reordering_column_indices: Vec<_> = variable_order_after_reordering + .as_ordered_list() + .iter() + .map(|variable| *variable_order.get(variable).unwrap()) + .collect(); + + let reordering = + ProjectReordering::from_vector(reordering_column_indices, variable_order.len()); + + node = current_plan.plan_mut().project(node, reordering); + + current_plan.add_temporary_table(node.clone(), "Subtable Aggregate Reorder"); + + // Update variable order to reordering + variable_order = variable_order_after_reordering; + } + + let aggregated_column_index = *variable_order + .get(aggregate.aggregated_input_variable()) + .expect("variable that is aggregated has to be in the variable order"); + + // Determine `last_distinct_column_index` + let mut last_distinct_column_index = 0; + for (index, variable) in variable_order.iter().enumerate() { + if aggregate.input_variables.contains(variable) { + last_distinct_column_index = index; + } + } + + node = current_plan.plan_mut().aggregate( + node, + AggregationInstructions { + aggregate_operation: aggregate.aggregate_operation, + group_by_column_count: aggregate_group_by_variables.len(), + aggregated_column_index, + last_distinct_column_index, + }, + ); + + // Update variable order after aggregation + // Remove distinct and unused variables of the aggregate + variable_order = variable_order.restrict_to(aggregate_group_by_variables); + // Add aggregate output variable + variable_order.push(aggregate.output_variable.clone()); + } + + (node, variable_order) +} + +// Checks whether the variable order has to be changed in order to apply the aggregate using the [`TrieScanAggregate`] +fn reorder_required( + variable_order: &VariableOrder, + aggregate_group_by_variables: &HashSet, + aggregate: &ChaseAggregate, +) -> bool { + let idempotent = aggregate.aggregate_operation.idempotent::(); + let group_by_variable_count = aggregate_group_by_variables.len(); + + // Whether a peripheral column has been seen (see [`TrieScanAggregate`]) + let mut after_peripheral_variable = false; + + for (index, variable) in variable_order.iter().enumerate() { + if index < group_by_variable_count { + // Check that the group-by variables are exactly the first variables in the variable order + // See [`AggregationInstructions`] and [`TrieScanAggregate`] documentation + // Otherwise the columns would need to be reordered + if !aggregate_group_by_variables.contains(variable) { + return true; + } + } else if !idempotent { + // Check that distinct/aggregated variables are exactly the variables after the group-by variables + // An exception are idempotent processors, because it allows the promotion of peripheral to distinct variables + if aggregate.input_variables.contains(variable) { + if after_peripheral_variable { + return true; + } + } else { + after_peripheral_variable = true; + } + } + } + + false +} + +#[cfg(test)] +mod test { + use std::collections::HashSet; + + use nemo_physical::aggregates::operation::AggregateOperation; + + use crate::{ + execution::planning::aggregates::reorder_required, + model::{chase_model::ChaseAggregate, Identifier, LogicalAggregateOperation, Variable}, + program_analysis::variable_order::VariableOrder, + }; + + #[test] + fn required_reordering() { + let aggregated_input_variable = Variable::Universal(Identifier("aggregated".to_string())); + + let mut variable_order = VariableOrder::new(); + variable_order.push(Variable::Universal(Identifier("group_by_1".to_string()))); + variable_order.push(Variable::Universal(Identifier("group_by_2".to_string()))); + variable_order.push(Variable::Universal(Identifier("other_1".to_string()))); + variable_order.push(aggregated_input_variable.clone()); + variable_order.push(Variable::Universal(Identifier("other_2".to_string()))); + + let idempotent_aggregate = ChaseAggregate { + aggregate_operation: AggregateOperation::Min, + input_variables: vec![aggregated_input_variable.clone()], + output_variable: Variable::Universal(Identifier("output_1".to_string())), + logical_aggregate_operation: LogicalAggregateOperation::MinNumber, + }; + + assert!(!reorder_required( + &variable_order, + &HashSet::new(), + &idempotent_aggregate + ),); + + let non_idempotent_aggregate = ChaseAggregate { + aggregate_operation: AggregateOperation::Count, + input_variables: vec![aggregated_input_variable.clone()], + output_variable: Variable::Universal(Identifier("output_1".to_string())), + logical_aggregate_operation: LogicalAggregateOperation::CountValues, + }; + + assert!(reorder_required( + &variable_order, + &HashSet::new(), + &non_idempotent_aggregate + )); + + let mut group_by_variables = HashSet::from_iter(vec![ + Variable::Universal(Identifier("group_by_1".to_string())), + Variable::Universal(Identifier("group_by_2".to_string())), + ]); + + assert!(!reorder_required( + &variable_order, + &group_by_variables, + &idempotent_aggregate + )); + + assert!(reorder_required( + &variable_order, + &group_by_variables, + &non_idempotent_aggregate + )); + + group_by_variables.insert(Variable::Universal(Identifier("other_1".to_string()))); + + assert!(!reorder_required( + &variable_order, + &group_by_variables, + &non_idempotent_aggregate + )); + + assert!(reorder_required( + &variable_order, + &HashSet::from_iter( + vec![Variable::Universal(Identifier("other_1".to_string())),].into_iter(), + ), + &idempotent_aggregate + )); + } +} diff --git a/nemo/src/execution/planning/plan_body_seminaive.rs b/nemo/src/execution/planning/plan_body_seminaive.rs index 78776176e..57f9bf43f 100644 --- a/nemo/src/execution/planning/plan_body_seminaive.rs +++ b/nemo/src/execution/planning/plan_body_seminaive.rs @@ -2,10 +2,7 @@ use std::collections::HashSet; -use nemo_physical::{ - management::execution_plan::ExecutionNodeRef, - tabular::operations::triescan_aggregate::AggregationInstructions, -}; +use nemo_physical::management::execution_plan::ExecutionNodeRef; use crate::{ execution::execution_engine::RuleInfo, @@ -18,8 +15,8 @@ use crate::{ }; use super::{ - arithmetic::generate_node_arithmetic, negation::NegationGenerator, plan_util::cut_last_layers, - BodyStrategy, SeminaiveJoinGenerator, + aggregates::generate_node_aggregate, arithmetic::generate_node_arithmetic, + negation::NegationGenerator, plan_util::cut_last_layers, BodyStrategy, SeminaiveJoinGenerator, }; /// Implementation of the semi-naive existential rule evaluation strategy. @@ -68,7 +65,7 @@ impl SeminaiveStrategy { None } else { // Compute group-by variables for all aggregates in the rule - // This is the set off all universal variables in the head except for the aggregated variables + // This is the set of all universal variables in the head except for the aggregated variables Some(analysis.head_variables.iter().filter(|variable| match variable { Variable::Universal(identifier) => !identifier.0.starts_with(AGGREGATE_VARIABLE_PREFIX), Variable::Existential(_) => panic!("existential head variables are currently not supported together with aggregates"), @@ -131,62 +128,16 @@ impl BodyStrategy for SeminaiveStrategy { ) } - // Perform aggregate operations - assert!( - self.aggregates.len() <= 1, - "currently only one aggregate term per rule is supported" - ); - for aggregate in &self.aggregates { - let aggregate_group_by_variables = self.aggregate_group_by_variables.as_ref().unwrap(); - - let aggregated_column_index = *variable_order - .get( - &aggregate - .variables - .first() - .expect("min aggregate requires exactly one variable") - .clone(), - ) - .expect("variable that is aggregated has to be in the variable order"); - - // Check that the group-by variables are exactly the first variables in the variable order - // See [`AggregationInstructions`] and [`TrieScanAggregate`] documentation - // Otherwise the columns would need to be reordered, which is not supported yet - for group_by_variable in aggregate_group_by_variables { - let index = variable_order.get(group_by_variable).expect("aggregate group-by variable is not in variable order, even though it should be a head variable and thus also safe and in the variable order"); - if index >= &aggregate_group_by_variables.len() { - panic!("aggregate group by variable {group_by_variable} is at an invalid position in the variable order to allow for aggregation without projection/reorder (index is {index}, but should be smaller than {}).", aggregate_group_by_variables.len()); - } - } - - let processor = aggregate.aggregate_operation.create_processor::(); - if !processor.idempotent() { - // Check that the aggregate variable is directly behind the group-by variables (because there are no distinct variables) - // This is only required when the operation is not idempotent, - // because otherwise the the result would not change by intermediate columns in the variable order - if aggregated_column_index != aggregate_group_by_variables.len() { - panic!("aggregated variable {} is at an invalid position in the variable order to allow for aggregation without projection/reorder (index is {aggregated_column_index}, but should equal {}).", aggregate - .variables.first().unwrap(), aggregate_group_by_variables.len()); - } - } - - node_seminaive = current_plan.plan_mut().aggregate( + if let Some(aggregate_group_by_variables) = &self.aggregate_group_by_variables { + // Perform aggregate operations + // This updates the variable order with the aggregate placeholder variables replacing the aggregate input variables + (node_seminaive, *variable_order) = generate_node_aggregate( + current_plan, + variable_order.clone(), node_seminaive, - AggregationInstructions { - aggregate_operation: aggregate.aggregate_operation, - group_by_column_count: aggregate_group_by_variables.len(), - aggregated_column_index, - last_distinct_column_index: aggregated_column_index, - }, + &self.aggregates, + aggregate_group_by_variables, ); - - // Update variable order - { - // Remove distinct and unused variables of the aggregate - *variable_order = variable_order.restrict_to(aggregate_group_by_variables); - // Add aggregate output variable - variable_order.push(aggregate.output_variable.clone()); - } } // Cut away layers not used after arithmetic operations diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 01195aad3..3174e394a 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -835,21 +835,11 @@ impl<'a> RuleParser<'a> { if let Some(logical_aggregate_operation) = (&aggregate_operation_identifier).into() { - let len_variables = variables.len(); - let aggregate = Aggregate { logical_aggregate_operation, terms: variables.into_iter().map(PrimitiveTerm::Variable).collect(), }; - // Check that there is exactly one variable used in the aggregate - // This may change when distinct variables are implemented - if len_variables != 1 { - return Err(Err::Failure( - ParseError::InvalidVariableCountInAggregate(aggregate).at(input), - )); - } - Ok((remainder, Term::Aggregation(aggregate))) } else { Err(Err::Failure( diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index dc3ec550e..bd4b569c0 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -313,9 +313,6 @@ pub enum ParseError { /// An aggregate may not be used within a complex term. #[error(r#"A term ("{0}") may not contain an aggregate as a subterm."#)] AggregateSubterm(String), - /// An aggregate term uses an invalid amount of variables. - #[error(r#"An aggregate term ("{0}") uses an invalid amount of variables. Currently only exactly one variable is allowed inside aggregates"#)] - InvalidVariableCountInAggregate(Aggregate), /// Unknown aggregate operation #[error(r#"Aggregate operation "{0}" is not known"#)] UnknownAggregateOperation(String), diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index dd0d2fb98..66cab9370 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -1,4 +1,4 @@ -//! An experimental project for rule reasoning. +//! A fast in-memory rule engine #![deny( missing_debug_implementations, diff --git a/nemo/src/model/chase_model/aggregate.rs b/nemo/src/model/chase_model/aggregate.rs index e05730cd7..1d62e9a2b 100644 --- a/nemo/src/model/chase_model/aggregate.rs +++ b/nemo/src/model/chase_model/aggregate.rs @@ -3,12 +3,18 @@ use nemo_physical::aggregates::operation::AggregateOperation; use crate::model::{Aggregate, LogicalAggregateOperation, PrimitiveTerm, Variable}; /// Specifies how the values for a placeholder aggregate variable will get computed. +/// +/// Terminology: +/// * `input_variables` are the distinct variables and the aggregated input variable, not including the group-by variables +/// * `output_variable` is the single aggregated output variable +/// +/// See [`nemo_physical::tabular::operations::TrieScanAggregate`] #[derive(Debug, Clone)] pub struct ChaseAggregate { pub(crate) aggregate_operation: AggregateOperation, pub(crate) logical_aggregate_operation: LogicalAggregateOperation, - pub(crate) variables: Vec, + pub(crate) input_variables: Vec, pub(crate) output_variable: Variable, } @@ -39,8 +45,15 @@ impl ChaseAggregate { Self { aggregate_operation: physical_operation, logical_aggregate_operation, - variables, + input_variables: variables, output_variable, } } + + /// Return the aggregated input variable, which is the first of the input variables + pub fn aggregated_input_variable(&self) -> &Variable { + self.input_variables + .first() + .expect("aggregates require exactly at least one input variable") + } } diff --git a/nemo/src/program_analysis/type_inference.rs b/nemo/src/program_analysis/type_inference.rs index 59bc7c7cc..0138f98b8 100644 --- a/nemo/src/program_analysis/type_inference.rs +++ b/nemo/src/program_analysis/type_inference.rs @@ -209,13 +209,13 @@ fn check_aggregate_types( ) -> Result<(), TypeError> { for (rule, var_types) in program.rules().iter().zip(rule_var_types) { for aggregate in rule.aggregates() { - let variable_type = var_types.get(&aggregate.variables[0]).expect( + let variable_type = var_types.get(&aggregate.input_variables[0]).expect( "Previous analysis should have assigned a type to each aggregate output variable.", ); aggregate .logical_aggregate_operation - .check_input_type(&aggregate.variables[0].name(), *variable_type)?; + .check_input_type(&aggregate.input_variables[0].name(), *variable_type)?; } } diff --git a/nemo/src/program_analysis/type_inference/position_graph.rs b/nemo/src/program_analysis/type_inference/position_graph.rs index e1dc288ab..172ba2af7 100644 --- a/nemo/src/program_analysis/type_inference/position_graph.rs +++ b/nemo/src/program_analysis/type_inference/position_graph.rs @@ -70,7 +70,7 @@ impl PositionGraph { let mut aggregate_input_to_output_variables = HashMap::>::new(); for aggregate in rule.aggregates() { - for input_variable_identifier in &aggregate.variables { + for input_variable_identifier in &aggregate.input_variables { let edge_label = if aggregate.aggregate_operation.static_output_type().is_some() { PositionGraphEdge::BodyToHeadAggregateStaticOutputType diff --git a/resources/testcases/aggregate/distinct.rls b/resources/testcases/aggregate/distinct.rls new file mode 100644 index 000000000..7e4564204 --- /dev/null +++ b/resources/testcases/aggregate/distinct.rls @@ -0,0 +1,7 @@ +@source sourceA[integer,integer,integer]: load-csv("sources/dataA.csv"). +@source sourceB[integer,integer,integer,integer]: load-csv("sources/dataB.csv"). + +r0(#count(?X, ?Y)) :- sourceA(?X, ?Y, ?Z). +r1(#count(?X, ?Y, ?Z)) :- sourceA(?X, ?Y, ?Z). +r2(?X, #count(?Y, ?Z)) :- sourceA(?X, ?Y, ?Z). +r3(?X1, #sum(?X2, ?X4)) :- sourceB(?X1, ?X2, ?X3, ?X4). diff --git a/resources/testcases/aggregate/distinct/r0.csv b/resources/testcases/aggregate/distinct/r0.csv new file mode 100644 index 000000000..7f8f011eb --- /dev/null +++ b/resources/testcases/aggregate/distinct/r0.csv @@ -0,0 +1 @@ +7 diff --git a/resources/testcases/aggregate/distinct/r1.csv b/resources/testcases/aggregate/distinct/r1.csv new file mode 100644 index 000000000..45a4fb75d --- /dev/null +++ b/resources/testcases/aggregate/distinct/r1.csv @@ -0,0 +1 @@ +8 diff --git a/resources/testcases/aggregate/distinct/r2.csv b/resources/testcases/aggregate/distinct/r2.csv new file mode 100644 index 000000000..ebe2429b8 --- /dev/null +++ b/resources/testcases/aggregate/distinct/r2.csv @@ -0,0 +1,5 @@ +-30,1 +1,4 +2,1 +3,1 +42,1 diff --git a/resources/testcases/aggregate/distinct/r3.csv b/resources/testcases/aggregate/distinct/r3.csv new file mode 100644 index 000000000..140439734 --- /dev/null +++ b/resources/testcases/aggregate/distinct/r3.csv @@ -0,0 +1,2 @@ +1,8 +2,1 diff --git a/resources/testcases/aggregate/sources/dataB.csv b/resources/testcases/aggregate/sources/dataB.csv new file mode 100644 index 000000000..d6cb1ba5d --- /dev/null +++ b/resources/testcases/aggregate/sources/dataB.csv @@ -0,0 +1,7 @@ +1,1,1,1 +1,2,3,4 +1,2,4,4 +1,2,5,4 +1,2,6,3 +1,3,4,4 +2,1,1,1