Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add CIDR functions, tests, and more documentation for a clean PR #5

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
build/
duckdb_unittest_tempdir/
.zed/
.vscode/
.ccache/
1 change: 1 addition & 0 deletions src/include/inet_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct INetFunctions {
Vector &result);
static void ContainsRight(DataChunk &args, ExpressionState &state,
Vector &result);
static void ExpandCIDR(DataChunk &args, ExpressionState &state, Vector &result);
};

} // namespace duckdb
1 change: 1 addition & 0 deletions src/include/ipaddress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class IPAddress {
static bool TryParse(string_t input, IPAddress &result,
CastParameters &parameters);
static IPAddress FromString(string_t input);
bool IsCIDR();

string ToString() const;
IPAddress Netmask() const;
Expand Down
4 changes: 4 additions & 0 deletions src/inet_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ void InetExtension::Load(DuckDB &db) {
ScalarFunction(">>=", {inet_type, inet_type},
LogicalType::BOOLEAN,
INetFunctions::ContainsRight));
ExtensionUtil::RegisterFunction(
*db.instance,
ScalarFunction("expand_cidr", {inet_type}, LogicalType::LIST(inet_type),
INetFunctions::ExpandCIDR));
}

// Returns the identifier of this extension, "inet".
std::string InetExtension::Name() {
	return "inet";
}
Expand Down
94 changes: 94 additions & 0 deletions src/inet_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,4 +261,98 @@ void INetFunctions::ContainsRight(DataChunk &args, ExpressionState &state,
});
}

// Expands an IP address in CIDR notation into a list of all individual IP addresses in that range.
//
// For example:
// - "192.168.1.0/24" would expand to all IP addresses from 192.168.1.0 to 192.168.1.255
// - A single IP "192.168.1.1" (non-CIDR) would return just that single IP
//
// @param args DataChunk containing the input INET address(es)
// @param state Current expression state
// @param result Vector to store the resulting list of expanded IP addresses
//
// The function:
// 1. Handles both IPv4 and IPv6 addresses
// 2. For CIDR notation:
// - IPv4: Generates 2^(32-mask) addresses
// - IPv6: Generates 2^(128-mask) addresses
// 3. For non-CIDR addresses: Returns a single-element list with the original IP
// 4. Returns NULL for invalid inputs
//
// Each resulting IP address is returned as a struct containing:
// - ip_type: Type of IP address (IPv4 or IPv6)
// - address: The actual IP address value
// - mask: Full host mask (32 for IPv4, 128 for IPv6)
void INetFunctions::ExpandCIDR(DataChunk & args, ExpressionState & state, Vector & result) {
auto & ipaddress_vector = args.data[0];
UnifiedVectorFormat ipaddress_data;
ipaddress_vector.ToUnifiedFormat(args.size(), ipaddress_data);

auto & entries = StructVector::GetEntries(ipaddress_vector);
auto ip_type_data = FlatVector::GetData < uint8_t > ( * entries[0]);
auto address_data = FlatVector::GetData < hugeint_t > ( * entries[1]);
auto mask_data = FlatVector::GetData < uint16_t > ( * entries[2]);

for (idx_t i = 0; i < args.size(); i++) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you format this using the DuckDB clang-format?

idx_t address_idx = ipaddress_data.sel -> get_index(i);

if (!ipaddress_data.validity.RowIsValid(address_idx)) {
result.SetValue(i, Value());
continue;
}

vector < Value > ip_list;
auto inet_type = LogicalType::STRUCT({
make_pair("ip_type", LogicalType::UTINYINT),
make_pair("address", LogicalType::HUGEINT),
make_pair("mask", LogicalType::USMALLINT)
});

auto addr_type = IPAddressType(ip_type_data[address_idx]);
auto addr = FromCompatAddr(address_data[address_idx], addr_type);
auto mask = mask_data[address_idx];
IPAddress inet(addr_type, addr, mask);

if (inet.IsCIDR()) {
// Calculate first and last address in CIDR range
IPAddress network = inet.Network();
IPAddress broadcast = inet.Broadcast();
hugeint_t hosts;
if (addr_type == IPAddressType::IP_ADDRESS_V4) {
// For IPv4: 2^(32-mask) addresses
if (mask > 32) continue; // Invalid mask
hosts = hugeint_t(1) << (32 - mask);
} else {
// For IPv6: 2^(128-mask) addresses
if (mask > 128) continue; // Invalid mask
// TODO if (mask < 64) limit expansion for very large networks
hosts = hugeint_t(1) << (128 - mask);
}

// Add all addresses in the range
auto current = network;
hugeint_t one(1);
for (hugeint_t j = 0; j < hosts && current.address <= broadcast.address; j = j + one) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some safeguard to limit this to a certain max amount? e.g. what happens if we run expand_cidr(inet '::/0')?

ip_list.emplace_back(Value::STRUCT(inet_type, {
Value::UTINYINT(uint8_t(current.type)),
Value::HUGEINT(ToCompatAddr(current.address, current.type)),
Value::USMALLINT(current.type == IPAddressType::IP_ADDRESS_V4 ? 32 : 128) // Full host mask
}));

// Increment address within network bounds
current.address = current.address + one;
}
} else {
// Just add the single IP address
ip_list.emplace_back(Value::STRUCT(inet_type, {
Value::UTINYINT(ip_type_data[address_idx]),
Value::HUGEINT(address_data[address_idx]),
Value::USMALLINT(addr_type == IPAddressType::IP_ADDRESS_V4 ? 32 : 128) // Full host mask
}));
}

result.SetValue(i, Value::LIST(inet_type, std::move(ip_list)));
}
}

} // namespace duckdb
5 changes: 5 additions & 0 deletions src/ipaddress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,11 @@ static string ToStringIPv6(const IPAddress &addr) {
return result.str();
}

// Reports whether this address has a mask strictly below the default mask of its address
// family (IPV4_DEFAULT_MASK for IPv4, IPV6_DEFAULT_MASK for IPv6). Any other address type
// is never considered CIDR.
bool IPAddress::IsCIDR() {
	if (type == IPAddressType::IP_ADDRESS_V4) {
		return mask < IPV4_DEFAULT_MASK;
	}
	if (type == IPAddressType::IP_ADDRESS_V6) {
		return mask < IPV6_DEFAULT_MASK;
	}
	return false;
}

string IPAddress::ToString() const {
if (type == IPAddressType::IP_ADDRESS_V4) {
return ToStringIPv4(this->address, this->mask);
Expand Down
32 changes: 32 additions & 0 deletions test/sql/test_inet_cidr_expansion.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# name: test/sql/test_inet_cidr_expansion.test
# description: Test inet cidr expansion function
# group: [inet]

require inet

statement ok
PRAGMA enable_verification

# basic expansion
query I
SELECT expand_cidr(inet '192.168.1.0/30');
----
[192.168.1.0, 192.168.1.1, 192.168.1.2, 192.168.1.3]

# bare ip
query I
SELECT expand_cidr(inet '192.168.1.0');
----
[192.168.1.0]

# ipv6
query I
SELECT expand_cidr(inet '2001:db8::/126');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test with NULL values?

----
[2001:db8::, 2001:db8::1, 2001:db8::2, 2001:db8::3]

# bare ipv6
query I
SELECT expand_cidr('2603:3005:1507:5900:9c2b:2430:c08e:addf'::INET);
----
[2603:3005:1507:5900:9c2b:2430:c08e:addf]

# NULL input yields NULL
query I
SELECT expand_cidr(NULL::INET);
----
NULL