Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for bloom filters #11

Draft
wants to merge 1 commit into
base: feat/degen-etl-improvements
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ethereumetl/cli/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ def stream(last_synced_block_file, lag, output, start_block, end_block, entity_t
if os.environ['KAFKA_BROKER_URI'] == None:
raise ValueError('KAFKA_BROKER_URI env is missing')

if os.environ['SYNC_MODE'] == None or os.environ['SYNC_MODE'] not in constants.VALID_SYNC_MODES:
raise ValueError('SYNC_MODE env is missing or incorrect')

if os.environ['METRICS_PORT'] == None:
raise ValueError('METRICS_PORT env is missing')

Expand All @@ -90,6 +93,12 @@ def stream(last_synced_block_file, lag, output, start_block, end_block, entity_t
if os.environ['REDIS_LIVE_MESSAGE_TTL'] == None:
raise ValueError('REDIS_LIVE_MESSAGE_TTL env is missing')

if os.environ['REDIS_BF_SIZE'] == None:
raise ValueError('REDIS_BF_SIZE env is missing')

if os.environ['REDIS_BF_ERROR_RATE'] == None:
raise ValueError('REDIS_BF_ERROR_RATE env is missing')

if mode == constants.RUN_MODE_CORRECTION:
blocks_to_reprocess = [int(block) for block in blocks_to_reprocess.split(',')]
logging.info('blocks_to_reprocess: {} with length: {}'.format(blocks_to_reprocess, len(blocks_to_reprocess)))
Expand Down
23 changes: 17 additions & 6 deletions ethereumetl/redis/redis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import redis
import hashlib
from redisbloom.client import Client
from ethereumetl.constants import constants

class RedisConnector:
Expand All @@ -12,6 +13,7 @@ def __init__(self):
redis_database = os.environ['REDIS_DB']

self.redis_client = redis.StrictRedis(host=redis_host, port=redis_port, db=redis_database)
self.redis_bf = Client(host=redis_host, port=redis_port, db=redis_database)

# utility functions to be used in "live" sync_mode
def exists_in_set(self, key, value):
Expand All @@ -24,20 +26,29 @@ def add_to_set(self, key, value):

# utility functions to be used in "backfill" sync_mode
def exists_in_bf(self, key, value):
    """Check whether ``value`` is recorded in the backfill bloom filter.

    The Redis key of the filter is built by ``create_key`` from ``key``,
    ``value`` and the backfill-mode prefix. When no such Redis key exists
    yet, the filter is created via ``create_bf`` and ``False`` is returned
    without querying it.

    NOTE(review): because the filter key is derived from both ``key`` and
    ``value``, each distinct pair maps to its own filter -- confirm this
    is the intended layout.

    Returns:
        ``False`` if the filter had to be created, otherwise the result of
        ``bfExists`` on the bloom-filter client.
    """
    bf_key = self.create_key(key, value, constants.REDIS_BACKFILL_MODE_PREFIX)

    if self.redis_client.exists(bf_key):
        return self.redis_bf.bfExists(bf_key, value)

    # Filter was not present: create it now; nothing can be in it yet.
    self.create_bf(bf_key)
    return False

def add_to_bf(self, key, value):
    """Add ``value`` to the backfill bloom filter, creating it if needed.

    The Redis key of the filter is built by ``create_key`` from ``key``,
    ``value`` and the backfill-mode prefix. If that Redis key does not
    exist, the filter is first created via ``create_bf``.

    Returns:
        Whatever ``bfAdd`` on the bloom-filter client returns.
    """
    bf_key = self.create_key(key, value, constants.REDIS_BACKFILL_MODE_PREFIX)

    filter_missing = not self.redis_client.exists(bf_key)
    if filter_missing:
        # Create explicitly so the configured error rate / size are used.
        self.create_bf(bf_key)

    return self.redis_bf.bfAdd(bf_key, value)

def create_bf(self, key):
    """Create a bloom filter stored at ``key``.

    The error rate and size are read from the ``REDIS_BF_ERROR_RATE`` and
    ``REDIS_BF_SIZE`` environment variables and passed to ``bfCreate`` as
    the raw strings found there.

    Raises:
        KeyError: if either environment variable is not set.
    """
    reserve = self.redis_bf.bfCreate
    error_rate = os.environ['REDIS_BF_ERROR_RATE']
    bf_size = os.environ['REDIS_BF_SIZE']
    reserve(key, error_rate, bf_size)

def create_key(self, key, value, mode):
    """Build a Redis key for a ``(key, value)`` pair under a mode prefix.

    ``key`` and ``value`` are joined with an underscore, SHA-1 hashed, and
    the hex digest is appended to ``mode`` after another underscore.

    Returns:
        A string of the form ``"<mode>_<sha1 hex digest>"``.
    """
    raw = f"{key}_{value}"
    hasher = hashlib.sha1()
    hasher.update(raw.encode())
    hashed_data = hasher.hexdigest()
    return f"{mode}_{hashed_data}"

def close(self):
    """Close both Redis connections held by this connector.

    The bloom-filter client is closed in a ``finally`` clause so that an
    error raised while closing ``redis_client`` does not leave ``redis_bf``
    open. Any such error still propagates to the caller afterwards.
    """
    try:
        self.redis_client.close()
    finally:
        # Always release the second client, even if the first close failed.
        self.redis_bf.close()