From 5621377a5afe8f72da180a765cc1c9fc0b1c0738 Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Thu, 30 Aug 2018 02:40:31 -0400 Subject: [PATCH 01/16] Initial work on kafka shim --- blueox/__init__.py | 10 ++- blueox/ports.py | 10 +++ blueox/recorders/__init__.py | 0 blueox/recorders/kafka_recorder.py | 101 +++++++++++++++++++++++++++++ requirements.txt | 1 + 5 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 blueox/recorders/__init__.py create mode 100644 blueox/recorders/kafka_recorder.py diff --git a/blueox/__init__.py b/blueox/__init__.py index 8cc8cfc..e9b1a62 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -30,20 +30,26 @@ from .errors import Error from .logger import LogHandler from .timer import timeit +from .recorders import kafka_recorder log = logging.getLogger(__name__) +OVERRIDE_KAFKA_RECORDER = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) + def configure(host, port, recorder=None): """Initialize blueox - This instructs the blueox system where to send it's logging data. If blueox is not configured, log data will + This instructs the blueox system where to send its logging data. If blueox is not configured, log data will be silently dropped. Currently we support logging through the network (and the configured host and port) to a blueoxd instances, or to the specified recorder function """ - if recorder: + if int(OVERRIDE_KAFKA_RECORDER) == 1: + log.info("Kafka override set, using kafka recorder") + _context_mod._recorder_function = kafka_recorder.send + elif recorder: _context_mod._recorder_function = recorder elif host and port: network.init(host, port) diff --git a/blueox/ports.py b/blueox/ports.py index 5b1ca2f..909b57b 100644 --- a/blueox/ports.py +++ b/blueox/ports.py @@ -41,3 +41,13 @@ def default_control_host(host=None): def default_collect_host(host=None): default_host = os.environ.get(ENV_VAR_COLLECT_HOST, DEFAULT_HOST) return _default_host(host, default_host, DEFAULT_COLLECT_PORT) + + +# For consistency, we'll abstract kafka connections in the same way +ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST' +DEFAULT_KAFKA_PORT = 9002 + + +def default_kafka_host(host=None): + default_host = os.environ.get(ENV_VAR_KAFKA_HOST, DEFAULT_HOST) + return _default_host(host, default_host, DEFAULT_KAFKA_PORT) diff --git a/blueox/recorders/__init__.py b/blueox/recorders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blueox/recorders/kafka_recorder.py b/blueox/recorders/kafka_recorder.py new file mode 100644 index 0000000..18bacbc --- /dev/null +++ b/blueox/recorders/kafka_recorder.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" +blueox.kafka +~~~~~~~~ + +This module provides the interface into Kafka + +:copyright: (c) 2018 by Aaron Biller?? +:license: ISC, see LICENSE for more details. + +""" +import atexit +import logging +import msgpack + +from kafka import KafkaProducer + +from .. import ports +from .. import utils + +log = logging.getLogger(__name__) + +# If we have pending outgoing messages, this is how long we'll wait after +# being told to exit. 
+LINGER_SHUTDOWN_MSECS = 2000 + +# Producer can be shared between threads +_kafka_producer = None + + +def init(host=None): + """Initialize the global kafka producer + + Supports a host arg with an overriding kafka host string + in the format 'hostname:port' + """ + global _kafka_producer + + host = ports.default_kafka_host(host) + + _kafka_producer = KafkaProducer(bootstrap_servers=host) + + +def _serialize_context(context): + context_dict = context.to_dict() + for key in ('host', 'type'): + if len(context_dict.get(key, "")) > 64: + raise ValueError("Value too long: %r" % key) + + context_dict = { + k: v.encode('utf-8') if isinstance(v, unicode) + else v for k, v in context_dict.items() + } + + try: + context_data = msgpack.packb(context_dict) + except TypeError: + try: + # If we fail to serialize our context, we can try again with an + # enhanced packer (it's slower though) + context_data = msgpack.packb(context_dict, + default=utils.msgpack_encode_default) + except TypeError: + log.exception("Serialization failure (not fatal, dropping data)") + + # One last try after dropping the body + context_dict['body'] = None + context_data = msgpack.packb(context_dict) + + return context_data + + +def send(context): + global _kafka_producer + + try: + context_data = _serialize_context(context) + except Exception: + log.exception("Failed to serialize context") + return + + if _kafka_producer: + try: + log.debug("Sending msg") + _kafka_producer.send('events', context_data) + except Exception: + log.exception("Failed during publish to kafka.") + else: + log.info("Skipping sending event %s", context.name) + + +def close(): + global _kafka_producer + + if _kafka_producer: + _kafka_producer.flush() + _kafka_producer.close(timeout=LINGER_SHUTDOWN_MSECS) + _kafka_producer = None + + +atexit.register(close) diff --git a/requirements.txt b/requirements.txt index dfdd0e7..61a4d6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ pyflakes tornado==3.2 boto yapf +kafka-python From c2550453bb664f988398622b71d8be248b8474d8 Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Thu, 30 Aug 2018 15:24:32 -0400 Subject: [PATCH 02/16] Add kafka recorder tests --- blueox/__init__.py | 12 +- blueox/ports.py | 2 +- .../recorders/{kafka_recorder.py => kafka.py} | 43 ++++--- tests/ports_test.py | 28 +++- tests/recorders/__init__.py | 0 tests/recorders/kafka_test.py | 120 ++++++++++++++++++ 6 files changed, 177 insertions(+), 28 deletions(-) rename blueox/recorders/{kafka_recorder.py => kafka.py} (72%) create mode 100644 tests/recorders/__init__.py create mode 100644 tests/recorders/kafka_test.py diff --git a/blueox/__init__.py b/blueox/__init__.py index e9b1a62..22bb854 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -30,12 +30,10 @@ from .errors import Error from .logger import LogHandler from .timer import timeit -from .recorders import kafka_recorder +from .recorders import kafka log = logging.getLogger(__name__) -OVERRIDE_KAFKA_RECORDER = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) - def configure(host, port, recorder=None): """Initialize blueox @@ -46,9 +44,13 @@ def configure(host, port, recorder=None): Currently we support logging through the network (and the configured host and port) to a blueoxd instances, or to the specified recorder function """ - if int(OVERRIDE_KAFKA_RECORDER) == 1: + override_kafka_recorder = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) + + if int(override_kafka_recorder) == 1: log.info("Kafka override set, using kafka recorder") - 
_context_mod._recorder_function = kafka_recorder.send + host = ports.default_kafka_host() + kafka.init(host) + _context_mod._recorder_function = kafka.send elif recorder: _context_mod._recorder_function = recorder elif host and port: diff --git a/blueox/ports.py b/blueox/ports.py index 909b57b..4cdc65e 100644 --- a/blueox/ports.py +++ b/blueox/ports.py @@ -45,7 +45,7 @@ def default_collect_host(host=None): # For consistency, we'll abstract kafka connections in the same way ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST' -DEFAULT_KAFKA_PORT = 9002 +DEFAULT_KAFKA_PORT = 9092 def default_kafka_host(host=None): diff --git a/blueox/recorders/kafka_recorder.py b/blueox/recorders/kafka.py similarity index 72% rename from blueox/recorders/kafka_recorder.py rename to blueox/recorders/kafka.py index 18bacbc..386a7e0 100644 --- a/blueox/recorders/kafka_recorder.py +++ b/blueox/recorders/kafka.py @@ -9,14 +9,16 @@ :license: ISC, see LICENSE for more details. """ +from __future__ import absolute_import + import atexit import logging import msgpack +import threading from kafka import KafkaProducer -from .. import ports -from .. import utils +from blueox import utils log = logging.getLogger(__name__) @@ -24,21 +26,22 @@ # being told to exit. LINGER_SHUTDOWN_MSECS = 2000 -# Producer can be shared between threads -_kafka_producer = None +threadLocal = threading.local() + +# Context can be shared between threads +_kafka_hosts = None -def init(host=None): - """Initialize the global kafka producer - Supports a host arg with an overriding kafka host string - in the format 'hostname:port' - """ - global _kafka_producer +def init(host): + global _kafka_hosts - host = ports.default_kafka_host(host) + _kafka_hosts = host - _kafka_producer = KafkaProducer(bootstrap_servers=host) + +def _thread_connect(): + if _kafka_hosts and not getattr(threadLocal, 'kp', None): + threadLocal.kp = KafkaProducer(bootstrap_servers=_kafka_hosts) def _serialize_context(context): @@ -71,7 +74,7 @@ def _serialize_context(context): def send(context): - global _kafka_producer + _thread_connect() try: context_data = _serialize_context(context) @@ -79,10 +82,10 @@ def send(context): log.exception("Failed to serialize context") return - if _kafka_producer: + if _kafka_hosts and threadLocal.kp is not None: try: log.debug("Sending msg") - _kafka_producer.send('events', context_data) + threadLocal.kp.send('events', context_data) except Exception: log.exception("Failed during publish to kafka.") else: @@ -90,12 +93,10 @@ def send(context): def close(): - global _kafka_producer - - if _kafka_producer: - _kafka_producer.flush() - _kafka_producer.close(timeout=LINGER_SHUTDOWN_MSECS) - _kafka_producer = None + if getattr(threadLocal, 'kp', None): + threadLocal.kp.flush() + threadLocal.kp.close(timeout=LINGER_SHUTDOWN_MSECS) + threadLocal.kp = None atexit.register(close) diff --git a/tests/ports_test.py b/tests/ports_test.py index c7d278c..5b1e711 100644 --- a/tests/ports_test.py +++ b/tests/ports_test.py @@ -1,5 +1,8 @@ import os -from testify import * +from testify import ( + TestCase, + assert_equal, + teardown) from blueox import ports @@ -71,3 +74,26 @@ def test_env_port(self): os.environ['BLUEOX_HOST'] = 'master:123' host = ports.default_collect_host() assert_equal(host, "master:123") + + +class DefaultKafkaHost(TestCase): + @teardown + def clear_env(self): + try: + del os.environ['BLUEOX_KAFKA_HOST'] + except KeyError: + pass + + def test_emtpy(self): + host = ports.default_kafka_host() + assert_equal(host, '127.0.0.1:9092') + + def test_env(self): + 
os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s' + host = ports.default_kafka_host() + assert_equal(host, 'local.svc.team-me.aws.jk8s:9092') + + def test_env_port(self): + os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s:9002' + host = ports.default_kafka_host() + assert_equal(host, 'local.svc.team-me.aws.jk8s:9002') diff --git a/tests/recorders/__init__.py b/tests/recorders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py new file mode 100644 index 0000000..be62636 --- /dev/null +++ b/tests/recorders/kafka_test.py @@ -0,0 +1,120 @@ +import os +import random +import decimal +import datetime + +import msgpack +from testify import ( + TestCase, + setup, + teardown, + assert_equal) + +from blueox import default_configure +from blueox import utils +from blueox import network +from blueox import context +from blueox.recorders import kafka + + +class MockKafkaProducer(object): + last_topic = None + last_data = None + close_timeout = None + + def __call__(self, bootstrap_servers=None): + self.bootstrap_servers = bootstrap_servers + return self + + def send(self, topic, data): + self.last_topic = topic + self.last_data = data + + def flush(self): + pass + + def close(self, timeout=None): + self.close_timeout = timeout + + +class KafkaOverrideTestCase(TestCase): + @teardown + def clear_env(self): + try: + del os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] + except KeyError: + pass + + def test_configure_no_override(self): + default_configure() + assert_equal(context._recorder_function, network.send) + + def test_configure_override(self): + os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] = '1' + default_configure() + assert_equal(context._recorder_function, kafka.send) + + +class KafkaSendTestCase(TestCase): + @setup + def build_context(self): + self.context = context.Context('test', 1) + + @setup + def init_kafka(self): + self.port = random.randint(30000, 40000) + kafka.init('127.0.0.1:{}'.format(self.port)) + + @setup + def configure_kafka(self): + context._recorder_function = kafka.send + self.kp = MockKafkaProducer() + kafka.KafkaProducer = self.kp + + @teardown + def unconfigure_kafka(self): + context._recorder_function = None + + def test(self): + with self.context: + self.context.set('foo', True) + self.context.set('bar.baz', 10.0) + + data = msgpack.unpackb(self.kp.last_data) + assert_equal(self.kp.last_topic, 'events') + assert_equal(data['id'], 1) + assert_equal(data['type'], 'test') + assert_equal(utils.get_deep(data['body'], "bar.baz"), 10.0) + + kafka.close() + assert_equal(self.kp.close_timeout, kafka.LINGER_SHUTDOWN_MSECS) + + +class SerializeContextTestCase(TestCase): + @setup + def build_context(self): + self.context = context.Context('test', 1) + + def test_types(self): + with self.context: + self.context.set('decimal_value', decimal.Decimal("6.66")) + self.context.set('date_value', datetime.date(2013, 12, 10)) + self.context.set('datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) + + context_data = kafka._serialize_context(self.context) + data = msgpack.unpackb(context_data) + assert_equal(data['body']['decimal_value'], "6.66") + assert_equal(data['body']['date_value'], "2013-12-10") + assert_equal( + datetime.datetime.fromtimestamp(float(data['body']['datetime_value'])), + datetime.datetime(2013, 12, 10, 12, 12, 12)) + + def test_exception(self): + with self.context: + self.context.set('value', Exception('hello')) + + context_data = 
kafka._serialize_context(self.context)
+        data = msgpack.unpackb(context_data)
+
+        # The serialization should fail, but that just means we don't have any data.
+        assert_equal(data['body'], None)

From 420b35d3315aadd2148b20af461eeb35fe23bf91 Mon Sep 17 00:00:00 2001
From: Brandon Bickford
Date: Thu, 30 Aug 2018 14:04:31 -0700
Subject: [PATCH 03/16] Squashed commit of the following:

commit 6d6f53a5ef701acc1f364303e174d59545fadf72
Author: Brandon Bickford
Date:   Thu Aug 30 09:58:59 2018 -0700

    Bump version

commit b45384a26f6ffeb6180d092d2daecc90d6923a24
Author: Brandon Bickford
Date:   Wed Aug 29 16:20:59 2018 -0700

    Ignore unknown types

---
 CHANGES            | 3 +++
 blueox/__init__.py | 2 +-
 blueox/utils.py    | 3 +--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/CHANGES b/CHANGES
index 2de7816..b46839c 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+blueox (0.11.6.4)
+    * Fix encoding of unknown types
+
 blueox (0.11.6.3)
     * Fix handling of unicode strings

diff --git a/blueox/__init__.py b/blueox/__init__.py
index 8cc8cfc..2f3fc36 100644
--- a/blueox/__init__.py
+++ b/blueox/__init__.py
@@ -9,7 +9,7 @@
 """

 __title__ = 'blueox'
-__version__ = '0.11.6.3'
+__version__ = '0.11.6.4'
 __author__ = 'Rhett Garber'
 __author_email__ = 'rhettg@gmail.com'
 __license__ = 'ISC'

diff --git a/blueox/utils.py b/blueox/utils.py
index 28d7a93..5a15b29 100644
--- a/blueox/utils.py
+++ b/blueox/utils.py
@@ -82,5 +82,4 @@ def msgpack_encode_default(obj):
         return time.mktime(obj.utctimetuple())
     if isinstance(obj, datetime.date):
         return obj.strftime("%Y-%m-%d")
-
-    raise TypeError("Unknown type: %r" % (obj,))
+    return None

From fc753b7278ecca0c26914ca016d9097e46d1da0b Mon Sep 17 00:00:00 2001
From: Aaron Biller
Date: Fri, 31 Aug 2018 01:15:43 -0400
Subject: [PATCH 04/16] Update recorder override functionality

---
 blueox/__init__.py                      | 55 +++++++++++--------
 blueox/recorders/kafka.py               | 13 ++++-
 blueox/{network.py => recorders/zmq.py} | 13 ++++-
 tests/recorders/kafka_test.py           | 20 ++++---
 .../zmq_test.py}                        | 38 +++++++------
 5 files changed, 86 insertions(+), 53 deletions(-)
 rename blueox/{network.py => recorders/zmq.py} (94%)
 rename tests/{network_test.py => recorders/zmq_test.py} (72%)

diff --git a/blueox/__init__.py b/blueox/__init__.py
index 22bb854..2980591 100644
--- a/blueox/__init__.py
+++ b/blueox/__init__.py
@@ -21,7 +21,6 @@
 import os

 from . import utils
-from . import network
 from . import ports
 from .context import (
     Context, set, append, add, context_wrap, current_context, find_context,
@@ -30,44 +29,53 @@
 from .errors import Error
 from .logger import LogHandler
 from .timer import timeit
-from .recorders import kafka
+from .recorders import kafka, zmq

 log = logging.getLogger(__name__)

+RECORDER_ZMQ = 'zmq'
+RECORDER_KAFKA = 'kafka'
+RECORDERS = {
+    RECORDER_ZMQ: zmq,
+    RECORDER_KAFKA: kafka,
+}
+DEFAULT_RECORDER = RECORDER_ZMQ
+

 def configure(host, port, recorder=None):
     """Initialize blueox

-    This instructs the blueox system where to send its logging data. If blueox is not configured, log data will
-    be silently dropped.
+    This instructs the blueox system where to send its logging data.
+    If blueox is not configured, log data will be silently dropped.

-    Currently we support logging through the network (and the configured host and port) to a blueoxd instances, or
-    to the specified recorder function
+    Currently we support logging through the network (and the configured host
+    and port) to a blueoxd instance, or to the specified recorder function.
""" - override_kafka_recorder = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) - - if int(override_kafka_recorder) == 1: - log.info("Kafka override set, using kafka recorder") - host = ports.default_kafka_host() - kafka.init(host) - _context_mod._recorder_function = kafka.send - elif recorder: + if callable(recorder): _context_mod._recorder_function = recorder - elif host and port: - network.init(host, port) - _context_mod._recorder_function = network.send + else: - log.info("Empty blueox configuration") - _context_mod._recorder_function = None + _rec = RECORDERS.get(recorder, None) + if _rec is not None: + _rec.init(host, port) + _context_mod._recorder_function = _rec.send + else: + log.info("Empty blueox configuration") + _context_mod._recorder_function = None -def default_configure(host=None): + +def default_configure(host=None, recorder=DEFAULT_RECORDER): """Configure BlueOx based on defaults Accepts a connection string override in the form `localhost:3514`. Respects environment variable BLUEOX_HOST """ - host = ports.default_collect_host(host) + _rec = RECORDERS.get(recorder, None) + if _rec is None: + _rec = RECORDERS.get(DEFAULT_RECORDER) + + host = _rec.default_host(host) hostname, port = host.split(':') try: @@ -75,8 +83,9 @@ def default_configure(host=None): except ValueError: raise Error("Invalid value for port") - configure(hostname, int_port) + configure(hostname, int_port, recorder=recorder) def shutdown(): - network.close() + zmq.close() + kafka.close() diff --git a/blueox/recorders/kafka.py b/blueox/recorders/kafka.py index 386a7e0..cc5e89b 100644 --- a/blueox/recorders/kafka.py +++ b/blueox/recorders/kafka.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -blueox.kafka +blueox.recorders.kafka ~~~~~~~~ This module provides the interface into Kafka @@ -18,6 +18,7 @@ from kafka import KafkaProducer +from blueox import ports from blueox import utils log = logging.getLogger(__name__) @@ -27,16 +28,22 @@ LINGER_SHUTDOWN_MSECS = 2000 +def default_host(host=None): + """Build a default host string for the kafka producer + """ + return ports.default_kafka_host(host) + + threadLocal = threading.local() # Context can be shared between threads _kafka_hosts = None -def init(host): +def init(host, port): global _kafka_hosts - _kafka_hosts = host + _kafka_hosts = '{}:{}'.format(host, port) def _thread_connect(): diff --git a/blueox/network.py b/blueox/recorders/zmq.py similarity index 94% rename from blueox/network.py rename to blueox/recorders/zmq.py index ede1a1e..88b227a 100644 --- a/blueox/network.py +++ b/blueox/recorders/zmq.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -blueox.network +blueox.recorders.zmq ~~~~~~~~ This module provides our interface into ZeroMQ @@ -9,6 +9,8 @@ :license: ISC, see LICENSE for more details. """ +from __future__ import absolute_import + import atexit import logging import msgpack @@ -16,7 +18,8 @@ import threading import zmq -from . 
import utils +from blueox import ports +from blueox import utils log = logging.getLogger(__name__) @@ -44,6 +47,12 @@ def check_meta_version(meta): raise ValueError(value) +def default_host(host=None): + """Build a default host string for the kafka producer + """ + return ports.default_collect_host(host) + + threadLocal = threading.local() # Context can be shared between threads diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py index be62636..2716112 100644 --- a/tests/recorders/kafka_test.py +++ b/tests/recorders/kafka_test.py @@ -10,11 +10,11 @@ teardown, assert_equal) -from blueox import default_configure +from blueox import default_configure, RECORDER_KAFKA from blueox import utils -from blueox import network from blueox import context from blueox.recorders import kafka +from blueox.recorders import zmq class MockKafkaProducer(object): @@ -47,11 +47,10 @@ def clear_env(self): def test_configure_no_override(self): default_configure() - assert_equal(context._recorder_function, network.send) + assert_equal(context._recorder_function, zmq.send) def test_configure_override(self): - os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] = '1' - default_configure() + default_configure(recorder=RECORDER_KAFKA) assert_equal(context._recorder_function, kafka.send) @@ -63,7 +62,7 @@ def build_context(self): @setup def init_kafka(self): self.port = random.randint(30000, 40000) - kafka.init('127.0.0.1:{}'.format(self.port)) + kafka.init('127.0.0.1', self.port) @setup def configure_kafka(self): @@ -99,14 +98,16 @@ def test_types(self): with self.context: self.context.set('decimal_value', decimal.Decimal("6.66")) self.context.set('date_value', datetime.date(2013, 12, 10)) - self.context.set('datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) + self.context.set( + 'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) context_data = kafka._serialize_context(self.context) data = msgpack.unpackb(context_data) assert_equal(data['body']['decimal_value'], "6.66") assert_equal(data['body']['date_value'], "2013-12-10") assert_equal( - datetime.datetime.fromtimestamp(float(data['body']['datetime_value'])), + datetime.datetime.fromtimestamp( + float(data['body']['datetime_value'])), datetime.datetime(2013, 12, 10, 12, 12, 12)) def test_exception(self): @@ -116,5 +117,6 @@ def test_exception(self): context_data = kafka._serialize_context(self.context) data = msgpack.unpackb(context_data) - # The serialization should fail, but that just means we don't have any data. + # The serialization should fail, but that just + # means we don't have any data. 
assert_equal(data['body'], None) diff --git a/tests/network_test.py b/tests/recorders/zmq_test.py similarity index 72% rename from tests/network_test.py rename to tests/recorders/zmq_test.py index dbfa4c2..c6ae28a 100644 --- a/tests/network_test.py +++ b/tests/recorders/zmq_test.py @@ -3,18 +3,24 @@ import decimal import datetime -from testify import * +from testify import ( + TestCase, + setup, + teardown, + assert_equal) import zmq import msgpack from blueox import utils -from blueox import network from blueox import context +from blueox.recorders import zmq as zmq_rec + class NoNetworkSendTestCase(TestCase): def test(self): """Verify that if network isn't setup, send just does nothing""" - network.send(context.Context('test', 1)) + zmq_rec.send(context.Context('test', 1)) + class NetworkSendTestCase(TestCase): @setup @@ -24,11 +30,11 @@ def build_context(self): @setup def init_network(self): self.port = random.randint(30000, 40000) - network.init("127.0.0.1", self.port) + zmq_rec.init("127.0.0.1", self.port) @setup def configure_network(self): - context._recorder_function = network.send + context._recorder_function = zmq_rec.send @teardown def unconfigure_network(self): @@ -36,7 +42,7 @@ def unconfigure_network(self): @setup def build_server_socket(self): - self.server = network._zmq_context.socket(zmq.PULL) + self.server = zmq_rec._zmq_context.socket(zmq.PULL) self.server.bind("tcp://127.0.0.1:%d" % self.port) @teardown @@ -45,7 +51,7 @@ def destroy_server(self): @teardown def destory_network(self): - network.close() + zmq_rec.close() def test(self): with self.context: @@ -53,8 +59,9 @@ def test(self): self.context.set('bar.baz', 10.0) event_meta, raw_data = self.server.recv_multipart() - network.check_meta_version(event_meta) - _, event_time, event_host, event_type = struct.unpack(network.META_STRUCT_FMT, event_meta) + zmq_rec.check_meta_version(event_meta) + _, event_time, event_host, event_type = struct.unpack( + zmq_rec.META_STRUCT_FMT, event_meta) assert_equal(event_type, 'test') data = msgpack.unpackb(raw_data) @@ -72,26 +79,25 @@ def test_types(self): with self.context: self.context.set('decimal_value', decimal.Decimal("6.66")) self.context.set('date_value', datetime.date(2013, 12, 10)) - self.context.set('datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) + self.context.set( + 'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) - meta_data, context_data = network._serialize_context(self.context) + meta_data, context_data = zmq_rec._serialize_context(self.context) data = msgpack.unpackb(context_data) assert_equal(data['body']['decimal_value'], "6.66") assert_equal(data['body']['date_value'], "2013-12-10") assert_equal( - datetime.datetime.fromtimestamp(float(data['body']['datetime_value'])), + datetime.datetime.fromtimestamp( + float(data['body']['datetime_value'])), datetime.datetime(2013, 12, 10, 12, 12, 12)) def test_exception(self): with self.context: self.context.set('value', Exception('hello')) - meta_data, context_data = network._serialize_context(self.context) + meta_data, context_data = zmq_rec._serialize_context(self.context) data = msgpack.unpackb(context_data) # The serialization should fail, but that just means we don't have any # data. 
assert_equal(data['body'], None)
-
-
-

From 18e43c73d19a1b8564281f0a8174d80cd5ad8942 Mon Sep 17 00:00:00 2001
From: Aaron Biller
Date: Fri, 31 Aug 2018 01:16:29 -0400
Subject: [PATCH 05/16] Fix sundry syntax/formatting

---
 blueox/client.py            | 16 ++++----
 blueox/context.py           | 19 +++++----
 blueox/ports.py             |  4 +-
 tests/ports_test.py         |  4 +-
 tests/tornado_utils_test.py | 80 ++++++++++++++++++-------------------
 5 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/blueox/client.py b/blueox/client.py
index 8062c64..ec2f420 100644
--- a/blueox/client.py
+++ b/blueox/client.py
@@ -3,7 +3,8 @@
 blueox.client
 ~~~~~~~~

-This module provides utilities for writing client applications which connect or use blueox data.
+This module provides utilities for writing client applications
+which connect or use blueox data.

 :copyright: (c) 2012 by Rhett Garber
 :license: ISC, see LICENSE for more details.
@@ -35,7 +36,8 @@ def default_host(host=None):


 def decode_stream(stream):
-    """A generator which reads data out of the buffered file stream, unpacks and decodes the blueox events
+    """A generator which reads data out of the buffered file stream,
+    unpacks and decodes the blueox events

     This is useful for parsing on disk log files generated by blueoxd
     """
@@ -97,8 +99,8 @@ def subscribe_stream(control_host, subscribe):
     sock.connect("tcp://%s" % (stream_host,))

     # Now that we are connected, loop almost forever emiting events.
-    # If we fail to receive any events within the specified timeout, we'll quit
-    # and verify that we are connected to a valid stream.
+    # If we fail to receive any events within the specified timeout,
+    # we'll quit and verify that we are connected to a valid stream.
     poller = zmq.Poller()
     poller.register(sock, zmq.POLLIN)
     while True:
@@ -113,7 +115,7 @@ def subscribe_stream(control_host, subscribe):
             if not prefix and subscription and channel != subscription:
                 continue

-            yield msgpack.unpackb(data,encoding='utf8')
+            yield msgpack.unpackb(data, encoding='utf8')
         else:
             break

@@ -137,10 +139,10 @@ def stdin_stream():

 class Grouper(object):
     """Utility for grouping events and sub-events together.
-    
+
     Events fed into a Grouper are joined by their common 'id'. Encountering the
     parent event type will trigger emitting a list of all events and sub events
-    for that single id. 
+    for that single id.

     This assumes that the parent event will be the last encountered.

diff --git a/blueox/context.py b/blueox/context.py
index c23fcb7..fc117a9 100644
--- a/blueox/context.py
+++ b/blueox/context.py
@@ -19,7 +19,6 @@
 import logging

 from . import utils
-from . import network

 log = logging.getLogger(__name__)

@@ -41,8 +40,10 @@ def __init__(self, type_name, id=None, sample=None):
         heirarchy of parent requests. Examples:

         '.foo' - Will generate a name like '.foo'
-        '.foo.bar' - If the parent ends in '.foo', the final name will be '.bar'
-        '^.foo' - Will use the top-most context, generating '.foo'
+        '.foo.bar' - If the parent ends in '.foo', the final name
+                     will be '.bar'
+        '^.foo' - Will use the top-most context, generating
+                  '.foo'
         'top.foo.bar' - The name will be based on the longest matched
                         parent context. 
If there is a parent context named 'top' and a parent context named 'top.foo', the new context will be named @@ -111,11 +112,13 @@ def __init__(self, type_name, id=None, sample=None): elif parent_ctx: self.id = parent_ctx.id else: - # Generate an id if one wasn't provided and we don't have any parents - # We're going to encode the time as the front 4 bytes so we have some order to the ids - # that could prove useful later on by making sorting a little easier. - self.id = (struct.pack(">L", int(time.time())) + os.urandom(12)).encode( - 'hex') + # Generate an id if one wasn't provided and we don't have any + # parents. We're going to encode the time as the front 4 bytes + # so we have some order to the ids that could prove useful + # later on by making sorting a little easier. + self.id = ( + struct.pack(">L", int(time.time())) + + os.urandom(12)).encode('hex') if parent_ctx and not parent_ctx.enabled: self.enabled = False diff --git a/blueox/ports.py b/blueox/ports.py index 4cdc65e..39470d9 100644 --- a/blueox/ports.py +++ b/blueox/ports.py @@ -28,7 +28,7 @@ def _default_host(host, default_host, default_port): if not host: host = default_host if ':' not in host: - host = "{}:{}".format(host, default_port) + host = '{}:{}'.format(host, default_port) return host @@ -45,7 +45,7 @@ def default_collect_host(host=None): # For consistency, we'll abstract kafka connections in the same way ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST' -DEFAULT_KAFKA_PORT = 9092 +DEFAULT_KAFKA_PORT = 9002 def default_kafka_host(host=None): diff --git a/tests/ports_test.py b/tests/ports_test.py index 5b1e711..4ed75d1 100644 --- a/tests/ports_test.py +++ b/tests/ports_test.py @@ -86,12 +86,12 @@ def clear_env(self): def test_emtpy(self): host = ports.default_kafka_host() - assert_equal(host, '127.0.0.1:9092') + assert_equal(host, '127.0.0.1:9002') def test_env(self): os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s' host = ports.default_kafka_host() - assert_equal(host, 'local.svc.team-me.aws.jk8s:9092') + assert_equal(host, 'local.svc.team-me.aws.jk8s:9002') def test_env_port(self): os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s:9002' diff --git a/tests/tornado_utils_test.py b/tests/tornado_utils_test.py index e6aedd4..085da93 100644 --- a/tests/tornado_utils_test.py +++ b/tests/tornado_utils_test.py @@ -1,9 +1,8 @@ import time -import pprint import random import collections import traceback -from testify import * +from testify import assert_equal, setup import tornado.ioloop import tornado.gen @@ -14,7 +13,10 @@ # vendor module. 
Tornado testing in Testify import tornado_test -class AsyncHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler): + +class AsyncHandler( + blueox.tornado_utils.BlueOxRequestHandlerMixin, + tornado.web.RequestHandler): @blueox.tornado_utils.coroutine def get(self): loop = self.request.connection.stream.io_loop @@ -22,7 +24,8 @@ def get(self): req_id = self.blueox_ctx.id blueox.set('async', True) - result = yield blueox.tornado_utils.AsyncHTTPClient(loop).fetch(self.application.test_url) + result = yield blueox.tornado_utils.AsyncHTTPClient(loop).fetch( + self.application.test_url) assert result.code == 200 with blueox.Context('.extra'): @@ -32,31 +35,40 @@ def get(self): self.finish() -class AsyncErrorHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler): +class AsyncErrorHandler( + blueox.tornado_utils.BlueOxRequestHandlerMixin, + tornado.web.RequestHandler): @blueox.tornado_utils.coroutine def get(self): loop = self.request.connection.stream.io_loop - called = yield tornado.gen.Task(loop.add_timeout, time.time() + random.randint(1, 2)) + _ = yield tornado.gen.Task(loop.add_timeout, time.time() + + random.randint(1, 2)) raise Exception('hi') def write_error(self, status_code, **kwargs): if 'exc_info' in kwargs: - blueox.set('exception', ''.join(traceback.format_exception(*kwargs["exc_info"]))) + blueox.set('exception', ''.join( + traceback.format_exception(*kwargs["exc_info"]))) - return super(AsyncErrorHandler, self).write_error(status_code, **kwargs) + return super(AsyncErrorHandler, self).write_error(status_code, + **kwargs) -class AsyncTimeoutHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler): +class AsyncTimeoutHandler( + blueox.tornado_utils.BlueOxRequestHandlerMixin, + tornado.web.RequestHandler): @blueox.tornado_utils.coroutine def get(self): loop = self.request.connection.stream.io_loop - called = yield tornado.gen.Task(loop.add_timeout, time.time() + 1.0) + _ = yield tornado.gen.Task(loop.add_timeout, time.time() + 1.0) -class AsyncRecurseTimeoutHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler): +class AsyncRecurseTimeoutHandler( + blueox.tornado_utils.BlueOxRequestHandlerMixin, + tornado.web.RequestHandler): @blueox.tornado_utils.coroutine def post(self): loop = self.request.connection.stream.io_loop @@ -64,8 +76,8 @@ def post(self): blueox.set("start", True) try: - f = yield http_client.fetch(self.request.body, request_timeout=0.5) - except tornado.httpclient.HTTPError, e: + _ = yield http_client.fetch(self.request.body, request_timeout=0.5) + except tornado.httpclient.HTTPError: self.write("got it") else: self.write("nope") @@ -73,13 +85,14 @@ def post(self): blueox.set("end", True) -class MainHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler): +class MainHandler( + blueox.tornado_utils.BlueOxRequestHandlerMixin, + tornado.web.RequestHandler): def get(self): blueox.set('async', False) self.write("Hello, world") - class SimpleTestCase(tornado_test.AsyncHTTPTestCase): @setup def setup_bluox(self): @@ -112,11 +125,6 @@ def test_error(self): f = self.http_client.fetch(self.get_url("/error"), self.stop) resp = self.wait() - #for ctx_id in self.log_ctx: - #print ctx_id - #for ctx in self.log_ctx[ctx_id]: - #pprint.pprint(ctx.to_dict()) - assert_equal(len(self.log_ctx), 2) found_exception = False @@ -128,31 +136,22 @@ def test_error(self): assert found_exception def test_timeout_error(self): - f = 
self.http_client.fetch(self.get_url("/timeout"), self.stop, request_timeout=0.5) + f = self.http_client.fetch( + self.get_url("/timeout"), self.stop, request_timeout=0.5) resp = self.wait() - #for ctx_id in self.log_ctx: - #print ctx_id - #for ctx in self.log_ctx[ctx_id]: - #pprint.pprint(ctx.to_dict()) - assert_equal(len(self.log_ctx), 1) ctx = self.log_ctx[self.log_ctx.keys()[0]][0] assert_equal(get_deep(ctx.to_dict(), 'body.response.code'), 599) def test_recurse_timeout_error(self): url = self.get_url("/timeout") - f = self.http_client.fetch(self.get_url("/recurse_timeout"), self.stop, + _ = self.http_client.fetch(self.get_url("/recurse_timeout"), self.stop, body=url, method="POST", request_timeout=1.5) resp = self.wait() - #for ctx_id in self.log_ctx: - #print ctx_id - #for ctx in self.log_ctx[ctx_id]: - #pprint.pprint(ctx.to_dict()) - assert_equal(resp.code, 200) assert_equal(resp.body, "got it") @@ -161,7 +160,9 @@ def test_recurse_timeout_error(self): for ctx_list in self.log_ctx.values(): for ctx in ctx_list: c = ctx.to_dict() - if c['type'] == 'request.httpclient' and c['body']['response']['code'] == 599: + if ( + c['type'] == 'request.httpclient' and + c['body']['response']['code'] == 599): found_timeout = True if c['type'] == 'request' and get_deep(c, 'body.start'): @@ -175,13 +176,8 @@ def test_context(self): self.http_client.fetch(self.get_url("/async"), self.stop) resp = self.wait() - #for ctx_id in self.log_ctx: - #print - #print ctx_id - #for ctx in self.log_ctx[ctx_id]: - #pprint.pprint(ctx.to_dict()) - - # If everything worked properly, we should have two separate ids, one will have two contexts associated with it. + # If everything worked properly, we should have two separate ids, + # one will have two contexts associated with it. # Hopefully it's the right one. found_sync = None found_async = None @@ -191,7 +187,9 @@ def test_context(self): if ctx.name == "request" and ctx.to_dict()['body']['async']: assert_equal(len(ctx_list), 3) found_async = ctx - if ctx.name == "request" and not ctx.to_dict()['body']['async']: + if ( + ctx.name == "request" and + not ctx.to_dict()['body']['async']): assert_equal(len(ctx_list), 1) found_sync = ctx if ctx.name.endswith("httpclient"): From 080a128ae14315aee78bfc89825272cbf90e3f0f Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Fri, 31 Aug 2018 08:51:56 -0400 Subject: [PATCH 06/16] Clean up more syntax, flip constant names --- blueox/__init__.py | 11 +++++------ blueox/logger.py | 7 +++++-- blueox/store.py | 13 +++++++------ blueox/timer.py | 4 +++- blueox/tornado_utils.py | 34 ++++++++++++++++++---------------- tests/recorders/kafka_test.py | 4 ++-- 6 files changed, 40 insertions(+), 33 deletions(-) diff --git a/blueox/__init__.py b/blueox/__init__.py index 2980591..7bd2198 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -18,7 +18,6 @@ __url__ = 'https://github.com/rhettg/BlueOx' import logging -import os from . import utils from . 
import ports @@ -33,13 +32,13 @@ log = logging.getLogger(__name__) -RECORDER_ZMQ = 'zmq' -RECORDER_KAFKA = 'kafka' +ZMQ_RECORDER = 'zmq' +KAFKA_RECORDER = 'kafka' RECORDERS = { - RECORDER_ZMQ: zmq, - RECORDER_KAFKA: kafka, + ZMQ_RECORDER: zmq, + KAFKA_RECORDER: kafka, } -DEFAULT_RECORDER = RECORDER_ZMQ +DEFAULT_RECORDER = ZMQ_RECORDER def configure(host, port, recorder=None): diff --git a/blueox/logger.py b/blueox/logger.py index 37d2180..6a7c025 100644 --- a/blueox/logger.py +++ b/blueox/logger.py @@ -3,7 +3,9 @@ blueox.logger ~~~~~~~~ -This module provides integration with blueox and standard python logging module. +This module provides integration with blueox and standard +python logging module. + :copyright: (c) 2012 by Rhett Garber :license: ISC, see LICENSE for more details. @@ -20,7 +22,8 @@ class LogHandler(logging.Handler): Records standard fields such as logger name, level the message and if an exception was provided, the string formatted exception. - The type name, if not specified will be something like '.log' + The type name, if not specified will be something like + '.log' """ def __init__(self, type_name=None): diff --git a/blueox/store.py b/blueox/store.py index 66f4f19..afeaa2f 100644 --- a/blueox/store.py +++ b/blueox/store.py @@ -226,7 +226,8 @@ def list_log_files(log_path): def filter_log_files_for_active(log_files): - """Filter our list of log files to remove those we expect might be active.""" + """Filter our list of log files to remove those we expect might be active. + """ out_log_files = [] files_by_type = collections.defaultdict(list) @@ -242,11 +243,11 @@ def filter_log_files_for_active(log_files): out_log_files += type_files - # If that last log file is old, then it's probably not being used either. - # We add a buffer of an hour just to make sure everything has rotated - # away safely when this is run close to midnight. - cutoff_date = (datetime.datetime.utcnow() - datetime.timedelta(hours=1) - ).date() + # If that last log file is old, then it's probably not being used + # either. We add a buffer of an hour just to make sure everything has + # rotated away safely when this is run close to midnight. + cutoff_date = ( + datetime.datetime.utcnow() - datetime.timedelta(hours=1)).date() if last_lf.date < cutoff_date: out_log_files.append(last_lf) diff --git a/blueox/timer.py b/blueox/timer.py index dc0cf18..77a22ff 100644 --- a/blueox/timer.py +++ b/blueox/timer.py @@ -3,7 +3,9 @@ blueox.timer ~~~~~~~~ -This module has a timer context manager for easily tracking wall-clock time for some execution +This module has a timer context manager for easily tracking wall-clock +time for some execution + :copyright: (c) 2012 by Rhett Garber :license: ISC, see LICENSE for more details. diff --git a/blueox/tornado_utils.py b/blueox/tornado_utils.py index ffbfa9b..af3e2c4 100644 --- a/blueox/tornado_utils.py +++ b/blueox/tornado_utils.py @@ -5,8 +5,8 @@ This module provides hooks for using blueox with the Tornado async web server. Making blueox useful inside tornado is a challenge since you'll likely want a -blueox context per request, but multiple requests can be going on at once inside -tornado. +blueox context per request, but multiple requests can be going on at once +inside tornado. :copyright: (c) 2012 by Rhett Garber :license: ISC, see LICENSE for more details. 
@@ -19,8 +19,6 @@ import sys import time -log = logging.getLogger(__name__) - import tornado.web import tornado.gen import tornado.httpclient @@ -29,6 +27,8 @@ import blueox +log = logging.getLogger(__name__) + def _gen_wrapper(ctx, generator): """Generator Wrapper that starts/stops our context @@ -112,7 +112,8 @@ def on_finish(self): class SampleRequestHandler(BlueOxRequestHandlerMixin, tornado.web.RequestHandler): - """Sample base request handler that provides basic information about the request. + """Sample base request handler that provides basic + information about the request. """ def prepare(self): @@ -123,8 +124,8 @@ def prepare(self): def write_error(self, status_code, **kwargs): if 'exc_info' in kwargs: - blueox.set('exception', - ''.join(traceback.format_exception(*kwargs["exc_info"]))) + blueox.set('exception', ''.join( + traceback.format_exception(*kwargs["exc_info"]))) return super(SampleRequestHandler, self).write_error(status_code, **kwargs) @@ -159,15 +160,16 @@ def fetch(self, request, callback=None, **kwargs): ctx.stop() # I'd love to use the future to handle the completion step, BUT, we - # need this to happen first. If the caller has provided a callback, we don't want them - # to get called before we do. Rather than poke into the internal datastructures, we'll just - # handle the callback explicitly + # need this to happen first. If the caller has provided a callback, we + # don't want them to get called before we do. Rather than poke into the + # internal datastructures, we'll just handle the callback explicitly def complete_context(response): ctx.start() ctx.set('response.code', response.code) - ctx.set('response.size', len(response.body) if response.body else 0) + ctx.set('response.size', + len(response.body) if response.body else 0) ctx.done() @@ -175,12 +177,12 @@ def complete_context(response): def fetch_complete(future): # This error handling is just copied from tornado.httpclient as - # we need to record a real HTTPError. httpclient might do the same thing - # again if needs to deal with the caller's callbacks. + # we need to record a real HTTPError. 
httpclient might do the + # same thing again if needs to deal with the caller's callbacks exc = future.exception() - if isinstance( - exc, - tornado.httpclient.HTTPError) and exc.response is not None: + if ( + isinstance(exc, tornado.httpclient.HTTPError) and + exc.response is not None): response = exc.response elif exc is not None: response = tornado.httpclient.HTTPResponse( diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py index 2716112..7ab3ee8 100644 --- a/tests/recorders/kafka_test.py +++ b/tests/recorders/kafka_test.py @@ -10,7 +10,7 @@ teardown, assert_equal) -from blueox import default_configure, RECORDER_KAFKA +from blueox import default_configure, KAFKA_RECORDER from blueox import utils from blueox import context from blueox.recorders import kafka @@ -50,7 +50,7 @@ def test_configure_no_override(self): assert_equal(context._recorder_function, zmq.send) def test_configure_override(self): - default_configure(recorder=RECORDER_KAFKA) + default_configure(recorder=KAFKA_RECORDER) assert_equal(context._recorder_function, kafka.send) From 7317a36d8e8a57999400a40ae804178fdf9cf1b7 Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Fri, 31 Aug 2018 09:01:04 -0400 Subject: [PATCH 07/16] Add kafka recorder defaults to contrib --- blueox/contrib/__init__.py | 1 - blueox/contrib/celery/__init__.py | 1 - blueox/contrib/celery/celery_signals.py | 19 ++++++++++++------- blueox/contrib/django/__init__.py | 1 - blueox/contrib/django/middleware.py | 14 +++++++++++--- blueox/contrib/flask/__init__.py | 11 ++++++++--- 6 files changed, 31 insertions(+), 16 deletions(-) diff --git a/blueox/contrib/__init__.py b/blueox/contrib/__init__.py index 8b13789..e69de29 100644 --- a/blueox/contrib/__init__.py +++ b/blueox/contrib/__init__.py @@ -1 +0,0 @@ - diff --git a/blueox/contrib/celery/__init__.py b/blueox/contrib/celery/__init__.py index 8b13789..e69de29 100644 --- a/blueox/contrib/celery/__init__.py +++ b/blueox/contrib/celery/__init__.py @@ -1 +0,0 @@ - diff --git a/blueox/contrib/celery/celery_signals.py b/blueox/contrib/celery/celery_signals.py index 3c20092..cc063db 100644 --- a/blueox/contrib/celery/celery_signals.py +++ b/blueox/contrib/celery/celery_signals.py @@ -1,8 +1,7 @@ """Hooks for gathering celery task data into blueox. -Importing this module will register signal handlers into Celery worker's runtime. - -We also will track creation of tasks on the client side. +Importing this module will register signal handlers into Celery +worker's runtime. We also will track creation of tasks on the client side. """ import traceback @@ -33,9 +32,9 @@ def on_task_sent(sender=None, body=None, **kwargs): @signals.task_sent.connect def on_task_sent(**kwargs): with blueox.Context('.celery.task_sent'): - # Arguments for this signal are different than the worker signals. Sometimes - # they are even different than what the documentation says. See also - # https://github.com/celery/celery/issues/1606 + # Arguments for this signal are different than the worker signals. + # Sometimes they are even different than what the documentation + # says. 
See also https://github.com/celery/celery/issues/1606 blueox.set('task_id', kwargs.get('task_id', kwargs.get('id'))) blueox.set('task', str(kwargs['task'])) blueox.set('eta', kwargs['eta']) @@ -43,7 +42,13 @@ def on_task_sent(**kwargs): @signals.worker_process_init.connect def on_worker_process_init(**kwargs): - if hasattr(settings, 'BLUEOX_HOST'): + if hasattr(settings, 'BLUEOX_KAFKA_HOST'): + if settings.BLUEOX_KAFKA_HOST: + rec = blueox.KAFKA_RECORDER + blueox.default_configure(settings.BLUEOX_KAFKA_HOST, recorder=rec) + else: + blueox.configure(None, None) + elif hasattr(settings, 'BLUEOX_HOST'): if settings.BLUEOX_HOST: blueox.default_configure(settings.BLUEOX_HOST) else: diff --git a/blueox/contrib/django/__init__.py b/blueox/contrib/django/__init__.py index 8b13789..e69de29 100644 --- a/blueox/contrib/django/__init__.py +++ b/blueox/contrib/django/__init__.py @@ -1 +0,0 @@ - diff --git a/blueox/contrib/django/middleware.py b/blueox/contrib/django/middleware.py index b16f486..86fe8c7 100644 --- a/blueox/contrib/django/middleware.py +++ b/blueox/contrib/django/middleware.py @@ -1,6 +1,5 @@ import sys import traceback -import logging import blueox @@ -10,7 +9,14 @@ class Middleware(object): def __init__(self): - if hasattr(settings, 'BLUEOX_HOST'): + if hasattr(settings, 'BLUEOX_KAFKA_HOST'): + if settings.BLUEOX_KAFKA_HOST: + rec = blueox.KAFKA_RECORDER + blueox.default_configure( + settings.BLUEOX_KAFKA_HOST, recorder=rec) + else: + blueox.configure(None, None) + elif hasattr(settings, 'BLUEOX_HOST'): if settings.BLUEOX_HOST: blueox.default_configure(settings.BLUEOX_HOST) else: @@ -28,7 +34,9 @@ def process_request(self, request): headers = {} for k, v in request.META.iteritems(): - if k.startswith('HTTP_') or k in ('CONTENT_LENGTH', 'CONTENT_TYPE'): + if ( + k.startswith('HTTP_') or + k in ('CONTENT_LENGTH', 'CONTENT_TYPE')): headers[k] = v blueox.set('headers', headers) diff --git a/blueox/contrib/flask/__init__.py b/blueox/contrib/flask/__init__.py index 56fb178..620912c 100644 --- a/blueox/contrib/flask/__init__.py +++ b/blueox/contrib/flask/__init__.py @@ -23,7 +23,12 @@ class BlueOxMiddleware(object): def __init__(self, app): self.app = app - if 'BLUEOX_HOST' in app.config: + if 'BLUEOX_KAFKA_HOST' in app.config: + self.blueox_kafka_host = app.config['BLUEOX_KAFKA_HOST'] + if self.blueox_kafka_host: + rec = blueox.KAFKA_RECORDER + blueox.default_configure(self.blueox_kafka_host, recorder=rec) + elif 'BLUEOX_HOST' in app.config: self.blueox_host = app.config['BLUEOX_HOST'] if self.blueox_host: blueox.default_configure(self.blueox_host) @@ -45,8 +50,8 @@ def before_request(self, *args, **kwargs): headers = {} for k, v in request.environ.iteritems(): if ( - k.startswith('HTTP_') or k in - ('CONTENT_LENGTH', 'CONTENT_TYPE')): + k.startswith('HTTP_') or + k in ('CONTENT_LENGTH', 'CONTENT_TYPE')): headers[k] = v blueox.set('headers', headers) From 870c56dde279bb3359949bc99cfbc6761b80a2bf Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Sat, 1 Sep 2018 02:09:38 -0400 Subject: [PATCH 08/16] jk it's a pycernan shim now --- blueox/__init__.py | 8 +- blueox/contrib/celery/celery_signals.py | 9 +- blueox/contrib/django/middleware.py | 8 +- blueox/contrib/flask/__init__.py | 11 +- blueox/ports.py | 12 +- blueox/recorders/kafka.py | 109 ---------------- blueox/recorders/pycernan.py | 139 ++++++++++++++++++++ blueox/recorders/zmq.py | 2 +- requirements.txt | 2 +- tests/ports_test.py | 25 ++-- tests/recorders/kafka_test.py | 122 ------------------ tests/recorders/pycernan_test.py | 164 
++++++++++++++++++++++++
 vendor/pycernan-0.0.10.zip         | Bin 0 -> 7982 bytes
 13 files changed, 345 insertions(+), 266 deletions(-)
 delete mode 100644 blueox/recorders/kafka.py
 create mode 100644 blueox/recorders/pycernan.py
 delete mode 100644 tests/recorders/kafka_test.py
 create mode 100644 tests/recorders/pycernan_test.py
 create mode 100644 vendor/pycernan-0.0.10.zip

diff --git a/blueox/__init__.py b/blueox/__init__.py
index 7bd2198..17e2730 100644
--- a/blueox/__init__.py
+++ b/blueox/__init__.py
@@ -28,15 +28,15 @@ from .errors import Error
 from .logger import LogHandler
 from .timer import timeit
-from .recorders import kafka, zmq
+from .recorders import pycernan, zmq

 log = logging.getLogger(__name__)

 ZMQ_RECORDER = 'zmq'
-KAFKA_RECORDER = 'kafka'
+PYCERNAN_RECORDER = 'pycernan'
 RECORDERS = {
     ZMQ_RECORDER: zmq,
-    KAFKA_RECORDER: kafka,
+    PYCERNAN_RECORDER: pycernan,
 }
 DEFAULT_RECORDER = ZMQ_RECORDER

@@ -87,4 +87,4 @@ def default_configure(host=None, recorder=DEFAULT_RECORDER):

 def shutdown():
     zmq.close()
-    kafka.close()
+    pycernan.close()
diff --git a/blueox/contrib/celery/celery_signals.py b/blueox/contrib/celery/celery_signals.py
index cc063db..c37dba6 100644
--- a/blueox/contrib/celery/celery_signals.py
+++ b/blueox/contrib/celery/celery_signals.py
@@ -42,10 +42,11 @@ def on_task_sent(**kwargs):

 @signals.worker_process_init.connect
 def on_worker_process_init(**kwargs):
-    if hasattr(settings, 'BLUEOX_KAFKA_HOST'):
-        if settings.BLUEOX_KAFKA_HOST:
-            rec = blueox.KAFKA_RECORDER
-            blueox.default_configure(settings.BLUEOX_KAFKA_HOST, recorder=rec)
+    if hasattr(settings, 'BLUEOX_PYCERNAN_HOST'):
+        if settings.BLUEOX_PYCERNAN_HOST:
+            rec = blueox.PYCERNAN_RECORDER
+            blueox.default_configure(
+                settings.BLUEOX_PYCERNAN_HOST, recorder=rec)
         else:
             blueox.configure(None, None)
     elif hasattr(settings, 'BLUEOX_HOST'):
diff --git a/blueox/contrib/django/middleware.py b/blueox/contrib/django/middleware.py
index 86fe8c7..1471ac2 100644
--- a/blueox/contrib/django/middleware.py
+++ b/blueox/contrib/django/middleware.py
@@ -9,11 +9,11 @@
 class Middleware(object):

     def __init__(self):
-        if hasattr(settings, 'BLUEOX_KAFKA_HOST'):
-            if settings.BLUEOX_KAFKA_HOST:
-                rec = blueox.KAFKA_RECORDER
+        if hasattr(settings, 'BLUEOX_PYCERNAN_HOST'):
+            if settings.BLUEOX_PYCERNAN_HOST:
+                rec = blueox.PYCERNAN_RECORDER
                 blueox.default_configure(
-                    settings.BLUEOX_KAFKA_HOST, recorder=rec)
+                    settings.BLUEOX_PYCERNAN_HOST, recorder=rec)
             else:
                 blueox.configure(None, None)
         elif hasattr(settings, 'BLUEOX_HOST'):
diff --git a/blueox/contrib/flask/__init__.py b/blueox/contrib/flask/__init__.py
index 620912c..57de85d 100644
--- a/blueox/contrib/flask/__init__.py
+++ b/blueox/contrib/flask/__init__.py
@@ -23,11 +23,12 @@ class BlueOxMiddleware(object):
     def __init__(self, app):
         self.app = app

-        if 'BLUEOX_KAFKA_HOST' in app.config:
-            self.blueox_kafka_host = app.config['BLUEOX_KAFKA_HOST']
-            if self.blueox_kafka_host:
-                rec = blueox.KAFKA_RECORDER
-                blueox.default_configure(self.blueox_kafka_host, recorder=rec)
+        if 'BLUEOX_PYCERNAN_HOST' in app.config:
+            self.blueox_pycernan_host = app.config['BLUEOX_PYCERNAN_HOST']
+            if self.blueox_pycernan_host:
+                rec = blueox.PYCERNAN_RECORDER
+                blueox.default_configure(
+                    self.blueox_pycernan_host, recorder=rec)
         elif 'BLUEOX_HOST' in app.config:
             self.blueox_host = app.config['BLUEOX_HOST']
             if self.blueox_host:
diff --git a/blueox/ports.py b/blueox/ports.py
index 39470d9..d977847 100644
--- a/blueox/ports.py
+++ b/blueox/ports.py
@@ -43,11 +43,11 @@ def default_collect_host(host=None):
     return _default_host(host, default_host, DEFAULT_COLLECT_PORT)


-# For consistency, we'll abstract kafka connections in the same way
-ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST'
-DEFAULT_KAFKA_PORT = 9002
+# For consistency, we'll abstract pycernan connections in the same way
+ENV_VAR_PYCERNAN_HOST = 'BLUEOX_PYCERNAN_HOST'
+DEFAULT_PYCERNAN_PORT = 2003


-def default_kafka_host(host=None):
-    default_host = os.environ.get(ENV_VAR_KAFKA_HOST, DEFAULT_HOST)
-    return _default_host(host, default_host, DEFAULT_KAFKA_PORT)
+def default_pycernan_host(host=None):
+    default_host = os.environ.get(ENV_VAR_PYCERNAN_HOST, DEFAULT_HOST)
+    return _default_host(host, default_host, DEFAULT_PYCERNAN_PORT)
diff --git a/blueox/recorders/kafka.py b/blueox/recorders/kafka.py
deleted file mode 100644
index cc5e89b..0000000
--- a/blueox/recorders/kafka.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-blueox.recorders.kafka
-~~~~~~~~
-
-This module provides the interface into Kafka
-
-:copyright: (c) 2018 by Aaron Biller??
-:license: ISC, see LICENSE for more details.
-
-"""
-from __future__ import absolute_import
-
-import atexit
-import logging
-import msgpack
-import threading
-
-from kafka import KafkaProducer
-
-from blueox import ports
-from blueox import utils
-
-log = logging.getLogger(__name__)
-
-# If we have pending outgoing messages, this is how long we'll wait after
-# being told to exit.
-LINGER_SHUTDOWN_MSECS = 2000
-
-
-def default_host(host=None):
-    """Build a default host string for the kafka producer
-    """
-    return ports.default_kafka_host(host)
-
-
-threadLocal = threading.local()
-
-# Context can be shared between threads
-_kafka_hosts = None
-
-
-def init(host, port):
-    global _kafka_hosts
-
-    _kafka_hosts = '{}:{}'.format(host, port)
-
-
-def _thread_connect():
-    if _kafka_hosts and not getattr(threadLocal, 'kp', None):
-        threadLocal.kp = KafkaProducer(bootstrap_servers=_kafka_hosts)
-
-
-def _serialize_context(context):
-    context_dict = context.to_dict()
-    for key in ('host', 'type'):
-        if len(context_dict.get(key, "")) > 64:
-            raise ValueError("Value too long: %r" % key)
-
-    context_dict = {
-        k: v.encode('utf-8') if isinstance(v, unicode)
-        else v for k, v in context_dict.items()
-    }
-
-    try:
-        context_data = msgpack.packb(context_dict)
-    except TypeError:
-        try:
-            # If we fail to serialize our context, we can try again with an
-            # enhanced packer (it's slower though)
-            context_data = msgpack.packb(context_dict,
-                                         default=utils.msgpack_encode_default)
-        except TypeError:
-            log.exception("Serialization failure (not fatal, dropping data)")
-
-            # One last try after dropping the body
-            context_dict['body'] = None
-            context_data = msgpack.packb(context_dict)
-
-    return context_data
-
-
-def send(context):
-    _thread_connect()
-
-    try:
-        context_data = _serialize_context(context)
-    except Exception:
-        log.exception("Failed to serialize context")
-        return
-
-    if _kafka_hosts and threadLocal.kp is not None:
-        try:
-            log.debug("Sending msg")
-            threadLocal.kp.send('events', context_data)
-        except Exception:
-            log.exception("Failed during publish to kafka.")
-    else:
-        log.info("Skipping sending event %s", context.name)
-
-
-def close():
-    if getattr(threadLocal, 'kp', None):
-        threadLocal.kp.flush()
-        threadLocal.kp.close(timeout=LINGER_SHUTDOWN_MSECS)
-        threadLocal.kp = None
-
-
-atexit.register(close)
diff --git a/blueox/recorders/pycernan.py b/blueox/recorders/pycernan.py
new file mode 100644
index 0000000..1edfd5b
--- /dev/null
+++ b/blueox/recorders/pycernan.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+blueox.recorders.pycernan
+~~~~~~~~
+
+This module provides the interface into pycernan
+
+:copyright: (c) 2018 by Aaron Biller??
+:license: ISC, see LICENSE for more details.
+
+"""
+from __future__ import absolute_import
+
+import atexit
+import datetime
+import decimal
+import json
+import logging
+import os
+import threading
+
+from pycernan.avro import Client
+
+from blueox import ports
+
+log = logging.getLogger(__name__)
+
+_uname = os.uname()[1]
+
+# Global blueox avro schema definition
+BLUEOX_AVRO_RECORD = {
+    "doc": "A BlueOx event",
+    "name": "blueox_event",
+    "namespace": "blueox.{}".format(_uname),
+    "type": "record",
+    "fields": [
+        {"name": "id", "type": "string"},
+        {"name": "type", "type": "string"},
+        {"name": "host", "type": "string"},
+        {"name": "pid", "type": "long"},
+        {"name": "start", "type": "double"},
+        {"name": "end", "type": "double"},
+        {"name": "body", "type": ["null", "string"], "default": "null"}
+    ]
+}
+
+
+def default_host(host=None):
+    """Build a default host string for pycernan
+    """
+    return ports.default_pycernan_host(host)
+
+
+def _serializer(obj):
+    """Serialize native python objects
+    """
+    if isinstance(obj, (datetime.datetime, datetime.date)):
+        return obj.isoformat()
+    elif isinstance(obj, decimal.Decimal):
+        return float(obj)
+    try:
+        obj = str(obj)
+    except Exception:
+        raise TypeError(repr(obj) + ' is not JSON serializable')
+    return obj
+
+
+threadLocal = threading.local()
+
+# Context can be shared between threads
+_client = None
+
+
+def init(host, port):
+    global _client
+
+    _client = Client(host=host, port=port)
+
+
+def _thread_connect():
+    if _client and not getattr(threadLocal, 'client', None):
+        threadLocal.client = _client
+
+
+def _serialize_context(context):
+    context_dict = context.to_dict()
+    for key in ('host', 'type'):
+        if len(context_dict.get(key, '')) > 64:
+            raise ValueError('Value too long: %r' % key)
+
+    context_dict['id'] = str(context_dict['id'])
+
+    body = context_dict.get('body', None)
+    if body is not None:
+        try:
+            context_dict['body'] = json.dumps(body, default=_serializer)
+        except (TypeError, ValueError):
+            try:
+                context_dict['body'] = unicode(body)
+            except Exception:
+                log.exception(
+                    'Serialization failure (not fatal, dropping data)')
+                context_dict['body'] = None
+
+    context_dict = {
+        k: v.encode('utf-8') if isinstance(v, unicode)
+        else v for k, v in context_dict.items()
+    }
+
+    return context_dict
+
+
+def send(context):
+    _thread_connect()
+
+    try:
+        context_data = [_serialize_context(context)]
+    except Exception:
+        log.exception('Failed to serialize context')
+        return
+
+    if _client and threadLocal.client is not None:
+        try:
+            log.debug('Sending msg')
+            threadLocal.client.publish(
+                BLUEOX_AVRO_RECORD, context_data, sync=False)
+        except Exception:
+            log.exception('Failed during publish to pycernan.')
+    else:
+        log.info('Skipping sending event %s', context.name)
+
+
+def close():
+    if getattr(threadLocal, 'client', None):
+        threadLocal.client.close()
+        threadLocal.client = None
+
+
+atexit.register(close)
diff --git a/blueox/recorders/zmq.py b/blueox/recorders/zmq.py
index 88b227a..fa02207 100644
--- a/blueox/recorders/zmq.py
+++ b/blueox/recorders/zmq.py
@@ -48,7 +48,7 @@ def check_meta_version(meta):


 def default_host(host=None):
-    """Build a default host string for the kafka producer
+    """Build a default host string for the blueox collector
     """
     return ports.default_collect_host(host)

diff --git a/requirements.txt b/requirements.txt
index 61a4d6d..4e7e345 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,4 @@ pyflakes
 tornado==3.2
 boto
 yapf
-kafka-python
+./vendor/pycernan-0.0.10.zip
diff --git a/tests/ports_test.py b/tests/ports_test.py
index 4ed75d1..0c5aeb5 100644
--- a/tests/ports_test.py
+++ b/tests/ports_test.py
@@ -76,24 +76,29 @@ def test_env_port(self):
         assert_equal(host, "master:123")


-class DefaultKafkaHost(TestCase):
+class DefaultPycernanHost(TestCase):
     @teardown
     def clear_env(self):
         try:
-            del os.environ['BLUEOX_KAFKA_HOST']
+            del os.environ['BLUEOX_PYCERNAN_HOST']
         except KeyError:
             pass

     def test_empty(self):
-        host = ports.default_kafka_host()
-        assert_equal(host, '127.0.0.1:9002')
+        host = ports.default_pycernan_host()
+        assert_equal(host, '127.0.0.1:2003')

     def test_env(self):
-        os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s'
-        host = ports.default_kafka_host()
-        assert_equal(host, 'local.svc.team-me.aws.jk8s:9002')
+        os.environ['BLUEOX_PYCERNAN_HOST'] = 'local.svc.team-me.aws.jk8s'
+        host = ports.default_pycernan_host()
+        assert_equal(host, 'local.svc.team-me.aws.jk8s:2003')

     def test_env_port(self):
-        os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s:9002'
-        host = ports.default_kafka_host()
-        assert_equal(host, 'local.svc.team-me.aws.jk8s:9002')
+        os.environ['BLUEOX_PYCERNAN_HOST'] = 'local.svc.team-me.aws.jk8s:2003'
+        host = ports.default_pycernan_host()
+        assert_equal(host, 'local.svc.team-me.aws.jk8s:2003')
+
+    def test_passed(self):
+        _host = 'my.wish.is.your.command'
+        host = ports.default_pycernan_host(_host)
+        assert_equal(host, 'my.wish.is.your.command:2003')
diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py
deleted file mode 100644
index 7ab3ee8..0000000
--- a/tests/recorders/kafka_test.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import os
-import random
-import decimal
-import datetime
-
-import msgpack
-from testify import (
-    TestCase,
-    setup,
-    teardown,
-    assert_equal)
-
-from blueox import default_configure, KAFKA_RECORDER
-from blueox import utils
-from blueox import context
-from blueox.recorders import kafka
-from blueox.recorders import zmq
-
-
-class MockKafkaProducer(object):
-    last_topic = None
-    last_data = None
-    close_timeout = None
-
-    def __call__(self, bootstrap_servers=None):
-        self.bootstrap_servers = bootstrap_servers
-        return self
-
-    def send(self, topic, data):
-        self.last_topic = topic
-        self.last_data = data
-
-    def flush(self):
-        pass
-
-    def close(self, timeout=None):
-        self.close_timeout = timeout
-
-
-class KafkaOverrideTestCase(TestCase):
-    @teardown
-    def clear_env(self):
-        try:
-            del os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER']
-        except KeyError:
-            pass
-
-    def test_configure_no_override(self):
-        default_configure()
-        assert_equal(context._recorder_function, zmq.send)
-
-    def test_configure_override(self):
-        default_configure(recorder=KAFKA_RECORDER)
-        assert_equal(context._recorder_function, kafka.send)
-
-
-class KafkaSendTestCase(TestCase):
-    @setup
-    def build_context(self):
-        self.context = context.Context('test', 1)
-
-    @setup
-    def init_kafka(self):
-        self.port = random.randint(30000, 40000)
-        kafka.init('127.0.0.1', self.port)
-
-    @setup
-    def configure_kafka(self):
-        context._recorder_function = kafka.send
-        self.kp = MockKafkaProducer()
-        kafka.KafkaProducer = self.kp
-
-    @teardown
-    def unconfigure_kafka(self):
-        context._recorder_function = None
-
-    def test(self):
-        with self.context:
-            self.context.set('foo', True)
-            self.context.set('bar.baz', 10.0)
-
-        data = msgpack.unpackb(self.kp.last_data)
-        assert_equal(self.kp.last_topic, 'events')
-        assert_equal(data['id'], 1)
-        assert_equal(data['type'], 'test')
-        assert_equal(utils.get_deep(data['body'], "bar.baz"), 10.0)
-
-        kafka.close()
-        assert_equal(self.kp.close_timeout, kafka.LINGER_SHUTDOWN_MSECS)
-
-
-class SerializeContextTestCase(TestCase):
-    @setup
-    def build_context(self):
-        self.context = context.Context('test', 1)
-
-    def test_types(self):
-        with self.context:
-            self.context.set('decimal_value', decimal.Decimal("6.66"))
-            self.context.set('date_value', datetime.date(2013, 12, 10))
-            self.context.set(
-                'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12))
-
-        context_data = kafka._serialize_context(self.context)
-        data = msgpack.unpackb(context_data)
-        assert_equal(data['body']['decimal_value'], "6.66")
-        assert_equal(data['body']['date_value'], "2013-12-10")
-        assert_equal(
-            datetime.datetime.fromtimestamp(
-                float(data['body']['datetime_value'])),
-            datetime.datetime(2013, 12, 10, 12, 12, 12))
-
-    def test_exception(self):
-        with self.context:
-            self.context.set('value', Exception('hello'))
-
-        context_data = kafka._serialize_context(self.context)
-        data = msgpack.unpackb(context_data)
-
-        # The serialization should fail, but that just
-        # means we don't have any data.
-        assert_equal(data['body'], None)
diff --git a/tests/recorders/pycernan_test.py b/tests/recorders/pycernan_test.py
new file mode 100644
index 0000000..56ef550
--- /dev/null
+++ b/tests/recorders/pycernan_test.py
@@ -0,0 +1,164 @@
+import datetime
+import decimal
+import json
+import random
+
+from testify import (
+    TestCase,
+    setup,
+    teardown,
+    assert_equal,
+    assert_raises)
+
+from pycernan.avro.serde import serialize
+from pycernan.avro.exceptions import DatumTypeException
+
+from blueox import default_configure, PYCERNAN_RECORDER
+from blueox import utils
+from blueox import context
+from blueox.recorders import pycernan as pycernan_rec
+from blueox.recorders import zmq
+
+
+class MockPycernanClient(object):
+    last_schema = None
+    last_batch = None
+    last_sync = None
+
+    def __call__(self, host=None, port=None):
+        self.host = host
+        self.port = port
+        return self
+
+    def publish(self, schema, batch, sync=None):
+        self.last_schema = schema
+        self.last_batch = batch
+        self.last_sync = sync
+
+    def close(self):
+        pass
+
+
+class CantSerializeMe(object):
+    def __repr__(self):
+        return chr(167)
+
+
+class PycernanOverrideTestCase(TestCase):
+    def test_configure_no_override(self):
+        default_configure()
+        assert_equal(context._recorder_function, zmq.send)
+
+    def test_configure_override(self):
+        pycernan_rec.Client = MockPycernanClient()
+        default_configure(recorder=PYCERNAN_RECORDER)
+        assert_equal(context._recorder_function, pycernan_rec.send)
+
+
+class PycernanSendTestCase(TestCase):
+    @setup
+    def build_context(self):
+        self.context = context.Context('test', 1)
+
+    @setup
+    def init_pycernan(self):
+        self.port = random.randint(30000, 40000)
+        self.client = MockPycernanClient()
+        pycernan_rec.Client = self.client
+        pycernan_rec.init('127.0.0.1', self.port)
+
+    @setup
+    def configure_pycernan(self):
+        context._recorder_function = pycernan_rec.send
+
+    @teardown
+    def unconfigure_pycernan(self):
+        context._recorder_function = None
+
+    @teardown
+    def destroy_recorder(self):
+        pycernan_rec.close()
+
+    def test(self):
+        with self.context:
+            self.context.set('foo', True)
+            self.context.set('bar.baz', 10.0)
+
+        data = self.client.last_batch[0]
+        data['body'] = json.loads(data['body'])
+        assert_equal(self.client.last_schema, pycernan_rec.BLUEOX_AVRO_RECORD)
+        assert_equal(self.client.last_sync, False)
+        assert_equal(data['id'], '1')
+        assert_equal(data['type'], 'test')
+        assert_equal(utils.get_deep(data['body'], 'bar.baz'), 10.0)
+
+        assert_equal(self.client.host, '127.0.0.1')
+        assert_equal(self.client.port, self.port)
+
+
+class SerializeContextTestCase(TestCase):
+    @setup
+    def build_context(self):
+        self.context = context.Context('test', 1)
+
+    def test_types(self):
+        with self.context:
+            self.context.set('decimal_value', decimal.Decimal('6.66'))
+            self.context.set('date_value', datetime.date(2013, 12, 10))
+            self.context.set(
+                'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12))

+        data = pycernan_rec._serialize_context(self.context)
+        data['body'] = json.loads(data['body'])
+        assert_equal(data['body']['decimal_value'], 6.66)
+        assert_equal(data['body']['date_value'], '2013-12-10')
+        assert_equal(
+            datetime.datetime.strptime(
+                data['body']['datetime_value'], '%Y-%m-%dT%H:%M:%S'),
+            datetime.datetime(2013, 12, 10, 12, 12, 12))
+
+    def test_exception(self):
+        with self.context:
+            self.context.set('value', CantSerializeMe())
+
+        data = pycernan_rec._serialize_context(self.context)
+
+        # The serialization should fail, but that just
+        # means we don't have any data.
+        assert_equal(data['body'], None)
+
+
+class EncodeAvroTestCase(TestCase):
+    @setup
+    def build_context(self):
+        self.context = context.Context('test', 1)
+
+    def test_success(self):
+        with self.context:
+            self.context.set('foo', True)
+            self.context.set('bar.baz', 10.0)
+
+        data = pycernan_rec._serialize_context(self.context)
+        serialize(pycernan_rec.BLUEOX_AVRO_RECORD, [data])
+
+    def test_failure(self):
+        with self.context:
+            self.context.set('foo', True)
+            self.context.set('bar.baz', 10.0)
+            self.context.set('decimal_value', decimal.Decimal('6.66'))
+            self.context.set('date_value', datetime.date(2013, 12, 10))
+            self.context.set(
+                'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12))
+
+        data = pycernan_rec._serialize_context(self.context)
+        data['host'] = None
+        with assert_raises(DatumTypeException):
+            serialize(pycernan_rec.BLUEOX_AVRO_RECORD, [data])
+
+    def test_none_body(self):
+        with self.context:
+            self.context.set('bad_char', CantSerializeMe())
+
+        data = pycernan_rec._serialize_context(self.context)
+        assert_equal(data['body'], None)
+        serialize(pycernan_rec.BLUEOX_AVRO_RECORD, [data])
diff --git a/vendor/pycernan-0.0.10.zip b/vendor/pycernan-0.0.10.zip
new file mode 100644
index 0000000000000000000000000000000000000000..c47a78e266e7fed9ee337c74351025254c4cc90d
GIT binary patch
literal 7982
[7982 bytes of base85-encoded binary zip data omitted]
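Usage sketch (assumes the pycernan patch above is applied; the host string
below is illustrative -- with no argument, BLUEOX_PYCERNAN_HOST or the
default 127.0.0.1:2003 is used):

    import blueox

    # Route events through the pycernan avro recorder rather than the
    # default zmq collector.
    blueox.default_configure('127.0.0.1:2003',
                             recorder=blueox.PYCERNAN_RECORDER)

    with blueox.Context('request'):
        blueox.set('foo', True)

    # Flush and close the shared pycernan client; a close handler is
    # also registered via atexit.
    blueox.shutdown()
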
zmb#?02CM4>SGOT?yg)F1$bzs(_FFnqY4gWsH02bdcot@<$#wW%-cfLRmg#N(be)vr zr80}4e#*lhFd+vas;=$xc}h&-RS+;uVDl_od~Q|lDJ#{ai(f-#v6OAGl`jLu3Q1-W z0ZuF`K>#Q(W%`L8>AjlDq-Gw&Rvg=Rk?wT@C(8!G5;U1#*VcxXxo~b<>Y|dyq#4do z>SD4FHN`YvS$e`;DkVIGcQ}Z_`melnhB~>~I73~|-8@%_m(|?k)x(^Nmk;{+{B0G5 zDIxMC0HU@5u>C8ttAnEn4C)Sroufhjt3pdxxe4f#9Eh3;fLIm)&%feYKpmlQ3n<*& z(*$M%xBJubKe0&2K$O3~Izh1i{N)9lKYn%Uu3n_5DqK*myM6eJ0RZF$pdc*1O8L)S z#AVEN-Q)|57sC0gm|wl+%arpqxZ3Bupxj0KI|`!fc^PwE7xMxWa}DN~aTmS5%e?El zkrzA?^#5zte|VPy)n(#!O~?zP^FI;)yCr!UdR>3;0?Lf>4`%)e{j*zm8F*d2e*r8* zoRnXF{@P)`-r4@L@>lhL8G2n&asf5E`8(*Z66G@Ux&-2aiH0~YzYM$H(7#!w>tg=} zHv6~O|H=d|+j|ba8i+5z_C&w4_g7fH%)BlrUNE_d|DO4OL*!NN)wpuOl}B{v|88zC zxR;{LRrFOczd(Z#H&VYxUzYZ(?5lEq!H)P>?Eez?tMIFuegR+nSMci;UR41Nar6NI Q0>pm~AyfS4_aA`&0j0yk>;M1& literal 0 HcmV?d00001 From 3468293c9e5290199b999fc6a2aeef587de37102 Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Thu, 30 Aug 2018 02:40:31 -0400 Subject: [PATCH 09/16] Initial work on kafka shim --- blueox/__init__.py | 10 ++- blueox/ports.py | 10 +++ blueox/recorders/__init__.py | 0 blueox/recorders/kafka_recorder.py | 101 +++++++++++++++++++++++++++++ requirements.txt | 1 + 5 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 blueox/recorders/__init__.py create mode 100644 blueox/recorders/kafka_recorder.py diff --git a/blueox/__init__.py b/blueox/__init__.py index 2f3fc36..bdb04d1 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -30,20 +30,26 @@ from .errors import Error from .logger import LogHandler from .timer import timeit +from .recorders import kafka_recorder log = logging.getLogger(__name__) +OVERRIDE_KAFKA_RECORDER = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) + def configure(host, port, recorder=None): """Initialize blueox - This instructs the blueox system where to send it's logging data. If blueox is not configured, log data will + This instructs the blueox system where to send its logging data. If blueox is not configured, log data will be silently dropped. Currently we support logging through the network (and the configured host and port) to a blueoxd instances, or to the specified recorder function """ - if recorder: + if int(OVERRIDE_KAFKA_RECORDER) == 1: + log.info("Kafka override set, using kafka recorder") + _context_mod._recorder_function = kafka_recorder.send + elif recorder: _context_mod._recorder_function = recorder elif host and port: network.init(host, port) diff --git a/blueox/ports.py b/blueox/ports.py index 5b1ca2f..909b57b 100644 --- a/blueox/ports.py +++ b/blueox/ports.py @@ -41,3 +41,13 @@ def default_control_host(host=None): def default_collect_host(host=None): default_host = os.environ.get(ENV_VAR_COLLECT_HOST, DEFAULT_HOST) return _default_host(host, default_host, DEFAULT_COLLECT_PORT) + + +# For consistency, we'll abstract kafka connections in the same way +ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST' +DEFAULT_KAFKA_PORT = 9002 + + +def default_kafka_host(host=None): + default_host = os.environ.get(ENV_VAR_KAFKA_HOST, DEFAULT_HOST) + return _default_host(host, default_host, DEFAULT_KAFKA_PORT) diff --git a/blueox/recorders/__init__.py b/blueox/recorders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blueox/recorders/kafka_recorder.py b/blueox/recorders/kafka_recorder.py new file mode 100644 index 0000000..18bacbc --- /dev/null +++ b/blueox/recorders/kafka_recorder.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" +blueox.kafka +~~~~~~~~ + +This module provides the interface into Kafka + +:copyright: (c) 2018 by Aaron Biller?? 
+:license: ISC, see LICENSE for more details. + +""" +import atexit +import logging +import msgpack + +from kafka import KafkaProducer + +from .. import ports +from .. import utils + +log = logging.getLogger(__name__) + +# If we have pending outgoing messages, this is how long we'll wait after +# being told to exit. +LINGER_SHUTDOWN_MSECS = 2000 + +# Producer can be shared between threads +_kafka_producer = None + + +def init(host=None): + """Initialize the global kafka producer + + Supports a host arg with an overriding kafka host string + in the format 'hostname:port' + """ + global _kafka_producer + + host = ports.default_kafka_host(host) + + _kafka_producer = KafkaProducer(bootstrap_servers=host) + + +def _serialize_context(context): + context_dict = context.to_dict() + for key in ('host', 'type'): + if len(context_dict.get(key, "")) > 64: + raise ValueError("Value too long: %r" % key) + + context_dict = { + k: v.encode('utf-8') if isinstance(v, unicode) + else v for k, v in context_dict.items() + } + + try: + context_data = msgpack.packb(context_dict) + except TypeError: + try: + # If we fail to serialize our context, we can try again with an + # enhanced packer (it's slower though) + context_data = msgpack.packb(context_dict, + default=utils.msgpack_encode_default) + except TypeError: + log.exception("Serialization failure (not fatal, dropping data)") + + # One last try after dropping the body + context_dict['body'] = None + context_data = msgpack.packb(context_dict) + + return context_data + + +def send(context): + global _kafka_producer + + try: + context_data = _serialize_context(context) + except Exception: + log.exception("Failed to serialize context") + return + + if _kafka_producer: + try: + log.debug("Sending msg") + _kafka_producer.send('events', context_data) + except Exception: + log.exception("Failed during publish to kafka.") + else: + log.info("Skipping sending event %s", context.name) + + +def close(): + global _kafka_producer + + if _kafka_producer: + _kafka_producer.flush() + _kafka_producer.close(timeout=LINGER_SHUTDOWN_MSECS) + _kafka_producer = None + + +atexit.register(close) diff --git a/requirements.txt b/requirements.txt index dfdd0e7..61a4d6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ pyflakes tornado==3.2 boto yapf +kafka-python From cd6782beb53ee94cf275d01f60649ce7546d80ce Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Thu, 30 Aug 2018 15:24:32 -0400 Subject: [PATCH 10/16] Add kafka recorder tests --- blueox/__init__.py | 12 +- blueox/ports.py | 2 +- .../recorders/{kafka_recorder.py => kafka.py} | 43 ++++--- tests/ports_test.py | 28 +++- tests/recorders/__init__.py | 0 tests/recorders/kafka_test.py | 120 ++++++++++++++++++ 6 files changed, 177 insertions(+), 28 deletions(-) rename blueox/recorders/{kafka_recorder.py => kafka.py} (72%) create mode 100644 tests/recorders/__init__.py create mode 100644 tests/recorders/kafka_test.py diff --git a/blueox/__init__.py b/blueox/__init__.py index bdb04d1..6fae302 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -30,12 +30,10 @@ from .errors import Error from .logger import LogHandler from .timer import timeit -from .recorders import kafka_recorder +from .recorders import kafka log = logging.getLogger(__name__) -OVERRIDE_KAFKA_RECORDER = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) - def configure(host, port, recorder=None): """Initialize blueox @@ -46,9 +44,13 @@ def configure(host, port, recorder=None): Currently we support logging through the network (and the configured 
host and port) to a blueoxd instances, or to the specified recorder function """ - if int(OVERRIDE_KAFKA_RECORDER) == 1: + override_kafka_recorder = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) + + if int(override_kafka_recorder) == 1: log.info("Kafka override set, using kafka recorder") - _context_mod._recorder_function = kafka_recorder.send + host = ports.default_kafka_host() + kafka.init(host) + _context_mod._recorder_function = kafka.send elif recorder: _context_mod._recorder_function = recorder elif host and port: diff --git a/blueox/ports.py b/blueox/ports.py index 909b57b..4cdc65e 100644 --- a/blueox/ports.py +++ b/blueox/ports.py @@ -45,7 +45,7 @@ def default_collect_host(host=None): # For consistency, we'll abstract kafka connections in the same way ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST' -DEFAULT_KAFKA_PORT = 9002 +DEFAULT_KAFKA_PORT = 9092 def default_kafka_host(host=None): diff --git a/blueox/recorders/kafka_recorder.py b/blueox/recorders/kafka.py similarity index 72% rename from blueox/recorders/kafka_recorder.py rename to blueox/recorders/kafka.py index 18bacbc..386a7e0 100644 --- a/blueox/recorders/kafka_recorder.py +++ b/blueox/recorders/kafka.py @@ -9,14 +9,16 @@ :license: ISC, see LICENSE for more details. """ +from __future__ import absolute_import + import atexit import logging import msgpack +import threading from kafka import KafkaProducer -from .. import ports -from .. import utils +from blueox import utils log = logging.getLogger(__name__) @@ -24,21 +26,22 @@ # being told to exit. LINGER_SHUTDOWN_MSECS = 2000 -# Producer can be shared between threads -_kafka_producer = None +threadLocal = threading.local() + +# Context can be shared between threads +_kafka_hosts = None -def init(host=None): - """Initialize the global kafka producer - Supports a host arg with an overriding kafka host string - in the format 'hostname:port' - """ - global _kafka_producer +def init(host): + global _kafka_hosts - host = ports.default_kafka_host(host) + _kafka_hosts = host - _kafka_producer = KafkaProducer(bootstrap_servers=host) + +def _thread_connect(): + if _kafka_hosts and not getattr(threadLocal, 'kp', None): + threadLocal.kp = KafkaProducer(bootstrap_servers=_kafka_hosts) def _serialize_context(context): @@ -71,7 +74,7 @@ def _serialize_context(context): def send(context): - global _kafka_producer + _thread_connect() try: context_data = _serialize_context(context) @@ -79,10 +82,10 @@ def send(context): log.exception("Failed to serialize context") return - if _kafka_producer: + if _kafka_hosts and threadLocal.kp is not None: try: log.debug("Sending msg") - _kafka_producer.send('events', context_data) + threadLocal.kp.send('events', context_data) except Exception: log.exception("Failed during publish to kafka.") else: @@ -90,12 +93,10 @@ def send(context): def close(): - global _kafka_producer - - if _kafka_producer: - _kafka_producer.flush() - _kafka_producer.close(timeout=LINGER_SHUTDOWN_MSECS) - _kafka_producer = None + if getattr(threadLocal, 'kp', None): + threadLocal.kp.flush() + threadLocal.kp.close(timeout=LINGER_SHUTDOWN_MSECS) + threadLocal.kp = None atexit.register(close) diff --git a/tests/ports_test.py b/tests/ports_test.py index c7d278c..5b1e711 100644 --- a/tests/ports_test.py +++ b/tests/ports_test.py @@ -1,5 +1,8 @@ import os -from testify import * +from testify import ( + TestCase, + assert_equal, + teardown) from blueox import ports @@ -71,3 +74,26 @@ def test_env_port(self): os.environ['BLUEOX_HOST'] = 'master:123' host = ports.default_collect_host() 
assert_equal(host, "master:123") + + +class DefaultKafkaHost(TestCase): + @teardown + def clear_env(self): + try: + del os.environ['BLUEOX_KAFKA_HOST'] + except KeyError: + pass + + def test_emtpy(self): + host = ports.default_kafka_host() + assert_equal(host, '127.0.0.1:9092') + + def test_env(self): + os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s' + host = ports.default_kafka_host() + assert_equal(host, 'local.svc.team-me.aws.jk8s:9092') + + def test_env_port(self): + os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s:9002' + host = ports.default_kafka_host() + assert_equal(host, 'local.svc.team-me.aws.jk8s:9002') diff --git a/tests/recorders/__init__.py b/tests/recorders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py new file mode 100644 index 0000000..be62636 --- /dev/null +++ b/tests/recorders/kafka_test.py @@ -0,0 +1,120 @@ +import os +import random +import decimal +import datetime + +import msgpack +from testify import ( + TestCase, + setup, + teardown, + assert_equal) + +from blueox import default_configure +from blueox import utils +from blueox import network +from blueox import context +from blueox.recorders import kafka + + +class MockKafkaProducer(object): + last_topic = None + last_data = None + close_timeout = None + + def __call__(self, bootstrap_servers=None): + self.bootstrap_servers = bootstrap_servers + return self + + def send(self, topic, data): + self.last_topic = topic + self.last_data = data + + def flush(self): + pass + + def close(self, timeout=None): + self.close_timeout = timeout + + +class KafkaOverrideTestCase(TestCase): + @teardown + def clear_env(self): + try: + del os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] + except KeyError: + pass + + def test_configure_no_override(self): + default_configure() + assert_equal(context._recorder_function, network.send) + + def test_configure_override(self): + os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] = '1' + default_configure() + assert_equal(context._recorder_function, kafka.send) + + +class KafkaSendTestCase(TestCase): + @setup + def build_context(self): + self.context = context.Context('test', 1) + + @setup + def init_kafka(self): + self.port = random.randint(30000, 40000) + kafka.init('127.0.0.1:{}'.format(self.port)) + + @setup + def configure_kafka(self): + context._recorder_function = kafka.send + self.kp = MockKafkaProducer() + kafka.KafkaProducer = self.kp + + @teardown + def unconfigure_kafka(self): + context._recorder_function = None + + def test(self): + with self.context: + self.context.set('foo', True) + self.context.set('bar.baz', 10.0) + + data = msgpack.unpackb(self.kp.last_data) + assert_equal(self.kp.last_topic, 'events') + assert_equal(data['id'], 1) + assert_equal(data['type'], 'test') + assert_equal(utils.get_deep(data['body'], "bar.baz"), 10.0) + + kafka.close() + assert_equal(self.kp.close_timeout, kafka.LINGER_SHUTDOWN_MSECS) + + +class SerializeContextTestCase(TestCase): + @setup + def build_context(self): + self.context = context.Context('test', 1) + + def test_types(self): + with self.context: + self.context.set('decimal_value', decimal.Decimal("6.66")) + self.context.set('date_value', datetime.date(2013, 12, 10)) + self.context.set('datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) + + context_data = kafka._serialize_context(self.context) + data = msgpack.unpackb(context_data) + assert_equal(data['body']['decimal_value'], "6.66") + 
assert_equal(data['body']['date_value'], "2013-12-10") + assert_equal( + datetime.datetime.fromtimestamp(float(data['body']['datetime_value'])), + datetime.datetime(2013, 12, 10, 12, 12, 12)) + + def test_exception(self): + with self.context: + self.context.set('value', Exception('hello')) + + context_data = kafka._serialize_context(self.context) + data = msgpack.unpackb(context_data) + + # The serialization should fail, but that just means we don't have any data. + assert_equal(data['body'], None) From eb57ad5740c3f8df46e9ebb24958598d4784f79c Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Fri, 31 Aug 2018 01:15:43 -0400 Subject: [PATCH 11/16] Update recorder override functionality --- blueox/__init__.py | 55 +++++++++++-------- blueox/recorders/kafka.py | 13 ++++- blueox/{network.py => recorders/zmq.py} | 13 ++++- tests/recorders/kafka_test.py | 20 ++++--- .../zmq_test.py} | 38 +++++++------ 5 files changed, 86 insertions(+), 53 deletions(-) rename blueox/{network.py => recorders/zmq.py} (94%) rename tests/{network_test.py => recorders/zmq_test.py} (72%) diff --git a/blueox/__init__.py b/blueox/__init__.py index 6fae302..fae59eb 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -21,7 +21,6 @@ import os from . import utils -from . import network from . import ports from .context import ( Context, set, append, add, context_wrap, current_context, find_context, @@ -30,44 +29,53 @@ from .errors import Error from .logger import LogHandler from .timer import timeit -from .recorders import kafka +from .recorders import kafka, zmq log = logging.getLogger(__name__) +RECORDER_ZMQ = 'zmq' +RECORDER_KAFKA = 'kafka' +RECORDERS = { + RECORDER_ZMQ: zmq, + RECORDER_KAFKA: kafka, +} +DEFAULT_RECORDER = RECORDER_ZMQ + def configure(host, port, recorder=None): """Initialize blueox - This instructs the blueox system where to send its logging data. If blueox is not configured, log data will - be silently dropped. + This instructs the blueox system where to send its logging data. + If blueox is not configured, log data will be silently dropped. - Currently we support logging through the network (and the configured host and port) to a blueoxd instances, or - to the specified recorder function + Currently we support logging through the network (and the configured host + and port) to a blueoxd instances, or to the specified recorder function. """ - override_kafka_recorder = os.getenv('BLUEOX_OVERRIDE_KAFKA_RECORDER', 0) - - if int(override_kafka_recorder) == 1: - log.info("Kafka override set, using kafka recorder") - host = ports.default_kafka_host() - kafka.init(host) - _context_mod._recorder_function = kafka.send - elif recorder: + if callable(recorder): _context_mod._recorder_function = recorder - elif host and port: - network.init(host, port) - _context_mod._recorder_function = network.send + else: - log.info("Empty blueox configuration") - _context_mod._recorder_function = None + _rec = RECORDERS.get(recorder, None) + if _rec is not None: + _rec.init(host, port) + _context_mod._recorder_function = _rec.send + else: + log.info("Empty blueox configuration") + _context_mod._recorder_function = None -def default_configure(host=None): + +def default_configure(host=None, recorder=DEFAULT_RECORDER): """Configure BlueOx based on defaults Accepts a connection string override in the form `localhost:3514`. 
Respects environment variable BLUEOX_HOST """ - host = ports.default_collect_host(host) + _rec = RECORDERS.get(recorder, None) + if _rec is None: + _rec = RECORDERS.get(DEFAULT_RECORDER) + + host = _rec.default_host(host) hostname, port = host.split(':') try: @@ -75,8 +83,9 @@ def default_configure(host=None): except ValueError: raise Error("Invalid value for port") - configure(hostname, int_port) + configure(hostname, int_port, recorder=recorder) def shutdown(): - network.close() + zmq.close() + kafka.close() diff --git a/blueox/recorders/kafka.py b/blueox/recorders/kafka.py index 386a7e0..cc5e89b 100644 --- a/blueox/recorders/kafka.py +++ b/blueox/recorders/kafka.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -blueox.kafka +blueox.recorders.kafka ~~~~~~~~ This module provides the interface into Kafka @@ -18,6 +18,7 @@ from kafka import KafkaProducer +from blueox import ports from blueox import utils log = logging.getLogger(__name__) @@ -27,16 +28,22 @@ LINGER_SHUTDOWN_MSECS = 2000 +def default_host(host=None): + """Build a default host string for the kafka producer + """ + return ports.default_kafka_host(host) + + threadLocal = threading.local() # Context can be shared between threads _kafka_hosts = None -def init(host): +def init(host, port): global _kafka_hosts - _kafka_hosts = host + _kafka_hosts = '{}:{}'.format(host, port) def _thread_connect(): diff --git a/blueox/network.py b/blueox/recorders/zmq.py similarity index 94% rename from blueox/network.py rename to blueox/recorders/zmq.py index ede1a1e..88b227a 100644 --- a/blueox/network.py +++ b/blueox/recorders/zmq.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -blueox.network +blueox.recorders.zmq ~~~~~~~~ This module provides our interface into ZeroMQ @@ -9,6 +9,8 @@ :license: ISC, see LICENSE for more details. """ +from __future__ import absolute_import + import atexit import logging import msgpack @@ -16,7 +18,8 @@ import threading import zmq -from . 
import utils +from blueox import ports +from blueox import utils log = logging.getLogger(__name__) @@ -44,6 +47,12 @@ def check_meta_version(meta): raise ValueError(value) +def default_host(host=None): + """Build a default host string for the kafka producer + """ + return ports.default_collect_host(host) + + threadLocal = threading.local() # Context can be shared between threads diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py index be62636..2716112 100644 --- a/tests/recorders/kafka_test.py +++ b/tests/recorders/kafka_test.py @@ -10,11 +10,11 @@ teardown, assert_equal) -from blueox import default_configure +from blueox import default_configure, RECORDER_KAFKA from blueox import utils -from blueox import network from blueox import context from blueox.recorders import kafka +from blueox.recorders import zmq class MockKafkaProducer(object): @@ -47,11 +47,10 @@ def clear_env(self): def test_configure_no_override(self): default_configure() - assert_equal(context._recorder_function, network.send) + assert_equal(context._recorder_function, zmq.send) def test_configure_override(self): - os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] = '1' - default_configure() + default_configure(recorder=RECORDER_KAFKA) assert_equal(context._recorder_function, kafka.send) @@ -63,7 +62,7 @@ def build_context(self): @setup def init_kafka(self): self.port = random.randint(30000, 40000) - kafka.init('127.0.0.1:{}'.format(self.port)) + kafka.init('127.0.0.1', self.port) @setup def configure_kafka(self): @@ -99,14 +98,16 @@ def test_types(self): with self.context: self.context.set('decimal_value', decimal.Decimal("6.66")) self.context.set('date_value', datetime.date(2013, 12, 10)) - self.context.set('datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) + self.context.set( + 'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) context_data = kafka._serialize_context(self.context) data = msgpack.unpackb(context_data) assert_equal(data['body']['decimal_value'], "6.66") assert_equal(data['body']['date_value'], "2013-12-10") assert_equal( - datetime.datetime.fromtimestamp(float(data['body']['datetime_value'])), + datetime.datetime.fromtimestamp( + float(data['body']['datetime_value'])), datetime.datetime(2013, 12, 10, 12, 12, 12)) def test_exception(self): @@ -116,5 +117,6 @@ def test_exception(self): context_data = kafka._serialize_context(self.context) data = msgpack.unpackb(context_data) - # The serialization should fail, but that just means we don't have any data. + # The serialization should fail, but that just + # means we don't have any data. 
assert_equal(data['body'], None) diff --git a/tests/network_test.py b/tests/recorders/zmq_test.py similarity index 72% rename from tests/network_test.py rename to tests/recorders/zmq_test.py index dbfa4c2..c6ae28a 100644 --- a/tests/network_test.py +++ b/tests/recorders/zmq_test.py @@ -3,18 +3,24 @@ import decimal import datetime -from testify import * +from testify import ( + TestCase, + setup, + teardown, + assert_equal) import zmq import msgpack from blueox import utils -from blueox import network from blueox import context +from blueox.recorders import zmq as zmq_rec + class NoNetworkSendTestCase(TestCase): def test(self): """Verify that if network isn't setup, send just does nothing""" - network.send(context.Context('test', 1)) + zmq_rec.send(context.Context('test', 1)) + class NetworkSendTestCase(TestCase): @setup @@ -24,11 +30,11 @@ def build_context(self): @setup def init_network(self): self.port = random.randint(30000, 40000) - network.init("127.0.0.1", self.port) + zmq_rec.init("127.0.0.1", self.port) @setup def configure_network(self): - context._recorder_function = network.send + context._recorder_function = zmq_rec.send @teardown def unconfigure_network(self): @@ -36,7 +42,7 @@ def unconfigure_network(self): @setup def build_server_socket(self): - self.server = network._zmq_context.socket(zmq.PULL) + self.server = zmq_rec._zmq_context.socket(zmq.PULL) self.server.bind("tcp://127.0.0.1:%d" % self.port) @teardown @@ -45,7 +51,7 @@ def destroy_server(self): @teardown def destory_network(self): - network.close() + zmq_rec.close() def test(self): with self.context: @@ -53,8 +59,9 @@ def test(self): self.context.set('bar.baz', 10.0) event_meta, raw_data = self.server.recv_multipart() - network.check_meta_version(event_meta) - _, event_time, event_host, event_type = struct.unpack(network.META_STRUCT_FMT, event_meta) + zmq_rec.check_meta_version(event_meta) + _, event_time, event_host, event_type = struct.unpack( + zmq_rec.META_STRUCT_FMT, event_meta) assert_equal(event_type, 'test') data = msgpack.unpackb(raw_data) @@ -72,26 +79,25 @@ def test_types(self): with self.context: self.context.set('decimal_value', decimal.Decimal("6.66")) self.context.set('date_value', datetime.date(2013, 12, 10)) - self.context.set('datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) + self.context.set( + 'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) - meta_data, context_data = network._serialize_context(self.context) + meta_data, context_data = zmq_rec._serialize_context(self.context) data = msgpack.unpackb(context_data) assert_equal(data['body']['decimal_value'], "6.66") assert_equal(data['body']['date_value'], "2013-12-10") assert_equal( - datetime.datetime.fromtimestamp(float(data['body']['datetime_value'])), + datetime.datetime.fromtimestamp( + float(data['body']['datetime_value'])), datetime.datetime(2013, 12, 10, 12, 12, 12)) def test_exception(self): with self.context: self.context.set('value', Exception('hello')) - meta_data, context_data = network._serialize_context(self.context) + meta_data, context_data = zmq_rec._serialize_context(self.context) data = msgpack.unpackb(context_data) # The serialization should fail, but that just means we don't have any # data. 
         assert_equal(data['body'], None)
-
-
-

From 1e4292293dae6bd3b1069e3620fa5a7f82b2223d Mon Sep 17 00:00:00 2001
From: Aaron Biller
Date: Fri, 31 Aug 2018 01:16:29 -0400
Subject: [PATCH 12/16] Fix sundry syntax/formatting

---
 blueox/client.py            | 16 ++++----
 blueox/context.py           | 19 +++++----
 blueox/ports.py             |  4 +-
 tests/ports_test.py         |  4 +-
 tests/tornado_utils_test.py | 80 ++++++++++++++++++-------------------
 5 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/blueox/client.py b/blueox/client.py
index 8062c64..ec2f420 100644
--- a/blueox/client.py
+++ b/blueox/client.py
@@ -3,7 +3,8 @@
 blueox.client
 ~~~~~~~~

-This module provides utilities for writing client applications which connect or use blueox data.
+This module provides utilities for writing client applications
+which connect or use blueox data.

 :copyright: (c) 2012 by Rhett Garber
 :license: ISC, see LICENSE for more details.
@@ -35,7 +36,8 @@ def default_host(host=None):


 def decode_stream(stream):
-    """A generator which reads data out of the buffered file stream, unpacks and decodes the blueox events
+    """A generator which reads data out of the buffered file stream,
+    unpacks and decodes the blueox events

     This is useful for parsing on disk log files generated by blueoxd
     """
@@ -97,8 +99,8 @@ def subscribe_stream(control_host, subscribe):
         sock.connect("tcp://%s" % (stream_host,))

     # Now that we are connected, loop almost forever emitting events.
-    # If we fail to receive any events within the specified timeout, we'll quit
-    # and verify that we are connected to a valid stream.
+    # If we fail to receive any events within the specified timeout,
+    # we'll quit and verify that we are connected to a valid stream.
     poller = zmq.Poller()
     poller.register(sock, zmq.POLLIN)
     while True:
@@ -113,7 +115,7 @@ def subscribe_stream(control_host, subscribe):
             if not prefix and subscription and channel != subscription:
                 continue

-            yield msgpack.unpackb(data,encoding='utf8')
+            yield msgpack.unpackb(data, encoding='utf8')
         else:
             break
@@ -137,10 +139,10 @@ def stdin_stream():

 class Grouper(object):
     """Utility for grouping events and sub-events together.
-    
+
     Events fed into a Grouper are joined by their common 'id'. Encountering
     the parent event type will trigger emitting a list of all events and sub
-    events for that single id. 
+    events for that single id.

     This assumes that the parent event will be the last encountered.
diff --git a/blueox/context.py b/blueox/context.py
index c23fcb7..fc117a9 100644
--- a/blueox/context.py
+++ b/blueox/context.py
@@ -19,7 +19,6 @@
 import logging

 from . import utils
-from . import network

 log = logging.getLogger(__name__)

@@ -41,8 +40,10 @@ def __init__(self, type_name, id=None, sample=None):
         hierarchy of parent requests. Examples:
             '.foo' - Will generate a name like '.foo'
-            '.foo.bar' - If the parent ends in '.foo', the final name will be '.bar'
-            '^.foo' - Will use the top-most context, generating '.foo'
+            '.foo.bar' - If the parent ends in '.foo', the final name
+                will be '.bar'
+            '^.foo' - Will use the top-most context, generating
+                '.foo'
             'top.foo.bar' - The name will be based on the longest matched
                 parent context. If there is a parent context named 'top' and
                 a parent context named 'top.foo', the new context will be named
@@ -111,11 +112,13 @@ def __init__(self, type_name, id=None, sample=None):
         elif parent_ctx:
             self.id = parent_ctx.id
         else:
-            # Generate an id if one wasn't provided and we don't have any parents
-            # We're going to encode the time as the front 4 bytes so we have some order to the ids
-            # that could prove useful later on by making sorting a little easier.
-            self.id = (struct.pack(">L", int(time.time())) + os.urandom(12)).encode(
-                'hex')
+            # Generate an id if one wasn't provided and we don't have any
+            # parents. We're going to encode the time as the front 4 bytes
+            # so we have some order to the ids that could prove useful
+            # later on by making sorting a little easier.
+            self.id = (
+                struct.pack(">L", int(time.time())) +
+                os.urandom(12)).encode('hex')

         if parent_ctx and not parent_ctx.enabled:
             self.enabled = False
diff --git a/blueox/ports.py b/blueox/ports.py
index 4cdc65e..39470d9 100644
--- a/blueox/ports.py
+++ b/blueox/ports.py
@@ -28,7 +28,7 @@ def _default_host(host, default_host, default_port):
     if not host:
         host = default_host
     if ':' not in host:
-        host = "{}:{}".format(host, default_port)
+        host = '{}:{}'.format(host, default_port)
     return host

@@ -45,7 +45,7 @@ def default_collect_host(host=None):

 # For consistency, we'll abstract kafka connections in the same way
 ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST'
-DEFAULT_KAFKA_PORT = 9092
+DEFAULT_KAFKA_PORT = 9002


 def default_kafka_host(host=None):
diff --git a/tests/ports_test.py b/tests/ports_test.py
index 5b1e711..4ed75d1 100644
--- a/tests/ports_test.py
+++ b/tests/ports_test.py
@@ -86,12 +86,12 @@ def clear_env(self):

     def test_empty(self):
         host = ports.default_kafka_host()
-        assert_equal(host, '127.0.0.1:9092')
+        assert_equal(host, '127.0.0.1:9002')

     def test_env(self):
         os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s'
         host = ports.default_kafka_host()
-        assert_equal(host, 'local.svc.team-me.aws.jk8s:9092')
+        assert_equal(host, 'local.svc.team-me.aws.jk8s:9002')

     def test_env_port(self):
         os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s:9002'
         host = ports.default_kafka_host()
         assert_equal(host, 'local.svc.team-me.aws.jk8s:9002')
diff --git a/tests/tornado_utils_test.py b/tests/tornado_utils_test.py
index e6aedd4..085da93 100644
--- a/tests/tornado_utils_test.py
+++ b/tests/tornado_utils_test.py
@@ -1,9 +1,8 @@
 import time
-import pprint
 import random
 import collections
 import traceback
-from testify import *
+from testify import assert_equal, setup

 import tornado.ioloop
 import tornado.gen
@@ -14,7 +13,10 @@
 # vendor module. Tornado testing in Testify
 import tornado_test

-class AsyncHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler):
+
+class AsyncHandler(
+        blueox.tornado_utils.BlueOxRequestHandlerMixin,
+        tornado.web.RequestHandler):
     @blueox.tornado_utils.coroutine
     def get(self):
         loop = self.request.connection.stream.io_loop
@@ -22,7 +24,8 @@ def get(self):
         req_id = self.blueox_ctx.id
         blueox.set('async', True)

-        result = yield blueox.tornado_utils.AsyncHTTPClient(loop).fetch(self.application.test_url)
+        result = yield blueox.tornado_utils.AsyncHTTPClient(loop).fetch(
+            self.application.test_url)
         assert result.code == 200

         with blueox.Context('.extra'):
@@ -32,31 +35,40 @@ def get(self):
         self.finish()


-class AsyncErrorHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler):
+class AsyncErrorHandler(
+        blueox.tornado_utils.BlueOxRequestHandlerMixin,
+        tornado.web.RequestHandler):
     @blueox.tornado_utils.coroutine
     def get(self):
         loop = self.request.connection.stream.io_loop

-        called = yield tornado.gen.Task(loop.add_timeout, time.time() + random.randint(1, 2))
+        _ = yield tornado.gen.Task(loop.add_timeout, time.time() +
+                                   random.randint(1, 2))

         raise Exception('hi')

     def write_error(self, status_code, **kwargs):
         if 'exc_info' in kwargs:
-            blueox.set('exception', ''.join(traceback.format_exception(*kwargs["exc_info"])))
+            blueox.set('exception', ''.join(
+                traceback.format_exception(*kwargs["exc_info"])))

-        return super(AsyncErrorHandler, self).write_error(status_code, **kwargs)
+        return super(AsyncErrorHandler, self).write_error(status_code,
+                                                          **kwargs)


-class AsyncTimeoutHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler):
+class AsyncTimeoutHandler(
+        blueox.tornado_utils.BlueOxRequestHandlerMixin,
+        tornado.web.RequestHandler):
     @blueox.tornado_utils.coroutine
     def get(self):
         loop = self.request.connection.stream.io_loop
-        called = yield tornado.gen.Task(loop.add_timeout, time.time() + 1.0)
+        _ = yield tornado.gen.Task(loop.add_timeout, time.time() + 1.0)


-class AsyncRecurseTimeoutHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler):
+class AsyncRecurseTimeoutHandler(
+        blueox.tornado_utils.BlueOxRequestHandlerMixin,
+        tornado.web.RequestHandler):
     @blueox.tornado_utils.coroutine
     def post(self):
         loop = self.request.connection.stream.io_loop
@@ -64,8 +76,8 @@ def post(self):
         blueox.set("start", True)

         try:
-            f = yield http_client.fetch(self.request.body, request_timeout=0.5)
-        except tornado.httpclient.HTTPError, e:
+            _ = yield http_client.fetch(self.request.body, request_timeout=0.5)
+        except tornado.httpclient.HTTPError:
             self.write("got it")
         else:
             self.write("nope")
@@ -73,13 +85,14 @@ def post(self):
         blueox.set("end", True)


-class MainHandler(blueox.tornado_utils.BlueOxRequestHandlerMixin, tornado.web.RequestHandler):
+class MainHandler(
+        blueox.tornado_utils.BlueOxRequestHandlerMixin,
+        tornado.web.RequestHandler):
     def get(self):
         blueox.set('async', False)
         self.write("Hello, world")

-
 class SimpleTestCase(tornado_test.AsyncHTTPTestCase):
     @setup
     def setup_blueox(self):
@@ -112,11 +125,6 @@ def test_error(self):
         f = self.http_client.fetch(self.get_url("/error"), self.stop)
         resp = self.wait()

-        #for ctx_id in self.log_ctx:
-            #print ctx_id
-            #for ctx in self.log_ctx[ctx_id]:
-                #pprint.pprint(ctx.to_dict())
-
         assert_equal(len(self.log_ctx), 2)

         found_exception = False
@@ -128,31 +136,22 @@ def test_error(self):
         assert found_exception

     def test_timeout_error(self):
-        f = self.http_client.fetch(self.get_url("/timeout"), self.stop, request_timeout=0.5)
+        f = self.http_client.fetch(
+            self.get_url("/timeout"), self.stop, request_timeout=0.5)
         resp = self.wait()

-        #for ctx_id in self.log_ctx:
-            #print ctx_id
-            #for ctx in self.log_ctx[ctx_id]:
-                #pprint.pprint(ctx.to_dict())
-
         assert_equal(len(self.log_ctx), 1)
         ctx = self.log_ctx[self.log_ctx.keys()[0]][0]
         assert_equal(get_deep(ctx.to_dict(), 'body.response.code'), 599)

     def test_recurse_timeout_error(self):
         url = self.get_url("/timeout")
-        f = self.http_client.fetch(self.get_url("/recurse_timeout"), self.stop,
+        _ = self.http_client.fetch(self.get_url("/recurse_timeout"), self.stop,
                                    body=url, method="POST", request_timeout=1.5)
         resp = self.wait()

-        #for ctx_id in self.log_ctx:
-            #print ctx_id
-            #for ctx in self.log_ctx[ctx_id]:
-                #pprint.pprint(ctx.to_dict())
-
         assert_equal(resp.code, 200)
         assert_equal(resp.body, "got it")
@@ -161,7 +160,9 @@ def test_recurse_timeout_error(self):
         for ctx_list in self.log_ctx.values():
             for ctx in ctx_list:
                 c = ctx.to_dict()
-                if c['type'] == 'request.httpclient' and c['body']['response']['code'] == 599:
+                if (
+                        c['type'] == 'request.httpclient' and
+                        c['body']['response']['code'] == 599):
                     found_timeout = True

                 if c['type'] == 'request' and get_deep(c, 'body.start'):
@@ -175,13 +176,8 @@ def test_context(self):
         self.http_client.fetch(self.get_url("/async"), self.stop)
         resp = self.wait()

-        #for ctx_id in self.log_ctx:
-            #print
-            #print ctx_id
-            #for ctx in self.log_ctx[ctx_id]:
-                #pprint.pprint(ctx.to_dict())
-
-        # If everything worked properly, we should have two separate ids, one will have two contexts associated with it.
+        # If everything worked properly, we should have two separate ids,
+        # one will have two contexts associated with it.
         # Hopefully it's the right one.
         found_sync = None
         found_async = None
@@ -191,7 +187,9 @@ def test_context(self):
             if ctx.name == "request" and ctx.to_dict()['body']['async']:
                 assert_equal(len(ctx_list), 3)
                 found_async = ctx
-            if ctx.name == "request" and not ctx.to_dict()['body']['async']:
+            if (
+                    ctx.name == "request" and
+                    not ctx.to_dict()['body']['async']):
                 assert_equal(len(ctx_list), 1)
                 found_sync = ctx
             if ctx.name.endswith("httpclient"):

From 8f5a69a3eb86bd12f359e979227239eddfba432a Mon Sep 17 00:00:00 2001
From: Aaron Biller
Date: Fri, 31 Aug 2018 08:51:56 -0400
Subject: [PATCH 13/16] Clean up more syntax, flip constant names

---
 blueox/__init__.py            | 11 +++++------
 blueox/logger.py              |  7 +++++--
 blueox/store.py               | 13 +++++++------
 blueox/timer.py               |  4 +++-
 blueox/tornado_utils.py       | 34 ++++++++++++++++++----------------
 tests/recorders/kafka_test.py |  4 ++--
 6 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/blueox/__init__.py b/blueox/__init__.py
index fae59eb..a74caaa 100644
--- a/blueox/__init__.py
+++ b/blueox/__init__.py
@@ -18,7 +18,6 @@
 __url__ = 'https://github.com/rhettg/BlueOx'

 import logging
-import os

 from . import utils
 from . import ports
@@ -33,13 +32,13 @@

 log = logging.getLogger(__name__)

-RECORDER_ZMQ = 'zmq'
-RECORDER_KAFKA = 'kafka'
+ZMQ_RECORDER = 'zmq'
+KAFKA_RECORDER = 'kafka'
 RECORDERS = {
-    RECORDER_ZMQ: zmq,
-    RECORDER_KAFKA: kafka,
+    ZMQ_RECORDER: zmq,
+    KAFKA_RECORDER: kafka,
 }
-DEFAULT_RECORDER = RECORDER_ZMQ
+DEFAULT_RECORDER = ZMQ_RECORDER
diff --git a/blueox/logger.py b/blueox/logger.py
index 37d2180..6a7c025 100644
--- a/blueox/logger.py
+++ b/blueox/logger.py
@@ -3,7 +3,9 @@
 blueox.logger
 ~~~~~~~~

-This module provides integration with blueox and standard python logging module.
+This module provides integration with blueox and standard
+python logging module.
+

 :copyright: (c) 2012 by Rhett Garber
 :license: ISC, see LICENSE for more details.
@@ -20,7 +22,8 @@ class LogHandler(logging.Handler):
     Records standard fields such as logger name, level the message and if an
     exception was provided, the string formatted exception.

-    The type name, if not specified will be something like '.log'
+    The type name, if not specified will be something like
+    '.log'
     """

     def __init__(self, type_name=None):
diff --git a/blueox/store.py b/blueox/store.py
index 66f4f19..afeaa2f 100644
--- a/blueox/store.py
+++ b/blueox/store.py
@@ -226,7 +226,8 @@ def list_log_files(log_path):


 def filter_log_files_for_active(log_files):
-    """Filter our list of log files to remove those we expect might be active."""
+    """Filter our list of log files to remove those we expect might be active.
+    """
     out_log_files = []

     files_by_type = collections.defaultdict(list)
@@ -242,11 +243,11 @@ def filter_log_files_for_active(log_files):

         out_log_files += type_files

-        # If that last log file is old, then it's probably not being used either.
-        # We add a buffer of an hour just to make sure everything has rotated
-        # away safely when this is run close to midnight.
-        cutoff_date = (datetime.datetime.utcnow() - datetime.timedelta(hours=1)
-                       ).date()
+        # If that last log file is old, then it's probably not being used
+        # either. We add a buffer of an hour just to make sure everything has
+        # rotated away safely when this is run close to midnight.
+        cutoff_date = (
+            datetime.datetime.utcnow() - datetime.timedelta(hours=1)).date()
         if last_lf.date < cutoff_date:
             out_log_files.append(last_lf)
diff --git a/blueox/timer.py b/blueox/timer.py
index dc0cf18..77a22ff 100644
--- a/blueox/timer.py
+++ b/blueox/timer.py
@@ -3,7 +3,9 @@
 blueox.timer
 ~~~~~~~~

-This module has a timer context manager for easily tracking wall-clock time for some execution
+This module has a timer context manager for easily tracking wall-clock
+time for some execution
+

 :copyright: (c) 2012 by Rhett Garber
 :license: ISC, see LICENSE for more details.
diff --git a/blueox/tornado_utils.py b/blueox/tornado_utils.py
index ffbfa9b..af3e2c4 100644
--- a/blueox/tornado_utils.py
+++ b/blueox/tornado_utils.py
@@ -5,8 +5,8 @@

 This module provides hooks for using blueox with the Tornado async web server.
 Making blueox useful inside tornado is a challenge since you'll likely want a
-blueox context per request, but multiple requests can be going on at once inside
-tornado.
+blueox context per request, but multiple requests can be going on at once
+inside tornado.

 :copyright: (c) 2012 by Rhett Garber
 :license: ISC, see LICENSE for more details.
@@ -19,8 +19,6 @@
 import sys
 import time

-log = logging.getLogger(__name__)
-
 import tornado.web
 import tornado.gen
 import tornado.httpclient
@@ -29,6 +27,8 @@

 import blueox

+log = logging.getLogger(__name__)
+

 def _gen_wrapper(ctx, generator):
     """Generator Wrapper that starts/stops our context
@@ -112,7 +112,8 @@ def on_finish(self):


 class SampleRequestHandler(BlueOxRequestHandlerMixin, tornado.web.RequestHandler):
-    """Sample base request handler that provides basic information about the request.
+    """Sample base request handler that provides basic
+    information about the request.
     """

     def prepare(self):
@@ -123,8 +124,8 @@ def prepare(self):

     def write_error(self, status_code, **kwargs):
         if 'exc_info' in kwargs:
-            blueox.set('exception',
-                       ''.join(traceback.format_exception(*kwargs["exc_info"])))
+            blueox.set('exception', ''.join(
+                traceback.format_exception(*kwargs["exc_info"])))

         return super(SampleRequestHandler, self).write_error(status_code, **kwargs)
@@ -159,15 +160,16 @@ def fetch(self, request, callback=None, **kwargs):
         ctx.stop()

         # I'd love to use the future to handle the completion step, BUT, we
-        # need this to happen first. If the caller has provided a callback, we don't want them
-        # to get called before we do. Rather than poke into the internal datastructures, we'll just
-        # handle the callback explicitly
+        # need this to happen first. If the caller has provided a callback, we
+        # don't want them to get called before we do. Rather than poke into the
+        # internal datastructures, we'll just handle the callback explicitly

         def complete_context(response):
             ctx.start()

             ctx.set('response.code', response.code)
-            ctx.set('response.size', len(response.body) if response.body else 0)
+            ctx.set('response.size',
+                    len(response.body) if response.body else 0)

             ctx.done()

@@ -175,12 +177,12 @@ def complete_context(response):

         def fetch_complete(future):
             # This error handling is just copied from tornado.httpclient as
-            # we need to record a real HTTPError. 
httpclient might do the
+            # same thing again if it needs to deal with the caller's callbacks.
             exc = future.exception()
-            if isinstance(
-                    exc,
-                    tornado.httpclient.HTTPError) and exc.response is not None:
+            if (
+                    isinstance(exc, tornado.httpclient.HTTPError) and
+                    exc.response is not None):
                 response = exc.response
             elif exc is not None:
                 response = tornado.httpclient.HTTPResponse(
diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py
index 2716112..7ab3ee8 100644
--- a/tests/recorders/kafka_test.py
+++ b/tests/recorders/kafka_test.py
@@ -10,7 +10,7 @@
     teardown,
     assert_equal)

-from blueox import default_configure, RECORDER_KAFKA
+from blueox import default_configure, KAFKA_RECORDER
 from blueox import utils
 from blueox import context
 from blueox.recorders import kafka
@@ -50,7 +50,7 @@ def test_configure_no_override(self):
         assert_equal(context._recorder_function, zmq.send)

     def test_configure_override(self):
-        default_configure(recorder=RECORDER_KAFKA)
+        default_configure(recorder=KAFKA_RECORDER)
         assert_equal(context._recorder_function, kafka.send)


From 997938314946e36da5371a289bbe83704bc62383 Mon Sep 17 00:00:00 2001
From: Aaron Biller
Date: Fri, 31 Aug 2018 09:01:04 -0400
Subject: [PATCH 14/16] Add kafka recorder defaults to contrib

---
 blueox/contrib/__init__.py              |  1 -
 blueox/contrib/celery/__init__.py       |  1 -
 blueox/contrib/celery/celery_signals.py | 19 ++++++++++++-------
 blueox/contrib/django/__init__.py       |  1 -
 blueox/contrib/django/middleware.py     | 14 +++++++++++---
 blueox/contrib/flask/__init__.py        | 11 ++++++++---
 6 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/blueox/contrib/__init__.py b/blueox/contrib/__init__.py
index 8b13789..e69de29 100644
--- a/blueox/contrib/__init__.py
+++ b/blueox/contrib/__init__.py
@@ -1 +0,0 @@
-
diff --git a/blueox/contrib/celery/__init__.py b/blueox/contrib/celery/__init__.py
index 8b13789..e69de29 100644
--- a/blueox/contrib/celery/__init__.py
+++ b/blueox/contrib/celery/__init__.py
@@ -1 +0,0 @@
-
diff --git a/blueox/contrib/celery/celery_signals.py b/blueox/contrib/celery/celery_signals.py
index 3c20092..cc063db 100644
--- a/blueox/contrib/celery/celery_signals.py
+++ b/blueox/contrib/celery/celery_signals.py
@@ -1,8 +1,7 @@
 """Hooks for gathering celery task data into blueox.

-Importing this module will register signal handlers into Celery worker's runtime.
-
-We also will track creation of tasks on the client side.
+Importing this module will register signal handlers into Celery
+worker's runtime. We also will track creation of tasks on the client side.
 """
 import traceback
@@ -33,9 +32,9 @@ def on_task_sent(sender=None, body=None, **kwargs):
 @signals.task_sent.connect
 def on_task_sent(**kwargs):
     with blueox.Context('.celery.task_sent'):
-        # Arguments for this signal are different than the worker signals. Sometimes
-        # they are even different than what the documentation says. See also
-        # https://github.com/celery/celery/issues/1606
+        # Arguments for this signal are different from the worker signals.
+        # Sometimes they are even different from what the documentation
+        # says. 
See also https://github.com/celery/celery/issues/1606 blueox.set('task_id', kwargs.get('task_id', kwargs.get('id'))) blueox.set('task', str(kwargs['task'])) blueox.set('eta', kwargs['eta']) @@ -43,7 +42,13 @@ def on_task_sent(**kwargs): @signals.worker_process_init.connect def on_worker_process_init(**kwargs): - if hasattr(settings, 'BLUEOX_HOST'): + if hasattr(settings, 'BLUEOX_KAFKA_HOST'): + if settings.BLUEOX_KAFKA_HOST: + rec = blueox.KAFKA_RECORDER + blueox.default_configure(settings.BLUEOX_KAFKA_HOST, recorder=rec) + else: + blueox.configure(None, None) + elif hasattr(settings, 'BLUEOX_HOST'): if settings.BLUEOX_HOST: blueox.default_configure(settings.BLUEOX_HOST) else: diff --git a/blueox/contrib/django/__init__.py b/blueox/contrib/django/__init__.py index 8b13789..e69de29 100644 --- a/blueox/contrib/django/__init__.py +++ b/blueox/contrib/django/__init__.py @@ -1 +0,0 @@ - diff --git a/blueox/contrib/django/middleware.py b/blueox/contrib/django/middleware.py index b16f486..86fe8c7 100644 --- a/blueox/contrib/django/middleware.py +++ b/blueox/contrib/django/middleware.py @@ -1,6 +1,5 @@ import sys import traceback -import logging import blueox @@ -10,7 +9,14 @@ class Middleware(object): def __init__(self): - if hasattr(settings, 'BLUEOX_HOST'): + if hasattr(settings, 'BLUEOX_KAFKA_HOST'): + if settings.BLUEOX_KAFKA_HOST: + rec = blueox.KAFKA_RECORDER + blueox.default_configure( + settings.BLUEOX_KAFKA_HOST, recorder=rec) + else: + blueox.configure(None, None) + elif hasattr(settings, 'BLUEOX_HOST'): if settings.BLUEOX_HOST: blueox.default_configure(settings.BLUEOX_HOST) else: @@ -28,7 +34,9 @@ def process_request(self, request): headers = {} for k, v in request.META.iteritems(): - if k.startswith('HTTP_') or k in ('CONTENT_LENGTH', 'CONTENT_TYPE'): + if ( + k.startswith('HTTP_') or + k in ('CONTENT_LENGTH', 'CONTENT_TYPE')): headers[k] = v blueox.set('headers', headers) diff --git a/blueox/contrib/flask/__init__.py b/blueox/contrib/flask/__init__.py index 56fb178..620912c 100644 --- a/blueox/contrib/flask/__init__.py +++ b/blueox/contrib/flask/__init__.py @@ -23,7 +23,12 @@ class BlueOxMiddleware(object): def __init__(self, app): self.app = app - if 'BLUEOX_HOST' in app.config: + if 'BLUEOX_KAFKA_HOST' in app.config: + self.blueox_kafka_host = app.config['BLUEOX_KAFKA_HOST'] + if self.blueox_kafka_host: + rec = blueox.KAFKA_RECORDER + blueox.default_configure(self.blueox_kafka_host, recorder=rec) + elif 'BLUEOX_HOST' in app.config: self.blueox_host = app.config['BLUEOX_HOST'] if self.blueox_host: blueox.default_configure(self.blueox_host) @@ -45,8 +50,8 @@ def before_request(self, *args, **kwargs): headers = {} for k, v in request.environ.iteritems(): if ( - k.startswith('HTTP_') or k in - ('CONTENT_LENGTH', 'CONTENT_TYPE')): + k.startswith('HTTP_') or + k in ('CONTENT_LENGTH', 'CONTENT_TYPE')): headers[k] = v blueox.set('headers', headers) From ff06ac9430474bb9bce704be221d017e593f3980 Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Sat, 1 Sep 2018 02:09:38 -0400 Subject: [PATCH 15/16] jk it's a pycernan shim now --- blueox/__init__.py | 8 +- blueox/contrib/celery/celery_signals.py | 9 +- blueox/contrib/django/middleware.py | 8 +- blueox/contrib/flask/__init__.py | 11 +- blueox/ports.py | 12 +- blueox/recorders/kafka.py | 109 ---------------- blueox/recorders/pycernan.py | 139 ++++++++++++++++++++ blueox/recorders/zmq.py | 2 +- requirements.txt | 2 +- tests/ports_test.py | 25 ++-- tests/recorders/kafka_test.py | 122 ------------------ tests/recorders/pycernan_test.py | 164 
++++++++++++++++++++++++ vendor/pycernan-0.0.10.zip | Bin 0 -> 7982 bytes 13 files changed, 345 insertions(+), 266 deletions(-) delete mode 100644 blueox/recorders/kafka.py create mode 100644 blueox/recorders/pycernan.py delete mode 100644 tests/recorders/kafka_test.py create mode 100644 tests/recorders/pycernan_test.py create mode 100644 vendor/pycernan-0.0.10.zip diff --git a/blueox/__init__.py b/blueox/__init__.py index a74caaa..1dbd74f 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -28,15 +28,15 @@ from .errors import Error from .logger import LogHandler from .timer import timeit -from .recorders import kafka, zmq +from .recorders import pycernan, zmq log = logging.getLogger(__name__) ZMQ_RECORDER = 'zmq' -KAFKA_RECORDER = 'kafka' +PYCERNAN_RECORDER = 'pycernan' RECORDERS = { ZMQ_RECORDER: zmq, - KAFKA_RECORDER: kafka, + PYCERNAN_RECORDER: pycernan, } DEFAULT_RECORDER = ZMQ_RECORDER @@ -87,4 +87,4 @@ def default_configure(host=None, recorder=DEFAULT_RECORDER): def shutdown(): zmq.close() - kafka.close() + pycernan.close() diff --git a/blueox/contrib/celery/celery_signals.py b/blueox/contrib/celery/celery_signals.py index cc063db..c37dba6 100644 --- a/blueox/contrib/celery/celery_signals.py +++ b/blueox/contrib/celery/celery_signals.py @@ -42,10 +42,11 @@ def on_task_sent(**kwargs): @signals.worker_process_init.connect def on_worker_process_init(**kwargs): - if hasattr(settings, 'BLUEOX_KAFKA_HOST'): - if settings.BLUEOX_KAFKA_HOST: - rec = blueox.KAFKA_RECORDER - blueox.default_configure(settings.BLUEOX_KAFKA_HOST, recorder=rec) + if hasattr(settings, 'BLUEOX_PYCERNAN_HOST'): + if settings.BLUEOX_PYCERNAN_HOST: + rec = blueox.PYCERNAN_RECORDER + blueox.default_configure( + settings.BLUEOX_PYCERNAN_HOST, recorder=rec) else: blueox.configure(None, None) elif hasattr(settings, 'BLUEOX_HOST'): diff --git a/blueox/contrib/django/middleware.py b/blueox/contrib/django/middleware.py index 86fe8c7..1471ac2 100644 --- a/blueox/contrib/django/middleware.py +++ b/blueox/contrib/django/middleware.py @@ -9,11 +9,11 @@ class Middleware(object): def __init__(self): - if hasattr(settings, 'BLUEOX_KAFKA_HOST'): - if settings.BLUEOX_KAFKA_HOST: - rec = blueox.KAFKA_RECORDER + if hasattr(settings, 'BLUEOX_PYCERNAN_HOST'): + if settings.BLUEOX_PYCERNAN_HOST: + rec = blueox.PYCERNAN_RECORDER blueox.default_configure( - settings.BLUEOX_KAFKA_HOST, recorder=rec) + settings.BLUEOX_PYCERNAN_HOST, recorder=rec) else: blueox.configure(None, None) elif hasattr(settings, 'BLUEOX_HOST'): diff --git a/blueox/contrib/flask/__init__.py b/blueox/contrib/flask/__init__.py index 620912c..57de85d 100644 --- a/blueox/contrib/flask/__init__.py +++ b/blueox/contrib/flask/__init__.py @@ -23,11 +23,12 @@ class BlueOxMiddleware(object): def __init__(self, app): self.app = app - if 'BLUEOX_KAFKA_HOST' in app.config: - self.blueox_kafka_host = app.config['BLUEOX_KAFKA_HOST'] - if self.blueox_kafka_host: - rec = blueox.KAFKA_RECORDER - blueox.default_configure(self.blueox_kafka_host, recorder=rec) + if 'BLUEOX_PYCERNAN_HOST' in app.config: + self.blueox_pycernan_host = app.config['BLUEOX_PYCERNAN_HOST'] + if self.blueox_pycernan_host: + rec = blueox.PYCERNAN_RECORDER + blueox.default_configure( + self.blueox_pycernan_host, recorder=rec) elif 'BLUEOX_HOST' in app.config: self.blueox_host = app.config['BLUEOX_HOST'] if self.blueox_host: diff --git a/blueox/ports.py b/blueox/ports.py index 39470d9..d977847 100644 --- a/blueox/ports.py +++ b/blueox/ports.py @@ -43,11 +43,11 @@ def default_collect_host(host=None): return 
_default_host(host, default_host, DEFAULT_COLLECT_PORT) -# For consistency, we'll abstract kafka connections in the same way -ENV_VAR_KAFKA_HOST = 'BLUEOX_KAFKA_HOST' -DEFAULT_KAFKA_PORT = 9002 +# For consistency, we'll abstract pycernan connections in the same way +ENV_VAR_PYCERNAN_HOST = 'BLUEOX_PYCERNAN_HOST' +DEFAULT_PYCERNAN_PORT = 2003 -def default_kafka_host(host=None): - default_host = os.environ.get(ENV_VAR_KAFKA_HOST, DEFAULT_HOST) - return _default_host(host, default_host, DEFAULT_KAFKA_PORT) +def default_pycernan_host(host=None): + default_host = os.environ.get(ENV_VAR_PYCERNAN_HOST, DEFAULT_HOST) + return _default_host(host, default_host, DEFAULT_PYCERNAN_PORT) diff --git a/blueox/recorders/kafka.py b/blueox/recorders/kafka.py deleted file mode 100644 index cc5e89b..0000000 --- a/blueox/recorders/kafka.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- -""" -blueox.recorders.kafka -~~~~~~~~ - -This module provides the interface into Kafka - -:copyright: (c) 2018 by Aaron Biller?? -:license: ISC, see LICENSE for more details. - -""" -from __future__ import absolute_import - -import atexit -import logging -import msgpack -import threading - -from kafka import KafkaProducer - -from blueox import ports -from blueox import utils - -log = logging.getLogger(__name__) - -# If we have pending outgoing messages, this is how long we'll wait after -# being told to exit. -LINGER_SHUTDOWN_MSECS = 2000 - - -def default_host(host=None): - """Build a default host string for the kafka producer - """ - return ports.default_kafka_host(host) - - -threadLocal = threading.local() - -# Context can be shared between threads -_kafka_hosts = None - - -def init(host, port): - global _kafka_hosts - - _kafka_hosts = '{}:{}'.format(host, port) - - -def _thread_connect(): - if _kafka_hosts and not getattr(threadLocal, 'kp', None): - threadLocal.kp = KafkaProducer(bootstrap_servers=_kafka_hosts) - - -def _serialize_context(context): - context_dict = context.to_dict() - for key in ('host', 'type'): - if len(context_dict.get(key, "")) > 64: - raise ValueError("Value too long: %r" % key) - - context_dict = { - k: v.encode('utf-8') if isinstance(v, unicode) - else v for k, v in context_dict.items() - } - - try: - context_data = msgpack.packb(context_dict) - except TypeError: - try: - # If we fail to serialize our context, we can try again with an - # enhanced packer (it's slower though) - context_data = msgpack.packb(context_dict, - default=utils.msgpack_encode_default) - except TypeError: - log.exception("Serialization failure (not fatal, dropping data)") - - # One last try after dropping the body - context_dict['body'] = None - context_data = msgpack.packb(context_dict) - - return context_data - - -def send(context): - _thread_connect() - - try: - context_data = _serialize_context(context) - except Exception: - log.exception("Failed to serialize context") - return - - if _kafka_hosts and threadLocal.kp is not None: - try: - log.debug("Sending msg") - threadLocal.kp.send('events', context_data) - except Exception: - log.exception("Failed during publish to kafka.") - else: - log.info("Skipping sending event %s", context.name) - - -def close(): - if getattr(threadLocal, 'kp', None): - threadLocal.kp.flush() - threadLocal.kp.close(timeout=LINGER_SHUTDOWN_MSECS) - threadLocal.kp = None - - -atexit.register(close) diff --git a/blueox/recorders/pycernan.py b/blueox/recorders/pycernan.py new file mode 100644 index 0000000..1edfd5b --- /dev/null +++ b/blueox/recorders/pycernan.py @@ -0,0 +1,139 @@ +# -*- 
coding: utf-8 -*- +""" +blueox.recorders.pycernan +~~~~~~~~ + +This module provides the interface into pycernan + +:copyright: (c) 2018 by Aaron Biller?? +:license: ISC, see LICENSE for more details. + +""" +from __future__ import absolute_import + +import atexit +import datetime +import decimal +import json +import logging +import os +import threading + +from pycernan.avro import Client + +from blueox import ports + +log = logging.getLogger(__name__) + +_uname = os.uname()[1] + +# Global blueox avro schema definition +BLUEOX_AVRO_RECORD = { + "doc": "A BlueOx event", + "name": "blueox_event", + "namespace": "blueox.{}".format(_uname), + "type": "record", + "fields": [ + {"name": "id", "type": "string"}, + {"name": "type", "type": "string"}, + {"name": "host", "type": "string"}, + {"name": "pid", "type": "long"}, + {"name": "start", "type": "double"}, + {"name": "end", "type": "double"}, + {"name": "body", "type": ["null", "string"], "default": "null"} + ] +} + + +def default_host(host=None): + """Build a default host string for pycernan + """ + return ports.default_pycernan_host(host) + + +def _serializer(obj): + """Serialize native python objects + """ + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + elif isinstance(obj, decimal.Decimal): + return float(obj) + try: + obj = str(obj) + except Exception: + raise TypeError(repr(obj) + ' is not JSON serializable') + return obj + + +threadLocal = threading.local() + +# Context can be shared between threads +_client = None + + +def init(host, port): + global _client + + _client = Client(host=host, port=port) + + +def _thread_connect(): + if _client and not getattr(threadLocal, 'client', None): + threadLocal.client = _client + + +def _serialize_context(context): + context_dict = context.to_dict() + for key in ('host', 'type'): + if len(context_dict.get(key, '')) > 64: + raise ValueError('Value too long: %r' % key) + + context_dict['id'] = str(context_dict['id']) + + body = context_dict.get('body', None) + if body is not None: + try: + context_dict['body'] = json.dumps(body, default=_serializer) + except (TypeError, ValueError): + try: + context_dict['body'] = unicode(body) + except Exception: + log.exception( + 'Serialization failure (not fatal, dropping data)') + context_dict['body'] = None + + context_dict = { + k: v.encode('utf-8') if isinstance(v, unicode) + else v for k, v in context_dict.items() + } + + return context_dict + + +def send(context): + _thread_connect() + + try: + context_data = [_serialize_context(context)] + except Exception: + log.exception('Failed to serialize context') + return + + if _client and threadLocal.client is not None: + try: + log.debug('Sending msg') + threadLocal.client.publish( + BLUEOX_AVRO_RECORD, context_data, sync=False) + except Exception: + log.exception('Failed during publish to pycernan.') + else: + log.info('Skipping sending event %s', context.name) + + +def close(): + if getattr(threadLocal, 'client', None): + threadLocal.client.close() + threadLocal.client = None + + +atexit.register(close) diff --git a/blueox/recorders/zmq.py b/blueox/recorders/zmq.py index 88b227a..fa02207 100644 --- a/blueox/recorders/zmq.py +++ b/blueox/recorders/zmq.py @@ -48,7 +48,7 @@ def check_meta_version(meta): def default_host(host=None): - """Build a default host string for the kafka producer + """Build a default host string for the blueox collector """ return ports.default_collect_host(host) diff --git a/requirements.txt b/requirements.txt index 61a4d6d..4e7e345 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ pyflakes tornado==3.2 boto yapf -kafka-python +./vendor/pycernan-0.0.10.zip diff --git a/tests/ports_test.py b/tests/ports_test.py index 4ed75d1..0c5aeb5 100644 --- a/tests/ports_test.py +++ b/tests/ports_test.py @@ -76,24 +76,29 @@ def test_env_port(self): assert_equal(host, "master:123") -class DefaultKafkaHost(TestCase): +class DefaultPycernanHost(TestCase): @teardown def clear_env(self): try: - del os.environ['BLUEOX_KAFKA_HOST'] + del os.environ['BLUEOX_PYCERNAN_HOST'] except KeyError: pass def test_emtpy(self): - host = ports.default_kafka_host() - assert_equal(host, '127.0.0.1:9002') + host = ports.default_pycernan_host() + assert_equal(host, '127.0.0.1:2003') def test_env(self): - os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s' - host = ports.default_kafka_host() - assert_equal(host, 'local.svc.team-me.aws.jk8s:9002') + os.environ['BLUEOX_PYCERNAN_HOST'] = 'local.svc.team-me.aws.jk8s' + host = ports.default_pycernan_host() + assert_equal(host, 'local.svc.team-me.aws.jk8s:2003') def test_env_port(self): - os.environ['BLUEOX_KAFKA_HOST'] = 'local.svc.team-me.aws.jk8s:9002' - host = ports.default_kafka_host() - assert_equal(host, 'local.svc.team-me.aws.jk8s:9002') + os.environ['BLUEOX_PYCERNAN_HOST'] = 'local.svc.team-me.aws.jk8s:2003' + host = ports.default_pycernan_host() + assert_equal(host, 'local.svc.team-me.aws.jk8s:2003') + + def test_passed(self): + _host = 'my.wish.is.your.command' + host = ports.default_pycernan_host(_host) + assert_equal(host, 'my.wish.is.your.command:2003') diff --git a/tests/recorders/kafka_test.py b/tests/recorders/kafka_test.py deleted file mode 100644 index 7ab3ee8..0000000 --- a/tests/recorders/kafka_test.py +++ /dev/null @@ -1,122 +0,0 @@ -import os -import random -import decimal -import datetime - -import msgpack -from testify import ( - TestCase, - setup, - teardown, - assert_equal) - -from blueox import default_configure, KAFKA_RECORDER -from blueox import utils -from blueox import context -from blueox.recorders import kafka -from blueox.recorders import zmq - - -class MockKafkaProducer(object): - last_topic = None - last_data = None - close_timeout = None - - def __call__(self, bootstrap_servers=None): - self.bootstrap_servers = bootstrap_servers - return self - - def send(self, topic, data): - self.last_topic = topic - self.last_data = data - - def flush(self): - pass - - def close(self, timeout=None): - self.close_timeout = timeout - - -class KafkaOverrideTestCase(TestCase): - @teardown - def clear_env(self): - try: - del os.environ['BLUEOX_OVERRIDE_KAFKA_RECORDER'] - except KeyError: - pass - - def test_configure_no_override(self): - default_configure() - assert_equal(context._recorder_function, zmq.send) - - def test_configure_override(self): - default_configure(recorder=KAFKA_RECORDER) - assert_equal(context._recorder_function, kafka.send) - - -class KafkaSendTestCase(TestCase): - @setup - def build_context(self): - self.context = context.Context('test', 1) - - @setup - def init_kafka(self): - self.port = random.randint(30000, 40000) - kafka.init('127.0.0.1', self.port) - - @setup - def configure_kafka(self): - context._recorder_function = kafka.send - self.kp = MockKafkaProducer() - kafka.KafkaProducer = self.kp - - @teardown - def unconfigure_kafka(self): - context._recorder_function = None - - def test(self): - with self.context: - self.context.set('foo', True) - self.context.set('bar.baz', 10.0) - - data = msgpack.unpackb(self.kp.last_data) - 
assert_equal(self.kp.last_topic, 'events') - assert_equal(data['id'], 1) - assert_equal(data['type'], 'test') - assert_equal(utils.get_deep(data['body'], "bar.baz"), 10.0) - - kafka.close() - assert_equal(self.kp.close_timeout, kafka.LINGER_SHUTDOWN_MSECS) - - -class SerializeContextTestCase(TestCase): - @setup - def build_context(self): - self.context = context.Context('test', 1) - - def test_types(self): - with self.context: - self.context.set('decimal_value', decimal.Decimal("6.66")) - self.context.set('date_value', datetime.date(2013, 12, 10)) - self.context.set( - 'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12)) - - context_data = kafka._serialize_context(self.context) - data = msgpack.unpackb(context_data) - assert_equal(data['body']['decimal_value'], "6.66") - assert_equal(data['body']['date_value'], "2013-12-10") - assert_equal( - datetime.datetime.fromtimestamp( - float(data['body']['datetime_value'])), - datetime.datetime(2013, 12, 10, 12, 12, 12)) - - def test_exception(self): - with self.context: - self.context.set('value', Exception('hello')) - - context_data = kafka._serialize_context(self.context) - data = msgpack.unpackb(context_data) - - # The serialization should fail, but that just - # means we don't have any data. - assert_equal(data['body'], None) diff --git a/tests/recorders/pycernan_test.py b/tests/recorders/pycernan_test.py new file mode 100644 index 0000000..56ef550 --- /dev/null +++ b/tests/recorders/pycernan_test.py @@ -0,0 +1,164 @@ +import datetime +import decimal +import json +import random + +from testify import ( + TestCase, + setup, + teardown, + assert_equal, + assert_raises) + +from pycernan.avro.serde import serialize +from pycernan.avro.exceptions import DatumTypeException + +from blueox import default_configure, PYCERNAN_RECORDER +from blueox import utils +from blueox import context +from blueox.recorders import pycernan as pycernan_rec +from blueox.recorders import zmq + + +class MockPycernanClient(object): + last_schema = None + last_batch = None + last_sync = None + + def __call__(self, host=None, port=None): + self.host = host + self.port = port + return self + + def publish(self, schema, batch, sync=None): + self.last_schema = schema + self.last_batch = batch + self.last_sync = sync + + def close(self): + pass + + +class CantSerializeMe(object): + def __repr__(self): + return chr(167) + + +class PycernanOverrideTestCase(TestCase): + def test_configure_no_override(self): + default_configure() + assert_equal(context._recorder_function, zmq.send) + + def test_configure_override(self): + pycernan_rec.Client = MockPycernanClient() + default_configure(recorder=PYCERNAN_RECORDER) + assert_equal(context._recorder_function, pycernan_rec.send) + + +class PycernanSendTestCase(TestCase): + @setup + def build_context(self): + self.context = context.Context('test', 1) + + @setup + def init_pycernan(self): + self.port = random.randint(30000, 40000) + self.client = MockPycernanClient() + pycernan_rec.Client = self.client + pycernan_rec.init('127.0.0.1', self.port) + + @setup + def configure_pycernan(self): + context._recorder_function = pycernan_rec.send + + @teardown + def unconfigure_pycernan(self): + context._recorder_function = None + + @teardown + def destroy_recorder(self): + pycernan_rec.close() + + def test(self): + with self.context: + self.context.set('foo', True) + self.context.set('bar.baz', 10.0) + + data = self.client.last_batch[0] + data['body'] = json.loads(data['body']) + assert_equal(self.client.last_schema, 
pycernan_rec.BLUEOX_AVRO_RECORD)
+        assert_equal(self.client.last_sync, False)
+        assert_equal(data['id'], '1')
+        assert_equal(data['type'], 'test')
+        assert_equal(utils.get_deep(data['body'], 'bar.baz'), 10.0)
+
+        assert_equal(self.client.host, '127.0.0.1')
+        assert_equal(self.client.port, self.port)
+
+
+class SerializeContextTestCase(TestCase):
+    @setup
+    def build_context(self):
+        self.context = context.Context('test', 1)
+
+    def test_types(self):
+        with self.context:
+            self.context.set('decimal_value', decimal.Decimal('6.66'))
+            self.context.set('date_value', datetime.date(2013, 12, 10))
+            self.context.set(
+                'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12))
+
+        data = pycernan_rec._serialize_context(self.context)
+        data['body'] = json.loads(data['body'])
+        assert_equal(data['body']['decimal_value'], 6.66)
+        assert_equal(data['body']['date_value'], '2013-12-10')
+        assert_equal(
+            datetime.datetime.strptime(
+                data['body']['datetime_value'], '%Y-%m-%dT%H:%M:%S'),
+            datetime.datetime(2013, 12, 10, 12, 12, 12))
+
+    def test_exception(self):
+        with self.context:
+            self.context.set('value', CantSerializeMe())
+
+        data = pycernan_rec._serialize_context(self.context)
+
+        # The serialization should fail, but that just
+        # means we don't have any data.
+        assert_equal(data['body'], None)
+
+
+class EncodeAvroTestCase(TestCase):
+    @setup
+    def build_context(self):
+        self.context = context.Context('test', 1)
+
+    def test_success(self):
+        with self.context:
+            self.context.set('foo', True)
+            self.context.set('bar.baz', 10.0)
+
+        data = pycernan_rec._serialize_context(self.context)
+        serialize(pycernan_rec.BLUEOX_AVRO_RECORD, [data])
+
+    def test_failure(self):
+        with self.context:
+            self.context.set('foo', True)
+            self.context.set('bar.baz', 10.0)
+            self.context.set('decimal_value', decimal.Decimal('6.66'))
+            self.context.set('date_value', datetime.date(2013, 12, 10))
+            self.context.set(
+                'datetime_value', datetime.datetime(2013, 12, 10, 12, 12, 12))
+
+        data = pycernan_rec._serialize_context(self.context)
+        data['host'] = None
+        with assert_raises(DatumTypeException):
+            serialize(pycernan_rec.BLUEOX_AVRO_RECORD, [data])
+
+    def test_none_body(self):
+        with self.context:
+            self.context.set('bad_char', CantSerializeMe())
+
+        data = pycernan_rec._serialize_context(self.context)
+        assert_equal(data['body'], None)
+        serialize(pycernan_rec.BLUEOX_AVRO_RECORD, [data])
diff --git a/vendor/pycernan-0.0.10.zip b/vendor/pycernan-0.0.10.zip
new file mode 100644
index 0000000000000000000000000000000000000000..c47a78e266e7fed9ee337c74351025254c4cc90d
GIT binary patch
literal 7982
[base85-encoded zip payload omitted]
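With PATCH 15 applied, an application no longer reaches for an environment
override; it picks a recorder through the constants exported from
blueox/__init__.py. The following is a minimal usage sketch, not part of the
patch series itself: 'pycernan.internal' is a hypothetical host name, and
everything else comes from the patches above.

    import blueox

    # Select the pycernan recorder introduced in PATCH 15. The default
    # pycernan port (2003) is appended by ports.default_pycernan_host
    # when the host string carries no port of its own.
    blueox.default_configure('pycernan.internal',
                             recorder=blueox.PYCERNAN_RECORDER)

    # Contexts are recorded exactly as with the default ZeroMQ recorder.
    with blueox.Context('request'):
        blueox.set('user_id', 42)

    # Flush and close the recorder client (close is also registered
    # via atexit in the recorder modules).
    blueox.shutdown()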
zY)}xRo5i8n8qXJ8d6BWsa1!J&JJ0^aVe1-szq_Zcgk3VS^KGh}-Awlhhr8g7J{uMI zowW4~wpfcVJk)T^BGD=Dc~_|PAx*WNQint&Q8p_3yuAHo_U)P{du9bYd@2Th@N>H7 z=jJdw*o|P$)&0JPWpY6I|cx6R!NpL zR$CZ2>&oue7-|dH*5a^GuC*uVm4!a9kp_aLtk5lv}b2l2`;zQ?Jq&=&yEr}KkHyi87aLuARATVbf{8B4Fk?-VzcFC(9vb&#WXHij`vVR(O?D> zNemGr^sxZ|y5Cyv&W@OqDD7$Z5)Z!bfsP_BFs>@Pk}s>nd!!51P5ySy>{JE zo{9K-o_!#dg*C0&wKD@Id&yY3c16p@ z6sgr>ho-4E-X4nH7HFsl{3%K-o-g>|HL{a9JfN!5$kXkIHZ084>^*LzK|#iRrF4Py zqSS}!xSY8q1JsY$oIns4aU{+fY&5O1B{ko-X-K&x)cz3r56~0y$`N3zEPB~p3(OFC zP*-_3=2?#R>F1*F7{#mkNkW14wsZVGkma_3EcL~Hwf>Ta$5@!1fbRnNMh{^(Ip?zr zLH#y#Vafj}6xXS>O; z(MKgO0(&f4X=x&+K>bwd>RcNB#^#}n2NVm_MvqC`j*F7*+0o^yBTuRXX+d9E_#&S+ zn&5d$*i%S-c%55wqvF_Y)M?p8UMkLuf*d1qWSR56TUwcnjy^BhEAPk>QX&qv4kqJ1 zJ*9VX{r<;a7sd*Yauf=hg@1Oi&yb-=&4hEinYU%1L9o->O2F%E*_W{#CrfCFn7$3=rl6ITkE#y_RFRY=7su>Y! ztb}4RMPg^F_B{8BQ&KpMaMs>U>8BWb4q@GFV<;h(m2N~)o=r?l&xCeXn#a%0;2e!% zAaSFas97YpRXja|C};ej63MgWa0GXVki}>o?Ww_GO6!QfQQF zHhfULn?IR@*Y~ZR9gJmYd4_xu9q#CQDOQIcJ+eaq0N!K%E9r#UK;fdf6x*bfa4FP)C}j*a$3_m6D*d3W`7$wMZet<1hr2nfD)|GkzEcs0KtXv%JqshFeLwBXc7 z&s^2hbJvlzyqmrAUBBxw(e}KvfD2ltX5=t8^VGCi)Xzd; zIKs!4xVB3PK7<~`cjnx%Q@WtMC5g|?3L%ryr)g;hY#uKkCWU22+GZrVe~^=Y{<_`n zL#~5~)@y%4=UZ9jvPG)hUJSQJ+r~?M#C)`}pBc6%S6c@48WP8t(!M)I9xoF({^3u* zqgUlHmN3X>q4&YI7QOr!%DPfDf%|NkgV{Gf;cmC^HOE0=w7F|cofiL zfk+tY5DCNG-zE$$P-hD$;<$!rLt>_RfUh@ngcR|TQS^+Q8FA{R(LV!i#uVcq>5^)E zHG3B9baBX#4~`?$b6wKd_CvuWEIxe)bupjp?;7r7jbePT4zaSI zSbhW0fRzbT6i(xt)cUf|4Y}s-YGct0XwVDMC!;qMC0C7SypW!n9VeHG%Q4WWDpigg zzRFHLtuH5)k0-!H<1mXM~)It8MZ%#Wv#?PQcAeF z3@OwfSWzeP6jnq>`l(&S|@r9@8D0R^D6RTH#5dN1sv75d?Ehzj`wYOEvv;uvpy&B zMY>vbvDylf3zjzwhlbN`S#0?6KW}nQoK}4lb*9Xc{s}tg?p4raC{SiITmD&Pk*d1v zr~9N%4N>>)@0QUs<5rvTcx@jto{vI_y?gG9d)gJjBlDleC?ixaPaE*g2FR=UJ4j>;u(t#NI{sVp1wXS5NzHt*ZhD4YG4Jr zYo=%jiCWi9Fz|mIYbQ;jLRn=EZI+fnVRf{z>T_7RY3*0|w4^{Cxuj<>fQbYxyE3-- z`xELHa)P9mMqxRM&1;IHFde0e4ORI`qMX<|2VV0I7p(R;L-Lv=yN}`=!}j5X3tmdo zcg72>^Cdcj`;1K|coX&?wdj8%{KjmeG&r$P6L|J&N=&&)W{X+j0fWiy^c&mcaN&_x z38fis)0?oIt(=LI?+d3VDh*M&X1?YO-mIe)1Kt8p`rFv6YH$h$8)wR~r<~%VTqW^= zf}t=ZpW@GZ;k37j17eE1Hkoeghvp=p7)z3#R2$S=}AFOsA_cIp*1E zjfJ_)w$x;=!zLc~EGMBfL29aPOIfNFRcAvzCR4ANc?0C>^p9NPq zHJfD=d}6~su*zr9;7t&!5&mOH(4)g?ZlQqogb4cXt85L4^x3gx9%?!%GQJ5&<0f-s zZo$GK=0IfwgF%rthl4l4?i-h5t9~b;Qga^_-1W|nIV#z4nvkCuE@QDHiu+DBZ8EP?x9@W@O{N13uB}oCQ8{$j(B1J)RQZ%Hj zvwJU3b`U07it*q1+xWvT{=ojz`B~ zhNSMkclC4`N#%M7@n~uDlFjfl*Y>rtVEK<#!F9g=T2x05isV82F`~I_T7}WAs*kOE zTQeU;1~=|UoFHBJakXj8^bc#o3bnFgv4LAU{C{sFpHB)0ozg@)Vln~{8O^Sw! 
zmb#?02CM4>SGOT?yg)F1$bzs(_FFnqY4gWsH02bdcot@<$#wW%-cfLRmg#N(be)vr zr80}4e#*lhFd+vas;=$xc}h&-RS+;uVDl_od~Q|lDJ#{ai(f-#v6OAGl`jLu3Q1-W z0ZuF`K>#Q(W%`L8>AjlDq-Gw&Rvg=Rk?wT@C(8!G5;U1#*VcxXxo~b<>Y|dyq#4do z>SD4FHN`YvS$e`;DkVIGcQ}Z_`melnhB~>~I73~|-8@%_m(|?k)x(^Nmk;{+{B0G5 zDIxMC0HU@5u>C8ttAnEn4C)Sroufhjt3pdxxe4f#9Eh3;fLIm)&%feYKpmlQ3n<*& z(*$M%xBJubKe0&2K$O3~Izh1i{N)9lKYn%Uu3n_5DqK*myM6eJ0RZF$pdc*1O8L)S z#AVEN-Q)|57sC0gm|wl+%arpqxZ3Bupxj0KI|`!fc^PwE7xMxWa}DN~aTmS5%e?El zkrzA?^#5zte|VPy)n(#!O~?zP^FI;)yCr!UdR>3;0?Lf>4`%)e{j*zm8F*d2e*r8* zoRnXF{@P)`-r4@L@>lhL8G2n&asf5E`8(*Z66G@Ux&-2aiH0~YzYM$H(7#!w>tg=} zHv6~O|H=d|+j|ba8i+5z_C&w4_g7fH%)BlrUNE_d|DO4OL*!NN)wpuOl}B{v|88zC zxR;{LRrFOczd(Z#H&VYxUzYZ(?5lEq!H)P>?Eez?tMIFuegR+nSMci;UR41Nar6NI Q0>pm~AyfS4_aA`&0j0yk>;M1& literal 0 HcmV?d00001 From 344b2d1e6ad705171744f99ddba44ec19cd695b5 Mon Sep 17 00:00:00 2001 From: Aaron Biller Date: Tue, 4 Sep 2018 12:42:44 -0400 Subject: [PATCH 16/16] Add changes and bump version --- CHANGES | 8 ++++++++ blueox/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index b46839c..974ccbd 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,11 @@ +blueox (0.12.0) + * Move recorders to separate module + * Add pycernan recorder + * Update the way blueox is configured to allow desired + recorder from imported constant + +-- Aaron Biller Tue Sep 4 12:40:45 2018 -0400 + blueox (0.11.6.4) * Fix encoding of unknown types diff --git a/blueox/__init__.py b/blueox/__init__.py index 1dbd74f..69d63c3 100644 --- a/blueox/__init__.py +++ b/blueox/__init__.py @@ -9,7 +9,7 @@ """ __title__ = 'blueox' -__version__ = '0.11.6.4' +__version__ = '0.12.0' __author__ = 'Rhett Garber' __author_email__ = 'rhettg@gmail.com' __license__ = 'ISC'