|
| 1 | +""" |
| 2 | + Avro schema registry module: Deals with encoding and decoding of messages with avro schemas |
| 3 | +
|
| 4 | +""" |
| 5 | +import sys |
| 6 | + |
| 7 | +from confluent_kafka import Producer |
| 8 | + |
# Recognised level names. NOTE(review): presumably these are the schema
# registry compatibility levels -- confirm against the registry client.
VALID_LEVELS = [
    'NONE',
    'FULL',
    'FORWARD',
    'BACKWARD',
]
| 10 | + |
| 11 | + |
def loads(schema_str):
    """
    Parse a schema given a schema string.

    The parser entry point is looked up on the ``avro.schema`` module itself
    instead of being inferred from the interpreter version: some avro
    distributions expose ``parse`` and others only ``Parse``, and the
    interpreter's major version does not reliably tell which one is installed.

    :param schema_str: schema definition as a string
    :returns: whatever the avro schema parser returns for ``schema_str``
    :raises AttributeError: if the schema module offers neither ``parse``
        nor ``Parse``
    """
    # Prefer the lowercase spelling; fall back to the capitalised one.
    parse = getattr(schema, 'parse', None)
    if parse is None:
        parse = schema.Parse
    return parse(schema_str)
| 18 | + |
| 19 | + |
def load(fp):
    """
    Parse a schema stored in a file.

    :param fp: path of the file holding the schema definition
    :returns: the result of ``loads`` applied to the file's contents
    """
    with open(fp) as schema_file:
        schema_text = schema_file.read()
    return loads(schema_text)
| 24 | + |
| 25 | + |
| 26 | +# avro.schema.RecordSchema and avro.schema.PrimitiveSchema classes are not hashable. Hence defining __hash__ for them explicitly as a quick fix |
def _hash_func(self):
    """Return a hash derived from the object's string representation."""
    as_text = str(self)
    return hash(as_text)
| 29 | + |
| 30 | + |
# avro is optional: when it cannot be imported the module still loads, but
# ``schema`` stays undefined and nothing is patched.
try:
    from avro import schema
except ImportError:
    pass
else:
    # Install a string-based __hash__ on both schema classes.
    schema.RecordSchema.__hash__ = _hash_func
    schema.PrimitiveSchema.__hash__ = _hash_func
| 38 | + |
| 39 | + |
class ClientError(Exception):
    """
    Error thrown by Schema Registry clients.

    :param message: description of the failure; also used as the exception's
        string form
    :param http_code: optional HTTP status code stored on the exception
        (defaults to None)
    """

    def __init__(self, message, http_code=None):
        self.http_code = http_code
        self.message = message
        # Hand the textual form to Exception so ``args`` carries it too.
        text = self.__str__()
        super(ClientError, self).__init__(text)

    def __str__(self):
        return self.message

    def __repr__(self):
        template = "ClientError(error={error})"
        return template.format(error=self.message)
| 53 | + |
| 54 | + |
| 55 | +from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient |
| 56 | +from confluent_kafka.avro.serializer import SerializerError |
| 57 | +from confluent_kafka.avro.serializer.message_serializer import MessageSerializer |
| 58 | + |
| 59 | + |
class AvroProducer(Producer):
    """
    Kafka Producer client which does avro schema encoding to messages.
    Handles schema registration, Message serialization.

    Constructor takes below parameters

    :param config: dict object with config parameters containing url for
        schema registry (``schema.registry.url``). The dict is copied, so the
        caller's object is never modified.
    :param default_key_schema: Optional avro schema for key, used when
        ``produce`` is called without ``key_schema``
    :param default_value_schema: Optional avro schema for value, used when
        ``produce`` is called without ``value_schema``
    :raises ValueError: if ``schema.registry.url`` is missing from ``config``
    """

    def __init__(self, config, default_key_schema=None,
                 default_value_schema=None):
        if 'schema.registry.url' not in config:
            raise ValueError("Missing parameter: schema.registry.url")

        # Work on a copy: the registry url is stripped before the config is
        # handed to the base Producer, and deleting it from the caller's dict
        # would make that dict unusable for building another AvroProducer.
        producer_config = dict(config)
        schema_registry_url = producer_config.pop('schema.registry.url')

        super(AvroProducer, self).__init__(producer_config)
        self._serializer = MessageSerializer(CachedSchemaRegistryClient(url=schema_registry_url))
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema

    def produce(self, **kwargs):
        """
        Sends message to kafka by encoding with specified avro schema.

        Accepts keyword arguments only. Any keyword not listed below is
        forwarded unchanged to the base class ``produce``.

        :param topic: topic name (required)
        :param value: object to serialize as the message value; ``None``
            (the default) is passed through without encoding
        :param value_schema: Avro schema for value; defaults to the schema
            given to the constructor
        :param key: object to serialize as the message key; ``None``
            (the default) is passed through without encoding
        :param key_schema: Avro schema for key; defaults to the schema given
            to the constructor
        :raises ClientError: if no topic name is given
        :raises SerializerError: if a value/key is supplied but no matching
            schema is available
        """
        # get schemas from kwargs if defined
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        # Compare against None rather than relying on truthiness: falsy
        # payloads such as {}, 0 or "" must still be avro-encoded instead of
        # being handed to the base producer unserialized.
        if value is not None:
            if not value_schema:
                raise SerializerError("Avro schema required for value")
            value = self._serializer.encode_record_with_schema(topic, value_schema, value)

        if key is not None:
            if not key_schema:
                raise SerializerError("Avro schema required for key")
            # The trailing True is passed positionally as in the original
            # call. NOTE(review): presumably it marks the record as a key --
            # confirm against MessageSerializer.encode_record_with_schema.
            key = self._serializer.encode_record_with_schema(topic, key_schema, key, True)

        super(AvroProducer, self).produce(topic, value, key, **kwargs)
0 commit comments