# encoding: utf-8
require 'logstash/codecs/base'
require 'logstash/util/charset'
require 'protocol_buffers' 

# This codec converts protobuf encoded messages into logstash events and vice versa. 
#
# Requires the protobuf definitions as ruby files. You can create those using the [ruby-protoc compiler](https://github.com/codekitchen/ruby-protocol-buffers).
# 
# The following shows a usage example for decoding events from a kafka stream:
# [source,ruby]
# kafka 
# {
#  zk_connect => "127.0.0.1"
#  topic_id => "your_topic_goes_here"
#  codec => protobuf 
#  {
#    class_name => "Animal::Unicorn"
#    include_path => ['/path/to/protobuf/definitions/UnicornProtobuf.pb.rb']
#  }
# }
#
class LogStash::Codecs::Protobuf < LogStash::Codecs::Base
  config_name 'protobuf'

  # Name of the protobuf class used to decode (and, in outputs, encode) messages.
  # If your protobuf definition contains modules, prepend them to the class name with double colons like so:
  # [source,ruby]
  # class_name => "Foods::Dairy::Cheese"
  #
  # This corresponds to a protobuf definition starting as follows:
  # [source,ruby]
  # module Foods
  #    module Dairy
  #        class Cheese
  #            # here are your field definitions.
  #
  # If your class references other definitions, you only have to add the main class here.
  config :class_name, :validate => :string, :required => true

  # List of absolute paths to the ruby files containing the protobuf definitions.
  # An entry that does not end in `.rb` is treated as a directory, and every `.rb` file directly inside it is loaded.
  # When using more than one file, make sure to arrange the files in reverse order of dependency so that each class
  # is loaded before it is referred to by another.
  #
  # Example: a class _Cheese_ referencing another protobuf class _Milk_
  # [source,ruby]
  # module Foods
  #   module Dairy
  #         class Cheese
  #            set_fully_qualified_name "Foods.Dairy.Cheese"
  #            optional ::Foods::Cheese::Milk, :milk, 1
  #            optional :int64, :unique_id, 2
  #            # here be more field definitions
  #
  # would be configured as
  # [source,ruby]
  # include_path => ['/path/to/protobuf/definitions/Milk.pb.rb','/path/to/protobuf/definitions/Cheese.pb.rb']
  #
  # When using the codec in an output plugin:
  # * make sure to include all the desired fields in the protobuf definition, including timestamp.
  #   Remove fields that are not part of the protobuf definition from the event by using the mutate filter.
  # * the @ symbol is currently not supported in field names when loading the protobuf definitions for encoding.
  #   Make sure to call the timestamp field "timestamp" instead of "@timestamp" in the protobuf file.
  #   The codec removes @ characters from event field names before conversion.
  #
  config :include_path, :validate => :array, :required => true

  # Prepares the codec for use: resets the table of nested class references, loads every
  # configured definition path and resolves the configured class name to its class.
  # Returns the resolved protobuf class.
  def register
    @pb_class_references = {}
    include_path.each do |definition_path|
      load_protobuf_classfiles(definition_path)
    end
    @protobuf_class = create_protobuf_object(@class_name)
  end

  # Parses one protobuf-encoded payload and hands the result to the caller as a Logstash event.
  # Params:
  # +data+:: the raw protobuf payload; it is converted with to_s before parsing.
  # Yields a LogStash::Event built from the decoded message's hash form. Without a block the
  # payload is still parsed, but nothing is emitted and nil is returned.
  def decode(data)
    message = @protobuf_class.parse(data.to_s)
    return unless block_given?

    yield LogStash::Event.new(message.to_hash)
  end # def decode


  # Serializes a Logstash event into protobuf bytes and passes them to the @on_event callback.
  # Params:
  # +event+:: the event to encode; its to_hash form is converted into the configured protobuf class.
  #
  # Encoding is best effort: any StandardError is logged as a warning and the event is dropped
  # instead of raising. A NoMethodError usually means the event carries a field that the protobuf
  # definition does not declare, so that case gets a dedicated hint. The underlying error message is
  # appended to that hint because it names the method that was missing.
  def encode(event)
    data = prepare_nested_objects(event.to_hash, @class_name)
    protobytes = @protobuf_class.new(data).serialize_to_string
    @on_event.call(event, protobytes)
  rescue NoMethodError => e
    @logger.warn("Error 2: NoMethodError. Maybe mismatching protobuf definition? Make sure that your protobuf definition has at least these fields: " + event.to_hash.keys.join(", ") + ". Reason: " + e.message)
  rescue => e
    @logger.warn("Could not encode protobuf: " + e.message)
  end # def encode

  private

  # Converts an event hash into constructor arguments for the protobuf class +class_name+,
  # recursively replacing nested hashes by instances of the protobuf classes they belong to.
  # Params:
  # +fields+:: the (sub)hash of event data to convert.
  # +class_name+:: fully qualified name of the protobuf class that +fields+ describes.
  # Returns the converted hash. Input that is not a Hash after preparation is returned untouched.
  #
  # @pb_class_references maps a class name to a hash of { field name => referenced class name }.
  # The field names in there are Strings, while prepare_for_encoding produces Symbol keys, so the
  # lookup key has to be symbolized. Classes without an entry are treated as having no nested fields.
  def prepare_nested_objects(fields, class_name)
    fields = prepare_for_encoding(fields)
    return fields unless fields.is_a?(::Hash)

    referenced_classes = @pb_class_references[class_name] || {}
    referenced_classes.each do |field_name, nested_class_name|
      key = field_name.to_sym
      next unless fields.include?(key)

      nested_class = create_protobuf_object(nested_class_name)
      # Only hashes can be turned into messages; anything else (nil, ready-made objects) is kept as is.
      build_message = lambda do |item|
        item.is_a?(::Hash) ? nested_class.new(prepare_nested_objects(item, nested_class_name)) : item
      end

      value = fields[key]
      # A repeated field arrives as a list of hashes: every element becomes its own message.
      fields[key] = value.is_a?(::Array) ? value.map(&build_message) : build_message.call(value)
    end
    fields
  end



  # Removes @ characters from the member names of the event and symbolizes them.
  # Necessary for @timestamp fields and the likes. Otherwise we'd run into errors (no such method) upon
  # creating the protobuf object.
  # Values are passed through convert_value, which turns timestamps and other objects into strings so
  # that they can be handed to the protobuf object constructor.
  # Params:
  # +datahash+:: the event data to prepare.
  # Returns a new Hash with cleaned Symbol keys. Anything that is not a Hash is returned unchanged.
  def prepare_for_encoding(datahash)
    # `next` is only valid inside a block or loop; a method body needs a guard-clause return.
    return datahash unless datahash.is_a?(::Hash)

    datahash.each_with_object({}) do |(key, value), prepared|
      prepared[key.to_s.delete('@').to_sym] = convert_value(value)
    end
  end

  # Returns +v+ as a String when convertable_to_string? says it should be stringified,
  # and +v+ itself otherwise.
  def convert_value(v)
    return v unless convertable_to_string?(v)

    v.to_s
  end

  # Tells whether a value has to be stringified before it is handed to the protobuf constructor.
  # Integers of any size, floats, hashes, arrays and booleans are kept as they are; everything
  # else (timestamps, symbols, strings, ...) is reported as convertible.
  # Integer is used instead of Fixnum: Fixnum never covered large integers and no longer exists
  # in current Ruby versions.
  def convertable_to_string?(v)
    case v
    when ::Integer, ::Float, ::Hash, ::Array, true, false then false
    else true
    end
  end

  # Resolves a fully qualified class name such as "Foods::Dairy::Cheese" to the class constant itself.
  # Despite the method name no instance is created; the returned class is what parse and new are
  # later called on.
  # Params:
  # +name+:: the class name, with modules separated by double colons. A leading "::" is accepted.
  # Raises NameError if one of the name's segments is not a defined constant.
  def create_protobuf_object(name)
    # Dropping empty segments makes "::Foo::Bar" resolve like "Foo::Bar" instead of looking up "".
    segments = name.to_s.split('::').reject(&:empty?)
    segments.inject(Object) { |namespace, const_name| namespace.const_get(const_name) }
  end



  # Analyses a protobuf definition on which other protobuf classes it uses.
  # This is needed for the encoder section of the codec.
  # When encoding an event into a pb class which uses other pb classes, we need to create the
  # objects for those nested classes first, so that we can reference them when encoding the topmost
  # class. In order to be able to do so, this method reads the protobuf class file line by line and
  # stores the information in the @pb_class_references member as
  # { "Module::Class" => { "field_name" => "Referenced::Class" } }.
  #
  # Only the first class declaration of a file is considered; the enclosing module declarations that
  # precede it form the namespace prefix. Field declarations seen before a class name are ignored.
  # Problems while reading the file are logged as warnings, never raised.
  # Params:
  # +filename+:: the absolute path to the protobuf definition.
  def load_class_reference_information(filename)
    regex_class_name = /\s*class\s*(?<name>.+?)\s+/
    regex_module_name = /\s*module\s*(?<name>.+?)\s+/
    regex_pbdefs = /\s*(optional|repeated)(\s*):(?<type>.+),(\s*):(?<name>\w+),(\s*)(?<position>\d+)/
    class_name = String.new
    type = ""
    field_name = ""
    classname_found = false
    begin
      File.readlines(filename).each do |line|
        # Module and class names are only collected until the first class declaration, because the
        # class might be declared twice in the file.
        unless classname_found
          module_match = regex_module_name.match(line)
          class_name << module_match[:name] << "::" if module_match

          class_match = regex_class_name.match(line)
          if class_match
            class_name << class_match[:name]
            # initialize the hash for the field specific information collected from the following lines
            @pb_class_references[class_name] = {}
            classname_found = true
          end
        end

        field_match = regex_pbdefs.match(line)
        next unless field_match && classname_found

        type = field_match[:type]
        field_name = field_match[:name]
        # Only types containing "::" reference another class. The regex consumes the first colon of a
        # leading "::", so at most one colon is left to strip. sub (unlike gsub!) never returns nil.
        @pb_class_references[class_name][field_name] = type.sub(/^:/, "") if type.include?('::')
      end
    rescue StandardError => e
      @logger.warn("error 3: unable to read pb definition from file  " + filename+ ". Reason: #{e.inspect}. Last settings were: class #{class_name} field #{field_name} type #{type}. Backtrace: " + e.backtrace.inspect.to_s)
    end
    unless classname_found
      @logger.warn("error 4: class name not found in file  " + filename)
    end
  end

  # This method calls 'require' for the protobuf class files listed in the 'include_path' section of the
  # config and records the class references of every loaded file.
  # A path ending in '.rb' is loaded as a single file. Any other path is treated as a directory, and all
  # '.rb' files directly inside it are loaded in alphabetical order, so that the load order (which matters
  # when definitions depend on each other) does not depend on the platform.
  # Params:
  # +dir_or_file+:: the absolute path to the file or directory that need to be loaded
  def load_protobuf_classfiles(dir_or_file)
    files = dir_or_file.end_with?('.rb') ? [dir_or_file] : Dir[dir_or_file + '/*.rb'].sort
    files.each do |file|
      require file
      load_class_reference_information(file)
    end
  end


end # class LogStash::Codecs::Protobuf
