// linear.proto: configuration of linear methods
package PS.LM;

import "data/proto/data.proto";
import "learner/proto/bcd.proto";
import "filter/proto/filter.proto";

message Config {
  optional DataConfig training_data = 1;
  optional DataConfig validation_data = 2;
  optional DataConfig model_output = 4;
  optional DataConfig model_input = 5;

  optional LossConfig loss = 10;
  optional PenaltyConfig penalty = 11;
  optional LearningRateConfig learning_rate = 12;

  optional SGDConfig async_sgd = 17;
  optional BCDConfig darlin = 15;
}
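// A hedged illustration, not part of the original file: a minimal Config in
// protobuf text format, assuming l1-regularized logistic regression solved by
// the darlin (BCD) solver. The values are illustrative only; the nested
// DataConfig and BCDConfig fields are defined in data.proto and bcd.proto and
// are left empty here.
//
//   training_data { }
//   loss { type: LOGIT }
//   penalty { type: L1  lambda: 1  lambda: 0 }
//   darlin { }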
extend BCDConfig {
  // Used by the trust region method. All changes to the parameters are bounded
  // by *delta*. *delta* is updated according to the convergence; its initial
  // value is *delta_init_value* and its maximal value is *delta_max_value*.
  optional double delta_init_value = 101 [default = 1];
  optional double delta_max_value = 102 [default = 5];

  // kkt_filter_threshold = max_gradient_violation / num_examples *
  // kkt_filter_threshold_ratio. Increasing this number reduces the effect of
  // the KKT filter (see the worked example after this block).
  optional double kkt_filter_threshold_ratio = 103 [default = 10];
}
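// A hedged sketch, not from the source, of how these extensions could be set
// inside a darlin block in protobuf text format; extension fields are
// referenced by their fully qualified names, and the values are illustrative:
//
//   darlin {
//     [PS.LM.delta_init_value]: 1
//     [PS.LM.delta_max_value]: 5
//     [PS.LM.kkt_filter_threshold_ratio]: 10
//   }
//
// Worked example with illustrative numbers: for max_gradient_violation = 1e4,
// num_examples = 1e6, and the default ratio of 10,
// kkt_filter_threshold = 1e4 / 1e6 * 10 = 0.1.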
message SGDConfig {
  enum Algo {
    STANDARD = 1;
    FTRL = 2;
  }
  required Algo algo = 1;

  // The minibatch size.
  optional int32 minibatch = 2 [default = 1000];

  // Size of the data buffer, in MB.
  optional int32 data_buf = 12 [default = 1000];

  optional bool ada_grad = 5 [default = true];
  optional int32 max_delay = 4 [default = 0];

  // The number of data passes.
  optional int32 num_data_pass = 11 [default = 1];

  // Reporting interval, in seconds.
  optional int32 report_interval = 3 [default = 1];

  // Features that occur <= *tail_feature_freq* times are filtered out before
  // training. This saves both memory and bandwidth.
  optional int32 tail_feature_freq = 6 [default = 0];

  // Controls the countmin sketch size. Tail features are filtered by countmin,
  // which is more efficient than a hash table but is still the memory
  // bottleneck on servers. A smaller value reduces the memory footprint, but
  // may increase the number of features that survive the filter.
  optional float countmin_n = 8 [default = 1e8];
  optional int32 countmin_k = 7 [default = 2];

  repeated FilterConfig push_filter = 13;
  repeated FilterConfig pull_filter = 14;
}
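// A hedged example, not part of the original file, of an async_sgd block in
// protobuf text format; the values are illustrative, not recommendations:
//
//   async_sgd {
//     algo: FTRL
//     minibatch: 10000
//     tail_feature_freq: 4
//     num_data_pass: 1
//   }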
message LossConfig {
  enum Type {
    SQUARE = 1;
    LOGIT = 2;
    HINGE = 3;
    SQUARE_HINGE = 4;
  }
  required Type type = 1;
}
message PenaltyConfig {
  enum Type {
    L1 = 1;  // lambda(0) * ||w||_1 + lambda(1) * ||w||_F^2
    L2 = 2;  // lambda(0) * ||w||_F^2
  }
  required Type type = 1;
  repeated double lambda = 2;
}
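// A hedged worked example, not from the source, instantiating the formulas in
// the comments above:
//
//   penalty { type: L1  lambda: 1.0  lambda: 0.1 }
//
// evaluates to 1.0 * ||w||_1 + 0.1 * ||w||_F^2, an elastic-net style
// regularizer; setting the second lambda to 0 gives a pure l1 penalty.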
message LearningRateConfig {
  enum Type {
    CONSTANT = 1;  // = alpha
    DECAY = 2;     // = alpha / (beta + x), where x is user-defined, such as sqrt(iter)
  }
  optional Type type = 1;
  optional double alpha = 2;
  optional double beta = 3;
}
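// A hedged example, not from the source, using the DECAY formula above:
//
//   learning_rate { type: DECAY  alpha: 1  beta: 1 }
//
// with x chosen as sqrt(iter) gives a learning rate of 1 / (1 + sqrt(iter))
// at iteration iter.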