layer_base
2015-12-03 09:24:14 0 举报
AI智能生成
tiny_cnn
作者其他创作
大纲/内容
layer_base(...)
parallelize_(true), next_(NULL), prev_(NULL)
weight_init_(std::make_shared<weight_init::xavier>())
bias_init_(std::make_shared<weight_init::constant>(0.0))
init_weight()
weight_init_->fill(&W_, fan_in_size(), fan_out_size());
bias_init_->fill(&b_, fan_in_size(), fan_out_size());
std::fill(Whessian_.begin(), Whessian_.end(), 0.0);
std::fill(bhessian_.begin(), bhessian_.end(), 0.0);
clear_diff(CNN_TASK_SIZE);
getter
const vec_t& output(int worker_index) const { return output_[worker_index]; }
const vec_t& delta(int worker_index) const { return prev_delta_[worker_index]; }
weight() { return W_; }
bias() { return b_; }
weight_diff(int index) { return dW_[index]; }
bias_diff(int index) { return db_[index]; }
is_exploded() const { return has_infinite(W_) || has_infinite(b_); }
layer_base* next() { return next_; }
layer_base* prev() { return prev_; }
setter
template <typename WeightInit>
layer_base& weight_init(const WeightInit& f) { weight_init_ = std::make_shared<WeightInit>(f); return *this; }
template <typename BiasInit>
layer_base& bias_init(const BiasInit& f) { bias_init_ = std::make_shared<BiasInit>(f); return *this; }
template <typename WeightInit>
layer_base& weight_init(std::shared_ptr<WeightInit> f) { weight_init_ = f; return *this; }
template <typename BiasInit>
layer_base& bias_init(std::shared_ptr<BiasInit> f) { bias_init_ = f; return *this; }
save/load
virtual void save(std::ostream& os) const
virtual void load(std::istream& is)
virtual functions(input/output)
///< input dimension
virtual layer_size_t in_size() const { return in_size_; }
///< output dimension
virtual layer_size_t out_size() const { return out_size_; }
///< number of parameters
virtual size_t param_size() const { return W_.size() + b_.size(); }
///< number of incoming connections for each output unit
virtual size_t fan_in_size() const = 0;
///< number of outgoing connections for each input unit
virtual size_t fan_out_size() const = 0;
///< number of connections
virtual size_t connection_size() const = 0;
///< input shape(width x height x depth)
virtual index3d<layer_size_t> in_shape() const { return index3d<layer_size_t>(in_size(), 1, 1); }
///< output shape(width x height x depth)
virtual index3d<layer_size_t> out_shape() const { return index3d<layer_size_t>(out_size(), 1, 1); }
///< name of layer. should be unique for each concrete class
virtual std::string layer_type() const = 0;
virtual activation::function& activation_function() = 0;
visualize
// visualize
///< visualize latest output of this layer
///< default implementation interpret output as 1d-vector,
///< so "visual" layer(like convolutional layer) should override this for better visualization.
virtual image<> output_to_image(size_t worker_index = 0) const {
return vec2image(output_[worker_index]);
}
divide_hessian(int denominator)
for (auto& w : Whessian_) w /= denominator;
for (auto& b : bhessian_) b /= denominator;
connect(std::shared_ptr<layer_base>& tail)
if (out_size() != 0 && tail->in_size() != out_size()){..}
next_ = tail.get();
tail->prev_ = this;
template <typename Optimizer>
void update_weight(Optimizer *o, int worker_size, size_t batch_size)
merge(worker_size, batch_size);
clear_diff(worker_size);
o->update(dW_[0], Whessian_, W_);
o->update(db_[0], bhessian_, b_);
post_update();
forward/back
* return output vector
* output vector must be stored to output_[worker_index]
virtual const vec_t& forward_propagation(const vec_t& in, size_t worker_index) = 0;
* return delta of previous layer (delta=\frac{dE}{da}, a=wx in fully-connected layer)
* delta must be stored to prev_delta_[worker_index]
virtual const vec_t& back_propagation(const vec_t& current_delta, size_t worker_index) = 0;
* return delta2 of previous layer (delta2=\frac{d^2E}{da^2}, diagonal of hessian matrix)
* it is never called if optimizer is hessian-free
virtual const vec_t& back_propagation_2nd(const vec_t& current_delta2) = 0;
// called after updating weight
virtual void post_update() {}
bool has_same_weights(const layer_base& rhs, float_t eps) const
private:
void merge(size_t worker_size, size_t batch_size){...}
void clear_diff(size_t worker_size) {....}
void set_size(layer_size_t in_dim, layer_size_t out_dim, size_t weight_dim, size_t bias_dim){...}
protected:
layer_size_t in_size_;
layer_size_t out_size_;
bool parallelize_;
layer_base* next_;
layer_base* prev_;
vec_t a_[CNN_TASK_SIZE]; // w * x
vec_t output_[CNN_TASK_SIZE]; // last output of current layer, set by fprop
vec_t prev_delta_[CNN_TASK_SIZE]; // last delta of previous layer, set by bprop
vec_t W_; // weight vector
vec_t b_; // bias vector
vec_t dW_[CNN_TASK_SIZE];
vec_t db_[CNN_TASK_SIZE];
vec_t Whessian_; // diagonal terms of hessian matrix
vec_t bhessian_;
vec_t prev_delta2_; // d^2E/da^2
std::shared_ptr<weight_init::function> weight_init_
std::shared_ptr<weight_init::function> bias_init_
0 条评论
下一页