layer_base
2015-12-03 09:24:14 0 举报
AI智能生成
tiny_cnn
作者其他创作
大纲/内容
layer_base(...)
parallelize_(true), next_(NULL), prev_(NULL)
weight_init_(std::make_shared<weight_init::xavier>())
bias_init_(std::make_shared<weight_init::constant>(0.0))
init_weight()
weight_init_->fill(&W_, fan_in_size(), fan_out_size());
bias_init_->fill(&b_, fan_in_size(), fan_out_size());
std::fill(Whessian_.begin(), Whessian_.end(), 0.0);
std::fill(bhessian_.begin(), bhessian_.end(), 0.0);
clear_diff(CNN_TASK_SIZE);
getter
const vec_t& output(int worker_index) const { return output_[worker_index]; }
const vec_t& delta(int worker_index) const { return prev_delta_[worker_index]; }
weight() { return W_; }
bias() { return b_; }
weight_diff(int index) { return dW_[index]; }
bias_diff(int index) { return db_[index]; }
is_exploded() const { return has_infinite(W_) || has_infinite(b_); }
layer_base* next() { return next_; }
layer_base* prev() { return prev_; }
setter
template <typename WeightInit>
layer_base& weight_init(const WeightInit& f) { weight_init_ = std::make_shared<WeightInit>(f); return *this; }
template <typename BiasInit>
layer_base& bias_init(const BiasInit& f) { bias_init_ = std::make_shared<BiasInit>(f); return *this; }
template <typename WeightInit>
layer_base& weight_init(std::shared_ptr<WeightInit> f) { weight_init_ = f; return *this; }
template <typename BiasInit>
layer_base& bias_init(std::shared_ptr<BiasInit> f) { bias_init_ = f; return *this; }
save/load
virtual void save(std::ostream& os) const
virtual void load(std::istream& is)
virtual functions(input/output)
///< input dimension
virtual layer_size_t in_size() const { return in_size_; }
///< output dimension
virtual layer_size_t out_size() const { return out_size_; }
///< number of parameters
virtual size_t param_size() const { return W_.size() + b_.size(); }
///< number of incoming connections for each output unit
virtual size_t fan_in_size() const = 0;
///< number of outgoing connections for each input unit
virtual size_t fan_out_size() const = 0;
///< number of connections
virtual size_t connection_size() const = 0;
///< input shape(width x height x depth)
virtual index3d<layer_size_t> in_shape() const { return index3d<layer_size_t>(in_size(), 1, 1); }
///< output shape(width x height x depth)
virtual index3d<layer_size_t> out_shape() const { return index3d<layer_size_t>(out_size(), 1, 1); }
///< name of layer. should be unique for each concrete class
virtual std::string layer_type() const = 0;
virtual activation::function& activation_function() = 0;
visualize
// visualize
///< visualize latest output of this layer
///< default implementation interpret output as 1d-vector,
///< so "visual" layer(like convolutional layer) should override this for better visualization.
virtual image<> output_to_image(size_t worker_index = 0) const {
return vec2image(output_[worker_index]);
}
divide_hessian(int denominator)
for (auto& w : Whessian_) w /= denominator;
for (auto& b : bhessian_) b /= denominator;
connect(std::shared_ptr<layer_base>& tail)
if (out_size() != 0 && tail->in_size() != out_size()){..}
next_ = tail.get();
tail->prev_ = this;
template <typename Optimizer>
void update_weight(Optimizer *o, int worker_size, size_t batch_size)
merge(worker_size, batch_size);
clear_diff(worker_size);
o->update(dW_[0], Whessian_, W_);
o->update(db_[0], bhessian_, b_);
post_update();
forward/back
* return output vector
* output vector must be stored to output_[worker_index]
virtual const vec_t& forward_propagation(const vec_t& in, size_t worker_index) = 0;
* return delta of previous layer (delta=\frac{dE}{da}, a=wx in fully-connected layer)
* delta must be stored to prev_delta_[worker_index]
virtual const vec_t& back_propagation(const vec_t& current_delta, size_t worker_index) = 0;
* return delta2 of previous layer (delta2=\frac{d^2E}{da^2}, diagonal of hessian matrix)
* it is never called if optimizer is hessian-free
virtual const vec_t& back_propagation_2nd(const vec_t& current_delta2) = 0;
// called after updating weight
virtual void post_update() {}
bool has_same_weights(const layer_base& rhs, float_t eps) const
private:
void merge(size_t worker_size, size_t batch_size){...}
void clear_diff(size_t worker_size) {....}
void set_size(layer_size_t in_dim, layer_size_t out_dim, size_t weight_dim, size_t bias_dim){...}
protected:
layer_size_t in_size_;
layer_size_t out_size_;
bool parallelize_;
layer_base* next_;
layer_base* prev_;
vec_t a_[CNN_TASK_SIZE]; // w * x
vec_t output_[CNN_TASK_SIZE]; // last output of current layer, set by fprop
vec_t prev_delta_[CNN_TASK_SIZE]; // last delta of previous layer, set by bprop
vec_t W_; // weight vector
vec_t b_; // bias vector
vec_t dW_[CNN_TASK_SIZE];
vec_t db_[CNN_TASK_SIZE];
vec_t Whessian_; // diagonal terms of hessian matrix
vec_t bhessian_;
vec_t prev_delta2_; // d^2E/da^2
std::shared_ptr<weight_init::function> weight_init_
std::shared_ptr<weight_init::function> bias_init_
0 条评论
下一页