activation.cpp #include // See https://en.wikipedia.org/wiki/Activation_function #include "activation.h" Activation::Activation() { activation_type = ACTIVATION_SIGMOID; } Activation::Activation(Activation_Types _activation_type) { activation_type = _activation_type; } Activation::~Activation() { } double Activation::activate(const double& value, const bool derivative, const double& alpha) { switch (activation_type) { case (ACTIVATION_ABS) : return (abs(value, derivative)); break; case (ACTIVATION_ARCTAN) : return (arctan(value, derivative)); break; case (ACTIVATION_BENT) : return (bent(value, derivative)); break; case (ACTIVATION_BINARY_HALF_STEP) : return (binaryHalfStep(value, derivative)); break; case (ACTIVATION_BINARY_STEP) : return (binaryStep(value, derivative)); break; case (ACTIVATION_BOUNDED_RELU) : return (boundedRelu(value, derivative)); break; case (ACTIVATION_ELU) : return (elu(value, derivative)); break; case (ACTIVATION_GAUSSIAN) : return (gaussian(value, derivative)); break; case (ACTIVATION_IDENTITY) : return (identity(value, derivative)); break; case (ACTIVATION_LINEAR) : return (linear(value, derivative)); break; case (ACTIVATION_LOG) : return (log(value, derivative)); break; case (ACTIVATION_PRELU) : return (prelu(value, derivative)); break; case (ACTIVATION_RELU) : return (relu(value, derivative)); break; case (ACTIVATION_SCALED_TANH) : return (scaledTanh(value, derivative)); break; case (ACTIVATION_SIGMOID) : return (sigmoid(value, derivative)); break; case (ACTIVATION_SINC) : return (sinc(value, derivative)); break; case (ACTIVATION_SINUSOID) : return (sinusoid(value, derivative)); break; case (ACTIVATION_SOFT_EXPONENTIAL) : return (softExponential(value, alpha, derivative)); break; case (ACTIVATION_SOFT_PLUS) : return (softPlus(value, derivative)); break; case (ACTIVATION_SOFT_RELU) : return (softRelu(value, derivative)); break; case (ACTIVATION_SOFT_SIGN) : return (softSign(value, derivative)); break; case (ACTIVATION_SOFT_STEP) : return 
(softRelu(value, derivative)); break; case (ACTIVATION_SQRT) : return (sqrt(value, derivative)); break; case (ACTIVATION_SQUARE) : return (square(value, derivative)); break; case (ACTIVATION_SQUASH) : return (squash(value, derivative)); break; case (ACTIVATION_STEP) : return (step(value, derivative)); break; case (ACTIVATION_TANH) : return (tanh(value, derivative)); break; default: return (sigmoid(value, derivative)); break; } } // Returns a value between ? // // f(x) = abs(x) // derivative f(x) = todo double Activation::abs(const double& value, const bool derivative) { if (derivative) return value < 0 ? -1 : 1; // todo. else return std::abs(value); } // Returns a value between -pi/2 and +pi/2. // // f(x) = tan^-1(x) // derivative f(x) = 1/(x^2+1) double Activation::arctan(const double& value, const bool derivative) { if (derivative) // return (std::cos(value) * std::cos(value)); // todo is this the same return 1 / ((value * value) + 1); else return std::atan(value); // todo is this the same as tan^-1(x)? } // Bent Identity. // // Returns a value between -infinity to +infinity. // // f(x) = ((sqrt(x^2 + 1) - 1)/2) + x // derivative f(x) = ((x / 2 * sqrt(x^2+1)) + 1 double Activation::bent(const double& value, const bool derivative) { if (derivative) return (value / 2 * std::sqrt((value * value) + 1)) + 1; else return ((std::sqrt((value * value) + 1) - 1) / 2) + value; } // Binary Half Step. // See Binary Step. // // Returns a value between 0.0 and +1.0. // // f(x) = 0 for x<0; .5 for x==0; 1 for x>0 // derivative f(x) = 0 for x != 0; ? for x == 0 double Activation::binaryHalfStep(const double& value, const bool derivative) { if (derivative) { if (value < 0) return 0; else if (value == 0) return value; else if (value <= 0.5) return 0.5; // todo confirm this is correct for the derative else // value > 0.5 return 0; } else { if (value < 0) return 0; else if (value == 0) return 0.5; else // value > 0 return 1; } } // Binary Step. // Also known as Step. 
// Also known as Heaviside step. // // Returns a value between 0.0 and +1.0. // // f(x) = 0 for x<0; 1 for x>=0 // derivative f(x) = 0 for x != 0; ? for x == 0 double Activation::binaryStep(const double& value, const bool derivative) { if (derivative) return value != 0 ? 0 : value; // todo confirm to return value for !0. else return value >= 0 ? 1 : 0; } // Constrains the value between 0 and 1, and favors 0 and 1 as local minimums during training. // // Returns a value between -1.0 and +1.0. // // f(x) = min(a, max(0, x)) // f(x) = min(max(x + 0.5, 0), 1) double Activation::boundedRelu(const double& value, const bool derivative) { if (derivative) return 0; // TODO else return 0; // TODO } // Exponential Linear Unit. // // The results of models with relu are pretty impressive and it has become very quickly the standard. // // However, even if it is not possible for ReLUs to saturate, they can turn "dead" which means they are // never activated because the pre-activation value is always negative. // For such units, no gradient can flow through the net. // // Since the output of relu is always non-negative, their mean activation is always positive. // A positive mean introduces a bias for the next layer which can slow down the learning. // // A solution is to use the elu, which acts like relu if value is positive, but for negative values it is a // function bounded by a fixed value "-1", for alpha=1. This behavior helps to push the mean activation of // neurons closer to zero which is beneficial for learning and it helps to learn representations that are more // robust to noise. // // See http://www.picalike.com/blog/2015/11/28/relu-was-yesterday-tomorrow-comes-elu/. // // Returns a value between -alpha and +infinity. // // f(x) = x * (x > 0) + (x < 0) * (alpha * (T.exp(x) - 1)) //todo check if this is same formula as next line. 
// f(a,x) = alpha*((e^x) - 1) for x<0; x for x>=0; // derivative f(a,x) = f(x) + alpha for x<0; 1 for x>=0; double Activation::elu(const double& value, const double& alpha, const bool derivative) { if (derivative) { //double output = elu(value, alpha, false); //return output > 0 ? 1.0 : output + 1; return value >= 0 ? 1.0 : (alpha * (std::exp(value) - 1)) + 1; } else return value >= 0 ? value : alpha * (std::exp(value) - 1); } // Returns a value between 0.0 and +1.0. // // f(x) = exp(-x*-x) // derivative f(x) = -2x(exp(-x*-x)) double Activation::gaussian(const double& value, const bool derivative) { if (derivative) return -2 * value * std::exp(-value * -value); else return std::exp(-value * -value); } // Identity function. // // Returns a value between -infinity and +infinity. // // f(x) = x // derivative f(x) = 1 double Activation::identity(const double& value, const bool derivative) { if (derivative) return 1; else return value; } // Identity function. // // Returns a value between -infinity and +infinity. // // f(x) = x // derivative f(x) = 1 double Activation::linear(const double& value, const bool derivative) { if (derivative) return 1; else return value; } // Returns a value between . // // f(x) = 1 / (1 + e^-x) double Activation::log(const double& value, const bool derivative) { if (derivative) return 0; // TODO else return 1.0 / (1.0 + std::exp(-value)); /* if (value < -45.0) return 0.0; else if (value > 45.0) return 1.0; else return 1.0 / (1.0 + std::exp(-value)); */ } // Parameteric Rectified Linear Unit. // // Returns a value between -infinity and +infinity. // // f(a,x) = ax for x<0; x for x>=0; // derivative f(a,x) = a for x<0; 1 for x>=0 double Activation::prelu(const double& value, const double& alpha, const bool derivative) { if (derivative) return value >= 0 ? 1.0 : alpha; else return value >= 0 ? value : alpha * value; } // Rectified linear unit. // // Fast and non-saturating: max(x, 0). 
// // The results of models with relu are pretty impressive and it has become very quickly the standard. // // However, even if it is not possible for ReLUs to saturate, they can turn "dead" which means they are // never activated because the pre-activation value is always negative. // For such units, no gradient can flow through the net. // // Since the output of relu is always non-negative, their mean activation is always positive. // A positive mean introduces a bias for the next layer which can slow down the learning. // // A solution is to use the elu, which acts like relu if value is positive, but for negative values it is a // function bounded by a fixed value "-1", for alpha=1. This behavior helps to push the mean activation of // neurons closer to zero which is beneficial for learning and it helps to learn representations that are more // robust to noise. // // See http://www.picalike.com/blog/2015/11/28/relu-was-yesterday-tomorrow-comes-elu/. // // Returns a value between 0 and +infinity. // // f(x) = max(0, x) // f(x) = 0 for x<0; x for x>=0 // derivative f(x) = 0 for x<0; 1 for x>=0 double Activation::relu(const double& value, const bool derivative) { if (derivative) return value >= 0 ? 1.0 : 0.0; else return value >= 0 ? value : 0.0; } // Returns a value between -1.0 and +1.0. // // f(x) = 1.7159 * tanh(0.66667 * x) double Activation::scaledTanh(const double& value, const bool derivative) { if (derivative) // TODO... { double tanh_value = std::tanh(value); return 0.66667f * (1.7159f - 1 / 1.7159f * tanh_value * tanh_value); } else return 1.7159 * std::tanh(0.66667 * value); } // Returns a value between 0.0 and 1.0. double Activation::sigmoid(const double& value, const bool derivative) { if (derivative) return sigmoid(value) * (1.0 - sigmoid(value)); else return 1.0 / double((1.0 + exp(-value))); } /* // Returns a value between 0.0 and 1.0. 
double Activation::sigmoid(const double& value)
{
  return 1.0 / double((1.0 + exp(-value)));
}


double Activation::sigmoid_derivative(const double& value)
{
  return sigmoid(value) * (1.0 - sigmoid(value));
}
*/


// Sigmoid that short-circuits outside of [negative_limit, positive_limit].
//
// Returns 0.0 for value < negative_limit, 1.0 for value > positive_limit,
// and 1 / (1 + e^-x) otherwise.
double Activation::sigmoid_limit(double value, double positive_limit, double negative_limit)
{
  if (value < negative_limit)
    return 0.0;
  else if (value > positive_limit)
    return 1.0;
  else
    return 1.0 / (1.0 + std::exp(-value));
}


// Returns a value between ~-.217234 and 1.0.
//
// f(x) = 1 for x == 0; sin(x)/x for x != 0.
// derivative f(x) = 0 for x == 0; (cos(x)/x) - (sin(x)/(x^2)) for x != 0
double Activation::sinc(const double& value, const bool derivative)
{
  if (derivative)
    // Quotient rule on sin(x)/x: the second term is divided by x squared.
    return value == 0 ? 0 : (std::cos(value) / value) - (std::sin(value) / (value * value));
  else
    return value == 0 ? 1 : std::sin(value) / value;
}


// Sinusoid.
//
// Returns a value between -1.0 and 1.0.
//
// f(x) = sin(x)
// derivative f(x) = cos(x)
double Activation::sinusoid(const double& value, const bool derivative)
{
  if (derivative)
    return std::cos(value);
  else
    return std::sin(value);
}


// Soft Exponential.
//
// Returns a value between -infinity and +infinity.
//
// f(a,x) = - (loge(1 - alpha * (x+alpha))) / alpha    for alpha < 0
// f(a,x) = x                                          for alpha == 0
// f(a,x) = ((exp(alpha*x) - 1) / alpha) + alpha       for alpha > 0
// derivative f(x) = 1 / (1 - alpha * (alpha + x))     for alpha < 0
// derivative f(x) = exp(alpha * x)                    for alpha >= 0
double Activation::softExponential(const double& value, const double& alpha, const bool derivative)
{
  if (derivative)
    return alpha >= 0 ? std::exp(alpha * value) : 1 / (1 - alpha * (alpha + value));

  if (alpha < 0)
    return -std::log(1 - alpha * (value + alpha)) / alpha;  // std::log is the natural logarithm (loge).
  else if (alpha == 0)
    return value;
  else  // alpha > 0.
    return ((std::exp(alpha * value) - 1) / alpha) + alpha;
}


// Soft Plus.
//
// Returns a value between 0 and +infinity.
//
// f(x) = log(1 + exp(x))
// derivative f(x) = 1 / (1 + exp(-x))
double Activation::softPlus(const double& value, const bool derivative)
{
  if (derivative)
    return 1 / (1 + std::exp(-value));
  else
    return std::log(1 + std::exp(value));
}


// Soft Relu; the same function as Soft Plus.
//
// Returns a value between 0 and +infinity.
//
// f(x) = log(1 + e^x)
// derivative f(x) = 1 / (1 + e^-x)
double Activation::softRelu(const double& value, const bool derivative)
{
  if (derivative)
    return 1 / (1 + std::exp(-value));
  else
    return std::log(1 + std::exp(value));
}


// Soft Sign.
//
// Returns a value between -1.0 and 1.0.
//
// f(x) = x/(1 + abs(x))
// derivative f(x) = 1/((1 + abs(x))*(1 + abs(x)))
double Activation::softSign(const double& value, const bool derivative)
{
  const double denominator = 1 + std::abs(value);

  if (derivative)
    return 1 / (denominator * denominator);
  else
    return value / denominator;
}


// Soft Step aka Logistic.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 1 / (1 + e^-x)
// derivative f(x) = (1 / (1 + e^-x)) * (1 - (1 / (1 + e^-x)))
double Activation::softStep(const double& value, const bool derivative)
{
  const double logistic = 1 / (1 + std::exp(-value));

  if (derivative)
    return logistic * (1 - logistic);
  else
    return logistic;
}


// Square root.
//
// Returns a value between 0 and +infinity; the result is NaN for negative input.
//
// f(x) = sqrt(x)
// derivative f(x) = 1 / (2 * sqrt(x))   (+infinity at x == 0, NaN for x < 0)
double Activation::sqrt(const double& value, const bool derivative)
{
  if (derivative)
    return 1 / (2 * std::sqrt(value));
  else
    return std::sqrt(value);
}


// Square.
//
// Returns a value between 0 and +infinity.
//
// f(x) = x^2
// derivative f(x) = 2x
double Activation::square(const double& value, const bool derivative)
{
  if (derivative)
    return 2 * value;
  else
    return value * value;
}


// Squash.
//
// Returns a value between -1.0 and +1.0.
//
// f(x) = x / (1 + abs(x))
// derivative f(x) = 1 / ((1 + abs(x)) * (1 + abs(x)))
double Activation::squash(const double& value, const bool derivative)
{
  const double denominator = 1 + std::abs(value);

  if (derivative)
    // Slope of x / (1 + |x|); x / (1 +- x) would merely repeat the function value.
    return 1 / (denominator * denominator);
  else
    return value / denominator;
}


// Binary Step.
//
// Returns a value between 0.0 and +1.0.
// // f(x) = 0 for x<0; 1 for x>=0 // // derivative f(x) = 0 for x != 0; ? for x == 0 double Activation::step(const double& value, const bool derivative) { if (derivative) return value != 0 ? 0 : value; // todo confirm to return value for !0. else return value >= 0 ? 1 : 0; } // Returns a value between -1.0 and +1.0. // // f(x) = a*tanh(b*x) // // f(x) = tanh(x) = (2/(1+exp(-2*value))) - 1 // derivative f(x) = 1 - f(x) * f(x) double Activation::tanh(const double& value, const bool derivative) { if (derivative) { double tanh_value = std::tanh(value); return (1.0 - tanh_value * tanh_value); //return (1.0 - std::tanh(value)) * (1.0 + std::tanh(value)); } else return std::tanh(value); } // Returns a value between -1.0 and +1.0. double Activation::tanh_limit(double& value, double positive_limit, double negative_limit) { if (value < negative_limit) return -1.0; else if (value > positive_limit) return 1.0; else return tanh(value); /* if (value < -45.0) return -1.0; else if (value > 45.0) return 1.0; else return std::tanh(value); */ } Activation_Types Activation::getActivationType() { return activation_type; } void Activation::setActivationType(Activation_Types _activation_type) { activation_type = _activation_type; } /* public double SoftMax(double x, string layer) { // Determine max double max = double.MinValue; if (layer == "ih") max = (ihSum0 > ihSum1) ? ihSum0 : ihSum1; else if (layer == "ho") max = (hoSum0 > hoSum1) ? hoSum0 : hoSum1; // Compute scale double scale = 0.0; if (layer == "ih") scale = Math.Exp(ihSum0 - max) + Math.Exp(ihSum1 - max); else if (layer == "ho") scale = Math.Exp(hoSum0 - max ) + Math.Exp(hoSum1 - max); return Math.Exp(x - max) / scale; } */