Neuron Net
Macros | Functions
Attention layer's neuron Gradients Calculation

Describes the gradients calculation process for the Neuron of attention layer. More...

Macros

#define def_k_AttentionGradients   14
 Index of the kernel for gradients calculation process (AttentionIsideGradients) More...
 
#define def_k_ag_querys   0
 Matrix of Queries. More...
 
#define def_k_ag_querys_g   1
 Matrix of Queries' Gradients. More...
 
#define def_k_ag_keys   2
 Matrix of Keys. More...
 
#define def_k_ag_keys_g   3
 Matrix of Keys' Gradients. More...
 
#define def_k_ag_values   4
 Matrix of Values. More...
 
#define def_k_ag_values_g   5
 Matrix of Values' Gradients. More...
 
#define def_k_ag_scores   6
 Matrix of Scores. More...
 
#define def_k_ag_gradient   7
 Matrix of Gradients from previous iteration. More...
 

Functions

__kernel void AttentionIsideGradients (__global double *querys, __global double *querys_g, __global double *keys, __global double *keys_g, __global double *values, __global double *values_g, __global double *scores, __global double *gradient)
 Describes the gradient calculation process for the neuron of the attention layer (CNeuronAttentionOCL). More...
 

Detailed Description

Describes the gradients calculation process for the Neuron of attention layer.

Detailed description on the link.

Macro Definition Documentation

◆ def_k_ag_gradient

#define def_k_ag_gradient   7

Matrix of Gradients from previous iteration.

Definition at line 273 of file NeuroNet.mqh.

◆ def_k_ag_keys

#define def_k_ag_keys   2

Matrix of Keys.

Definition at line 268 of file NeuroNet.mqh.

◆ def_k_ag_keys_g

#define def_k_ag_keys_g   3

Matrix of Keys' Gradients.

Definition at line 269 of file NeuroNet.mqh.

◆ def_k_ag_querys

#define def_k_ag_querys   0

Matrix of Queries.

Definition at line 266 of file NeuroNet.mqh.

◆ def_k_ag_querys_g

#define def_k_ag_querys_g   1

Matrix of Queries' Gradients.

Definition at line 267 of file NeuroNet.mqh.

◆ def_k_ag_scores

#define def_k_ag_scores   6

Matrix of Scores.

Definition at line 272 of file NeuroNet.mqh.

◆ def_k_ag_values

#define def_k_ag_values   4

Matrix of Values.

Definition at line 270 of file NeuroNet.mqh.

◆ def_k_ag_values_g

#define def_k_ag_values_g   5

Matrix of Values' Gradients.

Definition at line 271 of file NeuroNet.mqh.

◆ def_k_AttentionGradients

#define def_k_AttentionGradients   14

Index of the kernel for gradients calculation process (AttentionIsideGradients)

Definition at line 265 of file NeuroNet.mqh.

Function Documentation

◆ AttentionIsideGradients()

__kernel void AttentionIsideGradients ( __global double *  querys,
__global double *  querys_g,
__global double *  keys,
__global double *  keys_g,
__global double *  values,
__global double *  values_g,
__global double *  scores,
__global double *  gradient 
)

Describes the gradient calculation process for the neuron of the attention layer (CNeuronAttentionOCL).

Detailed description on the link.

Parameters
[in]  querys    Matrix of Queries
[out] querys_g  Matrix of Queries' Gradients
[in]  keys      Matrix of Keys
[out] keys_g    Matrix of Keys' Gradients
[in]  values    Matrix of Values
[out] values_g  Matrix of Values' Gradients
[in]  scores    Matrix of Scores
[in]  gradient  Matrix of Gradients from previous iteration

Definition at line 596 of file NeuroNet.cl.

601  {
602  int u=get_global_id(0);
603  int d=get_global_id(1);
604  int units=get_global_size(0);
605  int dimension=get_global_size(1);
606  double koef=sqrt((double)(units*dimension));
607  if(koef<1)
608  koef=1;
609 
610  double vg=0;
611  double qg=0;
612  double kg=0;
613  for(int iu=0;iu<units;iu++)
614  {
615  double g=gradient[iu*dimension+d];
616  double sc=scores[iu*units+u];
617  vg+=sc*g;
618 //---
619  double sqg=0;
620  double skg=0;
621  for(int id=0;id<dimension;id++)
622  {
623  sqg+=values[iu*dimension+id]*gradient[u*dimension+id];
624  skg+=values[u*dimension+id]*gradient[iu*dimension+id];
625  }
626  qg+=(scores[u*units+iu]==0 || scores[u*units+iu]==1 ? 0.0001 : scores[u*units+iu]*(1-scores[u*units+iu]))*sqg*keys[iu*dimension+d]/koef;
627 //---
628  kg+=(scores[iu*units+u]==0 || scores[iu*units+u]==1 ? 0.0001 : scores[iu*units+u]*(1-scores[iu*units+u]))*skg*querys[iu*dimension+d]/koef;
629  }
630  int shift=u*dimension+d;
631  values_g[shift]=vg;
632  querys_g[shift]=qg;
633  keys_g[shift]=kg;
634  }