arm_math.h - lpc-field - Template project for programming NXP's LPC1768 MCUs

arm_math.h (233229B)
      1 /* ----------------------------------------------------------------------
      2  * Copyright (C) 2010 ARM Limited. All rights reserved.
      3  *
      4  * $Date:        15. July 2011
      5  * $Revision: 	V1.0.10
      6  *
      7  * Project: 	    CMSIS DSP Library
      8  * Title:	     arm_math.h
      9  *
     10  * Description:	 Public header file for CMSIS DSP Library
     11  *
     12  * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
     13  *
     14  * Version 1.0.10 2011/7/15
     15  *    Big Endian support added and Merged M0 and M3/M4 Source code.
     16  *
     17  * Version 1.0.3 2010/11/29
     18  *    Re-organized the CMSIS folders and updated documentation.
     19  *
     20  * Version 1.0.2 2010/11/11
     21  *    Documentation updated.
     22  *
     23  * Version 1.0.1 2010/10/05
     24  *    Production release and review comments incorporated.
     25  *
     26  * Version 1.0.0 2010/09/20
     27  *    Production release and review comments incorporated.
     28  * -------------------------------------------------------------------- */
     29 
     30 /**
     31    \mainpage CMSIS DSP Software Library
     32    *
     33    * <b>Introduction</b>
     34    *
     35    * This user manual describes the CMSIS DSP software library,
     36    * a suite of common signal processing functions for use on Cortex-M processor based devices.
     37    *
     38    * The library is divided into a number of modules each covering a specific category:
     39    * - Basic math functions
     40    * - Fast math functions
     41    * - Complex math functions
     42    * - Filters
     43    * - Matrix functions
     44    * - Transforms
     45    * - Motor control functions
     46    * - Statistical functions
     47    * - Support functions
     48    * - Interpolation functions
     49    *
     50    * The library has separate functions for operating on 8-bit integers, 16-bit integers,
     51    * 32-bit integer and 32-bit floating-point values.
     52    *
     53    * <b>Processor Support</b>
     54    *
     55    * The library is completely written in C and is fully CMSIS compliant.
     56    * High performance is achieved through maximum use of Cortex-M4 intrinsics.
     57    *
     58    * The supplied library source code also builds and runs on the Cortex-M3 and Cortex-M0 processor,
     59    * with the DSP intrinsics being emulated through software.
     60    *
     61    *
     62    * <b>Toolchain Support</b>
     63    *
     64    * The library has been developed and tested with MDK-ARM version 4.21.
     65    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
     66    *
     67    * <b>Using the Library</b>
     68    *
     69    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
     70    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
     71    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
     72    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
     73    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
     74    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
     75    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
     76    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0)
     77    * - arm_cortexM0b_math.lib (Big endian on Cortex-M0)
     78    *
     79    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
     80    * Simply include this file and link the appropriate library in the application and begin calling the library functions. The library provides a single
     81    * public header file <code>arm_math.h</code> for Cortex-M4/M3/M0 in both little-endian and big-endian configurations. The same header file is used for the floating point unit (FPU) variants.
     82    * Define the appropriate pre processor MACRO ARM_MATH_CM4 or  ARM_MATH_CM3 or
     83    * ARM_MATH_CM0 depending on the target processor in the application.
     84    *
     85    * <b>Examples</b>
     86    *
     87    * The library ships with a number of examples which demonstrate how to use the library functions.
     88    *
     89    * <b>Building the Library</b>
     90    *
     91    * The library installer contains project files to re build libraries on MDK Tool chain in the <code>CMSIS\DSP_Lib\Source\ARM</code> folder.
     92    * - arm_cortexM0b_math.uvproj
     93    * - arm_cortexM0l_math.uvproj
     94    * - arm_cortexM3b_math.uvproj
     95    * - arm_cortexM3l_math.uvproj
     96    * - arm_cortexM4b_math.uvproj
     97    * - arm_cortexM4l_math.uvproj
     98    * - arm_cortexM4bf_math.uvproj
     99    * - arm_cortexM4lf_math.uvproj
    100    *
    101    * Each library project has different pre-processor macros.
    102    *
    103    * <b>ARM_MATH_CMx:</b>
    104    * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
    105    * and ARM_MATH_CM0 for building library on cortex-M0 target.
    106    *
    107    * <b>ARM_MATH_BIG_ENDIAN:</b>
    108    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
    109    *
    110    * <b>ARM_MATH_MATRIX_CHECK:</b>
    111    * Define macro for checking on the input and output sizes of matrices
    112    *
    113    * <b>ARM_MATH_ROUNDING:</b>
    114    * Define macro for rounding on support functions
    115    *
    116    * <b>__FPU_PRESENT:</b>
    117    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
    118    *
    119    *
    120    * The project can be built by opening the appropriate project in the MDK-ARM 4.21 tool chain and defining the optional pre-processor macros detailed above.
    121    *
    122    * <b>Copyright Notice</b>
    123    *
    124    * Copyright (C) 2010 ARM Limited. All rights reserved.
    125    */
    126 
    127 
    128 /**
    129  * @ingroup DSP_Functions
    130  * @defgroup groupMath Basic Math Functions
    131  */
    132 
    133 /**
    134  * @ingroup DSP_Functions
    135  * @defgroup groupFastMath Fast Math Functions
    136  * This set of functions provides a fast approximation to sine, cosine, and square root.
    137  * As compared to most of the other functions in the CMSIS math library, the fast math functions
    138  * operate on individual values and not arrays.
    139  * There are separate functions for Q15, Q31, and floating-point data.
    140  *
    141  */
    142 
    143 /**
    144  * @ingroup DSP_Functions
    145  * @defgroup groupCmplxMath Complex Math Functions
    146  * This set of functions operates on complex data vectors.
    147  * The data in the complex arrays is stored in an interleaved fashion
    148  * (real, imag, real, imag, ...).
    149  * In the API functions, the number of samples in a complex array refers
    150  * to the number of complex values; the array contains twice this number of
    151  * real values.
    152  */
    153 
    154 
    155 /**
    156  * @ingroup DSP_Functions
    157  * @defgroup groupFilters Filtering Functions
    158  */
    159 
    160 /**
    161  * @ingroup DSP_Functions
    162  * @defgroup groupMatrix Matrix Functions
    163  *
    164  * This set of functions provides basic matrix math operations.
    165  * The functions operate on matrix data structures.  For example,
    166  * the type
    167  * definition for the floating-point matrix structure is shown
    168  * below:
    169  * <pre>
    170  *     typedef struct
    171  *     {
    172  *       uint16_t numRows;     // number of rows of the matrix.
    173  *       uint16_t numCols;     // number of columns of the matrix.
    174  *       float32_t *pData;     // points to the data of the matrix.
    175  *     } arm_matrix_instance_f32;
    176  * </pre>
    177  * There are similar definitions for Q15 and Q31 data types.
    178  *
    179  * The structure specifies the size of the matrix and then points to
    180  * an array of data.  The array is of size <code>numRows X numCols</code>
    181  * and the values are arranged in row order.  That is, the
    182  * matrix element (i, j) is stored at:
    183  * <pre>
    184  *     pData[i*numCols + j]
    185  * </pre>
    186  *
    187  * \par Init Functions
    188  * There is an associated initialization function for each type of matrix
    189  * data structure.
    190  * The initialization function sets the values of the internal structure fields.
    191  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
    192  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
    193  *
    194  * \par
    195  * Use of the initialization function is optional. However, if initialization function is used
    196  * then the instance structure cannot be placed into a const data section.
    197  * To place the instance structure in a const data
    198  * section, manually initialize the data structure.  For example:
    199  * <pre>
    200  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
    201  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
    202  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
    203  * </pre>
    204  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
    205  * specifies the number of columns, and <code>pData</code> points to the
    206  * data array.
    207  *
    208  * \par Size Checking
    209  * By default all of the matrix functions perform size checking on the input and
    210  * output matrices.  For example, the matrix addition function verifies that the
    211  * two input matrices and the output matrix all have the same number of rows and
    212  * columns.  If the size check fails the functions return:
    213  * <pre>
    214  *     ARM_MATH_SIZE_MISMATCH
    215  * </pre>
    216  * Otherwise the functions return
    217  * <pre>
    218  *     ARM_MATH_SUCCESS
    219  * </pre>
    220  * There is some overhead associated with this matrix size checking.
    221  * The matrix size checking is enabled via the #define
    222  * <pre>
    223  *     ARM_MATH_MATRIX_CHECK
    224  * </pre>
    225  * within the library project settings.  By default this macro is defined
    226  * and size checking is enabled.  By changing the project settings and
    227  * undefining this macro size checking is eliminated and the functions
    228  * run a bit faster.  With size checking disabled the functions always
    229  * return <code>ARM_MATH_SUCCESS</code>.
    230  */
    231 
    232 /**
    233  * @ingroup DSP_Functions
    234  * @defgroup groupTransforms Transform Functions
    235  */
    236 
    237 /**
    238  * @ingroup DSP_Functions
    239  * @defgroup groupController Controller Functions
    240  */
    241 
    242 /**
    243  * @ingroup DSP_Functions
    244  * @defgroup groupStats Statistics Functions
    245  */
    246 
    247 /**
    248  * @ingroup DSP_Functions
    249  * @defgroup groupSupport Support Functions
    250  */
    251 
    252 /**
    253  * @ingroup DSP_Functions
    254  * @defgroup groupInterpolation Interpolation Functions
    255  * These functions perform 1- and 2-dimensional interpolation of data.
    256  * Linear interpolation is used for 1-dimensional data and
    257  * bilinear interpolation is used for 2-dimensional data.
    258  */
    259 
    260 /**
    261  * @ingroup DSP_Lib
    262  * @defgroup groupExamples Examples
    263  */
    264 #ifndef _ARM_MATH_H
    265 #define _ARM_MATH_H
    266 
    267 #define __CMSIS_GENERIC              /* disable NVIC and Systick functions */
    268 
    269 #if defined (ARM_MATH_CM4)
    270   #include "core_cm4.h"
    271 #elif defined (ARM_MATH_CM3)
    272   #include "core_cm3.h"
    273 #elif defined (ARM_MATH_CM0)
    274   #include "core_cm0.h"
    275 #else
    276 #include "ARMCM4.h"
    277 #warning "Define either ARM_MATH_CM4 OR ARM_MATH_CM3...By Default building on ARM_MATH_CM4....."
    278 #endif
    279 
    280 #undef  __CMSIS_GENERIC              /* enable NVIC and Systick functions */
    281 #include "string.h"
    282     #include "math.h"
    283 #ifdef	__cplusplus
    284 extern "C"
    285 {
    286 #endif
    287 
    288 
    289   /**
    290    * @brief Macros required for reciprocal calculation in Normalized LMS
    291    */
    292 
    293 #define DELTA_Q31 			(0x100)
    294 #define DELTA_Q15 			0x5
    295 #define INDEX_MASK 			0x0000003F
    296 #define PI					3.14159265358979f
    297 
    298   /**
    299    * @brief Macros required for SINE and COSINE Fast math approximations
    300    */
    301 
    302 #define TABLE_SIZE			256
    303 #define TABLE_SPACING_Q31	0x800000
    304 #define TABLE_SPACING_Q15	0x80
    305 
    306   /**
    307    * @brief Macros required for SINE and COSINE Controller functions
    308    */
    309   /* 1.31(q31) Fixed value of 2/360 */
    310   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
    311 #define INPUT_SPACING			0xB60B61
    312 
    313 
    314   /**
    315    * @brief Error status returned by some functions in the library.
    316    */
    317 
    318   typedef enum
    319     {
    320       ARM_MATH_SUCCESS = 0,              /**< No error */
    321       ARM_MATH_ARGUMENT_ERROR = -1,      /**< One or more arguments are incorrect */
    322       ARM_MATH_LENGTH_ERROR = -2,        /**< Length of data buffer is incorrect */
    323       ARM_MATH_SIZE_MISMATCH = -3,       /**< Size of matrices is not compatible with the operation. */
    324       ARM_MATH_NANINF = -4,              /**< Not-a-number (NaN) or infinity is generated */
    325       ARM_MATH_SINGULAR = -5,            /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
    326       ARM_MATH_TEST_FAILURE = -6         /**< Test Failed  */
    327     } arm_status;
    328 
    329   /**
    330    * @brief 8-bit fractional data type in 1.7 format.
    331    */
    332   typedef int8_t q7_t;
    333 
    334   /**
    335    * @brief 16-bit fractional data type in 1.15 format.
    336    */
    337   typedef int16_t q15_t;
    338 
    339   /**
    340    * @brief 32-bit fractional data type in 1.31 format.
    341    */
    342   typedef int32_t q31_t;
    343 
    344   /**
    345    * @brief 64-bit fractional data type in 1.63 format.
    346    */
    347   typedef int64_t q63_t;
    348 
    349   /**
    350    * @brief 32-bit floating-point type definition.
    351    */
    352   typedef float float32_t;
    353 
    354   /**
    355    * @brief 64-bit floating-point type definition.
    356    */
    357   typedef double float64_t;
    358 
    359   /**
    360    * @brief definition to read/write two 16 bit values.
    361    */
    362 #define __SIMD32(addr)  (*(int32_t **) & (addr))
    363 
    364 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
    /**
     * @brief definition to pack two 16 bit values.
     *
     * The low halfword of the result is the low halfword of ARG1; the high
     * halfword is taken from (ARG2 << ARG3).  Every argument is parenthesized
     * so that an expression may be passed as the shift count.
     */
    #define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
                                             (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
    370 
    371 #endif
    372 
    373 
    374    /**
    375    * @brief definition to pack four 8 bit values.
    376    */
    377 #ifndef ARM_MATH_BIG_ENDIAN
    378 
    379 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) |	\
    380                                 (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) |	\
    381 							    (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |	\
    382 							    (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
    383 #else
    384 
    385 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) |	\
    386                                 (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) |	\
    387 							    (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |	\
    388 							    (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
    389 
    390 #endif
    391 
    392 
    393   /**
    394    * @brief Clips Q63 to Q31 values.
    395    */
    396   static __INLINE q31_t clip_q63_to_q31(
    397 					q63_t x)
    398   {
    399     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
    400       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
    401   }
    402 
    403   /**
    404    * @brief Clips Q63 to Q15 values.
    405    */
    406   static __INLINE q15_t clip_q63_to_q15(
    407 					q63_t x)
    408   {
    409     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
    410       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
    411   }
    412 
    413   /**
    414    * @brief Clips Q31 to Q7 values.
    415    */
    416   static __INLINE q7_t clip_q31_to_q7(
    417 				      q31_t x)
    418   {
    419     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
    420       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
    421   }
    422 
    423   /**
    424    * @brief Clips Q31 to Q15 values.
    425    */
    426   static __INLINE q15_t clip_q31_to_q15(
    427 					q31_t x)
    428   {
    429     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
    430       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
    431   }
    432 
    433   /**
    434    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
    435    */
    436 
    437   static __INLINE q63_t mult32x64(
    438 				  q63_t x,
    439 				  q31_t y)
    440   {
    441     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
    442             (((q63_t) (x >> 32) * y)));
    443   }
    444 
    445 
    446 #if defined (ARM_MATH_CM0) && defined ( __CC_ARM   )
    447 #define __CLZ __clz
    448 #endif
    449 
    450 #if defined (ARM_MATH_CM0) && ((defined (__ICCARM__)) ||(defined (__GNUC__)) || defined (__TASKING__) )
    451 
    452   static __INLINE  uint32_t __CLZ(q31_t data);
    453 
    454 
    455   static __INLINE uint32_t __CLZ(q31_t data)
    456   {
    457 	  uint32_t count = 0;
    458 	  uint32_t mask = 0x80000000;
    459 
    460 	  while((data & mask) ==  0)
    461 	  {
    462 		  count += 1u;
    463 		  mask = mask >> 1u;
    464 	  }
    465 
    466 	  return(count);
    467 
    468   }
    469 
    470 #endif
    471 
    472   /**
    473    * @brief Function to Calculates 1/in(reciprocal) value of Q31 Data type.
    474    */
    475 
    476   static __INLINE uint32_t arm_recip_q31(
    477 					 q31_t in,
    478 					 q31_t * dst,
    479 					 q31_t * pRecipTable)
    480   {
    481 
    482     uint32_t out, tempVal;
    483     uint32_t index, i;
    484     uint32_t signBits;
    485 
    486     if(in > 0)
    487       {
    488 	signBits = __CLZ(in) - 1;
    489       }
    490     else
    491       {
    492 	signBits = __CLZ(-in) - 1;
    493       }
    494 
    495     /* Convert input sample to 1.31 format */
    496     in = in << signBits;
    497 
    498     /* calculation of index for initial approximated Val */
    499     index = (uint32_t) (in >> 24u);
    500     index = (index & INDEX_MASK);
    501 
    502     /* 1.31 with exp 1 */
    503     out = pRecipTable[index];
    504 
    505     /* calculation of reciprocal value */
    506     /* running approximation for two iterations */
    507     for (i = 0u; i < 2u; i++)
    508       {
    509 	tempVal = (q31_t) (((q63_t) in * out) >> 31u);
    510 	tempVal = 0x7FFFFFFF - tempVal;
    511 	/*      1.31 with exp 1 */
    512 	//out = (q31_t) (((q63_t) out * tempVal) >> 30u);
    513 	out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
    514       }
    515 
    516     /* write output */
    517     *dst = out;
    518 
    519     /* return num of signbits of out = 1/in value */
    520     return (signBits + 1u);
    521 
    522   }
    523 
    524   /**
    525    * @brief Function to Calculates 1/in(reciprocal) value of Q15 Data type.
    526    */
    527   static __INLINE uint32_t arm_recip_q15(
    528 					 q15_t in,
    529 					 q15_t * dst,
    530 					 q15_t * pRecipTable)
    531   {
    532 
    533     uint32_t out = 0, tempVal = 0;
    534     uint32_t index = 0, i = 0;
    535     uint32_t signBits = 0;
    536 
    537     if(in > 0)
    538       {
    539 	signBits = __CLZ(in) - 17;
    540       }
    541     else
    542       {
    543 	signBits = __CLZ(-in) - 17;
    544       }
    545 
    546     /* Convert input sample to 1.15 format */
    547     in = in << signBits;
    548 
    549     /* calculation of index for initial approximated Val */
    550     index = in >> 8;
    551     index = (index & INDEX_MASK);
    552 
    553     /*      1.15 with exp 1  */
    554     out = pRecipTable[index];
    555 
    556     /* calculation of reciprocal value */
    557     /* running approximation for two iterations */
    558     for (i = 0; i < 2; i++)
    559       {
    560 	tempVal = (q15_t) (((q31_t) in * out) >> 15);
    561 	tempVal = 0x7FFF - tempVal;
    562 	/*      1.15 with exp 1 */
    563 	out = (q15_t) (((q31_t) out * tempVal) >> 14);
    564       }
    565 
    566     /* write output */
    567     *dst = out;
    568 
    569     /* return num of signbits of out = 1/in value */
    570     return (signBits + 1);
    571 
    572   }
    573 
    574 
    575   /*
    576    * @brief C custom defined intrinsic function for only M0 processors
    577    */
    578 #if defined(ARM_MATH_CM0)
    579 
    580   static __INLINE q31_t __SSAT(
    581 			       q31_t x,
    582 			       uint32_t y)
    583   {
    584     int32_t posMax, negMin;
    585     uint32_t i;
    586 
    587     posMax = 1;
    588     for (i = 0; i < (y - 1); i++)
    589       {
    590 	posMax = posMax * 2;
    591       }
    592 
    593     if(x > 0)
    594       {
    595 	posMax = (posMax - 1);
    596 
    597 	if(x > posMax)
    598 	  {
    599 	    x = posMax;
    600 	  }
    601       }
    602     else
    603       {
    604 	negMin = -posMax;
    605 
    606 	if(x < negMin)
    607 	  {
    608 	    x = negMin;
    609 	  }
    610       }
    611     return (x);
    612 
    613 
    614   }
    615 
    616 #endif /* end of ARM_MATH_CM0 */
    617 
    618 
    619 
    620   /*
    621    * @brief C custom defined intrinsic function for M3 and M0 processors
    622    */
    623 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
    624 
    625   /*
    626    * @brief C custom defined QADD8 for M3 and M0 processors
    627    */
    628   static __INLINE q31_t __QADD8(
    629 				q31_t x,
    630 				q31_t y)
    631   {
    632 
    633     q31_t sum;
    634     q7_t r, s, t, u;
    635 
    636     r = (char) x;
    637     s = (char) y;
    638 
    639     r = __SSAT((q31_t) (r + s), 8);
    640     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
    641     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
    642     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
    643 
    644     sum = (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
    645       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
    646 
    647     return sum;
    648 
    649   }
    650 
    651   /*
    652    * @brief C custom defined QSUB8 for M3 and M0 processors
    653    */
    654   static __INLINE q31_t __QSUB8(
    655 				q31_t x,
    656 				q31_t y)
    657   {
    658 
    659     q31_t sum;
    660     q31_t r, s, t, u;
    661 
    662     r = (char) x;
    663     s = (char) y;
    664 
    665     r = __SSAT((r - s), 8);
    666     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
    667     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
    668     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
    669 
    670     sum =
    671       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r & 0x000000FF);
    672 
    673     return sum;
    674   }
    675 
    676   /*
    677    * @brief C custom defined QADD16 for M3 and M0 processors
    678    */
    679 
    680   /*
    681    * @brief C custom defined QADD16 for M3 and M0 processors
    682    */
    683   static __INLINE q31_t __QADD16(
    684 				 q31_t x,
    685 				 q31_t y)
    686   {
    687 
    688     q31_t sum;
    689     q31_t r, s;
    690 
    691     r = (short) x;
    692     s = (short) y;
    693 
    694     r = __SSAT(r + s, 16);
    695     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
    696 
    697     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
    698 
    699     return sum;
    700 
    701   }
    702 
    703   /*
    704    * @brief C custom defined SHADD16 for M3 and M0 processors
    705    */
    706   static __INLINE q31_t __SHADD16(
    707 				  q31_t x,
    708 				  q31_t y)
    709   {
    710 
    711     q31_t sum;
    712     q31_t r, s;
    713 
    714     r = (short) x;
    715     s = (short) y;
    716 
    717     r = ((r >> 1) + (s >> 1));
    718     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
    719 
    720     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
    721 
    722     return sum;
    723 
    724   }
    725 
    726   /*
    727    * @brief C custom defined QSUB16 for M3 and M0 processors
    728    */
    729   static __INLINE q31_t __QSUB16(
    730 				 q31_t x,
    731 				 q31_t y)
    732   {
    733 
    734     q31_t sum;
    735     q31_t r, s;
    736 
    737     r = (short) x;
    738     s = (short) y;
    739 
    740     r = __SSAT(r - s, 16);
    741     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
    742 
    743     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
    744 
    745     return sum;
    746   }
    747 
    748   /*
    749    * @brief C custom defined SHSUB16 for M3 and M0 processors
    750    */
    751   static __INLINE q31_t __SHSUB16(
    752 				  q31_t x,
    753 				  q31_t y)
    754   {
    755 
    756     q31_t diff;
    757     q31_t r, s;
    758 
    759     r = (short) x;
    760     s = (short) y;
    761 
    762     r = ((r >> 1) - (s >> 1));
    763     s = (((x >> 17) - (y >> 17)) << 16);
    764 
    765     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
    766 
    767     return diff;
    768   }
    769 
    770   /*
    771    * @brief C custom defined QASX for M3 and M0 processors
    772    */
    773   static __INLINE q31_t __QASX(
    774 			       q31_t x,
    775 			       q31_t y)
    776   {
    777 
    778     q31_t sum = 0;
    779 
    780     sum = ((sum + clip_q31_to_q15((q31_t) ((short) (x >> 16) + (short) y))) << 16) +
    781       clip_q31_to_q15((q31_t) ((short) x - (short) (y >> 16)));
    782 
    783     return sum;
    784   }
    785 
    786   /*
    787    * @brief C custom defined SHASX for M3 and M0 processors
    788    */
    789   static __INLINE q31_t __SHASX(
    790 				q31_t x,
    791 				q31_t y)
    792   {
    793 
    794     q31_t sum;
    795     q31_t r, s;
    796 
    797     r = (short) x;
    798     s = (short) y;
    799 
    800     r = ((r >> 1) - (y >> 17));
    801     s = (((x >> 17) + (s >> 1)) << 16);
    802 
    803     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
    804 
    805     return sum;
    806   }
    807 
    808 
    809   /*
    810    * @brief C custom defined QSAX for M3 and M0 processors
    811    */
    812   static __INLINE q31_t __QSAX(
    813 			       q31_t x,
    814 			       q31_t y)
    815   {
    816 
    817     q31_t sum = 0;
    818 
    819     sum = ((sum + clip_q31_to_q15((q31_t) ((short) (x >> 16) - (short) y))) << 16) +
    820       clip_q31_to_q15((q31_t) ((short) x + (short) (y >> 16)));
    821 
    822     return sum;
    823   }
    824 
    825   /*
    826    * @brief C custom defined SHSAX for M3 and M0 processors
    827    */
    828   static __INLINE q31_t __SHSAX(
    829 				q31_t x,
    830 				q31_t y)
    831   {
    832 
    833     q31_t sum;
    834     q31_t r, s;
    835 
    836     r = (short) x;
    837     s = (short) y;
    838 
    839     r = ((r >> 1) + (y >> 17));
    840     s = (((x >> 17) - (s >> 1)) << 16);
    841 
    842     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
    843 
    844     return sum;
    845   }
    846 
    847   /*
    848    * @brief C custom defined SMUSDX for M3 and M0 processors
    849    */
    850   static __INLINE q31_t __SMUSDX(
    851 				 q31_t x,
    852 				 q31_t y)
    853   {
    854 
    855     return ((q31_t)(((short) x * (short) (y >> 16)) -
    856 		    ((short) (x >> 16) * (short) y)));
    857   }
    858 
    859   /*
    860    * @brief C custom defined SMUADX for M3 and M0 processors
    861    */
    862   static __INLINE q31_t __SMUADX(
    863 				 q31_t x,
    864 				 q31_t y)
    865   {
    866 
    867     return ((q31_t)(((short) x * (short) (y >> 16)) +
    868 		    ((short) (x >> 16) * (short) y)));
    869   }
    870 
    871   /*
    872    * @brief C custom defined QADD for M3 and M0 processors
    873    */
    874   static __INLINE q31_t __QADD(
    875 			       q31_t x,
    876 			       q31_t y)
    877   {
    878     return clip_q63_to_q31((q63_t) x + y);
    879   }
    880 
    881   /*
    882    * @brief C custom defined QSUB for M3 and M0 processors
    883    */
    884   static __INLINE q31_t __QSUB(
    885 			       q31_t x,
    886 			       q31_t y)
    887   {
    888     return clip_q63_to_q31((q63_t) x - y);
    889   }
    890 
    891   /*
    892    * @brief C custom defined SMLAD for M3 and M0 processors
    893    */
    894   static __INLINE q31_t __SMLAD(
    895 				q31_t x,
    896 				q31_t y,
    897 				q31_t sum)
    898   {
    899 
    900     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
    901             ((short) x * (short) y));
    902   }
    903 
    904   /*
    905    * @brief C custom defined SMLADX for M3 and M0 processors
    906    */
    907   static __INLINE q31_t __SMLADX(
    908 				 q31_t x,
    909 				 q31_t y,
    910 				 q31_t sum)
    911   {
    912 
    913     return (sum + ((short) (x >> 16) * (short) (y)) +
    914             ((short) x * (short) (y >> 16)));
    915   }
    916 
    917   /*
    918    * @brief C custom defined SMLSDX for M3 and M0 processors
    919    */
    920   static __INLINE q31_t __SMLSDX(
    921 				 q31_t x,
    922 				 q31_t y,
    923 				 q31_t sum)
    924   {
    925 
    926     return (sum - ((short) (x >> 16) * (short) (y)) +
    927             ((short) x * (short) (y >> 16)));
    928   }
    929 
    930   /*
    931    * @brief C custom defined SMLALD for M3 and M0 processors
    932    */
    933   static __INLINE q63_t __SMLALD(
    934 				 q31_t x,
    935 				 q31_t y,
    936 				 q63_t sum)
    937   {
    938 
    939     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
    940             ((short) x * (short) y));
    941   }
    942 
    943   /*
    944    * @brief C custom defined SMLALDX for M3 and M0 processors
    945    */
    946   static __INLINE q63_t __SMLALDX(
    947 				  q31_t x,
    948 				  q31_t y,
    949 				  q63_t sum)
    950   {
    951 
    952     return (sum + ((short) (x >> 16) * (short) y)) +
    953       ((short) x * (short) (y >> 16));
    954   }
    955 
    /*
     * @brief C custom defined SMUAD for M3 and M0 processors.
     *
     * Multiplies the matching signed 16-bit halfwords of x and y (top with
     * top, bottom with bottom) and returns the sum of the two products.
     *
     * @param[in] x  first packed operand: halfwords in bits [31:16] and [15:0].
     * @param[in] y  second packed operand, same layout as x.
     * @return x[31:16]*y[31:16] + x[15:0]*y[15:0], reduced modulo 2^32.
     */
    static __INLINE q31_t __SMUAD(
        q31_t x,
        q31_t y)
    {
      /* The bottom halfword is sign-extended with a narrowing cast; the former
         "(v << 16) >> 16" left-shifted a signed value, which is undefined for
         negative or overflowing operands. */
      int32_t hiProd = (int32_t) (int16_t) (x >> 16) * (int16_t) (y >> 16);
      int32_t loProd = (int32_t) (int16_t) x * (int16_t) y;

      /* Both products are 2^30 when all four halfwords are -32768, so the
         addition is done unsigned to wrap rather than overflow. */
      return (q31_t) ((uint32_t) hiProd + (uint32_t) loProd);
    }
    967 
    /*
     * @brief C custom defined SMUSD for M3 and M0 processors.
     *
     * Multiplies the matching signed 16-bit halfwords of x and y and returns
     * the bottom product minus the top product.
     *
     * @param[in] x  first packed operand: halfwords in bits [31:16] and [15:0].
     * @param[in] y  second packed operand, same layout as x.
     * @return x[15:0]*y[15:0] - x[31:16]*y[31:16].
     */
    static __INLINE q31_t __SMUSD(
        q31_t x,
        q31_t y)
    {
      /* The bottom halfword is sign-extended with a narrowing cast; the former
         "(v << 16) >> 16" left-shifted a signed value, which is undefined for
         negative or overflowing operands. */
      int32_t hiProd = (int32_t) (int16_t) (x >> 16) * (int16_t) (y >> 16);
      int32_t loProd = (int32_t) (int16_t) x * (int16_t) y;

      /* Each product lies in [-(2^30 - 2^15), 2^30], so the difference always
         fits in 32 bits. */
      return (q31_t) (loProd - hiProd);
    }
    979 
    980 
    981 
    982 
    983 #endif /* (ARM_MATH_CM3) || defined (ARM_MATH_CM0) */
    984 
    985 
    986   /**
    987    * @brief Instance structure for the Q7 FIR filter.
    988    */
    989   typedef struct
    990   {
    991     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
    992     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    993     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
    994   } arm_fir_instance_q7;
    995 
    996   /**
    997    * @brief Instance structure for the Q15 FIR filter.
    998    */
    999   typedef struct
   1000   {
   1001     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
   1002     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   1003     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
   1004   } arm_fir_instance_q15;
   1005 
   1006   /**
   1007    * @brief Instance structure for the Q31 FIR filter.
   1008    */
   1009   typedef struct
   1010   {
   1011     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
   1012     q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   1013     q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
   1014   } arm_fir_instance_q31;
   1015 
   1016   /**
   1017    * @brief Instance structure for the floating-point FIR filter.
   1018    */
   1019   typedef struct
   1020   {
   1021     uint16_t numTaps;     /**< number of filter coefficients in the filter. */
   1022     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   1023     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
   1024   } arm_fir_instance_f32;
   1025 
   1026 
   1027   /**
   1028    * @brief Processing function for the Q7 FIR filter.
   1029    * @param[in] *S points to an instance of the Q7 FIR filter structure.
   1030    * @param[in] *pSrc points to the block of input data.
   1031    * @param[out] *pDst points to the block of output data.
   1032    * @param[in] blockSize number of samples to process.
   1033    * @return none.
   1034    */
   1035   void arm_fir_q7(
   1036 		  const arm_fir_instance_q7 * S,
   1037 		   q7_t * pSrc,
   1038 		  q7_t * pDst,
   1039 		  uint32_t blockSize);
   1040 
   1041 
   1042   /**
   1043    * @brief  Initialization function for the Q7 FIR filter.
   1044    * @param[in,out] *S points to an instance of the Q7 FIR structure.
   1045    * @param[in] numTaps  Number of filter coefficients in the filter.
   1046    * @param[in] *pCoeffs points to the filter coefficients.
   1047    * @param[in] *pState points to the state buffer.
   1048    * @param[in] blockSize number of samples that are processed.
   1049    * @return none
   1050    */
   1051   void arm_fir_init_q7(
   1052 		       arm_fir_instance_q7 * S,
   1053 		       uint16_t numTaps,
   1054 		       q7_t * pCoeffs,
   1055 		       q7_t * pState,
   1056 		       uint32_t blockSize);
   1057 
   1058 
   1059   /**
   1060    * @brief Processing function for the Q15 FIR filter.
   1061    * @param[in] *S points to an instance of the Q15 FIR structure.
   1062    * @param[in] *pSrc points to the block of input data.
   1063    * @param[out] *pDst points to the block of output data.
   1064    * @param[in] blockSize number of samples to process.
   1065    * @return none.
   1066    */
   1067   void arm_fir_q15(
   1068 		   const arm_fir_instance_q15 * S,
   1069 		    q15_t * pSrc,
   1070 		   q15_t * pDst,
   1071 		   uint32_t blockSize);
   1072 
   1073   /**
   1074    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
   1075    * @param[in] *S points to an instance of the Q15 FIR filter structure.
   1076    * @param[in] *pSrc points to the block of input data.
   1077    * @param[out] *pDst points to the block of output data.
   1078    * @param[in] blockSize number of samples to process.
   1079    * @return none.
   1080    */
   1081   void arm_fir_fast_q15(
   1082 			const arm_fir_instance_q15 * S,
   1083 			 q15_t * pSrc,
   1084 			q15_t * pDst,
   1085 			uint32_t blockSize);
   1086 
   1087   /**
   1088    * @brief  Initialization function for the Q15 FIR filter.
   1089    * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
   1090    * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
   1091    * @param[in] *pCoeffs points to the filter coefficients.
   1092    * @param[in] *pState points to the state buffer.
   1093    * @param[in] blockSize number of samples that are processed at a time.
   1094    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
   1095    * <code>numTaps</code> is not a supported value.
   1096    */
   1097 
   1098        arm_status arm_fir_init_q15(
   1099 			      arm_fir_instance_q15 * S,
   1100 			      uint16_t numTaps,
   1101 			      q15_t * pCoeffs,
   1102 			      q15_t * pState,
   1103 			      uint32_t blockSize);
   1104 
   1105   /**
   1106    * @brief Processing function for the Q31 FIR filter.
   1107    * @param[in] *S points to an instance of the Q31 FIR filter structure.
   1108    * @param[in] *pSrc points to the block of input data.
   1109    * @param[out] *pDst points to the block of output data.
   1110    * @param[in] blockSize number of samples to process.
   1111    * @return none.
   1112    */
   1113   void arm_fir_q31(
   1114 		   const arm_fir_instance_q31 * S,
   1115 		    q31_t * pSrc,
   1116 		   q31_t * pDst,
   1117 		   uint32_t blockSize);
   1118 
   1119   /**
   1120    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
   1121    * @param[in] *S points to an instance of the Q31 FIR structure.
   1122    * @param[in] *pSrc points to the block of input data.
   1123    * @param[out] *pDst points to the block of output data.
   1124    * @param[in] blockSize number of samples to process.
   1125    * @return none.
   1126    */
   1127   void arm_fir_fast_q31(
   1128 			const arm_fir_instance_q31 * S,
   1129 			 q31_t * pSrc,
   1130 			q31_t * pDst,
   1131 			uint32_t blockSize);
   1132 
   1133   /**
   1134    * @brief  Initialization function for the Q31 FIR filter.
   1135    * @param[in,out] *S points to an instance of the Q31 FIR structure.
   1136    * @param[in] 	numTaps  Number of filter coefficients in the filter.
   1137    * @param[in] 	*pCoeffs points to the filter coefficients.
   1138    * @param[in] 	*pState points to the state buffer.
   1139    * @param[in] 	blockSize number of samples that are processed at a time.
   1140    * @return 		none.
   1141    */
   1142   void arm_fir_init_q31(
   1143 			arm_fir_instance_q31 * S,
   1144 			uint16_t numTaps,
   1145 			q31_t * pCoeffs,
   1146 			q31_t * pState,
   1147 			uint32_t blockSize);
   1148 
   1149   /**
   1150    * @brief Processing function for the floating-point FIR filter.
   1151    * @param[in] *S points to an instance of the floating-point FIR structure.
   1152    * @param[in] *pSrc points to the block of input data.
   1153    * @param[out] *pDst points to the block of output data.
   1154    * @param[in] blockSize number of samples to process.
   1155    * @return none.
   1156    */
   1157   void arm_fir_f32(
   1158 		   const arm_fir_instance_f32 * S,
   1159 		    float32_t * pSrc,
   1160 		   float32_t * pDst,
   1161 		   uint32_t blockSize);
   1162 
   1163   /**
   1164    * @brief  Initialization function for the floating-point FIR filter.
   1165    * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
   1166    * @param[in] 	numTaps  Number of filter coefficients in the filter.
   1167    * @param[in] 	*pCoeffs points to the filter coefficients.
   1168    * @param[in] 	*pState points to the state buffer.
   1169    * @param[in] 	blockSize number of samples that are processed at a time.
   1170    * @return    	none.
   1171    */
   1172   void arm_fir_init_f32(
   1173 			arm_fir_instance_f32 * S,
   1174 			uint16_t numTaps,
   1175 			float32_t * pCoeffs,
   1176 			float32_t * pState,
   1177 			uint32_t blockSize);
   1178 
   1179 
   1180   /**
   1181    * @brief Instance structure for the Q15 Biquad cascade filter.
   1182    */
   1183   typedef struct
   1184   {
   1185     int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
   1186     q15_t *pState;            /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
   1187     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
   1188     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
   1189 
   1190   } arm_biquad_casd_df1_inst_q15;
   1191 
   1192 
   1193   /**
   1194    * @brief Instance structure for the Q31 Biquad cascade filter.
   1195    */
   1196   typedef struct
   1197   {
   1198     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
   1199     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
   1200     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
   1201     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
   1202 
   1203   } arm_biquad_casd_df1_inst_q31;
   1204 
   1205   /**
   1206    * @brief Instance structure for the floating-point Biquad cascade filter.
   1207    */
   1208   typedef struct
   1209   {
   1210     uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
   1211     float32_t *pState;          /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
   1212     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
   1213 
   1214 
   1215   } arm_biquad_casd_df1_inst_f32;
   1216 
   1217 
   1218 
   1219   /**
   1220    * @brief Processing function for the Q15 Biquad cascade filter.
   1221    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
   1222    * @param[in]  *pSrc points to the block of input data.
   1223    * @param[out] *pDst points to the block of output data.
   1224    * @param[in]  blockSize number of samples to process.
   1225    * @return     none.
   1226    */
   1227 
   1228   void arm_biquad_cascade_df1_q15(
   1229 				  const arm_biquad_casd_df1_inst_q15 * S,
   1230 				   q15_t * pSrc,
   1231 				  q15_t * pDst,
   1232 				  uint32_t blockSize);
   1233 
   1234   /**
   1235    * @brief  Initialization function for the Q15 Biquad cascade filter.
   1236    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
   1237    * @param[in]     numStages    number of 2nd order stages in the filter.
   1238    * @param[in]     *pCoeffs     points to the filter coefficients.
   1239    * @param[in]     *pState      points to the state buffer.
   1240    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
   1241    * @return        none
   1242    */
   1243 
   1244   void arm_biquad_cascade_df1_init_q15(
   1245 				       arm_biquad_casd_df1_inst_q15 * S,
   1246 				       uint8_t numStages,
   1247 				       q15_t * pCoeffs,
   1248 				       q15_t * pState,
   1249 				       int8_t postShift);
   1250 
   1251 
   1252   /**
   1253    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
   1254    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
   1255    * @param[in]  *pSrc points to the block of input data.
   1256    * @param[out] *pDst points to the block of output data.
   1257    * @param[in]  blockSize number of samples to process.
   1258    * @return     none.
   1259    */
   1260 
   1261   void arm_biquad_cascade_df1_fast_q15(
   1262 				       const arm_biquad_casd_df1_inst_q15 * S,
   1263 				        q15_t * pSrc,
   1264 				       q15_t * pDst,
   1265 				       uint32_t blockSize);
   1266 
   1267 
   1268   /**
   1269    * @brief Processing function for the Q31 Biquad cascade filter
   1270    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
   1271    * @param[in]  *pSrc      points to the block of input data.
   1272    * @param[out] *pDst      points to the block of output data.
   1273    * @param[in]  blockSize  number of samples to process.
   1274    * @return     none.
   1275    */
   1276 
   1277   void arm_biquad_cascade_df1_q31(
   1278 				  const arm_biquad_casd_df1_inst_q31 * S,
   1279 				   q31_t * pSrc,
   1280 				  q31_t * pDst,
   1281 				  uint32_t blockSize);
   1282 
   1283   /**
   1284    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
   1285    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
   1286    * @param[in]  *pSrc      points to the block of input data.
   1287    * @param[out] *pDst      points to the block of output data.
   1288    * @param[in]  blockSize  number of samples to process.
   1289    * @return     none.
   1290    */
   1291 
   1292   void arm_biquad_cascade_df1_fast_q31(
   1293 				       const arm_biquad_casd_df1_inst_q31 * S,
   1294 				        q31_t * pSrc,
   1295 				       q31_t * pDst,
   1296 				       uint32_t blockSize);
   1297 
   1298   /**
   1299    * @brief  Initialization function for the Q31 Biquad cascade filter.
   1300    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
   1301    * @param[in]     numStages      number of 2nd order stages in the filter.
   1302    * @param[in]     *pCoeffs     points to the filter coefficients.
   1303    * @param[in]     *pState      points to the state buffer.
   1304    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
   1305    * @return        none
   1306    */
   1307 
   1308   void arm_biquad_cascade_df1_init_q31(
   1309 				       arm_biquad_casd_df1_inst_q31 * S,
   1310 				       uint8_t numStages,
   1311 				       q31_t * pCoeffs,
   1312 				       q31_t * pState,
   1313 				       int8_t postShift);
   1314 
   1315   /**
   1316    * @brief Processing function for the floating-point Biquad cascade filter.
   1317    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
   1318    * @param[in]  *pSrc      points to the block of input data.
   1319    * @param[out] *pDst      points to the block of output data.
   1320    * @param[in]  blockSize  number of samples to process.
   1321    * @return     none.
   1322    */
   1323 
   1324   void arm_biquad_cascade_df1_f32(
   1325 				  const arm_biquad_casd_df1_inst_f32 * S,
   1326 				   float32_t * pSrc,
   1327 				  float32_t * pDst,
   1328 				  uint32_t blockSize);
   1329 
   1330   /**
   1331    * @brief  Initialization function for the floating-point Biquad cascade filter.
   1332    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
   1333    * @param[in]     numStages    number of 2nd order stages in the filter.
   1334    * @param[in]     *pCoeffs     points to the filter coefficients.
   1335    * @param[in]     *pState      points to the state buffer.
   1336    * @return        none
   1337    */
   1338 
   1339   void arm_biquad_cascade_df1_init_f32(
   1340 				       arm_biquad_casd_df1_inst_f32 * S,
   1341 				       uint8_t numStages,
   1342 				       float32_t * pCoeffs,
   1343 				       float32_t * pState);
   1344 
   1345 
   1346   /**
   1347    * @brief Instance structure for the floating-point matrix structure.
   1348    */
   1349 
   1350   typedef struct
   1351   {
   1352     uint16_t numRows;     /**< number of rows of the matrix.     */
   1353     uint16_t numCols;     /**< number of columns of the matrix.  */
   1354     float32_t *pData;     /**< points to the data of the matrix. */
   1355   } arm_matrix_instance_f32;
   1356 
   1357   /**
   1358    * @brief Instance structure for the Q15 matrix structure.
   1359    */
   1360 
   1361   typedef struct
   1362   {
   1363     uint16_t numRows;     /**< number of rows of the matrix.     */
   1364     uint16_t numCols;     /**< number of columns of the matrix.  */
   1365     q15_t *pData;         /**< points to the data of the matrix. */
   1366 
   1367   } arm_matrix_instance_q15;
   1368 
   1369   /**
   1370    * @brief Instance structure for the Q31 matrix structure.
   1371    */
   1372 
   1373   typedef struct
   1374   {
   1375     uint16_t numRows;     /**< number of rows of the matrix.     */
   1376     uint16_t numCols;     /**< number of columns of the matrix.  */
   1377     q31_t *pData;         /**< points to the data of the matrix. */
   1378 
   1379   } arm_matrix_instance_q31;
   1380 
   1381 
   1382 
   1383   /**
   1384    * @brief Floating-point matrix addition.
   1385    * @param[in]       *pSrcA points to the first input matrix structure
   1386    * @param[in]       *pSrcB points to the second input matrix structure
   1387    * @param[out]      *pDst points to output matrix structure
   1388    * @return     The function returns either
   1389    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1390    */
   1391 
   1392   arm_status arm_mat_add_f32(
   1393 			     const arm_matrix_instance_f32 * pSrcA,
   1394 			     const arm_matrix_instance_f32 * pSrcB,
   1395 			     arm_matrix_instance_f32 * pDst);
   1396 
   1397   /**
   1398    * @brief Q15 matrix addition.
   1399    * @param[in]       *pSrcA points to the first input matrix structure
   1400    * @param[in]       *pSrcB points to the second input matrix structure
   1401    * @param[out]      *pDst points to output matrix structure
   1402    * @return     The function returns either
   1403    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1404    */
   1405 
   1406   arm_status arm_mat_add_q15(
   1407 			     const arm_matrix_instance_q15 * pSrcA,
   1408 			     const arm_matrix_instance_q15 * pSrcB,
   1409 			     arm_matrix_instance_q15 * pDst);
   1410 
   1411   /**
   1412    * @brief Q31 matrix addition.
   1413    * @param[in]       *pSrcA points to the first input matrix structure
   1414    * @param[in]       *pSrcB points to the second input matrix structure
   1415    * @param[out]      *pDst points to output matrix structure
   1416    * @return     The function returns either
   1417    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1418    */
   1419 
   1420   arm_status arm_mat_add_q31(
   1421 			     const arm_matrix_instance_q31 * pSrcA,
   1422 			     const arm_matrix_instance_q31 * pSrcB,
   1423 			     arm_matrix_instance_q31 * pDst);
   1424 
   1425 
   1426   /**
   1427    * @brief Floating-point matrix transpose.
   1428    * @param[in]  *pSrc points to the input matrix
   1429    * @param[out] *pDst points to the output matrix
   1430    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
   1431    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1432    */
   1433 
   1434   arm_status arm_mat_trans_f32(
   1435 			       const arm_matrix_instance_f32 * pSrc,
   1436 			       arm_matrix_instance_f32 * pDst);
   1437 
   1438 
   1439   /**
   1440    * @brief Q15 matrix transpose.
   1441    * @param[in]  *pSrc points to the input matrix
   1442    * @param[out] *pDst points to the output matrix
   1443    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
   1444    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1445    */
   1446 
   1447   arm_status arm_mat_trans_q15(
   1448 			       const arm_matrix_instance_q15 * pSrc,
   1449 			       arm_matrix_instance_q15 * pDst);
   1450 
   1451   /**
   1452    * @brief Q31 matrix transpose.
   1453    * @param[in]  *pSrc points to the input matrix
   1454    * @param[out] *pDst points to the output matrix
   1455    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
   1456    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1457    */
   1458 
   1459   arm_status arm_mat_trans_q31(
   1460 			       const arm_matrix_instance_q31 * pSrc,
   1461 			       arm_matrix_instance_q31 * pDst);
   1462 
   1463 
   1464   /**
   1465    * @brief Floating-point matrix multiplication
   1466    * @param[in]       *pSrcA points to the first input matrix structure
   1467    * @param[in]       *pSrcB points to the second input matrix structure
   1468    * @param[out]      *pDst points to output matrix structure
   1469    * @return     The function returns either
   1470    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1471    */
   1472 
   1473   arm_status arm_mat_mult_f32(
   1474 			      const arm_matrix_instance_f32 * pSrcA,
   1475 			      const arm_matrix_instance_f32 * pSrcB,
   1476 			      arm_matrix_instance_f32 * pDst);
   1477 
   1478   /**
   1479    * @brief Q15 matrix multiplication
   1480    * @param[in]       *pSrcA points to the first input matrix structure
   1481    * @param[in]       *pSrcB points to the second input matrix structure
   1482    * @param[out]      *pDst points to output matrix structure
   1483    * @return     The function returns either
   1484    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1485    */
   1486 
   1487   arm_status arm_mat_mult_q15(
   1488 			      const arm_matrix_instance_q15 * pSrcA,
   1489 			      const arm_matrix_instance_q15 * pSrcB,
   1490 			      arm_matrix_instance_q15 * pDst,
   1491 			      q15_t * pState);
   1492 
   1493   /**
   1494    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
   1495    * @param[in]       *pSrcA  points to the first input matrix structure
   1496    * @param[in]       *pSrcB  points to the second input matrix structure
   1497    * @param[out]      *pDst   points to output matrix structure
   1498    * @param[in]		  *pState points to the array for storing intermediate results
   1499    * @return     The function returns either
   1500    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1501    */
   1502 
   1503   arm_status arm_mat_mult_fast_q15(
   1504 				   const arm_matrix_instance_q15 * pSrcA,
   1505 				   const arm_matrix_instance_q15 * pSrcB,
   1506 				   arm_matrix_instance_q15 * pDst,
   1507 				   q15_t * pState);
   1508 
   1509   /**
   1510    * @brief Q31 matrix multiplication
   1511    * @param[in]       *pSrcA points to the first input matrix structure
   1512    * @param[in]       *pSrcB points to the second input matrix structure
   1513    * @param[out]      *pDst points to output matrix structure
   1514    * @return     The function returns either
   1515    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1516    */
   1517 
   1518   arm_status arm_mat_mult_q31(
   1519 			      const arm_matrix_instance_q31 * pSrcA,
   1520 			      const arm_matrix_instance_q31 * pSrcB,
   1521 			      arm_matrix_instance_q31 * pDst);
   1522 
   1523   /**
   1524    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
   1525    * @param[in]       *pSrcA points to the first input matrix structure
   1526    * @param[in]       *pSrcB points to the second input matrix structure
   1527    * @param[out]      *pDst points to output matrix structure
   1528    * @return     The function returns either
   1529    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1530    */
   1531 
   1532   arm_status arm_mat_mult_fast_q31(
   1533 				   const arm_matrix_instance_q31 * pSrcA,
   1534 				   const arm_matrix_instance_q31 * pSrcB,
   1535 				   arm_matrix_instance_q31 * pDst);
   1536 
   1537 
   1538   /**
   1539    * @brief Floating-point matrix subtraction
   1540    * @param[in]       *pSrcA points to the first input matrix structure
   1541    * @param[in]       *pSrcB points to the second input matrix structure
   1542    * @param[out]      *pDst points to output matrix structure
   1543    * @return     The function returns either
   1544    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1545    */
   1546 
   1547   arm_status arm_mat_sub_f32(
   1548 			     const arm_matrix_instance_f32 * pSrcA,
   1549 			     const arm_matrix_instance_f32 * pSrcB,
   1550 			     arm_matrix_instance_f32 * pDst);
   1551 
   1552   /**
   1553    * @brief Q15 matrix subtraction
   1554    * @param[in]       *pSrcA points to the first input matrix structure
   1555    * @param[in]       *pSrcB points to the second input matrix structure
   1556    * @param[out]      *pDst points to output matrix structure
   1557    * @return     The function returns either
   1558    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1559    */
   1560 
   1561   arm_status arm_mat_sub_q15(
   1562 			     const arm_matrix_instance_q15 * pSrcA,
   1563 			     const arm_matrix_instance_q15 * pSrcB,
   1564 			     arm_matrix_instance_q15 * pDst);
   1565 
   1566   /**
   1567    * @brief Q31 matrix subtraction
   1568    * @param[in]       *pSrcA points to the first input matrix structure
   1569    * @param[in]       *pSrcB points to the second input matrix structure
   1570    * @param[out]      *pDst points to output matrix structure
   1571    * @return     The function returns either
   1572    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1573    */
   1574 
   1575   arm_status arm_mat_sub_q31(
   1576 			     const arm_matrix_instance_q31 * pSrcA,
   1577 			     const arm_matrix_instance_q31 * pSrcB,
   1578 			     arm_matrix_instance_q31 * pDst);
   1579 
   1580   /**
   1581    * @brief Floating-point matrix scaling.
   1582    * @param[in]  *pSrc points to the input matrix
   1583    * @param[in]  scale scale factor
   1584    * @param[out] *pDst points to the output matrix
   1585    * @return     The function returns either
   1586    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1587    */
   1588 
   1589   arm_status arm_mat_scale_f32(
   1590 			       const arm_matrix_instance_f32 * pSrc,
   1591 			       float32_t scale,
   1592 			       arm_matrix_instance_f32 * pDst);
   1593 
   1594   /**
   1595    * @brief Q15 matrix scaling.
   1596    * @param[in]       *pSrc points to input matrix
   1597    * @param[in]       scaleFract fractional portion of the scale factor
   1598    * @param[in]       shift number of bits to shift the result by
   1599    * @param[out]      *pDst points to output matrix
   1600    * @return     The function returns either
   1601    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1602    */
   1603 
   1604   arm_status arm_mat_scale_q15(
   1605 			       const arm_matrix_instance_q15 * pSrc,
   1606 			       q15_t scaleFract,
   1607 			       int32_t shift,
   1608 			       arm_matrix_instance_q15 * pDst);
   1609 
   1610   /**
   1611    * @brief Q31 matrix scaling.
   1612    * @param[in]       *pSrc points to input matrix
   1613    * @param[in]       scaleFract fractional portion of the scale factor
   1614    * @param[in]       shift number of bits to shift the result by
   1615    * @param[out]      *pDst points to output matrix structure
   1616    * @return     The function returns either
   1617    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   1618    */
   1619 
   1620   arm_status arm_mat_scale_q31(
   1621 			       const arm_matrix_instance_q31 * pSrc,
   1622 			       q31_t scaleFract,
   1623 			       int32_t shift,
   1624 			       arm_matrix_instance_q31 * pDst);
   1625 
   1626 
   1627   /**
   1628    * @brief  Q31 matrix initialization.
   1629    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
   1630    * @param[in]     nRows          number of rows in the matrix.
   1631    * @param[in]     nColumns       number of columns in the matrix.
   1632    * @param[in]     *pData	       points to the matrix data array.
   1633    * @return        none
   1634    */
   1635 
   1636   void arm_mat_init_q31(
   1637 			arm_matrix_instance_q31 * S,
   1638 			uint16_t nRows,
   1639 			uint16_t nColumns,
   1640 			q31_t   *pData);
   1641 
   1642   /**
   1643    * @brief  Q15 matrix initialization.
   1644    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
   1645    * @param[in]     nRows          number of rows in the matrix.
   1646    * @param[in]     nColumns       number of columns in the matrix.
   1647    * @param[in]     *pData	       points to the matrix data array.
   1648    * @return        none
   1649    */
   1650 
   1651   void arm_mat_init_q15(
   1652 			arm_matrix_instance_q15 * S,
   1653 			uint16_t nRows,
   1654 			uint16_t nColumns,
   1655 			q15_t    *pData);
   1656 
   1657   /**
   1658    * @brief  Floating-point matrix initialization.
   1659    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
   1660    * @param[in]     nRows          number of rows in the matrix.
   1661    * @param[in]     nColumns       number of columns in the matrix.
   1662    * @param[in]     *pData	       points to the matrix data array.
   1663    * @return        none
   1664    */
   1665 
   1666   void arm_mat_init_f32(
   1667 			arm_matrix_instance_f32 * S,
   1668 			uint16_t nRows,
   1669 			uint16_t nColumns,
   1670 			float32_t   *pData);
   1671 
   1672 
   1673 
   1674   /**
   1675    * @brief Instance structure for the Q15 PID Control. The layout of the derived gains depends on whether ARM_MATH_CM0 is defined.
   1676    */
   1677   typedef struct
   1678   {
   1679     q15_t A0; 	 /**< The derived gain, A0 = Kp + Ki + Kd . */
   1680 	#ifdef ARM_MATH_CM0
   1681 	q15_t A1;           /**< Derived gain used when ARM_MATH_CM0 is defined; presumably A1 = -Kp - 2Kd as in the Q31 instance (TODO confirm). */
   1682 	q15_t A2;           /**< Derived gain used when ARM_MATH_CM0 is defined; presumably A2 = Kd as in the Q31 instance (TODO confirm). */
   1683 	#else
   1684     q31_t A1;           /**< The derived gains (-Kp - 2Kd) and Kd. NOTE(review): appears to hold both values in one 32-bit word; packing order to be confirmed. */
   1685 	#endif
   1686     q15_t state[3];       /**< The state array of length 3. */
   1687     q15_t Kp;           /**< The proportional gain. */
   1688     q15_t Ki;           /**< The integral gain. */
   1689     q15_t Kd;           /**< The derivative gain. */
   1690   } arm_pid_instance_q15;
   1691 
   1692   /**
   1693    * @brief Instance structure for the Q31 PID Control.
   1694    */
   1695   typedef struct
   1696   {
   1697     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
   1698     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
   1699     q31_t A2;            /**< The derived gain, A2 = Kd . */
   1700     q31_t state[3];      /**< The state array of length 3. */
   1701     q31_t Kp;            /**< The proportional gain. */
   1702     q31_t Ki;            /**< The integral gain. */
   1703     q31_t Kd;            /**< The derivative gain. */
   1704 
   1705   } arm_pid_instance_q31;
   1706 
   1707   /**
   1708    * @brief Instance structure for the floating-point PID Control.
   1709    */
   1710   typedef struct
   1711   {
   1712     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
   1713     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
   1714     float32_t A2;          /**< The derived gain, A2 = Kd . */
   1715     float32_t state[3];    /**< The state array of length 3. */
   1716     float32_t Kp;               /**< The proportional gain. */
   1717     float32_t Ki;               /**< The integral gain. */
   1718     float32_t Kd;               /**< The derivative gain. */
   1719   } arm_pid_instance_f32;
   1720 
   1721 
   1722 
   1723   /**
   1724    * @brief  Initialization function for the floating-point PID Control.
   1725    * @param[in,out] *S      points to an instance of the PID structure.
   1726    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
   1727    * @return none.
   1728    */
   1729   void arm_pid_init_f32(
   1730 			arm_pid_instance_f32 * S,
   1731 			int32_t resetStateFlag);
   1732 
   1733   /**
   1734    * @brief  Reset function for the floating-point PID Control.
   1735    * @param[in,out] *S is an instance of the floating-point PID Control structure
   1736    * @return none
   1737    */
   1738   void arm_pid_reset_f32(
   1739 			 arm_pid_instance_f32 * S);
   1740 
   1741 
   1742   /**
   1743    * @brief  Initialization function for the Q31 PID Control.
   1744    * @param[in,out] *S points to an instance of the Q31 PID structure.
   1745    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state, 1 = reset the state.
   1746    * @return none.
   1747    */
   1748   void arm_pid_init_q31(
   1749 			arm_pid_instance_q31 * S,
   1750 			int32_t resetStateFlag);
   1751 
   1752 
   1753   /**
   1754    * @brief  Reset function for the Q31 PID Control.
   1755    * @param[in,out] *S points to an instance of the Q31 PID Control structure
   1756    * @return none
   1757    */
   1758 
   1759   void arm_pid_reset_q31(
   1760 			 arm_pid_instance_q31 * S);
   1761 
   1762   /**
   1763    * @brief  Initialization function for the Q15 PID Control.
   1764    * @param[in,out] *S points to an instance of the Q15 PID structure.
   1765    * @param[in] resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
   1766    * @return none.
   1767    */
   1768   void arm_pid_init_q15(
   1769 			arm_pid_instance_q15 * S,
   1770 			int32_t resetStateFlag);
   1771 
   1772   /**
   1773    * @brief  Reset function for the Q15 PID Control.
   1774    * @param[in,out] *S points to an instance of the Q15 PID Control structure
   1775    * @return none
   1776    */
   1777   void arm_pid_reset_q15(
   1778 			 arm_pid_instance_q15 * S);
   1779 
   1780 
   1781   /**
   1782    * @brief Instance structure for the floating-point Linear Interpolate function.
   1783    */
   1784   typedef struct
   1785   {
   1786     uint32_t nValues;           /**< presumably the number of entries in the pYData table (TODO confirm) */
   1787     float32_t x1;               /**< presumably the x value of the first table entry (TODO confirm) */
   1788     float32_t xSpacing;         /**< presumably the spacing between successive x values (TODO confirm) */
   1789     float32_t *pYData;          /**< pointer to the table of Y values */
   1790   } arm_linear_interp_instance_f32;
   1791 
   1792   /**
   1793    * @brief Instance structure for the floating-point bilinear interpolation function.
   1794    */
   1795 
   1796   typedef struct
   1797   {
   1798     uint16_t numRows;	/**< number of rows in the data table. */
   1799     uint16_t numCols;	/**< number of columns in the data table. */
   1800     float32_t *pData;	/**< points to the data table. */
   1801   } arm_bilinear_interp_instance_f32;
   1802 
   1803    /**
   1804    * @brief Instance structure for the Q31 bilinear interpolation function.
   1805    */
   1806 
   1807   typedef struct
   1808   {
   1809     uint16_t numRows;	/**< number of rows in the data table. */
   1810     uint16_t numCols;	/**< number of columns in the data table. */
   1811     q31_t *pData;	/**< points to the data table. */
   1812   } arm_bilinear_interp_instance_q31;
   1813 
   1814    /**
   1815    * @brief Instance structure for the Q15 bilinear interpolation function.
   1816    */
   1817 
   1818   typedef struct
   1819   {
   1820     uint16_t numRows;	/**< number of rows in the data table. */
   1821     uint16_t numCols;	/**< number of columns in the data table. */
   1822     q15_t *pData;	/**< points to the data table. */
   1823   } arm_bilinear_interp_instance_q15;
   1824 
   1825    /**
   1826    * @brief Instance structure for the Q7 bilinear interpolation function.
   1827    */
   1828 
   1829   typedef struct
   1830   {
   1831     uint16_t numRows; 	/**< number of rows in the data table. */
   1832     uint16_t numCols;	/**< number of columns in the data table. */
   1833     q7_t *pData;		/**< points to the data table. */
   1834   } arm_bilinear_interp_instance_q7;
   1835 
   1836 
   1837   /**
   1838    * @brief Q7 vector multiplication.
   1839    * @param[in]       *pSrcA points to the first input vector
   1840    * @param[in]       *pSrcB points to the second input vector
   1841    * @param[out]      *pDst  points to the output vector
   1842    * @param[in]       blockSize number of samples in each vector
   1843    * @return none.
   1844    */
   1845 
   1846   void arm_mult_q7(
   1847 		    q7_t * pSrcA,
   1848 		    q7_t * pSrcB,
   1849 		   q7_t * pDst,
   1850 		   uint32_t blockSize);
   1851 
   1852   /**
   1853    * @brief Q15 vector multiplication.
   1854    * @param[in]       *pSrcA points to the first input vector
   1855    * @param[in]       *pSrcB points to the second input vector
   1856    * @param[out]      *pDst  points to the output vector
   1857    * @param[in]       blockSize number of samples in each vector
   1858    * @return none.
   1859    */
   1860 
   1861   void arm_mult_q15(
   1862 		     q15_t * pSrcA,
   1863 		     q15_t * pSrcB,
   1864 		    q15_t * pDst,
   1865 		    uint32_t blockSize);
   1866 
   1867   /**
   1868    * @brief Q31 vector multiplication.
   1869    * @param[in]       *pSrcA points to the first input vector
   1870    * @param[in]       *pSrcB points to the second input vector
   1871    * @param[out]      *pDst points to the output vector
   1872    * @param[in]       blockSize number of samples in each vector
   1873    * @return none.
   1874    */
   1875 
   1876   void arm_mult_q31(
   1877 		     q31_t * pSrcA,
   1878 		     q31_t * pSrcB,
   1879 		    q31_t * pDst,
   1880 		    uint32_t blockSize);
   1881 
   1882   /**
   1883    * @brief Floating-point vector multiplication.
   1884    * @param[in]       *pSrcA points to the first input vector
   1885    * @param[in]       *pSrcB points to the second input vector
   1886    * @param[out]      *pDst points to the output vector
   1887    * @param[in]       blockSize number of samples in each vector
   1888    * @return none.
   1889    */
   1890 
   1891   void arm_mult_f32(
   1892 		     float32_t * pSrcA,
   1893 		     float32_t * pSrcB,
   1894 		    float32_t * pDst,
   1895 		    uint32_t blockSize);
   1896 
   1897 
   1898   /**
   1899    * @brief Instance structure for the Q15 CFFT/CIFFT function.
   1900    */
   1901 
   1902   typedef struct
   1903   {
   1904     uint16_t  fftLen;                /**< length of the FFT. */
   1905     uint8_t   ifftFlag;              /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
   1906     uint8_t   bitReverseFlag;        /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
   1907     q15_t     *pTwiddle;             /**< points to the twiddle factor table. */
   1908     uint16_t  *pBitRevTable;         /**< points to the bit reversal table. */
   1909     uint16_t  twidCoefModifier;      /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
   1910     uint16_t  bitRevFactor;          /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   1911   } arm_cfft_radix4_instance_q15;
   1912 
   1913   /**
   1914    * @brief Instance structure for the Q31 CFFT/CIFFT function.
   1915    */
   1916 
   1917   typedef struct
   1918   {
   1919     uint16_t    fftLen;              /**< length of the FFT. */
   1920     uint8_t     ifftFlag;            /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
   1921     uint8_t     bitReverseFlag;      /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
   1922     q31_t       *pTwiddle;           /**< points to the twiddle factor table. */
   1923     uint16_t    *pBitRevTable;       /**< points to the bit reversal table. */
   1924     uint16_t    twidCoefModifier;    /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
   1925     uint16_t    bitRevFactor;        /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   1926   } arm_cfft_radix4_instance_q31;
   1927 
   1928   /**
   1929    * @brief Instance structure for the floating-point CFFT/CIFFT function.
   1930    */
   1931 
   1932   typedef struct
   1933   {
   1934     uint16_t     fftLen;               /**< length of the FFT. */
   1935     uint8_t      ifftFlag;             /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
   1936     uint8_t      bitReverseFlag;       /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
   1937     float32_t    *pTwiddle;            /**< points to the twiddle factor table. */
   1938     uint16_t     *pBitRevTable;        /**< points to the bit reversal table. */
   1939     uint16_t     twidCoefModifier;     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
   1940     uint16_t     bitRevFactor;         /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   1941 	float32_t    onebyfftLen;          /**< value of 1/fftLen. */
   1942   } arm_cfft_radix4_instance_f32;
   1943 
   1944   /**
   1945    * @brief Processing function for the Q15 CFFT/CIFFT.
   1946    * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.
   1947    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
   1948    * @return none.
   1949    */
   1950 
   1951   void arm_cfft_radix4_q15(
   1952 			   const arm_cfft_radix4_instance_q15 * S,
   1953 			   q15_t * pSrc);
   1954 
   1955   /**
   1956    * @brief Initialization function for the Q15 CFFT/CIFFT.
   1957    * @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.
   1958    * @param[in]     fftLen         length of the FFT.
   1959    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
   1960    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
   1961    * @return        arm_status     function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
   1962    */
   1963 
   1964   arm_status arm_cfft_radix4_init_q15(
   1965 				      arm_cfft_radix4_instance_q15 * S,
   1966 				      uint16_t fftLen,
   1967 				      uint8_t ifftFlag,
   1968 				      uint8_t bitReverseFlag);
   1969 
   1970   /**
   1971    * @brief Processing function for the Q31 CFFT/CIFFT.
   1972    * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure.
   1973    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
   1974    * @return none.
   1975    */
   1976 
   1977   void arm_cfft_radix4_q31(
   1978 			   const arm_cfft_radix4_instance_q31 * S,
   1979 			   q31_t * pSrc);
   1980 
   1981   /**
   1982    * @brief  Initialization function for the Q31 CFFT/CIFFT.
   1983    * @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.
   1984    * @param[in]     fftLen         length of the FFT.
   1985    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
   1986    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
   1987    * @return        arm_status     function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
   1988    */
   1989 
   1990   arm_status arm_cfft_radix4_init_q31(
   1991 				      arm_cfft_radix4_instance_q31 * S,
   1992 				      uint16_t fftLen,
   1993 				      uint8_t ifftFlag,
   1994 				      uint8_t bitReverseFlag);
   1995 
   1996   /**
   1997    * @brief Processing function for the floating-point CFFT/CIFFT.
   1998    * @param[in]      *S    points to an instance of the floating-point CFFT/CIFFT structure.
   1999    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
   2000    * @return none.
   2001    */
   2002 
   2003   void arm_cfft_radix4_f32(
   2004 			   const arm_cfft_radix4_instance_f32 * S,
   2005 			   float32_t * pSrc);
   2006 
   2007   /**
   2008    * @brief  Initialization function for the floating-point CFFT/CIFFT.
   2009    * @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.
   2010    * @param[in]     fftLen         length of the FFT.
   2011    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
   2012    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
   2013    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
   2014    */
   2015 
   2016   arm_status arm_cfft_radix4_init_f32(
   2017 				      arm_cfft_radix4_instance_f32 * S,
   2018 				      uint16_t fftLen,
   2019 				      uint8_t ifftFlag,
   2020 				      uint8_t bitReverseFlag);
   2021 
   2022 
   2023 
   2024   /*----------------------------------------------------------------------
   2025    *		Internal function prototypes for the FFT functions
   2026    ----------------------------------------------------------------------*/
   2027 
   2028   /**
   2029    * @brief  Core function for the floating-point CFFT butterfly process.
   2030    * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
   2031    * @param[in]      fftLen           length of the FFT.
   2032    * @param[in]      *pCoef           points to the twiddle coefficient buffer.
   2033    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
   2034    * @return none.
   2035    */
   2036 
   2037   void arm_radix4_butterfly_f32(
   2038 				float32_t * pSrc,
   2039 				uint16_t fftLen,
   2040 				float32_t * pCoef,
   2041 				uint16_t twidCoefModifier);
   2042 
   2043   /**
   2044    * @brief  Core function for the floating-point CIFFT butterfly process.
   2045    * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
   2046    * @param[in]      fftLen           length of the FFT.
   2047    * @param[in]      *pCoef           points to twiddle coefficient buffer.
   2048    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
   2049    * @param[in]      onebyfftLen      value of 1/fftLen.
   2050    * @return none.
   2051    */
   2052 
   2053   void arm_radix4_butterfly_inverse_f32(
   2054 					float32_t * pSrc,
   2055 					uint16_t fftLen,
   2056 					float32_t * pCoef,
   2057 					uint16_t twidCoefModifier,
   2058 					float32_t onebyfftLen);
   2059 
   2060   /**
   2061    * @brief  In-place bit reversal function for floating-point data.
   2062    * @param[in, out] *pSrc        points to the in-place buffer of floating-point data type.
   2063    * @param[in]      fftSize      length of the FFT.
   2064    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
   2065    * @param[in]      *pBitRevTab  points to the bit reversal table.
   2066    * @return none.
   2067    */
   2068 
   2069   void arm_bitreversal_f32(
   2070 			   float32_t *pSrc,
   2071 			   uint16_t fftSize,
   2072 			   uint16_t bitRevFactor,
   2073 			   uint16_t *pBitRevTab);
   2074 
   2075   /**
   2076    * @brief  Core function for the Q31 CFFT butterfly process.
   2077    * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
   2078    * @param[in]      fftLen           length of the FFT.
   2079    * @param[in]      *pCoef           points to twiddle coefficient buffer.
   2080    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
   2081    * @return none.
   2082    */
   2083 
   2084   void arm_radix4_butterfly_q31(
   2085 				q31_t *pSrc,
   2086 				uint32_t fftLen,
   2087 				q31_t *pCoef,
   2088 				uint32_t twidCoefModifier);
   2089 
   2090   /**
   2091    * @brief  Core function for the Q31 CIFFT butterfly process.
   2092    * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
   2093    * @param[in]      fftLen           length of the FFT.
   2094    * @param[in]      *pCoef           points to twiddle coefficient buffer.
   2095    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
   2096    * @return none.
   2097    */
   2098 
   2099   void arm_radix4_butterfly_inverse_q31(
   2100 					q31_t * pSrc,
   2101 					uint32_t fftLen,
   2102 					q31_t * pCoef,
   2103 					uint32_t twidCoefModifier);
   2104 
   2105   /**
   2106    * @brief  In-place bit reversal function for Q31 data.
   2107    * @param[in, out] *pSrc        points to the in-place buffer of Q31 data type.
   2108    * @param[in]      fftLen       length of the FFT.
   2109    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
   2110    * @param[in]      *pBitRevTab  points to the bit reversal table.
   2111    * @return none.
   2112    */
   2113 
   2114   void arm_bitreversal_q31(
   2115 			   q31_t * pSrc,
   2116 			   uint32_t fftLen,
   2117 			   uint16_t bitRevFactor,
   2118 			   uint16_t *pBitRevTab);
   2119 
   2120   /**
   2121    * @brief  Core function for the Q15 CFFT butterfly process.
   2122    * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
   2123    * @param[in]      fftLen           length of the FFT.
   2124    * @param[in]      *pCoef16         points to twiddle coefficient buffer.
   2125    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
   2126    * @return none.
   2127    */
   2128 
   2129   void arm_radix4_butterfly_q15(
   2130 				q15_t *pSrc16,
   2131 				uint32_t fftLen,
   2132 				q15_t *pCoef16,
   2133 				uint32_t twidCoefModifier);
   2134 
   2135   /**
   2136    * @brief  Core function for the Q15 CIFFT butterfly process.
   2137    * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
   2138    * @param[in]      fftLen           length of the FFT.
   2139    * @param[in]      *pCoef16         points to twiddle coefficient buffer.
   2140    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
   2141    * @return none.
   2142    */
   2143 
   2144   void arm_radix4_butterfly_inverse_q15(
   2145 					q15_t *pSrc16,
   2146 					uint32_t fftLen,
   2147 					q15_t *pCoef16,
   2148 					uint32_t twidCoefModifier);
   2149 
   2150   /**
   2151    * @brief  In-place bit reversal function for Q15 data.
   2152    * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.
   2153    * @param[in]      fftLen       length of the FFT.
   2154    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
   2155    * @param[in]      *pBitRevTab  points to the bit reversal table.
   2156    * @return none.
   2157    */
   2158 
   2159   void arm_bitreversal_q15(
   2160 			   q15_t * pSrc,
   2161 			   uint32_t fftLen,
   2162 			   uint16_t bitRevFactor,
   2163 			   uint16_t *pBitRevTab);
   2164 
   2165   /**
   2166    * @brief Instance structure for the Q15 RFFT/RIFFT function.
   2167    */
   2168 
   2169   typedef struct
   2170   {
   2171     uint32_t fftLenReal;                      /**< length of the real FFT. */
   2172     uint32_t fftLenBy2;                       /**< length of the complex FFT. */
   2173     uint8_t  ifftFlagR;                       /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
   2174 	uint8_t  bitReverseFlagR;                 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
   2175     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
   2176     q15_t    *pTwiddleAReal;                  /**< points to the real twiddle factor table. */
   2177     q15_t    *pTwiddleBReal;                  /**< points to the imag twiddle factor table. */
   2178     arm_cfft_radix4_instance_q15 *pCfft;	  /**< points to the complex FFT instance. */
   2179   } arm_rfft_instance_q15;
   2180 
   2181   /**
   2182    * @brief Instance structure for the Q31 RFFT/RIFFT function.
   2183    */
   2184 
   2185   typedef struct
   2186   {
   2187     uint32_t fftLenReal;                        /**< length of the real FFT. */
   2188     uint32_t fftLenBy2;                         /**< length of the complex FFT. */
   2189     uint8_t  ifftFlagR;                         /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
   2190 	uint8_t  bitReverseFlagR;                   /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
   2191     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
   2192     q31_t    *pTwiddleAReal;                    /**< points to the real twiddle factor table. */
   2193     q31_t    *pTwiddleBReal;                    /**< points to the imag twiddle factor table. */
   2194     arm_cfft_radix4_instance_q31 *pCfft;        /**< points to the complex FFT instance. */
   2195   } arm_rfft_instance_q31;
   2196 
   2197   /**
   2198    * @brief Instance structure for the floating-point RFFT/RIFFT function.
   2199    */
   2200 
   2201   typedef struct
   2202   {
   2203     uint32_t  fftLenReal;                       /**< length of the real FFT. */
   2204     uint16_t  fftLenBy2;                        /**< length of the complex FFT. NOTE(review): uint16_t here, whereas the Q15 and Q31 RFFT instances declare this field as uint32_t. */
   2205     uint8_t   ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
   2206     uint8_t   bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
   2207 	uint32_t  twidCoefRModifier;                /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
   2208     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
   2209     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
   2210     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
   2211   } arm_rfft_instance_f32;
   2212 
   2213   /**
   2214    * @brief Processing function for the Q15 RFFT/RIFFT.
   2215    * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
   2216    * @param[in]  *pSrc points to the input buffer.
   2217    * @param[out] *pDst points to the output buffer.
   2218    * @return none.
   2219    */
   2220 
   2221   void arm_rfft_q15(
   2222 		    const arm_rfft_instance_q15 * S,
   2223 		    q15_t * pSrc,
   2224 		    q15_t * pDst);
   2225 
   2226   /**
   2227    * @brief  Initialization function for the Q15 RFFT/RIFFT.
   2228    * @param[in, out] *S             points to an instance of the Q15 RFFT/RIFFT structure.
   2229    * @param[in]      *S_CFFT        points to an instance of the Q15 CFFT/CIFFT structure.
   2230    * @param[in]      fftLenReal     length of the FFT.
   2231    * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
   2232    * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
   2233    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
   2234    */
   2235 
   2236   arm_status arm_rfft_init_q15(
   2237 			       arm_rfft_instance_q15 * S,
   2238 			       arm_cfft_radix4_instance_q15 * S_CFFT,
   2239 			       uint32_t fftLenReal,
   2240 			       uint32_t ifftFlagR,
   2241 			       uint32_t bitReverseFlag);
   2242 
   2243   /**
   2244    * @brief Processing function for the Q31 RFFT/RIFFT.
   2245    * @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure.
   2246    * @param[in]  *pSrc points to the input buffer.
   2247    * @param[out] *pDst points to the output buffer.
   2248    * @return none.
   2249    */
   2250 
   2251   void arm_rfft_q31(
   2252 		    const arm_rfft_instance_q31 * S,
   2253 		    q31_t * pSrc,
   2254 		    q31_t * pDst);
   2255 
   2256   /**
   2257    * @brief  Initialization function for the Q31 RFFT/RIFFT.
   2258    * @param[in, out] *S             points to an instance of the Q31 RFFT/RIFFT structure.
   2259    * @param[in, out] *S_CFFT        points to an instance of the Q31 CFFT/CIFFT structure.
   2260    * @param[in]      fftLenReal     length of the FFT.
   2261    * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
   2262    * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
   2263    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
   2264    */
   2265 
   2266   arm_status arm_rfft_init_q31(
   2267 			       arm_rfft_instance_q31 * S,
   2268 			       arm_cfft_radix4_instance_q31 * S_CFFT,
   2269 			       uint32_t fftLenReal,
   2270 			       uint32_t ifftFlagR,
   2271 			       uint32_t bitReverseFlag);
   2272 
   2273   /**
   2274    * @brief  Initialization function for the floating-point RFFT/RIFFT.
   2275    * @param[in,out] *S             points to an instance of the floating-point RFFT/RIFFT structure.
   2276    * @param[in,out] *S_CFFT        points to an instance of the floating-point CFFT/CIFFT structure.
   2277    * @param[in]     fftLenReal     length of the FFT.
   2278    * @param[in]     ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
   2279    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
   2280    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
   2281    */
   2282 
   2283   arm_status arm_rfft_init_f32(
   2284 			       arm_rfft_instance_f32 * S,
   2285 			       arm_cfft_radix4_instance_f32 * S_CFFT,
   2286 			       uint32_t fftLenReal,
   2287 			       uint32_t ifftFlagR,
   2288 			       uint32_t bitReverseFlag);
   2289 
   2290   /**
   2291    * @brief Processing function for the floating-point RFFT/RIFFT.
   2292    * @param[in]  *S    points to an instance of the floating-point RFFT/RIFFT structure.
   2293    * @param[in]  *pSrc points to the input buffer.
   2294    * @param[out] *pDst points to the output buffer.
   2295    * @return none.
   2296    */
   2297 
   2298   void arm_rfft_f32(
   2299 		    const arm_rfft_instance_f32 * S,
   2300 		    float32_t * pSrc,
   2301 		    float32_t * pDst);
   2302 
   2303   /**
   2304    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
   2305    */
   2306 
   2307   typedef struct
   2308   {
   2309     uint16_t N;                         /**< length of the DCT4. */
   2310     uint16_t Nby2;                      /**< half of the length of the DCT4. */
   2311     float32_t normalize;                /**< normalizing factor. */
   2312     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
   2313     float32_t *pCosFactor;              /**< points to the cosFactor table. */
   2314     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
   2315     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
   2316   } arm_dct4_instance_f32;
   2317 
   2318   /**
   2319    * @brief  Initialization function for the floating-point DCT4/IDCT4.
   2320    * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
   2321    * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
   2322    * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
   2323    * @param[in]     N          length of the DCT4.
   2324    * @param[in]     Nby2       half of the length of the DCT4.
   2325    * @param[in]     normalize  normalizing factor.
   2326    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
   2327    */
   2328 
   2329   arm_status arm_dct4_init_f32(
   2330 			       arm_dct4_instance_f32 * S,
   2331 			       arm_rfft_instance_f32 * S_RFFT,
   2332 			       arm_cfft_radix4_instance_f32 * S_CFFT,
   2333 			       uint16_t N,
   2334 			       uint16_t Nby2,
   2335 			       float32_t normalize);
   2336 
   2337   /**
   2338    * @brief Processing function for the floating-point DCT4/IDCT4.
   2339    * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
   2340    * @param[in]       *pState        points to state buffer.
   2341    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
   2342    * @return none.
   2343    */
   2344 
   2345   void arm_dct4_f32(
   2346 		    const arm_dct4_instance_f32 * S,
   2347 		    float32_t * pState,
   2348 		    float32_t * pInlineBuffer);
   2349 
   2350   /**
   2351    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
   2352    */
   2353 
   2354   typedef struct
   2355   {
   2356     uint16_t N;                         /**< length of the DCT4. */
   2357     uint16_t Nby2;                      /**< half of the length of the DCT4. */
   2358     q31_t normalize;                    /**< normalizing factor. */
   2359     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
   2360     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
   2361     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
   2362     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
   2363   } arm_dct4_instance_q31;
   2364 
   2365   /**
   2366    * @brief  Initialization function for the Q31 DCT4/IDCT4.
   2367    * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
   2368    * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure.
   2369    * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure.
   2370    * @param[in]     N          length of the DCT4.
   2371    * @param[in]     Nby2       half of the length of the DCT4.
   2372    * @param[in]     normalize  normalizing factor.
   2373    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
   2374    */
   2375 
   2376   arm_status arm_dct4_init_q31(
   2377 			       arm_dct4_instance_q31 * S,
   2378 			       arm_rfft_instance_q31 * S_RFFT,
   2379 			       arm_cfft_radix4_instance_q31 * S_CFFT,
   2380 			       uint16_t N,
   2381 			       uint16_t Nby2,
   2382 			       q31_t normalize);
   2383 
   2384   /**
   2385    * @brief Processing function for the Q31 DCT4/IDCT4.
   2386    * @param[in]       *S             points to an instance of the Q31 DCT4/IDCT4 structure.
   2387    * @param[in]       *pState        points to state buffer.
   2388    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
   2389    * @return none.
   2390    */
   2391 
   2392   void arm_dct4_q31(
   2393 		    const arm_dct4_instance_q31 * S,
   2394 		    q31_t * pState,
   2395 		    q31_t * pInlineBuffer);
   2396 
   2397   /**
   2398    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
   2399    */
   2400 
   2401   typedef struct
   2402   {
   2403     uint16_t N;                         /**< length of the DCT4. */
   2404     uint16_t Nby2;                      /**< half of the length of the DCT4. */
   2405     q15_t normalize;                    /**< normalizing factor. */
   2406     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
   2407     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
   2408     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
   2409     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
   2410   } arm_dct4_instance_q15;
   2411 
   2412   /**
   2413    * @brief  Initialization function for the Q15 DCT4/IDCT4.
   2414    * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
   2415    * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
   2416    * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
   2417    * @param[in]     N          length of the DCT4.
   2418    * @param[in]     Nby2       half of the length of the DCT4.
   2419    * @param[in]     normalize  normalizing factor.
   2420    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
   2421    */
   2422 
   2423   arm_status arm_dct4_init_q15(
   2424 			       arm_dct4_instance_q15 * S,
   2425 			       arm_rfft_instance_q15 * S_RFFT,
   2426 			       arm_cfft_radix4_instance_q15 * S_CFFT,
   2427 			       uint16_t N,
   2428 			       uint16_t Nby2,
   2429 			       q15_t normalize);
   2430 
   2431   /**
   2432    * @brief Processing function for the Q15 DCT4/IDCT4.
   2433    * @param[in]       *S             points to an instance of the Q15 DCT4/IDCT4 structure.
   2434    * @param[in]       *pState        points to state buffer.
   2435    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
   2436    * @return none.
   2437    */
   2438 
   2439   void arm_dct4_q15(
   2440 		    const arm_dct4_instance_q15 * S,
   2441 		    q15_t * pState,
   2442 		    q15_t * pInlineBuffer);
   2443 
   2444   /**
   2445    * @brief Floating-point vector addition.
   2446    * @param[in]       *pSrcA points to the first input vector
   2447    * @param[in]       *pSrcB points to the second input vector
   2448    * @param[out]      *pDst points to the output vector
   2449    * @param[in]       blockSize number of samples in each vector
   2450    * @return none.
   2451    */
   2452 
   2453   void arm_add_f32(
   2454 		   float32_t * pSrcA,
   2455 		   float32_t * pSrcB,
   2456 		   float32_t * pDst,
   2457 		   uint32_t blockSize);
   2458 
   2459   /**
   2460    * @brief Q7 vector addition.
   2461    * @param[in]       *pSrcA points to the first input vector
   2462    * @param[in]       *pSrcB points to the second input vector
   2463    * @param[out]      *pDst points to the output vector
   2464    * @param[in]       blockSize number of samples in each vector
   2465    * @return none.
   2466    */
   2467 
   2468   void arm_add_q7(
   2469 		  q7_t * pSrcA,
   2470 		  q7_t * pSrcB,
   2471 		  q7_t * pDst,
   2472 		  uint32_t blockSize);
   2473 
   2474   /**
   2475    * @brief Q15 vector addition.
   2476    * @param[in]       *pSrcA points to the first input vector
   2477    * @param[in]       *pSrcB points to the second input vector
   2478    * @param[out]      *pDst points to the output vector
   2479    * @param[in]       blockSize number of samples in each vector
   2480    * @return none.
   2481    */
   2482 
   2483   void arm_add_q15(
   2484 		    q15_t * pSrcA,
   2485 		    q15_t * pSrcB,
   2486 		   q15_t * pDst,
   2487 		   uint32_t blockSize);
   2488 
   2489   /**
   2490    * @brief Q31 vector addition.
   2491    * @param[in]       *pSrcA points to the first input vector
   2492    * @param[in]       *pSrcB points to the second input vector
   2493    * @param[out]      *pDst points to the output vector
   2494    * @param[in]       blockSize number of samples in each vector
   2495    * @return none.
   2496    */
   2497 
   2498   void arm_add_q31(
   2499 		    q31_t * pSrcA,
   2500 		    q31_t * pSrcB,
   2501 		   q31_t * pDst,
   2502 		   uint32_t blockSize);
   2503 
   2504   /**
   2505    * @brief Floating-point vector subtraction.
   2506    * @param[in]       *pSrcA points to the first input vector
   2507    * @param[in]       *pSrcB points to the second input vector
   2508    * @param[out]      *pDst points to the output vector
   2509    * @param[in]       blockSize number of samples in each vector
   2510    * @return none.
   2511    */
   2512 
   2513   void arm_sub_f32(
   2514 		    float32_t * pSrcA,
   2515 		    float32_t * pSrcB,
   2516 		   float32_t * pDst,
   2517 		   uint32_t blockSize);
   2518 
   2519   /**
   2520    * @brief Q7 vector subtraction.
   2521    * @param[in]       *pSrcA points to the first input vector
   2522    * @param[in]       *pSrcB points to the second input vector
   2523    * @param[out]      *pDst points to the output vector
   2524    * @param[in]       blockSize number of samples in each vector
   2525    * @return none.
   2526    */
   2527 
   2528   void arm_sub_q7(
   2529 		   q7_t * pSrcA,
   2530 		   q7_t * pSrcB,
   2531 		  q7_t * pDst,
   2532 		  uint32_t blockSize);
   2533 
   2534   /**
   2535    * @brief Q15 vector subtraction.
   2536    * @param[in]       *pSrcA points to the first input vector
   2537    * @param[in]       *pSrcB points to the second input vector
   2538    * @param[out]      *pDst points to the output vector
   2539    * @param[in]       blockSize number of samples in each vector
   2540    * @return none.
   2541    */
   2542 
   2543   void arm_sub_q15(
   2544 		    q15_t * pSrcA,
   2545 		    q15_t * pSrcB,
   2546 		   q15_t * pDst,
   2547 		   uint32_t blockSize);
   2548 
   2549   /**
   2550    * @brief Q31 vector subtraction.
   2551    * @param[in]       *pSrcA points to the first input vector
   2552    * @param[in]       *pSrcB points to the second input vector
   2553    * @param[out]      *pDst points to the output vector
   2554    * @param[in]       blockSize number of samples in each vector
   2555    * @return none.
   2556    */
   2557 
   2558   void arm_sub_q31(
   2559 		    q31_t * pSrcA,
   2560 		    q31_t * pSrcB,
   2561 		   q31_t * pDst,
   2562 		   uint32_t blockSize);
   2563 
   2564   /**
   2565    * @brief Multiplies a floating-point vector by a scalar.
   2566    * @param[in]       *pSrc points to the input vector
   2567    * @param[in]       scale scale factor to be applied
   2568    * @param[out]      *pDst points to the output vector
   2569    * @param[in]       blockSize number of samples in the vector
   2570    * @return none.
   2571    */
   2572 
   2573   void arm_scale_f32(
   2574 		      float32_t * pSrc,
   2575 		     float32_t scale,
   2576 		     float32_t * pDst,
   2577 		     uint32_t blockSize);
   2578 
   2579   /**
   2580    * @brief Multiplies a Q7 vector by a scalar.
   2581    * @param[in]       *pSrc points to the input vector
   2582    * @param[in]       scaleFract fractional portion of the scale value
   2583    * @param[in]       shift number of bits to shift the result by
   2584    * @param[out]      *pDst points to the output vector
   2585    * @param[in]       blockSize number of samples in the vector
   2586    * @return none.
   2587    */
   2588 
   2589   void arm_scale_q7(
   2590 		     q7_t * pSrc,
   2591 		    q7_t scaleFract,
   2592 		    int8_t shift,
   2593 		    q7_t * pDst,
   2594 		    uint32_t blockSize);
   2595 
   2596   /**
   2597    * @brief Multiplies a Q15 vector by a scalar.
   2598    * @param[in]       *pSrc points to the input vector
   2599    * @param[in]       scaleFract fractional portion of the scale value
   2600    * @param[in]       shift number of bits to shift the result by
   2601    * @param[out]      *pDst points to the output vector
   2602    * @param[in]       blockSize number of samples in the vector
   2603    * @return none.
   2604    */
   2605 
   2606   void arm_scale_q15(
   2607 		      q15_t * pSrc,
   2608 		     q15_t scaleFract,
   2609 		     int8_t shift,
   2610 		     q15_t * pDst,
   2611 		     uint32_t blockSize);
   2612 
   2613   /**
   2614    * @brief Multiplies a Q31 vector by a scalar.
   2615    * @param[in]       *pSrc points to the input vector
   2616    * @param[in]       scaleFract fractional portion of the scale value
   2617    * @param[in]       shift number of bits to shift the result by
   2618    * @param[out]      *pDst points to the output vector
   2619    * @param[in]       blockSize number of samples in the vector
   2620    * @return none.
   2621    */
   2622 
   2623   void arm_scale_q31(
   2624 		      q31_t * pSrc,
   2625 		     q31_t scaleFract,
   2626 		     int8_t shift,
   2627 		     q31_t * pDst,
   2628 		     uint32_t blockSize);
   2629 
   2630   /**
   2631    * @brief Q7 vector absolute value.
   2632    * @param[in]       *pSrc points to the input buffer
   2633    * @param[out]      *pDst points to the output buffer
   2634    * @param[in]       blockSize number of samples in each vector
   2635    * @return none.
   2636    */
   2637 
   2638   void arm_abs_q7(
   2639 		   q7_t * pSrc,
   2640 		  q7_t * pDst,
   2641 		  uint32_t blockSize);
   2642 
   2643   /**
   2644    * @brief Floating-point vector absolute value.
   2645    * @param[in]       *pSrc points to the input buffer
   2646    * @param[out]      *pDst points to the output buffer
   2647    * @param[in]       blockSize number of samples in each vector
   2648    * @return none.
   2649    */
   2650 
   2651   void arm_abs_f32(
   2652 		    float32_t * pSrc,
   2653 		   float32_t * pDst,
   2654 		   uint32_t blockSize);
   2655 
   2656   /**
   2657    * @brief Q15 vector absolute value.
   2658    * @param[in]       *pSrc points to the input buffer
   2659    * @param[out]      *pDst points to the output buffer
   2660    * @param[in]       blockSize number of samples in each vector
   2661    * @return none.
   2662    */
   2663 
   2664   void arm_abs_q15(
   2665 		    q15_t * pSrc,
   2666 		   q15_t * pDst,
   2667 		   uint32_t blockSize);
   2668 
   2669   /**
   2670    * @brief Q31 vector absolute value.
   2671    * @param[in]       *pSrc points to the input buffer
   2672    * @param[out]      *pDst points to the output buffer
   2673    * @param[in]       blockSize number of samples in each vector
   2674    * @return none.
   2675    */
   2676 
   2677   void arm_abs_q31(
   2678 		    q31_t * pSrc,
   2679 		   q31_t * pDst,
   2680 		   uint32_t blockSize);
   2681 
   2682   /**
   2683    * @brief Dot product of floating-point vectors.
   2684    * @param[in]       *pSrcA points to the first input vector
   2685    * @param[in]       *pSrcB points to the second input vector
   2686    * @param[in]       blockSize number of samples in each vector
   2687    * @param[out]      *result output result returned here
   2688    * @return none.
   2689    */
   2690 
   2691   void arm_dot_prod_f32(
   2692 			 float32_t * pSrcA,
   2693 			 float32_t * pSrcB,
   2694 			uint32_t blockSize,
   2695 			float32_t * result);
   2696 
   2697   /**
   2698    * @brief Dot product of Q7 vectors.
   2699    * @param[in]       *pSrcA points to the first input vector
   2700    * @param[in]       *pSrcB points to the second input vector
   2701    * @param[in]       blockSize number of samples in each vector
   2702    * @param[out]      *result output result returned here
   2703    * @return none.
   2704    */
   2705 
   2706   void arm_dot_prod_q7(
   2707 		        q7_t * pSrcA,
   2708 		        q7_t * pSrcB,
   2709 		       uint32_t blockSize,
   2710 		       q31_t * result);
   2711 
   2712   /**
   2713    * @brief Dot product of Q15 vectors.
   2714    * @param[in]       *pSrcA points to the first input vector
   2715    * @param[in]       *pSrcB points to the second input vector
   2716    * @param[in]       blockSize number of samples in each vector
   2717    * @param[out]      *result output result returned here
   2718    * @return none.
   2719    */
   2720 
   2721   void arm_dot_prod_q15(
   2722 			 q15_t * pSrcA,
   2723 			 q15_t * pSrcB,
   2724 			uint32_t blockSize,
   2725 			q63_t * result);
   2726 
   2727   /**
   2728    * @brief Dot product of Q31 vectors.
   2729    * @param[in]       *pSrcA points to the first input vector
   2730    * @param[in]       *pSrcB points to the second input vector
   2731    * @param[in]       blockSize number of samples in each vector
   2732    * @param[out]      *result output result returned here
   2733    * @return none.
   2734    */
   2735 
   2736   void arm_dot_prod_q31(
   2737 			 q31_t * pSrcA,
   2738 			 q31_t * pSrcB,
   2739 			uint32_t blockSize,
   2740 			q63_t * result);
   2741 
   2742   /**
   2743    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
   2744    * @param[in]  *pSrc points to the input vector
   2745    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
   2746    * @param[out]  *pDst points to the output vector
   2747    * @param[in]  blockSize number of samples in the vector
   2748    * @return none.
   2749    */
   2750 
   2751   void arm_shift_q7(
   2752 		     q7_t * pSrc,
   2753 		    int8_t shiftBits,
   2754 		    q7_t * pDst,
   2755 		    uint32_t blockSize);
   2756 
   2757   /**
   2758    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
   2759    * @param[in]  *pSrc points to the input vector
   2760    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
   2761    * @param[out]  *pDst points to the output vector
   2762    * @param[in]  blockSize number of samples in the vector
   2763    * @return none.
   2764    */
   2765 
   2766   void arm_shift_q15(
   2767 		      q15_t * pSrc,
   2768 		     int8_t shiftBits,
   2769 		     q15_t * pDst,
   2770 		     uint32_t blockSize);
   2771 
   2772   /**
   2773    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
   2774    * @param[in]  *pSrc points to the input vector
   2775    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
   2776    * @param[out]  *pDst points to the output vector
   2777    * @param[in]  blockSize number of samples in the vector
   2778    * @return none.
   2779    */
   2780 
   2781   void arm_shift_q31(
   2782 		      q31_t * pSrc,
   2783 		     int8_t shiftBits,
   2784 		     q31_t * pDst,
   2785 		     uint32_t blockSize);
   2786 
   2787   /**
   2788    * @brief  Adds a constant offset to a floating-point vector.
   2789    * @param[in]  *pSrc points to the input vector
   2790    * @param[in]  offset is the offset to be added
   2791    * @param[out]  *pDst points to the output vector
   2792    * @param[in]  blockSize number of samples in the vector
   2793    * @return none.
   2794    */
   2795 
   2796   void arm_offset_f32(
   2797 		       float32_t * pSrc,
   2798 		      float32_t offset,
   2799 		      float32_t * pDst,
   2800 		      uint32_t blockSize);
   2801 
   2802   /**
   2803    * @brief  Adds a constant offset to a Q7 vector.
   2804    * @param[in]  *pSrc points to the input vector
   2805    * @param[in]  offset is the offset to be added
   2806    * @param[out]  *pDst points to the output vector
   2807    * @param[in]  blockSize number of samples in the vector
   2808    * @return none.
   2809    */
   2810 
   2811   void arm_offset_q7(
   2812 		      q7_t * pSrc,
   2813 		     q7_t offset,
   2814 		     q7_t * pDst,
   2815 		     uint32_t blockSize);
   2816 
   2817   /**
   2818    * @brief  Adds a constant offset to a Q15 vector.
   2819    * @param[in]  *pSrc points to the input vector
   2820    * @param[in]  offset is the offset to be added
   2821    * @param[out]  *pDst points to the output vector
   2822    * @param[in]  blockSize number of samples in the vector
   2823    * @return none.
   2824    */
   2825 
   2826   void arm_offset_q15(
   2827 		       q15_t * pSrc,
   2828 		      q15_t offset,
   2829 		      q15_t * pDst,
   2830 		      uint32_t blockSize);
   2831 
   2832   /**
   2833    * @brief  Adds a constant offset to a Q31 vector.
   2834    * @param[in]  *pSrc points to the input vector
   2835    * @param[in]  offset is the offset to be added
   2836    * @param[out]  *pDst points to the output vector
   2837    * @param[in]  blockSize number of samples in the vector
   2838    * @return none.
   2839    */
   2840 
   2841   void arm_offset_q31(
   2842 		       q31_t * pSrc,
   2843 		      q31_t offset,
   2844 		      q31_t * pDst,
   2845 		      uint32_t blockSize);
   2846 
   2847   /**
   2848    * @brief  Negates the elements of a floating-point vector.
   2849    * @param[in]  *pSrc points to the input vector
   2850    * @param[out]  *pDst points to the output vector
   2851    * @param[in]  blockSize number of samples in the vector
   2852    * @return none.
   2853    */
   2854 
   2855   void arm_negate_f32(
   2856 		       float32_t * pSrc,
   2857 		      float32_t * pDst,
   2858 		      uint32_t blockSize);
   2859 
   2860   /**
   2861    * @brief  Negates the elements of a Q7 vector.
   2862    * @param[in]  *pSrc points to the input vector
   2863    * @param[out]  *pDst points to the output vector
   2864    * @param[in]  blockSize number of samples in the vector
   2865    * @return none.
   2866    */
   2867 
   2868   void arm_negate_q7(
   2869 		      q7_t * pSrc,
   2870 		     q7_t * pDst,
   2871 		     uint32_t blockSize);
   2872 
   2873   /**
   2874    * @brief  Negates the elements of a Q15 vector.
   2875    * @param[in]  *pSrc points to the input vector
   2876    * @param[out]  *pDst points to the output vector
   2877    * @param[in]  blockSize number of samples in the vector
   2878    * @return none.
   2879    */
   2880 
   2881   void arm_negate_q15(
   2882 		       q15_t * pSrc,
   2883 		      q15_t * pDst,
   2884 		      uint32_t blockSize);
   2885 
   2886   /**
   2887    * @brief  Negates the elements of a Q31 vector.
   2888    * @param[in]  *pSrc points to the input vector
   2889    * @param[out]  *pDst points to the output vector
   2890    * @param[in]  blockSize number of samples in the vector
   2891    * @return none.
   2892    */
   2893 
   2894   void arm_negate_q31(
   2895 		       q31_t * pSrc,
   2896 		      q31_t * pDst,
   2897 		      uint32_t blockSize);
   2898   /**
   2899    * @brief  Copies the elements of a floating-point vector.
   2900    * @param[in]  *pSrc input pointer
   2901    * @param[out]  *pDst output pointer
   2902    * @param[in]  blockSize number of samples to process
   2903    * @return none.
   2904    */
   2905   void arm_copy_f32(
   2906 		     float32_t * pSrc,
   2907 		    float32_t * pDst,
   2908 		    uint32_t blockSize);
   2909 
   2910   /**
   2911    * @brief  Copies the elements of a Q7 vector.
   2912    * @param[in]  *pSrc input pointer
   2913    * @param[out]  *pDst output pointer
   2914    * @param[in]  blockSize number of samples to process
   2915    * @return none.
   2916    */
   2917   void arm_copy_q7(
   2918 		    q7_t * pSrc,
   2919 		   q7_t * pDst,
   2920 		   uint32_t blockSize);
   2921 
   2922   /**
   2923    * @brief  Copies the elements of a Q15 vector.
   2924    * @param[in]  *pSrc input pointer
   2925    * @param[out]  *pDst output pointer
   2926    * @param[in]  blockSize number of samples to process
   2927    * @return none.
   2928    */
   2929   void arm_copy_q15(
   2930 		     q15_t * pSrc,
   2931 		    q15_t * pDst,
   2932 		    uint32_t blockSize);
   2933 
   2934   /**
   2935    * @brief  Copies the elements of a Q31 vector.
   2936    * @param[in]  *pSrc input pointer
   2937    * @param[out]  *pDst output pointer
   2938    * @param[in]  blockSize number of samples to process
   2939    * @return none.
   2940    */
   2941   void arm_copy_q31(
   2942 		     q31_t * pSrc,
   2943 		    q31_t * pDst,
   2944 		    uint32_t blockSize);
   2945   /**
   2946    * @brief  Fills a constant value into a floating-point vector.
   2947    * @param[in]  value input value to be filled
   2948    * @param[out]  *pDst output pointer
   2949    * @param[in]  blockSize number of samples to process
   2950    * @return none.
   2951    */
   2952   void arm_fill_f32(
   2953 		     float32_t value,
   2954 		    float32_t * pDst,
   2955 		    uint32_t blockSize);
   2956 
   2957   /**
   2958    * @brief  Fills a constant value into a Q7 vector.
   2959    * @param[in]  value input value to be filled
   2960    * @param[out]  *pDst output pointer
   2961    * @param[in]  blockSize number of samples to process
   2962    * @return none.
   2963    */
   2964   void arm_fill_q7(
   2965 		    q7_t value,
   2966 		   q7_t * pDst,
   2967 		   uint32_t blockSize);
   2968 
   2969   /**
   2970    * @brief  Fills a constant value into a Q15 vector.
   2971    * @param[in]  value input value to be filled
   2972    * @param[out]  *pDst output pointer
   2973    * @param[in]  blockSize number of samples to process
   2974    * @return none.
   2975    */
   2976   void arm_fill_q15(
   2977 		     q15_t value,
   2978 		    q15_t * pDst,
   2979 		    uint32_t blockSize);
   2980 
   2981   /**
   2982    * @brief  Fills a constant value into a Q31 vector.
   2983    * @param[in]  value input value to be filled
   2984    * @param[out]  *pDst output pointer
   2985    * @param[in]  blockSize number of samples to process
   2986    * @return none.
   2987    */
   2988   void arm_fill_q31(
   2989 		     q31_t value,
   2990 		    q31_t * pDst,
   2991 		    uint32_t blockSize);
   2992 
   2993 /**
   2994  * @brief Convolution of floating-point sequences.
   2995  * @param[in] *pSrcA points to the first input sequence.
   2996  * @param[in] srcALen length of the first input sequence.
   2997  * @param[in] *pSrcB points to the second input sequence.
   2998  * @param[in] srcBLen length of the second input sequence.
   2999  * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
   3000  * @return none.
   3001  */
   3002 
   3003   void arm_conv_f32(
   3004 		     float32_t * pSrcA,
   3005 		    uint32_t srcALen,
   3006 		     float32_t * pSrcB,
   3007 		    uint32_t srcBLen,
   3008 		    float32_t * pDst);
   3009 
   3010 /**
   3011  * @brief Convolution of Q15 sequences.
   3012  * @param[in] *pSrcA points to the first input sequence.
   3013  * @param[in] srcALen length of the first input sequence.
   3014  * @param[in] *pSrcB points to the second input sequence.
   3015  * @param[in] srcBLen length of the second input sequence.
   3016  * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
   3017  * @return none.
   3018  */
   3019 
   3020   void arm_conv_q15(
   3021 		     q15_t * pSrcA,
   3022 		    uint32_t srcALen,
   3023 		     q15_t * pSrcB,
   3024 		    uint32_t srcBLen,
   3025 		    q15_t * pDst);
   3026 
   3027   /**
   3028    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
   3029    * @param[in] *pSrcA points to the first input sequence.
   3030    * @param[in] srcALen length of the first input sequence.
   3031    * @param[in] *pSrcB points to the second input sequence.
   3032    * @param[in] srcBLen length of the second input sequence.
   3033    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
   3034    * @return none.
   3035    */
   3036 
   3037   void arm_conv_fast_q15(
   3038 			  q15_t * pSrcA,
   3039 			 uint32_t srcALen,
   3040 			  q15_t * pSrcB,
   3041 			 uint32_t srcBLen,
   3042 			 q15_t * pDst);
   3043 
   3044   /**
   3045    * @brief Convolution of Q31 sequences.
   3046    * @param[in] *pSrcA points to the first input sequence.
   3047    * @param[in] srcALen length of the first input sequence.
   3048    * @param[in] *pSrcB points to the second input sequence.
   3049    * @param[in] srcBLen length of the second input sequence.
   3050    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
   3051    * @return none.
   3052    */
   3053 
   3054   void arm_conv_q31(
   3055 		     q31_t * pSrcA,
   3056 		    uint32_t srcALen,
   3057 		     q31_t * pSrcB,
   3058 		    uint32_t srcBLen,
   3059 		    q31_t * pDst);
   3060 
   3061   /**
   3062    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
   3063    * @param[in] *pSrcA points to the first input sequence.
   3064    * @param[in] srcALen length of the first input sequence.
   3065    * @param[in] *pSrcB points to the second input sequence.
   3066    * @param[in] srcBLen length of the second input sequence.
   3067    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
   3068    * @return none.
   3069    */
   3070 
   3071   void arm_conv_fast_q31(
   3072 			  q31_t * pSrcA,
   3073 			 uint32_t srcALen,
   3074 			  q31_t * pSrcB,
   3075 			 uint32_t srcBLen,
   3076 			 q31_t * pDst);
   3077 
   3078   /**
   3079    * @brief Convolution of Q7 sequences.
   3080    * @param[in] *pSrcA points to the first input sequence.
   3081    * @param[in] srcALen length of the first input sequence.
   3082    * @param[in] *pSrcB points to the second input sequence.
   3083    * @param[in] srcBLen length of the second input sequence.
   3084    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
   3085    * @return none.
   3086    */
   3087 
   3088   void arm_conv_q7(
   3089 		    q7_t * pSrcA,
   3090 		   uint32_t srcALen,
   3091 		    q7_t * pSrcB,
   3092 		   uint32_t srcBLen,
   3093 		   q7_t * pDst);
   3094 
   3095   /**
   3096    * @brief Partial convolution of floating-point sequences.
   3097    * @param[in]       *pSrcA points to the first input sequence.
   3098    * @param[in]       srcALen length of the first input sequence.
   3099    * @param[in]       *pSrcB points to the second input sequence.
   3100    * @param[in]       srcBLen length of the second input sequence.
   3101    * @param[out]      *pDst points to the block of output data.
   3102    * @param[in]       firstIndex is the first output sample to start with.
   3103    * @param[in]       numPoints is the number of output points to be computed.
   3104    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   3105    */
   3106 
   3107   arm_status arm_conv_partial_f32(
   3108 				   float32_t * pSrcA,
   3109 				  uint32_t srcALen,
   3110 				   float32_t * pSrcB,
   3111 				  uint32_t srcBLen,
   3112 				  float32_t * pDst,
   3113 				  uint32_t firstIndex,
   3114 				  uint32_t numPoints);
   3115 
   3116   /**
   3117    * @brief Partial convolution of Q15 sequences.
   3118    * @param[in]       *pSrcA points to the first input sequence.
   3119    * @param[in]       srcALen length of the first input sequence.
   3120    * @param[in]       *pSrcB points to the second input sequence.
   3121    * @param[in]       srcBLen length of the second input sequence.
   3122    * @param[out]      *pDst points to the block of output data.
   3123    * @param[in]       firstIndex is the first output sample to start with.
   3124    * @param[in]       numPoints is the number of output points to be computed.
   3125    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   3126    */
   3127 
   3128   arm_status arm_conv_partial_q15(
   3129 				   q15_t * pSrcA,
   3130 				  uint32_t srcALen,
   3131 				   q15_t * pSrcB,
   3132 				  uint32_t srcBLen,
   3133 				  q15_t * pDst,
   3134 				  uint32_t firstIndex,
   3135 				  uint32_t numPoints);
   3136 
   3137   /**
   3138    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
   3139    * @param[in]       *pSrcA points to the first input sequence.
   3140    * @param[in]       srcALen length of the first input sequence.
   3141    * @param[in]       *pSrcB points to the second input sequence.
   3142    * @param[in]       srcBLen length of the second input sequence.
   3143    * @param[out]      *pDst points to the block of output data.
   3144    * @param[in]       firstIndex is the first output sample to start with.
   3145    * @param[in]       numPoints is the number of output points to be computed.
   3146    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   3147    */
   3148 
   3149   arm_status arm_conv_partial_fast_q15(
   3150 				        q15_t * pSrcA,
   3151 				       uint32_t srcALen,
   3152 				        q15_t * pSrcB,
   3153 				       uint32_t srcBLen,
   3154 				       q15_t * pDst,
   3155 				       uint32_t firstIndex,
   3156 				       uint32_t numPoints);
   3157 
   3158   /**
   3159    * @brief Partial convolution of Q31 sequences: computes only a requested sub-range of the convolution output.
   3160    * @param[in]       *pSrcA points to the first input sequence.
   3161    * @param[in]       srcALen length of the first input sequence.
   3162    * @param[in]       *pSrcB points to the second input sequence.
   3163    * @param[in]       srcBLen length of the second input sequence.
   3164    * @param[out]      *pDst points to the block of output data.
   3165    * @param[in]       firstIndex index of the first output sample to compute.
   3166    * @param[in]       numPoints number of output points to compute.
   3167    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not within the range [0, srcALen+srcBLen-2].
   3168    */
   3169 
   3170   arm_status arm_conv_partial_q31(
   3171 				   q31_t * pSrcA,
   3172 				  uint32_t srcALen,
   3173 				   q31_t * pSrcB,
   3174 				  uint32_t srcBLen,
   3175 				  q31_t * pDst,
   3176 				  uint32_t firstIndex,
   3177 				  uint32_t numPoints);
   3178 
   3179 
   3180   /**
   3181    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4: computes only a requested sub-range of the convolution output.
   3182    * @param[in]       *pSrcA points to the first input sequence.
   3183    * @param[in]       srcALen length of the first input sequence.
   3184    * @param[in]       *pSrcB points to the second input sequence.
   3185    * @param[in]       srcBLen length of the second input sequence.
   3186    * @param[out]      *pDst points to the block of output data.
   3187    * @param[in]       firstIndex index of the first output sample to compute.
   3188    * @param[in]       numPoints number of output points to compute.
   3189    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not within the range [0, srcALen+srcBLen-2].
   3190    */
   3191 
   3192   arm_status arm_conv_partial_fast_q31(
   3193 				        q31_t * pSrcA,
   3194 				       uint32_t srcALen,
   3195 				        q31_t * pSrcB,
   3196 				       uint32_t srcBLen,
   3197 				       q31_t * pDst,
   3198 				       uint32_t firstIndex,
   3199 				       uint32_t numPoints);
   3200 
   3201   /**
   3202    * @brief Partial convolution of Q7 sequences: computes only a requested sub-range of the convolution output.
   3203    * @param[in]       *pSrcA points to the first input sequence.
   3204    * @param[in]       srcALen length of the first input sequence.
   3205    * @param[in]       *pSrcB points to the second input sequence.
   3206    * @param[in]       srcBLen length of the second input sequence.
   3207    * @param[out]      *pDst points to the block of output data.
   3208    * @param[in]       firstIndex index of the first output sample to compute.
   3209    * @param[in]       numPoints number of output points to compute.
   3210    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not within the range [0, srcALen+srcBLen-2].
   3211    */
   3212 
   3213   arm_status arm_conv_partial_q7(
   3214 				  q7_t * pSrcA,
   3215 				 uint32_t srcALen,
   3216 				  q7_t * pSrcB,
   3217 				 uint32_t srcBLen,
   3218 				 q7_t * pDst,
   3219 				 uint32_t firstIndex,
   3220 				 uint32_t numPoints);
   3221 
   3222 
   3223   /**
   3224    * @brief Instance structure for the Q15 FIR decimator; passed to arm_fir_decimate_init_q15(), arm_fir_decimate_q15() and arm_fir_decimate_fast_q15().
   3225    */
   3226 
   3227   typedef struct
   3228   {
   3229     uint8_t M;                      /**< decimation factor. The init function documents ARM_MATH_LENGTH_ERROR when blockSize is not a multiple of M. */
   3230     uint16_t numTaps;               /**< number of coefficients in the filter. */
   3231     q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
   3232     q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
   3233   } arm_fir_decimate_instance_q15;
   3234 
   3235   /**
   3236    * @brief Instance structure for the Q31 FIR decimator; passed to arm_fir_decimate_init_q31(), arm_fir_decimate_q31() and arm_fir_decimate_fast_q31().
   3237    */
   3238 
   3239   typedef struct
   3240   {
   3241     uint8_t M;                  /**< decimation factor. The init function documents ARM_MATH_LENGTH_ERROR when blockSize is not a multiple of M. */
   3242     uint16_t numTaps;           /**< number of coefficients in the filter. */
   3243     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps. */
   3244     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
   3245 
   3246   } arm_fir_decimate_instance_q31;
   3247 
   3248   /**
   3249    * @brief Instance structure for the floating-point FIR decimator; passed to arm_fir_decimate_init_f32() and arm_fir_decimate_f32().
   3250    */
   3251 
   3252   typedef struct
   3253   {
   3254     uint8_t M;                          /**< decimation factor. The init function documents ARM_MATH_LENGTH_ERROR when blockSize is not a multiple of M. */
   3255     uint16_t numTaps;                   /**< number of coefficients in the filter. */
   3256     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
   3257     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
   3258 
   3259   } arm_fir_decimate_instance_f32;
   3260 
   3261 
   3262 
   3263   /**
   3264    * @brief Processing function for the floating-point FIR decimator.
   3265    * @param[in] *S points to an instance of the floating-point FIR decimator structure (see arm_fir_decimate_init_f32()).
   3266    * @param[in] *pSrc points to the block of input data.
   3267    * @param[out] *pDst points to the block of output data.
   3268    * @param[in] blockSize number of input samples to process per call.
   3269    * @return none.
   3270    */
   3271 
   3272   void arm_fir_decimate_f32(
   3273 			    const arm_fir_decimate_instance_f32 * S,
   3274 			     float32_t * pSrc,
   3275 			    float32_t * pDst,
   3276 			    uint32_t blockSize);
   3277 
   3278 
   3279   /**
   3280    * @brief  Initialization function for the floating-point FIR decimator.
   3281    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
   3282    * @param[in] numTaps  number of coefficients in the filter.
   3283    * @param[in] M  decimation factor.
   3284    * @param[in] *pCoeffs points to the filter coefficients. Per the instance structure, the array is of length numTaps.
   3285    * @param[in] *pState points to the state buffer. Per the instance structure, the array is of length numTaps+blockSize-1.
   3286    * @param[in] blockSize number of input samples to process per call.
   3287    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
   3288    * <code>blockSize</code> is not a multiple of <code>M</code>.
   3289    */
   3290 
   3291   arm_status arm_fir_decimate_init_f32(
   3292 				       arm_fir_decimate_instance_f32 * S,
   3293 				       uint16_t numTaps,
   3294 				       uint8_t M,
   3295 				       float32_t * pCoeffs,
   3296 				       float32_t * pState,
   3297 				       uint32_t blockSize);
   3298 
   3299   /**
   3300    * @brief Processing function for the Q15 FIR decimator.
   3301    * @param[in] *S points to an instance of the Q15 FIR decimator structure (see arm_fir_decimate_init_q15()).
   3302    * @param[in] *pSrc points to the block of input data.
   3303    * @param[out] *pDst points to the block of output data.
   3304    * @param[in] blockSize number of input samples to process per call.
   3305    * @return none.
   3306    */
   3307 
   3308   void arm_fir_decimate_q15(
   3309 			    const arm_fir_decimate_instance_q15 * S,
   3310 			     q15_t * pSrc,
   3311 			    q15_t * pDst,
   3312 			    uint32_t blockSize);
   3313 
   3314   /**
   3315    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
   3316    * @param[in] *S points to an instance of the Q15 FIR decimator structure (same instance type as arm_fir_decimate_q15()).
   3317    * @param[in] *pSrc points to the block of input data.
   3318    * @param[out] *pDst points to the block of output data.
   3319    * @param[in] blockSize number of input samples to process per call.
   3320    * @return none.
   3321    */
   3322 
   3323   void arm_fir_decimate_fast_q15(
   3324 				 const arm_fir_decimate_instance_q15 * S,
   3325 				  q15_t * pSrc,
   3326 				 q15_t * pDst,
   3327 				 uint32_t blockSize);
   3328 
   3329 
   3330 
   3331   /**
   3332    * @brief  Initialization function for the Q15 FIR decimator.
   3333    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
   3334    * @param[in] numTaps  number of coefficients in the filter.
   3335    * @param[in] M  decimation factor.
   3336    * @param[in] *pCoeffs points to the filter coefficients. Per the instance structure, the array is of length numTaps.
   3337    * @param[in] *pState points to the state buffer. Per the instance structure, the array is of length numTaps+blockSize-1.
   3338    * @param[in] blockSize number of input samples to process per call.
   3339    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
   3340    * <code>blockSize</code> is not a multiple of <code>M</code>.
   3341    */
   3342 
   3343   arm_status arm_fir_decimate_init_q15(
   3344 				       arm_fir_decimate_instance_q15 * S,
   3345 				       uint16_t numTaps,
   3346 				       uint8_t M,
   3347 				       q15_t * pCoeffs,
   3348 				       q15_t * pState,
   3349 				       uint32_t blockSize);
   3350 
   3351   /**
   3352    * @brief Processing function for the Q31 FIR decimator.
   3353    * @param[in] *S points to an instance of the Q31 FIR decimator structure (see arm_fir_decimate_init_q31()).
   3354    * @param[in] *pSrc points to the block of input data.
   3355    * @param[out] *pDst points to the block of output data.
   3356    * @param[in] blockSize number of input samples to process per call.
   3357    * @return none.
   3358    */
   3359 
   3360   void arm_fir_decimate_q31(
   3361 			    const arm_fir_decimate_instance_q31 * S,
   3362 			     q31_t * pSrc,
   3363 			    q31_t * pDst,
   3364 			    uint32_t blockSize);
   3365 
   3366   /**
   3367    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
   3368    * @param[in] *S points to an instance of the Q31 FIR decimator structure. NOTE(review): S is not const-qualified here, unlike the other decimator processing functions; confirm against the definition before changing.
   3369    * @param[in] *pSrc points to the block of input data.
   3370    * @param[out] *pDst points to the block of output data.
   3371    * @param[in] blockSize number of input samples to process per call.
   3372    * @return none.
   3373    */
   3374 
   3375   void arm_fir_decimate_fast_q31(
   3376 				 arm_fir_decimate_instance_q31 * S,
   3377 				  q31_t * pSrc,
   3378 				 q31_t * pDst,
   3379 				 uint32_t blockSize);
   3380 
   3381 
   3382   /**
   3383    * @brief  Initialization function for the Q31 FIR decimator.
   3384    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
   3385    * @param[in] numTaps  number of coefficients in the filter.
   3386    * @param[in] M  decimation factor.
   3387    * @param[in] *pCoeffs points to the filter coefficients. Per the instance structure, the array is of length numTaps.
   3388    * @param[in] *pState points to the state buffer. Per the instance structure, the array is of length numTaps+blockSize-1.
   3389    * @param[in] blockSize number of input samples to process per call.
   3390    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
   3391    * <code>blockSize</code> is not a multiple of <code>M</code>.
   3392    */
   3393 
   3394   arm_status arm_fir_decimate_init_q31(
   3395 				       arm_fir_decimate_instance_q31 * S,
   3396 				       uint16_t numTaps,
   3397 				       uint8_t M,
   3398 				       q31_t * pCoeffs,
   3399 				       q31_t * pState,
   3400 				       uint32_t blockSize);
   3401 
   3402 
   3403 
   3404   /**
   3405    * @brief Instance structure for the Q15 FIR interpolator; passed to arm_fir_interpolate_init_q15() and arm_fir_interpolate_q15().
   3406    */
   3407 
   3408   typedef struct
   3409   {
   3410     uint8_t L;                      /**< upsample factor. */
   3411     uint16_t phaseLength;           /**< length of each polyphase filter component. */
   3412     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
   3413     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
   3414   } arm_fir_interpolate_instance_q15;
   3415 
   3416   /**
   3417    * @brief Instance structure for the Q31 FIR interpolator; passed to arm_fir_interpolate_init_q31() and arm_fir_interpolate_q31().
   3418    */
   3419 
   3420   typedef struct
   3421   {
   3422     uint8_t L;                      /**< upsample factor. */
   3423     uint16_t phaseLength;           /**< length of each polyphase filter component. */
   3424     q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
   3425     q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
   3426   } arm_fir_interpolate_instance_q31;
   3427 
   3428   /**
   3429    * @brief Instance structure for the floating-point FIR interpolator; passed to arm_fir_interpolate_init_f32() and arm_fir_interpolate_f32().
   3430    */
   3431 
   3432   typedef struct
   3433   {
   3434     uint8_t L;                     /**< upsample factor. */
   3435     uint16_t phaseLength;          /**< length of each polyphase filter component. */
   3436     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
   3437     float32_t *pState;              /**< points to the state variable array. The array is of length blockSize+phaseLength-1, as for the Q15 and Q31 interpolator instances. */
   3438   } arm_fir_interpolate_instance_f32;
   3439 
   3440 
   3441   /**
   3442    * @brief Processing function for the Q15 FIR interpolator.
   3443    * @param[in] *S        points to an instance of the Q15 FIR interpolator structure (see arm_fir_interpolate_init_q15()).
   3444    * @param[in] *pSrc     points to the block of input data.
   3445    * @param[out] *pDst    points to the block of output data.
   3446    * @param[in] blockSize number of input samples to process per call.
   3447    * @return none.
   3448    */
   3449 
   3450   void arm_fir_interpolate_q15(
   3451 			       const arm_fir_interpolate_instance_q15 * S,
   3452 			        q15_t * pSrc,
   3453 			       q15_t * pDst,
   3454 			       uint32_t blockSize);
   3455 
   3456 
   3457   /**
   3458    * @brief  Initialization function for the Q15 FIR interpolator.
   3459    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
   3460    * @param[in]     L         upsample factor.
   3461    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of L.
   3462    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
   3463    * @param[in]     *pState   points to the state buffer. Per the instance structure, the array is of length blockSize+phaseLength-1.
   3464    * @param[in]     blockSize number of input samples to process per call.
   3465    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
   3466    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
   3467    */
   3468 
   3469   arm_status arm_fir_interpolate_init_q15(
   3470 					  arm_fir_interpolate_instance_q15 * S,
   3471 					  uint8_t L,
   3472 					  uint16_t numTaps,
   3473 					  q15_t * pCoeffs,
   3474 					  q15_t * pState,
   3475 					  uint32_t blockSize);
   3476 
   3477   /**
   3478    * @brief Processing function for the Q31 FIR interpolator.
   3479    * @param[in] *S        points to an instance of the Q31 FIR interpolator structure (see arm_fir_interpolate_init_q31()).
   3480    * @param[in] *pSrc     points to the block of input data.
   3481    * @param[out] *pDst    points to the block of output data.
   3482    * @param[in] blockSize number of input samples to process per call.
   3483    * @return none.
   3484    */
   3485 
   3486   void arm_fir_interpolate_q31(
   3487 			       const arm_fir_interpolate_instance_q31 * S,
   3488 			        q31_t * pSrc,
   3489 			       q31_t * pDst,
   3490 			       uint32_t blockSize);
   3491 
   3492   /**
   3493    * @brief  Initialization function for the Q31 FIR interpolator.
   3494    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
   3495    * @param[in]     L         upsample factor.
   3496    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of L.
   3497    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
   3498    * @param[in]     *pState   points to the state buffer. Per the instance structure, the array is of length blockSize+phaseLength-1.
   3499    * @param[in]     blockSize number of input samples to process per call.
   3500    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
   3501    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
   3502    */
   3503 
   3504   arm_status arm_fir_interpolate_init_q31(
   3505 					  arm_fir_interpolate_instance_q31 * S,
   3506 					  uint8_t L,
   3507 					  uint16_t numTaps,
   3508 					  q31_t * pCoeffs,
   3509 					  q31_t * pState,
   3510 					  uint32_t blockSize);
   3511 
   3512 
   3513   /**
   3514    * @brief Processing function for the floating-point FIR interpolator.
   3515    * @param[in] *S        points to an instance of the floating-point FIR interpolator structure (see arm_fir_interpolate_init_f32()).
   3516    * @param[in] *pSrc     points to the block of input data.
   3517    * @param[out] *pDst    points to the block of output data.
   3518    * @param[in] blockSize number of input samples to process per call.
   3519    * @return none.
   3520    */
   3521 
   3522   void arm_fir_interpolate_f32(
   3523 			       const arm_fir_interpolate_instance_f32 * S,
   3524 			        float32_t * pSrc,
   3525 			       float32_t * pDst,
   3526 			       uint32_t blockSize);
   3527 
   3528   /**
   3529    * @brief  Initialization function for the floating-point FIR interpolator.
   3530    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
   3531    * @param[in]     L         upsample factor.
   3532    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of L.
   3533    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
   3534    * @param[in]     *pState   points to the state buffer.
   3535    * @param[in]     blockSize number of input samples to process per call.
   3536    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
   3537    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
   3538    */
   3539 
   3540   arm_status arm_fir_interpolate_init_f32(
   3541 					  arm_fir_interpolate_instance_f32 * S,
   3542 					  uint8_t L,
   3543 					  uint16_t numTaps,
   3544 					  float32_t * pCoeffs,
   3545 					  float32_t * pState,
   3546 					  uint32_t blockSize);
   3547 
   3548   /**
   3549    * @brief Instance structure for the high precision Q31 Biquad cascade filter; passed to arm_biquad_cas_df1_32x64_init_q31() and arm_biquad_cas_df1_32x64_q31().
   3550    */
   3551 
   3552   typedef struct
   3553   {
   3554     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
   3555     q63_t *pState;           /**< points to the array of state variables (q63_t).  The array is of length 4*numStages. */
   3556     q31_t *pCoeffs;          /**< points to the array of coefficients (q31_t).  The array is of length 5*numStages. */
   3557     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
   3558 
   3559   } arm_biquad_cas_df1_32x64_ins_q31;
   3560 
   3561 
   3562   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
   3563    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
   3564    * @param[in]  *pSrc     points to the block of input data.
   3565    * @param[out] *pDst     points to the block of output data.
   3566    * @param[in]  blockSize number of samples to process.
   3567    * @return none.
   3568    */
   3569 
   3570   void arm_biquad_cas_df1_32x64_q31(
   3571 				    const arm_biquad_cas_df1_32x64_ins_q31 * S,
   3572 				     q31_t * pSrc,
   3573 				    q31_t * pDst,
   3574 				    uint32_t blockSize);
   3575 
   3576 
   3577   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
   3578    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
   3579    * @param[in]     numStages    number of 2nd order stages in the filter.
   3580    * @param[in]     *pCoeffs     points to the filter coefficients. Per the instance structure, the array is of length 5*numStages.
   3581    * @param[in]     *pState      points to the state buffer. Per the instance structure, the array is of length 4*numStages.
   3582    * @param[in]     postShift    shift, in bits, to be applied to the output. Varies according to the coefficients format.
   3583    * @return        none.
   3584    */
   3585 
   3586   void arm_biquad_cas_df1_32x64_init_q31(
   3587 					 arm_biquad_cas_df1_32x64_ins_q31 * S,
   3588 					 uint8_t numStages,
   3589 					 q31_t * pCoeffs,
   3590 					 q63_t * pState,
   3591 					 uint8_t postShift);
   3592 
   3593 
   3594 
   3595   /**
   3596    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter; passed to arm_biquad_cascade_df2T_init_f32() and arm_biquad_cascade_df2T_f32().
   3597    */
   3598 
   3599   typedef struct
   3600   {
   3601     uint8_t   numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
   3602     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
   3603     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
   3604   } arm_biquad_cascade_df2T_instance_f32;
   3605 
   3606 
   3607   /**
   3608    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
   3609    * @param[in]  *S        points to an instance of the filter data structure (see arm_biquad_cascade_df2T_init_f32()).
   3610    * @param[in]  *pSrc     points to the block of input data.
   3611    * @param[out] *pDst     points to the block of output data.
   3612    * @param[in]  blockSize number of samples to process.
   3613    * @return none.
   3614    */
   3615 
   3616   void arm_biquad_cascade_df2T_f32(
   3617 				   const arm_biquad_cascade_df2T_instance_f32 * S,
   3618 				    float32_t * pSrc,
   3619 				   float32_t * pDst,
   3620 				   uint32_t blockSize);
   3621 
   3622 
   3623   /**
   3624    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
   3625    * @param[in,out] *S           points to an instance of the filter data structure.
   3626    * @param[in]     numStages    number of 2nd order stages in the filter.
   3627    * @param[in]     *pCoeffs     points to the filter coefficients. Per the instance structure, the array is of length 5*numStages.
   3628    * @param[in]     *pState      points to the state buffer. Per the instance structure, the array is of length 2*numStages.
   3629    * @return        none.
   3630    */
   3631 
   3632   void arm_biquad_cascade_df2T_init_f32(
   3633 					arm_biquad_cascade_df2T_instance_f32 * S,
   3634 					uint8_t numStages,
   3635 					float32_t * pCoeffs,
   3636 					float32_t * pState);
   3637 
   3638 
   3639 
   3640   /**
   3641    * @brief Instance structure for the Q15 FIR lattice filter; passed to arm_fir_lattice_init_q15() and arm_fir_lattice_q15().
   3642    */
   3643 
   3644   typedef struct
   3645   {
   3646     uint16_t numStages;                          /**< number of filter stages. */
   3647     q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
   3648     q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
   3649   } arm_fir_lattice_instance_q15;
   3650 
   3651   /**
   3652    * @brief Instance structure for the Q31 FIR lattice filter; passed to arm_fir_lattice_init_q31() and arm_fir_lattice_q31().
   3653    */
   3654 
   3655   typedef struct
   3656   {
   3657     uint16_t numStages;                          /**< number of filter stages. */
   3658     q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
   3659     q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
   3660   } arm_fir_lattice_instance_q31;
   3661 
   3662   /**
   3663    * @brief Instance structure for the floating-point FIR lattice filter; passed to arm_fir_lattice_init_f32() and arm_fir_lattice_f32().
   3664    */
   3665 
   3666   typedef struct
   3667   {
   3668     uint16_t numStages;                  /**< number of filter stages. */
   3669     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
   3670     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
   3671   } arm_fir_lattice_instance_f32;
   3672 
   3673   /**
   3674    * @brief Initialization function for the Q15 FIR lattice filter.
   3675    * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
   3676    * @param[in] numStages  number of filter stages.
   3677    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
   3678    * @param[in] *pState points to the state buffer.  The array is of length numStages.
   3679    * @return none.
   3680    */
   3681 
   3682   void arm_fir_lattice_init_q15(
   3683 				arm_fir_lattice_instance_q15 * S,
   3684 				uint16_t numStages,
   3685 				q15_t * pCoeffs,
   3686 				q15_t * pState);
   3687 
   3688 
   3689   /**
   3690    * @brief Processing function for the Q15 FIR lattice filter.
   3691    * @param[in] *S points to an instance of the Q15 FIR lattice structure (see arm_fir_lattice_init_q15()).
   3692    * @param[in] *pSrc points to the block of input data.
   3693    * @param[out] *pDst points to the block of output data.
   3694    * @param[in] blockSize number of samples to process.
   3695    * @return none.
   3696    */
   3697   void arm_fir_lattice_q15(
   3698 			   const arm_fir_lattice_instance_q15 * S,
   3699 			    q15_t * pSrc,
   3700 			   q15_t * pDst,
   3701 			   uint32_t blockSize);
   3702 
   3703   /**
   3704    * @brief Initialization function for the Q31 FIR lattice filter.
   3705    * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
   3706    * @param[in] numStages  number of filter stages.
   3707    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
   3708    * @param[in] *pState points to the state buffer.  The array is of length numStages.
   3709    * @return none.
   3710    */
   3711 
   3712   void arm_fir_lattice_init_q31(
   3713 				arm_fir_lattice_instance_q31 * S,
   3714 				uint16_t numStages,
   3715 				q31_t * pCoeffs,
   3716 				q31_t * pState);
   3717 
   3718 
   3719   /**
   3720    * @brief Processing function for the Q31 FIR lattice filter.
   3721    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure (see arm_fir_lattice_init_q31()).
   3722    * @param[in]  *pSrc     points to the block of input data.
   3723    * @param[out] *pDst     points to the block of output data.
   3724    * @param[in]  blockSize number of samples to process.
   3725    * @return none.
   3726    */
   3727 
   3728   void arm_fir_lattice_q31(
   3729 			   const arm_fir_lattice_instance_q31 * S,
   3730 			    q31_t * pSrc,
   3731 			   q31_t * pDst,
   3732 			   uint32_t blockSize);
   3733 
   3734 /**
   3735  * @brief Initialization function for the floating-point FIR lattice filter.
   3736  * @param[in,out] *S points to an instance of the floating-point FIR lattice structure.
   3737  * @param[in] numStages  number of filter stages.
   3738  * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
   3739  * @param[in] *pState points to the state buffer.  The array is of length numStages.
   3740  * @return none.
   3741  */
   3742 
   3743   void arm_fir_lattice_init_f32(
   3744 				arm_fir_lattice_instance_f32 * S,
   3745 				uint16_t numStages,
   3746 				float32_t * pCoeffs,
   3747 				float32_t * pState);
   3748 
   3749   /**
   3750    * @brief Processing function for the floating-point FIR lattice filter.
   3751    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure (see arm_fir_lattice_init_f32()).
   3752    * @param[in]  *pSrc     points to the block of input data.
   3753    * @param[out] *pDst     points to the block of output data.
   3754    * @param[in]  blockSize number of samples to process.
   3755    * @return none.
   3756    */
   3757 
   3758   void arm_fir_lattice_f32(
   3759 			   const arm_fir_lattice_instance_f32 * S,
   3760 			    float32_t * pSrc,
   3761 			   float32_t * pDst,
   3762 			   uint32_t blockSize);
   3763 
   3764   /**
   3765    * @brief Instance structure for the Q15 IIR lattice filter (reflection coefficients plus ladder coefficients).
   3766    */
   3767   typedef struct
   3768   {
   3769     uint16_t numStages;                         /**< number of stages in the filter. */
   3770     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the number of samples processed per call. */
   3771     q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
   3772     q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
   3773   } arm_iir_lattice_instance_q15;
   3774 
   3775   /**
   3776    * @brief Instance structure for the Q31 IIR lattice filter (reflection coefficients plus ladder coefficients); passed to arm_iir_lattice_q31().
   3777    */
   3778   typedef struct
   3779   {
   3780     uint16_t numStages;                         /**< number of stages in the filter. */
   3781     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the number of samples processed per call. */
   3782     q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
   3783     q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
   3784   } arm_iir_lattice_instance_q31;
   3785 
   3786   /**
   3787    * @brief Instance structure for the floating-point IIR lattice filter (reflection coefficients plus ladder coefficients); passed to arm_iir_lattice_init_f32() and arm_iir_lattice_f32().
   3788    */
   3789   typedef struct
   3790   {
   3791     uint16_t numStages;                         /**< number of stages in the filter. */
   3792     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the number of samples processed per call. */
   3793     float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
   3794     float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
   3795   } arm_iir_lattice_instance_f32;
   3796 
   3797   /**
   3798    * @brief Processing function for the floating-point IIR lattice filter.
   3799    * @param[in] *S points to an instance of the floating-point IIR lattice structure (see arm_iir_lattice_init_f32()).
   3800    * @param[in] *pSrc points to the block of input data.
   3801    * @param[out] *pDst points to the block of output data.
   3802    * @param[in] blockSize number of samples to process.
   3803    * @return none.
   3804    */
   3805 
   3806   void arm_iir_lattice_f32(
   3807 			   const arm_iir_lattice_instance_f32 * S,
   3808 			    float32_t * pSrc,
   3809 			   float32_t * pDst,
   3810 			   uint32_t blockSize);
   3811 
   3812   /**
   3813    * @brief Initialization function for the floating-point IIR lattice filter.
   3814    * @param[in] *S         points to an instance of the floating-point IIR lattice structure.
   3815    * @param[in] numStages  number of stages in the filter.
   3816    * @param[in] *pkCoeffs  points to the reflection coefficient buffer.  The array is of length numStages.
   3817    * @param[in] *pvCoeffs  points to the ladder coefficient buffer.  The array is of length numStages+1.
   3818    * @param[in] *pState    points to the state buffer.  The array is of length numStages+blockSize.
   3819    * @param[in] blockSize  number of samples to process.
   3820    * @return none.
   3821    */
   3822 
   3823   void arm_iir_lattice_init_f32(
   3824     arm_iir_lattice_instance_f32 * S,
   3825     uint16_t numStages,
   3826     float32_t *pkCoeffs,
   3827     float32_t *pvCoeffs,
   3828     float32_t *pState,
   3829     uint32_t blockSize);
   3830 
   3831 
   3832   /**
   3833    * @brief Processes a block of samples with the Q31 IIR lattice filter.
   3834    * @param[in]  *S        points to an instance of the Q31 IIR lattice structure.
   3835    * @param[in]  *pSrc     points to the block of input data.
   3836    * @param[out] *pDst     points to the block of output data.
   3837    * @param[in]  blockSize number of samples to process.
   3838    * @return none.
   3839    */
   3840 
   3841   void arm_iir_lattice_q31(
   3842     const arm_iir_lattice_instance_q31 * S,
   3843     q31_t * pSrc,
   3844     q31_t * pDst,
   3845     uint32_t blockSize);
   3846 
   3847 
   3848   /**
   3849    * @brief Initializes an instance of the Q31 IIR lattice filter.
   3850    * @param[in] *S         points to an instance of the Q31 IIR lattice structure.
   3851    * @param[in] numStages  number of stages in the filter.
   3852    * @param[in] *pkCoeffs  points to the reflection coefficient buffer, of length numStages.
   3853    * @param[in] *pvCoeffs  points to the ladder coefficient buffer, of length numStages+1.
   3854    * @param[in] *pState    points to the state buffer, of length numStages+blockSize.
   3855    * @param[in] blockSize  number of samples to process.
   3856    * @return none.
   3857    */
   3858 
   3859   void arm_iir_lattice_init_q31(
   3860     arm_iir_lattice_instance_q31 * S,
   3861     uint16_t numStages,
   3862     q31_t *pkCoeffs,
   3863     q31_t *pvCoeffs,
   3864     q31_t *pState,
   3865     uint32_t blockSize);
   3866 
   3867 
   3868   /**
   3869    * @brief Processes a block of samples with the Q15 IIR lattice filter.
   3870    * @param[in]  *S        points to an instance of the Q15 IIR lattice structure.
   3871    * @param[in]  *pSrc     points to the block of input data.
   3872    * @param[out] *pDst     points to the block of output data.
   3873    * @param[in]  blockSize number of samples to process.
   3874    * @return none.
   3875    */
   3876 
   3877   void arm_iir_lattice_q15(
   3878     const arm_iir_lattice_instance_q15 * S,
   3879     q15_t * pSrc,
   3880     q15_t * pDst,
   3881     uint32_t blockSize);
   3882 
   3883 
   3884   /**
   3885    * @brief Initializes an instance of the Q15 IIR lattice filter.
   3886    * @param[in] *S         points to an instance of the fixed-point Q15 IIR lattice structure.
   3887    * @param[in] numStages  number of stages in the filter.
   3888    * @param[in] *pkCoeffs  points to the reflection coefficient buffer, of length numStages.
   3889    * @param[in] *pvCoeffs  points to the ladder coefficient buffer, of length numStages+1.
   3890    * @param[in] *pState    points to the state buffer, of length numStages+blockSize.
   3891    * @param[in] blockSize  number of samples to process per call.
   3892    * @return none.
   3893    */
   3894 
   3895   void arm_iir_lattice_init_q15(
   3896     arm_iir_lattice_instance_q15 * S,
   3897     uint16_t numStages,
   3898     q15_t *pkCoeffs,
   3899     q15_t *pvCoeffs,
   3900     q15_t *pState,
   3901     uint32_t blockSize);
   3902 
   3903   /**
   3904    * @brief Floating-point LMS filter instance structure.
   3905    */
   3906 
   3907   typedef struct
   3908   {
   3909     uint16_t numTaps;     /**< number of filter coefficients. */
   3910     float32_t *pState;    /**< state variable array; its length is numTaps+blockSize-1. */
   3911     float32_t *pCoeffs;   /**< coefficient array; its length is numTaps. */
   3912     float32_t mu;         /**< step size controlling the filter coefficient updates. */
   3913   } arm_lms_instance_f32;
   3914 
   3915   /**
   3916    * @brief Processes a block of samples with the floating-point LMS filter.
   3917    * @param[in]  *S        points to an instance of the floating-point LMS filter structure.
   3918    * @param[in]  *pSrc     points to the block of input data.
   3919    * @param[in]  *pRef     points to the block of reference data.
   3920    * @param[out] *pOut     points to the block of output data.
   3921    * @param[out] *pErr     points to the block of error data.
   3922    * @param[in]  blockSize number of samples to process.
   3923    * @return none.
   3924    */
   3925 
   3926   void arm_lms_f32(
   3927     const arm_lms_instance_f32 * S,
   3928     float32_t * pSrc,
   3929     float32_t * pRef,
   3930     float32_t * pOut,
   3931     float32_t * pErr,
   3932     uint32_t blockSize);
   3933 
   3934   /**
   3935    * @brief Initializes an instance of the floating-point LMS filter.
   3936    * @param[in] *S        points to an instance of the floating-point LMS filter structure.
   3937    * @param[in] numTaps   number of filter coefficients.
   3938    * @param[in] *pCoeffs  points to the coefficient buffer.
   3939    * @param[in] *pState   points to the state buffer.
   3940    * @param[in] mu        step size that controls filter coefficient updates.
   3941    * @param[in] blockSize number of samples to process.
   3942    * @return none.
   3943    */
   3944 
   3945   void arm_lms_init_f32(
   3946     arm_lms_instance_f32 * S,
   3947     uint16_t numTaps,
   3948     float32_t * pCoeffs,
   3949     float32_t * pState,
   3950     float32_t mu,
   3951     uint32_t blockSize);
   3952 
   3953   /**
   3954    * @brief Q15 LMS filter instance structure.
   3955    */
   3956 
   3957   typedef struct
   3958   {
   3959     uint16_t numTaps;     /**< number of filter coefficients. */
   3960     q15_t *pState;        /**< state variable array; its length is numTaps+blockSize-1. */
   3961     q15_t *pCoeffs;       /**< coefficient array; its length is numTaps. */
   3962     q15_t mu;             /**< step size controlling the filter coefficient updates. */
   3963     uint32_t postShift;   /**< bit shift applied to the coefficients. */
   3964   } arm_lms_instance_q15;
   3965 
   3966 
   3967   /**
   3968    * @brief Initializes an instance of the Q15 LMS filter.
   3969    * @param[in] *S        points to an instance of the Q15 LMS filter structure.
   3970    * @param[in] numTaps   number of filter coefficients.
   3971    * @param[in] *pCoeffs  points to the coefficient buffer.
   3972    * @param[in] *pState   points to the state buffer.
   3973    * @param[in] mu        step size that controls filter coefficient updates.
   3974    * @param[in] blockSize number of samples to process.
   3975    * @param[in] postShift bit shift applied to coefficients.
   3976    * @return none.
   3977    */
   3978 
   3979   void arm_lms_init_q15(
   3980     arm_lms_instance_q15 * S,
   3981     uint16_t numTaps,
   3982     q15_t * pCoeffs,
   3983     q15_t * pState,
   3984     q15_t mu,
   3985     uint32_t blockSize,
   3986     uint32_t postShift);
   3987 
   3988   /**
   3989    * @brief Processes a block of samples with the Q15 LMS filter.
   3990    * @param[in]  *S        points to an instance of the Q15 LMS filter structure.
   3991    * @param[in]  *pSrc     points to the block of input data.
   3992    * @param[in]  *pRef     points to the block of reference data.
   3993    * @param[out] *pOut     points to the block of output data.
   3994    * @param[out] *pErr     points to the block of error data.
   3995    * @param[in]  blockSize number of samples to process.
   3996    * @return none.
   3997    */
   3998 
   3999   void arm_lms_q15(
   4000     const arm_lms_instance_q15 * S,
   4001     q15_t * pSrc,
   4002     q15_t * pRef,
   4003     q15_t * pOut,
   4004     q15_t * pErr,
   4005     uint32_t blockSize);
   4006 
   4007 
   4008   /**
   4009    * @brief Q31 LMS filter instance structure.
   4010    */
   4011 
   4012   typedef struct
   4013   {
   4014     uint16_t numTaps;     /**< number of filter coefficients. */
   4015     q31_t *pState;        /**< state variable array; its length is numTaps+blockSize-1. */
   4016     q31_t *pCoeffs;       /**< coefficient array; its length is numTaps. */
   4017     q31_t mu;             /**< step size controlling the filter coefficient updates. */
   4018     uint32_t postShift;   /**< bit shift applied to the coefficients. */
   4019 
   4020   } arm_lms_instance_q31;
   4021 
   4022   /**
   4023    * @brief Processing function for Q31 LMS filter.
   4024    * @param[in]  *S        points to an instance of the Q31 LMS filter structure.
   4025    * @param[in]  *pSrc     points to the block of input data.
   4026    * @param[in]  *pRef     points to the block of reference data.
   4027    * @param[out] *pOut     points to the block of output data.
   4028    * @param[out] *pErr     points to the block of error data.
   4029    * @param[in]  blockSize number of samples to process.
   4030    * @return     none.
   4031    */
   4032 
   4033   void arm_lms_q31(
   4034     const arm_lms_instance_q31 * S,
   4035     q31_t * pSrc,
   4036     q31_t * pRef,
   4037     q31_t * pOut,
   4038     q31_t * pErr,
   4039     uint32_t blockSize);
   4040 
   4041   /**
   4042    * @brief Initializes an instance of the Q31 LMS filter.
   4043    * @param[in] *S        points to an instance of the Q31 LMS filter structure.
   4044    * @param[in] numTaps   number of filter coefficients.
   4045    * @param[in] *pCoeffs  points to the coefficient buffer.
   4046    * @param[in] *pState   points to the state buffer.
   4047    * @param[in] mu        step size that controls filter coefficient updates.
   4048    * @param[in] blockSize number of samples to process.
   4049    * @param[in] postShift bit shift applied to coefficients.
   4050    * @return none.
   4051    */
   4052 
   4053   void arm_lms_init_q31(
   4054     arm_lms_instance_q31 * S,
   4055     uint16_t numTaps,
   4056     q31_t *pCoeffs,
   4057     q31_t *pState,
   4058     q31_t mu,
   4059     uint32_t blockSize,
   4060     uint32_t postShift);
   4061 
   4062   /**
   4063    * @brief Floating-point normalized LMS filter instance structure.
   4064    */
   4065 
   4066   typedef struct
   4067   {
   4068     uint16_t  numTaps;    /**< number of filter coefficients. */
   4069     float32_t *pState;    /**< state variable array; its length is numTaps+blockSize-1. */
   4070     float32_t *pCoeffs;   /**< coefficient array; its length is numTaps. */
   4071     float32_t mu;         /**< step size controlling the filter coefficient updates. */
   4072     float32_t energy;     /**< previous frame energy. */
   4073     float32_t x0;         /**< previous input sample. */
   4074   } arm_lms_norm_instance_f32;
   4075 
   4076   /**
   4077    * @brief Processes a block of samples with the floating-point normalized LMS filter.
   4078    * @param[in]  *S        points to an instance of the floating-point normalized LMS filter structure.
   4079    * @param[in]  *pSrc     points to the block of input data.
   4080    * @param[in]  *pRef     points to the block of reference data.
   4081    * @param[out] *pOut     points to the block of output data.
   4082    * @param[out] *pErr     points to the block of error data.
   4083    * @param[in]  blockSize number of samples to process.
   4084    * @return none.
   4085    */
   4086 
   4087   void arm_lms_norm_f32(
   4088     arm_lms_norm_instance_f32 * S,
   4089     float32_t * pSrc,
   4090     float32_t * pRef,
   4091     float32_t * pOut,
   4092     float32_t * pErr,
   4093     uint32_t blockSize);
   4094 
   4095   /**
   4096    * @brief Initialization function for floating-point normalized LMS filter.
   4097    * @param[in] *S        points to an instance of the floating-point normalized LMS filter structure.
   4098    * @param[in] numTaps   number of filter coefficients.
   4099    * @param[in] *pCoeffs  points to coefficient buffer.
   4100    * @param[in] *pState   points to state buffer.
   4101    * @param[in] mu        step size that controls filter coefficient updates.
   4102    * @param[in] blockSize number of samples to process.
   4103    * @return none.
   4104    */
   4105 
   4106   void arm_lms_norm_init_f32(
   4107     arm_lms_norm_instance_f32 * S,
   4108     uint16_t numTaps,
   4109     float32_t * pCoeffs,
   4110     float32_t * pState,
   4111     float32_t mu,
   4112     uint32_t blockSize);
   4113 
   4114 
   4115   /**
   4116    * @brief Q31 normalized LMS filter instance structure.
   4117    */
   4118   typedef struct
   4119   {
   4120     uint16_t numTaps;   /**< number of filter coefficients. */
   4121     q31_t *pState;      /**< state variable array; its length is numTaps+blockSize-1. */
   4122     q31_t *pCoeffs;     /**< coefficient array; its length is numTaps. */
   4123     q31_t mu;           /**< step size controlling the filter coefficient updates. */
   4124     uint8_t postShift;  /**< bit shift applied to the coefficients. */
   4125     q31_t *recipTable;  /**< reciprocal initial value table. */
   4126     q31_t energy;       /**< previous frame energy. */
   4127     q31_t x0;           /**< previous input sample. */
   4128   } arm_lms_norm_instance_q31;
   4129 
   4130   /**
   4131    * @brief Processes a block of samples with the Q31 normalized LMS filter.
   4132    * @param[in]  *S        points to an instance of the Q31 normalized LMS filter structure.
   4133    * @param[in]  *pSrc     points to the block of input data.
   4134    * @param[in]  *pRef     points to the block of reference data.
   4135    * @param[out] *pOut     points to the block of output data.
   4136    * @param[out] *pErr     points to the block of error data.
   4137    * @param[in]  blockSize number of samples to process.
   4138    * @return none.
   4139    */
   4140 
   4141   void arm_lms_norm_q31(
   4142     arm_lms_norm_instance_q31 * S,
   4143     q31_t * pSrc,
   4144     q31_t * pRef,
   4145     q31_t * pOut,
   4146     q31_t * pErr,
   4147     uint32_t blockSize);
   4148 
   4149   /**
   4150    * @brief Initializes an instance of the Q31 normalized LMS filter.
   4151    * @param[in] *S        points to an instance of the Q31 normalized LMS filter structure.
   4152    * @param[in] numTaps   number of filter coefficients.
   4153    * @param[in] *pCoeffs  points to the coefficient buffer.
   4154    * @param[in] *pState   points to the state buffer.
   4155    * @param[in] mu        step size that controls filter coefficient updates.
   4156    * @param[in] blockSize number of samples to process.
   4157    * @param[in] postShift bit shift applied to coefficients.
   4158    * @return none.
   4159    */
   4160 
   4161   void arm_lms_norm_init_q31(
   4162     arm_lms_norm_instance_q31 * S,
   4163     uint16_t numTaps,
   4164     q31_t * pCoeffs,
   4165     q31_t * pState,
   4166     q31_t mu,
   4167     uint32_t blockSize,
   4168     uint8_t postShift);
   4169 
   4170   /**
   4171    * @brief Q15 normalized LMS filter instance structure.
   4172    */
   4173 
   4174   typedef struct
   4175   {
   4176     uint16_t numTaps;   /**< number of filter coefficients. */
   4177     q15_t *pState;      /**< state variable array; its length is numTaps+blockSize-1. */
   4178     q15_t *pCoeffs;     /**< coefficient array; its length is numTaps. */
   4179     q15_t mu;           /**< step size controlling the filter coefficient updates. */
   4180     uint8_t postShift;  /**< bit shift applied to the coefficients. */
   4181     q15_t *recipTable;  /**< reciprocal initial value table. */
   4182     q15_t energy;       /**< previous frame energy. */
   4183     q15_t x0;           /**< previous input sample. */
   4184   } arm_lms_norm_instance_q15;
   4185 
   4186   /**
   4187    * @brief Processes a block of samples with the Q15 normalized LMS filter.
   4188    * @param[in]  *S        points to an instance of the Q15 normalized LMS filter structure.
   4189    * @param[in]  *pSrc     points to the block of input data.
   4190    * @param[in]  *pRef     points to the block of reference data.
   4191    * @param[out] *pOut     points to the block of output data.
   4192    * @param[out] *pErr     points to the block of error data.
   4193    * @param[in]  blockSize number of samples to process.
   4194    * @return none.
   4195    */
   4196 
   4197   void arm_lms_norm_q15(
   4198     arm_lms_norm_instance_q15 * S,
   4199     q15_t * pSrc,
   4200     q15_t * pRef,
   4201     q15_t * pOut,
   4202     q15_t * pErr,
   4203     uint32_t blockSize);
   4204 
   4205 
   4206   /**
   4207    * @brief Initializes an instance of the Q15 normalized LMS filter.
   4208    * @param[in] *S        points to an instance of the Q15 normalized LMS filter structure.
   4209    * @param[in] numTaps   number of filter coefficients.
   4210    * @param[in] *pCoeffs  points to the coefficient buffer.
   4211    * @param[in] *pState   points to the state buffer.
   4212    * @param[in] mu        step size that controls filter coefficient updates.
   4213    * @param[in] blockSize number of samples to process.
   4214    * @param[in] postShift bit shift applied to coefficients.
   4215    * @return none.
   4216    */
   4217 
   4218   void arm_lms_norm_init_q15(
   4219     arm_lms_norm_instance_q15 * S,
   4220     uint16_t numTaps,
   4221     q15_t * pCoeffs,
   4222     q15_t * pState,
   4223     q15_t mu,
   4224     uint32_t blockSize,
   4225     uint8_t postShift);
   4226 
   4227   /**
   4228    * @brief Correlates two floating-point sequences.
   4229    * @param[in]  *pSrcA  points to the first input sequence.
   4230    * @param[in]  srcALen length of the first input sequence.
   4231    * @param[in]  *pSrcB  points to the second input sequence.
   4232    * @param[in]  srcBLen length of the second input sequence.
   4233    * @param[out] *pDst   points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
   4234    * @return none.
   4235    */
   4236 
   4237   void arm_correlate_f32(
   4238     float32_t * pSrcA,
   4239     uint32_t srcALen,
   4240     float32_t * pSrcB,
   4241     uint32_t srcBLen,
   4242     float32_t * pDst);
   4243 
   4244   /**
   4245    * @brief Correlates two Q15 sequences.
   4246    * @param[in]  *pSrcA  points to the first input sequence.
   4247    * @param[in]  srcALen length of the first input sequence.
   4248    * @param[in]  *pSrcB  points to the second input sequence.
   4249    * @param[in]  srcBLen length of the second input sequence.
   4250    * @param[out] *pDst   points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
   4251    * @return none.
   4252    */
   4253 
   4254   void arm_correlate_q15(
   4255     q15_t * pSrcA,
   4256     uint32_t srcALen,
   4257     q15_t * pSrcB,
   4258     uint32_t srcBLen,
   4259     q15_t * pDst);
   4260 
   4261   /**
   4262    * @brief Correlates two Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
   4263    * @param[in]  *pSrcA  points to the first input sequence.
   4264    * @param[in]  srcALen length of the first input sequence.
   4265    * @param[in]  *pSrcB  points to the second input sequence.
   4266    * @param[in]  srcBLen length of the second input sequence.
   4267    * @param[out] *pDst   points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
   4268    * @return none.
   4269    */
   4270 
   4271   void arm_correlate_fast_q15(
   4272     q15_t * pSrcA,
   4273     uint32_t srcALen,
   4274     q15_t * pSrcB,
   4275     uint32_t srcBLen,
   4276     q15_t * pDst);
   4277 
   4278   /**
   4279    * @brief Correlates two Q31 sequences.
   4280    * @param[in]  *pSrcA  points to the first input sequence.
   4281    * @param[in]  srcALen length of the first input sequence.
   4282    * @param[in]  *pSrcB  points to the second input sequence.
   4283    * @param[in]  srcBLen length of the second input sequence.
   4284    * @param[out] *pDst   points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
   4285    * @return none.
   4286    */
   4287 
   4288   void arm_correlate_q31(
   4289     q31_t * pSrcA,
   4290     uint32_t srcALen,
   4291     q31_t * pSrcB,
   4292     uint32_t srcBLen,
   4293     q31_t * pDst);
   4294 
   4295   /**
   4296    * @brief Correlates two Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
   4297    * @param[in]  *pSrcA  points to the first input sequence.
   4298    * @param[in]  srcALen length of the first input sequence.
   4299    * @param[in]  *pSrcB  points to the second input sequence.
   4300    * @param[in]  srcBLen length of the second input sequence.
   4301    * @param[out] *pDst   points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
   4302    * @return none.
   4303    */
   4304 
   4305   void arm_correlate_fast_q31(
   4306     q31_t * pSrcA,
   4307     uint32_t srcALen,
   4308     q31_t * pSrcB,
   4309     uint32_t srcBLen,
   4310     q31_t * pDst);
   4311 
   4312   /**
   4313    * @brief Correlates two Q7 sequences.
   4314    * @param[in]  *pSrcA  points to the first input sequence.
   4315    * @param[in]  srcALen length of the first input sequence.
   4316    * @param[in]  *pSrcB  points to the second input sequence.
   4317    * @param[in]  srcBLen length of the second input sequence.
   4318    * @param[out] *pDst   points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
   4319    * @return none.
   4320    */
   4321 
   4322   void arm_correlate_q7(
   4323     q7_t * pSrcA,
   4324     uint32_t srcALen,
   4325     q7_t * pSrcB,
   4326     uint32_t srcBLen,
   4327     q7_t * pDst);
   4328 
   4329   /**
   4330    * @brief Floating-point sparse FIR filter instance structure.
   4331    */
   4332   typedef struct
   4333   {
   4334     uint16_t numTaps;     /**< number of filter coefficients. */
   4335     uint16_t stateIndex;  /**< state buffer index; points to the oldest sample in the state buffer. */
   4336     float32_t *pState;    /**< state buffer array; its length is maxDelay+blockSize-1. */
   4337     float32_t *pCoeffs;   /**< coefficient array; its length is numTaps. */
   4338     uint16_t maxDelay;    /**< maximum offset specified by the pTapDelay array. */
   4339     int32_t *pTapDelay;   /**< array of delay values; its length is numTaps. */
   4340   } arm_fir_sparse_instance_f32;
   4341 
   4342   /**
   4343    * @brief Q31 sparse FIR filter instance structure.
   4344    */
   4345 
   4346   typedef struct
   4347   {
   4348     uint16_t numTaps;     /**< number of filter coefficients. */
   4349     uint16_t stateIndex;  /**< state buffer index; points to the oldest sample in the state buffer. */
   4350     q31_t *pState;        /**< state buffer array; its length is maxDelay+blockSize-1. */
   4351     q31_t *pCoeffs;       /**< coefficient array; its length is numTaps. */
   4352     uint16_t maxDelay;    /**< maximum offset specified by the pTapDelay array. */
   4353     int32_t *pTapDelay;   /**< array of delay values; its length is numTaps. */
   4354   } arm_fir_sparse_instance_q31;
   4355 
   4356   /**
   4357    * @brief Q15 sparse FIR filter instance structure.
   4358    */
   4359 
   4360   typedef struct
   4361   {
   4362     uint16_t numTaps;     /**< number of filter coefficients. */
   4363     uint16_t stateIndex;  /**< state buffer index; points to the oldest sample in the state buffer. */
   4364     q15_t *pState;        /**< state buffer array; its length is maxDelay+blockSize-1. */
   4365     q15_t *pCoeffs;       /**< coefficient array; its length is numTaps. */
   4366     uint16_t maxDelay;    /**< maximum offset specified by the pTapDelay array. */
   4367     int32_t *pTapDelay;   /**< array of delay values; its length is numTaps. */
   4368   } arm_fir_sparse_instance_q15;
   4369 
   4370   /**
   4371    * @brief Q7 sparse FIR filter instance structure.
   4372    */
   4373 
   4374   typedef struct
   4375   {
   4376     uint16_t numTaps;     /**< number of filter coefficients. */
   4377     uint16_t stateIndex;  /**< state buffer index; points to the oldest sample in the state buffer. */
   4378     q7_t *pState;         /**< state buffer array; its length is maxDelay+blockSize-1. */
   4379     q7_t *pCoeffs;        /**< coefficient array; its length is numTaps. */
   4380     uint16_t maxDelay;    /**< maximum offset specified by the pTapDelay array. */
   4381     int32_t *pTapDelay;   /**< array of delay values; its length is numTaps. */
   4382   } arm_fir_sparse_instance_q7;
   4383 
   4384   /**
   4385    * @brief Processes a block of samples with the floating-point sparse FIR filter.
   4386    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
   4387    * @param[in]  *pSrc       points to the block of input data.
   4388    * @param[out] *pDst       points to the block of output data.
   4389    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
   4390    * @param[in]  blockSize   number of input samples to process per call.
   4391    * @return none.
   4392    */
   4393 
   4394   void arm_fir_sparse_f32(
   4395     arm_fir_sparse_instance_f32 * S,
   4396     float32_t * pSrc,
   4397     float32_t * pDst,
   4398     float32_t * pScratchIn,
   4399     uint32_t blockSize);
   4400 
   4401   /**
   4402    * @brief Initializes an instance of the floating-point sparse FIR filter.
   4403    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
   4404    * @param[in]     numTaps    number of nonzero coefficients in the filter.
   4405    * @param[in]     *pCoeffs   points to the array of filter coefficients.
   4406    * @param[in]     *pState    points to the state buffer.
   4407    * @param[in]     *pTapDelay points to the array of offset times.
   4408    * @param[in]     maxDelay   maximum offset time supported.
   4409    * @param[in]     blockSize  number of samples that will be processed per block.
   4410    * @return none.
   4411    */
   4412 
   4413   void arm_fir_sparse_init_f32(
   4414     arm_fir_sparse_instance_f32 * S,
   4415     uint16_t numTaps,
   4416     float32_t * pCoeffs,
   4417     float32_t * pState,
   4418     int32_t * pTapDelay,
   4419     uint16_t maxDelay,
   4420     uint32_t blockSize);
   4421 
   4422   /**
   4423    * @brief Processes a block of samples with the Q31 sparse FIR filter.
   4424    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
   4425    * @param[in]  *pSrc       points to the block of input data.
   4426    * @param[out] *pDst       points to the block of output data.
   4427    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
   4428    * @param[in]  blockSize   number of input samples to process per call.
   4429    * @return none.
   4430    */
   4431 
   4432   void arm_fir_sparse_q31(
   4433     arm_fir_sparse_instance_q31 * S,
   4434     q31_t * pSrc,
   4435     q31_t * pDst,
   4436     q31_t * pScratchIn,
   4437     uint32_t blockSize);
   4438 
   4439   /**
   4440    * @brief Initializes an instance of the Q31 sparse FIR filter.
   4441    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
   4442    * @param[in]     numTaps    number of nonzero coefficients in the filter.
   4443    * @param[in]     *pCoeffs   points to the array of filter coefficients.
   4444    * @param[in]     *pState    points to the state buffer.
   4445    * @param[in]     *pTapDelay points to the array of offset times.
   4446    * @param[in]     maxDelay   maximum offset time supported.
   4447    * @param[in]     blockSize  number of samples that will be processed per block.
   4448    * @return none.
   4449    */
   4450 
   4451   void arm_fir_sparse_init_q31(
   4452     arm_fir_sparse_instance_q31 * S,
   4453     uint16_t numTaps,
   4454     q31_t * pCoeffs,
   4455     q31_t * pState,
   4456     int32_t * pTapDelay,
   4457     uint16_t maxDelay,
   4458     uint32_t blockSize);
   4459 
   4460   /**
   4461    * @brief Processes a block of samples with the Q15 sparse FIR filter.
   4462    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
   4463    * @param[in]  *pSrc        points to the block of input data.
   4464    * @param[out] *pDst        points to the block of output data.
   4465    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
   4466    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
   4467    * @param[in]  blockSize    number of input samples to process per call.
   4468    * @return none.
   4469    */
   4470 
   4471   void arm_fir_sparse_q15(
   4472     arm_fir_sparse_instance_q15 * S,
   4473     q15_t * pSrc,
   4474     q15_t * pDst,
   4475     q15_t * pScratchIn,
   4476     q31_t * pScratchOut,
   4477     uint32_t blockSize);
   4478 
   4479 
   4480   /**
   4481    * @brief Initializes an instance of the Q15 sparse FIR filter.
   4482    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
   4483    * @param[in]     numTaps    number of nonzero coefficients in the filter.
   4484    * @param[in]     *pCoeffs   points to the array of filter coefficients.
   4485    * @param[in]     *pState    points to the state buffer.
   4486    * @param[in]     *pTapDelay points to the array of offset times.
   4487    * @param[in]     maxDelay   maximum offset time supported.
   4488    * @param[in]     blockSize  number of samples that will be processed per block.
   4489    * @return none.
   4490    */
   4491 
   4492   void arm_fir_sparse_init_q15(
   4493     arm_fir_sparse_instance_q15 * S,
   4494     uint16_t numTaps,
   4495     q15_t * pCoeffs,
   4496     q15_t * pState,
   4497     int32_t * pTapDelay,
   4498     uint16_t maxDelay,
   4499     uint32_t blockSize);
   4500 
   4501   /**
   4502    * @brief Processes a block of samples with the Q7 sparse FIR filter.
   4503    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
   4504    * @param[in]  *pSrc        points to the block of input data.
   4505    * @param[out] *pDst        points to the block of output data.
   4506    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
   4507    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
   4508    * @param[in]  blockSize    number of input samples to process per call.
   4509    * @return none.
   4510    */
   4511 
   4512   void arm_fir_sparse_q7(
   4513     arm_fir_sparse_instance_q7 * S,
   4514     q7_t * pSrc,
   4515     q7_t * pDst,
   4516     q7_t * pScratchIn,
   4517     q31_t * pScratchOut,
   4518     uint32_t blockSize);
   4519 
   4520   /**
   4521    * @brief  Initialization function for the Q7 sparse FIR filter.
   4522    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
   4523    * @param[in]     numTaps    number of nonzero coefficients in the filter.
   4524    * @param[in]     *pCoeffs   points to the array of filter coefficients.
   4525    * @param[in]     *pState    points to the state buffer.
   4526    * @param[in]     *pTapDelay points to the array of offset times.
   4527    * @param[in]     maxDelay   maximum offset time supported.
   4528    * @param[in]     blockSize  number of samples that will be processed per block.
   4529    * @return none
   4530    */
   4531 
   4532   void arm_fir_sparse_init_q7(
   4533 			      arm_fir_sparse_instance_q7 * S,
   4534 			      uint16_t numTaps,
   4535 			      q7_t * pCoeffs,
   4536 			      q7_t * pState,
   4537 			      int32_t *pTapDelay,
   4538 			      uint16_t maxDelay,
   4539 			      uint32_t blockSize);
   4540 
   4541 
   4542   /*
   4543    * @brief  Floating-point sin_cos function.
   4544    * @param[in]  theta    input value in degrees
   4545    * @param[out] *pSinVal points to the processed sine output.
   4546    * @param[out] *pCosVal points to the processed cos output.
   4547    * @return none.
   4548    */
   4549 
   4550   void arm_sin_cos_f32(
   4551 		       float32_t theta,
   4552 		       float32_t *pSinVal,
   4553 		       float32_t *pCcosVal);
   4554 
   4555   /*
   4556    * @brief  Q31 sin_cos function.
   4557    * @param[in]  theta    scaled input value in degrees
   4558    * @param[out] *pSinVal points to the processed sine output.
   4559    * @param[out] *pCosVal points to the processed cosine output.
   4560    * @return none.
   4561    */
   4562 
   4563   void arm_sin_cos_q31(
   4564 		       q31_t theta,
   4565 		       q31_t *pSinVal,
   4566 		       q31_t *pCosVal);
   4567 
   4568 
   4569   /**
   4570    * @brief  Floating-point complex conjugate.
   4571    * @param[in]  *pSrc points to the input vector
   4572    * @param[out]  *pDst points to the output vector
   4573    * @param[in]  numSamples number of complex samples in each vector
   4574    * @return none.
   4575    */
   4576 
   4577   void arm_cmplx_conj_f32(
   4578 			   float32_t * pSrc,
   4579 			  float32_t * pDst,
   4580 			  uint32_t numSamples);
   4581 
   4582   /**
   4583    * @brief  Q31 complex conjugate.
   4584    * @param[in]  *pSrc points to the input vector
   4585    * @param[out]  *pDst points to the output vector
   4586    * @param[in]  numSamples number of complex samples in each vector
   4587    * @return none.
   4588    */
   4589 
   4590   void arm_cmplx_conj_q31(
   4591 			   q31_t * pSrc,
   4592 			  q31_t * pDst,
   4593 			  uint32_t numSamples);
   4594 
   4595   /**
   4596    * @brief  Q15 complex conjugate.
   4597    * @param[in]  *pSrc points to the input vector
   4598    * @param[out]  *pDst points to the output vector
   4599    * @param[in]  numSamples number of complex samples in each vector
   4600    * @return none.
   4601    */
   4602 
   4603   void arm_cmplx_conj_q15(
   4604 			   q15_t * pSrc,
   4605 			  q15_t * pDst,
   4606 			  uint32_t numSamples);
   4607 
   4608 
   4609 
   4610   /**
   4611    * @brief  Floating-point complex magnitude squared
   4612    * @param[in]  *pSrc points to the complex input vector
   4613    * @param[out]  *pDst points to the real output vector
   4614    * @param[in]  numSamples number of complex samples in the input vector
   4615    * @return none.
   4616    */
   4617 
   4618   void arm_cmplx_mag_squared_f32(
   4619 				  float32_t * pSrc,
   4620 				 float32_t * pDst,
   4621 				 uint32_t numSamples);
   4622 
   4623   /**
   4624    * @brief  Q31 complex magnitude squared
   4625    * @param[in]  *pSrc points to the complex input vector
   4626    * @param[out]  *pDst points to the real output vector
   4627    * @param[in]  numSamples number of complex samples in the input vector
   4628    * @return none.
   4629    */
   4630 
   4631   void arm_cmplx_mag_squared_q31(
   4632 				  q31_t * pSrc,
   4633 				 q31_t * pDst,
   4634 				 uint32_t numSamples);
   4635 
   4636   /**
   4637    * @brief  Q15 complex magnitude squared
   4638    * @param[in]  *pSrc points to the complex input vector
   4639    * @param[out]  *pDst points to the real output vector
   4640    * @param[in]  numSamples number of complex samples in the input vector
   4641    * @return none.
   4642    */
   4643 
   4644   void arm_cmplx_mag_squared_q15(
   4645 				  q15_t * pSrc,
   4646 				 q15_t * pDst,
   4647 				 uint32_t numSamples);
   4648 
   4649 
   4650  /**
   4651    * @ingroup groupController
   4652    */
   4653 
   4654   /**
   4655    * @defgroup PID PID Motor Control
   4656    *
   4657    * A Proportional Integral Derivative (PID) controller is a generic feedback control
   4658    * loop mechanism widely used in industrial control systems.
   4659    * A PID controller is the most commonly used type of feedback controller.
   4660    *
   4661    * This set of functions implements (PID) controllers
   4662    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
   4663    * of data and each call to the function returns a single processed value.
   4664    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
   4665    * is the input sample value. The functions return the output value.
   4666    *
   4667    * \par Algorithm:
   4668    * <pre>
   4669    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
   4670    *    A0 = Kp + Ki + Kd
   4671    *    A1 = (-Kp ) - (2 * Kd )
   4672    *    A2 = Kd  </pre>
   4673    *
   4674    * \par
   4675    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
   4676    *
   4677    * \par
   4678    * \image html PID.gif "Proportional Integral Derivative Controller"
   4679    *
   4680    * \par
   4681    * The PID controller calculates an "error" value as the difference between
   4682    * the measured output and the reference input.
   4683    * The controller attempts to minimize the error by adjusting the process control inputs.
   4684    * The proportional value determines the reaction to the current error,
   4685    * the integral value determines the reaction based on the sum of recent errors,
   4686    * and the derivative value determines the reaction based on the rate at which the error has been changing.
   4687    *
   4688    * \par Instance Structure
   4689    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
   4690    * A separate instance structure must be defined for each PID Controller.
   4691    * There are separate instance structure declarations for each of the 3 supported data types.
   4692    *
   4693    * \par Reset Functions
   4694    * There is also an associated reset function for each data type which clears the state array.
   4695    *
   4696    * \par Initialization Functions
   4697    * There is also an associated initialization function for each data type.
   4698    * The initialization function performs the following operations:
   4699    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
   4700    * - Zeros out the values in the state buffer.
   4701    *
   4702    * \par
   4703    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
   4704    *
   4705    * \par Fixed-Point Behavior
   4706    * Care must be taken when using the fixed-point versions of the PID Controller functions.
   4707    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
   4708    * Refer to the function specific documentation below for usage guidelines.
   4709    */
   4710 
   4711   /**
   4712    * @addtogroup PID
   4713    * @{
   4714    */
   4715 
   4716   /**
   4717    * @brief  Process function for the floating-point PID Control.
   4718    * @param[in,out] *S is an instance of the floating-point PID Control structure
   4719    * @param[in] in input sample to process
   4720    * @return out processed output sample.
   4721    */
   4722 
   4723 
   4724   static __INLINE float32_t arm_pid_f32(
   4725 					arm_pid_instance_f32 * S,
   4726 					float32_t in)
   4727   {
   4728     float32_t out;
   4729 
   4730     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
   4731     out = (S->A0 * in) +
   4732       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
   4733 
   4734     /* Update state */
   4735     S->state[1] = S->state[0];
   4736     S->state[0] = in;
   4737     S->state[2] = out;
   4738 
   4739     /* return to application */
   4740     return (out);
   4741 
   4742   }
   4743 
   4744   /**
   4745    * @brief  Process function for the Q31 PID Control.
   4746    * @param[in,out] *S points to an instance of the Q31 PID Control structure
   4747    * @param[in] in input sample to process
   4748    * @return out processed output sample.
   4749    *
   4750    * <b>Scaling and Overflow Behavior:</b>
   4751    * \par
   4752    * The function is implemented using an internal 64-bit accumulator.
   4753    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
   4754    * Thus, if the accumulator result overflows it wraps around rather than clip.
   4755    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
   4756    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
   4757    */
   4758 
   4759   static __INLINE q31_t arm_pid_q31(
   4760 				    arm_pid_instance_q31 * S,
   4761 				    q31_t in)
   4762   {
   4763     q63_t acc;
   4764 	q31_t out;
   4765 
   4766     /* acc = A0 * x[n]  */
   4767     acc = (q63_t) S->A0 * in;
   4768 
   4769     /* acc += A1 * x[n-1] */
   4770     acc += (q63_t) S->A1 * S->state[0];
   4771 
   4772     /* acc += A2 * x[n-2]  */
   4773     acc += (q63_t) S->A2 * S->state[1];
   4774 
   4775     /* convert output to 1.31 format to add y[n-1] */
   4776     out = (q31_t) (acc >> 31u);
   4777 
   4778     /* out += y[n-1] */
   4779     out += S->state[2];
   4780 
   4781     /* Update state */
   4782     S->state[1] = S->state[0];
   4783     S->state[0] = in;
   4784     S->state[2] = out;
   4785 
   4786     /* return to application */
   4787     return (out);
   4788 
   4789   }
   4790 
   4791   /**
   4792    * @brief  Process function for the Q15 PID Control.
   4793    * @param[in,out] *S points to an instance of the Q15 PID Control structure
   4794    * @param[in] in input sample to process
   4795    * @return out processed output sample.
   4796    *
   4797    * <b>Scaling and Overflow Behavior:</b>
   4798    * \par
   4799    * The function is implemented using a 64-bit internal accumulator.
   4800    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
   4801    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
   4802    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
   4803    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
   4804    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
   4805    */
   4806 
   4807   static __INLINE q15_t arm_pid_q15(
   4808 				    arm_pid_instance_q15 * S,
   4809 				    q15_t in)
   4810   {
   4811     q63_t acc;
   4812     q15_t out;
   4813 
   4814     /* Implementation of PID controller */
   4815 
   4816 	#ifdef ARM_MATH_CM0
   4817 
   4818  	/* acc = A0 * x[n]  */
   4819 	acc = ((q31_t) S->A0 )* in ;
   4820 
   4821     #else
   4822 
   4823     /* acc = A0 * x[n]  */
   4824     acc = (q31_t) __SMUAD(S->A0, in);
   4825 
   4826 	#endif
   4827 
   4828 	#ifdef ARM_MATH_CM0
   4829 
   4830 	/* acc += A1 * x[n-1] + A2 * x[n-2]  */
   4831 	acc += (q31_t) S->A1  *  S->state[0] ;
   4832 	acc += (q31_t) S->A2  *  S->state[1] ;
   4833 
   4834 	#else
   4835 
   4836     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
   4837     acc = __SMLALD(S->A1, (q31_t)__SIMD32(S->state), acc);
   4838 
   4839 	#endif
   4840 
   4841     /* acc += y[n-1] */
   4842     acc += (q31_t) S->state[2] << 15;
   4843 
   4844     /* saturate the output */
   4845     out = (q15_t) (__SSAT((acc >> 15), 16));
   4846 
   4847     /* Update state */
   4848     S->state[1] = S->state[0];
   4849     S->state[0] = in;
   4850     S->state[2] = out;
   4851 
   4852     /* return to application */
   4853     return (out);
   4854 
   4855   }
   4856 
   4857   /**
   4858    * @} end of PID group
   4859    */
   4860 
   4861 
   4862   /**
   4863    * @brief Floating-point matrix inverse.
   4864    * @param[in]  *src points to the instance of the input floating-point matrix structure.
   4865    * @param[out] *dst points to the instance of the output floating-point matrix structure.
   4866    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
   4867    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
   4868    */
   4869 
   4870   arm_status arm_mat_inverse_f32(
   4871 				 const arm_matrix_instance_f32 * src,
   4872 				 arm_matrix_instance_f32 * dst);
   4873 
   4874 
   4875 
   4876   /**
   4877    * @ingroup groupController
   4878    */
   4879 
   4880 
   4881   /**
   4882    * @defgroup clarke Vector Clarke Transform
   4883    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
   4884    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
   4885    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
   4886    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
   4887    * \image html clarke.gif "Stator current space vector and its components in (a,b)."
   4888    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
   4889    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
   4890    *
   4891    * The function operates on a single sample of data and each call to the function returns the processed output.
   4892    * The library provides separate functions for Q31 and floating-point data types.
   4893    * \par Algorithm
   4894    * \image html clarkeFormula.gif
   4895    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
   4896    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
   4897    * \par Fixed-Point Behavior
   4898    * Care must be taken when using the Q31 version of the Clarke transform.
   4899    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
   4900    * Refer to the function specific documentation below for usage guidelines.
   4901    */
   4902 
   4903   /**
   4904    * @addtogroup clarke
   4905    * @{
   4906    */
   4907 
   4908   /**
   4909    *
   4910    * @brief  Floating-point Clarke transform
   4911    * @param[in]       Ia       input three-phase coordinate <code>a</code>
   4912    * @param[in]       Ib       input three-phase coordinate <code>b</code>
   4913    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
   4914    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
   4915    * @return none.
   4916    */
   4917 
   4918   static __INLINE void arm_clarke_f32(
   4919 				      float32_t Ia,
   4920 				      float32_t Ib,
   4921 				      float32_t * pIalpha,
   4922 				      float32_t * pIbeta)
   4923   {
   4924     /* Calculate pIalpha using the equation, pIalpha = Ia */
   4925     *pIalpha = Ia;
   4926 
   4927     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
   4928     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
   4929 
   4930   }
   4931 
   4932   /**
   4933    * @brief  Clarke transform for Q31 version
   4934    * @param[in]       Ia       input three-phase coordinate <code>a</code>
   4935    * @param[in]       Ib       input three-phase coordinate <code>b</code>
   4936    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
   4937    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
   4938    * @return none.
   4939    *
   4940    * <b>Scaling and Overflow Behavior:</b>
   4941    * \par
   4942    * The function is implemented using an internal 32-bit accumulator.
   4943    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
   4944    * There is saturation on the addition, hence there is no risk of overflow.
   4945    */
   4946 
   4947   static __INLINE void arm_clarke_q31(
   4948 				      q31_t Ia,
   4949 				      q31_t Ib,
   4950 				      q31_t * pIalpha,
   4951 				      q31_t * pIbeta)
   4952   {
   4953     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
   4954 
   4955     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
   4956     *pIalpha = Ia;
   4957 
   4958     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
   4959     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
   4960 
   4961     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
   4962     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
   4963 
   4964     /* pIbeta is calculated by adding the intermediate products */
   4965     *pIbeta = __QADD(product1, product2);
   4966   }
   4967 
   4968   /**
   4969    * @} end of clarke group
   4970    */
   4971 
   4972   /**
   4973    * @brief  Converts the elements of the Q7 vector to Q31 vector.
   4974    * @param[in]  *pSrc     input pointer
   4975    * @param[out]  *pDst    output pointer
   4976    * @param[in]  blockSize number of samples to process
   4977    * @return none.
   4978    */
   4979   void arm_q7_to_q31(
   4980 		     q7_t * pSrc,
   4981 		     q31_t * pDst,
   4982 		     uint32_t blockSize);
   4983 
   4984 
   4985 
   4986 
   4987   /**
   4988    * @ingroup groupController
   4989    */
   4990 
   4991   /**
   4992    * @defgroup inv_clarke Vector Inverse Clarke Transform
   4993    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
   4994    *
   4995    * The function operates on a single sample of data and each call to the function returns the processed output.
   4996    * The library provides separate functions for Q31 and floating-point data types.
   4997    * \par Algorithm
   4998    * \image html clarkeInvFormula.gif
   4999    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
   5000    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
   5001    * \par Fixed-Point Behavior
   5002    * Care must be taken when using the Q31 version of the Clarke transform.
   5003    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
   5004    * Refer to the function specific documentation below for usage guidelines.
   5005    */
   5006 
   5007   /**
   5008    * @addtogroup inv_clarke
   5009    * @{
   5010    */
   5011 
   5012    /**
   5013    * @brief  Floating-point Inverse Clarke transform
   5014    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
   5015    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
   5016    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
   5017    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
   5018    * @return none.
   5019    */
   5020 
   5021 
   5022   static __INLINE void arm_inv_clarke_f32(
   5023 					  float32_t Ialpha,
   5024 					  float32_t Ibeta,
   5025 					  float32_t * pIa,
   5026 					  float32_t * pIb)
   5027   {
   5028     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
   5029     *pIa = Ialpha;
   5030 
   5031     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
   5032     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
   5033 
   5034   }
   5035 
   5036   /**
   5037    * @brief  Inverse Clarke transform for Q31 version
   5038    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
   5039    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
   5040    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
   5041    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
   5042    * @return none.
   5043    *
   5044    * <b>Scaling and Overflow Behavior:</b>
   5045    * \par
   5046    * The function is implemented using an internal 32-bit accumulator.
   5047    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
   5048    * There is saturation on the subtraction, hence there is no risk of overflow.
   5049    */
   5050 
   5051   static __INLINE void arm_inv_clarke_q31(
   5052 					  q31_t Ialpha,
   5053 					  q31_t Ibeta,
   5054 					  q31_t * pIa,
   5055 					  q31_t * pIb)
   5056   {
   5057     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
   5058 
   5059     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
   5060     *pIa = Ialpha;
   5061 
   5062     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
   5063     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
   5064 
   5065     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
   5066     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
   5067 
   5068     /* pIb is calculated by subtracting the products */
   5069     *pIb = __QSUB(product2, product1);
   5070 
   5071   }
   5072 
   5073   /**
   5074    * @} end of inv_clarke group
   5075    */
   5076 
   5077   /**
   5078    * @brief  Converts the elements of the Q7 vector to Q15 vector.
   5079    * @param[in]  *pSrc     input pointer
   5080    * @param[out] *pDst     output pointer
   5081    * @param[in]  blockSize number of samples to process
   5082    * @return none.
   5083    */
   5084   void arm_q7_to_q15(
   5085 		      q7_t * pSrc,
   5086 		     q15_t * pDst,
   5087 		     uint32_t blockSize);
   5088 
   5089 
   5090 
   5091   /**
   5092    * @ingroup groupController
   5093    */
   5094 
   5095   /**
   5096    * @defgroup park Vector Park Transform
   5097    *
   5098    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
   5099    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
   5100    * from the stationary to the moving reference frame and control the spatial relationship between
   5101    * the stator vector current and rotor flux vector.
   5102    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
   5103    * current vector and the relationship from the two reference frames:
   5104    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
   5105    *
   5106    * The function operates on a single sample of data and each call to the function returns the processed output.
   5107    * The library provides separate functions for Q31 and floating-point data types.
   5108    * \par Algorithm
   5109    * \image html parkFormula.gif
   5110    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
   5111    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
   5112    * cosine and sine values of theta (rotor flux position).
   5113    * \par Fixed-Point Behavior
   5114    * Care must be taken when using the Q31 version of the Park transform.
   5115    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
   5116    * Refer to the function specific documentation below for usage guidelines.
   5117    */
   5118 
   5119   /**
   5120    * @addtogroup park
   5121    * @{
   5122    */
   5123 
   5124   /**
   5125    * @brief Floating-point Park transform
   5126    * @param[in]       Ialpha input two-phase vector coordinate alpha
   5127    * @param[in]       Ibeta  input two-phase vector coordinate beta
   5128    * @param[out]      *pId   points to output	rotor reference frame d
   5129    * @param[out]      *pIq   points to output	rotor reference frame q
   5130    * @param[in]       sinVal sine value of rotation angle theta
   5131    * @param[in]       cosVal cosine value of rotation angle theta
   5132    * @return none.
   5133    *
   5134    * The function implements the forward Park transform.
   5135    *
   5136    */
   5137 
   5138   static __INLINE void arm_park_f32(
   5139 				    float32_t Ialpha,
   5140 				    float32_t Ibeta,
   5141 				    float32_t * pId,
   5142 				    float32_t * pIq,
   5143 				    float32_t sinVal,
   5144 				    float32_t cosVal)
   5145   {
   5146     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
   5147     *pId = Ialpha * cosVal + Ibeta * sinVal;
   5148 
   5149     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
   5150     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
   5151 
   5152   }
   5153 
   5154   /**
   5155    * @brief  Park transform for Q31 version
   5156    * @param[in]       Ialpha input two-phase vector coordinate alpha
   5157    * @param[in]       Ibeta  input two-phase vector coordinate beta
   5158    * @param[out]      *pId   points to output rotor reference frame d
   5159    * @param[out]      *pIq   points to output rotor reference frame q
   5160    * @param[in]       sinVal sine value of rotation angle theta
   5161    * @param[in]       cosVal cosine value of rotation angle theta
   5162    * @return none.
   5163    *
   5164    * <b>Scaling and Overflow Behavior:</b>
   5165    * \par
   5166    * The function is implemented using an internal 32-bit accumulator.
   5167    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
   5168    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
   5169    */
   5170 
   5171 
   5172   static __INLINE void arm_park_q31(
   5173 				    q31_t Ialpha,
   5174 				    q31_t Ibeta,
   5175 				    q31_t * pId,
   5176 				    q31_t * pIq,
   5177 				    q31_t sinVal,
   5178 				    q31_t cosVal)
   5179   {
   5180     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
   5181     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
   5182 
   5183     /* Intermediate product is calculated by (Ialpha * cosVal) */
   5184     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
   5185 
   5186     /* Intermediate product is calculated by (Ibeta * sinVal) */
   5187     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
   5188 
   5189 
   5190     /* Intermediate product is calculated by (Ialpha * sinVal) */
   5191     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
   5192 
   5193     /* Intermediate product is calculated by (Ibeta * cosVal) */
   5194     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
   5195 
   5196     /* Calculate pId by adding the two intermediate products 1 and 2 */
   5197     *pId = __QADD(product1, product2);
   5198 
   5199     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
   5200     *pIq = __QSUB(product4, product3);
   5201   }
   5202 
   5203   /**
   5204    * @} end of park group
   5205    */
   5206 
   5207   /**
   5208    * @brief  Converts the elements of the Q7 vector to floating-point vector.
   5209    * @param[in]  *pSrc is input pointer
   5210    * @param[out]  *pDst is output pointer
   5211    * @param[in]  blockSize is the number of samples to process
   5212    * @return none.
   5213    */
   5214   void arm_q7_to_float(
   5215 		        q7_t * pSrc,
   5216 		       float32_t * pDst,
   5217 		       uint32_t blockSize);
   5218 
   5219 
   5220   /**
   5221    * @ingroup groupController
   5222    */
   5223 
   5224   /**
   5225    * @defgroup inv_park Vector Inverse Park transform
   5226    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
   5227    *
   5228    * The function operates on a single sample of data and each call to the function returns the processed output.
   5229    * The library provides separate functions for Q31 and floating-point data types.
   5230    * \par Algorithm
   5231    * \image html parkInvFormula.gif
   5232    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
   5233    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
   5234    * cosine and sine values of theta (rotor flux position).
   5235    * \par Fixed-Point Behavior
   5236    * Care must be taken when using the Q31 version of the Park transform.
   5237    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
   5238    * Refer to the function specific documentation below for usage guidelines.
   5239    */
   5240 
   5241   /**
   5242    * @addtogroup inv_park
   5243    * @{
   5244    */
   5245 
   5246    /**
   5247    * @brief  Floating-point Inverse Park transform
   5248    * @param[in]       Id        input coordinate of rotor reference frame d
   5249    * @param[in]       Iq        input coordinate of rotor reference frame q
   5250    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
   5251    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
   5252    * @param[in]       sinVal    sine value of rotation angle theta
   5253    * @param[in]       cosVal    cosine value of rotation angle theta
   5254    * @return none.
   5255    */
   5256 
   5257   static __INLINE void arm_inv_park_f32(
   5258 					float32_t Id,
   5259 					float32_t Iq,
   5260 					float32_t * pIalpha,
   5261 					float32_t * pIbeta,
   5262 					float32_t sinVal,
   5263 					float32_t cosVal)
   5264   {
   5265     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
   5266     *pIalpha = Id * cosVal - Iq * sinVal;
   5267 
   5268     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
   5269     *pIbeta = Id * sinVal + Iq * cosVal;
   5270 
   5271   }
   5272 
   5273 
   5274   /**
   5275    * @brief  Inverse Park transform for	Q31 version
   5276    * @param[in]       Id        input coordinate of rotor reference frame d
   5277    * @param[in]       Iq        input coordinate of rotor reference frame q
   5278    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
   5279    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
   5280    * @param[in]       sinVal    sine value of rotation angle theta
   5281    * @param[in]       cosVal    cosine value of rotation angle theta
   5282    * @return none.
   5283    *
   5284    * <b>Scaling and Overflow Behavior:</b>
   5285    * \par
   5286    * The function is implemented using an internal 32-bit accumulator.
   5287    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
   5288    * There is saturation on the addition, hence there is no risk of overflow.
   5289    */
   5290 
   5291 
   5292   static __INLINE void arm_inv_park_q31(
   5293 					q31_t Id,
   5294 					q31_t Iq,
   5295 					q31_t * pIalpha,
   5296 					q31_t * pIbeta,
   5297 					q31_t sinVal,
   5298 					q31_t cosVal)
   5299   {
   5300     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
   5301     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
   5302 
   5303     /* Intermediate product is calculated by (Id * cosVal) */
   5304     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
   5305 
   5306     /* Intermediate product is calculated by (Iq * sinVal) */
   5307     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
   5308 
   5309 
   5310     /* Intermediate product is calculated by (Id * sinVal) */
   5311     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
   5312 
   5313     /* Intermediate product is calculated by (Iq * cosVal) */
   5314     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
   5315 
   5316     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
   5317     *pIalpha = __QSUB(product1, product2);
   5318 
   5319     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
   5320     *pIbeta = __QADD(product4, product3);
   5321 
   5322   }
   5323 
   5324   /**
   5325    * @} end of Inverse park group
   5326    */
   5327 
   5328 
   5329   /**
   5330    * @brief  Converts the elements of the Q31 vector to floating-point vector.
   5331    * @param[in]  *pSrc is input pointer
   5332    * @param[out]  *pDst is output pointer
   5333    * @param[in]  blockSize is the number of samples to process
   5334    * @return none.
   5335    */
   5336   void arm_q31_to_float(
   5337 			 q31_t * pSrc,
   5338 			float32_t * pDst,
   5339 			uint32_t blockSize);
   5340 
   5341   /**
   5342    * @ingroup groupInterpolation
   5343    */
   5344 
   5345   /**
   5346    * @defgroup LinearInterpolate Linear Interpolation
   5347    *
   5348    * Linear interpolation is a method of curve fitting using linear polynomials.
   5349    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
   5350    *
   5351    * \par
   5352    * \image html LinearInterp.gif "Linear interpolation"
   5353    *
   5354    * \par
   5355    * A  Linear Interpolate function calculates an output value(y), for the input(x)
   5356    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
   5357    *
   5358    * \par Algorithm:
   5359    * <pre>
   5360    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
   5361    *       where x0, x1 are nearest values of input x
   5362    *             y0, y1 are nearest values to output y
   5363    * </pre>
   5364    *
   5365    * \par
   5366    * This set of functions implements Linear interpolation process
   5367    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
   5368    * sample of data and each call to the function returns a single processed value.
   5369    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
   5370    * <code>x</code> is the input sample value. The functions returns the output value.
   5371    *
   5372    * \par
   5373    * if x is outside of the table boundary, Linear interpolation returns first value of the table
   5374    * if x is below input range and returns last value of table if x is above range.
   5375    */
   5376 
   5377   /**
   5378    * @addtogroup LinearInterpolate
   5379    * @{
   5380    */
   5381 
   5382   /**
   5383    * @brief  Process function for the floating-point Linear Interpolation Function.
   5384    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
   5385    * @param[in] x input sample to process
   5386    * @return y processed output sample.
   5387    *
   5388    */
   5389 
   5390   static __INLINE float32_t arm_linear_interp_f32(
   5391 						  arm_linear_interp_instance_f32 * S,
   5392 						  float32_t x)
   5393   {
   5394 
   5395 	  float32_t y;
   5396 	  float32_t x0, x1;						/* Nearest input values */
   5397 	  float32_t y0, y1;	  					/* Nearest output values */
   5398 	  float32_t xSpacing = S->xSpacing;		/* spacing between input values */
   5399 	  int32_t i;  							/* Index variable */
   5400 	  float32_t *pYData = S->pYData;	    /* pointer to output table */
   5401 
   5402 	  /* Calculation of index */
   5403 	  i =   (x - S->x1) / xSpacing;
   5404 
   5405 	  if(i < 0)
   5406 	  {
   5407 	     /* Iniatilize output for below specified range as least output value of table */
   5408 		 y = pYData[0];
   5409 	  }
   5410 	  else if(i >= S->nValues)
   5411 	  {
   5412 	  	  /* Iniatilize output for above specified range as last output value of table */
   5413 	  	  y = pYData[S->nValues-1];
   5414 	  }
   5415 	  else
   5416 	  {
   5417 	  	  /* Calculation of nearest input values */
   5418 		  x0 = S->x1 + i * xSpacing;
   5419 		  x1 = S->x1 + (i +1) * xSpacing;
   5420 
   5421 		 /* Read of nearest output values */
   5422 		  y0 = pYData[i];
   5423 		  y1 = pYData[i + 1];
   5424 
   5425 		  /* Calculation of output */
   5426 		  y = y0 + (x - x0) * ((y1 - y0)/(x1-x0));
   5427 
   5428 	  }
   5429 
   5430       /* returns output value */
   5431 	  return (y);
   5432   }
   5433 
   5434    /**
   5435    *
   5436    * @brief  Process function for the Q31 Linear Interpolation Function.
   5437    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
   5438    * @param[in] x input sample to process
   5439    * @param[in] nValues number of table values
   5440    * @return y processed output sample.
   5441    *
   5442    * \par
   5443    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
   5444    * This function can support maximum of table size 2^12.
   5445    *
   5446    */
   5447 
   5448 
   5449   static __INLINE q31_t arm_linear_interp_q31(q31_t *pYData,
   5450 					      q31_t x, uint32_t nValues)
   5451   {
   5452     q31_t y;                                   /* output */
   5453     q31_t y0, y1;                                /* Nearest output values */
   5454     q31_t fract;                                 /* fractional part */
   5455     int32_t index;                              /* Index to read nearest output values */
   5456 
   5457     /* Input is in 12.20 format */
   5458     /* 12 bits for the table index */
   5459     /* Index value calculation */
   5460     index = ((x & 0xFFF00000) >> 20);
   5461 
   5462 	if(index >= (nValues - 1))
   5463 	{
   5464 		return(pYData[nValues - 1]);
   5465 	}
   5466 	else if(index < 0)
   5467 	{
   5468 		return(pYData[0]);
   5469 	}
   5470 	else
   5471 	{
   5472 
   5473 	    /* 20 bits for the fractional part */
   5474 	    /* shift left by 11 to keep fract in 1.31 format */
   5475 	    fract = (x & 0x000FFFFF) << 11;
   5476 
   5477 	    /* Read two nearest output values from the index in 1.31(q31) format */
   5478 	    y0 = pYData[index];
   5479 	    y1 = pYData[index + 1u];
   5480 
   5481 	    /* Calculation of y0 * (1-fract) and y is in 2.30 format */
   5482 	    y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
   5483 
   5484 	    /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
   5485 	    y += ((q31_t) (((q63_t) y1 * fract) >> 32));
   5486 
   5487 	    /* Convert y to 1.31 format */
   5488 	    return (y << 1u);
   5489 
   5490 	}
   5491 
   5492   }
   5493 
   5494   /**
   5495    *
   5496    * @brief  Process function for the Q15 Linear Interpolation Function.
   5497    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
   5498    * @param[in] x input sample to process
   5499    * @param[in] nValues number of table values
   5500    * @return y processed output sample.
   5501    *
   5502    * \par
   5503    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
   5504    * This function can support maximum of table size 2^12.
   5505    *
   5506    */
   5507 
   5508 
   5509   static __INLINE q15_t arm_linear_interp_q15(q15_t *pYData, q31_t x, uint32_t nValues)
   5510   {
   5511     q63_t y;                                   /* output */
   5512     q15_t y0, y1;                              /* Nearest output values */
   5513     q31_t fract;                               /* fractional part */
   5514     int32_t index;                            /* Index to read nearest output values */
   5515 
   5516     /* Input is in 12.20 format */
   5517     /* 12 bits for the table index */
   5518     /* Index value calculation */
   5519     index = ((x & 0xFFF00000) >> 20u);
   5520 
   5521 	if(index >= (nValues - 1))
   5522 	{
   5523 		return(pYData[nValues - 1]);
   5524 	}
   5525 	else if(index < 0)
   5526 	{
   5527 		return(pYData[0]);
   5528 	}
   5529 	else
   5530 	{
   5531 	    /* 20 bits for the fractional part */
   5532 	    /* fract is in 12.20 format */
   5533 	    fract = (x & 0x000FFFFF);
   5534 
   5535 	    /* Read two nearest output values from the index */
   5536 	    y0 = pYData[index];
   5537 	    y1 = pYData[index + 1u];
   5538 
   5539 	    /* Calculation of y0 * (1-fract) and y is in 13.35 format */
   5540 	    y = ((q63_t) y0 * (0xFFFFF - fract));
   5541 
   5542 	    /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
   5543 	    y += ((q63_t) y1 * (fract));
   5544 
   5545 	    /* convert y to 1.15 format */
   5546 	    return (y >> 20);
   5547 	}
   5548 
   5549 
   5550   }
   5551 
   5552   /**
   5553    *
   5554    * @brief  Process function for the Q7 Linear Interpolation Function.
   5555    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
   5556    * @param[in] x input sample to process
   5557    * @param[in] nValues number of table values
   5558    * @return y processed output sample.
   5559    *
   5560    * \par
   5561    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
   5562    * This function can support maximum of table size 2^12.
   5563    */
   5564 
   5565 
   5566   static __INLINE q7_t arm_linear_interp_q7(q7_t *pYData, q31_t x,  uint32_t nValues)
   5567   {
   5568     q31_t y;                                   /* output */
   5569     q7_t y0, y1;                                 /* Nearest output values */
   5570     q31_t fract;                                 /* fractional part */
   5571     int32_t index;                              /* Index to read nearest output values */
   5572 
   5573     /* Input is in 12.20 format */
   5574     /* 12 bits for the table index */
   5575     /* Index value calculation */
   5576     index = ((x & 0xFFF00000) >> 20u);
   5577 
   5578 
   5579     if(index >= (nValues - 1))
   5580 	{
   5581 		return(pYData[nValues - 1]);
   5582 	}
   5583 	else if(index < 0)
   5584 	{
   5585 		return(pYData[0]);
   5586 	}
   5587 	else
   5588 	{
   5589 
   5590 	    /* 20 bits for the fractional part */
   5591 	    /* fract is in 12.20 format */
   5592 	    fract = (x & 0x000FFFFF);
   5593 
   5594 	    /* Read two nearest output values from the index and are in 1.7(q7) format */
   5595 	    y0 = pYData[index];
   5596 	    y1 = pYData[index + 1u];
   5597 
   5598 	    /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
   5599 	    y = ((y0 * (0xFFFFF - fract)));
   5600 
   5601 	    /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
   5602 	    y += (y1 * fract);
   5603 
   5604 	    /* convert y to 1.7(q7) format */
   5605 	    return (y >> 20u);
   5606 
   5607 	}
   5608 
   5609   }
   5610   /**
   5611    * @} end of LinearInterpolate group
   5612    */
   5613 
   5614   /**
   5615    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
   5616    * @param[in] x input value in radians.
   5617    * @return  sin(x).
   5618    */
   5619 
   5620   float32_t arm_sin_f32(
   5621 			 float32_t x);
   5622 
   5623   /**
   5624    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
   5625    * @param[in] x Scaled input value in radians.
   5626    * @return  sin(x).
   5627    */
   5628 
   5629   q31_t arm_sin_q31(
   5630 		     q31_t x);
   5631 
   5632   /**
   5633    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
   5634    * @param[in] x Scaled input value in radians.
   5635    * @return  sin(x).
   5636    */
   5637 
   5638   q15_t arm_sin_q15(
   5639 		     q15_t x);
   5640 
   5641   /**
   5642    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
   5643    * @param[in] x input value in radians.
   5644    * @return  cos(x).
   5645    */
   5646 
   5647   float32_t arm_cos_f32(
   5648 			 float32_t x);
   5649 
   5650   /**
   5651    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
   5652    * @param[in] x Scaled input value in radians.
   5653    * @return  cos(x).
   5654    */
   5655 
   5656   q31_t arm_cos_q31(
   5657 		     q31_t x);
   5658 
   5659   /**
   5660    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
   5661    * @param[in] x Scaled input value in radians.
   5662    * @return  cos(x).
   5663    */
   5664 
   5665   q15_t arm_cos_q15(
   5666 		     q15_t x);
   5667 
   5668 
   5669   /**
   5670    * @ingroup groupFastMath
   5671    */
   5672 
   5673 
   5674   /**
   5675    * @defgroup SQRT Square Root
   5676    *
   5677    * Computes the square root of a number.
   5678    * There are separate functions for Q15, Q31, and floating-point data types.
   5679    * The square root function is computed using the Newton-Raphson algorithm.
   5680    * This is an iterative algorithm of the form:
   5681    * <pre>
   5682    *      x1 = x0 - f(x0)/f'(x0)
   5683    * </pre>
   5684    * where <code>x1</code> is the current estimate,
   5685    * <code>x0</code> is the previous estimate and
   5686    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
   5687    * For the square root function, the algorithm reduces to:
   5688    * <pre>
   5689    *     x0 = in/2                         [initial guess]
   5690    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
   5691    * </pre>
   5692    */
   5693 
   5694 
   5695   /**
   5696    * @addtogroup SQRT
   5697    * @{
   5698    */
   5699 
   5700   /**
   5701    * @brief  Floating-point square root function.
   5702    * @param[in]  in     input value.
   5703    * @param[out] *pOut  square root of input value.
   5704    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
   5705    * <code>in</code> is negative value and returns zero output for negative values.
   5706    */
   5707 
   5708   static __INLINE arm_status  arm_sqrt_f32(
   5709 					  float32_t in, float32_t *pOut)
   5710   {
   5711   	if(in > 0)
   5712 	{
   5713 
   5714 //	#if __FPU_USED
   5715     #if (__FPU_USED == 1) && defined ( __CC_ARM   )
   5716 		*pOut = __sqrtf(in);
   5717 	#else
   5718 		*pOut = sqrtf(in);
   5719 	#endif
   5720 
   5721 		return (ARM_MATH_SUCCESS);
   5722 	}
   5723   	else
   5724 	{
   5725 		*pOut = 0.0f;
   5726 		return (ARM_MATH_ARGUMENT_ERROR);
   5727 	}
   5728 
   5729   }
   5730 
   5731 
   5732   /**
   5733    * @brief Q31 square root function.
   5734    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
   5735    * @param[out]  *pOut square root of input value.
   5736    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
   5737    * <code>in</code> is negative value and returns zero output for negative values.
   5738    */
   5739   arm_status arm_sqrt_q31(
   5740 		      q31_t in, q31_t *pOut);
   5741 
   5742   /**
   5743    * @brief  Q15 square root function.
   5744    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
   5745    * @param[out]  *pOut  square root of input value.
   5746    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
   5747    * <code>in</code> is negative value and returns zero output for negative values.
   5748    */
   5749   arm_status arm_sqrt_q15(
   5750 		      q15_t in, q15_t *pOut);
   5751 
   5752   /**
   5753    * @} end of SQRT group
   5754    */
   5755 
   5756 
   5757 
   5758 
   5759 
   5760 
   5761   /**
   5762    * @brief floating-point Circular write function.
   5763    */
   5764 
   5765   static __INLINE void arm_circularWrite_f32(
   5766 					     int32_t * circBuffer,
   5767 					     int32_t L,
   5768 					     uint16_t * writeOffset,
   5769 					     int32_t bufferInc,
   5770 					     const int32_t * src,
   5771 					     int32_t srcInc,
   5772 					     uint32_t blockSize)
   5773   {
   5774     uint32_t i = 0u;
   5775     int32_t wOffset;
   5776 
   5777     /* Copy the value of Index pointer that points
   5778      * to the current location where the input samples to be copied */
   5779     wOffset = *writeOffset;
   5780 
   5781     /* Loop over the blockSize */
   5782     i = blockSize;
   5783 
   5784     while(i > 0u)
   5785       {
   5786 	/* copy the input sample to the circular buffer */
   5787 	circBuffer[wOffset] = *src;
   5788 
   5789 	/* Update the input pointer */
   5790 	src += srcInc;
   5791 
   5792 	/* Circularly update wOffset.  Watch out for positive and negative value */
   5793 	wOffset += bufferInc;
   5794 	if(wOffset >= L)
   5795 	  wOffset -= L;
   5796 
   5797 	/* Decrement the loop counter */
   5798 	i--;
   5799       }
   5800 
   5801     /* Update the index pointer */
   5802     *writeOffset = wOffset;
   5803   }
   5804 
   5805 
   5806 
   5807   /**
   5808    * @brief floating-point Circular Read function.
   5809    */
   5810   static __INLINE void arm_circularRead_f32(
   5811 					    int32_t * circBuffer,
   5812 					    int32_t L,
   5813 					    int32_t * readOffset,
   5814 					    int32_t bufferInc,
   5815 					    int32_t * dst,
   5816 					    int32_t * dst_base,
   5817 					    int32_t dst_length,
   5818 					    int32_t dstInc,
   5819 					    uint32_t blockSize)
   5820   {
   5821     uint32_t i = 0u;
   5822     int32_t rOffset, dst_end;
   5823 
   5824     /* Copy the value of Index pointer that points
   5825      * to the current location from where the input samples to be read */
   5826     rOffset = *readOffset;
   5827     dst_end = (int32_t) (dst_base + dst_length);
   5828 
   5829     /* Loop over the blockSize */
   5830     i = blockSize;
   5831 
   5832     while(i > 0u)
   5833       {
   5834 	/* copy the sample from the circular buffer to the destination buffer */
   5835 	*dst = circBuffer[rOffset];
   5836 
   5837 	/* Update the input pointer */
   5838 	dst += dstInc;
   5839 
   5840 	if(dst == (int32_t *) dst_end)
   5841 	  {
   5842 	    dst = dst_base;
   5843 	  }
   5844 
   5845 	/* Circularly update rOffset.  Watch out for positive and negative value  */
   5846 	rOffset += bufferInc;
   5847 
   5848 	if(rOffset >= L)
   5849 	  {
   5850 	    rOffset -= L;
   5851 	  }
   5852 
   5853 	/* Decrement the loop counter */
   5854 	i--;
   5855       }
   5856 
   5857     /* Update the index pointer */
   5858     *readOffset = rOffset;
   5859   }
   5860 
   5861   /**
   5862    * @brief Q15 Circular write function.
   5863    */
   5864 
   5865   static __INLINE void arm_circularWrite_q15(
   5866 					     q15_t * circBuffer,
   5867 					     int32_t L,
   5868 					     uint16_t * writeOffset,
   5869 					     int32_t bufferInc,
   5870 					     const q15_t * src,
   5871 					     int32_t srcInc,
   5872 					     uint32_t blockSize)
   5873   {
   5874     uint32_t i = 0u;
   5875     int32_t wOffset;
   5876 
   5877     /* Copy the value of Index pointer that points
   5878      * to the current location where the input samples to be copied */
   5879     wOffset = *writeOffset;
   5880 
   5881     /* Loop over the blockSize */
   5882     i = blockSize;
   5883 
   5884     while(i > 0u)
   5885       {
   5886 	/* copy the input sample to the circular buffer */
   5887 	circBuffer[wOffset] = *src;
   5888 
   5889 	/* Update the input pointer */
   5890 	src += srcInc;
   5891 
   5892 	/* Circularly update wOffset.  Watch out for positive and negative value */
   5893 	wOffset += bufferInc;
   5894 	if(wOffset >= L)
   5895 	  wOffset -= L;
   5896 
   5897 	/* Decrement the loop counter */
   5898 	i--;
   5899       }
   5900 
   5901     /* Update the index pointer */
   5902     *writeOffset = wOffset;
   5903   }
   5904 
   5905 
   5906 
   5907   /**
   5908    * @brief Q15 Circular Read function.
   5909    */
   5910   static __INLINE void arm_circularRead_q15(
   5911 					    q15_t * circBuffer,
   5912 					    int32_t L,
   5913 					    int32_t * readOffset,
   5914 					    int32_t bufferInc,
   5915 					    q15_t * dst,
   5916 					    q15_t * dst_base,
   5917 					    int32_t dst_length,
   5918 					    int32_t dstInc,
   5919 					    uint32_t blockSize)
   5920   {
   5921     uint32_t i = 0;
   5922     int32_t rOffset, dst_end;
   5923 
   5924     /* Copy the value of Index pointer that points
   5925      * to the current location from where the input samples to be read */
   5926     rOffset = *readOffset;
   5927 
   5928     dst_end = (int32_t) (dst_base + dst_length);
   5929 
   5930     /* Loop over the blockSize */
   5931     i = blockSize;
   5932 
   5933     while(i > 0u)
   5934       {
   5935 	/* copy the sample from the circular buffer to the destination buffer */
   5936 	*dst = circBuffer[rOffset];
   5937 
   5938 	/* Update the input pointer */
   5939 	dst += dstInc;
   5940 
   5941 	if(dst == (q15_t *) dst_end)
   5942 	  {
   5943 	    dst = dst_base;
   5944 	  }
   5945 
   5946 	/* Circularly update wOffset.  Watch out for positive and negative value */
   5947 	rOffset += bufferInc;
   5948 
   5949 	if(rOffset >= L)
   5950 	  {
   5951 	    rOffset -= L;
   5952 	  }
   5953 
   5954 	/* Decrement the loop counter */
   5955 	i--;
   5956       }
   5957 
   5958     /* Update the index pointer */
   5959     *readOffset = rOffset;
   5960   }
   5961 
   5962 
   5963   /**
   5964    * @brief Q7 Circular write function.
   5965    */
   5966 
   5967   static __INLINE void arm_circularWrite_q7(
   5968 					    q7_t * circBuffer,
   5969 					    int32_t L,
   5970 					    uint16_t * writeOffset,
   5971 					    int32_t bufferInc,
   5972 					    const q7_t * src,
   5973 					    int32_t srcInc,
   5974 					    uint32_t blockSize)
   5975   {
   5976     uint32_t i = 0u;
   5977     int32_t wOffset;
   5978 
   5979     /* Copy the value of Index pointer that points
   5980      * to the current location where the input samples to be copied */
   5981     wOffset = *writeOffset;
   5982 
   5983     /* Loop over the blockSize */
   5984     i = blockSize;
   5985 
   5986     while(i > 0u)
   5987       {
   5988 	/* copy the input sample to the circular buffer */
   5989 	circBuffer[wOffset] = *src;
   5990 
   5991 	/* Update the input pointer */
   5992 	src += srcInc;
   5993 
   5994 	/* Circularly update wOffset.  Watch out for positive and negative value */
   5995 	wOffset += bufferInc;
   5996 	if(wOffset >= L)
   5997 	  wOffset -= L;
   5998 
   5999 	/* Decrement the loop counter */
   6000 	i--;
   6001       }
   6002 
   6003     /* Update the index pointer */
   6004     *writeOffset = wOffset;
   6005   }
   6006 
   6007 
   6008 
   6009   /**
   6010    * @brief Q7 Circular Read function.
   6011    */
   6012   static __INLINE void arm_circularRead_q7(
   6013 					   q7_t * circBuffer,
   6014 					   int32_t L,
   6015 					   int32_t * readOffset,
   6016 					   int32_t bufferInc,
   6017 					   q7_t * dst,
   6018 					   q7_t * dst_base,
   6019 					   int32_t dst_length,
   6020 					   int32_t dstInc,
   6021 					   uint32_t blockSize)
   6022   {
   6023     uint32_t i = 0;
   6024     int32_t rOffset, dst_end;
   6025 
   6026     /* Copy the value of Index pointer that points
   6027      * to the current location from where the input samples to be read */
   6028     rOffset = *readOffset;
   6029 
   6030     dst_end = (int32_t) (dst_base + dst_length);
   6031 
   6032     /* Loop over the blockSize */
   6033     i = blockSize;
   6034 
   6035     while(i > 0u)
   6036       {
   6037 	/* copy the sample from the circular buffer to the destination buffer */
   6038 	*dst = circBuffer[rOffset];
   6039 
   6040 	/* Update the input pointer */
   6041 	dst += dstInc;
   6042 
   6043 	if(dst == (q7_t *) dst_end)
   6044 	  {
   6045 	    dst = dst_base;
   6046 	  }
   6047 
   6048 	/* Circularly update rOffset.  Watch out for positive and negative value */
   6049 	rOffset += bufferInc;
   6050 
   6051 	if(rOffset >= L)
   6052 	  {
   6053 	    rOffset -= L;
   6054 	  }
   6055 
   6056 	/* Decrement the loop counter */
   6057 	i--;
   6058       }
   6059 
   6060     /* Update the index pointer */
   6061     *readOffset = rOffset;
   6062   }
   6063 
   6064 
   6065   /**
   6066    * @brief  Sum of the squares of the elements of a Q31 vector.
   6067    * @param[in]  *pSrc is input pointer
   6068    * @param[in]  blockSize is the number of samples to process
   6069    * @param[out]  *pResult is output value.
   6070    * @return none.
   6071    */
   6072 
   6073   void arm_power_q31(
   6074 		      q31_t * pSrc,
   6075 		     uint32_t blockSize,
   6076 		     q63_t * pResult);
   6077 
   6078   /**
   6079    * @brief  Sum of the squares of the elements of a floating-point vector.
   6080    * @param[in]  *pSrc is input pointer
   6081    * @param[in]  blockSize is the number of samples to process
   6082    * @param[out]  *pResult is output value.
   6083    * @return none.
   6084    */
   6085 
   6086   void arm_power_f32(
   6087 		      float32_t * pSrc,
   6088 		     uint32_t blockSize,
   6089 		     float32_t * pResult);
   6090 
   6091   /**
   6092    * @brief  Sum of the squares of the elements of a Q15 vector.
   6093    * @param[in]  *pSrc is input pointer
   6094    * @param[in]  blockSize is the number of samples to process
   6095    * @param[out]  *pResult is output value.
   6096    * @return none.
   6097    */
   6098 
   6099   void arm_power_q15(
   6100 		      q15_t * pSrc,
   6101 		     uint32_t blockSize,
   6102 		     q63_t * pResult);
   6103 
   6104   /**
   6105    * @brief  Sum of the squares of the elements of a Q7 vector.
   6106    * @param[in]  *pSrc is input pointer
   6107    * @param[in]  blockSize is the number of samples to process
   6108    * @param[out]  *pResult is output value.
   6109    * @return none.
   6110    */
   6111 
   6112   void arm_power_q7(
   6113 		     q7_t * pSrc,
   6114 		    uint32_t blockSize,
   6115 		    q31_t * pResult);
   6116 
   6117   /**
   6118    * @brief  Mean value of a Q7 vector.
   6119    * @param[in]  *pSrc is input pointer
   6120    * @param[in]  blockSize is the number of samples to process
   6121    * @param[out]  *pResult is output value.
   6122    * @return none.
   6123    */
   6124 
   6125   void arm_mean_q7(
   6126 		    q7_t * pSrc,
   6127 		   uint32_t blockSize,
   6128 		   q7_t * pResult);
   6129 
   6130   /**
   6131    * @brief  Mean value of a Q15 vector.
   6132    * @param[in]  *pSrc is input pointer
   6133    * @param[in]  blockSize is the number of samples to process
   6134    * @param[out]  *pResult is output value.
   6135    * @return none.
   6136    */
   6137   void arm_mean_q15(
   6138 		     q15_t * pSrc,
   6139 		    uint32_t blockSize,
   6140 		    q15_t * pResult);
   6141 
   6142   /**
   6143    * @brief  Mean value of a Q31 vector.
   6144    * @param[in]  *pSrc is input pointer
   6145    * @param[in]  blockSize is the number of samples to process
   6146    * @param[out]  *pResult is output value.
   6147    * @return none.
   6148    */
   6149   void arm_mean_q31(
   6150 		     q31_t * pSrc,
   6151 		    uint32_t blockSize,
   6152 		    q31_t * pResult);
   6153 
   6154   /**
   6155    * @brief  Mean value of a floating-point vector.
   6156    * @param[in]  *pSrc is input pointer
   6157    * @param[in]  blockSize is the number of samples to process
   6158    * @param[out]  *pResult is output value.
   6159    * @return none.
   6160    */
   6161   void arm_mean_f32(
   6162 		     float32_t * pSrc,
   6163 		    uint32_t blockSize,
   6164 		    float32_t * pResult);
   6165 
   6166   /**
   6167    * @brief  Variance of the elements of a floating-point vector.
   6168    * @param[in]  *pSrc is input pointer
   6169    * @param[in]  blockSize is the number of samples to process
   6170    * @param[out]  *pResult is output value.
   6171    * @return none.
   6172    */
   6173 
   6174   void arm_var_f32(
   6175 		    float32_t * pSrc,
   6176 		   uint32_t blockSize,
   6177 		   float32_t * pResult);
   6178 
   6179   /**
   6180    * @brief  Variance of the elements of a Q31 vector.
   6181    * @param[in]  *pSrc is input pointer
   6182    * @param[in]  blockSize is the number of samples to process
   6183    * @param[out]  *pResult is output value.
   6184    * @return none.
   6185    */
   6186 
   6187   void arm_var_q31(
   6188 		    q31_t * pSrc,
   6189 		   uint32_t blockSize,
   6190 		   q63_t * pResult);
   6191 
   6192   /**
   6193    * @brief  Variance of the elements of a Q15 vector.
   6194    * @param[in]  *pSrc is input pointer
   6195    * @param[in]  blockSize is the number of samples to process
   6196    * @param[out]  *pResult is output value.
   6197    * @return none.
   6198    */
   6199 
   6200   void arm_var_q15(
   6201 		    q15_t * pSrc,
   6202 		   uint32_t blockSize,
   6203 		   q31_t * pResult);
   6204 
   6205   /**
   6206    * @brief  Root Mean Square of the elements of a floating-point vector.
   6207    * @param[in]  *pSrc is input pointer
   6208    * @param[in]  blockSize is the number of samples to process
   6209    * @param[out]  *pResult is output value.
   6210    * @return none.
   6211    */
   6212 
   6213   void arm_rms_f32(
   6214 		    float32_t * pSrc,
   6215 		   uint32_t blockSize,
   6216 		   float32_t * pResult);
   6217 
   6218   /**
   6219    * @brief  Root Mean Square of the elements of a Q31 vector.
   6220    * @param[in]  *pSrc is input pointer
   6221    * @param[in]  blockSize is the number of samples to process
   6222    * @param[out]  *pResult is output value.
   6223    * @return none.
   6224    */
   6225 
   6226   void arm_rms_q31(
   6227 		    q31_t * pSrc,
   6228 		   uint32_t blockSize,
   6229 		   q31_t * pResult);
   6230 
   6231   /**
   6232    * @brief  Root Mean Square of the elements of a Q15 vector.
   6233    * @param[in]  *pSrc is input pointer
   6234    * @param[in]  blockSize is the number of samples to process
   6235    * @param[out]  *pResult is output value.
   6236    * @return none.
   6237    */
   6238 
   6239   void arm_rms_q15(
   6240 		    q15_t * pSrc,
   6241 		   uint32_t blockSize,
   6242 		   q15_t * pResult);
   6243 
   6244   /**
   6245    * @brief  Standard deviation of the elements of a floating-point vector.
   6246    * @param[in]  *pSrc is input pointer
   6247    * @param[in]  blockSize is the number of samples to process
   6248    * @param[out]  *pResult is output value.
   6249    * @return none.
   6250    */
   6251 
   6252   void arm_std_f32(
   6253 		    float32_t * pSrc,
   6254 		   uint32_t blockSize,
   6255 		   float32_t * pResult);
   6256 
   6257   /**
   6258    * @brief  Standard deviation of the elements of a Q31 vector.
   6259    * @param[in]  *pSrc is input pointer
   6260    * @param[in]  blockSize is the number of samples to process
   6261    * @param[out]  *pResult is output value.
   6262    * @return none.
   6263    */
   6264 
   6265   void arm_std_q31(
   6266 		    q31_t * pSrc,
   6267 		   uint32_t blockSize,
   6268 		   q31_t * pResult);
   6269 
   6270   /**
   6271    * @brief  Standard deviation of the elements of a Q15 vector.
   6272    * @param[in]  *pSrc is input pointer
   6273    * @param[in]  blockSize is the number of samples to process
   6274    * @param[out]  *pResult is output value.
   6275    * @return none.
   6276    */
   6277 
   6278   void arm_std_q15(
   6279 		    q15_t * pSrc,
   6280 		   uint32_t blockSize,
   6281 		   q15_t * pResult);
   6282 
   6283   /**
   6284    * @brief  Floating-point complex magnitude
   6285    * @param[in]  *pSrc points to the complex input vector
   6286    * @param[out]  *pDst points to the real output vector
   6287    * @param[in]  numSamples number of complex samples in the input vector
   6288    * @return none.
   6289    */
   6290 
   6291   void arm_cmplx_mag_f32(
   6292 			  float32_t * pSrc,
   6293 			 float32_t * pDst,
   6294 			 uint32_t numSamples);
   6295 
   6296   /**
   6297    * @brief  Q31 complex magnitude
   6298    * @param[in]  *pSrc points to the complex input vector
   6299    * @param[out]  *pDst points to the real output vector
   6300    * @param[in]  numSamples number of complex samples in the input vector
   6301    * @return none.
   6302    */
   6303 
   6304   void arm_cmplx_mag_q31(
   6305 			  q31_t * pSrc,
   6306 			 q31_t * pDst,
   6307 			 uint32_t numSamples);
   6308 
   6309   /**
   6310    * @brief  Q15 complex magnitude
   6311    * @param[in]  *pSrc points to the complex input vector
   6312    * @param[out]  *pDst points to the real output vector
   6313    * @param[in]  numSamples number of complex samples in the input vector
   6314    * @return none.
   6315    */
   6316 
   6317   void arm_cmplx_mag_q15(
   6318 			  q15_t * pSrc,
   6319 			 q15_t * pDst,
   6320 			 uint32_t numSamples);
   6321 
   6322   /**
   6323    * @brief  Q15 complex dot product
   6324    * @param[in]  *pSrcA points to the first input vector
   6325    * @param[in]  *pSrcB points to the second input vector
   6326    * @param[in]  numSamples number of complex samples in each vector
   6327    * @param[out]  *realResult real part of the result returned here
   6328    * @param[out]  *imagResult imaginary part of the result returned here
   6329    * @return none.
   6330    */
   6331 
   6332   void arm_cmplx_dot_prod_q15(
   6333 			       q15_t * pSrcA,
   6334 			       q15_t * pSrcB,
   6335 			      uint32_t numSamples,
   6336 			      q31_t * realResult,
   6337 			      q31_t * imagResult);
   6338 
   6339   /**
   6340    * @brief  Q31 complex dot product
   6341    * @param[in]  *pSrcA points to the first input vector
   6342    * @param[in]  *pSrcB points to the second input vector
   6343    * @param[in]  numSamples number of complex samples in each vector
   6344    * @param[out]  *realResult real part of the result returned here
   6345    * @param[out]  *imagResult imaginary part of the result returned here
   6346    * @return none.
   6347    */
   6348 
   6349   void arm_cmplx_dot_prod_q31(
   6350 			       q31_t * pSrcA,
   6351 			       q31_t * pSrcB,
   6352 			      uint32_t numSamples,
   6353 			      q63_t * realResult,
   6354 			      q63_t * imagResult);
   6355 
   6356   /**
   6357    * @brief  Floating-point complex dot product
   6358    * @param[in]  *pSrcA points to the first input vector
   6359    * @param[in]  *pSrcB points to the second input vector
   6360    * @param[in]  numSamples number of complex samples in each vector
   6361    * @param[out]  *realResult real part of the result returned here
   6362    * @param[out]  *imagResult imaginary part of the result returned here
   6363    * @return none.
   6364    */
   6365 
   6366   void arm_cmplx_dot_prod_f32(
   6367 			       float32_t * pSrcA,
   6368 			       float32_t * pSrcB,
   6369 			      uint32_t numSamples,
   6370 			      float32_t * realResult,
   6371 			      float32_t * imagResult);
   6372 
   6373   /**
   6374    * @brief  Q15 complex-by-real multiplication
   6375    * @param[in]  *pSrcCmplx points to the complex input vector
   6376    * @param[in]  *pSrcReal points to the real input vector
   6377    * @param[out]  *pCmplxDst points to the complex output vector
   6378    * @param[in]  numSamples number of samples in each vector
   6379    * @return none.
   6380    */
   6381 
   6382   void arm_cmplx_mult_real_q15(
   6383 			        q15_t * pSrcCmplx,
   6384 			        q15_t * pSrcReal,
   6385 			       q15_t * pCmplxDst,
   6386 			       uint32_t numSamples);
   6387 
   6388   /**
   6389    * @brief  Q31 complex-by-real multiplication
   6390    * @param[in]  *pSrcCmplx points to the complex input vector
   6391    * @param[in]  *pSrcReal points to the real input vector
   6392    * @param[out]  *pCmplxDst points to the complex output vector
   6393    * @param[in]  numSamples number of samples in each vector
   6394    * @return none.
   6395    */
   6396 
   6397   void arm_cmplx_mult_real_q31(
   6398 			        q31_t * pSrcCmplx,
   6399 			        q31_t * pSrcReal,
   6400 			       q31_t * pCmplxDst,
   6401 			       uint32_t numSamples);
   6402 
   6403   /**
   6404    * @brief  Floating-point complex-by-real multiplication
   6405    * @param[in]  *pSrcCmplx points to the complex input vector
   6406    * @param[in]  *pSrcReal points to the real input vector
   6407    * @param[out]  *pCmplxDst points to the complex output vector
   6408    * @param[in]  numSamples number of samples in each vector
   6409    * @return none.
   6410    */
   6411 
   6412   void arm_cmplx_mult_real_f32(
   6413 			        float32_t * pSrcCmplx,
   6414 			        float32_t * pSrcReal,
   6415 			       float32_t * pCmplxDst,
   6416 			       uint32_t numSamples);
   6417 
   6418   /**
   6419    * @brief  Minimum value of a Q7 vector.
   6420    * @param[in]  *pSrc is input pointer
   6421    * @param[in]  blockSize is the number of samples to process
   6422    * @param[out]  *result is output pointer
   6423    * @param[out]  *index is the array index of the minimum value in the input buffer.
   6424    * @return none.
   6425    */
   6426 
   6427   void arm_min_q7(
   6428 		   q7_t * pSrc,
   6429 		  uint32_t blockSize,
   6430 		  q7_t * result,
   6431 		  uint32_t * index);
   6432 
   6433   /**
   6434    * @brief  Minimum value of a Q15 vector.
   6435    * @param[in]  *pSrc is input pointer
   6436    * @param[in]  blockSize is the number of samples to process
   6437    * @param[out]  *pResult is output pointer
   6438    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
   6439    * @return none.
   6440    */
   6441 
   6442   void arm_min_q15(
   6443 		    q15_t * pSrc,
   6444 		   uint32_t blockSize,
   6445 		   q15_t * pResult,
   6446 		   uint32_t * pIndex);
   6447 
   6448   /**
   6449    * @brief  Minimum value of a Q31 vector.
   6450    * @param[in]  *pSrc is input pointer
   6451    * @param[in]  blockSize is the number of samples to process
   6452    * @param[out]  *pResult is output pointer
   6453    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
   6454    * @return none.
   6455    */
   6456   void arm_min_q31(
   6457 		    q31_t * pSrc,
   6458 		   uint32_t blockSize,
   6459 		   q31_t * pResult,
   6460 		   uint32_t * pIndex);
   6461 
   6462   /**
   6463    * @brief  Minimum value of a floating-point vector.
   6464    * @param[in]  *pSrc is input pointer
   6465    * @param[in]  blockSize is the number of samples to process
   6466    * @param[out]  *pResult is output pointer
   6467    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
   6468    * @return none.
   6469    */
   6470 
   6471   void arm_min_f32(
   6472 		    float32_t * pSrc,
   6473 		   uint32_t blockSize,
   6474 		   float32_t * pResult,
   6475 		   uint32_t * pIndex);
   6476 
   6477 /**
   6478  * @brief Maximum value of a Q7 vector.
   6479  * @param[in]       *pSrc points to the input buffer
   6480  * @param[in]       blockSize length of the input vector
   6481  * @param[out]      *pResult maximum value returned here
   6482  * @param[out]      *pIndex index of maximum value returned here
   6483  * @return none.
   6484  */
   6485 
   6486   void arm_max_q7(
   6487 		   q7_t * pSrc,
   6488 		  uint32_t blockSize,
   6489 		  q7_t * pResult,
   6490 		  uint32_t * pIndex);
   6491 
   6492 /**
   6493  * @brief Maximum value of a Q15 vector.
   6494  * @param[in]       *pSrc points to the input buffer
   6495  * @param[in]       blockSize length of the input vector
   6496  * @param[out]      *pResult maximum value returned here
   6497  * @param[out]      *pIndex index of maximum value returned here
   6498  * @return none.
   6499  */
   6500 
   6501   void arm_max_q15(
   6502 		    q15_t * pSrc,
   6503 		   uint32_t blockSize,
   6504 		   q15_t * pResult,
   6505 		   uint32_t * pIndex);
   6506 
   6507 /**
   6508  * @brief Maximum value of a Q31 vector.
   6509  * @param[in]       *pSrc points to the input buffer
   6510  * @param[in]       blockSize length of the input vector
   6511  * @param[out]      *pResult maximum value returned here
   6512  * @param[out]      *pIndex index of maximum value returned here
   6513  * @return none.
   6514  */
   6515 
   6516   void arm_max_q31(
   6517 		    q31_t * pSrc,
   6518 		   uint32_t blockSize,
   6519 		   q31_t * pResult,
   6520 		   uint32_t * pIndex);
   6521 
   6522 /**
   6523  * @brief Maximum value of a floating-point vector.
   6524  * @param[in]       *pSrc points to the input buffer
   6525  * @param[in]       blockSize length of the input vector
   6526  * @param[out]      *pResult maximum value returned here
   6527  * @param[out]      *pIndex index of maximum value returned here
   6528  * @return none.
   6529  */
   6530 
   6531   void arm_max_f32(
   6532 		    float32_t * pSrc,
   6533 		   uint32_t blockSize,
   6534 		   float32_t * pResult,
   6535 		   uint32_t * pIndex);
   6536 
   6537   /**
   6538    * @brief  Q15 complex-by-complex multiplication
   6539    * @param[in]  *pSrcA points to the first input vector
   6540    * @param[in]  *pSrcB points to the second input vector
   6541    * @param[out]  *pDst  points to the output vector
   6542    * @param[in]  numSamples number of complex samples in each vector
   6543    * @return none.
   6544    */
   6545 
   6546   void arm_cmplx_mult_cmplx_q15(
   6547 			        q15_t * pSrcA,
   6548 			        q15_t * pSrcB,
   6549 			       q15_t * pDst,
   6550 			       uint32_t numSamples);
   6551 
   6552   /**
   6553    * @brief  Q31 complex-by-complex multiplication
   6554    * @param[in]  *pSrcA points to the first input vector
   6555    * @param[in]  *pSrcB points to the second input vector
   6556    * @param[out]  *pDst  points to the output vector
   6557    * @param[in]  numSamples number of complex samples in each vector
   6558    * @return none.
   6559    */
   6560 
   6561   void arm_cmplx_mult_cmplx_q31(
   6562 			        q31_t * pSrcA,
   6563 			        q31_t * pSrcB,
   6564 			       q31_t * pDst,
   6565 			       uint32_t numSamples);
   6566 
   6567   /**
   6568    * @brief  Floating-point complex-by-complex multiplication
   6569    * @param[in]  *pSrcA points to the first input vector
   6570    * @param[in]  *pSrcB points to the second input vector
   6571    * @param[out]  *pDst  points to the output vector
   6572    * @param[in]  numSamples number of complex samples in each vector
   6573    * @return none.
   6574    */
   6575 
   6576   void arm_cmplx_mult_cmplx_f32(
   6577 			        float32_t * pSrcA,
   6578 			        float32_t * pSrcB,
   6579 			       float32_t * pDst,
   6580 			       uint32_t numSamples);
   6581 
   6582   /**
   6583    * @brief Converts the elements of the floating-point vector to Q31 vector.
   6584    * @param[in]       *pSrc points to the floating-point input vector
   6585    * @param[out]      *pDst points to the Q31 output vector
   6586    * @param[in]       blockSize length of the input vector
   6587    * @return none.
   6588    */
   6589   void arm_float_to_q31(
   6590 			       float32_t * pSrc,
   6591 			      q31_t * pDst,
   6592 			      uint32_t blockSize);
   6593 
   6594   /**
   6595    * @brief Converts the elements of the floating-point vector to Q15 vector.
   6596    * @param[in]       *pSrc points to the floating-point input vector
   6597    * @param[out]      *pDst points to the Q15 output vector
   6598    * @param[in]       blockSize length of the input vector
   6599    * @return          none
   6600    */
   6601   void arm_float_to_q15(
   6602 			       float32_t * pSrc,
   6603 			      q15_t * pDst,
   6604 			      uint32_t blockSize);
   6605 
   6606   /**
   6607    * @brief Converts the elements of the floating-point vector to Q7 vector.
   6608    * @param[in]       *pSrc points to the floating-point input vector
   6609    * @param[out]      *pDst points to the Q7 output vector
   6610    * @param[in]       blockSize length of the input vector
   6611    * @return          none
   6612    */
   6613   void arm_float_to_q7(
   6614 			      float32_t * pSrc,
   6615 			     q7_t * pDst,
   6616 			     uint32_t blockSize);
   6617 
   6618 
   6619   /**
   6620    * @brief  Converts the elements of the Q31 vector to Q15 vector.
   6621    * @param[in]  *pSrc is input pointer
   6622    * @param[out]  *pDst is output pointer
   6623    * @param[in]  blockSize is the number of samples to process
   6624    * @return none.
   6625    */
   6626   void arm_q31_to_q15(
   6627 		       q31_t * pSrc,
   6628 		      q15_t * pDst,
   6629 		      uint32_t blockSize);
   6630 
   6631   /**
   6632    * @brief  Converts the elements of the Q31 vector to Q7 vector.
   6633    * @param[in]  *pSrc is input pointer
   6634    * @param[out]  *pDst is output pointer
   6635    * @param[in]  blockSize is the number of samples to process
   6636    * @return none.
   6637    */
   6638   void arm_q31_to_q7(
   6639 		      q31_t * pSrc,
   6640 		     q7_t * pDst,
   6641 		     uint32_t blockSize);
   6642 
   6643   /**
   6644    * @brief  Converts the elements of the Q15 vector to floating-point vector.
   6645    * @param[in]  *pSrc is input pointer
   6646    * @param[out]  *pDst is output pointer
   6647    * @param[in]  blockSize is the number of samples to process
   6648    * @return none.
   6649    */
   6650   void arm_q15_to_float(
   6651 			 q15_t * pSrc,
   6652 			float32_t * pDst,
   6653 			uint32_t blockSize);
   6654 
   6655 
   6656   /**
   6657    * @brief  Converts the elements of the Q15 vector to Q31 vector.
   6658    * @param[in]  *pSrc is input pointer
   6659    * @param[out]  *pDst is output pointer
   6660    * @param[in]  blockSize is the number of samples to process
   6661    * @return none.
   6662    */
   6663   void arm_q15_to_q31(
   6664 		       q15_t * pSrc,
   6665 		      q31_t * pDst,
   6666 		      uint32_t blockSize);
   6667 
   6668 
   6669   /**
   6670    * @brief  Converts the elements of the Q15 vector to Q7 vector.
   6671    * @param[in]  *pSrc is input pointer
   6672    * @param[out]  *pDst is output pointer
   6673    * @param[in]  blockSize is the number of samples to process
   6674    * @return none.
   6675    */
   6676   void arm_q15_to_q7(
   6677 		      q15_t * pSrc,
   6678 		     q7_t * pDst,
   6679 		     uint32_t blockSize);
   6680 
   6681 
   6682   /**
   6683    * @ingroup groupInterpolation
   6684    */
   6685 
   6686   /**
   6687    * @defgroup BilinearInterpolate Bilinear Interpolation
   6688    *
   6689    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
   6690    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
   6691    * determines values between the grid points.
   6692    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
   6693    * Bilinear interpolation is often used in image processing to rescale images.
   6694    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
   6695    *
   6696    * <b>Algorithm</b>
   6697    * \par
   6698    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
   6699    * For floating-point, the instance structure is defined as:
   6700    * <pre>
   6701    *   typedef struct
   6702    *   {
   6703    *     uint16_t numRows;
   6704    *     uint16_t numCols;
   6705    *     float32_t *pData;
   6706    * } arm_bilinear_interp_instance_f32;
   6707    * </pre>
   6708    *
   6709    * \par
   6710    * where <code>numRows</code> specifies the number of rows in the table;
   6711    * <code>numCols</code> specifies the number of columns in the table;
   6712    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
   6713    * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
   6714    * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
   6715    *
   6716    * \par
   6717    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
   6718    * <pre>
   6719    *     XF = floor(x)
   6720    *     YF = floor(y)
   6721    * </pre>
   6722    * \par
   6723    * The interpolated output point is computed as:
   6724    * <pre>
   6725    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
   6726    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
   6727    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
   6728    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
   6729    * </pre>
   6730    * Note that the coordinates (x, y) contain integer and fractional components.
   6731    * The integer components specify which portion of the table to use while the
   6732    * fractional components control the interpolation process.
   6733    *
   6734    * \par
   6735    * If (x, y) is outside the table boundary, bilinear interpolation returns zero.
   6736    */
   6737 
   6738   /**
   6739    * @addtogroup BilinearInterpolate
   6740    * @{
   6741    */
   6742 
   6743   /**
   6744   *
   6745   * @brief  Floating-point bilinear interpolation.
   6746   * @param[in,out] *S points to an instance of the interpolation structure.
   6747   * @param[in] X interpolation coordinate.
   6748   * @param[in] Y interpolation coordinate.
   6749   * @return out interpolated value.
   6750   */
   6751 
   6752 
   6753   static __INLINE float32_t arm_bilinear_interp_f32(
   6754 						    const arm_bilinear_interp_instance_f32 * S,
   6755 						    float32_t X,
   6756 						    float32_t Y)
   6757   {
   6758     float32_t out;
   6759     float32_t f00, f01, f10, f11;
   6760     float32_t *pData = S->pData;
   6761     int32_t xIndex, yIndex, index;
   6762     float32_t xdiff, ydiff;
   6763     float32_t b1, b2, b3, b4;
   6764 
   6765     xIndex = (int32_t) X;
   6766     yIndex = (int32_t) Y;
   6767 
   6768 	/* Care taken for table outside boundary */
   6769 	/* Returns zero output when values are outside table boundary */
   6770 	if(xIndex < 0 || xIndex > (S->numRows-1) || yIndex < 0  || yIndex > ( S->numCols-1))
   6771 	{
   6772 		return(0);
   6773 	}
   6774 
   6775     /* Calculation of index for two nearest points in X-direction */
   6776     index = (xIndex - 1) + (yIndex-1) *  S->numCols ;
   6777 
   6778 
   6779     /* Read two nearest points in X-direction */
   6780     f00 = pData[index];
   6781     f01 = pData[index + 1];
   6782 
   6783     /* Calculation of index for two nearest points in Y-direction */
   6784     index = (xIndex-1) + (yIndex) * S->numCols;
   6785 
   6786 
   6787     /* Read two nearest points in Y-direction */
   6788     f10 = pData[index];
   6789     f11 = pData[index + 1];
   6790 
   6791     /* Calculation of intermediate values */
   6792     b1 = f00;
   6793     b2 = f01 - f00;
   6794     b3 = f10 - f00;
   6795     b4 = f00 - f01 - f10 + f11;
   6796 
   6797     /* Calculation of fractional part in X */
   6798     xdiff = X - xIndex;
   6799 
   6800     /* Calculation of fractional part in Y */
   6801     ydiff = Y - yIndex;
   6802 
   6803     /* Calculation of bi-linear interpolated output */
   6804      out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
   6805 
   6806    /* return to application */
   6807     return (out);
   6808 
   6809   }
   6810 
   6811   /**
   6812   *
   6813   * @brief  Q31 bilinear interpolation.
   6814   * @param[in,out] *S points to an instance of the interpolation structure.
   6815   * @param[in] X interpolation coordinate in 12.20 format.
   6816   * @param[in] Y interpolation coordinate in 12.20 format.
   6817   * @return out interpolated value.
   6818   */
   6819 
   6820   static __INLINE q31_t arm_bilinear_interp_q31(
   6821 						arm_bilinear_interp_instance_q31 * S,
   6822 						q31_t X,
   6823 						q31_t Y)
   6824   {
   6825     q31_t out;                                   /* Temporary output */
   6826     q31_t acc = 0;                               /* output */
   6827     q31_t xfract, yfract;                        /* X, Y fractional parts */
   6828     q31_t x1, x2, y1, y2;                        /* Nearest output values */
   6829     int32_t rI, cI;                             /* Row and column indices */
   6830     q31_t *pYData = S->pData;                    /* pointer to output table values */
   6831     uint32_t nCols = S->numCols;                 /* num of rows */
   6832 
   6833 
   6834     /* Input is in 12.20 format */
   6835     /* 12 bits for the table index */
   6836     /* Index value calculation */
   6837     rI = ((X & 0xFFF00000) >> 20u);
   6838 
   6839     /* Input is in 12.20 format */
   6840     /* 12 bits for the table index */
   6841     /* Index value calculation */
   6842     cI = ((Y & 0xFFF00000) >> 20u);
   6843 
   6844 	/* Care taken for table outside boundary */
   6845 	/* Returns zero output when values are outside table boundary */
   6846 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
   6847 	{
   6848 		return(0);
   6849 	}
   6850 
   6851     /* 20 bits for the fractional part */
   6852     /* shift left xfract by 11 to keep 1.31 format */
   6853     xfract = (X & 0x000FFFFF) << 11u;
   6854 
   6855     /* Read two nearest output values from the index */
   6856     x1 = pYData[(rI) + nCols * (cI)];
   6857     x2 = pYData[(rI) + nCols * (cI) + 1u];
   6858 
   6859     /* 20 bits for the fractional part */
   6860     /* shift left yfract by 11 to keep 1.31 format */
   6861     yfract = (Y & 0x000FFFFF) << 11u;
   6862 
   6863     /* Read two nearest output values from the index */
   6864     y1 = pYData[(rI) + nCols * (cI + 1)];
   6865     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
   6866 
   6867     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
   6868     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
   6869     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
   6870 
   6871     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
   6872     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
   6873     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
   6874 
   6875     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
   6876     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
   6877     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
   6878 
   6879     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
   6880     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
   6881     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
   6882 
   6883     /* Convert acc to 1.31(q31) format */
   6884     return (acc << 2u);
   6885 
   6886   }
   6887 
   6888   /**
   6889   * @brief  Q15 bilinear interpolation.
   6890   * @param[in,out] *S points to an instance of the interpolation structure.
   6891   * @param[in] X interpolation coordinate in 12.20 format.
   6892   * @param[in] Y interpolation coordinate in 12.20 format.
   6893   * @return out interpolated value.
   6894   */
   6895 
   6896   static __INLINE q15_t arm_bilinear_interp_q15(
   6897 						arm_bilinear_interp_instance_q15 * S,
   6898 						q31_t X,
   6899 						q31_t Y)
   6900   {
   6901     q63_t acc = 0;                               /* output */
   6902     q31_t out;                                   /* Temporary output */
   6903     q15_t x1, x2, y1, y2;                        /* Nearest output values */
   6904     q31_t xfract, yfract;                        /* X, Y fractional parts */
   6905     int32_t rI, cI;                             /* Row and column indices */
   6906     q15_t *pYData = S->pData;                    /* pointer to output table values */
   6907     uint32_t nCols = S->numCols;                 /* num of rows */
   6908 
   6909     /* Input is in 12.20 format */
   6910     /* 12 bits for the table index */
   6911     /* Index value calculation */
   6912     rI = ((X & 0xFFF00000) >> 20);
   6913 
   6914     /* Input is in 12.20 format */
   6915     /* 12 bits for the table index */
   6916     /* Index value calculation */
   6917     cI = ((Y & 0xFFF00000) >> 20);
   6918 
   6919 	/* Care taken for table outside boundary */
   6920 	/* Returns zero output when values are outside table boundary */
   6921 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
   6922 	{
   6923 		return(0);
   6924 	}
   6925 
   6926     /* 20 bits for the fractional part */
   6927     /* xfract should be in 12.20 format */
   6928     xfract = (X & 0x000FFFFF);
   6929 
   6930     /* Read two nearest output values from the index */
   6931     x1 = pYData[(rI) + nCols * (cI)];
   6932     x2 = pYData[(rI) + nCols * (cI) + 1u];
   6933 
   6934 
   6935     /* 20 bits for the fractional part */
   6936     /* yfract should be in 12.20 format */
   6937     yfract = (Y & 0x000FFFFF);
   6938 
   6939     /* Read two nearest output values from the index */
   6940     y1 = pYData[(rI) + nCols * (cI + 1)];
   6941     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
   6942 
   6943     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
   6944 
   6945     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
   6946     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
   6947     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
   6948     acc = ((q63_t) out * (0xFFFFF - yfract));
   6949 
   6950     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
   6951     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
   6952     acc += ((q63_t) out * (xfract));
   6953 
   6954     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
   6955     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
   6956     acc += ((q63_t) out * (yfract));
   6957 
   6958     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
   6959     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
   6960     acc += ((q63_t) out * (yfract));
   6961 
   6962     /* acc is in 13.51 format and down shift acc by 36 times */
   6963     /* Convert out to 1.15 format */
   6964     return (acc >> 36);
   6965 
   6966   }
   6967 
   6968   /**
   6969   * @brief  Q7 bilinear interpolation.
   6970   * @param[in,out] *S points to an instance of the interpolation structure.
   6971   * @param[in] X interpolation coordinate in 12.20 format.
   6972   * @param[in] Y interpolation coordinate in 12.20 format.
   6973   * @return out interpolated value.
   6974   */
   6975 
   6976   static __INLINE q7_t arm_bilinear_interp_q7(
   6977 					      arm_bilinear_interp_instance_q7 * S,
   6978 					      q31_t X,
   6979 					      q31_t Y)
   6980   {
   6981     q63_t acc = 0;                               /* output */
   6982     q31_t out;                                   /* Temporary output */
   6983     q31_t xfract, yfract;                        /* X, Y fractional parts */
   6984     q7_t x1, x2, y1, y2;                         /* Nearest output values */
   6985     int32_t rI, cI;                             /* Row and column indices */
   6986     q7_t *pYData = S->pData;                     /* pointer to output table values */
   6987     uint32_t nCols = S->numCols;                 /* num of rows */
   6988 
   6989     /* Input is in 12.20 format */
   6990     /* 12 bits for the table index */
   6991     /* Index value calculation */
   6992     rI = ((X & 0xFFF00000) >> 20);
   6993 
   6994     /* Input is in 12.20 format */
   6995     /* 12 bits for the table index */
   6996     /* Index value calculation */
   6997     cI = ((Y & 0xFFF00000) >> 20);
   6998 
   6999 	/* Care taken for table outside boundary */
   7000 	/* Returns zero output when values are outside table boundary */
   7001 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
   7002 	{
   7003 		return(0);
   7004 	}
   7005 
   7006     /* 20 bits for the fractional part */
   7007     /* xfract should be in 12.20 format */
   7008     xfract = (X & 0x000FFFFF);
   7009 
   7010     /* Read two nearest output values from the index */
   7011     x1 = pYData[(rI) + nCols * (cI)];
   7012     x2 = pYData[(rI) + nCols * (cI) + 1u];
   7013 
   7014 
   7015     /* 20 bits for the fractional part */
   7016     /* yfract should be in 12.20 format */
   7017     yfract = (Y & 0x000FFFFF);
   7018 
   7019     /* Read two nearest output values from the index */
   7020     y1 = pYData[(rI) + nCols * (cI + 1)];
   7021     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
   7022 
   7023     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
   7024     out = ((x1 * (0xFFFFF - xfract)));
   7025     acc = (((q63_t) out * (0xFFFFF - yfract)));
   7026 
   7027     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
   7028     out = ((x2 * (0xFFFFF - yfract)));
   7029     acc += (((q63_t) out * (xfract)));
   7030 
   7031     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
   7032     out = ((y1 * (0xFFFFF - xfract)));
   7033     acc += (((q63_t) out * (yfract)));
   7034 
   7035     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
   7036     out = ((y2 * (yfract)));
   7037     acc += (((q63_t) out * (xfract)));
   7038 
   7039     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
   7040     return (acc >> 40);
   7041 
   7042   }
   7043 
   7044   /**
   7045    * @} end of BilinearInterpolate group
   7046    */
   7047 
   7048 
   7049 
   7050 
   7051 
   7052 
   7053 #ifdef	__cplusplus
   7054 }
   7055 #endif
   7056 
   7057 
   7058 #endif /* _ARM_MATH_H */
   7059 
   7060 
   7061 /**
   7062  *
   7063  * End of file.
   7064  */
	lpc-field Template project for programming NXP's LPC1768 MCUs
	git clone git://git.mdnr.space/lpc-field
	Log \| Files \| Refs \| README \| LICENSE