Commit 0aa71a11 by Enrico Pozzobon

templates work and fixed test script

parent 406949a8
/* ----------------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_add_f32.c
*
* Description: Floating-point matrix addition
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @defgroup MatrixAdd Matrix Addition
*
* Adds two matrices.
* \image html MatrixAddition.gif "Addition of two 3 x 3 matrices"
*
* The functions check to make sure that
* <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same
* number of rows and columns.
*/
/**
* @addtogroup MatrixAdd
* @{
*/
/**
* @brief Floating-point matrix addition.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_add_f32(
  const arm_matrix_instance_f32 * pSrcA,
  const arm_matrix_instance_f32 * pSrcB,
  arm_matrix_instance_f32 * pDst)
{
  float32_t *srcA = pSrcA->pData;   /* read cursor into matrix A */
  float32_t *srcB = pSrcB->pData;   /* read cursor into matrix B */
  float32_t *dst = pDst->pData;     /* write cursor into the result */
  uint32_t total;                   /* element count of one matrix */
  uint32_t remaining;               /* iterations left in the current loop */

#ifdef ARM_MATH_MATRIX_CHECK
  /* All three matrices must agree in both dimensions. */
  if(!((pSrcA->numRows == pSrcB->numRows) &&
       (pSrcA->numCols == pSrcB->numCols) &&
       (pSrcA->numRows == pDst->numRows) &&
       (pSrcA->numCols == pDst->numCols)))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  /* The addition is element-wise, so the matrix is handled as a flat array. */
  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;

#ifndef ARM_MATH_CM0_FAMILY

  /* Unrolled pass: four sums per iteration. */
  for(remaining = total >> 2u; remaining > 0u; remaining--)
  {
    float32_t a0, b0, a1, b1, a2, b2, a3, b3;

    /* Load the first three operand pairs before anything is stored. */
    a0 = srcA[0];
    b0 = srcB[0];
    a1 = srcA[1];
    b1 = srcB[1];
    a2 = srcA[2];
    b2 = srcB[2];

    /* C(m,n) = A(m,n) + B(m,n) for the first two elements. */
    dst[0] = a0 + b0;
    dst[1] = a1 + b1;

    /* Load the fourth pair, then emit the last two sums. */
    a3 = srcA[3];
    b3 = srcB[3];

    dst[2] = a2 + b2;
    dst[3] = a3 + b3;

    /* Step all cursors past the four elements just handled. */
    srcA += 4u;
    srcB += 4u;
    dst += 4u;
  }

  /* Zero to three elements are left over for the scalar loop. */
  remaining = total % 0x4u;

#else

  /* Cortex-M0 build: every element goes through the scalar loop. */
  remaining = total;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) + B(m,n) */
    *dst++ = (*srcA++) + (*srcB++);
  }

  /* Sizes were compatible (or not checked) and every element was written. */
  return (ARM_MATH_SUCCESS);
}
/**
* @} end of MatrixAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_add_q15.c
*
* Description: Q15 matrix addition
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixAdd
* @{
*/
/**
* @brief Q15 matrix addition.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
*/
arm_status arm_mat_add_q15(
  const arm_matrix_instance_q15 * pSrcA,
  const arm_matrix_instance_q15 * pSrcB,
  arm_matrix_instance_q15 * pDst)
{
  q15_t *pInA = pSrcA->pData;                    /* input data matrix pointer A */
  q15_t *pInB = pSrcB->pData;                    /* input data matrix pointer B */
  q15_t *pOut = pDst->pData;                     /* output data matrix pointer */
  uint32_t numSamples;                           /* total number of elements in the matrix */
  uint32_t blkCnt;                               /* loop counter */
  arm_status status;                             /* status of matrix addition */

#ifdef ARM_MATH_MATRIX_CHECK

  /* Check for matrix mismatch condition */
  if((pSrcA->numRows != pSrcB->numRows) ||
     (pSrcA->numCols != pSrcB->numCols) ||
     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  {
    /* Total number of samples in the input matrix.
     ** The count is kept in 32 bits: narrowing the product to 16 bits would
     ** drop the upper bits for matrices holding more than 65535 elements and
     ** leave part of the output unwritten. This matches the f32 and q31
     ** addition routines. */
    numSamples = (uint32_t) pSrcA->numRows * pSrcA->numCols;

#ifndef ARM_MATH_CM0_FAMILY

    /* Run the below code for Cortex-M4 and Cortex-M3 */

    /* Loop unrolling: each iteration handles four q15 samples as two
     ** packed 32-bit words. */
    blkCnt = numSamples >> 2u;

    /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
     ** A second loop below computes the remaining 0 to 3 samples. */
    while(blkCnt > 0u)
    {
      /* C(m,n) = A(m,n) + B(m,n) */
      /* Add, saturate and then store the results in the destination buffer. */
      *__SIMD32(pOut)++ = __QADD16(*__SIMD32(pInA)++, *__SIMD32(pInB)++);
      *__SIMD32(pOut)++ = __QADD16(*__SIMD32(pInA)++, *__SIMD32(pInB)++);

      /* Decrement the loop counter */
      blkCnt--;
    }

    /* If numSamples is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
    blkCnt = numSamples % 0x4u;

    while(blkCnt > 0u)
    {
      /* C(m,n) = A(m,n) + B(m,n) */
      /* Add, saturate and then store the result; only the low halfword of
       ** the packed sum is kept by the q15_t cast. */
      *pOut++ = (q15_t) __QADD16(*pInA++, *pInB++);

      /* Decrement the loop counter */
      blkCnt--;
    }

#else

    /* Run the below code for Cortex-M0 */

    /* Initialize blkCnt with number of samples */
    blkCnt = numSamples;

    while(blkCnt > 0u)
    {
      /* C(m,n) = A(m,n) + B(m,n) */
      /* Add in 32 bits, saturate to 16 bits and store the result. */
      *pOut++ = (q15_t) __SSAT(((q31_t) * pInA++ + *pInB++), 16);

      /* Decrement the loop counter */
      blkCnt--;
    }

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

    /* set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_add_q31.c
*
* Description: Q31 matrix addition
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixAdd
* @{
*/
/**
* @brief Q31 matrix addition.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
*/
arm_status arm_mat_add_q31(
  const arm_matrix_instance_q31 * pSrcA,
  const arm_matrix_instance_q31 * pSrcB,
  arm_matrix_instance_q31 * pDst)
{
  q31_t *srcA = pSrcA->pData;       /* read cursor into matrix A */
  q31_t *srcB = pSrcB->pData;       /* read cursor into matrix B */
  q31_t *dst = pDst->pData;         /* write cursor into the result */
  uint32_t total;                   /* element count of one matrix */
  uint32_t remaining;               /* iterations left in the current loop */

#ifdef ARM_MATH_MATRIX_CHECK
  /* All three matrices must agree in both dimensions. */
  if(!((pSrcA->numRows == pSrcB->numRows) &&
       (pSrcA->numCols == pSrcB->numCols) &&
       (pSrcA->numRows == pDst->numRows) &&
       (pSrcA->numCols == pDst->numCols)))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  /* The addition is element-wise, so the matrix is handled as a flat array. */
  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 / Cortex-M3 build: unrolled pass, four sums per iteration. */
  for(remaining = total >> 2u; remaining > 0u; remaining--)
  {
    q31_t a0, b0, a1, b1, a2, b2, a3, b3;
    q31_t sum0, sum1;

    /* Load the first three operand pairs, saturating-add as they arrive. */
    a0 = srcA[0];
    b0 = srcB[0];
    a1 = srcA[1];
    sum0 = __QADD(a0, b0);
    b1 = srcB[1];
    a2 = srcA[2];
    sum1 = __QADD(a1, b1);
    b2 = srcB[2];

    /* Store the first two results. */
    dst[0] = sum0;
    dst[1] = sum1;

    /* Load the fourth pair, then produce and store the last two results. */
    a3 = srcA[3];
    b3 = srcB[3];

    sum0 = __QADD(a2, b2);
    sum1 = __QADD(a3, b3);

    dst[2] = sum0;
    dst[3] = sum1;

    /* Step all cursors past the four elements just handled. */
    srcA += 4u;
    srcB += 4u;
    dst += 4u;
  }

  /* Zero to three elements are left over for the scalar loop. */
  remaining = total % 0x4u;

#else

  /* Cortex-M0 build: every element goes through the scalar loop. */
  remaining = total;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) + B(m,n), saturated */
    q31_t a = *srcA++;
    q31_t b = *srcB++;

    *dst++ = __QADD(a, b);
  }

  /* Sizes were compatible (or not checked) and every element was written. */
  return (ARM_MATH_SUCCESS);
}
/**
* @} end of MatrixAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_cmplx_mult_f32.c
*
* Description: Floating-point complex matrix multiplication.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @defgroup CmplxMatrixMult Complex Matrix Multiplication
*
* Complex Matrix multiplication is only defined if the number of columns of the
* first matrix equals the number of rows of the second matrix.
* Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results
* in an <code>M x P</code> matrix.
* When matrix size checking is enabled, the functions check: (1) that the inner dimensions of
* <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output
* matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>.
*/
/**
* @addtogroup CmplxMatrixMult
* @{
*/
/**
* @brief Floating-point Complex matrix multiplication.
* @param[in] *pSrcA points to the first input complex matrix structure
* @param[in] *pSrcB points to the second input complex matrix structure
* @param[out] *pDst points to output complex matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_cmplx_mult_f32(
  const arm_matrix_instance_f32 * pSrcA,
  const arm_matrix_instance_f32 * pSrcB,
  arm_matrix_instance_f32 * pDst)
{
  /* Every complex element occupies two consecutive floats: real part first,
   ** imaginary part second. All pointer strides below are therefore doubled. */
  float32_t *pIn1 = pSrcA->pData;                /* walks along the current row of A */
  float32_t *pIn2 = pSrcB->pData;                /* walks down the current column of B */
  float32_t *pInA = pSrcA->pData;                /* start of the current row of A */
  float32_t *pOut = pDst->pData;                 /* start of the output data */
  float32_t *px;                                 /* write cursor into the output */
  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A */
  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
  float32_t sumReal1, sumImag1;                  /* accumulators for the a*c and b*c terms */
  float32_t a0, b0, c0, d0;
  float32_t a1, b1, c1, d1;
  float32_t sumReal2, sumImag2;                  /* accumulators for the -b*d and a*d terms */

  /* Offset (in complex elements) of the output row being produced. Kept in
   ** 32 bits because it grows by numColsB per row and would wrap in a 16-bit
   ** counter once the output holds more than 65535 elements. */
  uint32_t i = 0u;
  uint16_t col, j, row = numRowsA, colCnt;       /* loop counters */
  arm_status status;                             /* status of matrix multiplication */

#ifdef ARM_MATH_MATRIX_CHECK

  /* Check for matrix mismatch condition */
  if((pSrcA->numCols != pSrcB->numRows) ||
     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  {
    /* The following loops perform the dot-product of each row in pSrcA with
     ** each column in pSrcB. Pre-tested loops are used so that a matrix with
     ** zero rows or zero columns is skipped instead of being entered once and
     ** then wrapping the 16-bit counter. */

    /* row loop */
    while(row > 0u)
    {
      /* Output pointer is set to starting address of the row being processed */
      px = pOut + 2 * i;

      /* For every row wise process, the column loop counter is to be initiated */
      col = numColsB;

      /* For every row wise process, the pIn2 pointer is set
       ** to the starting address of the pSrcB data */
      pIn2 = pSrcB->pData;

      j = 0u;

      /* column loop */
      while(col > 0u)
      {
        /* Clear the four partial sums. (a + jb)(c + jd) contributes
         ** a*c and -b*d to the real part, b*c and a*d to the imaginary part. */
        sumReal1 = 0.0f;
        sumImag1 = 0.0f;

        sumReal2 = 0.0f;
        sumImag2 = 0.0f;

        /* Restart pIn1 at the beginning of the current row of A */
        pIn1 = pInA;

        /* Apply loop unrolling and compute 4 complex MACs per iteration. */
        colCnt = numColsA >> 2;

        /* matrix multiplication */
        while(colCnt > 0u)
        {
          /* Reading real part of complex matrix A */
          a0 = *pIn1;

          /* Reading real part of complex matrix B */
          c0 = *pIn2;

          /* Reading imaginary part of complex matrix A */
          b0 = *(pIn1 + 1u);

          /* Reading imaginary part of complex matrix B */
          d0 = *(pIn2 + 1u);

          sumReal1 += a0 * c0;
          sumImag1 += b0 * c0;

          /* Next element in the row of A, next row in the column of B */
          pIn1 += 2u;
          pIn2 += 2 * numColsB;

          sumReal2 -= b0 * d0;
          sumImag2 += a0 * d0;

          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */

          a1 = *pIn1;
          c1 = *pIn2;

          b1 = *(pIn1 + 1u);
          d1 = *(pIn2 + 1u);

          sumReal1 += a1 * c1;
          sumImag1 += b1 * c1;

          pIn1 += 2u;
          pIn2 += 2 * numColsB;

          sumReal2 -= b1 * d1;
          sumImag2 += a1 * d1;

          a0 = *pIn1;
          c0 = *pIn2;

          b0 = *(pIn1 + 1u);
          d0 = *(pIn2 + 1u);

          sumReal1 += a0 * c0;
          sumImag1 += b0 * c0;

          pIn1 += 2u;
          pIn2 += 2 * numColsB;

          sumReal2 -= b0 * d0;
          sumImag2 += a0 * d0;

          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */

          a1 = *pIn1;
          c1 = *pIn2;

          b1 = *(pIn1 + 1u);
          d1 = *(pIn2 + 1u);

          sumReal1 += a1 * c1;
          sumImag1 += b1 * c1;

          pIn1 += 2u;
          pIn2 += 2 * numColsB;

          sumReal2 -= b1 * d1;
          sumImag2 += a1 * d1;

          /* Decrement the loop count */
          colCnt--;
        }

        /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
         ** No loop unrolling is used. */
        colCnt = numColsA % 0x4u;

        while(colCnt > 0u)
        {
          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
          a1 = *pIn1;
          c1 = *pIn2;

          b1 = *(pIn1 + 1u);
          d1 = *(pIn2 + 1u);

          sumReal1 += a1 * c1;
          sumImag1 += b1 * c1;

          pIn1 += 2u;
          pIn2 += 2 * numColsB;

          sumReal2 -= b1 * d1;
          sumImag2 += a1 * d1;

          /* Decrement the loop counter */
          colCnt--;
        }

        /* Combine the partial sums into the final real and imaginary parts */
        sumReal1 += sumReal2;
        sumImag1 += sumImag2;

        /* Store the result in the destination buffer */
        *px++ = sumReal1;
        *px++ = sumImag1;

        /* Update the pointer pIn2 to point to the starting address of the next column */
        j++;
        pIn2 = pSrcB->pData + 2u * j;

        /* Decrement the column loop counter */
        col--;
      }

      /* Advance the output offset by one row of results and move pInA to
       ** the starting address of the next row of A */
      i = i + numColsB;
      pInA = pInA + 2 * numColsA;

      /* Decrement the row loop counter */
      row--;
    }

    /* Set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of CmplxMatrixMult group
*/
/* ----------------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_init_f32.c
*
* Description: Floating-point matrix initialization.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @defgroup MatrixInit Matrix Initialization
*
* Initializes the underlying matrix data structure.
* The functions set the <code>numRows</code>,
* <code>numCols</code>, and <code>pData</code> fields
* of the matrix data structure.
*/
/**
* @addtogroup MatrixInit
* @{
*/
/**
* @brief Floating-point matrix initialization.
* @param[in,out] *S points to an instance of the floating-point matrix structure.
* @param[in] nRows number of rows in the matrix.
* @param[in] nColumns number of columns in the matrix.
* @param[in] *pData points to the matrix data array.
* @return none
*/
void arm_mat_init_f32(
  arm_matrix_instance_f32 * S,
  uint16_t nRows,
  uint16_t nColumns,
  float32_t * pData)
{
  /* Attach the caller-supplied element buffer; nothing is copied. */
  S->pData = pData;

  /* Record the matrix dimensions. */
  S->numCols = nColumns;
  S->numRows = nRows;
}
/**
* @} end of MatrixInit group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_init_q31.c
*
* Description: Q31 matrix initialization.
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @defgroup MatrixInit Matrix Initialization
*
*/
/**
* @addtogroup MatrixInit
* @{
*/
/**
* @brief Q31 matrix initialization.
* @param[in,out] *S points to an instance of the Q31 matrix structure.
* @param[in] nRows number of rows in the matrix.
* @param[in] nColumns number of columns in the matrix.
* @param[in] *pData points to the matrix data array.
* @return none
*/
void arm_mat_init_q31(
  arm_matrix_instance_q31 * S,
  uint16_t nRows,
  uint16_t nColumns,
  q31_t * pData)
{
  /* Attach the caller-supplied element buffer; nothing is copied. */
  S->pData = pData;

  /* Record the matrix dimensions. */
  S->numCols = nColumns;
  S->numRows = nRows;
}
/**
* @} end of MatrixInit group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_mult_fast_q31.c
*
* Description: Q31 matrix multiplication (fast variant).
*
* Target Processor: Cortex-M4/Cortex-M3
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixMult
* @{
*/
/**
* @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
*
* \par
* The difference between the function arm_mat_mult_q31() and this fast variant is that
* the fast variant uses a 32-bit rather than a 64-bit accumulator.
* The result of each 1.31 x 1.31 multiplication is truncated to
* 2.30 format. These intermediate results are accumulated in a 32-bit register in 2.30
* format. Finally, the accumulator is saturated and converted to a 1.31 result.
*
* \par
* The fast version has the same overflow behavior as the standard version but provides
* less precision since it discards the low 32 bits of each multiplication result.
* In order to avoid overflows completely the input signals must be scaled down.
* Scale down one of the input matrices by log2(numColsA) bits to
* avoid overflows, as a total of numColsA additions are computed internally for each
* output element.
*
* \par
* See <code>arm_mat_mult_q31()</code> for a slower implementation of this function
* which uses 64-bit accumulation to provide higher precision.
*/
arm_status arm_mat_mult_fast_q31(
  const arm_matrix_instance_q31 * pSrcA,
  const arm_matrix_instance_q31 * pSrcB,
  arm_matrix_instance_q31 * pDst)
{
  /*
   * Computes pDst = pSrcA * pSrcB with a 32-bit accumulator.
   *
   * Every product is added to the accumulator through a 64-bit
   * intermediate of which only the upper 32 bits are kept, so the low
   * 32 bits of each product are discarded.  The final value is stored
   * as (sum << 1); no saturation is applied at that point.
   *
   * Returns ARM_MATH_SIZE_MISMATCH when ARM_MATH_MATRIX_CHECK is defined
   * and the dimensions are inconsistent, ARM_MATH_SUCCESS otherwise.
   * A matrix with zero rows in A or zero columns in B produces no output
   * and is reported as success.
   */
  q31_t *pIn1 = pSrcA->pData;                    /* walks along the current row of A */
  q31_t *pIn2 = pSrcB->pData;                    /* walks down the current column of B */
  q31_t *pInA = pSrcA->pData;                    /* start of the current row of A */
  q31_t *pOut = pDst->pData;                     /* output data matrix pointer */
  q31_t *px;                                     /* current write position in the output */
  q31_t sum;                                     /* 32-bit accumulator */
  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A */
  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
  uint16_t col, j, row = numRowsA, colCnt;       /* loop counters */
  /* Element offset of the current output row.  Kept in 32 bits because
   * numRowsA * numColsB may exceed the range of uint16_t. */
  uint32_t i = 0u;
  arm_status status;                             /* status of matrix multiplication */
  q31_t inA1, inA2, inA3, inA4, inB1, inB2, inB3, inB4;

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if((pSrcA->numCols != pSrcB->numRows) ||
     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  {
    /* Row loop: one pass per row of A.  Pre-tested so that a zero row
     * count skips the body instead of wrapping the 16-bit counter. */
    while(row > 0u)
    {
      /* Output pointer is set to the start of the row being produced */
      px = pOut + i;

      /* Restart the column counter and the B pointer for this row */
      col = numColsB;
      pIn2 = pSrcB->pData;
      j = 0u;

      /* Column loop: one pass per column of B (pre-tested for the same
       * reason as the row loop). */
      while(col > 0u)
      {
        /* Clear the accumulator for this output element */
        sum = 0;

        /* Rewind pIn1 to the start of the current row of A */
        pIn1 = pInA;

        /* Unrolled part: four multiply-accumulates per iteration */
        colCnt = numColsA >> 2;

        while(colCnt > 0u)
        {
          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
          /* B is traversed column-wise, hence the stride of numColsB */
          inB1 = *pIn2;
          pIn2 += numColsB;

          inA1 = pIn1[0];
          inA2 = pIn1[1];

          inB2 = *pIn2;
          pIn2 += numColsB;

          inB3 = *pIn2;
          pIn2 += numColsB;

          /* Add the product to the accumulator, keeping the upper 32 bits */
          sum = (q31_t) ((((q63_t) sum << 32) + ((q63_t) inA1 * inB1)) >> 32);
          sum = (q31_t) ((((q63_t) sum << 32) + ((q63_t) inA2 * inB2)) >> 32);

          inA3 = pIn1[2];
          inA4 = pIn1[3];

          inB4 = *pIn2;
          pIn2 += numColsB;

          sum = (q31_t) ((((q63_t) sum << 32) + ((q63_t) inA3 * inB3)) >> 32);
          sum = (q31_t) ((((q63_t) sum << 32) + ((q63_t) inA4 * inB4)) >> 32);

          pIn1 += 4u;

          /* Decrement the loop counter */
          colCnt--;
        }

        /* Remaining 0 to 3 products when numColsA is not a multiple of 4 */
        colCnt = numColsA % 0x4u;

        while(colCnt > 0u)
        {
          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
          sum = (q31_t) ((((q63_t) sum << 32) +
                          ((q63_t) * pIn1++ * (*pIn2))) >> 32);
          pIn2 += numColsB;

          /* Decrement the loop counter */
          colCnt--;
        }

        /* Convert the result from 2.30 to 1.31 format and store it */
        *px++ = sum << 1;

        /* Point pIn2 at the top of the next column of B */
        j++;
        pIn2 = pSrcB->pData + j;

        /* Decrement the column loop counter */
        col--;
      }

      /* Advance to the next output row and the next row of A */
      i = i + numColsB;
      pInA = pInA + numColsA;

      /* Decrement the row loop counter */
      row--;
    }

    /* set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixMult group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_scale_f32.c
*
* Description: Multiplies a floating-point matrix by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @defgroup MatrixScale Matrix Scale
*
* Multiplies a matrix by a scalar. This is accomplished by multiplying each element in the
* matrix by the scalar. For example:
* \image html MatrixScale.gif "Matrix Scaling of a 3 x 3 matrix"
*
* The function checks to make sure that the input and output matrices are of the same size.
*
* In the fixed-point Q15 and Q31 functions, <code>scale</code> is represented by
* a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
* The shift allows the gain of the scaling operation to exceed 1.0.
* The overall scale factor applied to the fixed-point data is
* <pre>
* scale = scaleFract * 2^shift.
* </pre>
*/
/**
* @addtogroup MatrixScale
* @{
*/
/**
* @brief Floating-point matrix scaling.
* @param[in] *pSrc points to input matrix structure
* @param[in] scale scale factor to be applied
* @param[out] *pDst points to output matrix structure
* @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
* or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
*/
arm_status arm_mat_scale_f32(
  const arm_matrix_instance_f32 * pSrc,
  float32_t scale,
  arm_matrix_instance_f32 * pDst)
{
  /*
   * Writes pDst(m,n) = pSrc(m,n) * scale for every element.
   * With ARM_MATH_MATRIX_CHECK defined, differing dimensions are rejected
   * with ARM_MATH_SIZE_MISMATCH; otherwise ARM_MATH_SUCCESS is returned.
   */
  float32_t *pSrcData = pSrc->pData;             /* read cursor */
  float32_t *pDstData = pDst->pData;             /* write cursor */
  uint32_t total;                                /* number of matrix elements */
  uint32_t remaining;                            /* loop counter */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Source and destination must have identical shapes */
  if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  total = (uint32_t) pSrc->numRows * pSrc->numCols;

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 / Cortex-M3: handle four elements per iteration */
  for(remaining = total >> 2; remaining > 0u; remaining--)
  {
    /* Load the whole group before any store is issued */
    float32_t a0 = pSrcData[0];
    float32_t a1 = pSrcData[1];
    float32_t a2 = pSrcData[2];
    float32_t a3 = pSrcData[3];

    /* C(m,n) = A(m,n) * scale */
    pDstData[0] = a0 * scale;
    pDstData[1] = a1 * scale;
    pDstData[2] = a2 * scale;
    pDstData[3] = a3 * scale;

    pSrcData += 4u;
    pDstData += 4u;
  }

  /* 0 to 3 elements are left over for the scalar loop below */
  remaining = total % 0x4u;

#else

  /* Cortex-M0: every element goes through the scalar loop */
  remaining = total;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) * scale */
    *pDstData++ = (*pSrcData++) * scale;
  }

  return (ARM_MATH_SUCCESS);
}
/**
* @} end of MatrixScale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_scale_q15.c
*
* Description: Multiplies a Q15 matrix by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixScale
* @{
*/
/**
* @brief Q15 matrix scaling.
* @param[in] *pSrc points to input matrix
* @param[in] scaleFract fractional portion of the scale factor
* @param[in] shift number of bits to shift the result by
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
* \par
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
* These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
*/
arm_status arm_mat_scale_q15(
  const arm_matrix_instance_q15 * pSrc,
  q15_t scaleFract,
  int32_t shift,
  arm_matrix_instance_q15 * pDst)
{
  /*
   * Writes pDst(m,n) = sat16((pSrc(m,n) * scaleFract) >> (15 - shift)).
   *
   * With ARM_MATH_MATRIX_CHECK defined, differing dimensions are rejected
   * with ARM_MATH_SIZE_MISMATCH; otherwise ARM_MATH_SUCCESS is returned.
   *
   * NOTE(review): the right-shift count is 15 - shift and is not range
   * checked here, so callers are presumably expected to keep shift <= 15.
   */
  q15_t *pIn = pSrc->pData;                      /* input data matrix pointer */
  q15_t *pOut = pDst->pData;                     /* output data matrix pointer */
  uint32_t numSamples;                           /* total number of elements in the matrix */
  int32_t totShift = 15 - shift;                 /* total shift to apply after scaling */
  uint32_t blkCnt;                               /* loop counters */
  arm_status status;                             /* status of matrix scaling */

#ifndef ARM_MATH_CM0_FAMILY
  q15_t in1, in2, in3, in4;                      /* saturated 16-bit results */
  q31_t out1, out2, out3, out4;                  /* 32-bit products */
  q31_t inA1, inA2;                              /* two packed q15 inputs each */
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch */
  if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  {
    /* Total number of samples in the input matrix */
    numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;

#ifndef ARM_MATH_CM0_FAMILY

    /* Run the below code for Cortex-M4 and Cortex-M3 */

    /* Loop unrolling: four outputs per iteration; the loop further down
     * handles the remaining 0 to 3 samples. */
    blkCnt = numSamples >> 2;

    while(blkCnt > 0u)
    {
      /* C(m,n) = A(m,n) * k */

      /* Read four q15 inputs as two 32-bit words.  Only the current
       * group is loaded, so nothing past the end of the input buffer
       * is touched on the last iteration. */
      inA1 = _SIMD32_OFFSET(pIn);
      inA2 = _SIMD32_OFFSET(pIn + 2);

      /* Multiply the upper and lower halfword of each word separately */
      out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
      out2 = (q31_t) ((q15_t) inA1 * scaleFract);
      out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
      out4 = (q31_t) ((q15_t) inA2 * scaleFract);

      /* Apply the combined format/gain shift */
      out1 = out1 >> totShift;
      out2 = out2 >> totShift;
      out3 = out3 >> totShift;
      out4 = out4 >> totShift;

      /* Saturate each result to 16 bits */
      in1 = (q15_t) (__SSAT(out1, 16));
      in2 = (q15_t) (__SSAT(out2, 16));
      in3 = (q15_t) (__SSAT(out3, 16));
      in4 = (q15_t) (__SSAT(out4, 16));

      /* Repack so each result lands in the halfword its input came from,
       * then store two results per 32-bit write */
      _SIMD32_OFFSET(pOut) = __PKHBT(in2, in1, 16);
      _SIMD32_OFFSET(pOut + 2) = __PKHBT(in4, in3, 16);

      /* update pointers to process next samples */
      pIn += 4u;
      pOut += 4u;

      /* Decrement the numSamples loop counter */
      blkCnt--;
    }

    /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
    blkCnt = numSamples % 0x4u;

#else

    /* Run the below code for Cortex-M0 */

    /* Initialize blkCnt with number of samples */
    blkCnt = numSamples;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

    while(blkCnt > 0u)
    {
      /* C(m,n) = A(m,n) * k */
      /* Scale, saturate and then store the results in the destination buffer. */
      *pOut++ =
        (q15_t) (__SSAT(((q31_t) (*pIn++) * scaleFract) >> totShift, 16));

      /* Decrement the numSamples loop counter */
      blkCnt--;
    }

    /* Set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixScale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_scale_q31.c
*
* Description: Multiplies a Q31 matrix by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------ */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixScale
* @{
*/
/**
* @brief Q31 matrix scaling.
* @param[in] *pSrc points to input matrix
* @param[in] scaleFract fractional portion of the scale factor
* @param[in] shift number of bits to shift the result by
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
* \par
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
* These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
*/
arm_status arm_mat_scale_q31(
  const arm_matrix_instance_q31 * pSrc,
  q31_t scaleFract,
  int32_t shift,
  arm_matrix_instance_q31 * pDst)
{
  /*
   * Writes pDst(m,n) = pSrc(m,n) * scaleFract * 2^shift.
   *
   * Each product is reduced to its upper 32 bits, which is the 1.31
   * product divided by two; the total shift is therefore shift + 1.
   *   - total shift >= 0: shifted left, with saturation to
   *     [0x80000000, 0x7FFFFFFF] when bits would be lost.
   *   - total shift <  0: shifted right arithmetically; no saturation
   *     is needed because the magnitude can only shrink.
   *
   * With ARM_MATH_MATRIX_CHECK defined, differing dimensions are rejected
   * with ARM_MATH_SIZE_MISMATCH; otherwise ARM_MATH_SUCCESS is returned.
   */
  q31_t *pIn = pSrc->pData;                      /* input data matrix pointer */
  q31_t *pOut = pDst->pData;                     /* output data matrix pointer */
  uint32_t numSamples;                           /* total number of elements in the matrix */
  int32_t totShift = shift + 1;                  /* shift to apply after scaling */
  uint32_t blkCnt;                               /* loop counters */
  arm_status status;                             /* status of matrix scaling */
  q31_t in1, in2, out1;                          /* temporary variables */

#ifndef ARM_MATH_CM0_FAMILY
  q31_t in3, in4, out2, out3, out4;              /* temporary variables */
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch */
  if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  {
    /* Total number of samples in the input matrix */
    numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;

    if(totShift >= 0)
    {
      /* Non-negative total shift: shift left and saturate */

#ifndef ARM_MATH_CM0_FAMILY

      /* Run the below code for Cortex-M4 and Cortex-M3 */

      /* Loop unrolling: four outputs per iteration; the loop further
       * down handles the remaining 0 to 3 samples. */
      blkCnt = numSamples >> 2u;

      while(blkCnt > 0u)
      {
        /* C(m,n) = A(m,n) * k */

        /* Read values from input */
        in1 = *pIn;
        in2 = *(pIn + 1);
        in3 = *(pIn + 2);
        in4 = *(pIn + 3);

        /* Multiply by the scale value, keeping the upper 32 bits */
        in1 = ((q63_t) in1 * scaleFract) >> 32;
        in2 = ((q63_t) in2 * scaleFract) >> 32;
        in3 = ((q63_t) in3 * scaleFract) >> 32;
        in4 = ((q63_t) in4 * scaleFract) >> 32;

        /* Apply the shift */
        out1 = in1 << totShift;
        out2 = in2 << totShift;
        out3 = in3 << totShift;
        out4 = in4 << totShift;

        /* Saturate: if shifting back does not restore the input, bits
         * were lost.  0x7FFFFFFF ^ (x >> 31) yields 0x7FFFFFFF for a
         * non-negative x and 0x80000000 for a negative x. */
        if(in1 != (out1 >> totShift))
          out1 = 0x7FFFFFFF ^ (in1 >> 31);

        if(in2 != (out2 >> totShift))
          out2 = 0x7FFFFFFF ^ (in2 >> 31);

        if(in3 != (out3 >> totShift))
          out3 = 0x7FFFFFFF ^ (in3 >> 31);

        if(in4 != (out4 >> totShift))
          out4 = 0x7FFFFFFF ^ (in4 >> 31);

        /* Store the four results */
        *pOut = out1;
        *(pOut + 1) = out2;
        *(pOut + 2) = out3;
        *(pOut + 3) = out4;

        /* update pointers to process next samples */
        pIn += 4u;
        pOut += 4u;

        /* Decrement the numSamples loop counter */
        blkCnt--;
      }

      /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
       ** No loop unrolling is used. */
      blkCnt = numSamples % 0x4u;

#else

      /* Run the below code for Cortex-M0 */

      /* Initialize blkCnt with number of samples */
      blkCnt = numSamples;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

      while(blkCnt > 0u)
      {
        /* C(m,n) = A(m,n) * k */
        /* Scale, saturate and then store the results in the destination buffer. */
        in1 = *pIn++;
        in2 = ((q63_t) in1 * scaleFract) >> 32;

        out1 = in2 << totShift;

        if(in2 != (out1 >> totShift))
          out1 = 0x7FFFFFFF ^ (in2 >> 31);

        *pOut++ = out1;

        /* Decrement the numSamples loop counter */
        blkCnt--;
      }
    }
    else
    {
      /* Negative total shift (shift < -1): a left shift by a negative
       * count is undefined, so shift right instead.  The count is capped
       * at 31, which already reduces any 32-bit value to 0 or -1. */
      int32_t rShift = (totShift < -31) ? 31 : -totShift;

      blkCnt = numSamples;

      while(blkCnt > 0u)
      {
        /* C(m,n) = A(m,n) * k */
        in1 = *pIn++;
        in2 = ((q63_t) in1 * scaleFract) >> 32;

        *pOut++ = in2 >> rShift;

        /* Decrement the numSamples loop counter */
        blkCnt--;
      }
    }

    /* Set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixScale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_sub_f32.c
*
* Description: Floating-point matrix subtraction.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @defgroup MatrixSub Matrix Subtraction
*
* Subtract two matrices.
* \image html MatrixSubtraction.gif "Subtraction of two 3 x 3 matrices"
*
* The functions check to make sure that
* <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same
* number of rows and columns.
*/
/**
* @addtogroup MatrixSub
* @{
*/
/**
* @brief Floating-point matrix subtraction
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_sub_f32(
  const arm_matrix_instance_f32 * pSrcA,
  const arm_matrix_instance_f32 * pSrcB,
  arm_matrix_instance_f32 * pDst)
{
  /*
   * Writes pDst(m,n) = pSrcA(m,n) - pSrcB(m,n) for every element.
   * With ARM_MATH_MATRIX_CHECK defined, all three matrices must share the
   * same shape or ARM_MATH_SIZE_MISMATCH is returned; otherwise the
   * function returns ARM_MATH_SUCCESS.
   */
  float32_t *pA = pSrcA->pData;                  /* minuend cursor */
  float32_t *pB = pSrcB->pData;                  /* subtrahend cursor */
  float32_t *pC = pDst->pData;                   /* result cursor */
  uint32_t total;                                /* number of matrix elements */
  uint32_t remaining;                            /* loop counter */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Both inputs and the output must have identical shapes */
  if((pSrcA->numRows != pSrcB->numRows) ||
     (pSrcA->numCols != pSrcB->numCols) ||
     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 / Cortex-M3: handle four elements per iteration */
  for(remaining = total >> 2u; remaining > 0u; remaining--)
  {
    /* Elements 0..2 of both inputs are loaded first */
    float32_t diff0 = pA[0] - pB[0];
    float32_t diff1 = pA[1] - pB[1];
    float32_t a2 = pA[2];
    float32_t b2 = pB[2];
    float32_t diff3;

    /* The first two results are stored before element 3 is loaded */
    pC[0] = diff0;
    pC[1] = diff1;

    diff3 = pA[3] - pB[3];

    pC[2] = a2 - b2;
    pC[3] = diff3;

    pA += 4u;
    pB += 4u;
    pC += 4u;
  }

  /* 0 to 3 elements are left over for the scalar loop below */
  remaining = total % 0x4u;

#else

  /* Cortex-M0: every element goes through the scalar loop */
  remaining = total;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) - B(m,n) */
    *pC++ = (*pA++) - (*pB++);
  }

  return (ARM_MATH_SUCCESS);
}
/**
* @} end of MatrixSub group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_sub_q15.c
*
* Description: Q15 Matrix subtraction
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixSub
* @{
*/
/**
* @brief Q15 matrix subtraction.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
*/
arm_status arm_mat_sub_q15(
  const arm_matrix_instance_q15 * pSrcA,
  const arm_matrix_instance_q15 * pSrcB,
  arm_matrix_instance_q15 * pDst)
{
  /*
   * Writes pDst(m,n) = pSrcA(m,n) - pSrcB(m,n) for every element, using
   * the __QSUB16 intrinsic on Cortex-M4/M3 and a 32-bit subtraction
   * followed by __SSAT(..., 16) on Cortex-M0.
   * With ARM_MATH_MATRIX_CHECK defined, all three matrices must share the
   * same shape or ARM_MATH_SIZE_MISMATCH is returned; otherwise the
   * function returns ARM_MATH_SUCCESS.
   */
  q15_t *pA = pSrcA->pData;                      /* minuend cursor */
  q15_t *pB = pSrcB->pData;                      /* subtrahend cursor */
  q15_t *pC = pDst->pData;                       /* result cursor */
  uint32_t total;                                /* number of matrix elements */
  uint32_t remaining;                            /* loop counter */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Both inputs and the output must have identical shapes */
  if((pSrcA->numRows != pSrcB->numRows) ||
     (pSrcA->numCols != pSrcB->numCols) ||
     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 / Cortex-M3: four q15 elements per iteration, accessed as
   * two 32-bit words holding two elements each */
  for(remaining = total >> 2u; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) - B(m,n) */
    *__SIMD32(pC)++ = __QSUB16(*__SIMD32(pA)++, *__SIMD32(pB)++);
    *__SIMD32(pC)++ = __QSUB16(*__SIMD32(pA)++, *__SIMD32(pB)++);
  }

  /* Remaining 0 to 3 elements, one at a time */
  for(remaining = total % 0x4u; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) - B(m,n) */
    *pC++ = (q15_t) __QSUB16(*pA++, *pB++);
  }

#else

  /* Cortex-M0: one element at a time, saturated to 16 bits */
  for(remaining = total; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) - B(m,n) */
    *pC++ = (q15_t) __SSAT(((q31_t) * pA++ - *pB++), 16);
  }

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  return (ARM_MATH_SUCCESS);
}
/**
* @} end of MatrixSub group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_sub_q31.c
*
* Description: Q31 matrix subtraction
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixSub
* @{
*/
/**
 * @brief Q31 matrix subtraction.
 * @param[in]       *pSrcA points to the first input matrix structure (minuend)
 * @param[in]       *pSrcB points to the second input matrix structure (subtrahend)
 * @param[out]      *pDst points to output matrix structure
 * @return     		The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * Every element is computed with <code>__QSUB</code>, i.e. saturating arithmetic.
 * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
 *
 * \par
 * The dimension check is compiled in only when <code>ARM_MATH_MATRIX_CHECK</code> is defined;
 * without it the function always returns <code>ARM_MATH_SUCCESS</code>.
 * The element count is taken from <code>pSrcA</code> (numRows * numCols).
 */
arm_status arm_mat_sub_q31(
  const arm_matrix_instance_q31 * pSrcA,
  const arm_matrix_instance_q31 * pSrcB,
  arm_matrix_instance_q31 * pDst)
{
  q31_t *pA = pSrcA->pData;                      /* read cursor into matrix A */
  q31_t *pB = pSrcB->pData;                      /* read cursor into matrix B */
  q31_t *pC = pDst->pData;                       /* write cursor into the result */
  q31_t a0, b0;                                  /* operands of one subtraction */
#ifndef ARM_MATH_CM0_FAMILY
  q31_t a1, b1;                                  /* operands of the interleaved subtraction */
  q31_t diff0, diff1;                            /* saturated differences awaiting store */
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
  uint32_t total;                                /* number of elements in the matrix */
  uint32_t remaining;                            /* loop counter */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Both inputs and the output must share the same dimensions */
  if((pSrcA->numRows != pSrcB->numRows) ||
     (pSrcA->numCols != pSrcB->numCols) ||
     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  /* The matrices are processed as one flat array of numRows * numCols elements */
  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: four elements per iteration, loads interleaved
   * with the saturating subtractions; the leftover 0..3 elements are
   * handled by the scalar loop further down. */
  for(remaining = total >> 2u; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) - B(m,n) */
    a0 = pA[0];
    b0 = pB[0];
    a1 = pA[1];
    diff0 = __QSUB(a0, b0);

    b1 = pB[1];
    a0 = pA[2];
    diff1 = __QSUB(a1, b1);

    b0 = pB[2];

    /* First pair of results */
    pC[0] = diff0;
    pC[1] = diff1;

    a1 = pA[3];
    b1 = pB[3];

    /* Second pair of results */
    pC[2] = __QSUB(a0, b0);
    pC[3] = __QSUB(a1, b1);

    /* Advance all three cursors past the processed group */
    pA += 4u;
    pB += 4u;
    pC += 4u;
  }

  /* Elements left over when the total is not a multiple of 4 */
  remaining = total % 0x4u;
#else
  /* Cortex-M0: no unrolling, every element goes through the scalar loop */
  remaining = total;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) - B(m,n), saturated */
    a0 = *pA++;
    b0 = *pB++;
    *pC++ = __QSUB(a0, b0);
  }

  /* Return to application */
  return (ARM_MATH_SUCCESS);
}
/**
* @} end of MatrixSub group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_trans_f32.c
*
* Description: Floating-point matrix transpose.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
/**
* @defgroup MatrixTrans Matrix Transpose
*
* Transposes a matrix.
* Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
* \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
*/
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixTrans
* @{
*/
/**
 * @brief Floating-point matrix transpose.
 * @param[in]  *pSrc points to the input matrix
 * @param[out] *pDst points to the output matrix
 * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
 * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * \par
 * The size check (pDst must be numCols x numRows of pSrc) is compiled in only
 * when <code>ARM_MATH_MATRIX_CHECK</code> is defined. The function never writes
 * the dimension fields of <code>pDst</code>; only its data buffer is filled.
 *
 * \par
 * A source with zero rows or zero columns copies nothing and returns
 * <code>ARM_MATH_SUCCESS</code>.
 */
arm_status arm_mat_trans_f32(
  const arm_matrix_instance_f32 * pSrc,
  arm_matrix_instance_f32 * pDst)
{
  float32_t *pIn = pSrc->pData;                  /* input data matrix pointer */
  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
  float32_t *px;                                 /* write cursor walking down one output column */
  uint16_t nRows = pSrc->numRows;                /* number of rows of the source */
  uint16_t nColumns = pSrc->numCols;             /* number of columns of the source */
  uint16_t col;                                  /* column loop counter */
  uint16_t i = 0u;                               /* index of the source row being processed */
  uint16_t row = nRows;                          /* row loop counter */
  arm_status status;                             /* status of matrix transpose */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  {
    /* Matrix transpose by exchanging the rows with columns.
     * The loop is pre-tested on purpose: with a post-tested loop a source
     * with zero rows would execute the body once and then wrap the 16-bit
     * row counter, running far past the end of both buffers. */
    while(row > 0u)
    {
      /* Source row i becomes output column i: start at element i of the
       * first output row and step by nRows (the output row length). */
      px = pOut + i;

#ifndef ARM_MATH_CM0_FAMILY
      /* Cortex-M4 / Cortex-M3: copy four elements per iteration; the
       * remaining 0..3 elements are handled by the loop below. */
      col = nColumns >> 2;

      while(col > 0u)
      {
        *px = *pIn++;
        px += nRows;

        *px = *pIn++;
        px += nRows;

        *px = *pIn++;
        px += nRows;

        *px = *pIn++;
        px += nRows;

        /* Decrement the column loop counter */
        col--;
      }

      /* Elements left over when nColumns is not a multiple of 4 */
      col = nColumns % 0x4u;
#else
      /* Cortex-M0: no unrolling, copy the whole row element by element */
      col = nColumns;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

      while(col > 0u)
      {
        /* Read and store the input element in the destination */
        *px = *pIn++;

        /* Move to the next row of the transposed matrix */
        px += nRows;

        /* Decrement the column loop counter */
        col--;
      }

      i++;

      /* Decrement the row loop counter */
      row--;
    }

    /* Set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixTrans group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_trans_q15.c
*
* Description: Q15 matrix transpose.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixTrans
* @{
*/
/**
 * @brief Q15 matrix transpose.
 * @param[in]  *pSrc points to the input matrix
 * @param[out] *pDst points to the output matrix
 * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
 * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * \par
 * The size check (pDst must be numCols x numRows of pSrc) is compiled in only
 * when <code>ARM_MATH_MATRIX_CHECK</code> is defined. The function never writes
 * the dimension fields of <code>pDst</code>; only its data buffer is filled.
 *
 * \par
 * A source with zero rows or zero columns copies nothing and returns
 * <code>ARM_MATH_SUCCESS</code>.
 */
arm_status arm_mat_trans_q15(
  const arm_matrix_instance_q15 * pSrc,
  arm_matrix_instance_q15 * pDst)
{
  q15_t *pSrcA = pSrc->pData;                    /* input data matrix pointer */
  q15_t *pOut = pDst->pData;                     /* output data matrix pointer */
  uint16_t nRows = pSrc->numRows;                /* number of rows of the source */
  uint16_t nColumns = pSrc->numCols;             /* number of columns of the source */
  uint16_t col, row = nRows, i = 0u;             /* column counter, row counter, source row index */
  arm_status status;                             /* status of matrix transpose */

#ifndef ARM_MATH_CM0_FAMILY
#ifndef UNALIGNED_SUPPORT_DISABLE
  q31_t in;                                      /* two packed Q15 elements read with one 32-bit access */
#else
  q15_t in;                                      /* single Q15 element */
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  {
    /* Matrix transpose by exchanging the rows with columns.
     * The loop is pre-tested on purpose: with a post-tested loop a source
     * with zero rows would execute the body once and then wrap the 16-bit
     * row counter, running far past the end of both buffers. */
    while(row > 0u)
    {
      /* Source row i becomes output column i: start at element i of the
       * first output row and step by nRows (the output row length). */
      pOut = pDst->pData + i;

#ifndef ARM_MATH_CM0_FAMILY
      /* Cortex-M4 / Cortex-M3: copy four elements per iteration; the
       * remaining 0..3 elements are handled by the loop below. */
      col = nColumns >> 2u;

      while(col > 0u)
      {
#ifndef UNALIGNED_SUPPORT_DISABLE
        /* Read two elements from the row with a single 32-bit load */
        in = *__SIMD32(pSrcA)++;

        /* Unpack and store the first element (which half of the word it
         * occupies depends on the endianness) */
#ifndef ARM_MATH_BIG_ENDIAN
        *pOut = (q15_t) in;
#else
        *pOut = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Move to the next row of the transposed matrix */
        pOut += nRows;

        /* Unpack and store the second element */
#ifndef ARM_MATH_BIG_ENDIAN
        *pOut = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
#else
        *pOut = (q15_t) in;
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Move to the next row of the transposed matrix */
        pOut += nRows;

        /* Read the next two elements from the row */
        in = *__SIMD32(pSrcA)++;

        /* Unpack and store the third element */
#ifndef ARM_MATH_BIG_ENDIAN
        *pOut = (q15_t) in;
#else
        *pOut = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Move to the next row of the transposed matrix */
        pOut += nRows;

        /* Unpack and store the fourth element */
#ifndef ARM_MATH_BIG_ENDIAN
        *pOut = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
#else
        *pOut = (q15_t) in;
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

#else
        /* No packed 32-bit reads: copy the four elements one at a time */
        in = *pSrcA++;
        *pOut = in;
        pOut += nRows;

        in = *pSrcA++;
        *pOut = in;
        pOut += nRows;

        in = *pSrcA++;
        *pOut = in;
        pOut += nRows;

        in = *pSrcA++;
        *pOut = in;

#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */

        /* Step past the fourth stored element (shared by both variants) */
        pOut += nRows;

        /* Decrement the column loop counter */
        col--;
      }

      /* Elements left over when nColumns is not a multiple of 4 */
      col = nColumns % 0x4u;
#else
      /* Cortex-M0: no unrolling, copy the whole row element by element */
      col = nColumns;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

      while(col > 0u)
      {
        /* Read and store the input element in the destination */
        *pOut = *pSrcA++;

        /* Move to the next row of the transposed matrix */
        pOut += nRows;

        /* Decrement the column loop counter */
        col--;
      }

      i++;

      /* Decrement the row loop counter */
      row--;
    }

    /* Set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixTrans group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mat_trans_q31.c
*
* Description: Q31 matrix transpose.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMatrix
*/
/**
* @addtogroup MatrixTrans
* @{
*/
/**
 * @brief Q31 matrix transpose.
 * @param[in]  *pSrc points to the input matrix
 * @param[out] *pDst points to the output matrix
 * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
 * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * \par
 * The size check (pDst must be numCols x numRows of pSrc) is compiled in only
 * when <code>ARM_MATH_MATRIX_CHECK</code> is defined. The function never writes
 * the dimension fields of <code>pDst</code>; only its data buffer is filled.
 *
 * \par
 * A source with zero rows or zero columns copies nothing and returns
 * <code>ARM_MATH_SUCCESS</code>.
 */
arm_status arm_mat_trans_q31(
  const arm_matrix_instance_q31 * pSrc,
  arm_matrix_instance_q31 * pDst)
{
  q31_t *pIn = pSrc->pData;                      /* input data matrix pointer */
  q31_t *pOut = pDst->pData;                     /* output data matrix pointer */
  q31_t *px;                                     /* write cursor walking down one output column */
  uint16_t nRows = pSrc->numRows;                /* number of rows of the source */
  uint16_t nColumns = pSrc->numCols;             /* number of columns of the source */
  uint16_t col;                                  /* column loop counter */
  uint16_t i = 0u;                               /* index of the source row being processed */
  uint16_t row = nRows;                          /* row loop counter */
  arm_status status;                             /* status of matrix transpose */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  {
    /* Set status as ARM_MATH_SIZE_MISMATCH */
    status = ARM_MATH_SIZE_MISMATCH;
  }
  else
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  {
    /* Matrix transpose by exchanging the rows with columns.
     * The loop is pre-tested on purpose: with a post-tested loop a source
     * with zero rows would execute the body once and then wrap the 16-bit
     * row counter, running far past the end of both buffers. */
    while(row > 0u)
    {
      /* Source row i becomes output column i: start at element i of the
       * first output row and step by nRows (the output row length). */
      px = pOut + i;

#ifndef ARM_MATH_CM0_FAMILY
      /* Cortex-M4 / Cortex-M3: copy four elements per iteration; the
       * remaining 0..3 elements are handled by the loop below. */
      col = nColumns >> 2u;

      while(col > 0u)
      {
        *px = *pIn++;
        px += nRows;

        *px = *pIn++;
        px += nRows;

        *px = *pIn++;
        px += nRows;

        *px = *pIn++;
        px += nRows;

        /* Decrement the column loop counter */
        col--;
      }

      /* Elements left over when nColumns is not a multiple of 4 */
      col = nColumns % 0x4u;
#else
      /* Cortex-M0: no unrolling, copy the whole row element by element */
      col = nColumns;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

      while(col > 0u)
      {
        /* Read and store the input element in the destination */
        *px = *pIn++;

        /* Move to the next row of the transposed matrix */
        px += nRows;

        /* Decrement the column loop counter */
        col--;
      }

      i++;

      /* Decrement the row loop counter */
      row--;
    }

    /* Set status as ARM_MATH_SUCCESS */
    status = ARM_MATH_SUCCESS;
  }

  /* Return to application */
  return (status);
}
/**
* @} end of MatrixTrans group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_max_f32.c
*
* Description: Maximum value of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup Max Maximum
*
* Computes the maximum value of an array of data.
* The function returns both the maximum value and its position within the array.
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a floating-point vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult maximum value returned here
 * @param[out]      *pIndex index of maximum value returned here
 * @return none.
 *
 * \par
 * The first element is read unconditionally and the remaining count is
 * computed as <code>blockSize - 1u</code>, so <code>blockSize</code> must be at least 1.
 * Because the comparison is a strict "less than", the index of the first
 * occurrence is reported when the maximum appears more than once.
 */
void arm_max_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
  float32_t best;                                /* largest value seen so far */
  float32_t cand0;                               /* sample being compared */
  uint32_t bestIdx;                              /* position of 'best' in the input */
  uint32_t left;                                 /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
  float32_t cand1;                               /* second sample, loaded ahead of its comparison */
  uint32_t base;                                 /* index of the element preceding the current group */
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  /* Element 0 is the initial reference for all comparisons */
  bestIdx = 0u;
  best = *pSrc++;

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: examine four elements per iteration */
  base = 0u;

  for(left = (blockSize - 1u) >> 2u; left > 0u; left--)
  {
    /* pSrc[k] is input element base + 1 + k */
    cand0 = pSrc[0];
    cand1 = pSrc[1];

    if(best < cand0)
    {
      best = cand0;
      bestIdx = base + 1u;
    }

    cand0 = pSrc[2];

    if(best < cand1)
    {
      best = cand1;
      bestIdx = base + 2u;
    }

    cand1 = pSrc[3];

    if(best < cand0)
    {
      best = cand0;
      bestIdx = base + 3u;
    }

    if(best < cand1)
    {
      best = cand1;
      bestIdx = base + 4u;
    }

    pSrc += 4u;
    base += 4u;
  }

  /* Elements left over when (blockSize - 1u) is not a multiple of 4 */
  left = (blockSize - 1u) % 4u;
#else
  /* Cortex-M0: every element after the first goes through the scalar loop */
  left = (blockSize - 1u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; left > 0u; left--)
  {
    cand0 = *pSrc++;

    if(best < cand0)
    {
      /* 'left' elements remain including this one, so it sits at
       * position blockSize - left */
      best = cand0;
      bestIdx = blockSize - left;
    }
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = best;
  *pIndex = bestIdx;
}
/**
* @} end of Max group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_max_q15.c
*
* Description: Maximum value of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a Q15 vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult maximum value returned here
 * @param[out]      *pIndex index of maximum value returned here
 * @return none.
 *
 * \par
 * The first element is read unconditionally and the remaining count is
 * computed as <code>blockSize - 1u</code>, so <code>blockSize</code> must be at least 1.
 * Because the comparison is a strict "less than", the index of the first
 * occurrence is reported when the maximum appears more than once.
 */
void arm_max_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult,
  uint32_t * pIndex)
{
  q15_t best;                                    /* largest value seen so far */
  q15_t cand0;                                   /* sample being compared */
  uint32_t bestIdx;                              /* position of 'best' in the input */
  uint32_t left;                                 /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
  q15_t cand1;                                   /* second sample, loaded ahead of its comparison */
  uint32_t base;                                 /* index of the element preceding the current group */
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  /* Element 0 is the initial reference for all comparisons */
  bestIdx = 0u;
  best = *pSrc++;

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: examine four elements per iteration */
  base = 0u;

  for(left = (blockSize - 1u) >> 2u; left > 0u; left--)
  {
    /* pSrc[k] is input element base + 1 + k */
    cand0 = pSrc[0];
    cand1 = pSrc[1];

    if(best < cand0)
    {
      best = cand0;
      bestIdx = base + 1u;
    }

    cand0 = pSrc[2];

    if(best < cand1)
    {
      best = cand1;
      bestIdx = base + 2u;
    }

    cand1 = pSrc[3];

    if(best < cand0)
    {
      best = cand0;
      bestIdx = base + 3u;
    }

    if(best < cand1)
    {
      best = cand1;
      bestIdx = base + 4u;
    }

    pSrc += 4u;
    base += 4u;
  }

  /* Elements left over when (blockSize - 1u) is not a multiple of 4 */
  left = (blockSize - 1u) % 4u;
#else
  /* Cortex-M0: every element after the first goes through the scalar loop */
  left = (blockSize - 1u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for(; left > 0u; left--)
  {
    cand0 = *pSrc++;

    if(best < cand0)
    {
      /* 'left' elements remain including this one, so it sits at
       * position blockSize - left */
      best = cand0;
      bestIdx = blockSize - left;
    }
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = best;
  *pIndex = bestIdx;
}
/**
* @} end of Max group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_max_q31.c
*
* Description: Maximum value of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a Q31 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   maximum value returned here
 * @param[out] *pIndex    index of maximum value returned here
 * @return none.
 *
 * @details
 * A candidate only replaces the running maximum when it is strictly
 * greater, so when the maximum occurs more than once the index of its
 * first occurrence is reported.
 * \par
 * An empty vector (blockSize == 0) has no maximum. In that case no input
 * element is read, *pResult is set to the most negative Q31 value and
 * *pIndex is set to 0.
 */
void arm_max_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult,
  uint32_t * pIndex)
{
  q31_t cur;                    /* element examined by the tail loop */
  q31_t out;                    /* running maximum */
  uint32_t blkCnt;              /* loop counter */
  uint32_t outIndex = 0u;       /* index of the running maximum */
#ifndef ARM_MATH_CM0_FAMILY
  uint32_t count = 0u;          /* index of the element preceding the current group of four */
#endif

  /* Without this guard (blockSize - 1u) would wrap around to 0xFFFFFFFF
   * and the loops below would walk far past the end of the input. */
  if (blockSize == 0u)
  {
    *pResult = (q31_t) (-2147483647 - 1);
    *pIndex = 0u;
    return;
  }

  /* The first element is the initial reference value */
  out = *pSrc++;

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: examine the remaining elements four at a time */
  blkCnt = (blockSize - 1u) >> 2u;

  while (blkCnt > 0u)
  {
    /* pSrc[k] is element (count + 1 + k) of the input vector */
    if (out < pSrc[0])
    {
      out = pSrc[0];
      outIndex = count + 1u;
    }
    if (out < pSrc[1])
    {
      out = pSrc[1];
      outIndex = count + 2u;
    }
    if (out < pSrc[2])
    {
      out = pSrc[2];
      outIndex = count + 3u;
    }
    if (out < pSrc[3])
    {
      out = pSrc[3];
      outIndex = count + 4u;
    }

    pSrc += 4;
    count += 4u;
    blkCnt--;
  }

  /* Elements left over when (blockSize - 1u) is not a multiple of 4 */
  blkCnt = (blockSize - 1u) % 4u;
#else
  /* Cortex-M0: every remaining element goes through the loop below */
  blkCnt = blockSize - 1u;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    cur = *pSrc++;

    if (out < cur)
    {
      out = cur;
      /* blkCnt elements are still unread, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    blkCnt--;
  }

  /* Store the maximum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Max group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_max_q7.c
*
* Description: Maximum value of a Q7 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a Q7 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   maximum value returned here
 * @param[out] *pIndex    index of maximum value returned here
 * @return none.
 *
 * @details
 * A candidate only replaces the running maximum when it is strictly
 * greater, so when the maximum occurs more than once the index of its
 * first occurrence is reported.
 * \par
 * An empty vector (blockSize == 0) has no maximum. In that case no input
 * element is read, *pResult is set to the most negative Q7 value and
 * *pIndex is set to 0.
 */
void arm_max_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult,
  uint32_t * pIndex)
{
  q7_t cur;                     /* element examined by the tail loop */
  q7_t out;                     /* running maximum */
  uint32_t blkCnt;              /* loop counter */
  uint32_t outIndex = 0u;       /* index of the running maximum */
#ifndef ARM_MATH_CM0_FAMILY
  uint32_t count = 0u;          /* index of the element preceding the current group of four */
#endif

  /* Without this guard (blockSize - 1u) would wrap around to 0xFFFFFFFF
   * and the loops below would walk far past the end of the input. */
  if (blockSize == 0u)
  {
    *pResult = (q7_t) (-128);
    *pIndex = 0u;
    return;
  }

  /* The first element is the initial reference value */
  out = *pSrc++;

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: examine the remaining elements four at a time */
  blkCnt = (blockSize - 1u) >> 2u;

  while (blkCnt > 0u)
  {
    /* pSrc[k] is element (count + 1 + k) of the input vector */
    if (out < pSrc[0])
    {
      out = pSrc[0];
      outIndex = count + 1u;
    }
    if (out < pSrc[1])
    {
      out = pSrc[1];
      outIndex = count + 2u;
    }
    if (out < pSrc[2])
    {
      out = pSrc[2];
      outIndex = count + 3u;
    }
    if (out < pSrc[3])
    {
      out = pSrc[3];
      outIndex = count + 4u;
    }

    pSrc += 4;
    count += 4u;
    blkCnt--;
  }

  /* Elements left over when (blockSize - 1u) is not a multiple of 4 */
  blkCnt = (blockSize - 1u) % 4u;
#else
  /* Cortex-M0: every remaining element goes through the loop below */
  blkCnt = blockSize - 1u;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    cur = *pSrc++;

    if (out < cur)
    {
      out = cur;
      /* blkCnt elements are still unread, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    blkCnt--;
  }

  /* Store the maximum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Max group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mean_f32.c
*
* Description: Mean value of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup mean Mean
*
* Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
* The underlying algorithm is used:
*
* <pre>
* Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
* </pre>
*
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a floating-point vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * The elements are added to a single-precision accumulator in index order
 * and the total is divided by blockSize converted to floating point.
 * No special case is made for blockSize == 0: the stored value is then the
 * quotient 0.0f / 0.0f.
 */
void arm_mean_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  float32_t acc = 0.0f;         /* running total */
  uint32_t remaining;           /* elements handled by the tail loop */

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: consume the input in groups of four */
  uint32_t quads;

  for (quads = blockSize >> 2u; quads > 0u; quads--)
  {
    /* One addition per element, in index order */
    acc += pSrc[0];
    acc += pSrc[1];
    acc += pSrc[2];
    acc += pSrc[3];
    pSrc += 4;
  }

  /* 0 to 3 elements remain after the unrolled loop */
  remaining = blockSize & 0x3u;
#else
  /* Cortex-M0: the tail loop processes the whole vector */
  remaining = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  for (; remaining > 0u; remaining--)
  {
    acc += *pSrc++;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  *pResult = acc / (float32_t) blockSize;
}
/**
* @} end of mean group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mean_q15.c
*
* Description: Mean value of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a Q15 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function is implemented using a 32-bit internal accumulator.
 * The input is represented in 1.15 format and is accumulated in a 32-bit
 * accumulator in 17.15 format, which cannot overflow for vectors of up to
 * 65536 elements.
 * The accumulator is then divided by blockSize using integer division and
 * the quotient is stored in 1.15 format. No saturation step is needed
 * because the mean of Q15 values always fits in Q15.
 * \par
 * An empty vector (blockSize == 0) has no mean. In that case no input
 * element is read and *pResult is set to 0 instead of dividing by zero.
 */
void arm_mean_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult)
{
  q31_t sum = 0;                /* 17.15 accumulator */
  uint32_t blkCnt;              /* loop counter */

  /* Integer division by zero is undefined, so answer the empty case here */
  if (blockSize == 0u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: add four samples per iteration */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* Each sample is read through the q15_t pointer itself. Fetching two
     * samples with one 32-bit load would require a word-aligned pSrc and a
     * type-punned access, and unpacking them would shift negative values. */
    sum += pSrc[0];
    sum += pSrc[1];
    sum += pSrc[2];
    sum += pSrc[3];
    pSrc += 4;

    blkCnt--;
  }

  /* 0 to 3 samples remain when blockSize is not a multiple of 4 */
  blkCnt = blockSize % 0x4u;
#else
  /* Cortex-M0: the loop below processes the whole vector */
  blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    sum += *pSrc++;
    blkCnt--;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  *pResult = (q15_t) (sum / (q31_t) blockSize);
}
/**
* @} end of mean group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mean_q31.c
*
* Description: Mean value of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a Q31 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function is implemented using a 64-bit internal accumulator.
 * The input is represented in 1.31 format and is accumulated in a 64-bit
 * accumulator in 33.31 format.
 * There is no risk of internal overflow with this approach, and the
 * full precision of intermediate result is preserved.
 * Finally, the accumulator is divided by blockSize using 64-bit integer
 * division and the quotient is stored in 1.31 format.
 * \par
 * An empty vector (blockSize == 0) has no mean. In that case no input
 * element is read and *pResult is set to 0 instead of dividing by zero.
 */
void arm_mean_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  q63_t sum = 0;                /* 33.31 accumulator */
  uint32_t blkCnt;              /* loop counter */

  /* Integer division by zero is undefined, so answer the empty case here */
  if (blockSize == 0u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: add four samples per iteration */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    sum += pSrc[0];
    sum += pSrc[1];
    sum += pSrc[2];
    sum += pSrc[3];
    pSrc += 4;

    blkCnt--;
  }

  /* 0 to 3 samples remain when blockSize is not a multiple of 4 */
  blkCnt = blockSize % 0x4u;
#else
  /* Cortex-M0: the loop below processes the whole vector */
  blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    sum += *pSrc++;
    blkCnt--;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize.
   * The divisor is widened to 64 bits so that it keeps its value (and its
   * positive sign) for every uint32_t count. */
  *pResult = (q31_t) (sum / (q63_t) blockSize);
}
/**
* @} end of mean group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mean_q7.c
*
* Description: Mean value of a Q7 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a Q7 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function is implemented using a 32-bit internal accumulator.
 * The input is represented in 1.7 format and is accumulated in a 32-bit
 * accumulator in 25.7 format, which cannot overflow for vectors of up to
 * 16777216 elements.
 * Finally, the accumulator is divided by blockSize using integer division
 * and the quotient is stored in 1.7 format.
 * \par
 * An empty vector (blockSize == 0) has no mean. In that case no input
 * element is read and *pResult is set to 0 instead of dividing by zero.
 */
void arm_mean_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult)
{
  q31_t sum = 0;                /* 25.7 accumulator */
  uint32_t blkCnt;              /* loop counter */

  /* Integer division by zero is undefined, so answer the empty case here */
  if (blockSize == 0u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0_FAMILY
  /* Cortex-M4 / Cortex-M3: add four samples per iteration */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* Each sample is read through the q7_t pointer itself. Fetching four
     * samples with one 32-bit load would require a word-aligned pSrc and a
     * type-punned access, and unpacking them would shift negative values. */
    sum += pSrc[0];
    sum += pSrc[1];
    sum += pSrc[2];
    sum += pSrc[3];
    pSrc += 4;

    blkCnt--;
  }

  /* 0 to 3 samples remain when blockSize is not a multiple of 4 */
  blkCnt = blockSize % 0x4u;
#else
  /* Cortex-M0: the loop below processes the whole vector */
  blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    sum += *pSrc++;
    blkCnt--;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  *pResult = (q7_t) (sum / (int32_t) blockSize);
}
/**
* @} end of mean group
*/
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment