init
This commit is contained in:
414
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
Normal file
414
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
Normal file
@@ -0,0 +1,414 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_barycenter_f32.c
|
||||
* Description: Barycenter
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
/**
|
||||
@ingroup barycenter
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Barycenter
|
||||
*
|
||||
*
|
||||
* @param[in] *in List of vectors
|
||||
* @param[in] *weights Weights of the vectors
|
||||
* @param[out] *out Barycenter
|
||||
* @param[in] nbVectors Number of vectors
|
||||
* @param[in] vecDim Dimension of space (vector dimension)
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_barycenter_f32(const float32_t *in,
|
||||
const float32_t *weights,
|
||||
float32_t *out,
|
||||
uint32_t nbVectors,
|
||||
uint32_t vecDim)
|
||||
{
|
||||
const float32_t *pIn, *pW;
|
||||
const float32_t *pIn1, *pIn2, *pIn3, *pIn4;
|
||||
float32_t *pOut;
|
||||
uint32_t blkCntVector, blkCntSample;
|
||||
float32_t accum, w;
|
||||
|
||||
blkCntVector = nbVectors;
|
||||
blkCntSample = vecDim;
|
||||
|
||||
accum = 0.0f;
|
||||
|
||||
pW = weights;
|
||||
pIn = in;
|
||||
|
||||
|
||||
arm_fill_f32(0.0f, out, vecDim);
|
||||
|
||||
|
||||
/* Sum */
|
||||
pIn1 = pIn;
|
||||
pIn2 = pIn1 + vecDim;
|
||||
pIn3 = pIn2 + vecDim;
|
||||
pIn4 = pIn3 + vecDim;
|
||||
|
||||
blkCntVector = nbVectors >> 2;
|
||||
while (blkCntVector > 0)
|
||||
{
|
||||
f32x4_t outV, inV1, inV2, inV3, inV4;
|
||||
float32_t w1, w2, w3, w4;
|
||||
|
||||
pOut = out;
|
||||
w1 = *pW++;
|
||||
w2 = *pW++;
|
||||
w3 = *pW++;
|
||||
w4 = *pW++;
|
||||
accum += w1 + w2 + w3 + w4;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while (blkCntSample > 0) {
|
||||
outV = vld1q((const float32_t *) pOut);
|
||||
inV1 = vld1q(pIn1);
|
||||
inV2 = vld1q(pIn2);
|
||||
inV3 = vld1q(pIn3);
|
||||
inV4 = vld1q(pIn4);
|
||||
outV = vfmaq(outV, inV1, w1);
|
||||
outV = vfmaq(outV, inV2, w2);
|
||||
outV = vfmaq(outV, inV3, w3);
|
||||
outV = vfmaq(outV, inV4, w4);
|
||||
vst1q(pOut, outV);
|
||||
|
||||
pOut += 4;
|
||||
pIn1 += 4;
|
||||
pIn2 += 4;
|
||||
pIn3 += 4;
|
||||
pIn4 += 4;
|
||||
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while (blkCntSample > 0) {
|
||||
*pOut = *pOut + *pIn1++ * w1;
|
||||
*pOut = *pOut + *pIn2++ * w2;
|
||||
*pOut = *pOut + *pIn3++ * w3;
|
||||
*pOut = *pOut + *pIn4++ * w4;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
pIn1 += 3 * vecDim;
|
||||
pIn2 += 3 * vecDim;
|
||||
pIn3 += 3 * vecDim;
|
||||
pIn4 += 3 * vecDim;
|
||||
|
||||
blkCntVector--;
|
||||
}
|
||||
|
||||
pIn = pIn1;
|
||||
|
||||
blkCntVector = nbVectors & 3;
|
||||
while (blkCntVector > 0)
|
||||
{
|
||||
f32x4_t inV, outV;
|
||||
|
||||
pOut = out;
|
||||
w = *pW++;
|
||||
accum += w;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while (blkCntSample > 0)
|
||||
{
|
||||
outV = vld1q_f32(pOut);
|
||||
inV = vld1q_f32(pIn);
|
||||
outV = vfmaq(outV, inV, w);
|
||||
vst1q_f32(pOut, outV);
|
||||
pOut += 4;
|
||||
pIn += 4;
|
||||
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while (blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut + *pIn++ * w;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntVector--;
|
||||
}
|
||||
|
||||
/* Normalize */
|
||||
pOut = out;
|
||||
accum = 1.0f / accum;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while (blkCntSample > 0)
|
||||
{
|
||||
f32x4_t tmp;
|
||||
|
||||
tmp = vld1q((const float32_t *) pOut);
|
||||
tmp = vmulq(tmp, accum);
|
||||
vst1q(pOut, tmp);
|
||||
pOut += 4;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while (blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut * accum;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON)
|
||||
|
||||
#include "NEMath.h"
|
||||
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
|
||||
{
|
||||
|
||||
const float32_t *pIn,*pW, *pIn1, *pIn2, *pIn3, *pIn4;
|
||||
float32_t *pOut;
|
||||
uint32_t blkCntVector,blkCntSample;
|
||||
float32_t accum, w,w1,w2,w3,w4;
|
||||
|
||||
float32x4_t tmp, inV,outV, inV1, inV2, inV3, inV4;
|
||||
|
||||
blkCntVector = nbVectors;
|
||||
blkCntSample = vecDim;
|
||||
|
||||
accum = 0.0f;
|
||||
|
||||
pW = weights;
|
||||
pIn = in;
|
||||
|
||||
/* Set counters to 0 */
|
||||
tmp = vdupq_n_f32(0.0f);
|
||||
pOut = out;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
vst1q_f32(pOut, tmp);
|
||||
pOut += 4;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = 0.0f;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
/* Sum */
|
||||
|
||||
pIn1 = pIn;
|
||||
pIn2 = pIn1 + vecDim;
|
||||
pIn3 = pIn2 + vecDim;
|
||||
pIn4 = pIn3 + vecDim;
|
||||
|
||||
blkCntVector = nbVectors >> 2;
|
||||
while(blkCntVector > 0)
|
||||
{
|
||||
pOut = out;
|
||||
w1 = *pW++;
|
||||
w2 = *pW++;
|
||||
w3 = *pW++;
|
||||
w4 = *pW++;
|
||||
accum += w1 + w2 + w3 + w4;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
outV = vld1q_f32(pOut);
|
||||
inV1 = vld1q_f32(pIn1);
|
||||
inV2 = vld1q_f32(pIn2);
|
||||
inV3 = vld1q_f32(pIn3);
|
||||
inV4 = vld1q_f32(pIn4);
|
||||
outV = vmlaq_n_f32(outV,inV1,w1);
|
||||
outV = vmlaq_n_f32(outV,inV2,w2);
|
||||
outV = vmlaq_n_f32(outV,inV3,w3);
|
||||
outV = vmlaq_n_f32(outV,inV4,w4);
|
||||
vst1q_f32(pOut, outV);
|
||||
pOut += 4;
|
||||
pIn1 += 4;
|
||||
pIn2 += 4;
|
||||
pIn3 += 4;
|
||||
pIn4 += 4;
|
||||
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut + *pIn1++ * w1;
|
||||
*pOut = *pOut + *pIn2++ * w2;
|
||||
*pOut = *pOut + *pIn3++ * w3;
|
||||
*pOut = *pOut + *pIn4++ * w4;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
pIn1 += 3*vecDim;
|
||||
pIn2 += 3*vecDim;
|
||||
pIn3 += 3*vecDim;
|
||||
pIn4 += 3*vecDim;
|
||||
|
||||
blkCntVector--;
|
||||
}
|
||||
|
||||
pIn = pIn1;
|
||||
|
||||
blkCntVector = nbVectors & 3;
|
||||
while(blkCntVector > 0)
|
||||
{
|
||||
pOut = out;
|
||||
w = *pW++;
|
||||
accum += w;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
outV = vld1q_f32(pOut);
|
||||
inV = vld1q_f32(pIn);
|
||||
outV = vmlaq_n_f32(outV,inV,w);
|
||||
vst1q_f32(pOut, outV);
|
||||
pOut += 4;
|
||||
pIn += 4;
|
||||
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut + *pIn++ * w;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntVector--;
|
||||
}
|
||||
|
||||
/* Normalize */
|
||||
pOut = out;
|
||||
accum = 1.0f / accum;
|
||||
|
||||
blkCntSample = vecDim >> 2;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
tmp = vld1q_f32(pOut);
|
||||
tmp = vmulq_n_f32(tmp,accum);
|
||||
vst1q_f32(pOut, tmp);
|
||||
pOut += 4;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntSample = vecDim & 3;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut * accum;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
|
||||
{
|
||||
|
||||
const float32_t *pIn,*pW;
|
||||
float32_t *pOut;
|
||||
uint32_t blkCntVector,blkCntSample;
|
||||
float32_t accum, w;
|
||||
|
||||
blkCntVector = nbVectors;
|
||||
blkCntSample = vecDim;
|
||||
|
||||
accum = 0.0f;
|
||||
|
||||
pW = weights;
|
||||
pIn = in;
|
||||
|
||||
/* Set counters to 0 */
|
||||
blkCntSample = vecDim;
|
||||
pOut = out;
|
||||
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = 0.0f;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
/* Sum */
|
||||
while(blkCntVector > 0)
|
||||
{
|
||||
pOut = out;
|
||||
w = *pW++;
|
||||
accum += w;
|
||||
|
||||
blkCntSample = vecDim;
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut + *pIn++ * w;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
blkCntVector--;
|
||||
}
|
||||
|
||||
/* Normalize */
|
||||
blkCntSample = vecDim;
|
||||
pOut = out;
|
||||
|
||||
while(blkCntSample > 0)
|
||||
{
|
||||
*pOut = *pOut / accum;
|
||||
pOut++;
|
||||
blkCntSample--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
* @} end of barycenter group
|
||||
*/
|
||||
Reference in New Issue
Block a user