cpp/api/AudioResampler_8cpp_source.html

 /*******************************************************************************

  * Copyright (c) 2024, 2026, Olivier Ayache.  All rights reserved.

  *

  * This file is part of AVPKit.

  *

  * AVPKit is free software: you can redistribute it and/or modify

  * it under the terms of the GNU Lesser General Public License as published by

  * the Free Software Foundation, either version 3 of the License, or

  * (at your option) any later version.

  *

  * AVPKit is distributed in the hope that it will be useful,

  * but WITHOUT ANY WARRANTY; without even the implied warranty of

  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

  * GNU Lesser General Public License for more details.

  *

  * You should have received a copy of the GNU Lesser General Public License

  * along with AVPKit.  If not, see <http://www.gnu.org/licenses/>.

  *******************************************************************************/


 #include <com/avpkit/ferry/Logger.h>

 #include <com/avpkit/core/AudioResampler.h>

 #include <com/avpkit/core/AudioSamples.h>

 #include <com/avpkit/core/Global.h>

 #include <com/avpkit/core/FfmpegIncludes.h>


 #include <stdexcept>


 VS_LOG_SETUP(VS_CPP_PACKAGE);


 namespace com { namespace avpkit { namespace core

   {

   using namespace com::avpkit::ferry;


   // Puts the resampler into its unconfigured state: no libswresample context,
   // zeroed channel counts / sample rates, FMT_S16 on both sides and no known
   // "next" timestamp. The make() factories fill in the real values.
   AudioResampler::AudioResampler()
   {
     // No resampling context until a factory creates one.
     swrContext = 0;

     // Input side.
     mIChannels = 0;
     mISampleRate = 0;
     mIFmt = IAudioSamples::FMT_S16;

     // Output side.
     mOChannels = 0;
     mOSampleRate = 0;
     mOFmt = IAudioSamples::FMT_S16;

     // Values reported by getFilterLen(), getLog2PhaseCount(), isLinear()
     // and getCutoffFrequency().
     mFilterLen = 0;
     mLog2PhaseCount = 0;
     mIsLinear = false;
     mCutoff = 1.0;

     // Timestamp bookkeeping used by resample().
     mPtsOffset = 0;
     mNextPts = Global::NO_PTS;
   }


   // Frees the libswresample context, if one was ever created.
   AudioResampler::~AudioResampler()
   {
     if (swrContext != 0)
       swr_free(&swrContext);
   }


   // Convenience factory: same as the six-argument make() with
   // IAudioSamples::FMT_S16 used for both the output and the input format.
   AudioResampler*
   AudioResampler::make(
       int32_t outputChannels, int32_t inputChannels,
       int32_t outputRate, int32_t inputRate)
   {
     const IAudioSamples::Format defaultFormat = IAudioSamples::FMT_S16;
     return make(outputChannels, inputChannels,
         outputRate, inputRate,
         defaultFormat, defaultFormat);
   }


   // Convenience factory: forwards to the fully parameterised make() using
   // this file's default filter settings (length 16, log2 phase count 10,
   // no linear interpolation, cutoff 0.8).
   AudioResampler*
   AudioResampler::make(
       int32_t outputChannels, int32_t inputChannels,
       int32_t outputRate, int32_t inputRate,
       IAudioSamples::Format outputFmt, IAudioSamples::Format inputFmt)
   {
     const int32_t defaultFilterLen = 16;
     const int32_t defaultLog2PhaseCount = 10;
     const bool defaultLinear = false;
     const double defaultCutoff = 0.8;

     return make(outputChannels, inputChannels,
         outputRate, inputRate,
         outputFmt, inputFmt,
         defaultFilterLen, defaultLog2PhaseCount,
         defaultLinear, defaultCutoff);
   }


   // Creates and initialises a resampler.
   //
   // Parameters:
   //   outputChannels / inputChannels  channel counts; must be > 0.
   //   outputRate / inputRate          sample rates; must be > 0.
   //   outputFmt / inputFmt            sample formats, cast to AVSampleFormat.
   //   filterLen                       passed to swr as "filter_size"; must be > 0.
   //   log2PhaseCount                  passed to swr as "phase_shift"; must be >= 0.
   //   linear                          passed to swr as "linear_interp".
   //   cutoff                          passed to swr as "cutoff"; must be >= 0.
   //
   // Returns the new resampler, or 0 when an argument is rejected, the
   // SwrContext cannot be allocated, or swr_init() fails (the reason is
   // logged where an exception was involved). std::bad_alloc is logged and
   // re-thrown to the caller.
   AudioResampler*
   AudioResampler :: make(
       int32_t outputChannels, int32_t inputChannels,
       int32_t outputRate, int32_t inputRate,
       IAudioSamples::Format outputFmt, IAudioSamples::Format inputFmt,
       int32_t filterLen, int32_t log2PhaseCount,
       bool linear, double cutoff
       )
   {
     AudioResampler* retval = 0;
     try {
       if (outputChannels <= 0)
         throw std::invalid_argument("outputChannels <= 0");

       if (inputChannels <= 0)
         throw std::invalid_argument("inputChannels <= 0");

       if (outputRate <= 0)
         throw std::invalid_argument("outputRate <= 0");

       if (inputRate <= 0)
         throw std::invalid_argument("inputRate <= 0");

       if (filterLen <= 0)
         throw std::invalid_argument("filterLen <= 0");

       if (log2PhaseCount < 0)
         throw std::invalid_argument("log2PhaseCount < 0");

       if (cutoff < 0)
         throw std::invalid_argument("cutoffFrequency < 0");

       retval = AudioResampler::make();
       if (retval)
       {
         retval->swrContext = swr_alloc_set_opts(NULL, // no existing context: allocate a new one
             av_get_default_channel_layout(outputChannels), // out_ch_layout
             (enum AVSampleFormat) outputFmt, // out_sample_fmt
             outputRate, // out_sample_rate
             av_get_default_channel_layout(inputChannels), // in_ch_layout
             (enum AVSampleFormat) inputFmt, // in_sample_fmt
             inputRate, // in_sample_rate
             0, // log_offset
             NULL);
         if (retval->swrContext){
           // NOTE(review): the av_opt_set_* return codes are not checked, so
           // a rejected value presumably leaves the swr default in place
           // while the getters below report the requested value - confirm.
           av_opt_set_int(retval->swrContext, "phase_shift", log2PhaseCount, 0);
           av_opt_set_double(retval->swrContext, "cutoff", cutoff, 0);
           av_opt_set_int(retval->swrContext, "filter_size", filterLen, 0);
           av_opt_set_int(retval->swrContext, "linear_interp", linear, 0);
           int init = swr_init(retval->swrContext);
           if(init != 0){
             //VS_LOG_ERROR("unable to init swr context in resampler %s", Error::make(init)->getDescription());
             // Handled by the std::exception handler below, which releases
             // the half-built object.
             throw std::invalid_argument("unable to init swr context in resampler");
           }
           // Remember the configuration so the getters can report it.
           retval->mOChannels = outputChannels;
           retval->mOSampleRate = outputRate;
           retval->mIChannels = inputChannels;
           retval->mISampleRate = inputRate;
           retval->mOFmt = outputFmt;
           retval->mIFmt = inputFmt;
           retval->mFilterLen = filterLen;
           retval->mLog2PhaseCount = log2PhaseCount;
           retval->mIsLinear = linear;
           retval->mCutoff = cutoff;
         } else {
           // Context allocation failed; hand back nothing.
           VS_REF_RELEASE(retval);
         }
       }
     }
     catch (const std::bad_alloc & e)
     {
       VS_LOG_ERROR("Error: %s", e.what());
       VS_REF_RELEASE(retval);
       // Re-throw the exception object actually in flight; "throw e" would
       // throw a copy of static type std::bad_alloc and slice derived types.
       throw;
     }
     catch (const std::exception & e)
     {
       VS_LOG_ERROR("Error: %s", e.what());
       VS_REF_RELEASE(retval);
     }
     return retval;
   }


   int

   AudioResampler :: getOutputChannels()

   {

     VS_ASSERT(swrContext, "no context");

     return mOChannels;

   }


   // Returns the output sample rate this resampler was configured with.
   // Asserts that a resampling context exists.
   int
   AudioResampler::getOutputRate()
   {
     VS_ASSERT(swrContext, "no context");
     const int rate = mOSampleRate;
     return rate;
   }


   int

   AudioResampler :: getInputChannels()

   {

     VS_ASSERT(swrContext, "no context");

     return mIChannels;

   }


   // Returns the sample rate expected in the input audio.
   // Asserts that a resampling context exists.
   int
   AudioResampler::getInputRate()
   {
     VS_ASSERT(swrContext, "no context");
     const int rate = mISampleRate;
     return rate;
   }


   // Computes how many samples an output buffer must be able to hold in order
   // to resample `inSamples`.
   //
   // A null `inSamples` is treated as zero input samples. Non-null input must
   // be complete and match this resampler's input sample rate, channel count
   // and format.
   //
   // Returns the result of the int32_t overload for the input's sample count,
   // or -1 if the input is rejected.
   int32_t
   AudioResampler::getMinimumNumSamplesRequiredInOutputSamples(
       IAudioSamples *inSamples)
   {
     int32_t result = -1;
     try {
       // Stays 0 when there is no input at all.
       int32_t inputCount = 0;

       if (inSamples)
       {
         if (!inSamples->isComplete())
           throw std::invalid_argument("input samples are not complete");

         if (mISampleRate != inSamples->getSampleRate())
           throw std::invalid_argument("unexpected input sample rate");

         if (mIChannels != inSamples->getChannels())
           throw std::invalid_argument("unexpected # of input channels");

         if (mIFmt != inSamples->getFormat())
           throw std::invalid_argument("unexpected sample format");

         inputCount = inSamples->getNumSamples();
       }

       result = getMinimumNumSamplesRequiredInOutputSamples(inputCount);
     }
     catch (std::invalid_argument & e)
     {
       VS_LOG_DEBUG("invalid argument: %s", e.what());
       result = -1;
     }
     catch (std::exception & e)
     {
       // result has not been assigned at this point, so it is still -1.
       VS_LOG_DEBUG("Unknown exception: %s", e.what());
     }
     return result;
   }


   // Computes how many samples an output buffer must be able to hold in order
   // to resample `numSamples` input samples:
   //   numSamples * (output rate / input rate) + lead-in, rounded to nearest.
   //
   // Returns that count, or -1 when `numSamples` is negative, when either
   // sample rate is not positive (resampler never configured), or when the
   // result does not fit in an int32_t.
   int32_t
   AudioResampler :: getMinimumNumSamplesRequiredInOutputSamples(
       int32_t numSamples)
   {
     int32_t retval = -1;

     try
     {
       if (numSamples < 0)
         throw std::invalid_argument("numSamples < 0 not allowed");

       // Both rates are validated by make(); an object that bypassed it
       // still has them at 0. Reject that explicitly instead of dividing by
       // zero and converting an infinite double to an integer.
       VS_ASSERT(mOSampleRate > 0, "should never be zero");
       VS_ASSERT(mISampleRate > 0, "should never be zero");
       if (mOSampleRate <= 0 || mISampleRate <= 0)
         throw std::invalid_argument("programmer error");

       const double top = mOSampleRate;
       const double bot = mISampleRate;
       const double conversionRatio = top/bot;

       // Extra head-room added on top of the rate-converted sample count.
       // The value 16 is historical: it dates from the libavcodec
       // audio_resample implementation, which used that much lead-in/lead-out
       // to seed the resampler.
 #define VS_FFMPEG_AUDIO_RESAMPLER_LEADIN 16
       const double required =
           (numSamples * conversionRatio)+VS_FFMPEG_AUDIO_RESAMPLER_LEADIN+0.5;

       // 2147483647 is the largest int32_t; converting a larger double
       // would be undefined behaviour.
       if (required > 2147483647.0)
         throw std::invalid_argument("required output sample count does not fit in int32_t");

       retval = (int32_t)required;
     }
     catch (std::invalid_argument & e)
     {
       VS_LOG_DEBUG("invalid argument: %s", e.what());
       retval = -1;
     }
     catch (std::exception & e)
     {
       VS_LOG_DEBUG("Unknown exception: %s", e.what());
     }

     return retval;
   }


   // Re-samples up to `numSamples` samples from `pInSamples` into
   // `pOutSamples`.
   //
   // Parameters:
   //   pOutSamples  destination; must be non-null and must not be the same
   //                object as pInSamples. It is marked incomplete first and
   //                its buffer is grown if it is too small.
   //   pInSamples   source, or null to pull out whatever the resampler still
   //                has buffered. Non-null input must be complete and match
   //                the configured input rate, channel count and format.
   //   numSamples   0 means "all samples in pInSamples"; otherwise it is
   //                capped at the number of samples available.
   //
   // Returns the value of swr_convert() (samples written per channel, or a
   // negative error code), or -1 if an argument is rejected.
   // Throws std::runtime_error if the output buffer cannot be resized; that
   // exception is deliberately not caught here.
   int
   AudioResampler :: resample(IAudioSamples * pOutSamples,
       IAudioSamples* pInSamples,
       int32_t numSamples)
   {
     int retval = -1;
     AudioSamples* outSamples = static_cast<AudioSamples*>(pOutSamples);
     AudioSamples* inSamples = static_cast<AudioSamples*>(pInSamples);

     try {
       if (!outSamples)
         throw std::invalid_argument("no output samples");

       if (outSamples == inSamples)
         throw std::invalid_argument("resampling into the same IAudioSamples is not allowed");

       // null out the output samples.
       outSamples->setComplete(false, 0, mOSampleRate, mOChannels,
           mOFmt, Global::NO_PTS);

       if (inSamples)
       {
         if (!inSamples->isComplete())
           throw std::invalid_argument("input samples are not complete");

         if (inSamples->getSampleRate() != mISampleRate)
           throw std::invalid_argument("unexpected input sample rate");

         if (inSamples->getChannels() != mIChannels)
           throw std::invalid_argument("unexpected # of input channels");

         if (inSamples->getFormat() != mIFmt)
           throw std::invalid_argument("unexpected sample format");

         if (numSamples == 0)
           numSamples = inSamples->getNumSamples();
         else
           numSamples = FFMIN(numSamples, inSamples->getNumSamples());
       } else {
         numSamples = 0;
       }

       VS_ASSERT(swrContext, "Should have been set at initialization");
       if (!swrContext)
         throw std::invalid_argument("programmer error");

       // Returns -1 for a negative numSamples or an unconfigured resampler;
       // stop here rather than sizing a buffer from a negative count.
       int32_t neededSamples = getMinimumNumSamplesRequiredInOutputSamples(numSamples);
       if (neededSamples < 0)
         throw std::invalid_argument("could not compute required output size");

       // swr_convert() is told it may write up to this many samples, so the
       // output buffer has to be at least that big. The estimate above only
       // accounts for the new input, not for data already buffered inside
       // the resampler.
       const int maxOutSamples = swr_get_out_samples(swrContext, numSamples);
       if (maxOutSamples < 0)
         throw std::invalid_argument("could not query resampler output size");
       if (maxOutSamples > neededSamples)
         neededSamples = maxOutSamples;

       int32_t bytesPerOutputSample = mOChannels*IAudioSamples::findSampleBitDepth(mOFmt)/8;
       int32_t neededBytes = neededSamples * bytesPerOutputSample;

       // This causes a buffer resize to occur if needed
       if (outSamples->ensureCapacity(neededBytes) < 0)
         throw std::runtime_error("attempted to resize output buffer but failed");

       int32_t outBufSize = outSamples->getMaxBufferSize();
       if (neededBytes > outBufSize)
         throw std::invalid_argument("not enough room in output buffer");

       // Fetched after ensureCapacity(), which may have resized the buffer.
       short * inBuf = inSamples ? inSamples->getRawSamples(0) : 0;
       short *outBuf = outSamples->getRawSamples(0);
       if (!outBuf)
         throw std::invalid_argument("could not get output bytes");

       // Now we should be far enough along that we can safely try a resample.
       // NOTE(review): a single data pointer is passed for each side, which
       // presumably only suits packed (non-planar) sample formats - confirm.
       // NOTE(review): with null input this still passes a non-null pointer
       // array; the swr_convert() documentation describes flushing as
       // in == NULL with in_count == 0 - confirm which is intended.
       retval = swr_convert(swrContext, (uint8_t**)&outBuf, maxOutSamples,
           (const uint8_t**)&inBuf, numSamples);

       if (retval >= 0)
       {
         // copy the Pts
         int64_t pts = Global::NO_PTS;
         if (inSamples)
         {
           pts = inSamples->getPts();
           // Only derive the follow-on timestamp from a real timestamp;
           // arithmetic on the NO_PTS sentinel would produce a bogus value
           // that later looks like a valid pts.
           if (pts != Global::NO_PTS)
             mNextPts = pts + IAudioSamples::samplesToDefaultPts(retval, mOSampleRate);
           else
             mNextPts = Global::NO_PTS;
         }
         else
         {
           pts = mNextPts;
         }
         if (pts != Global::NO_PTS)
           pts += mPtsOffset;

         outSamples->setComplete(true, retval,
             mOSampleRate, mOChannels,
             mOFmt,
             pts);

         int expectedSamples = 0;
         if (inSamples)
         {
           double top = mOSampleRate;
           double bottom = mISampleRate;
           double sampleOnlyConverstionRatio = top / bottom;
           expectedSamples = (int)(numSamples * sampleOnlyConverstionRatio);
         }
         else
         {
           VS_LOG_TRACE("Got null samples; outputted all cached and set pts offset from %lld to 0",
               mPtsOffset);
           expectedSamples = retval;
           // and reset the offset
           mPtsOffset = 0;
         }

         if (retval != expectedSamples)
         {
           // we got a different number of samples than expected; we need to update
           // our pts offset
           int sampleDelta = retval - expectedSamples;
           int64_t ptsDelta = IAudioSamples::samplesToDefaultPts(sampleDelta, mOSampleRate);
           mPtsOffset += ptsDelta;
         }
       }
     }
     catch (std::invalid_argument & e)
     {
       VS_LOG_DEBUG("invalid argument: %s", e.what());
       retval = -1;
     }

     return retval;
   }


   // Returns the sample format this resampler produces.
   IAudioSamples::Format
   AudioResampler::getOutputFormat()
   {
     return this->mOFmt;
   }


   // Returns the sample format this resampler expects as input.
   IAudioSamples::Format
   AudioResampler::getInputFormat()
   {
     return this->mIFmt;
   }


   // Returns the filter length this resampler was created with.
   int32_t
   AudioResampler::getFilterLen()
   {
     return this->mFilterLen;
   }


   // Returns the log2 phase count this resampler was created with.
   int32_t
   AudioResampler::getLog2PhaseCount()
   {
     return this->mLog2PhaseCount;
   }


   bool

   AudioResampler :: isLinear()

   {

     return mIsLinear;

   }


   double

   AudioResampler :: getCutoffFrequency()

   {

     return mCutoff;

   }


   }}}

com::avpkit::core::AudioResampler::resample
virtual int resample(IAudioSamples *pOutputSamples, IAudioSamples *pInputSamples, int32_t numSamples)
Re-sample up to numSamples from inputSamples to outputSamples.
Definition: AudioResampler.cpp:278

com::avpkit::core::AudioResampler::getMinimumNumSamplesRequiredInOutputSamples
virtual int32_t getMinimumNumSamplesRequiredInOutputSamples(IAudioSamples *inSamples)
Get the minimum number of samples that must be placeable in an output set of samples in order for a r...
Definition: AudioResampler.cpp:193

com::avpkit::core::AudioResampler::getFilterLen
virtual int32_t getFilterLen()
Get the length of each filter in the resampler filter bank.
Definition: AudioResampler.cpp:454

com::avpkit::core::AudioResampler::getCutoffFrequency
virtual double getCutoffFrequency()
What is the cutoff frequency used?
Definition: AudioResampler.cpp:472

com::avpkit::core::AudioResampler::getInputChannels
virtual int getInputChannels()
number of channels expected in input audio.
Definition: AudioResampler.cpp:179

com::avpkit::core::AudioResampler::getOutputRate
virtual int getOutputRate()
sample rate of output audio.
Definition: AudioResampler.cpp:172

com::avpkit::core::AudioResampler::getInputFormat
virtual IAudioSamples::Format getInputFormat()
Get the sample format we expect to resample from.
Definition: AudioResampler.cpp:448

com::avpkit::core::AudioResampler::isLinear
virtual bool isLinear()
Are we linearly interpolating between filters?
Definition: AudioResampler.cpp:466

com::avpkit::core::AudioResampler::getOutputChannels
virtual int getOutputChannels()
number of channels in output audio.
Definition: AudioResampler.cpp:165

com::avpkit::core::AudioResampler::getLog2PhaseCount
virtual int32_t getLog2PhaseCount()
Get log2(number of entries in filter bank).
Definition: AudioResampler.cpp:460

com::avpkit::core::AudioResampler::getOutputFormat
virtual IAudioSamples::Format getOutputFormat()
Get the sample format we expect to resample to.
Definition: AudioResampler.cpp:442

com::avpkit::core::AudioResampler::getInputRate
virtual int getInputRate()
sample rate expected in input audio.
Definition: AudioResampler.cpp:186

com::avpkit::core::AudioSamples
Definition: AudioSamples.h:33

com::avpkit::core::AudioSamples::getSampleRate
virtual int32_t getSampleRate()
Find the sample rate of the samples in this audio buffer.
Definition: AudioSamples.cpp:209

com::avpkit::core::AudioSamples::getFormat
virtual Format getFormat()
Find the Format of the samples in this buffer.
Definition: AudioSamples.cpp:203

com::avpkit::core::AudioSamples::getChannels
virtual int32_t getChannels()
Return the number of channels of the samples in this buffer.
Definition: AudioSamples.cpp:215

com::avpkit::core::AudioSamples::ensureCapacity
virtual int32_t ensureCapacity(int32_t capacityInBytes)
Called by decoder before decoding to ensure sufficient space.
Definition: AudioSamples.cpp:90

com::avpkit::core::AudioSamples::setComplete
virtual void setComplete(bool complete, int32_t numSamples, int32_t sampleRate, int32_t channels, Format sampleFmt, int64_t pts)
Call this if you modify the samples and are now done.
Definition: AudioSamples.cpp:273

com::avpkit::core::AudioSamples::getPts
virtual int64_t getPts()
What is the Presentation Time Stamp of this set of audio samples.
Definition: AudioSamples.cpp:317

com::avpkit::core::AudioSamples::getMaxBufferSize
virtual int32_t getMaxBufferSize()
Definition: AudioSamples.cpp:234

com::avpkit::core::AudioSamples::getNumSamples
virtual int32_t getNumSamples()
Get the number of samples in this buffer.
Definition: AudioSamples.cpp:228

com::avpkit::core::AudioSamples::getSampleBitDepth
virtual int32_t getSampleBitDepth()
Find out the bit-depth of the samples in this buffer.
Definition: AudioSamples.cpp:241

com::avpkit::core::AudioSamples::isComplete
virtual bool isComplete()
Returns whether or not we think this buffer has been filled with data.
Definition: AudioSamples.cpp:197

com::avpkit::core::Global::NO_PTS
static const int64_t NO_PTS
A value that means no time stamp is set for a given object.
Definition: Global.h:50

com::avpkit::core::IAudioSamples
A set of raw (decoded) samples, plus a timestamp for when to play those samples relative to other ite...
Definition: IAudioSamples.h:38

com::avpkit::core::IAudioSamples::getChannels
virtual int32_t getChannels()=0
Return the number of channels of the samples in this buffer.

com::avpkit::core::IAudioSamples::findSampleBitDepth
static int32_t findSampleBitDepth(Format format)
A convenience method that returns the # of bits in a given format.
Definition: IAudioSamples.cpp:38

com::avpkit::core::IAudioSamples::getSampleRate
virtual int32_t getSampleRate()=0
Find the sample rate of the samples in this audio buffer.

com::avpkit::core::IAudioSamples::isComplete
virtual bool isComplete()=0
Returns whether or not we think this buffer has been filled with data.

com::avpkit::core::IAudioSamples::getFormat
virtual Format getFormat()=0
Find the Format of the samples in this buffer.

com::avpkit::core::IAudioSamples::getNumSamples
virtual int32_t getNumSamples()=0
Get the number of samples in this buffer.

com::avpkit::core::IAudioSamples::Format
Format
The format we use to represent audio.
Definition: IAudioSamples.h:46

com::avpkit::core::IAudioSamples::FMT_S16
@ FMT_S16
signed 16 bits
Definition: IAudioSamples.h:49

com::avpkit::core::IAudioSamples::samplesToDefaultPts
static int64_t samplesToDefaultPts(int64_t samples, int sampleRate)
Converts a number of samples at a given sampleRate into Microseconds.
Definition: IAudioSamples.cpp:69

com::avpkit::ferry
This library contains routines used by AVPKit libraries for "ferry"ing Java objects to and from nativ...
Definition: AtomicInteger.cpp:23

com
WARNING: Do not use logging in this class, and do not set any static file variables to values other t...
Definition: AudioResampler.cpp:30