AVPKit
AudioResampler.cpp
1 /*******************************************************************************
2  * Copyright (c) 2024, 2026, Olivier Ayache. All rights reserved.
3  *
4  * This file is part of AVPKit.
5  *
6  * AVPKit is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * AVPKit is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with AVPKit. If not, see <http://www.gnu.org/licenses/>.
18  *******************************************************************************/
19 
20 #include <com/avpkit/ferry/Logger.h>
21 #include <com/avpkit/core/AudioResampler.h>
22 #include <com/avpkit/core/AudioSamples.h>
23 #include <com/avpkit/core/Global.h>
24 #include <com/avpkit/core/FfmpegIncludes.h>
25 
26 #include <stdexcept>
27 
28 VS_LOG_SETUP(VS_CPP_PACKAGE);
29 
30 namespace com { namespace avpkit { namespace core
31  {
32  using namespace com::avpkit::ferry;
33 
34  AudioResampler :: AudioResampler()
35  {
36  swrContext = 0;
37  mOChannels=0;
38  mOSampleRate=0;
39  mIChannels=0;
40  mISampleRate=0;
41  mPtsOffset=0;
42  mOFmt = IAudioSamples::FMT_S16;
43  mIFmt = IAudioSamples::FMT_S16;
44  mFilterLen = 0;
45  mLog2PhaseCount = 0;
46  mIsLinear = false;
47  mCutoff = 1.0;
48  mNextPts = Global::NO_PTS;
49  }
50 
51  AudioResampler :: ~AudioResampler()
52  {
53  if (swrContext){
54  swr_free(&swrContext);
55  }
56  }
57 
58  AudioResampler*
59  AudioResampler :: make(
60  int32_t outputChannels, int32_t inputChannels,
61  int32_t outputRate, int32_t inputRate
62  )
63  {
64  return make(outputChannels, inputChannels, outputRate, inputRate,
66  }
67 
68  AudioResampler*
69  AudioResampler :: make(
70  int32_t outputChannels, int32_t inputChannels,
71  int32_t outputRate, int32_t inputRate,
73  )
74  {
75  return make(outputChannels, inputChannels, outputRate, inputRate,
76  outputFmt, inputFmt,
77  16, 10, 0, 0.8);
78  }
79 
80  AudioResampler*
81  AudioResampler :: make(
82  int32_t outputChannels, int32_t inputChannels,
83  int32_t outputRate, int32_t inputRate,
84  IAudioSamples::Format outputFmt, IAudioSamples::Format inputFmt,
85  int32_t filterLen, int32_t log2PhaseCount,
86  bool linear, double cutoff
87  )
88  {
89  AudioResampler* retval = 0;
90  try {
91  if (outputChannels <= 0)
92  throw std::invalid_argument("outputChannels <= 0");
93 
94  if (inputChannels <= 0)
95  throw std::invalid_argument("inputChannels <= 0");
96 
97  if (outputRate <= 0)
98  throw std::invalid_argument("outputRate <= 0");
99 
100  if (inputRate <= 0)
101  throw std::invalid_argument("inputRate <= 0");
102 
103  if (filterLen <= 0)
104  throw std::invalid_argument("filterLen <= 0");
105 
106  if (log2PhaseCount < 0)
107  throw std::invalid_argument("log2PhaseCount < 0");
108 
109  if (cutoff < 0)
110  throw std::invalid_argument("cutoffFrequency < 0");
111 
112  retval = AudioResampler::make();
113  if (retval)
114  {
115 
116  retval->swrContext = swr_alloc_set_opts(NULL, // we're using existing context
117  av_get_default_channel_layout(outputChannels), // out_ch_layout
118  (enum AVSampleFormat) outputFmt, // out_sample_fmt
119  outputRate, // out_sample_rate
120  av_get_default_channel_layout(inputChannels), // in_ch_layout
121  (enum AVSampleFormat) inputFmt, // in_sample_fmt
122  inputRate, // in_sample_rate
123  0, // log_offset
124  NULL);
125  if (retval->swrContext){
126  av_opt_set_int(retval->swrContext, "phase_shift", log2PhaseCount, 0);
127  av_opt_set_double(retval->swrContext, "cutoff", cutoff, 0);
128  av_opt_set_int(retval->swrContext, "filter_size", filterLen, 0);
129  av_opt_set_int(retval->swrContext, "linear_interp", linear, 0);
130  int init = swr_init(retval->swrContext);
131  if(init != 0){
132  //VS_LOG_ERROR("unable to init swr context in resampler %s", Error::make(init)->getDescription());
133  throw std::invalid_argument("unable to init swr context in resampler");
134  }
135  retval->mOChannels = outputChannels;
136  retval->mOSampleRate = outputRate;
137  retval->mIChannels = inputChannels;
138  retval->mISampleRate = inputRate;
139  retval->mOFmt = outputFmt;
140  retval->mIFmt = inputFmt;
141  retval->mFilterLen = filterLen;
142  retval->mLog2PhaseCount = log2PhaseCount;
143  retval->mIsLinear = linear;
144  retval->mCutoff = cutoff;
145  } else {
146  VS_REF_RELEASE(retval);
147  }
148  }
149  }
150  catch (std::bad_alloc & e)
151  {
152  VS_LOG_ERROR("Error: %s", e.what());
153  VS_REF_RELEASE(retval);
154  throw e;
155  }
156  catch (std::exception & e)
157  {
158  VS_LOG_ERROR("Error: %s", e.what());
159  VS_REF_RELEASE(retval);
160  }
161  return retval;
162  }
163 
164  int
166  {
167  VS_ASSERT(swrContext, "no context");
168  return mOChannels;
169  }
170 
171  int
173  {
174  VS_ASSERT(swrContext, "no context");
175  return mOSampleRate;
176  }
177 
178  int
180  {
181  VS_ASSERT(swrContext, "no context");
182  return mIChannels;
183  }
184 
185  int
187  {
188  VS_ASSERT(swrContext, "no context");
189  return mISampleRate;
190  }
191 
192  int32_t
194  IAudioSamples *inSamples)
195  {
196  int32_t retval = -1;
197  try {
198  int32_t numSamples = 0;
199  if (inSamples)
200  {
201  if (!inSamples->isComplete())
202  throw std::invalid_argument("input samples are not complete");
203 
204  if (inSamples->getSampleRate() != mISampleRate)
205  throw std::invalid_argument("unexpected input sample rate");
206 
207  if (inSamples->getChannels() != mIChannels)
208  throw std::invalid_argument("unexpected # of input channels");
209 
210  if (inSamples->getFormat() != mIFmt)
211  throw std::invalid_argument("unexpected sample format");
212 
213  numSamples = inSamples->getNumSamples();
214  } else {
215  numSamples = 0;
216  }
217  retval = getMinimumNumSamplesRequiredInOutputSamples(numSamples);
218  }
219  catch (std::invalid_argument & e)
220  {
221  VS_LOG_DEBUG("invalid argument: %s", e.what());
222  retval = -1;
223  }
224  catch (std::exception & e)
225  {
226  VS_LOG_DEBUG("Unknown exception: %s", e.what());
227  }
228  return retval;
229  }
230 
231  int32_t
233  int32_t numSamples)
234  {
235  int32_t retval = -1;
236 
237  try
238  {
239  if (numSamples < 0)
240  throw std::invalid_argument("numSamples < 0 not allowed");
241 
242  double conversionRatio = 1;
243  {
244  double top = mOSampleRate;
245  VS_ASSERT(top, "should never be zero");
246  double bot = mISampleRate;
247  VS_ASSERT(bot, "should never be zero");
248  conversionRatio = top/bot;
249  VS_ASSERT(conversionRatio > 0, "the variables used should have been checked on construction");
250  }
251  if (conversionRatio <= 0)
252  throw std::invalid_argument("programmer error");
253 
254  // FFMPEG's re-sample function doesn't let you specify the size of your
255  // output buffer, but does use up all the space you might expect
256  // plus 16-bytes as a lead-in/lead-out for it to seed the resampler.
257  // Hence, the hard-coded 16 here.
258  // NOTE: 16 might change IF the value of filters in the audio_resample
259  // method in libavcodec/resample.c changes.
260 #define VS_FFMPEG_AUDIO_RESAMPLER_LEADIN 16
261  retval =
262  (int32_t)((numSamples * conversionRatio)+VS_FFMPEG_AUDIO_RESAMPLER_LEADIN+0.5);
263  }
264  catch (std::invalid_argument & e)
265  {
266  VS_LOG_DEBUG("invalid argument: %s", e.what());
267  retval = -1;
268  }
269  catch (std::exception & e)
270  {
271  VS_LOG_DEBUG("Unknown exception: %s", e.what());
272  }
273 
274  return retval;
275  }
276 
277  int
279  IAudioSamples* pInSamples,
280  int32_t numSamples)
281  {
282  int retval = -1;
283  AudioSamples* outSamples = static_cast<AudioSamples*>(pOutSamples);
284  AudioSamples* inSamples = static_cast<AudioSamples*>(pInSamples);
285  unsigned int sampleSize=0;
286 
287  try {
288  if (!outSamples)
289  throw std::invalid_argument("no output samples");
290 
291  if (outSamples == inSamples)
292  throw std::invalid_argument("resampling into the same IAudioSamples is not allowed");
293 
294  // null out the output samples.
295  outSamples->setComplete(false, 0, mOSampleRate, mOChannels,
296  mOFmt, Global::NO_PTS);
297 
298  if (inSamples)
299  {
300  if (!inSamples->isComplete())
301  throw std::invalid_argument("input samples are not complete");
302 
303  if (inSamples->getSampleRate() != mISampleRate)
304  throw std::invalid_argument("unexpected input sample rate");
305 
306  if (inSamples->getChannels() != mIChannels)
307  throw std::invalid_argument("unexpected # of input channels");
308 
309  if (inSamples->getFormat() != mIFmt)
310  throw std::invalid_argument("unexpected sample format");
311 
312  if (numSamples == 0)
313  numSamples = inSamples->getNumSamples();
314  else
315  numSamples = FFMIN(numSamples, inSamples->getNumSamples());
316  sampleSize = inSamples->getSampleBitDepth()/8;
317  } else {
318  numSamples = 0;
319  sampleSize = IAudioSamples::findSampleBitDepth(mIFmt)/8;
320  }
321 
322  int32_t neededSamples = getMinimumNumSamplesRequiredInOutputSamples(numSamples);
323  int32_t bytesPerOutputSample = mOChannels*IAudioSamples::findSampleBitDepth(mOFmt)/8;
324  int32_t neededBytes = neededSamples * bytesPerOutputSample; //av_samples_get_buffer_size(NULL, mOChannels, numSamples, (enum AVSampleFormat)mOFmt, 0);//
325  // This causes a buffer resize to occur if needed
326  if (outSamples->ensureCapacity(neededBytes) < 0)
327  throw std::runtime_error("attempted to resize output buffer but failed");
328 
329  int32_t outBufSize = outSamples->getMaxBufferSize();
330  int32_t gap = (neededSamples*bytesPerOutputSample)-outBufSize;
331 
332  if (gap > 0) {
333 // VS_LOG_ERROR("maxBufferSize: %d; neededSampleRoom: %d; sampleSize: %d; numSamples: %d; conversionRatio: %f;",
334 // (int32_t)outSamples->getMaxBufferSize(),
335 // neededSampleRoom,
336 // sampleSize,
337 // numSamples,
338 // conversionRatio);
339  (void) sampleSize; // to avoid a -Werror error
340  throw std::invalid_argument("not enough room in output buffer");
341  }
342  short * inBuf = inSamples ? inSamples->getRawSamples(0) : 0;
343 
344  short *outBuf = outSamples->getRawSamples(0);
345  if (!outBuf)
346  throw std::invalid_argument("could not get output bytes");
347 
348  VS_ASSERT(swrContext, "Should have been set at initialization");
349  if (!swrContext)
350  throw std::invalid_argument("programmer error");
351 
352  // Now we should be far enough along that we can safely try a resample.
353  retval = swr_convert(swrContext, (uint8_t**)&outBuf, swr_get_out_samples(swrContext, numSamples), (const uint8_t**)&inBuf, numSamples);
354 
355 #if 0
356  if (retval >0){
357  char string[2048*16+1];
358  unsigned int i=0;
359  for (i = 0; i < sizeof(string)-1; i++)
360  string[i] = 'X';
361 
362  bool allZero = true;
363 
364  for (i=0; i< FFMIN(numSamples, 2000);i++)
365  {
366  snprintf(string+(5*i), sizeof(string)-5*i, "%04hX.", inBuf[i]);
367  if (inBuf[i] != 0)
368  allZero = false;
369  }
370  VS_LOG_DEBUG("Input Buffer (%d): %s", numSamples, string);
371 
372  for (i=0; i< FFMIN((unsigned int)retval, 2000);i++)
373  {
374  snprintf(string+(9*i), sizeof(string)-9*i, "%04hX%04hX.",
375  outBuf[2*i], outBuf[2*i+1]);
376  if (outBuf[2*i] != 0 || outBuf[2*i+1] != 0)
377  allZero = false;
378  }
379  VS_LOG_DEBUG("Output Buffer (%d): %s", retval, string);
380  if (!allZero)
381  VS_LOG_DEBUG("Got an audio buffer with content");
382  }
383 #endif // 0
384 
385  if (retval >= 0)
386  {
387  // copy the Pts
388  int64_t pts = Global::NO_PTS;
389  if (inSamples)
390  {
391  pts = inSamples->getPts();
392  mNextPts = pts + IAudioSamples::samplesToDefaultPts(retval, mOSampleRate);
393  }
394  else
395  {
396  pts = mNextPts;
397  }
398  if (pts != Global::NO_PTS)
399  pts += mPtsOffset;
400 
401  outSamples->setComplete(true, retval,
402  mOSampleRate, mOChannels,
403  mOFmt,
404  pts);
405  int expectedSamples = 0;
406  if (inSamples)
407  {
408  double top = mOSampleRate;
409  double bottom = mISampleRate;
410  double sampleOnlyConverstionRatio = top / bottom;
411  expectedSamples = (int)(numSamples * sampleOnlyConverstionRatio);
412  }
413  else
414  {
415  VS_LOG_TRACE("Got null samples; outputted all cached and set pts offset from %lld to 0",
416  mPtsOffset);
417  expectedSamples = retval;
418  // and reset the offset
419  mPtsOffset = 0;
420  }
421 
422  if (retval != expectedSamples)
423  {
424  // we got a different number of samples than expected; we need to update
425  // our pts offset
426  int sampleDelta = retval - expectedSamples;
427  int64_t ptsDelta = IAudioSamples::samplesToDefaultPts(sampleDelta, mOSampleRate);
428  mPtsOffset += ptsDelta;
429  }
430  }
431  }
432  catch (std::invalid_argument & e)
433  {
434  VS_LOG_DEBUG("invalid argument: %s", e.what());
435  retval = -1;
436  }
437 
438  return retval;
439  }
440 
443  {
444  return mOFmt;
445  }
446 
449  {
450  return mIFmt;
451  }
452 
453  int32_t
455  {
456  return mFilterLen;
457  }
458 
459  int32_t
461  {
462  return mLog2PhaseCount;
463  }
464 
465  bool
467  {
468  return mIsLinear;
469  }
470 
471  double
473  {
474  return mCutoff;
475  }
476 
477  }}}
virtual int resample(IAudioSamples *pOutputSamples, IAudioSamples *pInputSamples, int32_t numSamples)
Re-sample up to numSamples from inputSamples to outputSamples.
virtual int32_t getMinimumNumSamplesRequiredInOutputSamples(IAudioSamples *inSamples)
Get the minimum number of samples that must be placeable in an output set of samples in order for a r...
virtual int32_t getFilterLen()
Get the length of each filter in the resampler filter bank.
virtual double getCutoffFrequency()
What is the cutoff frequency used?
virtual int getInputChannels()
number of channels expected in input audio.
virtual int getOutputRate()
sample rate of output audio.
virtual IAudioSamples::Format getInputFormat()
Get the sample format we expect to resample from.
virtual bool isLinear()
Are we linearly interpolating between filters?
virtual int getOutputChannels()
number of channels in output audio.
virtual int32_t getLog2PhaseCount()
Get log2(number of entries in filter bank).
virtual IAudioSamples::Format getOutputFormat()
Get the sample format we expect to resample to.
virtual int getInputRate()
sample rate expected in input audio.
virtual int32_t getSampleRate()
Find the sample rate of the samples in this audio buffer.
virtual Format getFormat()
Find the Format of the samples in this buffer.
virtual int32_t getChannels()
Return the number of channels of the samples in this buffer.
virtual int32_t ensureCapacity(int32_t capacityInBytes)
Called by decoder before decoding to ensure sufficient space.
virtual void setComplete(bool complete, int32_t numSamples, int32_t sampleRate, int32_t channels, Format sampleFmt, int64_t pts)
Call this if you modify the samples and are now done.
virtual int64_t getPts()
What is the Presentation Time Stamp of this set of audio samples.
virtual int32_t getMaxBufferSize()
virtual int32_t getNumSamples()
Get the number of samples in this buffer.
virtual int32_t getSampleBitDepth()
Find out the bit-depth of the samples in this buffer.
virtual bool isComplete()
Returns whether or not we think this buffer has been filled with data.
static const int64_t NO_PTS
A value that means no time stamp is set for a given object.
Definition: Global.h:50
A set of raw (decoded) samples, plus a timestamp for when to play those samples relative to other ite...
Definition: IAudioSamples.h:38
virtual int32_t getChannels()=0
Return the number of channels of the samples in this buffer.
static int32_t findSampleBitDepth(Format format)
A convenience method that returns the # of bits in a given format.
virtual int32_t getSampleRate()=0
Find the sample rate of the samples in this audio buffer.
virtual bool isComplete()=0
Returns whether or not we think this buffer has been filled with data.
virtual Format getFormat()=0
Find the Format of the samples in this buffer.
virtual int32_t getNumSamples()=0
Get the number of samples in this buffer.
Format
The format we use to represent audio.
Definition: IAudioSamples.h:46
static int64_t samplesToDefaultPts(int64_t samples, int sampleRate)
Converts a number of samples at a given sampleRate into Microseconds.
This library contains routines used by AVPKit libraries for "ferry"ing Java objects to and from nativ...
WARNING: Do not use logging in this class, and do not set any static file variables to values other t...