FFmpeg/libavformat/iamfenc.c
Andreas Rheinhardt 18af922c53 avformat/iamf: Don't mix ownership and non-ownership pointers
IAMFAudioElement and IAMFMixPresentation currently contain
pointers to independently allocated objects that are sometimes
owned by said structures and sometimes not.

More precisely, upon success the demuxer transfers ownership
of these other objects to newly created AVStreamGroups, but it
keeps its pointers. iamf_read_close() therefore always resets
these pointers (because the cleanup code always treats them
as ownership pointers). This leads to memory leaks in case
iamf_read_header() fails without having attached all of these
objects to stream groups.

The muxer has a similar issue: It also clears these pointers
(pointing to objects owned by stream groups created by the user)
in its deinit function.

This commit fixes this memleak by explicitly adding non-ownership
pointers; this also allows removing the code that resets the
ownership pointers.

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2024-02-19 23:30:00 +01:00

387 lines
13 KiB
C

/*
* IAMF muxer
* Copyright (c) 2023 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/iamf.h"
#include "libavcodec/put_bits.h"
#include "avformat.h"
#include "avio_internal.h"
#include "iamf.h"
#include "iamf_writer.h"
#include "internal.h"
#include "mux.h"
/**
 * Private data of the IAMF muxer (sized via priv_data_size in ff_iamf_muxer).
 */
typedef struct IAMFMuxContext {
/* IAMF state handed to the ff_iamf_* helpers by iamf_init(),
 * iamf_write_header(), write_parameter_block() and iamf_deinit(). */
IAMFContext iamf;
/* ID of s->streams[0], recorded by iamf_init() and iamf_write_header().
 * iamf_write_packet() emits parameter blocks only for packets of the
 * stream carrying this ID. */
int first_stream_id;
} IAMFMuxContext;
/**
 * Validate the streams and stream groups of the muxer context and register
 * the IAMF stream groups with the muxer's IAMFContext.
 *
 * Checks performed, in order:
 *  - there is at least one stream;
 *  - every stream is audio, carries one of the 'mp4a', 'Opus', 'fLaC' or
 *    'ipcm' codec tags, has at most two channels and an ID not used by an
 *    earlier stream;
 *  - there is at least one stream group;
 *  - there are one or two IAMF audio element groups and at least one IAMF
 *    mix presentation group.
 *
 * All audio element groups are then added before any mix presentation group.
 *
 * @param s muxer context
 * @return 0 on success, AVERROR(EINVAL) if a check fails, or the negative
 *         value returned by ff_iamf_add_audio_element() or
 *         ff_iamf_add_mix_presentation()
 */
static int iamf_init(AVFormatContext *s)
{
    IAMFMuxContext *const mux = s->priv_data;
    int audio_element_count = 0, mix_presentation_count = 0;
    int err;

    if (s->nb_streams == 0) {
        av_log(s, AV_LOG_ERROR, "There must be at least one stream\n");
        return AVERROR(EINVAL);
    }

    for (int cur = 0; cur < s->nb_streams; cur++) {
        int tag_ok;

        switch (s->streams[cur]->codecpar->codec_tag) {
        case MKTAG('m','p','4','a'):
        case MKTAG('O','p','u','s'):
        case MKTAG('f','L','a','C'):
        case MKTAG('i','p','c','m'):
            tag_ok = 1;
            break;
        default:
            tag_ok = 0;
            break;
        }

        if (s->streams[cur]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO || !tag_ok) {
            av_log(s, AV_LOG_ERROR, "Unsupported codec id %s\n",
                   avcodec_get_name(s->streams[cur]->codecpar->codec_id));
            return AVERROR(EINVAL);
        }

        if (s->streams[cur]->codecpar->ch_layout.nb_channels > 2) {
            av_log(s, AV_LOG_ERROR, "Unsupported channel layout on stream #%d\n", cur);
            return AVERROR(EINVAL);
        }

        // Stream IDs must be unique: compare against every earlier stream.
        for (int prev = 0; prev < cur; prev++) {
            if (s->streams[prev]->id != s->streams[cur]->id)
                continue;
            av_log(s, AV_LOG_ERROR, "Duplicated stream id %d\n", s->streams[prev]->id);
            return AVERROR(EINVAL);
        }
    }

    if (s->nb_stream_groups == 0) {
        av_log(s, AV_LOG_ERROR, "There must be at least two stream groups\n");
        return AVERROR(EINVAL);
    }

    // Tally the IAMF group types; groups of any other type are not counted.
    for (int g = 0; g < s->nb_stream_groups; g++) {
        switch (s->stream_groups[g]->type) {
        case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
            audio_element_count++;
            break;
        case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
            mix_presentation_count++;
            break;
        default:
            break;
        }
    }

    if (audio_element_count < 1 || audio_element_count > 2 ||
        mix_presentation_count < 1) {
        av_log(s, AV_LOG_ERROR, "There must be >= 1 and <= 2 IAMF_AUDIO_ELEMENT and at least "
                                "one IAMF_MIX_PRESENTATION stream groups\n");
        return AVERROR(EINVAL);
    }

    // First pass: audio elements.
    for (int g = 0; g < s->nb_stream_groups; g++) {
        const AVStreamGroup *const group = s->stream_groups[g];

        if (group->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT) {
            err = ff_iamf_add_audio_element(&mux->iamf, group, s);
            if (err < 0)
                return err;
        }
    }

    // Second pass: mix presentations, once every audio element was added.
    for (int g = 0; g < s->nb_stream_groups; g++) {
        const AVStreamGroup *const group = s->stream_groups[g];

        if (group->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION) {
            err = ff_iamf_add_mix_presentation(&mux->iamf, group, s);
            if (err < 0)
                return err;
        }
    }

    mux->first_stream_id = s->streams[0]->id;

    return 0;
}
/**
 * Write the IAMF descriptors to the output and record the ID of the first
 * stream.
 *
 * @param s muxer context
 * @return 0 on success, otherwise the negative value returned by
 *         ff_iamf_write_descriptors()
 */
static int iamf_write_header(AVFormatContext *s)
{
    IAMFMuxContext *const mux = s->priv_data;
    const int err = ff_iamf_write_descriptors(&mux->iamf, s->pb, s);

    if (err < 0)
        return err;

    mux->first_stream_id = s->streams[0]->id;

    return 0;
}
/**
 * Scale the rational q by b (q.num * b / q.den via av_rescale()) and clip
 * the result with av_clip_int16().
 */
static inline int rescale_rational(AVRational q, int b)
{
    const int64_t scaled = av_rescale(q.num, b, q.den);

    return av_clip_int16(scaled);
}
/**
 * Write one parameter block OBU for a parameter definition taken from
 * packet side data.
 *
 * The one-byte OBU header goes straight to s->pb. The payload is assembled
 * in a dynamic buffer so that its size can be written in front of it with
 * ffio_write_leb(). The dynamic buffer is released on every exit path taken
 * after it has been opened.
 *
 * @param s     muxer context
 * @param param parameter definition to serialize
 * @return 0 on success, or when param has a type greater than
 *         AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN and is therefore skipped;
 *         AVERROR(EINVAL) if the parameter ID is unknown, its type disagrees
 *         with the registered definition, or a recon gain parameter has no
 *         audio element; otherwise the negative value returned by
 *         avio_open_dyn_buf()
 */
static int write_parameter_block(AVFormatContext *s, const AVIAMFParamDefinition *param)
{
    const IAMFMuxContext *const c = s->priv_data;
    const IAMFContext *const iamf = &c->iamf;
    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
    IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
    PutBitContext pb;
    AVIOContext *dyn_bc;
    uint8_t *dyn_buf = NULL;
    int dyn_size, ret;

    if (param->type > AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
        av_log(s, AV_LOG_DEBUG, "Ignoring side data with unknown type %u\n",
               param->type);
        return 0;
    }

    if (!param_definition) {
        av_log(s, AV_LOG_ERROR, "Non-existent Parameter Definition with ID %u referenced by a packet\n",
               param->parameter_id);
        return AVERROR(EINVAL);
    }

    if (param->type != param_definition->param->type) {
        av_log(s, AV_LOG_ERROR, "Inconsistent values for Parameter Definition "
                                "with ID %u in a packet\n",
               param->parameter_id);
        return AVERROR(EINVAL);
    }

    ret = avio_open_dyn_buf(&dyn_bc);
    if (ret < 0)
        return ret;

    // OBU header: 5-bit OBU type followed by three flag bits, all zero.
    init_put_bits(&pb, header, sizeof(header));
    put_bits(&pb, 5, IAMF_OBU_IA_PARAMETER_BLOCK);
    put_bits(&pb, 3, 0);
    flush_put_bits(&pb);
    avio_write(s->pb, header, put_bytes_count(&pb, 1));

    ffio_write_leb(dyn_bc, param->parameter_id);
    // Timing fields are only written when the definition's mode is 0.
    if (!param_definition->mode) {
        ffio_write_leb(dyn_bc, param->duration);
        ffio_write_leb(dyn_bc, param->constant_subblock_duration);
        if (param->constant_subblock_duration == 0)
            ffio_write_leb(dyn_bc, param->nb_subblocks);
    }

    for (int i = 0; i < param->nb_subblocks; i++) {
        const void *subblock = av_iamf_param_definition_get_subblock(param, i);

        switch (param->type) {
        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
            const AVIAMFMixGain *mix = subblock;

            if (!param_definition->mode && param->constant_subblock_duration == 0)
                ffio_write_leb(dyn_bc, mix->subblock_duration);

            ffio_write_leb(dyn_bc, mix->animation_type);

            // Gain values are scaled by 256 and clipped to 16 bits.
            avio_wb16(dyn_bc, rescale_rational(mix->start_point_value, 1 << 8));
            if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
                avio_wb16(dyn_bc, rescale_rational(mix->end_point_value, 1 << 8));
            if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
                avio_wb16(dyn_bc, rescale_rational(mix->control_point_value, 1 << 8));
                avio_w8(dyn_bc, av_clip_uint8(av_rescale(mix->control_point_relative_time.num, 1 << 8,
                                                         mix->control_point_relative_time.den)));
            }
            break;
        }
        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
            const AVIAMFDemixingInfo *demix = subblock;

            if (!param_definition->mode && param->constant_subblock_duration == 0)
                ffio_write_leb(dyn_bc, demix->subblock_duration);

            avio_w8(dyn_bc, demix->dmixp_mode << 5);
            break;
        }
        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
            const AVIAMFReconGain *recon = subblock;
            // Check the intermediate pointer as well, so a definition that
            // is not attached to an audio element is rejected rather than
            // dereferenced.
            const AVIAMFAudioElement *audio_element =
                param_definition->audio_element ? param_definition->audio_element->celement
                                                : NULL;

            if (!audio_element) {
                av_log(s, AV_LOG_ERROR, "Invalid Parameter Definition with ID %u referenced by a packet\n", param->parameter_id);
                // Leave through the common exit so dyn_bc is not leaked.
                ret = AVERROR(EINVAL);
                goto end;
            }

            if (!param_definition->mode && param->constant_subblock_duration == 0)
                ffio_write_leb(dyn_bc, recon->subblock_duration);

            for (int j = 0; j < audio_element->nb_layers; j++) {
                const AVIAMFLayer *layer = audio_element->layers[j];

                if (layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN) {
                    unsigned int recon_gain_flags = 0;
                    int k = 0;

                    // Non-zero gains 0..6 set flag bits 0..6; gains 7..11
                    // set bits 8..12 (2 << k skips bit 7).
                    for (; k < 7; k++)
                        recon_gain_flags |= (1 << k) * !!recon->recon_gain[j][k];
                    for (; k < 12; k++)
                        recon_gain_flags |= (2 << k) * !!recon->recon_gain[j][k];
                    // NOTE(review): k == 12 here, so this sets bit 12, which
                    // is the same bit gain index 11 uses above. Confirm that
                    // this, and not bit 7, is the intended marker bit.
                    if (recon_gain_flags >> 8)
                        recon_gain_flags |= (1 << k);

                    ffio_write_leb(dyn_bc, recon_gain_flags);
                    // Only non-zero gains are stored.
                    for (k = 0; k < 12; k++) {
                        if (recon->recon_gain[j][k])
                            avio_w8(dyn_bc, recon->recon_gain[j][k]);
                    }
                }
            }
            break;
        }
        default:
            av_assert0(0);
        }
    }

    // Payload size first, then the payload itself.
    dyn_size = avio_get_dyn_buf(dyn_bc, &dyn_buf);
    ffio_write_leb(s->pb, dyn_size);
    avio_write(s->pb, dyn_buf, dyn_size);
    ret = 0;

end:
    ffio_free_dyn_buf(&dyn_bc);

    return ret;
}
/**
 * Write one packet as an audio frame OBU, preceded by parameter block OBUs
 * when the packet belongs to the first stream and carries IAMF parameter
 * side data.
 *
 * An empty packet is accepted only if it carries new-extradata side data;
 * nothing is written for it.
 *
 * @param s   muxer context
 * @param pkt packet to write
 * @return 0 on success, AVERROR_INVALIDDATA for an empty packet without
 *         new-extradata side data, or the negative value returned by
 *         write_parameter_block() or avio_open_dyn_buf()
 */
static int iamf_write_packet(AVFormatContext *s, AVPacket *pkt)
{
    const IAMFMuxContext *const mux = s->priv_data;
    AVStream *const stream = s->streams[pkt->stream_index];
    uint8_t obu_header[MAX_IAMF_OBU_HEADER_SIZE];
    PutBitContext bits;
    AVIOContext *payload_bc;
    uint8_t *trim, *payload = NULL;
    unsigned int skip = 0, discard = 0;
    size_t trim_size;
    int payload_size, obu_type, has_trim, err;

    if (pkt->size == 0) {
        if (!av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, NULL))
            return AVERROR_INVALIDDATA;

        // TODO: update FLAC Streaminfo on seekable output

        return 0;
    }

    if (s->nb_stream_groups && stream->id == mux->first_stream_id) {
        // Parameter blocks are written in this fixed order: mix gain,
        // demixing info, recon gain. Missing side data is skipped.
        const AVIAMFParamDefinition *const blocks[] = {
            (const AVIAMFParamDefinition *)av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, NULL),
            (const AVIAMFParamDefinition *)av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, NULL),
            (const AVIAMFParamDefinition *)av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, NULL),
        };

        for (size_t n = 0; n < sizeof(blocks) / sizeof(blocks[0]); n++) {
            if (!blocks[n])
                continue;
            err = write_parameter_block(s, blocks[n]);
            if (err < 0)
                return err;
        }
    }

    // Skip-samples side data: skip count at offset 0, discard padding at
    // offset 4, both little-endian 32-bit.
    trim = av_packet_get_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, &trim_size);
    if (trim && trim_size >= 10) {
        skip    = AV_RL32(trim);
        discard = AV_RL32(trim + 4);
    }
    has_trim = skip || discard;

    err = avio_open_dyn_buf(&payload_bc);
    if (err < 0)
        return err;

    // Stream IDs up to 17 are folded into the OBU type; larger IDs use the
    // generic audio frame type and are written explicitly below.
    if (stream->id <= 17)
        obu_type = stream->id + IAMF_OBU_IA_AUDIO_FRAME_ID0;
    else
        obu_type = IAMF_OBU_IA_AUDIO_FRAME;

    init_put_bits(&bits, obu_header, sizeof(obu_header));
    put_bits(&bits, 5, obu_type);
    put_bits(&bits, 1, 0); // obu_redundant_copy
    put_bits(&bits, 1, has_trim);
    put_bits(&bits, 1, 0); // obu_extension_flag
    flush_put_bits(&bits);
    avio_write(s->pb, obu_header, put_bytes_count(&bits, 1));

    if (has_trim) {
        ffio_write_leb(payload_bc, discard);
        ffio_write_leb(payload_bc, skip);
    }

    if (stream->id > 17)
        ffio_write_leb(payload_bc, stream->id);

    // The OBU size covers the buffered fields plus the packet data.
    payload_size = avio_get_dyn_buf(payload_bc, &payload);
    ffio_write_leb(s->pb, payload_size + pkt->size);
    avio_write(s->pb, payload, payload_size);
    ffio_free_dyn_buf(&payload_bc);
    avio_write(s->pb, pkt->data, pkt->size);

    return 0;
}
/**
 * Release the muxer's IAMF state by passing it to ff_iamf_uninit_context().
 *
 * @param s muxer context
 */
static void iamf_deinit(AVFormatContext *s)
{
    IAMFMuxContext *const mux = s->priv_data;

    ff_iamf_uninit_context(&mux->iamf);
}
/*
 * Codec ID <-> codec tag pairs accepted by this muxer. The tags are the same
 * four that iamf_init() checks for; all PCM variants share the 'ipcm' tag.
 */
static const AVCodecTag iamf_codec_tags[] = {
{ AV_CODEC_ID_AAC, MKTAG('m','p','4','a') },
{ AV_CODEC_ID_FLAC, MKTAG('f','L','a','C') },
{ AV_CODEC_ID_OPUS, MKTAG('O','p','u','s') },
{ AV_CODEC_ID_PCM_S16LE, MKTAG('i','p','c','m') },
{ AV_CODEC_ID_PCM_S16BE, MKTAG('i','p','c','m') },
{ AV_CODEC_ID_PCM_S24LE, MKTAG('i','p','c','m') },
{ AV_CODEC_ID_PCM_S24BE, MKTAG('i','p','c','m') },
{ AV_CODEC_ID_PCM_S32LE, MKTAG('i','p','c','m') },
{ AV_CODEC_ID_PCM_S32BE, MKTAG('i','p','c','m') },
/* NOTE(review): presumably the AV_CODEC_ID_NONE entry terminates the table
 * and its tag value is not used — confirm against the AVCodecTag consumers. */
{ AV_CODEC_ID_NONE, MKTAG('i','p','c','m') }
};
/*
 * Muxer definition: ties the callbacks and the codec tag table defined in
 * this file together under the "iamf" name.
 */
const FFOutputFormat ff_iamf_muxer = {
    /* Public description. */
    .p.name         = "iamf",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
    .p.extensions   = "iamf",
    .p.audio_codec  = AV_CODEC_ID_OPUS,
    .p.codec_tag    = (const AVCodecTag* const []){ iamf_codec_tags, NULL },
    .p.flags        = AVFMT_GLOBALHEADER | AVFMT_NOTIMESTAMPS,

    /* Private context and callbacks. */
    .priv_data_size = sizeof(IAMFMuxContext),
    .init           = iamf_init,
    .write_header   = iamf_write_header,
    .write_packet   = iamf_write_packet,
    .deinit         = iamf_deinit,
};