ffmpeg: add video heartbeat capability to fix_sub_duration
Splits the currently handled subtitle at the random access point packets of an output stream that is configured as the heartbeat stream. Currently only subtitle streams which are directly mapped into the same output in which the heartbeat stream resides are affected. This way the subtitle - which is known to be shown at this time - can be split and passed to the muxer before its full duration is known. This is also a drawback, as it essentially outputs multiple subtitles from a single input subtitle that continues over multiple random access points. Thus this feature should not be utilized in cases where subtitle output latency does not matter. Co-authored-by: Andrzej Nadachowski <andrzej.nadachowski@24i.com> Co-authored-by: Bernard Boulay <bernard.boulay@24i.com> Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
This commit is contained in:
		
							parent
							
								
									746d27455b
								
							
						
					
					
						commit
						9a820ec8b1
					
				| @ -36,6 +36,7 @@ version <next>: | ||||
| - hstack_vaapi, vstack_vaapi and xstack_vaapi filters | ||||
| - XMD ADPCM decoder and demuxer | ||||
| - media100 to mjpegb bsf | ||||
| - ffmpeg CLI new option: -fix_sub_duration_heartbeat | ||||
| 
 | ||||
| 
 | ||||
| version 5.1: | ||||
|  | ||||
| @ -1342,6 +1342,22 @@ List all hardware acceleration components enabled in this build of ffmpeg. | ||||
| Actual runtime availability depends on the hardware and its suitable driver | ||||
| being installed. | ||||
| 
 | ||||
| @item -fix_sub_duration_heartbeat[:@var{stream_specifier}] | ||||
| Set a specific output video stream as the heartbeat stream according to which | ||||
| to split and push through the currently in-progress subtitle upon receipt of a | ||||
| random access packet. | ||||
| 
 | ||||
| This lowers the latency of subtitles for which the end packet or the following | ||||
| subtitle has not yet been received. As a drawback, this will most likely lead | ||||
| to duplication of subtitle events in order to cover the full duration, so | ||||
| this option should not be utilized when the latency with which a subtitle | ||||
| event is passed on to the output is not relevant. | ||||
| 
 | ||||
| Requires @option{-fix_sub_duration} to be set for the relevant input subtitle | ||||
| stream for this to have any effect. The input subtitle stream must also be | ||||
| directly mapped to the same output in which the heartbeat stream | ||||
| resides. | ||||
| 
 | ||||
| @end table | ||||
| 
 | ||||
| @section Audio Options | ||||
|  | ||||
							
								
								
									
										148
									
								
								fftools/ffmpeg.c
									
									
									
									
									
								
							
							
						
						
									
										148
									
								
								fftools/ffmpeg.c
									
									
									
									
									
								
							| @ -126,6 +126,7 @@ typedef struct BenchmarkTimeStamps { | ||||
|     int64_t sys_usec; | ||||
| } BenchmarkTimeStamps; | ||||
| 
 | ||||
| static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt); | ||||
| static BenchmarkTimeStamps get_benchmark_time_stamps(void); | ||||
| static int64_t getmaxrss(void); | ||||
| static int ifilter_has_all_input_formats(FilterGraph *fg); | ||||
| @ -953,6 +954,13 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame) | ||||
|                    av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, &enc->time_base)); | ||||
|         } | ||||
| 
 | ||||
|         if ((ret = trigger_fix_sub_duration_heartbeat(ost, pkt)) < 0) { | ||||
|             av_log(NULL, AV_LOG_ERROR, | ||||
|                    "Subtitle heartbeat logic failed in %s! (%s)\n", | ||||
|                    __func__, av_err2str(ret)); | ||||
|             exit_program(1); | ||||
|         } | ||||
| 
 | ||||
|         ost->data_size_enc += pkt->size; | ||||
| 
 | ||||
|         ost->packets_encoded++; | ||||
| @ -1912,6 +1920,16 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p | ||||
| 
 | ||||
|     opkt->duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase); | ||||
| 
 | ||||
|     { | ||||
|         int ret = trigger_fix_sub_duration_heartbeat(ost, pkt); | ||||
|         if (ret < 0) { | ||||
|             av_log(NULL, AV_LOG_ERROR, | ||||
|                    "Subtitle heartbeat logic failed in %s! (%s)\n", | ||||
|                    __func__, av_err2str(ret)); | ||||
|             exit_program(1); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     of_output_packet(of, opkt, ost, 0); | ||||
| 
 | ||||
|     ost->streamcopy_started = 1; | ||||
| @ -2355,6 +2373,136 @@ out: | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
/*
 * Deep-copy the subtitle *src into *dst.
 *
 * The scalar fields (format, start/end display time, pts) are copied into a
 * local temporary. Each rect is then duplicated: the rect struct itself, its
 * text and ass strings when they are set, and every non-NULL data plane
 * together with its linesize.
 *
 * *dst is assigned only on success (this includes a source without rects).
 * On failure *dst is left untouched and the partially built copy is released
 * with avsubtitle_free().
 *
 * Returns 0 on success, AVERROR(ENOMEM) when any allocation fails.
 *
 * NOTE(review): the plane size h * linesize[j] is multiplied in the operands'
 * own type before being stored in a size_t - confirm it cannot overflow.
 */
| static int copy_av_subtitle(AVSubtitle *dst, AVSubtitle *src) | ||||
| { | ||||
|     int ret = AVERROR_BUG; | ||||
|     AVSubtitle tmp = { | ||||
|         .format = src->format, | ||||
|         .start_display_time = src->start_display_time, | ||||
|         .end_display_time = src->end_display_time, | ||||
|         .num_rects = 0, | ||||
|         .rects = NULL, | ||||
|         .pts = src->pts | ||||
|     }; | ||||
|     // a subtitle without rects needs no allocation; only the scalar fields are handed back
 | ||||
|     if (!src->num_rects) | ||||
|         goto success; | ||||
| 
 | ||||
|     if (!(tmp.rects = av_calloc(src->num_rects, sizeof(*tmp.rects)))) | ||||
|         return AVERROR(ENOMEM); | ||||
| 
 | ||||
|     for (int i = 0; i < src->num_rects; i++) { | ||||
|         AVSubtitleRect *src_rect = src->rects[i]; | ||||
|         AVSubtitleRect *dst_rect; | ||||
| 
 | ||||
|         if (!(dst_rect = tmp.rects[i] = av_mallocz(sizeof(*tmp.rects[0])))) { | ||||
|             ret = AVERROR(ENOMEM); | ||||
|             goto cleanup; | ||||
|         } | ||||
|         // counted right after its pointer is stored; presumably so avsubtitle_free() in cleanup covers this rect - confirm
 | ||||
|         tmp.num_rects++; | ||||
| 
 | ||||
|         dst_rect->type      = src_rect->type; | ||||
|         dst_rect->flags     = src_rect->flags; | ||||
| 
 | ||||
|         dst_rect->x         = src_rect->x; | ||||
|         dst_rect->y         = src_rect->y; | ||||
|         dst_rect->w         = src_rect->w; | ||||
|         dst_rect->h         = src_rect->h; | ||||
|         dst_rect->nb_colors = src_rect->nb_colors; | ||||
| 
 | ||||
|         if (src_rect->text) | ||||
|             if (!(dst_rect->text = av_strdup(src_rect->text))) { | ||||
|                 ret = AVERROR(ENOMEM); | ||||
|                 goto cleanup; | ||||
|             } | ||||
| 
 | ||||
|         if (src_rect->ass) | ||||
|             if (!(dst_rect->ass = av_strdup(src_rect->ass))) { | ||||
|                 ret = AVERROR(ENOMEM); | ||||
|                 goto cleanup; | ||||
|             } | ||||
| 
 | ||||
|         for (int j = 0; j < 4; j++) { | ||||
|             // SUBTITLE_BITMAP images are special in the sense that they
 | ||||
|             // are like PAL8 images. first pointer to data, second to
 | ||||
|             // palette. This makes the size calculation match this.
 | ||||
|             size_t buf_size = src_rect->type == SUBTITLE_BITMAP && j == 1 ? | ||||
|                               AVPALETTE_SIZE : | ||||
|                               src_rect->h * src_rect->linesize[j]; | ||||
|             // planes that are NULL in the source are not assigned in the copy, nor is their linesize
 | ||||
|             if (!src_rect->data[j]) | ||||
|                 continue; | ||||
| 
 | ||||
|             if (!(dst_rect->data[j] = av_memdup(src_rect->data[j], buf_size))) { | ||||
|                 ret = AVERROR(ENOMEM); | ||||
|                 goto cleanup; | ||||
|             } | ||||
|             dst_rect->linesize[j] = src_rect->linesize[j]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| success: | ||||
|     *dst = tmp; | ||||
| 
 | ||||
|     return 0; | ||||
| 
 | ||||
| cleanup: | ||||
|     avsubtitle_free(&tmp); | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
/*
 * Feed a re-timed copy of the subtitle held in ist->prev_sub to
 * process_subtitle().
 *
 * Returns 0 without doing anything when fix_sub_duration is not enabled on
 * ist, when the held subtitle has no rects, or when signal_pts is not later
 * than the held subtitle's pts. Otherwise the held subtitle is duplicated
 * with copy_av_subtitle(), the copy's pts is set to signal_pts, and the copy
 * is passed to process_subtitle() with got_output preset to 1.
 *
 * signal_pts is compared directly against the held subtitle's pts; the caller
 * in this file passes a value rescaled to AV_TIME_BASE_Q.
 *
 * Returns a negative error code if the copy fails, otherwise whatever
 * process_subtitle() returns.
 *
 * NOTE(review): the copy is never freed in this function, so presumably
 * process_subtitle() takes ownership of it - confirm against its definition.
 */
| static int fix_sub_duration_heartbeat(InputStream *ist, int64_t signal_pts) | ||||
| { | ||||
|     int ret = AVERROR_BUG; | ||||
|     int got_output = 1; | ||||
|     AVSubtitle *prev_subtitle = &ist->prev_sub.subtitle; | ||||
|     AVSubtitle subtitle; | ||||
|     // only act on a held, non-empty subtitle whose pts lies before the signal
 | ||||
|     if (!ist->fix_sub_duration || !prev_subtitle->num_rects || | ||||
|         signal_pts <= prev_subtitle->pts) | ||||
|         return 0; | ||||
| 
 | ||||
|     if ((ret = copy_av_subtitle(&subtitle, prev_subtitle)) < 0) | ||||
|         return ret; | ||||
| 
 | ||||
|     subtitle.pts = signal_pts; | ||||
| 
 | ||||
|     return process_subtitle(ist, &subtitle, &got_output); | ||||
| } | ||||
| 
 | ||||
/*
 * Run the subtitle heartbeat for a packet of output stream ost.
 *
 * Returns 0 immediately unless ost has fix_sub_duration_heartbeat set and pkt
 * carries AV_PKT_FLAG_KEY. Otherwise pkt->pts, rescaled from
 * ost->mux_timebase to AV_TIME_BASE_Q, is handed to
 * fix_sub_duration_heartbeat() for every other stream of the same output file
 * that has an input stream, needs decoding and is a subtitle stream.
 *
 * Returns 0 on success, or the first negative error code returned by
 * fix_sub_duration_heartbeat().
 *
 * NOTE(review): pkt->pts is always interpreted in ost->mux_timebase. The
 * do_streamcopy() call site visible in this change passes its input packet,
 * whose duration it rescales from ist->st->time_base - confirm both time
 * bases agree on that path.
 */
| static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt) | ||||
| { | ||||
|     OutputFile *of = output_files[ost->file_index]; | ||||
|     int64_t signal_pts = av_rescale_q(pkt->pts, ost->mux_timebase, | ||||
|                                       AV_TIME_BASE_Q); | ||||
|     // NOTE(review): the rescale above runs before this guard, i.e. for every packet, even when unused
 | ||||
|     if (!ost->fix_sub_duration_heartbeat || !(pkt->flags & AV_PKT_FLAG_KEY)) | ||||
|         // we are only interested in heartbeats on streams configured, and
 | ||||
|         // only on random access points.
 | ||||
|         return 0; | ||||
| 
 | ||||
|     for (int i = 0; i < of->nb_streams; i++) { | ||||
|         OutputStream *iter_ost = of->streams[i]; | ||||
|         InputStream  *ist      = iter_ost->ist; | ||||
|         int ret = AVERROR_BUG; | ||||
| 
 | ||||
|         if (iter_ost == ost || !ist || !ist->decoding_needed || | ||||
|             ist->dec_ctx->codec_type != AVMEDIA_TYPE_SUBTITLE) | ||||
|             // We wish to skip the stream that causes the heartbeat,
 | ||||
|             // output streams without an input stream, streams not decoded
 | ||||
|             // (as fix_sub_duration is only done for decoded subtitles) as
 | ||||
|             // well as non-subtitle streams.
 | ||||
|             continue; | ||||
|         // the first failing subtitle stream aborts the loop; later streams are not visited
 | ||||
|         if ((ret = fix_sub_duration_heartbeat(ist, signal_pts)) < 0) | ||||
|             return ret; | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output, | ||||
|                                int *decode_failed) | ||||
| { | ||||
|  | ||||
| @ -224,6 +224,8 @@ typedef struct OptionsContext { | ||||
|     int        nb_reinit_filters; | ||||
|     SpecifierOpt *fix_sub_duration; | ||||
|     int        nb_fix_sub_duration; | ||||
|     SpecifierOpt *fix_sub_duration_heartbeat; | ||||
|     int        nb_fix_sub_duration_heartbeat; | ||||
|     SpecifierOpt *canvas_sizes; | ||||
|     int        nb_canvas_sizes; | ||||
|     SpecifierOpt *pass; | ||||
| @ -675,6 +677,12 @@ typedef struct OutputStream { | ||||
| 
 | ||||
|     EncStats enc_stats_pre; | ||||
|     EncStats enc_stats_post; | ||||
| 
 | ||||
|     /*
 | ||||
|      * bool on whether this stream should be utilized for splitting | ||||
|      * subtitles utilizing fix_sub_duration at random access points. | ||||
|      */ | ||||
|     unsigned int fix_sub_duration_heartbeat; | ||||
| } OutputStream; | ||||
| 
 | ||||
| typedef struct OutputFile { | ||||
|  | ||||
| @ -61,6 +61,7 @@ static const char *const opt_name_enc_stats_pre_fmt[]         = {"enc_stats_pre_ | ||||
| static const char *const opt_name_enc_stats_post_fmt[]        = {"enc_stats_post_fmt", NULL}; | ||||
| static const char *const opt_name_filters[]                   = {"filter", "af", "vf", NULL}; | ||||
| static const char *const opt_name_filter_scripts[]            = {"filter_script", NULL}; | ||||
| static const char *const opt_name_fix_sub_duration_heartbeat[] = {"fix_sub_duration_heartbeat", NULL}; | ||||
| static const char *const opt_name_fps_mode[]                  = {"fps_mode", NULL}; | ||||
| static const char *const opt_name_force_fps[]                 = {"force_fps", NULL}; | ||||
| static const char *const opt_name_forced_key_frames[]         = {"forced_key_frames", NULL}; | ||||
| @ -614,6 +615,9 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o, | ||||
|     MATCH_PER_STREAM_OPT(bits_per_raw_sample, i, ost->bits_per_raw_sample, | ||||
|                          oc, st); | ||||
| 
 | ||||
|     MATCH_PER_STREAM_OPT(fix_sub_duration_heartbeat, i, ost->fix_sub_duration_heartbeat, | ||||
|                          oc, st); | ||||
| 
 | ||||
|     if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx) | ||||
|         ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; | ||||
| 
 | ||||
|  | ||||
| @ -1658,6 +1658,11 @@ const OptionDef options[] = { | ||||
|     { "autoscale",        HAS_ARG | OPT_BOOL | OPT_SPEC | | ||||
|                           OPT_EXPERT | OPT_OUTPUT,                               { .off = OFFSET(autoscale) }, | ||||
|         "automatically insert a scale filter at the end of the filter graph" }, | ||||
|     { "fix_sub_duration_heartbeat", OPT_VIDEO | OPT_BOOL | OPT_EXPERT | | ||||
|                                     OPT_SPEC | OPT_OUTPUT,                       { .off = OFFSET(fix_sub_duration_heartbeat) }, | ||||
|         "set this video output stream to be a heartbeat stream for " | ||||
|         "fix_sub_duration, according to which subtitles should be split at " | ||||
|         "random access points" }, | ||||
| 
 | ||||
|     /* audio options */ | ||||
|     { "aframes",        OPT_AUDIO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_audio_frames }, | ||||
|  | ||||
| @ -117,6 +117,21 @@ fate-ffmpeg-fix_sub_duration: CMD = fmtstdout srt -fix_sub_duration \ | ||||
|   -real_time 1 -f lavfi \
 | ||||
|   -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]" | ||||
| 
 | ||||
| # Basic test for fix_sub_duration_heartbeat, which causes a buffered subtitle
 | ||||
| # to be pushed out when a video keyframe is received from an encoder.
 | ||||
| FATE_SAMPLES_FFMPEG-$(call FILTERDEMDECENCMUX, MOVIE, MPEGVIDEO, \ | ||||
|                            MPEG2VIDEO, SUBRIP, SRT, LAVFI_INDEV  \
 | ||||
|                            MPEGVIDEO_PARSER CCAPTION_DECODER \
 | ||||
|                            MPEG2VIDEO_ENCODER NULL_MUXER PIPE_PROTOCOL) \
 | ||||
|                            += fate-ffmpeg-fix_sub_duration_heartbeat | ||||
| fate-ffmpeg-fix_sub_duration_heartbeat: CMD = fmtstdout srt -fix_sub_duration \ | ||||
|   -real_time 1 -f lavfi \
 | ||||
|   -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]" \
 | ||||
|   -map 0:v  -map 0:s -fix_sub_duration_heartbeat:v:0 \
 | ||||
|   -c:v mpeg2video -b:v 2M -g 30 -sc_threshold 1000000000 \
 | ||||
|   -c:s srt \
 | ||||
|   -f null - | ||||
| 
 | ||||
| FATE_STREAMCOPY-$(call REMUX, MP4 MOV, EAC3_DEMUXER) += fate-copy-trac3074 | ||||
| fate-copy-trac3074: CMD = transcode eac3 $(TARGET_SAMPLES)/eac3/csi_miami_stereo_128_spx.eac3\ | ||||
|                      mp4 "-codec copy -map 0" "-codec copy" | ||||
|  | ||||
							
								
								
									
										48
									
								
								tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,48 @@ | ||||
| 1 | ||||
| 00:00:00,968 --> 00:00:01,001 | ||||
| <font face="Monospace">{\an7}(</font> | ||||
| 
 | ||||
| 2 | ||||
| 00:00:01,001 --> 00:00:01,168 | ||||
| <font face="Monospace">{\an7}(</font> | ||||
| 
 | ||||
| 3 | ||||
| 00:00:01,168 --> 00:00:01,368 | ||||
| <font face="Monospace">{\an7}(<i> inaudibl</i></font> | ||||
| 
 | ||||
| 4 | ||||
| 00:00:01,368 --> 00:00:01,568 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chat</i></font> | ||||
| 
 | ||||
| 5 | ||||
| 00:00:01,568 --> 00:00:02,002 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font> | ||||
| 
 | ||||
| 6 | ||||
| 00:00:02,002 --> 00:00:03,003 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font> | ||||
| 
 | ||||
| 7 | ||||
| 00:00:03,003 --> 00:00:03,103 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font> | ||||
| 
 | ||||
| 8 | ||||
| 00:00:03,103 --> 00:00:03,303 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> ) | ||||
| >></font> | ||||
| 
 | ||||
| 9 | ||||
| 00:00:03,303 --> 00:00:03,503 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> ) | ||||
| >> Safety rema</font> | ||||
| 
 | ||||
| 10 | ||||
| 00:00:03,504 --> 00:00:03,704 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> ) | ||||
| >> Safety remains our numb</font> | ||||
| 
 | ||||
| 11 | ||||
| 00:00:03,704 --> 00:00:04,004 | ||||
| <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> ) | ||||
| >> Safety remains our number one</font> | ||||
| 
 | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user