Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
		
			
				
	
	
		
			397 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			397 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * TTML subtitle encoder
 | |
|  * Copyright (c) 2020 24i
 | |
|  *
 | |
|  * This file is part of FFmpeg.
 | |
|  *
 | |
|  * FFmpeg is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU Lesser General Public
 | |
|  * License as published by the Free Software Foundation; either
 | |
|  * version 2.1 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * FFmpeg is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|  * Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public
 | |
|  * License along with FFmpeg; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * @file
 | |
|  * TTML subtitle encoder
 | |
|  * @see https://www.w3.org/TR/ttml1/
 | |
|  * @see https://www.w3.org/TR/ttml2/
 | |
|  * @see https://www.w3.org/TR/ttml-imsc/rec
 | |
|  */
 | |
| 
 | |
| #include "avcodec.h"
 | |
| #include "internal.h"
 | |
| #include "libavutil/avstring.h"
 | |
| #include "libavutil/bprint.h"
 | |
| #include "libavutil/internal.h"
 | |
| #include "ass_split.h"
 | |
| #include "ass.h"
 | |
| #include "ttmlenc.h"
 | |
| 
 | |
| typedef struct {
 | |
|     AVCodecContext *avctx;
 | |
|     ASSSplitContext *ass_ctx;
 | |
|     AVBPrint buffer;
 | |
| } TTMLContext;
 | |
| 
 | |
| static void ttml_text_cb(void *priv, const char *text, int len)
 | |
| {
 | |
|     TTMLContext *s = priv;
 | |
|     AVBPrint cur_line = { 0 };
 | |
|     AVBPrint *buffer = &s->buffer;
 | |
| 
 | |
|     av_bprint_init(&cur_line, len, AV_BPRINT_SIZE_UNLIMITED);
 | |
| 
 | |
|     av_bprint_append_data(&cur_line, text, len);
 | |
|     if (!av_bprint_is_complete(&cur_line)) {
 | |
|         av_log(s->avctx, AV_LOG_ERROR,
 | |
|                "Failed to move the current subtitle dialog to AVBPrint!\n");
 | |
|         av_bprint_finalize(&cur_line, NULL);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
| 
 | |
|     av_bprint_escape(buffer, cur_line.str, NULL, AV_ESCAPE_MODE_XML,
 | |
|                      0);
 | |
| 
 | |
|     av_bprint_finalize(&cur_line, NULL);
 | |
| }
 | |
| 
 | |
| static void ttml_new_line_cb(void *priv, int forced)
 | |
| {
 | |
|     TTMLContext *s = priv;
 | |
| 
 | |
|     av_bprintf(&s->buffer, "<br/>");
 | |
| }
 | |
| 
 | |
| static const ASSCodesCallbacks ttml_callbacks = {
 | |
|     .text             = ttml_text_cb,
 | |
|     .new_line         = ttml_new_line_cb,
 | |
| };
 | |
| 
 | |
| static int ttml_encode_frame(AVCodecContext *avctx, uint8_t *buf,
 | |
|                              int bufsize, const AVSubtitle *sub)
 | |
| {
 | |
|     TTMLContext *s = avctx->priv_data;
 | |
|     ASSDialog *dialog;
 | |
|     int i;
 | |
| 
 | |
|     av_bprint_clear(&s->buffer);
 | |
| 
 | |
|     for (i=0; i<sub->num_rects; i++) {
 | |
|         const char *ass = sub->rects[i]->ass;
 | |
|         int ret;
 | |
| 
 | |
|         if (sub->rects[i]->type != SUBTITLE_ASS) {
 | |
|             av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
 | |
|             return AVERROR(EINVAL);
 | |
|         }
 | |
| 
 | |
|         dialog = ff_ass_split_dialog(s->ass_ctx, ass);
 | |
|         if (!dialog)
 | |
|             return AVERROR(ENOMEM);
 | |
| 
 | |
|         if (dialog->style) {
 | |
|             av_bprintf(&s->buffer, "<span region=\"");
 | |
|             av_bprint_escape(&s->buffer, dialog->style, NULL,
 | |
|                              AV_ESCAPE_MODE_XML,
 | |
|                              AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
 | |
|             av_bprintf(&s->buffer, "\">");
 | |
|         }
 | |
| 
 | |
|         ret = ff_ass_split_override_codes(&ttml_callbacks, s, dialog->text);
 | |
|         if (ret < 0) {
 | |
|             int log_level = (ret != AVERROR_INVALIDDATA ||
 | |
|                              avctx->err_recognition & AV_EF_EXPLODE) ?
 | |
|                             AV_LOG_ERROR : AV_LOG_WARNING;
 | |
|             av_log(avctx, log_level,
 | |
|                    "Splitting received ASS dialog text %s failed: %s\n",
 | |
|                    dialog->text,
 | |
|                    av_err2str(ret));
 | |
| 
 | |
|             if (log_level == AV_LOG_ERROR) {
 | |
|                 ff_ass_free_dialog(&dialog);
 | |
|                 return ret;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (dialog->style)
 | |
|             av_bprintf(&s->buffer, "</span>");
 | |
| 
 | |
|         ff_ass_free_dialog(&dialog);
 | |
|     }
 | |
| 
 | |
|     if (!av_bprint_is_complete(&s->buffer))
 | |
|         return AVERROR(ENOMEM);
 | |
|     if (!s->buffer.len)
 | |
|         return 0;
 | |
| 
 | |
|     // force null-termination, so in case our destination buffer is
 | |
|     // too small, the return value is larger than bufsize minus null.
 | |
|     if (av_strlcpy(buf, s->buffer.str, bufsize) > bufsize - 1) {
 | |
|         av_log(avctx, AV_LOG_ERROR, "Buffer too small for TTML event.\n");
 | |
|         return AVERROR_BUFFER_TOO_SMALL;
 | |
|     }
 | |
| 
 | |
|     return s->buffer.len;
 | |
| }
 | |
| 
 | |
| static av_cold int ttml_encode_close(AVCodecContext *avctx)
 | |
| {
 | |
|     TTMLContext *s = avctx->priv_data;
 | |
| 
 | |
|     ff_ass_split_free(s->ass_ctx);
 | |
| 
 | |
|     av_bprint_finalize(&s->buffer, NULL);
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static const char *ttml_get_display_alignment(int alignment)
 | |
| {
 | |
|     switch (alignment) {
 | |
|     case 1:
 | |
|     case 2:
 | |
|     case 3:
 | |
|         return "after";
 | |
|     case 4:
 | |
|     case 5:
 | |
|     case 6:
 | |
|         return "center";
 | |
|     case 7:
 | |
|     case 8:
 | |
|     case 9:
 | |
|         return "before";
 | |
|     default:
 | |
|         return NULL;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static const char *ttml_get_text_alignment(int alignment)
 | |
| {
 | |
|     switch (alignment) {
 | |
|     case 1:
 | |
|     case 4:
 | |
|     case 7:
 | |
|         return "left";
 | |
|     case 2:
 | |
|     case 5:
 | |
|     case 8:
 | |
|         return "center";
 | |
|     case 3:
 | |
|     case 6:
 | |
|     case 9:
 | |
|         return "right";
 | |
|     default:
 | |
|         return NULL;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void ttml_get_origin(ASSScriptInfo script_info, ASSStyle style,
 | |
|                            int *origin_left, int *origin_top)
 | |
| {
 | |
|     *origin_left = av_rescale(style.margin_l, 100, script_info.play_res_x);
 | |
|     *origin_top  =
 | |
|         av_rescale((style.alignment >= 7) ? style.margin_v : 0,
 | |
|                    100, script_info.play_res_y);
 | |
| }
 | |
| 
 | |
| static void ttml_get_extent(ASSScriptInfo script_info, ASSStyle style,
 | |
|                            int *width, int *height)
 | |
| {
 | |
|     *width  = av_rescale(script_info.play_res_x - style.margin_r,
 | |
|                          100, script_info.play_res_x);
 | |
|     *height = av_rescale((style.alignment <= 3) ?
 | |
|                          script_info.play_res_y - style.margin_v :
 | |
|                          script_info.play_res_y,
 | |
|                          100, script_info.play_res_y);
 | |
| }
 | |
| 
 | |
| static int ttml_write_region(AVCodecContext *avctx, AVBPrint *buf,
 | |
|                              ASSScriptInfo script_info, ASSStyle style)
 | |
| {
 | |
|     const char *display_alignment = NULL;
 | |
|     const char *text_alignment = NULL;
 | |
|     int origin_left = 0;
 | |
|     int origin_top  = 0;
 | |
|     int width = 0;
 | |
|     int height = 0;
 | |
| 
 | |
|     if (!style.name) {
 | |
|         av_log(avctx, AV_LOG_ERROR, "Subtitle style name not set!\n");
 | |
|         return AVERROR_INVALIDDATA;
 | |
|     }
 | |
| 
 | |
|     if (style.font_size < 0) {
 | |
|         av_log(avctx, AV_LOG_ERROR, "Invalid font size for TTML: %d!\n",
 | |
|                style.font_size);
 | |
|         return AVERROR_INVALIDDATA;
 | |
|     }
 | |
| 
 | |
|     if (style.margin_l < 0 || style.margin_r < 0 || style.margin_v < 0) {
 | |
|         av_log(avctx, AV_LOG_ERROR,
 | |
|                "One or more negative margin values in subtitle style: "
 | |
|                "left: %d, right: %d, vertical: %d!\n",
 | |
|                style.margin_l, style.margin_r, style.margin_v);
 | |
|         return AVERROR_INVALIDDATA;
 | |
|     }
 | |
| 
 | |
|     display_alignment = ttml_get_display_alignment(style.alignment);
 | |
|     text_alignment = ttml_get_text_alignment(style.alignment);
 | |
|     if (!display_alignment || !text_alignment) {
 | |
|         av_log(avctx, AV_LOG_ERROR,
 | |
|                "Failed to convert ASS style alignment %d of style %s to "
 | |
|                "TTML display and text alignment!\n",
 | |
|                style.alignment,
 | |
|                style.name);
 | |
|         return AVERROR_INVALIDDATA;
 | |
|     }
 | |
| 
 | |
|     ttml_get_origin(script_info, style, &origin_left, &origin_top);
 | |
|     ttml_get_extent(script_info, style, &width, &height);
 | |
| 
 | |
|     av_bprintf(buf, "      <region xml:id=\"");
 | |
|     av_bprint_escape(buf, style.name, NULL, AV_ESCAPE_MODE_XML,
 | |
|                      AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
 | |
|     av_bprintf(buf, "\"\n");
 | |
| 
 | |
|     av_bprintf(buf, "        tts:origin=\"%d%% %d%%\"\n",
 | |
|                origin_left, origin_top);
 | |
|     av_bprintf(buf, "        tts:extent=\"%d%% %d%%\"\n",
 | |
|                width, height);
 | |
| 
 | |
|     av_bprintf(buf, "        tts:displayAlign=\"");
 | |
|     av_bprint_escape(buf, display_alignment, NULL, AV_ESCAPE_MODE_XML,
 | |
|                      AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
 | |
|     av_bprintf(buf, "\"\n");
 | |
| 
 | |
|     av_bprintf(buf, "        tts:textAlign=\"");
 | |
|     av_bprint_escape(buf, text_alignment, NULL, AV_ESCAPE_MODE_XML,
 | |
|                      AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
 | |
|     av_bprintf(buf, "\"\n");
 | |
| 
 | |
|     // if we set cell resolution to our script reference resolution,
 | |
|     // then a single line is a single "point" on our canvas. Thus, by setting
 | |
|     // our font size to font size in cells, we should gain a similar enough
 | |
|     // scale without resorting to explicit pixel based font sizing, which is
 | |
|     // frowned upon in the TTML community.
 | |
|     av_bprintf(buf, "        tts:fontSize=\"%dc\"\n",
 | |
|                style.font_size);
 | |
| 
 | |
|     if (style.font_name) {
 | |
|         av_bprintf(buf, "        tts:fontFamily=\"");
 | |
|         av_bprint_escape(buf, style.font_name, NULL, AV_ESCAPE_MODE_XML,
 | |
|                          AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
 | |
|         av_bprintf(buf, "\"\n");
 | |
|     }
 | |
| 
 | |
|     av_bprintf(buf, "        tts:overflow=\"visible\" />\n");
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static int ttml_write_header_content(AVCodecContext *avctx)
 | |
| {
 | |
|     TTMLContext *s = avctx->priv_data;
 | |
|     ASS *ass = (ASS *)s->ass_ctx;
 | |
|     ASSScriptInfo script_info = ass->script_info;
 | |
|     const size_t base_extradata_size = TTMLENC_EXTRADATA_SIGNATURE_SIZE + 1 +
 | |
|                                        AV_INPUT_BUFFER_PADDING_SIZE;
 | |
|     size_t additional_extradata_size = 0;
 | |
| 
 | |
|     if (script_info.play_res_x <= 0 || script_info.play_res_y <= 0) {
 | |
|         av_log(avctx, AV_LOG_ERROR,
 | |
|                "Invalid subtitle reference resolution %dx%d!\n",
 | |
|                script_info.play_res_x, script_info.play_res_y);
 | |
|         return AVERROR_INVALIDDATA;
 | |
|     }
 | |
| 
 | |
|     // write the first string in extradata, attributes in the base "tt" element.
 | |
|     av_bprintf(&s->buffer, ttml_default_namespacing);
 | |
|     // the cell resolution is in character cells, so not exactly 1:1 against
 | |
|     // a pixel based resolution, but as the tts:extent in the root
 | |
|     // "tt" element is frowned upon (and disallowed in the EBU-TT profile),
 | |
|     // we mimic the reference resolution by setting it as the cell resolution.
 | |
|     av_bprintf(&s->buffer, "  ttp:cellResolution=\"%d %d\"\n",
 | |
|                script_info.play_res_x, script_info.play_res_y);
 | |
|     av_bprint_chars(&s->buffer, '\0', 1);
 | |
| 
 | |
|     // write the second string in extradata, head element containing the styles
 | |
|     av_bprintf(&s->buffer, "  <head>\n");
 | |
|     av_bprintf(&s->buffer, "    <layout>\n");
 | |
| 
 | |
|     for (int i = 0; i < ass->styles_count; i++) {
 | |
|         int ret = ttml_write_region(avctx, &s->buffer, script_info,
 | |
|                                     ass->styles[i]);
 | |
|         if (ret < 0)
 | |
|             return ret;
 | |
|     }
 | |
| 
 | |
|     av_bprintf(&s->buffer, "    </layout>\n");
 | |
|     av_bprintf(&s->buffer, "  </head>\n");
 | |
|     av_bprint_chars(&s->buffer, '\0', 1);
 | |
| 
 | |
|     if (!av_bprint_is_complete(&s->buffer)) {
 | |
|         return AVERROR(ENOMEM);
 | |
|     }
 | |
| 
 | |
|     additional_extradata_size = s->buffer.len;
 | |
| 
 | |
|     if (!(avctx->extradata =
 | |
|             av_mallocz(base_extradata_size + additional_extradata_size))) {
 | |
|         return AVERROR(ENOMEM);
 | |
|     }
 | |
| 
 | |
|     avctx->extradata_size =
 | |
|         TTMLENC_EXTRADATA_SIGNATURE_SIZE + additional_extradata_size;
 | |
|     memcpy(avctx->extradata, TTMLENC_EXTRADATA_SIGNATURE,
 | |
|            TTMLENC_EXTRADATA_SIGNATURE_SIZE);
 | |
| 
 | |
|     if (additional_extradata_size)
 | |
|         memcpy(avctx->extradata + TTMLENC_EXTRADATA_SIGNATURE_SIZE,
 | |
|                s->buffer.str, additional_extradata_size);
 | |
| 
 | |
|     av_bprint_clear(&s->buffer);
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static av_cold int ttml_encode_init(AVCodecContext *avctx)
 | |
| {
 | |
|     TTMLContext *s = avctx->priv_data;
 | |
|     int ret = AVERROR_BUG;
 | |
|     s->avctx   = avctx;
 | |
| 
 | |
|     av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
 | |
| 
 | |
|     if (!(s->ass_ctx = ff_ass_split(avctx->subtitle_header))) {
 | |
|         return AVERROR_INVALIDDATA;
 | |
|     }
 | |
| 
 | |
|     if ((ret = ttml_write_header_content(avctx)) < 0) {
 | |
|         return ret;
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| const AVCodec ff_ttml_encoder = {
 | |
|     .name           = "ttml",
 | |
|     .long_name      = NULL_IF_CONFIG_SMALL("TTML subtitle"),
 | |
|     .type           = AVMEDIA_TYPE_SUBTITLE,
 | |
|     .id             = AV_CODEC_ID_TTML,
 | |
|     .priv_data_size = sizeof(TTMLContext),
 | |
|     .init           = ttml_encode_init,
 | |
|     .encode_sub     = ttml_encode_frame,
 | |
|     .close          = ttml_encode_close,
 | |
|     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
 | |
| };
 |