Line data Source code
1 : /*
2 : * GPAC - Multimedia Framework C SDK
3 : *
4 : * Authors: Jean Le Feuvre
5 : * Copyright (c) Telecom ParisTech 2000-2021
6 : * All rights reserved
7 : *
8 : * This file is part of GPAC / text import filter
9 : *
10 : * GPAC is free software; you can redistribute it and/or modify
11 : * it under the terms of the GNU Lesser General Public License as published by
12 : * the Free Software Foundation; either version 2, or (at your option)
13 : * any later version.
14 : *
15 : * GPAC is distributed in the hope that it will be useful,
16 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 : * GNU Lesser General Public License for more details.
19 : *
20 : * You should have received a copy of the GNU Lesser General Public
21 : * License along with this library; see the file COPYING. If not, write to
22 : * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 : *
24 : */
25 :
26 :
27 :
28 : #include <gpac/filters.h>
29 : #include <gpac/constants.h>
30 : #include <gpac/utf.h>
31 : #include <gpac/xml.h>
32 : #include <gpac/token.h>
33 : #include <gpac/color.h>
34 : #include <gpac/internal/media_dev.h>
35 : #include <gpac/internal/isomedia_dev.h>
36 :
37 : #ifndef GPAC_DISABLE_SWF_IMPORT
38 : /* SWF Importer */
39 : #include <gpac/internal/swf_dev.h>
40 : #endif
41 :
42 : #ifndef GPAC_DISABLE_ISOM_WRITE
43 :
/*XML namespace URI identifying TTML documents*/
#define TTML_NAMESPACE "http://www.w3.org/ns/ttml"

/*
 * Aborts an XML import routine when __str is NULL.
 * Expands to a bare if-statement and relies on names that the expanding scope must provide:
 * a GF_Err `e`, an `import` object, a `parser` with a `line` member, and an `exit:` label.
 */
#define CHECK_STR(__str) \
	if (!__str) { \
		e = gf_import_message(import, GF_BAD_PARAM, "Invalid XML formatting (line %d)", parser.line); \
		goto exit; \
	}
51 :
52 :
53 : typedef struct __txtin_ctx GF_TXTIn;
54 :
55 : struct __txtin_ctx
56 : {
57 : //opts
58 : u32 width, height, txtx, txty, fontsize;
59 : s32 zorder;
60 : const char *fontname, *lang, *ttml_zero;
61 : Bool nodefbox, noflush, webvtt, ttml_embed;
62 : u32 timescale;
63 : GF_Fraction fps;
64 : s32 ttml_dur;
65 :
66 :
67 : GF_FilterPid *ipid, *opid;
68 : char *file_name;
69 : u32 fmt;
70 : u32 playstate;
71 : //0: not seeking, 1: seek request pending, 2: seek configured, discarding packets up until start_range
72 : u32 seek_state;
73 : Double start_range;
74 :
75 : Bool is_setup;
76 :
77 : GF_Err (*text_process)(GF_Filter *filter, GF_TXTIn *ctx);
78 :
79 : s32 unicode_type;
80 :
81 : FILE *src;
82 :
83 : GF_BitStream *bs_w;
84 : Bool first_samp;
85 : Bool hdr_parsed;
86 :
87 : //state vars for srt
88 : u32 state, default_color;
89 : GF_TextSample *samp;
90 : u64 start, end, prev_end;
91 : u32 curLine;
92 : GF_StyleRecord style;
93 :
94 : //WebVTT state
95 : GF_WebVTTParser *vttparser;
96 :
97 : //TTXT state
98 : GF_DOMParser *parser;
99 : u32 cur_child_idx, nb_children, last_desc_idx;
100 : GF_List *text_descs;
101 : Bool last_sample_empty;
102 : u64 last_sample_duration;
103 : //TTML state is the same as ttxt plus the timescale and start (webvtt) for cts compute
104 : u32 txml_timescale;
105 : u32 current_tt_interval;
106 :
107 : //TTML state
108 : GF_XMLNode *root_working_copy, *body_node;
109 : GF_DOMParser *parser_working_copy;
110 : Bool non_compliant_ttml;
111 : u32 tick_rate, ttml_fps_num, ttml_fps_den, ttml_sfps;
112 : GF_List *ttml_resources;
113 : GF_List *div_nodes_list;
114 : Bool has_images;
115 :
116 : #ifndef GPAC_DISABLE_SWF_IMPORT
117 : //SWF text
118 : SWFReader *swf_parse;
119 : Bool do_suspend;
120 : #endif
121 :
122 :
123 : GF_List *intervals;
124 : };
125 :
126 : typedef struct
127 : {
128 : u32 size;
129 : u8 *data;
130 : Bool global;
131 : } TTMLRes;
132 :
133 : typedef struct
134 : {
135 : s64 begin, end;
136 : GF_List *resources;
137 : } TTMLInterval;
138 :
139 :
/*input formats stored in GF_TXTIn::fmt, as detected by gf_text_guess_format()*/
enum
{
	/*format not recognized*/
	GF_TXTIN_MODE_NONE = 0,
	/*SubRip: a line containing " --> " without a WEBVTT marker*/
	GF_TXTIN_MODE_SRT = 1,
	/*.sub: starts with '{' and contains "}{"*/
	GF_TXTIN_MODE_SUB = 2,
	/*GPAC TTXT XML*/
	GF_TXTIN_MODE_TTXT = 3,
	/*QuickTime TeXML*/
	GF_TXTIN_MODE_TEXML = 4,
	/*WebVTT*/
	GF_TXTIN_MODE_WEBVTT = 5,
	/*TTML XML*/
	GF_TXTIN_MODE_TTML = 6,
	/*Flash file ("FWS"/"CWS" signature)*/
	GF_TXTIN_MODE_SWF_SVG = 7,
};
151 :
/*
 * Strips, in place, every trailing character of __str that belongs to the set __sep.
 * Expands to a complete while-statement: call sites do NOT append a semicolon.
 * Both arguments are evaluated several times, so they must be free of side effects.
 * The last line carries no line continuation, so the macro can no longer swallow
 * whatever source line follows its definition.
 */
#define REM_TRAIL_MARKS(__str, __sep) while (1) { \
		size_t _len = strlen(__str); \
		if (!_len) break; \
		_len--; \
		if (strchr((__sep), (__str)[_len])) (__str)[_len] = 0; \
		else break; \
	}
159 :
160 :
161 196 : s32 gf_text_get_utf_type(FILE *in_src)
162 : {
163 : u32 read;
164 : unsigned char BOM[5];
165 196 : read = (u32) gf_fread(BOM, 5, in_src);
166 196 : if ((s32) read < 1)
167 : return -1;
168 :
169 196 : if ((BOM[0]==0xFF) && (BOM[1]==0xFE)) {
170 : /*UTF32 not supported*/
171 4 : if (!BOM[2] && !BOM[3]) return -1;
172 4 : gf_fseek(in_src, 2, SEEK_SET);
173 4 : return 3;
174 : }
175 192 : if ((BOM[0]==0xFE) && (BOM[1]==0xFF)) {
176 : /*UTF32 not supported*/
177 0 : if (!BOM[2] && !BOM[3]) return -1;
178 0 : gf_fseek(in_src, 2, SEEK_SET);
179 0 : return 2;
180 192 : } else if ((BOM[0]==0xEF) && (BOM[1]==0xBB) && (BOM[2]==0xBF)) {
181 34 : gf_fseek(in_src, 3, SEEK_SET);
182 34 : return 1;
183 : }
184 158 : if (BOM[0]<0x80) {
185 158 : gf_fseek(in_src, 0, SEEK_SET);
186 158 : return 0;
187 : }
188 : return -1;
189 : }
190 46 : static void ttxt_dom_progress(void *cbk, u64 cur_samp, u64 count)
191 : {
192 : GF_TXTIn *ctx = (GF_TXTIn *)cbk;
193 46 : ctx->end = count;
194 46 : }
195 :
196 112 : static GF_Err gf_text_guess_format(const char *filename, u32 *fmt)
197 : {
198 : char szLine[2048];
199 : u32 val;
200 : s32 uni_type;
201 112 : FILE *test = gf_fopen(filename, "rb");
202 112 : if (!test) return GF_URL_ERROR;
203 112 : uni_type = gf_text_get_utf_type(test);
204 :
205 112 : if (uni_type>1) {
206 : const u16 *sptr;
207 : char szUTF[1024];
208 2 : u32 read = (u32) gf_fread(szUTF, 1023, test);
209 2 : if ((s32) read < 0) {
210 0 : gf_fclose(test);
211 0 : return GF_IO_ERR;
212 : }
213 2 : szUTF[read]=0;
214 2 : sptr = (u16*)szUTF;
215 2 : /*read = (u32) */gf_utf8_wcstombs(szLine, read, &sptr);
216 : } else {
217 110 : val = (u32) gf_fread(szLine, 1024, test);
218 110 : if ((s32) val<0) return GF_IO_ERR;
219 :
220 110 : szLine[val]=0;
221 : }
222 266 : REM_TRAIL_MARKS(szLine, "\r\n\t ")
223 :
224 112 : *fmt = GF_TXTIN_MODE_NONE;
225 112 : if ((szLine[0]=='{') && strstr(szLine, "}{")) *fmt = GF_TXTIN_MODE_SUB;
226 109 : else if (szLine[0] == '<') {
227 23 : char *ext = gf_file_ext_start(filename);
228 23 : if (!strnicmp(ext, ".ttxt", 5)) *fmt = GF_TXTIN_MODE_TTXT;
229 19 : else if (!strnicmp(ext, ".ttml", 5)) *fmt = GF_TXTIN_MODE_TTML;
230 23 : ext = strstr(szLine, "?>");
231 23 : if (ext) ext += 2;
232 23 : if (ext && !ext[0]) {
233 0 : if (!gf_fgets(szLine, 2048, test))
234 0 : szLine[0] = '\0';
235 : }
236 23 : if (strstr(szLine, "x-quicktime-tx3g") || strstr(szLine, "text3GTrack")) *fmt = GF_TXTIN_MODE_TEXML;
237 22 : else if (strstr(szLine, "TextStream")) *fmt = GF_TXTIN_MODE_TTXT;
238 18 : else if (strstr(szLine, "tt")) *fmt = GF_TXTIN_MODE_TTML;
239 : }
240 86 : else if (strstr(szLine, "WEBVTT") )
241 44 : *fmt = GF_TXTIN_MODE_WEBVTT;
242 42 : else if (strstr(szLine, " --> ") )
243 37 : *fmt = GF_TXTIN_MODE_SRT; /* might want to change the default to WebVTT */
244 :
245 5 : else if (!strncmp(szLine, "FWS", 3) || !strncmp(szLine, "CWS", 3))
246 5 : *fmt = GF_TXTIN_MODE_SWF_SVG;
247 :
248 112 : gf_fclose(test);
249 112 : return GF_OK;
250 : }
251 :
252 :
253 :
254 134064 : char *gf_text_get_utf8_line(char *szLine, u32 lineSize, FILE *txt_in, s32 unicode_type)
255 : {
256 : u32 i, j, len;
257 : char *sOK;
258 : char szLineConv[1024];
259 : unsigned short *sptr;
260 :
261 134064 : memset(szLine, 0, sizeof(char)*lineSize);
262 134064 : sOK = gf_fgets(szLine, lineSize, txt_in);
263 134064 : if (!sOK) return NULL;
264 133927 : if (unicode_type<=1) {
265 : j=0;
266 133871 : len = (u32) strlen(szLine);
267 2673896 : for (i=0; i<len; i++) {
268 2540025 : if (!unicode_type && (szLine[i] & 0x80)) {
269 : /*non UTF8 (likely some win-CP)*/
270 3032 : if ((szLine[i+1] & 0xc0) != 0x80) {
271 3032 : szLineConv[j] = 0xc0 | ( (szLine[i] >> 6) & 0x3 );
272 3032 : j++;
273 3032 : szLine[i] &= 0xbf;
274 : }
275 : /*UTF8 2 bytes char*/
276 0 : else if ( (szLine[i] & 0xe0) == 0xc0) {
277 0 : szLineConv[j] = szLine[i];
278 : i++;
279 0 : j++;
280 : }
281 : /*UTF8 3 bytes char*/
282 0 : else if ( (szLine[i] & 0xf0) == 0xe0) {
283 0 : szLineConv[j] = szLine[i];
284 : i++;
285 0 : j++;
286 0 : szLineConv[j] = szLine[i];
287 0 : i++;
288 0 : j++;
289 : }
290 : /*UTF8 4 bytes char*/
291 0 : else if ( (szLine[i] & 0xf8) == 0xf0) {
292 0 : szLineConv[j] = szLine[i];
293 : i++;
294 0 : j++;
295 0 : szLineConv[j] = szLine[i];
296 0 : i++;
297 0 : j++;
298 0 : szLineConv[j] = szLine[i];
299 0 : i++;
300 0 : j++;
301 : } else {
302 : i+=1;
303 0 : continue;
304 : }
305 : }
306 2540025 : szLineConv[j] = szLine[i];
307 2540025 : j++;
308 : }
309 133871 : szLineConv[j] = 0;
310 : strcpy(szLine, szLineConv);
311 133871 : return sOK;
312 : }
313 :
314 : #ifdef GPAC_BIG_ENDIAN
315 : if (unicode_type==3)
316 : #else
317 56 : if (unicode_type==2)
318 : #endif
319 : {
320 : i=0;
321 0 : while (1) {
322 : char c;
323 0 : if (!szLine[i] && !szLine[i+1]) break;
324 0 : c = szLine[i+1];
325 0 : szLine[i+1] = szLine[i];
326 0 : szLine[i] = c;
327 0 : i+=2;
328 : }
329 : }
330 56 : sptr = (u16 *)szLine;
331 56 : i = (u32) gf_utf8_wcstombs(szLineConv, 1024, (const unsigned short **) &sptr);
332 56 : szLineConv[i] = 0;
333 : strcpy(szLine, szLineConv);
334 : /*this is ugly indeed: since input is UTF16-LE, there are many chances the gf_fgets never reads the \0 after a \n*/
335 56 : if (unicode_type==3) gf_fgetc(txt_in);
336 : return sOK;
337 : }
338 :
339 :
340 112 : static void txtin_probe_duration(GF_TXTIn *ctx)
341 : {
342 : GF_Fraction64 dur;
343 : dur.num = 0;
344 :
345 112 : if (ctx->fmt == GF_TXTIN_MODE_SWF_SVG) {
346 : #ifndef GPAC_DISABLE_SWF_IMPORT
347 : u32 frame_count, frame_rate;
348 5 : gf_swf_get_duration(ctx->swf_parse, &frame_rate, &frame_count);
349 5 : if (frame_count) {
350 : GF_Fraction64 tdur;
351 5 : tdur.num = frame_count;
352 5 : tdur.den = frame_rate;
353 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(tdur));
354 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PLAYBACK_MODE, &PROP_UINT(GF_PLAYBACK_MODE_FASTFORWARD ) );
355 : }
356 : #endif
357 : return;
358 : }
359 107 : if ((ctx->fmt == GF_TXTIN_MODE_SRT) || (ctx->fmt == GF_TXTIN_MODE_WEBVTT) || (ctx->fmt == GF_TXTIN_MODE_SUB)) {
360 84 : u64 pos = gf_ftell(ctx->src);
361 84 : gf_fseek(ctx->src, 0, SEEK_SET);
362 65837 : while (!gf_feof(ctx->src)) {
363 : u64 end;
364 : char szLine[2048];
365 65723 : char *sOK = gf_text_get_utf8_line(szLine, 2048, ctx->src, ctx->unicode_type);
366 65723 : if (!sOK) break;
367 145678 : REM_TRAIL_MARKS(szLine, "\r\n\t ")
368 :
369 65669 : if (ctx->fmt == GF_TXTIN_MODE_SUB) {
370 : char szText[2048];
371 : u32 sframe, eframe;
372 15 : if (sscanf(szLine, "{%d}{%d}%2047s", &sframe, &eframe, szText) == 3) {
373 9 : if (ctx->fps.den)
374 0 : end = 1000 * eframe * ctx->fps.num / ctx->fps.den;
375 : else
376 9 : end = 1000 * eframe / 25;
377 9 : if (end > (u64) dur.num) dur.num = (s64) end;
378 : }
379 : } else {
380 : u32 eh, em, es, ems;
381 65654 : char *start = strstr(szLine, "-->");
382 130708 : if (!start) continue;
383 0 : while (start[0] && ((start[0] == ' ') || (start[0] == '\t'))) start++;
384 :
385 16966 : if (sscanf(start, "%u:%u:%u,%u", &eh, &em, &es, &ems) != 4) {
386 16966 : eh = 0;
387 16966 : if (sscanf(szLine, "%u:%u,%u", &em, &es, &ems) != 3) {
388 16366 : continue;
389 : }
390 : }
391 600 : end = (3600*eh + 60*em + es)*1000 + ems;
392 600 : if (end > (u64) dur.num) dur.num = (s64) end;
393 : }
394 : }
395 84 : gf_fseek(ctx->src, pos, SEEK_SET);
396 84 : if (dur.num) {
397 : dur.den = 1000;
398 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(dur));
399 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PLAYBACK_MODE, &PROP_UINT(GF_PLAYBACK_MODE_FASTFORWARD ) );
400 : }
401 : return;
402 : }
403 23 : if ((ctx->fmt == GF_TXTIN_MODE_TTXT) || (ctx->fmt == GF_TXTIN_MODE_TEXML)) {
404 5 : u32 i=0;
405 5 : GF_XMLNode *node, *root = gf_xml_dom_get_root(ctx->parser);
406 111 : while ((node = gf_list_enum(root->content, &i))) {
407 : u32 j;
408 : u64 duration;
409 : GF_XMLAttribute *att;
410 101 : if (node->type) {
411 110 : continue;
412 : }
413 : /*sample text*/
414 48 : if ((ctx->fmt == GF_TXTIN_MODE_TTXT) && strcmp(node->name, "TextSample")) continue;
415 44 : else if ((ctx->fmt == GF_TXTIN_MODE_TEXML) && strcmp(node->name, "sample")) continue;
416 :
417 :
418 44 : j=0;
419 218 : while ( (att=(GF_XMLAttribute*)gf_list_enum(node->attributes, &j))) {
420 : u32 h, m, s, ms;
421 : u64 ts=0;
422 130 : if (ctx->fmt == GF_TXTIN_MODE_TTXT) {
423 212 : if (strcmp(att->name, "sampleTime")) continue;
424 :
425 42 : if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
426 42 : ts = (h*3600 + m*60 + s)*1000 + ms;
427 : } else {
428 0 : ts = (u32) (atof(att->value) * 1000);
429 : }
430 42 : if (ts > (u64) dur.num) dur.num = (s64) ts;
431 : } else {
432 4 : if (strcmp(att->name, "duration")) continue;
433 4 : duration = atoi(att->value);
434 2 : dur.num += (s32) ( (1000 * duration) / ctx->txml_timescale);
435 : }
436 : }
437 : }
438 5 : if (dur.num) {
439 : dur.den = 1000;
440 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(dur));
441 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PLAYBACK_MODE, &PROP_UINT(GF_PLAYBACK_MODE_FASTFORWARD ) );
442 : }
443 : return;
444 : }
445 :
446 18 : if (ctx->fmt == GF_TXTIN_MODE_TTML) {
447 18 : u32 i=0, k=0;
448 : GF_XMLNode *div_node;
449 :
450 90 : while ((div_node = gf_list_enum(ctx->div_nodes_list, &k))) {
451 : GF_XMLNode *node;
452 292 : while ((node = gf_list_enum(div_node->content, &i))) {
453 : GF_XMLNode *p_node;
454 : GF_XMLAttribute *att;
455 238 : u32 h, m, s, ms, p_idx=0;
456 : u64 ts_end=0;
457 238 : h = m = s = ms = 0;
458 883 : while ( (att = (GF_XMLAttribute*)gf_list_enum(node->attributes, &p_idx))) {
459 407 : if (strcmp(att->name, "end")) continue;
460 :
461 107 : if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
462 95 : ts_end = (h*3600 + m*60+s)*1000+ms;
463 12 : } else if (sscanf(att->value, "%u:%u:%u", &h, &m, &s) == 3) {
464 0 : ts_end = (h*3600 + m*60+s)*1000;
465 : }
466 : }
467 : //or under a <span>
468 238 : p_idx = 0;
469 798 : while ( (p_node = (GF_XMLNode*)gf_list_enum(node->content, &p_idx))) {
470 322 : u32 span_idx = 0;
471 661 : while ( (att = (GF_XMLAttribute*)gf_list_enum(p_node->attributes, &span_idx))) {
472 17 : if (strcmp(att->name, "end")) continue;
473 3 : if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
474 3 : ts_end = (h*3600 + m*60+s)*1000+ms;
475 0 : } else if (sscanf(att->value, "%u:%u:%u", &h, &m, &s) == 3) {
476 0 : ts_end = (h*3600 + m*60+s)*1000;
477 : }
478 : }
479 : }
480 238 : if (ts_end > (u64) dur.num) dur.num = (s64) ts_end;
481 : }
482 : }
483 18 : if (dur.num) {
484 : dur.den = 1000;
485 16 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(dur));
486 16 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PLAYBACK_MODE, &PROP_UINT(GF_PLAYBACK_MODE_FASTFORWARD ) );
487 : }
488 : return;
489 : }
490 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Duration probing not supported for format %d\n", ctx->fmt));
491 : }
492 :
493 38 : static GF_Err txtin_setup_srt(GF_Filter *filter, GF_TXTIn *ctx)
494 : {
495 : u32 ID, OCR_ES_ID, dsi_len, file_size;
496 : u8 *dsi;
497 : GF_TextSampleDescriptor *sd;
498 :
499 38 : ctx->src = gf_fopen(ctx->file_name, "rb");
500 38 : if (!ctx->src) return GF_URL_ERROR;
501 :
502 38 : file_size = (u32) gf_fsize(ctx->src);
503 :
504 38 : ctx->unicode_type = gf_text_get_utf_type(ctx->src);
505 38 : if (ctx->unicode_type<0) {
506 0 : gf_fclose(ctx->src);
507 0 : ctx->src = NULL;
508 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Unsupported SRT UTF encoding\n"));
509 : return GF_NOT_SUPPORTED;
510 : }
511 :
512 38 : if (!ctx->timescale) ctx->timescale = 1000;
513 : OCR_ES_ID = ID = 0;
514 :
515 38 : if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
516 38 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
517 38 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_TX3G) );
518 38 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
519 38 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
520 :
521 : if (!ID) ID = 1;
522 38 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
523 : if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
524 38 : if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
525 38 : if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
526 38 : if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
527 38 : if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
528 :
529 38 : sd = (GF_TextSampleDescriptor*)gf_odf_desc_new(GF_ODF_TX3G_TAG);
530 38 : sd->fonts = (GF_FontRecord*)gf_malloc(sizeof(GF_FontRecord));
531 38 : sd->font_count = 1;
532 38 : sd->fonts[0].fontID = 1;
533 38 : sd->fonts[0].fontName = gf_strdup(ctx->fontname ? ctx->fontname : "Serif");
534 38 : sd->back_color = 0x00000000; /*transparent*/
535 38 : sd->default_style.fontID = 1;
536 38 : sd->default_style.font_size = ctx->fontsize;
537 38 : sd->default_style.text_color = 0xFFFFFFFF; /*white*/
538 38 : sd->default_style.style_flags = 0;
539 38 : sd->horiz_justif = 1; /*center of scene*/
540 38 : sd->vert_justif = (s8) -1; /*bottom of scene*/
541 :
542 38 : if (ctx->nodefbox) {
543 0 : sd->default_pos.top = sd->default_pos.left = sd->default_pos.right = sd->default_pos.bottom = 0;
544 38 : } else if ((sd->default_pos.bottom==sd->default_pos.top) || (sd->default_pos.right==sd->default_pos.left)) {
545 38 : sd->default_pos.left = ctx->txtx;
546 38 : sd->default_pos.top = ctx->txty;
547 38 : sd->default_pos.right = ctx->width + sd->default_pos.left;
548 38 : sd->default_pos.bottom = ctx->height + sd->default_pos.top;
549 : }
550 :
551 : /*store attribs*/
552 38 : ctx->style = sd->default_style;
553 38 : gf_odf_tx3g_write(sd, &dsi, &dsi_len);
554 38 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA_NO_COPY(dsi, dsi_len) );
555 :
556 38 : gf_odf_desc_del((GF_Descriptor *)sd);
557 :
558 38 : ctx->default_color = ctx->style.text_color;
559 38 : ctx->samp = gf_isom_new_text_sample();
560 38 : ctx->state = 0;
561 38 : ctx->end = ctx->prev_end = ctx->start = 0;
562 38 : ctx->first_samp = GF_TRUE;
563 38 : ctx->curLine = 0;
564 :
565 38 : txtin_probe_duration(ctx);
566 38 : return GF_OK;
567 : }
568 :
569 4731 : static void txtin_process_send_text_sample(GF_TXTIn *ctx, GF_TextSample *txt_samp, u64 ts, u32 duration, Bool is_rap)
570 : {
571 : GF_FilterPacket *dst_pck;
572 : u8 *pck_data;
573 : u32 size;
574 :
575 4731 : if (ctx->seek_state==2) {
576 0 : Double end = (Double) (ts+duration);
577 0 : end /= 1000;
578 0 : if (end < ctx->start_range) return;
579 0 : ctx->seek_state = 0;
580 : }
581 :
582 4731 : size = gf_isom_text_sample_size(txt_samp);
583 :
584 4731 : dst_pck = gf_filter_pck_new_alloc(ctx->opid, size, &pck_data);
585 4731 : if (!dst_pck) return;
586 :
587 4731 : gf_bs_reassign_buffer(ctx->bs_w, pck_data, size);
588 4731 : gf_isom_text_sample_write_bs(txt_samp, ctx->bs_w);
589 :
590 4731 : ts *= ctx->timescale;
591 4731 : ts /= 1000;
592 4731 : duration *= ctx->timescale;
593 4731 : duration /= 1000;
594 :
595 4731 : gf_filter_pck_set_sap(dst_pck, is_rap ? GF_FILTER_SAP_1 : GF_FILTER_SAP_NONE);
596 4731 : gf_filter_pck_set_cts(dst_pck, ts);
597 4731 : gf_filter_pck_set_duration(dst_pck, duration);
598 :
599 4731 : gf_filter_pck_send(dst_pck);
600 : }
601 :
602 1776 : static GF_Err txtin_process_srt(GF_Filter *filter, GF_TXTIn *ctx)
603 : {
604 : u32 i;
605 : u32 sh, sm, ss, sms, eh, em, es, ems, txt_line, char_len, char_line, j, rem_styles;
606 : Bool set_start_char, set_end_char, rem_color;
607 : u32 line, len;
608 : char szLine[2048], szText[2048], *ptr;
609 : unsigned short uniLine[5000], uniText[5000], *sptr;
610 :
611 1776 : if (!ctx->is_setup) {
612 35 : ctx->is_setup = GF_TRUE;
613 35 : return txtin_setup_srt(filter, ctx);
614 : }
615 1741 : if (!ctx->opid) return GF_NOT_SUPPORTED;
616 1741 : if (!ctx->playstate) return GF_OK;
617 1691 : else if (ctx->playstate==2) return GF_EOS;
618 :
619 : txt_line = 0;
620 : set_start_char = set_end_char = GF_FALSE;
621 : char_len = 0;
622 :
623 1691 : if (ctx->seek_state == 1) {
624 0 : ctx->seek_state = 2;
625 0 : gf_fseek(ctx->src, 0, SEEK_SET);
626 : }
627 :
628 : while (1) {
629 : Bool is_empty = GF_FALSE;
630 11236 : char *sOK = gf_text_get_utf8_line(szLine, 2048, ctx->src, ctx->unicode_type);
631 :
632 11236 : if (sOK) {
633 30736 : REM_TRAIL_MARKS(szLine, "\r\n\t ")
634 :
635 11236 : if (ctx->unicode_type<=1) is_empty = strlen(szLine) ? GF_FALSE : GF_TRUE;
636 24 : else is_empty = (!szLine[0] && !szLine[1]) ? GF_TRUE : GF_FALSE;
637 : }
638 :
639 11236 : if (!sOK || is_empty) {
640 2618 : u32 utf_inc = (ctx->unicode_type<=1) ? 1 : 2;
641 : u32 nb_empty = utf_inc;
642 2618 : u32 pos = (u32) gf_ftell(ctx->src);
643 2618 : if (ctx->state) {
644 2650 : while (!gf_feof(ctx->src)) {
645 2650 : sOK = gf_text_get_utf8_line(szLine+nb_empty, 2048-nb_empty, ctx->src, ctx->unicode_type);
646 2650 : if (sOK) REM_TRAIL_MARKS((szLine+nb_empty), "\r\n\t ")
647 :
648 2650 : if (!sOK) {
649 35 : gf_fseek(ctx->src, pos, SEEK_SET);
650 35 : break;
651 2615 : } else if (!strlen(szLine+nb_empty)) {
652 32 : nb_empty+=utf_inc;
653 32 : continue;
654 2583 : } else if ( sscanf(szLine+nb_empty, "%u", &line) == 1) {
655 2583 : gf_fseek(ctx->src, pos, SEEK_SET);
656 2583 : break;
657 : } else {
658 : u32 k;
659 0 : for (k=0; k<nb_empty; k++) szLine[k] = '\n';
660 : goto force_line;
661 : }
662 : }
663 : }
664 2618 : ctx->style.style_flags = 0;
665 2618 : ctx->style.text_color = 0xFFFFFFFF;
666 2618 : ctx->style.startCharOffset = ctx->style.endCharOffset = 0;
667 2618 : if (txt_line) {
668 2618 : if (ctx->prev_end && (ctx->start != ctx->prev_end) && (ctx->state<=2)) {
669 1984 : GF_TextSample * empty_samp = gf_isom_new_text_sample();
670 1984 : txtin_process_send_text_sample(ctx, empty_samp, ctx->prev_end, (u32) (ctx->start - ctx->prev_end), GF_TRUE );
671 1984 : gf_isom_delete_text_sample(empty_samp);
672 : }
673 :
674 2618 : if (ctx->state<=2) {
675 2618 : txtin_process_send_text_sample(ctx, ctx->samp, ctx->start, (u32) (ctx->end - ctx->start), GF_TRUE);
676 2618 : ctx->prev_end = ctx->end;
677 : }
678 : txt_line = 0;
679 : char_len = 0;
680 : set_start_char = set_end_char = GF_FALSE;
681 2618 : ctx->style.startCharOffset = ctx->style.endCharOffset = 0;
682 2618 : gf_isom_text_reset(ctx->samp);
683 :
684 2618 : gf_filter_pid_set_info(ctx->opid, GF_PROP_PID_DOWN_BYTES, &PROP_LONGUINT( gf_ftell(ctx->src )) );
685 : }
686 2618 : ctx->state = 0;
687 2618 : if (!sOK) break;
688 2583 : continue;
689 : }
690 :
691 8618 : force_line:
692 8618 : switch (ctx->state) {
693 2618 : case 0:
694 2618 : if (sscanf(szLine, "%u", &line) != 1) {
695 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Bad SRT formatting - expecting number got \"%s\"\n", szLine));
696 : break;
697 : }
698 2618 : if (line != ctx->curLine + 1) {
699 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Corrupted SRT frame %d after frame %d\n", line, ctx->curLine));
700 : }
701 2618 : ctx->curLine = line;
702 2618 : ctx->state = 1;
703 2618 : break;
704 2618 : case 1:
705 2618 : if (sscanf(szLine, "%u:%u:%u,%u --> %u:%u:%u,%u", &sh, &sm, &ss, &sms, &eh, &em, &es, &ems) != 8) {
706 600 : if (sscanf(szLine, "%u:%u:%u.%u --> %u:%u:%u.%u", &sh, &sm, &ss, &sms, &eh, &em, &es, &ems) != 8) {
707 600 : sh = eh = 0;
708 600 : if (sscanf(szLine, "%u:%u,%u --> %u:%u,%u", &sm, &ss, &sms, &em, &es, &ems) != 6) {
709 0 : if (sscanf(szLine, "%u:%u.%u --> %u:%u.%u", &sm, &ss, &sms, &em, &es, &ems) != 6) {
710 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Error scanning SRT frame %d timing\n", ctx->curLine));
711 0 : ctx->state = 0;
712 0 : break;
713 : }
714 : }
715 : }
716 : }
717 2618 : ctx->start = (3600*sh + 60*sm + ss)*1000 + sms;
718 2618 : if (ctx->start < ctx->end) {
719 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Overlapping SRT frame %d - starts "LLD" ms is before end of previous one "LLD" ms - adjusting time stamps\n", ctx->curLine, ctx->start, ctx->end));
720 0 : ctx->start = ctx->end;
721 : }
722 :
723 2618 : ctx->end = (3600*eh + 60*em + es)*1000 + ems;
724 : /*make stream start at 0 by inserting a fake AU*/
725 2618 : if (ctx->first_samp && (ctx->start > 0)) {
726 34 : txtin_process_send_text_sample(ctx, ctx->samp, 0, (u32) ctx->start, GF_TRUE);
727 : }
728 2618 : ctx->style.style_flags = 0;
729 2618 : ctx->state = 2;
730 2618 : if (ctx->end <= ctx->prev_end) {
731 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Overlapping SRT frame %d end "LLD" is at or before previous end "LLD" - removing\n", ctx->curLine, ctx->end, ctx->prev_end));
732 0 : ctx->start = ctx->end;
733 0 : ctx->state = 3;
734 : }
735 : break;
736 :
737 3382 : default:
738 : /*reset only when text is present*/
739 3382 : ctx->first_samp = GF_FALSE;
740 :
741 : /*go to line*/
742 3382 : if (txt_line) {
743 764 : gf_isom_text_add_text(ctx->samp, "\n", 1);
744 764 : char_len += 1;
745 : }
746 :
747 3382 : ptr = (char *) szLine;
748 : {
749 3382 : size_t _len = gf_utf8_mbstowcs(uniLine, 5000, (const char **) &ptr);
750 3382 : if (_len == (size_t) -1) {
751 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Invalid UTF data (line %d)\n", ctx->curLine));
752 0 : ctx->state = 0;
753 : }
754 3382 : len = (u32) _len;
755 : }
756 : i=j=0;
757 : rem_styles = 0;
758 : rem_color = 0;
759 81480 : while (i<len) {
760 : u32 font_style = 0;
761 : u32 style_nb_chars = 0;
762 : u32 style_def_type = 0;
763 :
764 74716 : if ( (uniLine[i]=='<') && (uniLine[i+2]=='>')) {
765 : style_nb_chars = 3;
766 : style_def_type = 1;
767 : }
768 74394 : else if ( (uniLine[i]=='<') && (uniLine[i+1]=='/') && (uniLine[i+3]=='>')) {
769 : style_def_type = 2;
770 : style_nb_chars = 4;
771 : }
772 74072 : else if (uniLine[i]=='<') {
773 44 : const unsigned short* src = uniLine + i;
774 44 : size_t alen = gf_utf8_wcstombs(szLine, 2048, (const unsigned short**) & src);
775 44 : szLine[alen] = 0;
776 44 : strlwr(szLine);
777 44 : if (!strncmp(szLine, "<font ", 6) ) {
778 22 : char *a_sep = strstr(szLine, "color");
779 22 : if (a_sep) a_sep = strchr(a_sep, '"');
780 22 : if (a_sep) {
781 22 : char *e_sep = strchr(a_sep+1, '"');
782 22 : if (e_sep) {
783 22 : e_sep[0] = 0;
784 22 : font_style = gf_color_parse(a_sep+1);
785 22 : e_sep[0] = '"';
786 22 : e_sep = strchr(e_sep+1, '>');
787 22 : if (e_sep) {
788 22 : style_nb_chars = (u32) (1 + e_sep - szLine);
789 : style_def_type = 1;
790 : }
791 : }
792 :
793 : }
794 : }
795 22 : else if (!strncmp(szLine, "</font>", 7) ) {
796 : style_nb_chars = 7;
797 : style_def_type = 2;
798 : font_style = 0xFFFFFFFF;
799 : }
800 :
801 :
802 0 : else if (!strncmp(szLine, "<strike>", 8) ) {
803 : style_nb_chars = 8;
804 : style_def_type = 1;
805 : }
806 0 : else if (!strncmp(szLine, "</strike>", 9) ) {
807 : style_nb_chars = 9;
808 : style_def_type = 2;
809 : font_style = 0xFFFFFFFF;
810 : }
811 : //skip unknown
812 : else {
813 0 : char *a_sep = strstr(szLine, ">");
814 0 : if (a_sep) {
815 0 : style_nb_chars = (u32) (1 + a_sep - szLine);
816 0 : i += style_nb_chars;
817 0 : continue;
818 : }
819 : }
820 :
821 : }
822 :
823 : /*start of new style*/
824 74716 : if (style_def_type==1) {
825 : /*store prev style*/
826 344 : if (set_end_char) {
827 : assert(set_start_char);
828 0 : gf_isom_text_add_style(ctx->samp, &ctx->style);
829 : set_end_char = set_start_char = GF_FALSE;
830 0 : ctx->style.style_flags &= ~rem_styles;
831 : rem_styles = 0;
832 0 : if (rem_color) {
833 0 : ctx->style.text_color = ctx->default_color;
834 : rem_color = 0;
835 : }
836 : }
837 344 : if (set_start_char && (ctx->style.startCharOffset != j)) {
838 140 : ctx->style.endCharOffset = char_len + j;
839 140 : if (ctx->style.style_flags) gf_isom_text_add_style(ctx->samp, &ctx->style);
840 : }
841 344 : switch (uniLine[i+1]) {
842 52 : case 'b':
843 : case 'B':
844 52 : ctx->style.style_flags |= GF_TXT_STYLE_BOLD;
845 : set_start_char = GF_TRUE;
846 52 : ctx->style.startCharOffset = char_len + j;
847 52 : break;
848 270 : case 'i':
849 : case 'I':
850 270 : ctx->style.style_flags |= GF_TXT_STYLE_ITALIC;
851 : set_start_char = GF_TRUE;
852 270 : ctx->style.startCharOffset = char_len + j;
853 270 : break;
854 0 : case 'u':
855 : case 'U':
856 0 : ctx->style.style_flags |= GF_TXT_STYLE_UNDERLINED;
857 : set_start_char = GF_TRUE;
858 0 : ctx->style.startCharOffset = char_len + j;
859 0 : break;
860 0 : case 's':
861 : case 'S':
862 0 : ctx->style.style_flags |= GF_TXT_STYLE_STRIKETHROUGH;
863 : set_start_char = GF_TRUE;
864 0 : ctx->style.startCharOffset = char_len + j;
865 0 : break;
866 22 : case 'f':
867 : case 'F':
868 22 : if (font_style) {
869 22 : ctx->style.text_color = font_style;
870 : set_start_char = GF_TRUE;
871 22 : ctx->style.startCharOffset = char_len + j;
872 : }
873 : break;
874 : }
875 344 : i += style_nb_chars;
876 344 : continue;
877 : }
878 :
879 : /*end of prev style*/
880 74372 : if (style_def_type==2) {
881 344 : switch (uniLine[i+2]) {
882 52 : case 'b':
883 : case 'B':
884 52 : rem_styles |= GF_TXT_STYLE_BOLD;
885 : set_end_char = GF_TRUE;
886 52 : ctx->style.endCharOffset = char_len + j;
887 52 : break;
888 270 : case 'i':
889 : case 'I':
890 270 : rem_styles |= GF_TXT_STYLE_ITALIC;
891 : set_end_char = GF_TRUE;
892 270 : ctx->style.endCharOffset = char_len + j;
893 270 : break;
894 0 : case 'u':
895 : case 'U':
896 0 : rem_styles |= GF_TXT_STYLE_UNDERLINED;
897 : set_end_char = GF_TRUE;
898 0 : ctx->style.endCharOffset = char_len + j;
899 0 : break;
900 0 : case 's':
901 : case 'S':
902 0 : rem_styles |= GF_TXT_STYLE_STRIKETHROUGH;
903 : set_end_char = GF_TRUE;
904 0 : ctx->style.endCharOffset = char_len + j;
905 0 : break;
906 22 : case 'f':
907 : case 'F':
908 22 : if (font_style) {
909 : rem_color = 1;
910 : set_end_char = GF_TRUE;
911 22 : ctx->style.endCharOffset = char_len + j;
912 : }
913 : }
914 344 : i+=style_nb_chars;
915 344 : continue;
916 : }
917 : /*store style*/
918 74028 : if (set_end_char) {
919 142 : gf_isom_text_add_style(ctx->samp, &ctx->style);
920 : set_end_char = GF_FALSE;
921 : set_start_char = GF_TRUE;
922 142 : ctx->style.startCharOffset = char_len + j;
923 142 : ctx->style.style_flags &= ~rem_styles;
924 : rem_styles = 0;
925 142 : ctx->style.text_color = ctx->default_color;
926 : rem_color = 0;
927 : }
928 :
929 74028 : uniText[j] = uniLine[i];
930 74028 : j++;
931 74028 : i++;
932 : }
933 : /*store last style*/
934 3382 : if (set_end_char) {
935 176 : gf_isom_text_add_style(ctx->samp, &ctx->style);
936 : set_end_char = GF_FALSE;
937 : set_start_char = GF_TRUE;
938 176 : ctx->style.startCharOffset = char_len + j;
939 176 : ctx->style.style_flags &= ~rem_styles;
940 : }
941 :
942 : char_line = j;
943 3382 : uniText[j] = 0;
944 :
945 3382 : sptr = (u16 *) uniText;
946 3382 : len = (u32) gf_utf8_wcstombs(szText, 5000, (const u16 **) &sptr);
947 :
948 3382 : gf_isom_text_add_text(ctx->samp, szText, len);
949 3382 : char_len += char_line;
950 3382 : txt_line ++;
951 3382 : break;
952 : }
953 :
954 8618 : if (gf_filter_pid_would_block(ctx->opid))
955 : return GF_OK;
956 : }
957 :
958 : /*final flush*/
959 35 : if (ctx->end && ! ctx->noflush) {
960 35 : gf_isom_text_reset(ctx->samp);
961 35 : txtin_process_send_text_sample(ctx, ctx->samp, ctx->end, 0, GF_TRUE);
962 35 : ctx->end = 0;
963 : }
964 35 : gf_isom_text_reset(ctx->samp);
965 :
966 35 : return GF_EOS;
967 : }
968 :
/* Structure used to pass importer and track data to the parsers without exposing the GF_MediaImporter structure
   used by WebVTT and Flash->SVG */
typedef struct {
	/*text import filter context*/
	GF_TXTIn *ctx;
	/*NOTE(review): presumably the timescale of the target track - confirm against users of this structure*/
	u32 timescale;
	/*NOTE(review): presumably the target track number - confirm against users of this structure*/
	u32 track;
	/*NOTE(review): presumably the sample description index to use in that track - confirm against users of this structure*/
	u32 descriptionIndex;
} GF_ISOFlusher;
977 :
978 : #ifndef GPAC_DISABLE_VTT
979 :
980 4 : static GF_Err gf_webvtt_import_report(void *user, GF_Err e, char *message, const char *line)
981 : {
982 4 : GF_LOG(e ? GF_LOG_WARNING : GF_LOG_INFO, GF_LOG_AUTHOR, ("[TXTIn] WebVTT line %s: %s\n", line, message) );
983 4 : return e;
984 : }
985 :
986 58 : static void gf_webvtt_import_header(void *user, const char *config)
987 : {
988 : GF_TXTIn *ctx = (GF_TXTIn *)user;
989 58 : if (!ctx->hdr_parsed) {
990 45 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA((char *) config, (u32) (1+strlen(config)) ) );
991 45 : ctx->hdr_parsed = GF_TRUE;
992 45 : gf_webvtt_parser_suspend(ctx->vttparser);
993 : }
994 58 : }
995 :
996 14570 : static void gf_webvtt_flush_sample(void *user, GF_WebVTTSample *samp)
997 : {
998 : u64 start, end;
999 : GF_TXTIn *ctx = (GF_TXTIn *)user;
1000 : GF_ISOSample *s;
1001 :
1002 14570 : start = gf_webvtt_sample_get_start(samp);
1003 14570 : end = gf_webvtt_sample_get_end(samp);
1004 :
1005 14570 : if (ctx->seek_state==2) {
1006 0 : Double tsend = (Double) end;
1007 0 : tsend /= 1000;
1008 0 : if (tsend<ctx->start_range) return;
1009 0 : ctx->seek_state = 0;
1010 : }
1011 :
1012 14570 : s = gf_isom_webvtt_to_sample(samp);
1013 14570 : if (s) {
1014 : GF_FilterPacket *pck;
1015 : u8 *pck_data;
1016 :
1017 14570 : pck = gf_filter_pck_new_alloc(ctx->opid, s->dataLength, &pck_data);
1018 14570 : if (pck) {
1019 14570 : memcpy(pck_data, s->data, s->dataLength);
1020 14570 : gf_filter_pck_set_cts(pck, (u64) (ctx->timescale * start / 1000) );
1021 14570 : gf_filter_pck_set_sap(pck, GF_FILTER_SAP_1);
1022 :
1023 :
1024 14570 : if (end && (end>=start) ) {
1025 14570 : gf_filter_pck_set_duration(pck, (u32) (ctx->timescale * (end-start) / 1000) );
1026 : }
1027 14570 : gf_filter_pck_send(pck);
1028 : }
1029 :
1030 14570 : gf_isom_sample_del(&s);
1031 : }
1032 14570 : gf_webvtt_sample_del(samp);
1033 :
1034 14570 : gf_filter_pid_set_info(ctx->opid, GF_PROP_PID_DOWN_BYTES, &PROP_LONGUINT( gf_ftell(ctx->src )) );
1035 :
1036 14570 : if (gf_filter_pid_would_block(ctx->opid))
1037 14570 : gf_webvtt_parser_suspend(ctx->vttparser);
1038 :
1039 : }
1040 :
1041 46 : static GF_Err txtin_webvtt_setup(GF_Filter *filter, GF_TXTIn *ctx)
1042 : {
1043 : GF_Err e;
1044 : u32 ID, OCR_ES_ID, file_size, w, h;
1045 : Bool is_srt;
1046 : char *ext;
1047 :
1048 46 : ctx->src = gf_fopen(ctx->file_name, "rb");
1049 46 : if (!ctx->src) return GF_URL_ERROR;
1050 :
1051 46 : file_size = (u32) gf_fsize(ctx->src);
1052 :
1053 46 : ctx->unicode_type = gf_text_get_utf_type(ctx->src);
1054 46 : if (ctx->unicode_type<0) {
1055 0 : gf_fclose(ctx->src);
1056 0 : ctx->src = NULL;
1057 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Unsupported SRT UTF encoding\n"));
1058 : return GF_NOT_SUPPORTED;
1059 : }
1060 46 : ext = gf_file_ext_start(ctx->file_name);
1061 46 : is_srt = (ext && !strnicmp(ext, ".srt", 4)) ? GF_TRUE : GF_FALSE;
1062 :
1063 :
1064 46 : if (!ctx->timescale) ctx->timescale = 1000;
1065 : OCR_ES_ID = ID = 0;
1066 :
1067 46 : if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
1068 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
1069 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_WEBVTT) );
1070 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
1071 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
1072 :
1073 46 : w = ctx->width;
1074 46 : h = ctx->height;
1075 : if (!ID) ID = 1;
1076 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
1077 : if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
1078 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(w) );
1079 46 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(h) );
1080 46 : if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
1081 46 : if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
1082 :
1083 46 : ctx->vttparser = gf_webvtt_parser_new();
1084 :
1085 46 : e = gf_webvtt_parser_init(ctx->vttparser, ctx->src, ctx->unicode_type, is_srt, ctx, gf_webvtt_import_report, gf_webvtt_flush_sample, gf_webvtt_import_header);
1086 46 : if (e != GF_OK) {
1087 0 : gf_webvtt_parser_del(ctx->vttparser);
1088 0 : ctx->vttparser = NULL;
1089 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] WebVTT parser init error %s\n", gf_error_to_string(e) ));
1090 : }
1091 : //get the header
1092 46 : e = gf_webvtt_parser_parse(ctx->vttparser);
1093 :
1094 46 : txtin_probe_duration(ctx);
1095 46 : return e;
1096 : }
1097 :
1098 14393 : static GF_Err txtin_process_webvtt(GF_Filter *filter, GF_TXTIn *ctx)
1099 : {
1100 : GF_Err e;
1101 :
1102 14393 : if (!ctx->is_setup) {
1103 46 : ctx->is_setup = GF_TRUE;
1104 46 : return txtin_webvtt_setup(filter, ctx);
1105 : }
1106 14347 : if (!ctx->vttparser) return GF_NOT_SUPPORTED;
1107 14347 : if (ctx->seek_state==1) {
1108 0 : ctx->seek_state = 2;
1109 0 : gf_webvtt_parser_restart(ctx->vttparser);
1110 : }
1111 :
1112 14347 : e = gf_webvtt_parser_parse(ctx->vttparser);
1113 14347 : if (e < GF_OK) {
1114 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] WebVTT process error %s\n", gf_error_to_string(e) ));
1115 : }
1116 : return e;
1117 : }
1118 :
1119 : #endif /*GPAC_DISABLE_VTT*/
1120 :
/*rewrites a text string in place and returns it.
	str: 0-terminated string, modified in place (the result is never longer than the input)
	strip_lines: selects the rewrite mode
  - strip_lines not set: each "\r\n" pair is replaced by "\n", everything else is kept.
  - strip_lines set: a string not starting with a single quote is returned untouched. Otherwise the string
    is seen as a sequence of single-quoted chunks: the chunks are kept without their quotes and joined by '\n',
    characters outside of the quotes are dropped. Inside a chunk, a quote only closes the chunk when it is the
    last character or is followed by a space, tab, CR, LF or another quote; any other quote is kept as text.
  returns str*/
static char *ttxt_parse_string(char *str, Bool strip_lines)
{
	u32 in_len = (u32) strlen(str);
	char *rd = str;
	char *wr = str;
	char *stop = str + in_len;
	u32 in_quote = 0;

	if (!strip_lines) {
		while (rd < stop) {
			//drop the CR of a CRLF pair; reading rd[1] is safe, at worst it is the terminating 0
			if ((rd[0] == '\r') && (rd[1] == '\n'))
				rd++;
			*wr++ = *rd++;
		}
		*wr = 0;
		return str;
	}

	if (str[0] != '\'')
		return str;

	for (; rd < stop; rd++) {
		char next;

		if (*rd != '\'') {
			//regular character: only kept inside a quoted chunk
			if (in_quote) *wr++ = *rd;
			continue;
		}
		if (!in_quote) {
			//opening quote: start a new line unless this is the first chunk
			if (wr != str) *wr++ = '\n';
			in_quote = 1;
			continue;
		}
		//quote found inside a chunk: closing quote or apostrophe in the text ?
		next = rd[1];
		if ((rd + 1 == stop) || (next == ' ') || (next == '\n') || (next == '\r') || (next == '\t') || (next == '\'')) {
			in_quote = 0;
		} else {
			*wr++ = *rd;
		}
	}
	*wr = 0;
	return str;
}
1168 :
1169 18 : static void ebu_ttd_remove_samples(GF_XMLNode *root, GF_XMLNode **out_body_node)
1170 : {
1171 18 : u32 idx = 0;
1172 : GF_XMLNode *node = NULL;
1173 18 : *out_body_node = NULL;
1174 72 : while ( (node = (GF_XMLNode*)gf_list_enum(root->content, &idx))) {
1175 72 : if (!strcmp(node->name, "body")) {
1176 : GF_XMLNode *body_node;
1177 18 : u32 body_idx = 0;
1178 18 : *out_body_node = node;
1179 72 : while ( (body_node = (GF_XMLNode*)gf_list_enum(node->content, &body_idx))) {
1180 54 : if (!strcmp(body_node->name, "div")) {
1181 : u32 body_num;
1182 18 : body_num = gf_list_count(body_node->content);
1183 256 : while (body_num--) {
1184 238 : GF_XMLNode *content_node = (GF_XMLNode*)gf_list_get(body_node->content, 0);
1185 : assert(gf_list_find(body_node->content, content_node) == 0);
1186 238 : gf_list_rem(body_node->content, 0);
1187 238 : gf_xml_dom_node_del(content_node);
1188 : }
1189 : }
1190 : }
1191 : return;
1192 : }
1193 : }
1194 : }
1195 :
1196 1839 : static s64 ttml_get_timestamp(GF_TXTIn *ctx, char *value)
1197 : {
1198 : u32 h, m, s, ms, f, sf;
1199 : s64 ts = -1;
1200 1839 : u32 len = (u32) strlen(value);
1201 :
1202 : //tick metrick - cannot be fractional
1203 1839 : if (len && (value[len-1]=='t')) {
1204 28 : value[len-1] = 0;
1205 28 : ts = (s64) (atoi(value) * 1000);
1206 28 : value[len-1] = 't';
1207 28 : if (ctx->tick_rate)
1208 28 : ts /= ctx->tick_rate;
1209 : }
1210 : //hours metric, can be fractional
1211 1811 : else if (len && (value[len-1]=='h')) {
1212 28 : value[len-1] = 0;
1213 28 : ts = (s64) (atof(value) * 1000 * 3600);
1214 28 : value[len-1] = 'h';
1215 : }
1216 : //minutes metric, can be fractional
1217 1783 : else if (len && (value[len-1]=='m')) {
1218 28 : value[len-1] = 0;
1219 28 : ts = (s64) (atof(value) * 1000 * 60);
1220 28 : value[len-1] = 'm';
1221 : }
1222 1755 : else if (len && (value[len-1]=='s')) {
1223 : //milliseconds metric, can be fractional but we work at 1ms clock resolution anyway
1224 56 : if ((len > 1) && (value[len-2]=='m')) {
1225 28 : value[len-2] = 0;
1226 28 : ts = (s64) (atof(value));
1227 28 : value[len-2] = 'm';
1228 : }
1229 : //seconds metric, can be fractional
1230 : else {
1231 28 : value[len-1] = 0;
1232 28 : ts = (s64) (atof(value) * 1000);
1233 28 : value[len-1] = 's';
1234 : }
1235 : }
1236 : //frames metric, can be fractional
1237 1699 : else if (len && (value[len-1]=='f')) {
1238 28 : f = sf = 0;
1239 28 : value[len-1] = 0;
1240 28 : if (sscanf(value, "%u.%u", &f, &sf) != 2) {
1241 14 : sscanf(value, "%u", &f);
1242 14 : sf = 0;
1243 : }
1244 28 : value[len-1] = 'f';
1245 :
1246 28 : if (!ctx->ttml_fps_num) {
1247 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] time indicates frames but no frame rate set, assuming 25 FPS\n"));
1248 0 : ctx->ttml_fps_num = 25;
1249 0 : ctx->ttml_fps_den = 1;
1250 : }
1251 28 : if (sf && !ctx->ttml_sfps) {
1252 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] time indicates subframes but no subFrameRate set, assuming 1\n"));
1253 0 : ctx->ttml_sfps = 1;
1254 : }
1255 28 : ts = ((s64) 1000 * f * ctx->ttml_fps_den) / ctx->ttml_fps_num;
1256 28 : if (sf)
1257 0 : ts += ((s64) 1000 * sf * ctx->ttml_fps_den / ctx->ttml_sfps) / ctx->ttml_fps_num;
1258 : }
1259 1671 : else if (sscanf(value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
1260 1671 : ts = (h*3600 + m*60+s)*1000+ms;
1261 : }
1262 0 : else if (sscanf(value, "%u:%u:%u:%u.%u", &h, &m, &s, &f, &sf) == 5) {
1263 0 : ts = (h*3600 + m*60+s)*1000;
1264 0 : if (!ctx->ttml_fps_num) {
1265 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] time indicates frames but no frame rate set, assuming 25 FPS\n"));
1266 0 : ctx->ttml_fps_num = 25;
1267 0 : ctx->ttml_fps_den = 1;
1268 : }
1269 0 : if (!ctx->ttml_sfps) {
1270 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] time indicates subframes but no subFrameRate set, assuming 1\n"));
1271 0 : ctx->ttml_sfps = 1;
1272 : }
1273 0 : ts += ((s64) 1000 * f * ctx->ttml_fps_den) / ctx->ttml_fps_num;
1274 0 : ts += ((s64) 1000 * sf * ctx->ttml_fps_den / ctx->ttml_sfps) / ctx->ttml_fps_num;
1275 : }
1276 0 : else if (sscanf(value, "%u:%u:%u:%u", &h, &m, &s, &f) == 4) {
1277 0 : ts = (h*3600 + m*60+s)*1000;
1278 0 : if (!ctx->ttml_fps_num) {
1279 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] time indicates frames but no frame rate set, assuming 25 FPS\n"));
1280 0 : ctx->ttml_fps_num = 25;
1281 0 : ctx->ttml_fps_den = 1;
1282 : }
1283 0 : ts += ((s64) 1000 * f * ctx->ttml_fps_den) / ctx->ttml_fps_num;
1284 : }
1285 0 : else if (sscanf(value, "%u:%u:%u", &h, &m, &s) == 3) {
1286 0 : ts = (h*3600 + m*60+s)*1000;
1287 : }
1288 1839 : return ts;
1289 : }
1290 :
1291 110 : static GF_Err ttml_push_interval(GF_TXTIn *ctx, s64 begin, s64 end, TTMLInterval **out_interval)
1292 : {
1293 : u32 i;
1294 : TTMLInterval *interval;
1295 114 : if (begin==-1) return GF_OK;
1296 107 : if (end==-1) return GF_OK;
1297 :
1298 107 : if (end < begin) {
1299 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] invalid timings: \"begin\"="LLD" , \"end\"="LLD". Abort.\n", begin, end));
1300 : return GF_NON_COMPLIANT_BITSTREAM;
1301 : }
1302 :
1303 : interval = NULL;
1304 355 : for (i=0; i<gf_list_count(ctx->intervals); i++) {
1305 361 : interval = gf_list_get(ctx->intervals, i);
1306 :
1307 : //generate a single sample for the input, merge interval
1308 361 : if (ctx->ttml_dur>=0) {
1309 0 : if (interval->begin > begin) interval->begin = begin;
1310 0 : if (interval->end < end) interval->end = end;
1311 0 : *out_interval = interval;
1312 0 : return GF_OK;
1313 : }
1314 : //contained, do nothing
1315 361 : if ((begin>=interval->begin) && (end<=interval->end)) {
1316 2 : *out_interval = interval;
1317 2 : return GF_OK;
1318 : }
1319 : //not overlapping
1320 359 : if ((end < interval->begin) || (begin > interval->end))
1321 329 : continue;
1322 :
1323 : //new interval starts before current and end after, remove current and push extended interval
1324 30 : if ((begin < interval->end) && (end > interval->end)) {
1325 3 : if (begin>interval->begin)
1326 : begin = interval->begin;
1327 3 : gf_list_rem(ctx->intervals, i);
1328 3 : gf_free(interval);
1329 3 : return ttml_push_interval(ctx, begin, end, out_interval);
1330 : }
1331 : //new interval starts before current and end before, remove current and push rewinded
1332 27 : if ((begin < interval->end) && (end <= interval->end)) {
1333 : end = interval->end;
1334 1 : if (begin>interval->begin)
1335 : begin = interval->begin;
1336 1 : gf_list_rem(ctx->intervals, i);
1337 1 : gf_free(interval);
1338 1 : return ttml_push_interval(ctx, begin, end, out_interval);
1339 : }
1340 : }
1341 : //need a new interval
1342 101 : GF_SAFEALLOC(interval, TTMLInterval);
1343 101 : interval->begin = begin;
1344 101 : interval->end = end;
1345 101 : *out_interval = interval;
1346 :
1347 451 : for (i=0; i<gf_list_count(ctx->intervals); i++) {
1348 353 : TTMLInterval *an_interval = gf_list_get(ctx->intervals, i);
1349 353 : if (an_interval->begin > interval->begin) {
1350 3 : return gf_list_insert(ctx->intervals, interval, i);
1351 : }
1352 : }
1353 98 : return gf_list_add(ctx->intervals, interval);
1354 : }
1355 :
1356 18 : static void ttml_reset_intervals(GF_TXTIn *ctx)
1357 : {
1358 116 : while (gf_list_count(ctx->intervals)) {
1359 98 : TTMLInterval *ival = gf_list_pop_back(ctx->intervals);
1360 98 : if (ival->resources) {
1361 12 : while (gf_list_count(ival->resources)) {
1362 7 : TTMLRes *ires = gf_list_pop_back(ival->resources);
1363 7 : if (!ires->global) {
1364 6 : gf_free(ires->data);
1365 6 : gf_free(ires);
1366 : }
1367 : }
1368 5 : gf_list_del(ival->resources);
1369 : }
1370 98 : gf_free(ival);
1371 : }
1372 18 : }
1373 :
1374 : #include <gpac/base_coding.h>
1375 :
1376 7 : static GF_Err ttml_push_res(GF_TXTIn *ctx, TTMLInterval *interval, u8 *f_data, u32 f_size)
1377 : {
1378 : GF_Err e;
1379 : TTMLRes *res;
1380 : GF_List *res_list;
1381 7 : if (interval) {
1382 6 : if (!interval->resources) {
1383 5 : if (ctx->ttml_resources)
1384 1 : interval->resources = gf_list_clone(ctx->ttml_resources);
1385 : else
1386 4 : interval->resources = gf_list_new();
1387 : }
1388 6 : res_list = interval->resources;
1389 : } else {
1390 1 : if (!ctx->ttml_resources) {
1391 1 : ctx->ttml_resources = gf_list_new();
1392 : }
1393 1 : res_list = ctx->ttml_resources;
1394 : }
1395 7 : if (!res_list) {
1396 0 : gf_free(f_data);
1397 : return GF_OUT_OF_MEM;
1398 : }
1399 7 : GF_SAFEALLOC(res, TTMLRes)
1400 7 : if (!res) {
1401 0 : gf_free(f_data);
1402 : return GF_OUT_OF_MEM;
1403 : }
1404 7 : res->size = f_size;
1405 7 : res->data = f_data;
1406 7 : if (!interval)
1407 1 : res->global = GF_TRUE;
1408 :
1409 7 : e = gf_list_add(res_list, res);
1410 7 : if (e) {
1411 0 : gf_free(res);
1412 0 : gf_free(f_data);
1413 : return e;
1414 : }
1415 : return GF_OK;
1416 : }
1417 :
1418 151 : static GF_Err ttml_push_resources(GF_TXTIn *ctx, TTMLInterval *interval, GF_XMLNode *node, GF_XMLNode *parent_source_node)
1419 : {
1420 : u32 i;
1421 : char szURN[1024];
1422 : u8 *f_data;
1423 : u32 f_size;
1424 : u32 idx;
1425 : GF_Err e;
1426 : GF_XMLAttribute *att, *data_type = NULL;
1427 : GF_XMLNode *child;
1428 : Bool is_source = GF_FALSE;
1429 : Bool is_data = GF_FALSE;
1430 : Bool check_src = GF_FALSE;
1431 :
1432 151 : if (!ctx->ttml_embed)
1433 : return GF_OK;
1434 :
1435 31 : if (!strcmp(node->name, "source")) {
1436 : is_source = GF_TRUE;
1437 : check_src = GF_TRUE;
1438 : }
1439 27 : else if (!strcmp(node->name, "data")) {
1440 2 : is_data = parent_source_node ? GF_TRUE : GF_FALSE;
1441 : check_src = GF_TRUE;
1442 : }
1443 : //we don't embed chunks
1444 25 : else if (!strcmp(node->name, "chunk")) {
1445 : return GF_OK;
1446 : }
1447 25 : else if (!strcmp(node->name, "audio") || !strcmp(node->name, "font") || !strcmp(node->name, "image")) {
1448 : check_src = GF_TRUE;
1449 : }
1450 :
1451 31 : if (check_src) {
1452 14 : i = 0;
1453 45 : while ( (att = (GF_XMLAttribute*)gf_list_enum(node->attributes, &i))) {
1454 : char *url;
1455 17 : if (!att->value) continue;
1456 17 : if (is_data && !strcmp(att->name, "type")) {
1457 : data_type = att;
1458 2 : continue;
1459 : }
1460 15 : if (strcmp(att->name, "src")) continue;
1461 6 : if (att->value[0]=='#') continue;
1462 :
1463 5 : if (!strncmp(att->value, "file://", 7)) {}
1464 5 : else if (strstr(att->value, "://"))
1465 0 : continue;
1466 :
1467 5 : url = gf_url_concatenate(ctx->file_name, att->value);
1468 : //embed image
1469 5 : e = gf_file_load_data(url, &f_data, &f_size);
1470 5 : gf_free(url);
1471 5 : if (e) return e;
1472 :
1473 5 : e = ttml_push_res(ctx, interval, f_data, f_size);
1474 5 : if (e) return e;
1475 :
1476 5 : idx = gf_list_count(interval ? interval->resources : ctx->ttml_resources);
1477 5 : gf_free(att->value);
1478 : sprintf(szURN, "urn:mpeg:14496-30:%d", idx);
1479 5 : att->value = gf_strdup(szURN);
1480 5 : if (!att->value) return GF_OUT_OF_MEM;
1481 :
1482 5 : ctx->has_images = GF_TRUE;
1483 : }
1484 : }
1485 :
1486 31 : i = 0;
1487 125 : while ( (child = (GF_XMLNode*) gf_list_enum(node->content, &i))) {
1488 65 : if (child->type) {
1489 42 : if (!is_data) continue;
1490 2 : u8 *data = child->name;
1491 2 : u32 ilen = (u32) strlen(data);
1492 2 : f_size = 3*ilen/4;
1493 2 : f_data = gf_malloc(sizeof(u8) * f_size);
1494 :
1495 2 : f_size = gf_base64_decode(data, ilen, f_data, f_size);
1496 :
1497 2 : e = ttml_push_res(ctx, interval, f_data, f_size);
1498 2 : if (e) return e;
1499 :
1500 2 : idx = gf_list_count(interval ? interval->resources : ctx->ttml_resources);
1501 : sprintf(szURN, "urn:mpeg:14496-30:%d", idx);
1502 :
1503 2 : GF_SAFEALLOC(att, GF_XMLAttribute)
1504 2 : if (att) {
1505 2 : att->name = gf_strdup("src");
1506 2 : att->value = gf_strdup(szURN);
1507 2 : gf_list_add(parent_source_node->attributes, att);
1508 : }
1509 2 : if (!att || !att->value || !att->name) return GF_OUT_OF_MEM;
1510 2 : if (data_type) {
1511 2 : gf_list_del_item(node->attributes, data_type);
1512 2 : gf_list_add(parent_source_node->attributes, data_type);
1513 : }
1514 2 : gf_xml_dom_node_reset(parent_source_node, GF_FALSE, GF_TRUE);
1515 2 : ctx->has_images = GF_TRUE;
1516 2 : return GF_OK;
1517 : }
1518 :
1519 23 : e = ttml_push_resources(ctx, interval, child, is_source ? node : NULL);
1520 23 : if (e) return e;
1521 : }
1522 : return GF_OK;
1523 : }
1524 :
1525 216 : static GF_Err ttml_rewrite_timestamp(GF_TXTIn *ctx, s64 ttml_zero, GF_XMLAttribute *att, s64 *value, Bool *drop)
1526 : {
1527 : u64 v;
1528 : char szTS[21];
1529 : u32 h, m, s, ms;
1530 216 : *value = ttml_get_timestamp(ctx, att->value);
1531 216 : if (!ttml_zero)
1532 : return GF_OK;
1533 :
1534 26 : if (*value < ttml_zero) {
1535 9 : *drop = GF_TRUE;
1536 : return GF_OK;
1537 : }
1538 :
1539 17 : *value -= ttml_zero;
1540 17 : v = (u64) (*value / 1000);
1541 17 : h = (u32) (v / 3600);
1542 17 : m = (u32) (v - h*60) / 60;
1543 17 : s = (u32) (v - h*3600 - m*60);
1544 17 : ms = (*value) % 1000;
1545 :
1546 : snprintf(szTS, 20, "%02d:%02d:%02d.%03d", h, m, s, ms);
1547 17 : szTS[20] = 0;
1548 17 : gf_free(att->value);
1549 17 : att->value = gf_strdup(szTS);
1550 : return GF_OK;
1551 : }
1552 :
1553 18 : static GF_Err ttml_setup_intervals(GF_TXTIn *ctx)
1554 : {
1555 : u32 k, i, nb_divs;
1556 : s64 ttml_zero_ms = 0;
1557 : GF_Err e;
1558 : GF_XMLNode *root;
1559 :
1560 18 : if (!ctx->intervals)
1561 18 : ctx->intervals = gf_list_new();
1562 : else
1563 0 : ttml_reset_intervals(ctx);
1564 :
1565 18 : ctx->has_images = GF_FALSE;
1566 18 : root = ctx->root_working_copy;
1567 36 : for (k=0; k<gf_list_count(root->content); k++) {
1568 36 : GF_XMLNode *head = (GF_XMLNode*)gf_list_get(root->content, k);
1569 36 : if (head->type) continue;
1570 18 : if (strcmp(head->name, "head")) continue;
1571 18 : ttml_push_resources(ctx, NULL, head, NULL);
1572 18 : break;
1573 : }
1574 :
1575 18 : root = gf_xml_dom_get_root(ctx->parser);
1576 18 : if (ctx->ttml_zero) {
1577 1 : if (ctx->ttml_zero[0]=='T')
1578 1 : ttml_zero_ms = ttml_get_timestamp(ctx, (char *) ctx->ttml_zero+1);
1579 : else
1580 0 : ttml_zero_ms = ttml_get_timestamp(ctx, (char *) ctx->ttml_zero);
1581 : }
1582 :
1583 18 : nb_divs = gf_list_count(ctx->div_nodes_list);
1584 72 : for (i=0; i<nb_divs; i++) {
1585 : u32 nb_children;
1586 54 : GF_XMLNode *div_node = gf_list_get(ctx->div_nodes_list, i);
1587 54 : nb_children = gf_list_count(div_node->content);
1588 :
1589 292 : for (k=0; k<nb_children; k++) {
1590 238 : TTMLInterval *ival=NULL;
1591 : u32 p_idx;
1592 238 : Bool drop = GF_FALSE;
1593 : GF_XMLAttribute *p_att;
1594 : GF_XMLNode *p_node;
1595 238 : s64 begin=-1, end=-1;
1596 238 : GF_XMLNode *adiv_child = (GF_XMLNode*)gf_list_get(div_node->content, k);
1597 373 : if (adiv_child->type) continue;
1598 110 : e = gf_xml_get_element_check_namespace(adiv_child, "p", root->ns);
1599 110 : if (e) continue;
1600 :
1601 108 : p_idx = 0;
1602 615 : while ( (p_att = (GF_XMLAttribute*)gf_list_enum(adiv_child->attributes, &p_idx))) {
1603 399 : if (!strcmp(p_att->name, "begin")) {
1604 105 : e = ttml_rewrite_timestamp(ctx, ttml_zero_ms, p_att, &begin, &drop);
1605 105 : if (e) return e;
1606 : }
1607 399 : if (!strcmp(p_att->name, "end")) {
1608 105 : e = ttml_rewrite_timestamp(ctx, ttml_zero_ms, p_att, &end, &drop);
1609 105 : if (e) return e;
1610 : }
1611 : }
1612 108 : if (drop) {
1613 5 : gf_xml_dom_node_del(adiv_child);
1614 5 : gf_list_rem(div_node->content, k);
1615 5 : k--;
1616 5 : nb_children--;
1617 5 : continue;
1618 : }
1619 :
1620 103 : e = ttml_push_interval(ctx, begin, end, &ival);
1621 103 : if (e) return e;
1622 :
1623 103 : e = ttml_push_resources(ctx, ival, adiv_child, NULL);
1624 103 : if (e) return e;
1625 :
1626 103 : p_idx = 0;
1627 507 : while ( (p_node = (GF_XMLNode*)gf_list_enum(adiv_child->content, &p_idx))) {
1628 301 : s64 s_begin=-1, s_end=-1;
1629 301 : e = gf_xml_get_element_check_namespace(p_node, "span", root->ns);
1630 301 : if (e) continue;
1631 :
1632 7 : u32 span_idx = 0;
1633 : GF_XMLAttribute *span_att;
1634 25 : while ( (span_att = (GF_XMLAttribute*)gf_list_enum(p_node->attributes, &span_idx))) {
1635 11 : if (!strcmp(span_att->name, "begin")) {
1636 3 : e = ttml_rewrite_timestamp(ctx, ttml_zero_ms, span_att, &s_begin, &drop);
1637 3 : if (e) return e;
1638 8 : } else if (!strcmp(span_att->name, "end")) {
1639 3 : e = ttml_rewrite_timestamp(ctx, ttml_zero_ms, span_att, &s_end, &drop);
1640 3 : if (e) return e;
1641 : }
1642 : }
1643 7 : e = ttml_push_interval(ctx, s_begin, s_end, &ival);
1644 7 : if (e) return e;
1645 :
1646 7 : e = ttml_push_resources(ctx, ival, p_node, NULL);
1647 7 : if (e) return e;
1648 : }
1649 : }
1650 : }
1651 :
1652 : //empty doc
1653 18 : if (!gf_list_count(ctx->intervals)) {
1654 : TTMLInterval *interval;
1655 1 : GF_SAFEALLOC(interval, TTMLInterval);
1656 1 : interval->begin = interval->end = 0;
1657 1 : gf_list_add(ctx->intervals, interval);
1658 : }
1659 :
1660 : #ifndef GPAC_DISABLE_LOG
1661 18 : if (gf_log_tool_level_on(GF_LOG_PARSER, GF_LOG_DEBUG)) {
1662 0 : for (k=0; k<gf_list_count(ctx->intervals); k++) {
1663 0 : TTMLInterval *ival = gf_list_get(ctx->intervals, k);
1664 0 : GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TTML EBU-TTD] Interval %d: "LLU"-"LLU"\n", k+1, ival->begin, ival->end));
1665 : }
1666 : }
1667 : #endif
1668 : return GF_OK;
1669 : }
1670 :
1671 18 : static GF_Err gf_text_ttml_setup(GF_Filter *filter, GF_TXTIn *ctx)
1672 : {
1673 : GF_Err e;
1674 : u32 i, nb_children, ID;
1675 : u64 file_size;
1676 : s32 sub_fps_num, sub_fps_den;
1677 : GF_XMLAttribute *att;
1678 : GF_XMLNode *root, *node, *body_node;
1679 18 : const char *lang = ctx->lang;
1680 :
1681 :
1682 18 : ctx->is_setup = GF_TRUE;
1683 18 : ctx->parser = gf_xml_dom_new();
1684 18 : e = gf_xml_dom_parse(ctx->parser, ctx->file_name, ttxt_dom_progress, ctx);
1685 18 : if (e) {
1686 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TTML file: Line %d - %s. Abort.\n", gf_xml_dom_get_line(ctx->parser), gf_xml_dom_get_error(ctx->parser) ));
1687 0 : ctx->is_setup = GF_TRUE;
1688 0 : ctx->non_compliant_ttml = GF_TRUE;
1689 0 : return e;
1690 : }
1691 18 : root = gf_xml_dom_get_root(ctx->parser);
1692 18 : if (!root) {
1693 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TTML file: no root XML element found. Abort.\n"));
1694 0 : ctx->non_compliant_ttml = GF_TRUE;
1695 0 : return GF_NON_COMPLIANT_BITSTREAM;
1696 : }
1697 :
1698 : /*look for TTML*/
1699 18 : if (gf_xml_get_element_check_namespace(root, "tt", NULL) != GF_OK) {
1700 0 : if (root->ns) {
1701 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("TTML file not recognized: root element is \"%s:%s\" (check your namespaces)\n", root->ns, root->name));
1702 : } else {
1703 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("TTML file not recognized: root element is \"%s\"\n", root->name));
1704 : }
1705 0 : ctx->non_compliant_ttml = GF_TRUE;
1706 0 : return GF_NOT_SUPPORTED;
1707 : }
1708 :
1709 18 : GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TXTIn] TTML EBU-TTD detected\n"));
1710 :
1711 18 : root = gf_xml_dom_get_root(ctx->parser);
1712 :
1713 :
1714 : /*** root (including language) ***/
1715 : sub_fps_num = 0;
1716 : sub_fps_den = 0;
1717 18 : i=0;
1718 151 : while ( (att = (GF_XMLAttribute *)gf_list_enum(root->attributes, &i))) {
1719 : const char *att_name;
1720 115 : GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TTML] Found root attribute name %s, value %s\n", att->name, att->value));
1721 :
1722 115 : att_name = strchr(att->name, ':');
1723 115 : if (att_name) att_name++;
1724 : else att_name = att->name;
1725 :
1726 115 : if (!strcmp(att->name, "xmlns")) {
1727 15 : if (strcmp(att->value, TTML_NAMESPACE)) {
1728 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML] XML Namespace %s not recognized, expecting %s\n", att->name, att->value, TTML_NAMESPACE));
1729 0 : ctx->non_compliant_ttml = GF_TRUE;
1730 0 : return GF_NON_COMPLIANT_BITSTREAM;
1731 : }
1732 : }
1733 100 : else if (!strcmp(att->name, "xml:lang") && att->value && strlen(att->value)) {
1734 : lang = att->value;
1735 : }
1736 84 : else if (!strcmp(att_name, "tickRate") && att->value) {
1737 2 : ctx->tick_rate = atoi(att->value);
1738 : }
1739 82 : else if (!strcmp(att_name, "frameRate") && att->value) {
1740 2 : ctx->ttml_fps_num = atoi(att->value);
1741 2 : ctx->ttml_fps_den = 1;
1742 : }
1743 80 : else if (!strcmp(att_name, "frameRateMultiplier") && att->value) {
1744 2 : char *sep = strchr(att->value, ' ');
1745 2 : if (!sep) sep = strchr(att->value, '\t');
1746 2 : if (sep) {
1747 2 : u8 c = sep[0];
1748 2 : sep[0] = 0;
1749 : sub_fps_num = atoi(sep);
1750 2 : sep[0] = c;
1751 6 : while ((sep[0]==' ') || (sep[0]=='\t'))
1752 2 : sep++;
1753 : sub_fps_den = atoi(sep);
1754 : }
1755 : }
1756 78 : else if (!strcmp(att_name, "subFrameRate") && att->value) {
1757 2 : ctx->ttml_sfps = atoi(att->value);
1758 : }
1759 : }
1760 :
1761 18 : if (sub_fps_num && sub_fps_den && ctx->ttml_fps_num) {
1762 0 : ctx->ttml_fps_num *= sub_fps_num;
1763 0 : ctx->ttml_fps_den = sub_fps_den;
1764 : }
1765 :
1766 : //locate body
1767 18 : nb_children = gf_list_count(root->content);
1768 : body_node = NULL;
1769 :
1770 18 : i=0;
1771 126 : while ( (node = (GF_XMLNode*)gf_list_enum(root->content, &i))) {
1772 90 : if (node->type) {
1773 : nb_children--;
1774 54 : continue;
1775 : }
1776 36 : e = gf_xml_get_element_check_namespace(node, "body", root->ns);
1777 36 : if (e == GF_BAD_PARAM) {
1778 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", node->name));
1779 36 : } else if (e == GF_OK) {
1780 18 : if (body_node) {
1781 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"body\" element. Abort.\n"));
1782 0 : ctx->non_compliant_ttml = GF_TRUE;
1783 0 : return GF_NON_COMPLIANT_BITSTREAM;
1784 : }
1785 : body_node = node;
1786 : }
1787 : }
1788 18 : if (!body_node) {
1789 0 : GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TTML EBU-TTD] \"body\" element not found, assuming empty doc\n"));
1790 : }
1791 :
1792 18 : if (!ctx->div_nodes_list) {
1793 18 : ctx->div_nodes_list = gf_list_new();
1794 18 : if (!ctx->div_nodes_list) return GF_OUT_OF_MEM;
1795 : } else {
1796 0 : gf_list_reset(ctx->div_nodes_list);
1797 : }
1798 :
1799 18 : if (body_node) {
1800 18 : i=0;
1801 90 : while ( (node = (GF_XMLNode*)gf_list_enum(body_node->content, &i))) {
1802 54 : if (!node->type) {
1803 18 : e = gf_xml_get_element_check_namespace(node, "div", root->ns);
1804 18 : if (e == GF_BAD_PARAM) {
1805 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", node->name));
1806 : }
1807 : }
1808 54 : gf_list_add(ctx->div_nodes_list, node);
1809 : }
1810 : }
1811 18 : file_size = ctx->end;
1812 18 : if (!ctx->timescale) ctx->timescale = 1000;
1813 :
1814 18 : if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
1815 18 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
1816 18 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_SUBS_XML) );
1817 18 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
1818 18 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
1819 :
1820 : ID = 1;
1821 18 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
1822 18 : if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
1823 18 : if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
1824 18 : if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
1825 18 : if (lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( lang) );
1826 18 : gf_filter_pid_set_property_str(ctx->opid, "meta:xmlns", &PROP_STRING(TTML_NAMESPACE) );
1827 :
1828 : /*** body ***/
1829 18 : ctx->parser_working_copy = gf_xml_dom_new();
1830 18 : e = gf_xml_dom_parse(ctx->parser_working_copy, ctx->file_name, NULL, NULL);
1831 : assert (e == GF_OK);
1832 18 : ctx->root_working_copy = gf_xml_dom_get_root(ctx->parser_working_copy);
1833 : assert(ctx->root_working_copy);
1834 :
1835 18 : if (body_node) {
1836 : /*remove all the sample entries (instances in body) entries from the working copy, we will add each sample in this clone DOM to create full XML of each sample*/
1837 18 : ebu_ttd_remove_samples(ctx->root_working_copy, &ctx->body_node);
1838 18 : if (!ctx->body_node) {
1839 : return GF_NON_COMPLIANT_BITSTREAM;
1840 : }
1841 : } else {
1842 0 : ctx->body_node = NULL;
1843 : }
1844 :
1845 18 : ctx->current_tt_interval = 0;
1846 :
1847 18 : ctx->last_sample_duration = 0;
1848 18 : ctx->end = 0;
1849 18 : ctx->first_samp = GF_TRUE;
1850 :
1851 18 : txtin_probe_duration(ctx);
1852 :
1853 18 : e = ttml_setup_intervals(ctx);
1854 18 : if (e) return e;
1855 :
1856 18 : if (ctx->has_images) {
1857 : char *mime_cfg = "application/ttml+xml;codecs=im1i";
1858 2 : gf_filter_pid_set_property_str(ctx->opid, "meta:mime", &PROP_STRING(mime_cfg) );
1859 : } else {
1860 16 : gf_filter_pid_set_property_str(ctx->opid, "meta:mime", NULL);
1861 : }
1862 : return GF_OK;
1863 : }
1864 :
1865 836 : static Bool ttml_check_range(TTMLInterval *interval, s64 ts_begin, s64 ts_end)
1866 : {
1867 : //if in current interval, push node
1868 836 : if ((ts_begin != -1) && (ts_end != -1) && ((ts_begin>=interval->begin) && (ts_end<=interval->end))
1869 : ) {
1870 : return GF_TRUE;
1871 : }
1872 : //begin not set, end set: in range if end less than interval end range
1873 733 : else if ((ts_begin==-1) && (ts_end != -1) && (ts_end<=interval->end)) {
1874 : return GF_TRUE;
1875 : }
1876 : //begin set, end not set: in range if begin greater than interval begin range
1877 733 : else if ((ts_begin!=-1) && (ts_end==-1) && (ts_begin>=interval->begin)) {
1878 : return GF_TRUE;
1879 : }
1880 : return GF_FALSE;
1881 : }
1882 :
1883 154 : static GF_Err gf_text_process_ttml(GF_Filter *filter, GF_TXTIn *ctx)
1884 : {
1885 : GF_Err e;
1886 : GF_XMLNode *root;
1887 : u32 i, nb_res_interval=0, k, nb_div_nodes;
1888 : char *samp_text=NULL;
1889 : GF_List *emb_resources = NULL;
1890 : TTMLInterval *interval;
1891 : Bool sample_empty = GF_TRUE;
1892 :
1893 154 : if (!ctx->is_setup) return gf_text_ttml_setup(filter, ctx);
1894 136 : if (ctx->non_compliant_ttml || !ctx->opid) return GF_NOT_SUPPORTED;
1895 136 : if (!ctx->playstate) return GF_OK;
1896 116 : else if (ctx->playstate==2) return GF_EOS;
1897 :
1898 116 : if (ctx->seek_state==1) {
1899 0 : ctx->seek_state = 2;
1900 0 : ctx->current_tt_interval = 0;
1901 : }
1902 :
1903 116 : interval = gf_list_get(ctx->intervals, ctx->current_tt_interval);
1904 116 : if (!interval) {
1905 18 : GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TTML EBU-TTD] last_sample_duration="LLU", last_sample_end="LLU"\n", ctx->last_sample_duration, ctx->end));
1906 :
1907 18 : gf_filter_pid_set_info_str( ctx->opid, "ttxt:last_dur", &PROP_UINT((u32) ctx->last_sample_duration) );
1908 18 : gf_filter_pid_set_eos(ctx->opid);
1909 18 : return GF_EOS;
1910 : }
1911 98 : ctx->current_tt_interval++;
1912 :
1913 98 : emb_resources = interval->resources ? interval->resources : ctx->ttml_resources;
1914 98 : nb_res_interval = gf_list_count(emb_resources);
1915 :
1916 98 : root = gf_xml_dom_get_root(ctx->parser);
1917 :
1918 98 : nb_div_nodes = gf_list_count(ctx->div_nodes_list);
1919 392 : for (k=0; k<nb_div_nodes; k++) {
1920 : Bool has_content = GF_FALSE;
1921 294 : GF_XMLNode *div_node = gf_list_get(ctx->div_nodes_list, k);
1922 294 : u32 nb_children = gf_list_count(div_node->content);
1923 :
1924 294 : GF_XMLNode *copy_div_node = gf_list_get(ctx->body_node->content, k);
1925 :
1926 2058 : for (i=0; i < nb_children; i++) {
1927 : GF_XMLNode *p_node;
1928 : GF_XMLAttribute *p_att;
1929 1764 : u32 p_idx = 0;
1930 : s64 ts_begin = -1, ts_end = -1;
1931 : Bool in_range;
1932 1764 : GF_XMLNode *div_child = (GF_XMLNode*)gf_list_get(div_node->content, i);
1933 1764 : if (div_child->type) {
1934 1904 : continue;
1935 : }
1936 813 : e = gf_xml_get_element_check_namespace(div_child, "p", root->ns);
1937 813 : if (e == GF_BAD_PARAM) {
1938 2 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", div_child->name));
1939 2 : continue;
1940 : }
1941 :
1942 :
1943 : //sample is either in the <p> ...
1944 811 : p_idx = 0;
1945 4727 : while ( (p_att = (GF_XMLAttribute*)gf_list_enum(div_child->attributes, &p_idx))) {
1946 3105 : if (!strcmp(p_att->name, "begin")) {
1947 802 : if (ts_begin != -1) {
1948 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"begin\" attribute. Abort.\n"));
1949 : e = GF_NON_COMPLIANT_BITSTREAM;
1950 0 : goto exit;
1951 : }
1952 802 : ts_begin = ttml_get_timestamp(ctx, p_att->value);
1953 2303 : } else if (!strcmp(p_att->name, "end")) {
1954 802 : if (ts_end != -1) {
1955 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"end\" attribute. Abort.\n"));
1956 : e = GF_NON_COMPLIANT_BITSTREAM;
1957 : goto exit;
1958 : }
1959 802 : ts_end = ttml_get_timestamp(ctx, p_att->value);
1960 : }
1961 : }
1962 :
1963 811 : in_range = ttml_check_range(interval, ts_begin, ts_end);
1964 811 : if (in_range) {
1965 100 : GF_XMLNode *prev_child = i ? (GF_XMLNode*) gf_list_get(div_node->content, i-1) : NULL;
1966 100 : if (prev_child && prev_child->type) {
1967 100 : gf_xml_dom_append_child(copy_div_node, prev_child);
1968 : }
1969 100 : e = gf_xml_dom_append_child(copy_div_node, div_child);
1970 : assert(e == GF_OK);
1971 : has_content = GF_TRUE;
1972 : }
1973 :
1974 : //or under a <span>
1975 811 : p_idx = 0;
1976 4018 : while ( (p_node = (GF_XMLNode*)gf_list_enum(div_child->content, &p_idx))) {
1977 2399 : u32 span_idx = 0;
1978 : GF_XMLAttribute *span_att;
1979 2399 : e = gf_xml_get_element_check_namespace(p_node, "span", root->ns);
1980 2399 : if (e == GF_BAD_PARAM) {
1981 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", p_node->name));
1982 : }
1983 2399 : else if (e)
1984 2374 : continue;
1985 :
1986 : ts_begin = ts_end = -1;
1987 56 : while ( (span_att = (GF_XMLAttribute*)gf_list_enum(p_node->attributes, &span_idx))) {
1988 31 : if (!strcmp(span_att->name, "begin")) {
1989 9 : if (ts_begin != -1) {
1990 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"begin\" attribute under <span>. Abort.\n"));
1991 : e = GF_NON_COMPLIANT_BITSTREAM;
1992 0 : goto exit;
1993 : }
1994 9 : ts_begin = ttml_get_timestamp(ctx, span_att->value);
1995 22 : } else if (!strcmp(span_att->name, "end")) {
1996 9 : if (ts_end != -1) {
1997 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"end\" attribute under <span>. Abort.\n"));
1998 : e = GF_NON_COMPLIANT_BITSTREAM;
1999 : goto exit;
2000 : }
2001 9 : ts_end = ttml_get_timestamp(ctx, span_att->value);
2002 : }
2003 : }
2004 :
2005 : /*append the entire <p> and break (we cannot split the text content)*/
2006 25 : in_range = ttml_check_range(interval, ts_begin, ts_end);
2007 25 : if (in_range) {
2008 3 : GF_XMLNode *prev_child = i ? (GF_XMLNode*) gf_list_get(div_node->content, i-1) : NULL;
2009 3 : if (prev_child && prev_child->type) {
2010 3 : gf_xml_dom_append_child(copy_div_node, prev_child);
2011 : }
2012 3 : e = gf_xml_dom_append_child(copy_div_node, div_child);
2013 : assert(e == GF_OK);
2014 : has_content = GF_TRUE;
2015 3 : break;
2016 : }
2017 : }
2018 : }
2019 294 : if (has_content) {
2020 97 : GF_XMLNode *last_child = (GF_XMLNode*) gf_list_last(div_node->content);
2021 97 : if (last_child && last_child->type) {
2022 97 : gf_xml_dom_append_child(copy_div_node, last_child);
2023 : }
2024 : sample_empty = GF_FALSE;
2025 : }
2026 : }
2027 :
2028 : //empty doc
2029 98 : if (!ctx->body_node)
2030 : sample_empty = GF_FALSE;
2031 :
2032 98 : if (! sample_empty) {
2033 97 : samp_text = gf_xml_dom_serialize_root((GF_XMLNode*)ctx->root_working_copy, GF_FALSE, GF_FALSE);
2034 :
2035 388 : for (k=0; k<nb_div_nodes; k++) {
2036 291 : GF_XMLNode *copy_div_node = gf_list_get(ctx->body_node->content, k);
2037 291 : if (!copy_div_node->type)
2038 97 : gf_list_reset(copy_div_node->content);
2039 : }
2040 : }
2041 :
2042 97 : if (samp_text) {
2043 : GF_FilterPacket *pck;
2044 : u8 *pck_data;
2045 : Bool skip_pck = GF_FALSE;
2046 : u32 txt_len;
2047 : u32 res_len = 0;
2048 : char *txt_str;
2049 :
2050 :
2051 97 : if (interval->begin < (s64) ctx->end) {
2052 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] Error computing overlapped intervals! \"begin\" is "LLD" , last \"end\" was "LLD". Abort.\n", interval->begin, ctx->end));
2053 : e = GF_NOT_SUPPORTED;
2054 0 : goto exit;
2055 : }
2056 :
2057 97 : txt_str = ttxt_parse_string(samp_text, GF_TRUE);
2058 97 : if (!txt_str) txt_str = "";
2059 97 : txt_len = (u32) strlen(txt_str);
2060 :
2061 105 : for (i=0; i<nb_res_interval; i++) {
2062 8 : TTMLRes *res = gf_list_get(emb_resources, i);
2063 8 : res_len += res->size;
2064 : }
2065 :
2066 97 : if (ctx->first_samp) {
2067 17 : interval->begin = 0; /*in MP4 we must start at T=0*/
2068 17 : ctx->first_samp = GF_FALSE;
2069 : }
2070 :
2071 97 : ctx->last_sample_duration = interval->end - interval->begin;
2072 :
2073 97 : ctx->end = interval->end;
2074 97 : GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("ts_begin="LLD", ts_end="LLD", last_sample_duration="LLU" (real duration: "LLU"), last_sample_end="LLU"\n", interval->begin, interval->end, interval->end - ctx->end, ctx->last_sample_duration, ctx->end));
2075 :
2076 97 : if (ctx->seek_state==2) {
2077 0 : Double end = (Double) interval->end;
2078 0 : end /= ctx->timescale;
2079 0 : if (end<ctx->start_range) skip_pck = GF_TRUE;
2080 0 : else ctx->seek_state = 0;
2081 : }
2082 :
2083 : if (!skip_pck) {
2084 97 : pck = gf_filter_pck_new_alloc(ctx->opid, txt_len+res_len, &pck_data);
2085 97 : if (!pck) {
2086 0 : gf_free(samp_text);
2087 0 : return GF_OUT_OF_MEM;
2088 : }
2089 97 : memcpy(pck_data, txt_str, txt_len);
2090 97 : gf_filter_pck_set_sap(pck, GF_FILTER_SAP_1);
2091 :
2092 97 : if (ctx->ttml_dur>0) {
2093 0 : gf_filter_pck_set_cts(pck, 0);
2094 0 : gf_filter_pck_set_duration(pck, (u32) ctx->ttml_dur);
2095 0 : ctx->last_sample_duration = (u64) ctx->ttml_dur * 1000 / ctx->timescale;
2096 : } else {
2097 97 : gf_filter_pck_set_cts(pck, (ctx->timescale * interval->begin)/1000);
2098 97 : if (interval->end >= interval->begin) {
2099 97 : gf_filter_pck_set_duration(pck, (u32) ((ctx->timescale * (interval->end - interval->begin) )/1000) );
2100 : }
2101 : }
2102 :
2103 97 : if (res_len) {
2104 6 : GF_BitStream *subs = gf_bs_new(NULL, 0, GF_BITSTREAM_WRITE);
2105 : u8 *subs_data;
2106 : u32 subs_size;
2107 : //subs 0
2108 6 : gf_bs_write_u32(subs, 0);
2109 6 : gf_bs_write_u32(subs, txt_len);
2110 6 : gf_bs_write_u32(subs, 0);
2111 6 : gf_bs_write_u8(subs, 0);
2112 6 : gf_bs_write_u8(subs, 0);
2113 :
2114 6 : pck_data += txt_len;
2115 14 : for (i=0; i<nb_res_interval; i++) {
2116 8 : TTMLRes *res = gf_list_get(emb_resources, i);
2117 8 : memcpy(pck_data, res->data, res->size);
2118 8 : pck_data += res->size;
2119 :
2120 : //subs >0
2121 8 : gf_bs_write_u32(subs, 0);
2122 8 : gf_bs_write_u32(subs, res->size);
2123 8 : gf_bs_write_u32(subs, 0);
2124 8 : gf_bs_write_u8(subs, 0);
2125 8 : gf_bs_write_u8(subs, 0);
2126 : }
2127 6 : gf_bs_get_content(subs, &subs_data, &subs_size);
2128 6 : gf_bs_del(subs);
2129 6 : gf_filter_pck_set_property(pck, GF_PROP_PCK_SUBS, &PROP_DATA_NO_COPY(subs_data, subs_size) );
2130 : }
2131 97 : gf_filter_pck_send(pck);
2132 : }
2133 :
2134 97 : gf_free(samp_text);
2135 : samp_text = NULL;
2136 : } else {
2137 1 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] empty sample (begin="LLD", end="LLD"). Skip.\n", interval->begin, interval->end));
2138 : }
2139 :
2140 : return GF_OK;
2141 :
2142 :
2143 0 : exit:
2144 0 : if (!ctx->non_compliant_ttml) {
2145 0 : ctx->non_compliant_ttml = GF_TRUE;
2146 0 : gf_filter_pid_set_discard(ctx->ipid, GF_TRUE);
2147 : }
2148 : return e;
2149 : }
2150 :
2151 : #ifndef GPAC_DISABLE_SWF_IMPORT
2152 :
2153 885 : static GF_Err swf_svg_add_iso_sample(void *user, const u8 *data, u32 length, u64 timestamp, Bool isRap)
2154 : {
2155 : GF_FilterPacket *pck;
2156 : u8 *pck_data;
2157 : GF_TXTIn *ctx = (GF_TXTIn *)user;
2158 :
2159 885 : if (ctx->seek_state==2) {
2160 0 : Double ts = (Double) timestamp;
2161 0 : ts/=1000;
2162 0 : if (ts<ctx->start_range) return GF_OK;
2163 0 : ctx->seek_state = 0;
2164 : }
2165 :
2166 885 : pck = gf_filter_pck_new_alloc(ctx->opid, length, &pck_data);
2167 885 : if (pck) {
2168 885 : memcpy(pck_data, data, length);
2169 885 : gf_filter_pck_set_cts(pck, (u64) (ctx->timescale*timestamp/1000) );
2170 885 : gf_filter_pck_set_sap(pck, isRap ? GF_FILTER_SAP_1 : GF_FILTER_SAP_NONE);
2171 885 : gf_filter_pck_set_framing(pck, GF_TRUE, GF_FALSE);
2172 :
2173 885 : gf_filter_pck_send(pck);
2174 : }
2175 :
2176 885 : if (gf_filter_pid_would_block(ctx->opid))
2177 880 : ctx->do_suspend = GF_TRUE;
2178 : return GF_OK;
2179 : }
2180 :
2181 10 : static GF_Err swf_svg_add_iso_header(void *user, const u8 *data, u32 length, Bool isHeader)
2182 : {
2183 : GF_TXTIn *ctx = (GF_TXTIn *)user;
2184 :
2185 10 : if (isHeader) {
2186 5 : if (!ctx->hdr_parsed) {
2187 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA((char *)data, (u32) ( strlen(data)+1 ) ) );
2188 5 : ctx->hdr_parsed = GF_TRUE;
2189 : }
2190 5 : } else if (!ctx->seek_state) {
2191 : GF_FilterPacket *pck;
2192 : u8 *pck_data;
2193 5 : pck = gf_filter_pck_new_alloc(ctx->opid, length, &pck_data);
2194 5 : if (pck) {
2195 5 : memcpy(pck_data, data, length);
2196 5 : gf_filter_pck_set_framing(pck, GF_FALSE, GF_TRUE);
2197 :
2198 5 : gf_filter_pck_send(pck);
2199 : }
2200 : }
2201 10 : return GF_OK;
2202 : }
2203 :
2204 5 : static GF_Err gf_text_swf_setup(GF_Filter *filter, GF_TXTIn *ctx)
2205 : {
2206 : GF_Err e;
2207 : u32 ID;
2208 :
2209 5 : ctx->swf_parse = gf_swf_reader_new(NULL, ctx->file_name);
2210 5 : e = gf_swf_read_header(ctx->swf_parse);
2211 5 : if (e) return e;
2212 5 : gf_swf_reader_set_user_mode(ctx->swf_parse, ctx, swf_svg_add_iso_sample, swf_svg_add_iso_header);
2213 :
2214 5 : if (!ctx->timescale) ctx->timescale = 1000;
2215 :
2216 5 : if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
2217 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
2218 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_SIMPLE_TEXT) );
2219 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
2220 : // gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_UINT(file_size) );
2221 :
2222 : //patch for old arch
2223 5 : ctx->width = FIX2INT(ctx->swf_parse->width);
2224 5 : ctx->height = FIX2INT(ctx->swf_parse->height);
2225 5 : if (!ctx->width && !ctx->height) {
2226 0 : ctx->width = 400;
2227 0 : ctx->height = 60;
2228 : }
2229 : ID = 1;
2230 5 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
2231 5 : if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
2232 5 : if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
2233 5 : if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
2234 5 : if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
2235 :
2236 5 : gf_filter_pid_set_property_str(ctx->opid, "meta:mime", &PROP_STRING("image/svg+xml") );
2237 :
2238 : #ifndef GPAC_DISABLE_SVG
2239 5 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] swf -> svg not fully migrated, using SWF flags 0 and no flatten angle. Patch welcome\n"));
2240 5 : e = swf_to_svg_init(ctx->swf_parse, 0, 0);
2241 : #endif
2242 :
2243 : //SWF->BIFS is handled in ctx loader, no need to define it here
2244 5 : txtin_probe_duration(ctx);
2245 :
2246 5 : return e;
2247 : }
2248 :
2249 890 : static GF_Err gf_text_process_swf(GF_Filter *filter, GF_TXTIn *ctx)
2250 : {
2251 : GF_Err e=GF_OK;
2252 :
2253 890 : if (!ctx->is_setup) {
2254 5 : ctx->is_setup = GF_TRUE;
2255 5 : return gf_text_swf_setup(filter, ctx);
2256 : }
2257 885 : if (!ctx->opid) return GF_NOT_SUPPORTED;
2258 :
2259 885 : if (ctx->seek_state==1) {
2260 0 : ctx->seek_state = 2;
2261 0 : gf_swf_reader_del(ctx->swf_parse);
2262 0 : ctx->swf_parse = gf_swf_reader_new(NULL, ctx->file_name);
2263 0 : gf_swf_read_header(ctx->swf_parse);
2264 0 : gf_swf_reader_set_user_mode(ctx->swf_parse, ctx, swf_svg_add_iso_sample, swf_svg_add_iso_header);
2265 : }
2266 :
2267 885 : ctx->do_suspend = GF_FALSE;
2268 : /*parse all tags*/
2269 3806 : while (e == GF_OK) {
2270 2916 : e = swf_parse_tag(ctx->swf_parse);
2271 2916 : if (ctx->do_suspend) return GF_OK;
2272 : }
2273 5 : if (e==GF_EOS) {
2274 5 : if (ctx->swf_parse->finalize) {
2275 5 : ctx->swf_parse->finalize(ctx->swf_parse);
2276 5 : ctx->swf_parse->finalize = NULL;
2277 : }
2278 : }
2279 : return e;
2280 : }
2281 : /* end of SWF Importer */
2282 :
2283 : #else
2284 :
2285 : #ifndef GPAC_DISABLE_ZLIB
2286 : static GF_Err gf_text_process_swf(GF_Filter *filter, GF_TXTIn *ctx)
2287 : {
2288 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("Warning: GPAC was compiled without SWF import support, can't import file.\n"));
2289 : return GF_NOT_SUPPORTED;
2290 : }
2291 : #endif
2292 :
2293 :
2294 : #endif /*GPAC_DISABLE_SWF_IMPORT*/
2295 :
2296 16 : static GF_Err gf_text_process_sub(GF_Filter *filter, GF_TXTIn *ctx)
2297 : {
2298 : u32 i, j, len, line;
2299 : GF_TextSample *samp;
2300 : Double ts_scale;
2301 : char szLine[2048], szTime[20], szText[2048];
2302 :
2303 : //same setup as for srt
2304 16 : if (!ctx->is_setup) {
2305 3 : ctx->is_setup = GF_TRUE;
2306 3 : return txtin_setup_srt(filter, ctx);
2307 : }
2308 13 : if (!ctx->opid) return GF_NOT_SUPPORTED;
2309 13 : if (!ctx->playstate) return GF_OK;
2310 10 : else if (ctx->playstate==2) return GF_EOS;
2311 :
2312 9 : if (ctx->seek_state==1) {
2313 0 : ctx->seek_state = 2;
2314 0 : gf_fseek(ctx->src, 0, SEEK_SET);
2315 : }
2316 :
2317 9 : if (ctx->fps.den && ctx->fps.num) {
2318 0 : ts_scale = ((Double) ctx->fps.num) / ctx->fps.den;
2319 : } else {
2320 : ts_scale = 25;
2321 : }
2322 :
2323 : line = 0;
2324 :
2325 : while (1) {
2326 13 : char *sOK = gf_text_get_utf8_line(szLine, 2048, ctx->src, ctx->unicode_type);
2327 13 : if (!sOK) break;
2328 :
2329 22 : REM_TRAIL_MARKS(szLine, "\r\n\t ")
2330 :
2331 11 : line++;
2332 : len = (u32) strlen(szLine);
2333 11 : if (!len) continue;
2334 :
2335 : i=0;
2336 7 : if (szLine[i] != '{') {
2337 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Bad SUB file (line %d): expecting \"{\" got \"%c\"\n", line, szLine[i]));
2338 0 : continue;
2339 : }
2340 23 : while (szLine[i+1] && szLine[i+1]!='}') {
2341 16 : szTime[i] = szLine[i+1];
2342 : i++;
2343 : }
2344 7 : szTime[i] = 0;
2345 7 : ctx->start = atoi(szTime);
2346 7 : if (ctx->start < ctx->end) {
2347 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] corrupted SUB frame (line %d) - starts (at %d ms) before end of previous one (%d ms) - adjusting time stamps\n", line, ctx->start, ctx->end));
2348 0 : ctx->start = ctx->end;
2349 : }
2350 7 : j=i+2;
2351 : i=0;
2352 7 : if (szLine[i+j] != '{') {
2353 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Bad SUB file - expecting \"{\" got \"%c\"\n", szLine[i]));
2354 0 : continue;
2355 : }
2356 25 : while (szLine[i+1+j] && szLine[i+1+j]!='}') {
2357 18 : szTime[i] = szLine[i+1+j];
2358 18 : i++;
2359 : }
2360 7 : szTime[i] = 0;
2361 7 : ctx->end = atoi(szTime);
2362 7 : j+=i+2;
2363 :
2364 7 : if (ctx->start > ctx->end) {
2365 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] corrupted SUB frame (line %d) - ends (at %d ms) before start of current frame (%d ms) - skipping\n", line, ctx->end, ctx->start));
2366 0 : continue;
2367 : }
2368 :
2369 7 : if (ctx->start && ctx->first_samp) {
2370 3 : samp = gf_isom_new_text_sample();
2371 3 : txtin_process_send_text_sample(ctx, samp, 0, (u32) (ts_scale*ctx->start), GF_TRUE);
2372 3 : ctx->first_samp = GF_FALSE;
2373 3 : gf_isom_delete_text_sample(samp);
2374 : }
2375 :
2376 161 : for (i=j; i<len; i++) {
2377 161 : if (szLine[i]=='|') {
2378 2 : szText[i-j] = '\n';
2379 : } else {
2380 159 : szText[i-j] = szLine[i];
2381 : }
2382 : }
2383 7 : szText[i-j] = 0;
2384 :
2385 7 : if (ctx->prev_end) {
2386 4 : samp = gf_isom_new_text_sample();
2387 4 : txtin_process_send_text_sample(ctx, samp, (u64) (ts_scale*(s64)ctx->prev_end), (u32) (ts_scale*(ctx->start - ctx->prev_end)), GF_TRUE);
2388 4 : gf_isom_delete_text_sample(samp);
2389 : }
2390 :
2391 7 : samp = gf_isom_new_text_sample();
2392 7 : gf_isom_text_add_text(samp, szText, (u32) strlen(szText) );
2393 7 : txtin_process_send_text_sample(ctx, samp, (u64) (ts_scale*(s64)ctx->start), (u32) (ts_scale*(ctx->end - ctx->start)), GF_TRUE);
2394 7 : gf_isom_delete_text_sample(samp);
2395 :
2396 7 : ctx->prev_end = ctx->end;
2397 :
2398 7 : gf_filter_pid_set_info(ctx->opid, GF_PROP_PID_DOWN_BYTES, &PROP_LONGUINT( gf_ftell(ctx->src )) );
2399 :
2400 7 : if (gf_filter_pid_would_block(ctx->opid))
2401 : return GF_OK;
2402 : }
2403 : /*final flush*/
2404 2 : if (ctx->end && !ctx->noflush) {
2405 2 : samp = gf_isom_new_text_sample();
2406 2 : txtin_process_send_text_sample(ctx, samp, (u64) (ts_scale*(s64)ctx->end), 0, GF_TRUE);
2407 2 : gf_isom_delete_text_sample(samp);
2408 : }
2409 :
2410 2 : gf_filter_pid_set_info_str( ctx->opid, "ttxt:last_dur", &PROP_UINT(0) );
2411 :
2412 2 : return GF_EOS;
2413 : }
2414 :
2415 :
2416 :
2417 23 : static u32 ttxt_get_color(char *val)
2418 : {
2419 : u32 r, g, b, a, res;
2420 23 : r = g = b = a = 0;
2421 23 : if (sscanf(val, "%x %x %x %x", &r, &g, &b, &a) != 4) {
2422 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Warning: color badly formatted %s\n", val));
2423 : }
2424 23 : res = (a&0xFF);
2425 23 : res<<=8;
2426 23 : res |= (r&0xFF);
2427 23 : res<<=8;
2428 23 : res |= (g&0xFF);
2429 23 : res<<=8;
2430 23 : res |= (b&0xFF);
2431 23 : return res;
2432 : }
2433 :
2434 4 : static void ttxt_parse_text_box(GF_XMLNode *n, GF_BoxRecord *box)
2435 : {
2436 4 : u32 i=0;
2437 : GF_XMLAttribute *att;
2438 : memset(box, 0, sizeof(GF_BoxRecord));
2439 20 : while ( (att=(GF_XMLAttribute *)gf_list_enum(n->attributes, &i))) {
2440 20 : if (!stricmp(att->name, "top")) box->top = atoi(att->value);
2441 16 : else if (!stricmp(att->name, "bottom")) box->bottom = atoi(att->value);
2442 12 : else if (!stricmp(att->name, "left")) box->left = atoi(att->value);
2443 8 : else if (!stricmp(att->name, "right")) box->right = atoi(att->value);
2444 : }
2445 4 : }
2446 :
2447 19 : static void ttxt_parse_text_style(GF_TXTIn *ctx, GF_XMLNode *n, GF_StyleRecord *style)
2448 : {
2449 19 : u32 i=0;
2450 : GF_XMLAttribute *att;
2451 : memset(style, 0, sizeof(GF_StyleRecord));
2452 19 : style->fontID = 1;
2453 19 : style->font_size = ctx->fontsize ;
2454 19 : style->text_color = 0xFFFFFFFF;
2455 :
2456 125 : while ( (att=(GF_XMLAttribute *)gf_list_enum(n->attributes, &i))) {
2457 121 : if (!stricmp(att->name, "fromChar")) style->startCharOffset = atoi(att->value);
2458 106 : else if (!stricmp(att->name, "toChar")) style->endCharOffset = atoi(att->value);
2459 95 : else if (!stricmp(att->name, "fontID")) style->fontID = atoi(att->value);
2460 76 : else if (!stricmp(att->name, "fontSize")) style->font_size = atoi(att->value);
2461 38 : else if (!stricmp(att->name, "color")) style->text_color = ttxt_get_color(att->value);
2462 19 : else if (!stricmp(att->name, "styles")) {
2463 19 : if (strstr(att->value, "Bold")) style->style_flags |= GF_TXT_STYLE_BOLD;
2464 19 : if (strstr(att->value, "Italic")) style->style_flags |= GF_TXT_STYLE_ITALIC;
2465 19 : if (strstr(att->value, "Underlined")) style->style_flags |= GF_TXT_STYLE_UNDERLINED;
2466 19 : if (strstr(att->value, "Strikethrough")) style->style_flags |= GF_TXT_STYLE_STRIKETHROUGH;
2467 : }
2468 : }
2469 19 : }
2470 :
2471 4 : static GF_Err txtin_setup_ttxt(GF_Filter *filter, GF_TXTIn *ctx)
2472 : {
2473 : GF_Err e;
2474 : u32 j, k, ID, OCR_ES_ID;
2475 : u64 file_size;
2476 : GF_XMLNode *root, *ext;
2477 : GF_PropertyValue *dcd;
2478 :
2479 4 : ctx->parser = gf_xml_dom_new();
2480 4 : e = gf_xml_dom_parse(ctx->parser, ctx->file_name, ttxt_dom_progress, ctx);
2481 4 : if (e) {
2482 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TTXT file: Line %d - %s\n", gf_xml_dom_get_line(ctx->parser), gf_xml_dom_get_error(ctx->parser)));
2483 : return e;
2484 : }
2485 4 : root = gf_xml_dom_get_root(ctx->parser);
2486 :
2487 4 : if (strcmp(root->name, "TextStream")) {
2488 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Invalid Timed Text file - expecting \"TextStream\" got %s", root->name));
2489 : return GF_NON_COMPLIANT_BITSTREAM;
2490 : }
2491 4 : file_size = ctx->end;
2492 4 : ctx->end = 0;
2493 :
2494 : /*setup track in 3GP format directly (no ES desc)*/
2495 4 : if (!ctx->timescale) ctx->timescale = 1000;
2496 : OCR_ES_ID = ID = 0;
2497 :
2498 4 : if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
2499 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
2500 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_TX3G) );
2501 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
2502 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
2503 :
2504 : if (!ID) ID = 1;
2505 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
2506 : if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
2507 :
2508 4 : ctx->nb_children = gf_list_count(root->content);
2509 :
2510 4 : ctx->cur_child_idx = 0;
2511 16 : for (ctx->cur_child_idx=0; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
2512 16 : GF_XMLNode *node = (GF_XMLNode*) gf_list_get(root->content, ctx->cur_child_idx);
2513 :
2514 16 : if (node->type) {
2515 8 : continue;
2516 : }
2517 :
2518 8 : if (!strcmp(node->name, "TextStreamHeader")) {
2519 : GF_XMLNode *sdesc;
2520 : s32 w, h, tx, ty, layer;
2521 : u32 tref_id;
2522 : GF_XMLAttribute *att;
2523 4 : w = ctx->width;
2524 4 : h = ctx->height;
2525 4 : tx = ctx->txtx;
2526 4 : ty = ctx->txty;
2527 4 : layer = ctx->zorder;
2528 : tref_id = 0;
2529 :
2530 4 : j=0;
2531 28 : while ( (att=(GF_XMLAttribute *)gf_list_enum(node->attributes, &j))) {
2532 20 : if (!strcmp(att->name, "width")) w = atoi(att->value);
2533 16 : else if (!strcmp(att->name, "height")) h = atoi(att->value);
2534 12 : else if (!strcmp(att->name, "layer")) layer = atoi(att->value);
2535 8 : else if (!strcmp(att->name, "translation_x")) tx = atoi(att->value);
2536 4 : else if (!strcmp(att->name, "translation_y")) ty = atoi(att->value);
2537 0 : else if (!strcmp(att->name, "trefID")) tref_id = atoi(att->value);
2538 : }
2539 :
2540 4 : if (tref_id) {
2541 0 : gf_filter_pid_set_property_str(ctx->opid, "tref:chap", &PROP_UINT(tref_id) );
2542 : }
2543 :
2544 4 : if (w) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(w) );
2545 4 : if (h) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(h) );
2546 4 : if (tx) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TRANS_X, &PROP_UINT(tx) );
2547 4 : if (ty) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TRANS_X, &PROP_UINT(ty) );
2548 4 : if (layer) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
2549 4 : if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
2550 :
2551 4 : j=0;
2552 20 : while ( (sdesc=(GF_XMLNode*)gf_list_enum(node->content, &j))) {
2553 12 : if (sdesc->type) continue;
2554 :
2555 4 : if (!strcmp(sdesc->name, "TextSampleDescription")) {
2556 : GF_TextSampleDescriptor td;
2557 : memset(&td, 0, sizeof(GF_TextSampleDescriptor));
2558 4 : td.tag = GF_ODF_TEXT_CFG_TAG;
2559 4 : td.vert_justif = (s8) -1;
2560 4 : td.default_style.fontID = 1;
2561 4 : td.default_style.font_size = ctx->fontsize;
2562 :
2563 4 : k=0;
2564 36 : while ( (att=(GF_XMLAttribute *)gf_list_enum(sdesc->attributes, &k))) {
2565 28 : if (!strcmp(att->name, "horizontalJustification")) {
2566 4 : if (!stricmp(att->value, "center")) td.horiz_justif = 1;
2567 0 : else if (!stricmp(att->value, "right")) td.horiz_justif = (s8) -1;
2568 0 : else if (!stricmp(att->value, "left")) td.horiz_justif = 0;
2569 : }
2570 24 : else if (!strcmp(att->name, "verticalJustification")) {
2571 4 : if (!stricmp(att->value, "center")) td.vert_justif = 1;
2572 4 : else if (!stricmp(att->value, "bottom")) td.vert_justif = (s8) -1;
2573 0 : else if (!stricmp(att->value, "top")) td.vert_justif = 0;
2574 : }
2575 20 : else if (!strcmp(att->name, "backColor")) td.back_color = ttxt_get_color(att->value);
2576 16 : else if (!strcmp(att->name, "verticalText") && !stricmp(att->value, "yes") ) td.displayFlags |= GF_TXT_VERTICAL;
2577 16 : else if (!strcmp(att->name, "fillTextRegion") && !stricmp(att->value, "yes") ) td.displayFlags |= GF_TXT_FILL_REGION;
2578 16 : else if (!strcmp(att->name, "continuousKaraoke") && !stricmp(att->value, "yes") ) td.displayFlags |= GF_TXT_KARAOKE;
2579 16 : else if (!strcmp(att->name, "scroll")) {
2580 4 : if (!stricmp(att->value, "inout")) td.displayFlags |= GF_TXT_SCROLL_IN | GF_TXT_SCROLL_OUT;
2581 4 : else if (!stricmp(att->value, "in")) td.displayFlags |= GF_TXT_SCROLL_IN;
2582 4 : else if (!stricmp(att->value, "out")) td.displayFlags |= GF_TXT_SCROLL_OUT;
2583 : }
2584 12 : else if (!strcmp(att->name, "scrollMode")) {
2585 : u32 scroll_mode = GF_TXT_SCROLL_CREDITS;
2586 0 : if (!stricmp(att->value, "Credits")) scroll_mode = GF_TXT_SCROLL_CREDITS;
2587 0 : else if (!stricmp(att->value, "Marquee")) scroll_mode = GF_TXT_SCROLL_MARQUEE;
2588 0 : else if (!stricmp(att->value, "Right")) scroll_mode = GF_TXT_SCROLL_RIGHT;
2589 0 : else if (!stricmp(att->value, "Down")) scroll_mode = GF_TXT_SCROLL_DOWN;
2590 0 : td.displayFlags |= ((scroll_mode<<7) & GF_TXT_SCROLL_DIRECTION);
2591 : }
2592 : }
2593 :
2594 4 : k=0;
2595 36 : while ( (ext=(GF_XMLNode*)gf_list_enum(sdesc->content, &k))) {
2596 28 : if (ext->type) continue;
2597 12 : if (!strcmp(ext->name, "TextBox")) ttxt_parse_text_box(ext, &td.default_pos);
2598 8 : else if (!strcmp(ext->name, "Style")) ttxt_parse_text_style(ctx, ext, &td.default_style);
2599 4 : else if (!strcmp(ext->name, "FontTable")) {
2600 : GF_XMLNode *ftable;
2601 4 : u32 z=0;
2602 20 : while ( (ftable=(GF_XMLNode*)gf_list_enum(ext->content, &z))) {
2603 : u32 m;
2604 12 : if (ftable->type || strcmp(ftable->name, "FontTableEntry")) continue;
2605 4 : td.font_count += 1;
2606 4 : td.fonts = (GF_FontRecord*)gf_realloc(td.fonts, sizeof(GF_FontRecord)*td.font_count);
2607 4 : m=0;
2608 16 : while ( (att=(GF_XMLAttribute *)gf_list_enum(ftable->attributes, &m))) {
2609 12 : if (!stricmp(att->name, "fontID")) td.fonts[td.font_count-1].fontID = atoi(att->value);
2610 4 : else if (!stricmp(att->name, "fontName")) td.fonts[td.font_count-1].fontName = gf_strdup(att->value);
2611 : }
2612 : }
2613 : }
2614 : }
2615 4 : if (ctx->nodefbox) {
2616 0 : td.default_pos.top = td.default_pos.left = td.default_pos.right = td.default_pos.bottom = 0;
2617 : } else {
2618 4 : if ((td.default_pos.bottom==td.default_pos.top) || (td.default_pos.right==td.default_pos.left)) {
2619 0 : td.default_pos.top = td.default_pos.left = 0;
2620 0 : td.default_pos.right = w;
2621 0 : td.default_pos.bottom = h;
2622 : }
2623 : }
2624 4 : if (!td.fonts) {
2625 0 : td.font_count = 1;
2626 0 : td.fonts = (GF_FontRecord*)gf_malloc(sizeof(GF_FontRecord));
2627 0 : td.fonts[0].fontID = 1;
2628 0 : td.fonts[0].fontName = gf_strdup("Serif");
2629 : }
2630 4 : GF_SAFEALLOC(dcd, GF_PropertyValue);
2631 4 : if (dcd) {
2632 4 : dcd->type = GF_PROP_DATA;
2633 :
2634 4 : gf_odf_tx3g_write(&td, &dcd->value.data.ptr, &dcd->value.data.size);
2635 4 : if (!ctx->text_descs) ctx->text_descs = gf_list_new();
2636 4 : gf_list_add(ctx->text_descs, dcd);
2637 : }
2638 :
2639 4 : for (k=0; k<td.font_count; k++) gf_free(td.fonts[k].fontName);
2640 4 : gf_free(td.fonts);
2641 : }
2642 : }
2643 : }
2644 : else {
2645 : break;
2646 : }
2647 : }
2648 :
2649 4 : if (!ctx->text_descs) {
2650 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Invalid Timed Text file - text stream header not found or empty\n"));
2651 : return GF_NON_COMPLIANT_BITSTREAM;
2652 : }
2653 4 : dcd = gf_list_get(ctx->text_descs, 0);
2654 4 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, dcd);
2655 4 : ctx->last_desc_idx = 1;
2656 :
2657 4 : ctx->first_samp = GF_TRUE;
2658 4 : ctx->last_sample_empty = GF_FALSE;
2659 4 : ctx->last_sample_duration = 0;
2660 :
2661 4 : txtin_probe_duration(ctx);
2662 :
2663 4 : return GF_OK;
2664 : }
2665 :
2666 47 : static GF_Err txtin_process_ttxt(GF_Filter *filter, GF_TXTIn *ctx)
2667 : {
2668 : u32 j, k;
2669 : GF_XMLNode *root, *ext;
2670 :
2671 47 : if (!ctx->is_setup) {
2672 4 : ctx->is_setup = GF_TRUE;
2673 4 : return txtin_setup_ttxt(filter, ctx);
2674 : }
2675 43 : if (!ctx->opid) return GF_NON_COMPLIANT_BITSTREAM;
2676 43 : if (!ctx->playstate) return GF_OK;
2677 37 : else if (ctx->playstate==2) return GF_EOS;
2678 :
2679 37 : if (ctx->seek_state==1) {
2680 0 : ctx->seek_state = 2;
2681 0 : ctx->cur_child_idx = 0;
2682 : }
2683 37 : root = gf_xml_dom_get_root(ctx->parser);
2684 :
2685 88 : for (; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
2686 : GF_TextSample * samp;
2687 : u32 ts, descIndex;
2688 : Bool has_text = GF_FALSE;
2689 : GF_XMLAttribute *att;
2690 84 : GF_XMLNode *node = (GF_XMLNode*) gf_list_get(root->content, ctx->cur_child_idx);
2691 :
2692 84 : if (node->type) {
2693 42 : continue;
2694 : }
2695 : /*sample text*/
2696 42 : else if (strcmp(node->name, "TextSample")) continue;
2697 :
2698 42 : samp = gf_isom_new_text_sample();
2699 : ts = 0;
2700 : descIndex = 1;
2701 42 : ctx->last_sample_empty = GF_TRUE;
2702 :
2703 42 : j=0;
2704 210 : while ( (att=(GF_XMLAttribute*)gf_list_enum(node->attributes, &j))) {
2705 126 : if (!strcmp(att->name, "sampleTime")) {
2706 : u32 h, m, s, ms;
2707 42 : if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
2708 42 : ts = (h*3600 + m*60 + s)*1000 + ms;
2709 : } else {
2710 0 : ts = (u32) (atof(att->value) * 1000);
2711 : }
2712 : }
2713 126 : else if (!strcmp(att->name, "sampleDescriptionIndex")) descIndex = atoi(att->value);
2714 42 : else if (!strcmp(att->name, "text")) {
2715 : u32 len;
2716 0 : char *str = ttxt_parse_string(att->value, GF_TRUE);
2717 0 : len = (u32) strlen(str);
2718 0 : gf_isom_text_add_text(samp, str, len);
2719 0 : ctx->last_sample_empty = len ? GF_FALSE : GF_TRUE;
2720 : has_text = GF_TRUE;
2721 : }
2722 42 : else if (!strcmp(att->name, "scrollDelay")) gf_isom_text_set_scroll_delay(samp, (u32) (1000*atoi(att->value)));
2723 42 : else if (!strcmp(att->name, "highlightColor")) gf_isom_text_set_highlight_color(samp, ttxt_get_color(att->value));
2724 42 : else if (!strcmp(att->name, "wrap") && !strcmp(att->value, "Automatic")) gf_isom_text_set_wrap(samp, 0x01);
2725 : }
2726 :
2727 : /*get all modifiers*/
2728 42 : j=0;
2729 135 : while ( (ext=(GF_XMLNode*)gf_list_enum(node->content, &j))) {
2730 51 : if (!has_text && (ext->type==GF_XML_TEXT_TYPE)) {
2731 : u32 len;
2732 19 : char *str = ttxt_parse_string(ext->name, GF_FALSE);
2733 19 : len = (u32) strlen(str);
2734 19 : gf_isom_text_add_text(samp, str, len);
2735 19 : ctx->last_sample_empty = len ? GF_FALSE : GF_TRUE;
2736 : has_text = GF_TRUE;
2737 : }
2738 51 : if (ext->type) continue;
2739 :
2740 16 : if (!stricmp(ext->name, "Style")) {
2741 : GF_StyleRecord r;
2742 15 : ttxt_parse_text_style(ctx, ext, &r);
2743 15 : gf_isom_text_add_style(samp, &r);
2744 : }
2745 1 : else if (!stricmp(ext->name, "TextBox")) {
2746 : GF_BoxRecord r;
2747 0 : ttxt_parse_text_box(ext, &r);
2748 0 : gf_isom_text_set_box(samp, r.top, r.left, r.bottom, r.right);
2749 : }
2750 1 : else if (!stricmp(ext->name, "Highlight")) {
2751 : u16 start, end;
2752 : start = end = 0;
2753 0 : k=0;
2754 0 : while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2755 0 : if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2756 0 : else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2757 : }
2758 0 : gf_isom_text_add_highlight(samp, start, end);
2759 : }
2760 1 : else if (!stricmp(ext->name, "Blinking")) {
2761 : u16 start, end;
2762 : start = end = 0;
2763 1 : k=0;
2764 4 : while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2765 3 : if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2766 2 : else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2767 : }
2768 1 : gf_isom_text_add_blink(samp, start, end);
2769 : }
2770 0 : else if (!stricmp(ext->name, "HyperLink")) {
2771 : u16 start, end;
2772 : char *url, *url_tt;
2773 : start = end = 0;
2774 : url = url_tt = NULL;
2775 0 : k=0;
2776 0 : while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2777 0 : if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2778 0 : else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2779 0 : else if (!strcmp(att->name, "URL")) url = gf_strdup(att->value);
2780 0 : else if (!strcmp(att->name, "URLToolTip")) url_tt = gf_strdup(att->value);
2781 : }
2782 0 : gf_isom_text_add_hyperlink(samp, url, url_tt, start, end);
2783 0 : if (url) gf_free(url);
2784 0 : if (url_tt) gf_free(url_tt);
2785 : }
2786 0 : else if (!stricmp(ext->name, "Karaoke")) {
2787 : u32 startTime;
2788 : GF_XMLNode *krok;
2789 : startTime = 0;
2790 0 : k=0;
2791 0 : while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2792 0 : if (!strcmp(att->name, "startTime")) startTime = (u32) (1000*atof(att->value));
2793 : }
2794 0 : gf_isom_text_add_karaoke(samp, startTime);
2795 0 : k=0;
2796 0 : while ( (krok=(GF_XMLNode*)gf_list_enum(ext->content, &k))) {
2797 : u16 start, end;
2798 : u32 endTime, m;
2799 0 : if (krok->type) continue;
2800 0 : if (strcmp(krok->name, "KaraokeRange")) continue;
2801 : start = end = 0;
2802 : endTime = 0;
2803 0 : m=0;
2804 0 : while ( (att=(GF_XMLAttribute *)gf_list_enum(krok->attributes, &m))) {
2805 0 : if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2806 0 : else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2807 0 : else if (!strcmp(att->name, "endTime")) endTime = (u32) (1000*atof(att->value));
2808 : }
2809 0 : gf_isom_text_set_karaoke_segment(samp, endTime, start, end);
2810 : }
2811 : }
2812 : }
2813 :
2814 42 : if (!descIndex) descIndex = 1;
2815 42 : if (descIndex != ctx->last_desc_idx) {
2816 : GF_PropertyValue *dcd;
2817 0 : ctx->last_desc_idx = descIndex;
2818 0 : dcd = gf_list_get(ctx->text_descs, descIndex-1);
2819 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, dcd);
2820 : }
2821 :
2822 : /*in MP4 we must start at T=0, so add an empty sample*/
2823 42 : if (ts && ctx->first_samp) {
2824 0 : GF_TextSample * firstsamp = gf_isom_new_text_sample();
2825 0 : txtin_process_send_text_sample(ctx, firstsamp, 0, 0, GF_TRUE);
2826 0 : gf_isom_delete_text_sample(firstsamp);
2827 : }
2828 42 : ctx->first_samp = GF_FALSE;
2829 :
2830 42 : txtin_process_send_text_sample(ctx, samp, ts, 0, GF_TRUE);
2831 :
2832 42 : gf_isom_delete_text_sample(samp);
2833 :
2834 42 : if (ctx->last_sample_empty) {
2835 23 : ctx->last_sample_duration = ts - ctx->last_sample_duration;
2836 : } else {
2837 19 : ctx->last_sample_duration = ts;
2838 : }
2839 :
2840 42 : if (gf_filter_pid_would_block(ctx->opid)) {
2841 33 : ctx->cur_child_idx++;
2842 33 : return GF_OK;
2843 : }
2844 : }
2845 :
2846 4 : if (ctx->last_sample_empty) {
2847 : //this is a bit ugly, in regular streaming mode we don't want to remove empty samples
2848 : //howvere the last one can be removed, adjusting the duration of the previous one.
2849 : //doing this here is problematic if the loader is sent a new ttxt file, we would have a cue termination sample
2850 : //we therefore share that info through pid, and let the final user (muxer& co) decide what to do
2851 4 : gf_filter_pid_set_info_str( ctx->opid, "ttxt:rem_last", &PROP_BOOL(GF_TRUE) );
2852 4 : gf_filter_pid_set_info_str( ctx->opid, "ttxt:last_dur", &PROP_UINT((u32) ctx->last_sample_duration) );
2853 : }
2854 :
2855 : return GF_EOS;
2856 : }
2857 :
2858 :
2859 8 : static u32 tx3g_get_color(char *value)
2860 : {
2861 : u32 r, g, b, a;
2862 : u32 res, v;
2863 8 : r = g = b = a = 0;
2864 8 : if (sscanf(value, "%u%%, %u%%, %u%%, %u%%", &r, &g, &b, &a) != 4) {
2865 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("Warning: color badly formatted\n"));
2866 : }
2867 8 : v = (u32) (a*255/100);
2868 : res = (v&0xFF);
2869 8 : res<<=8;
2870 8 : v = (u32) (r*255/100);
2871 8 : res |= (v&0xFF);
2872 8 : res<<=8;
2873 8 : v = (u32) (g*255/100);
2874 8 : res |= (v&0xFF);
2875 8 : res<<=8;
2876 8 : v = (u32) (b*255/100);
2877 8 : res |= (v&0xFF);
2878 8 : return res;
2879 : }
2880 :
2881 3 : static void tx3g_parse_text_box(GF_XMLNode *n, GF_BoxRecord *box)
2882 : {
2883 3 : u32 i=0;
2884 : GF_XMLAttribute *att;
2885 : memset(box, 0, sizeof(GF_BoxRecord));
2886 15 : while ((att=(GF_XMLAttribute *)gf_list_enum(n->attributes, &i))) {
2887 15 : if (!stricmp(att->name, "x")) box->left = atoi(att->value);
2888 12 : else if (!stricmp(att->name, "y")) box->top = atoi(att->value);
2889 9 : else if (!stricmp(att->name, "height")) box->bottom = atoi(att->value);
2890 6 : else if (!stricmp(att->name, "width")) box->right = atoi(att->value);
2891 : }
2892 3 : }
2893 :
2894 : typedef struct
2895 : {
2896 : u32 id;
2897 : u32 pos;
2898 : } Marker;
2899 :
/*
 * Resolves the marker whose id is given by the current attribute value (att->value)
 * and stores its character position in _val, or 0 when no such marker exists.
 * Relies on "att", "marks" and "nb_marks" being in scope at the expansion site.
 * __isend is currently unused. The expansion is a bare brace block (no trailing
 * semicolon needed), which lets it sit directly between "if (...)" and "else".
 */
#define GET_MARKER_POS(_val, __isend)	\
	{	\
		u32 __idx;	\
		u32 __wanted = atoi(att->value);	\
		_val = 0;	\
		for (__idx=0; __idx<nb_marks; __idx++) {	\
			if (__wanted != marks[__idx].id) continue;	\
			_val = marks[__idx].pos;	\
			break;	\
		}	\
	}
2906 :
2907 :
2908 1 : static GF_Err txtin_texml_setup(GF_Filter *filter, GF_TXTIn *ctx)
2909 : {
2910 : GF_Err e;
2911 : u32 ID, OCR_ES_ID, i;
2912 : u64 file_size;
2913 : GF_XMLAttribute *att;
2914 : GF_XMLNode *root;
2915 :
2916 1 : ctx->parser = gf_xml_dom_new();
2917 1 : e = gf_xml_dom_parse(ctx->parser, ctx->file_name, ttxt_dom_progress, ctx);
2918 1 : if (e) {
2919 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TeXML file: Line %d - %s", gf_xml_dom_get_line(ctx->parser), gf_xml_dom_get_error(ctx->parser) ));
2920 0 : gf_xml_dom_del(ctx->parser);
2921 0 : ctx->parser = NULL;
2922 0 : return e;
2923 : }
2924 :
2925 1 : root = gf_xml_dom_get_root(ctx->parser);
2926 :
2927 1 : if (strcmp(root->name, "text3GTrack")) {
2928 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Invalid QT TeXML file - expecting root \"text3GTrack\" got \"%s\"", root->name));
2929 : return GF_NON_COMPLIANT_BITSTREAM;
2930 : }
2931 1 : file_size = ctx->end;
2932 1 : ctx->txml_timescale = 600;
2933 :
2934 1 : i=0;
2935 8 : while ( (att=(GF_XMLAttribute *)gf_list_enum(root->attributes, &i))) {
2936 7 : if (!strcmp(att->name, "trackWidth")) ctx->width = atoi(att->value);
2937 6 : else if (!strcmp(att->name, "trackHeight")) ctx->height = atoi(att->value);
2938 5 : else if (!strcmp(att->name, "layer")) ctx->zorder = atoi(att->value);
2939 4 : else if (!strcmp(att->name, "timeScale")) ctx->txml_timescale = atoi(att->value);
2940 2 : else if (!strcmp(att->name, "transform")) {
2941 : Float fx, fy;
2942 1 : sscanf(att->value, "translate(%f,%f)", &fx, &fy);
2943 1 : ctx->txtx = (u32) fx;
2944 1 : ctx->txty = (u32) fy;
2945 : }
2946 : }
2947 :
2948 : /*setup track in 3GP format directly (no ES desc)*/
2949 : OCR_ES_ID = ID = 0;
2950 1 : if (!ctx->timescale) ctx->timescale = 1000;
2951 :
2952 1 : if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
2953 1 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
2954 1 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_ISOM_SUBTYPE_TX3G) );
2955 1 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
2956 1 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
2957 :
2958 :
2959 : if (!ID) ID = 1;
2960 1 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
2961 : if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
2962 1 : if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
2963 1 : if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
2964 1 : if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
2965 1 : if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
2966 :
2967 :
2968 1 : ctx->nb_children = gf_list_count(root->content);
2969 1 : ctx->cur_child_idx = 0;
2970 1 : txtin_probe_duration(ctx);
2971 :
2972 1 : return GF_OK;
2973 : }
2974 :
2975 5 : static GF_Err txtin_process_texml(GF_Filter *filter, GF_TXTIn *ctx)
2976 : {
2977 : u32 j, k;
2978 : GF_StyleRecord styles[50];
2979 : Marker marks[50];
2980 : GF_XMLAttribute *att;
2981 : GF_XMLNode *root;
2982 : Bool probe_first_desc_only = GF_FALSE;
2983 :
2984 5 : if (!ctx->is_setup) {
2985 : GF_Err e;
2986 :
2987 1 : ctx->is_setup = GF_TRUE;
2988 1 : e = txtin_texml_setup(filter, ctx);
2989 1 : if (e) return e;
2990 : probe_first_desc_only = GF_TRUE;
2991 : }
2992 5 : if (!ctx->opid) return GF_NON_COMPLIANT_BITSTREAM;
2993 5 : if (!ctx->playstate && !probe_first_desc_only) return GF_OK;
2994 4 : else if (ctx->playstate==2) return GF_EOS;
2995 :
2996 4 : if (ctx->seek_state==1) {
2997 0 : ctx->seek_state = 2;
2998 0 : ctx->cur_child_idx = 0;
2999 0 : ctx->start = 0;
3000 : }
3001 :
3002 4 : root = gf_xml_dom_get_root(ctx->parser);
3003 :
3004 7 : for (; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
3005 : GF_XMLNode *node, *desc;
3006 : GF_TextSampleDescriptor td;
3007 : GF_TextSample * samp = NULL;
3008 : u64 duration;
3009 : u32 nb_styles, nb_marks;
3010 : Bool isRAP, same_style, same_box;
3011 :
3012 6 : if (probe_first_desc_only && ctx->text_descs && gf_list_count(ctx->text_descs))
3013 3 : return GF_OK;
3014 :
3015 : memset(&td, 0, sizeof(GF_TextSampleDescriptor));
3016 6 : node = (GF_XMLNode*)gf_list_get(root->content, ctx->cur_child_idx);
3017 9 : if (node->type) continue;
3018 3 : if (strcmp(node->name, "sample")) continue;
3019 :
3020 : isRAP = GF_TRUE;
3021 : duration = 1000;
3022 3 : j=0;
3023 12 : while ((att=(GF_XMLAttribute *)gf_list_enum(node->attributes, &j))) {
3024 9 : if (!strcmp(att->name, "duration")) duration = atoi(att->value);
3025 3 : else if (!strcmp(att->name, "keyframe")) isRAP = (!stricmp(att->value, "true") ? GF_TRUE : GF_FALSE);
3026 : }
3027 : nb_styles = 0;
3028 : nb_marks = 0;
3029 : same_style = same_box = GF_FALSE;
3030 3 : j=0;
3031 17 : while ((desc=(GF_XMLNode*)gf_list_enum(node->content, &j))) {
3032 12 : if (desc->type) continue;
3033 :
3034 5 : if (!strcmp(desc->name, "description")) {
3035 : u8 *dsi;
3036 : u32 dsi_len, stsd_idx;
3037 : GF_XMLNode *sub;
3038 : memset(&td, 0, sizeof(GF_TextSampleDescriptor));
3039 3 : td.tag = GF_ODF_TEXT_CFG_TAG;
3040 3 : td.vert_justif = (s8) -1;
3041 3 : td.default_style.fontID = 1;
3042 3 : td.default_style.font_size = ctx->fontsize;
3043 :
3044 3 : k=0;
3045 21 : while ((att=(GF_XMLAttribute *)gf_list_enum(desc->attributes, &k))) {
3046 15 : if (!strcmp(att->name, "horizontalJustification")) {
3047 3 : if (!stricmp(att->value, "center")) td.horiz_justif = 1;
3048 3 : else if (!stricmp(att->value, "right")) td.horiz_justif = (s8) -1;
3049 3 : else if (!stricmp(att->value, "left")) td.horiz_justif = 0;
3050 : }
3051 12 : else if (!strcmp(att->name, "verticalJustification")) {
3052 3 : if (!stricmp(att->value, "center")) td.vert_justif = 1;
3053 3 : else if (!stricmp(att->value, "bottom")) td.vert_justif = (s8) -1;
3054 3 : else if (!stricmp(att->value, "top")) td.vert_justif = 0;
3055 : }
3056 9 : else if (!strcmp(att->name, "backgroundColor")) td.back_color = tx3g_get_color(att->value);
3057 6 : else if (!strcmp(att->name, "displayFlags")) {
3058 : Bool rev_scroll = GF_FALSE;
3059 3 : if (strstr(att->value, "scroll")) {
3060 : u32 scroll_mode = 0;
3061 0 : if (strstr(att->value, "scrollIn")) td.displayFlags |= GF_TXT_SCROLL_IN;
3062 0 : if (strstr(att->value, "scrollOut")) td.displayFlags |= GF_TXT_SCROLL_OUT;
3063 0 : if (strstr(att->value, "reverse")) rev_scroll = GF_TRUE;
3064 0 : if (strstr(att->value, "horizontal")) scroll_mode = rev_scroll ? GF_TXT_SCROLL_RIGHT : GF_TXT_SCROLL_MARQUEE;
3065 0 : else scroll_mode = (rev_scroll ? GF_TXT_SCROLL_DOWN : GF_TXT_SCROLL_CREDITS);
3066 0 : td.displayFlags |= (scroll_mode<<7) & GF_TXT_SCROLL_DIRECTION;
3067 : }
3068 : /*TODO FIXME: check in QT doc !!*/
3069 3 : if (strstr(att->value, "writeTextVertically")) td.displayFlags |= GF_TXT_VERTICAL;
3070 3 : if (!strcmp(att->name, "continuousKaraoke")) td.displayFlags |= GF_TXT_KARAOKE;
3071 : }
3072 : }
3073 :
3074 3 : k=0;
3075 27 : while ((sub=(GF_XMLNode*)gf_list_enum(desc->content, &k))) {
3076 21 : if (sub->type) continue;
3077 9 : if (!strcmp(sub->name, "defaultTextBox")) tx3g_parse_text_box(sub, &td.default_pos);
3078 6 : else if (!strcmp(sub->name, "fontTable")) {
3079 : GF_XMLNode *ftable;
3080 3 : u32 m=0;
3081 15 : while ((ftable=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
3082 9 : if (ftable->type) continue;
3083 3 : if (!strcmp(ftable->name, "font")) {
3084 3 : u32 n=0;
3085 3 : td.font_count += 1;
3086 3 : td.fonts = (GF_FontRecord*)gf_realloc(td.fonts, sizeof(GF_FontRecord)*td.font_count);
3087 12 : while ((att=(GF_XMLAttribute *)gf_list_enum(ftable->attributes, &n))) {
3088 9 : if (!stricmp(att->name, "id")) td.fonts[td.font_count-1].fontID = atoi(att->value);
3089 3 : else if (!stricmp(att->name, "name")) td.fonts[td.font_count-1].fontName = gf_strdup(att->value);
3090 : }
3091 : }
3092 : }
3093 : }
3094 3 : else if (!strcmp(sub->name, "sharedStyles")) {
3095 : GF_XMLNode *style, *ftable;
3096 3 : u32 m=0;
3097 9 : while ((style=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
3098 6 : if (style->type) continue;
3099 3 : if (!strcmp(style->name, "style")) break;
3100 : }
3101 3 : if (style) {
3102 : char *cur;
3103 : s32 start=0;
3104 : char css_style[1024], css_val[1024];
3105 3 : memset(&styles[nb_styles], 0, sizeof(GF_StyleRecord));
3106 3 : m=0;
3107 9 : while ( (att=(GF_XMLAttribute *)gf_list_enum(style->attributes, &m))) {
3108 6 : if (!strcmp(att->name, "id")) styles[nb_styles].startCharOffset = atoi(att->value);
3109 : }
3110 3 : m=0;
3111 6 : while ( (ftable=(GF_XMLNode*)gf_list_enum(style->content, &m))) {
3112 3 : if (ftable->type) break;
3113 : }
3114 3 : cur = ftable ? ftable->name : NULL;
3115 18 : while (cur) {
3116 15 : start = gf_token_get_strip(cur, 0, "{:", " ", css_style, 1024);
3117 15 : if (start <0) break;
3118 15 : start = gf_token_get_strip(cur, start, ":}", " ", css_val, 1024);
3119 15 : if (start <0) break;
3120 15 : cur = strchr(cur+start, '{');
3121 :
3122 15 : if (!strcmp(css_style, "font-table")) {
3123 : u32 z;
3124 3 : styles[nb_styles].fontID = atoi(css_val);
3125 3 : for (z=0; z<td.font_count; z++) {
3126 3 : if (td.fonts[z].fontID == styles[nb_styles].fontID)
3127 : break;
3128 : }
3129 : }
3130 15 : else if (!strcmp(css_style, "font-size")) styles[nb_styles].font_size = atoi(css_val);
3131 9 : else if (!strcmp(css_style, "font-style") && !strcmp(css_val, "italic")) styles[nb_styles].style_flags |= GF_TXT_STYLE_ITALIC;
3132 9 : else if (!strcmp(css_style, "font-weight") && !strcmp(css_val, "bold")) styles[nb_styles].style_flags |= GF_TXT_STYLE_BOLD;
3133 8 : else if (!strcmp(css_style, "text-decoration") && !strcmp(css_val, "underline")) styles[nb_styles].style_flags |= GF_TXT_STYLE_UNDERLINED;
3134 8 : else if (!strcmp(css_style, "text-decoration") && !strcmp(css_val, "strikethrough")) styles[nb_styles].style_flags |= GF_TXT_STYLE_STRIKETHROUGH;
3135 8 : else if (!strcmp(css_style, "color")) styles[nb_styles].text_color = tx3g_get_color(css_val);
3136 : }
3137 3 : if (!nb_styles) td.default_style = styles[0];
3138 3 : nb_styles++;
3139 : }
3140 : }
3141 :
3142 : }
3143 3 : if ((td.default_pos.bottom==td.default_pos.top) || (td.default_pos.right==td.default_pos.left)) {
3144 0 : td.default_pos.top = ctx->txty;
3145 0 : td.default_pos.left = ctx->txtx;
3146 0 : td.default_pos.right = ctx->width;
3147 0 : td.default_pos.bottom = ctx->height;
3148 : }
3149 3 : if (!td.fonts) {
3150 0 : td.font_count = 1;
3151 0 : td.fonts = (GF_FontRecord*)gf_malloc(sizeof(GF_FontRecord));
3152 0 : td.fonts[0].fontID = 1;
3153 0 : td.fonts[0].fontName = gf_strdup( ctx->fontname ? ctx->fontname : "Serif");
3154 : }
3155 :
3156 3 : gf_odf_tx3g_write(&td, &dsi, &dsi_len);
3157 : stsd_idx = 0;
3158 4 : for (k=0; ctx->text_descs && k<gf_list_count(ctx->text_descs); k++) {
3159 2 : GF_PropertyValue *d = gf_list_get(ctx->text_descs, k);
3160 2 : if (d->value.data.size != dsi_len) continue;
3161 2 : if (! memcmp(d->value.data.ptr, dsi, dsi_len)) {
3162 1 : stsd_idx = k+1;
3163 1 : break;
3164 : }
3165 : }
3166 3 : if (stsd_idx) {
3167 1 : gf_free(dsi);
3168 : } else {
3169 : GF_PropertyValue *d;
3170 2 : GF_SAFEALLOC(d, GF_PropertyValue);
3171 3 : if (!d) return GF_OUT_OF_MEM;
3172 2 : d->type = GF_PROP_DATA;
3173 2 : d->value.data.ptr = dsi;
3174 2 : d->value.data.size = dsi_len;
3175 2 : if (!ctx->text_descs) ctx->text_descs = gf_list_new();
3176 2 : gf_list_add(ctx->text_descs, d);
3177 2 : stsd_idx = gf_list_count(ctx->text_descs);
3178 : }
3179 3 : if (stsd_idx != ctx->last_desc_idx) {
3180 2 : ctx->last_desc_idx = stsd_idx;
3181 2 : GF_PropertyValue *d = gf_list_get(ctx->text_descs, stsd_idx-1);
3182 2 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, d);
3183 : }
3184 :
3185 3 : for (k=0; k<td.font_count; k++) gf_free(td.fonts[k].fontName);
3186 3 : gf_free(td.fonts);
3187 :
3188 3 : if (probe_first_desc_only)
3189 : return GF_OK;
3190 : }
3191 2 : else if (!strcmp(desc->name, "sampleData")) {
3192 : GF_XMLNode *sub;
3193 : u16 start, end;
3194 : u32 styleID;
3195 : u32 nb_chars, txt_len, m;
3196 : nb_chars = 0;
3197 :
3198 2 : samp = gf_isom_new_text_sample();
3199 :
3200 2 : k=0;
3201 10 : while ((att=(GF_XMLAttribute *)gf_list_enum(desc->attributes, &k))) {
3202 6 : if (!strcmp(att->name, "targetEncoding") && !strcmp(att->value, "utf16")) ;//is_utf16 = 1;
3203 8 : else if (!strcmp(att->name, "scrollDelay")) gf_isom_text_set_scroll_delay(samp, atoi(att->value) );
3204 4 : else if (!strcmp(att->name, "highlightColor")) gf_isom_text_set_highlight_color(samp, tx3g_get_color(att->value));
3205 : }
3206 : start = end = 0;
3207 2 : k=0;
3208 20 : while ((sub=(GF_XMLNode*)gf_list_enum(desc->content, &k))) {
3209 16 : if (sub->type) continue;
3210 7 : if (!strcmp(sub->name, "text")) {
3211 : GF_XMLNode *text;
3212 : styleID = 0;
3213 2 : m=0;
3214 6 : while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
3215 4 : if (!strcmp(att->name, "styleID")) styleID = atoi(att->value);
3216 : }
3217 : txt_len = 0;
3218 :
3219 2 : m=0;
3220 26 : while ((text=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
3221 22 : if (!text->type) {
3222 10 : if (!strcmp(text->name, "marker")) {
3223 : u32 z;
3224 10 : memset(&marks[nb_marks], 0, sizeof(Marker));
3225 10 : marks[nb_marks].pos = nb_chars+txt_len;
3226 :
3227 10 : z = 0;
3228 30 : while ( (att=(GF_XMLAttribute *)gf_list_enum(text->attributes, &z))) {
3229 20 : if (!strcmp(att->name, "id")) marks[nb_marks].id = atoi(att->value);
3230 : }
3231 10 : nb_marks++;
3232 : }
3233 12 : } else if (text->type==GF_XML_TEXT_TYPE) {
3234 12 : txt_len += (u32) strlen(text->name);
3235 12 : gf_isom_text_add_text(samp, text->name, (u32) strlen(text->name));
3236 : }
3237 : }
3238 2 : if (styleID && (!same_style || (td.default_style.startCharOffset != styleID))) {
3239 2 : GF_StyleRecord st = td.default_style;
3240 2 : for (m=0; m<nb_styles; m++) {
3241 2 : if (styles[m].startCharOffset==styleID) {
3242 2 : st = styles[m];
3243 2 : break;
3244 : }
3245 : }
3246 2 : st.startCharOffset = nb_chars;
3247 2 : st.endCharOffset = nb_chars + txt_len;
3248 2 : gf_isom_text_add_style(samp, &st);
3249 : }
3250 2 : nb_chars += txt_len;
3251 : }
3252 5 : else if (!stricmp(sub->name, "highlight")) {
3253 1 : m=0;
3254 4 : while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
3255 3 : if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
3256 2 : else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
3257 : }
3258 1 : gf_isom_text_add_highlight(samp, start, end);
3259 : }
3260 4 : else if (!stricmp(sub->name, "blink")) {
3261 1 : m=0;
3262 4 : while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
3263 3 : if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
3264 2 : else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
3265 : }
3266 1 : gf_isom_text_add_blink(samp, start, end);
3267 : }
3268 3 : else if (!stricmp(sub->name, "link")) {
3269 : char *url, *url_tt;
3270 : url = url_tt = NULL;
3271 1 : m=0;
3272 6 : while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
3273 5 : if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
3274 4 : else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
3275 2 : else if (!strcmp(att->name, "URL") || !strcmp(att->name, "href")) url = gf_strdup(att->value);
3276 1 : else if (!strcmp(att->name, "URLToolTip") || !strcmp(att->name, "altString")) url_tt = gf_strdup(att->value);
3277 : }
3278 1 : gf_isom_text_add_hyperlink(samp, url, url_tt, start, end);
3279 1 : if (url) gf_free(url);
3280 1 : if (url_tt) gf_free(url_tt);
3281 : }
3282 2 : else if (!stricmp(sub->name, "karaoke")) {
3283 : u32 time = 0;
3284 : GF_XMLNode *krok;
3285 1 : m=0;
3286 3 : while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
3287 2 : if (!strcmp(att->name, "startTime")) time = atoi(att->value);
3288 : }
3289 1 : gf_isom_text_add_karaoke(samp, time);
3290 1 : m=0;
3291 7 : while ((krok=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
3292 5 : u32 u=0;
3293 8 : if (krok->type) continue;
3294 2 : if (strcmp(krok->name, "run")) continue;
3295 : start = end = 0;
3296 8 : while ((att=(GF_XMLAttribute *)gf_list_enum(krok->attributes, &u))) {
3297 8 : if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
3298 6 : else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
3299 4 : else if (!strcmp(att->name, "duration")) time += atoi(att->value);
3300 : }
3301 2 : gf_isom_text_set_karaoke_segment(samp, time, start, end);
3302 : }
3303 : }
3304 : }
3305 : }
3306 : }
3307 : /*OK, let's add the sample*/
3308 2 : if (samp) {
3309 2 : if (!same_box) gf_isom_text_set_box(samp, td.default_pos.top, td.default_pos.left, td.default_pos.bottom, td.default_pos.right);
3310 : // if (!same_style) gf_isom_text_add_style(samp, &td.default_style);
3311 :
3312 2 : txtin_process_send_text_sample(ctx, samp, (ctx->start*ctx->timescale)/ctx->txml_timescale, (u32) (duration*ctx->timescale)/ctx->txml_timescale, isRAP);
3313 2 : ctx->start += duration;
3314 2 : gf_isom_delete_text_sample(samp);
3315 :
3316 : }
3317 2 : if (gf_filter_pid_would_block(ctx->opid)) {
3318 2 : ctx->cur_child_idx++;
3319 2 : return GF_OK;
3320 : }
3321 : }
3322 :
3323 : return GF_EOS;
3324 : }
3325 :
3326 :
3327 17604 : static GF_Err txtin_process(GF_Filter *filter)
3328 : {
3329 17604 : GF_TXTIn *ctx = gf_filter_get_udta(filter);
3330 : GF_FilterPacket *pck;
3331 : GF_Err e;
3332 : Bool start, end;
3333 17604 : pck = gf_filter_pid_get_packet(ctx->ipid);
3334 17604 : if (!pck) {
3335 : return GF_OK;
3336 : }
3337 17583 : gf_filter_pck_get_framing(pck, &start, &end);
3338 17583 : if (!end) {
3339 302 : gf_filter_pid_drop_packet(ctx->ipid);
3340 302 : return GF_OK;
3341 : }
3342 : //file is loaded
3343 :
3344 17281 : e = ctx->text_process(filter, ctx);
3345 :
3346 :
3347 17281 : if (e==GF_EOS) {
3348 : //keep input alive until end of stream, so that we keep getting called
3349 112 : gf_filter_pid_drop_packet(ctx->ipid);
3350 112 : if (gf_filter_pid_is_eos(ctx->ipid))
3351 112 : gf_filter_pid_set_eos(ctx->opid);
3352 : }
3353 : return e;
3354 : }
3355 :
3356 112 : static void ttxtin_reset(GF_TXTIn *ctx)
3357 : {
3358 112 : if (ctx->samp) gf_isom_delete_text_sample(ctx->samp);
3359 112 : ctx->samp = NULL;
3360 112 : if (ctx->src) gf_fclose(ctx->src);
3361 112 : ctx->src = NULL;
3362 112 : if (ctx->vttparser) gf_webvtt_parser_del(ctx->vttparser);
3363 112 : ctx->vttparser = NULL;
3364 112 : if (ctx->parser) gf_xml_dom_del(ctx->parser);
3365 112 : ctx->parser = NULL;
3366 112 : if (ctx->parser_working_copy) gf_xml_dom_del(ctx->parser_working_copy);
3367 112 : ctx->parser_working_copy = NULL;
3368 112 : }
3369 :
3370 112 : static GF_Err txtin_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove)
3371 : {
3372 : GF_Err e;
3373 : const char *src = NULL;
3374 112 : GF_TXTIn *ctx = gf_filter_get_udta(filter);
3375 : const GF_PropertyValue *prop;
3376 :
3377 112 : if (is_remove) {
3378 0 : ctx->ipid = NULL;
3379 0 : return GF_OK;
3380 : }
3381 :
3382 112 : if (! gf_filter_pid_check_caps(pid))
3383 : return GF_NOT_SUPPORTED;
3384 :
3385 : //we must have a file path
3386 112 : prop = gf_filter_pid_get_property(pid, GF_PROP_PID_FILEPATH);
3387 112 : if (prop && prop->value.string) src = prop->value.string;
3388 : if (!src)
3389 : return GF_NOT_SUPPORTED;
3390 :
3391 112 : if (!ctx->ipid) {
3392 : GF_FilterEvent fevt;
3393 111 : ctx->ipid = pid;
3394 :
3395 : //we work with full file only, send a play event on source to indicate that
3396 111 : GF_FEVT_INIT(fevt, GF_FEVT_PLAY, pid);
3397 : fevt.play.start_range = 0;
3398 111 : fevt.base.on_pid = ctx->ipid;
3399 111 : fevt.play.full_file_only = GF_TRUE;
3400 111 : gf_filter_pid_send_event(ctx->ipid, &fevt);
3401 111 : ctx->file_name = gf_strdup(src);
3402 : } else {
3403 1 : if (pid != ctx->ipid) {
3404 : return GF_REQUIRES_NEW_INSTANCE;
3405 : }
3406 1 : if (!strcmp(ctx->file_name, src)) return GF_OK;
3407 :
3408 1 : ttxtin_reset(ctx);
3409 1 : ctx->is_setup = GF_FALSE;
3410 1 : gf_free(ctx->file_name);
3411 1 : ctx->file_name = gf_strdup(src);
3412 : }
3413 : //guess type
3414 112 : e = gf_text_guess_format(ctx->file_name, &ctx->fmt);
3415 112 : if (e) return e;
3416 112 : if (!ctx->fmt) {
3417 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTLoad] Unknown text format for %s\n", ctx->file_name));
3418 : return GF_NOT_SUPPORTED;
3419 : }
3420 :
3421 112 : if (ctx->webvtt && (ctx->fmt == GF_TXTIN_MODE_SRT))
3422 2 : ctx->fmt = GF_TXTIN_MODE_WEBVTT;
3423 :
3424 112 : switch (ctx->fmt) {
3425 35 : case GF_TXTIN_MODE_SRT:
3426 35 : ctx->text_process = txtin_process_srt;
3427 35 : break;
3428 : #ifndef GPAC_DISABLE_VTT
3429 46 : case GF_TXTIN_MODE_WEBVTT:
3430 46 : ctx->text_process = txtin_process_webvtt;
3431 46 : break;
3432 : #endif
3433 4 : case GF_TXTIN_MODE_TTXT:
3434 4 : ctx->text_process = txtin_process_ttxt;
3435 4 : break;
3436 1 : case GF_TXTIN_MODE_TEXML:
3437 1 : ctx->text_process = txtin_process_texml;
3438 1 : break;
3439 3 : case GF_TXTIN_MODE_SUB:
3440 3 : ctx->text_process = gf_text_process_sub;
3441 3 : break;
3442 18 : case GF_TXTIN_MODE_TTML:
3443 18 : ctx->text_process = gf_text_process_ttml;
3444 18 : break;
3445 : #ifndef GPAC_DISABLE_SWF_IMPORT
3446 5 : case GF_TXTIN_MODE_SWF_SVG:
3447 5 : ctx->text_process = gf_text_process_swf;
3448 5 : break;
3449 : #endif
3450 : default:
3451 : return GF_BAD_PARAM;
3452 : }
3453 :
3454 : return GF_OK;
3455 : }
3456 :
3457 17718 : static Bool txtin_process_event(GF_Filter *filter, const GF_FilterEvent *evt)
3458 : {
3459 17718 : GF_TXTIn *ctx = gf_filter_get_udta(filter);
3460 17718 : switch (evt->base.type) {
3461 112 : case GF_FEVT_PLAY:
3462 112 : if (ctx->playstate==1) return GF_TRUE;
3463 112 : ctx->playstate = 1;
3464 112 : if ((ctx->start_range < 0.1) && (evt->play.start_range<0.1)) return GF_TRUE;
3465 0 : ctx->start_range = evt->play.start_range;
3466 0 : ctx->seek_state = 1;
3467 : //cancel play event, we work with full file
3468 0 : return GF_TRUE;
3469 :
3470 19 : case GF_FEVT_STOP:
3471 19 : ctx->playstate = 2;
3472 : //cancel play event, we work with full file
3473 19 : return GF_TRUE;
3474 : default:
3475 : return GF_FALSE;
3476 : }
3477 : return GF_FALSE;
3478 : }
3479 :
3480 111 : GF_Err txtin_initialize(GF_Filter *filter)
3481 : {
3482 : char data[1];
3483 111 : GF_TXTIn *ctx = gf_filter_get_udta(filter);
3484 111 : ctx->bs_w = gf_bs_new(data, 1, GF_BITSTREAM_WRITE);
3485 :
3486 111 : return GF_OK;
3487 : }
3488 :
3489 111 : void txtin_finalize(GF_Filter *filter)
3490 : {
3491 111 : GF_TXTIn *ctx = gf_filter_get_udta(filter);
3492 :
3493 111 : ttxtin_reset(ctx);
3494 111 : if (ctx->bs_w) gf_bs_del(ctx->bs_w);
3495 :
3496 111 : if (ctx->text_descs) {
3497 11 : while (gf_list_count(ctx->text_descs)) {
3498 6 : GF_PropertyValue *p = gf_list_pop_back(ctx->text_descs);
3499 6 : gf_free(p->value.data.ptr);
3500 6 : gf_free(p);
3501 : }
3502 5 : gf_list_del(ctx->text_descs);
3503 : }
3504 : #ifndef GPAC_DISABLE_SWF_IMPORT
3505 111 : gf_swf_reader_del(ctx->swf_parse);
3506 : #endif
3507 :
3508 111 : if (ctx->intervals) {
3509 18 : ttml_reset_intervals(ctx);
3510 18 : gf_list_del(ctx->intervals);
3511 : }
3512 111 : if (ctx->ttml_resources) {
3513 2 : while (gf_list_count(ctx->ttml_resources)) {
3514 1 : TTMLRes *ires = gf_list_pop_back(ctx->ttml_resources);
3515 1 : gf_free(ires->data);
3516 1 : gf_free(ires);
3517 : }
3518 1 : gf_list_del(ctx->ttml_resources);
3519 : }
3520 111 : if (ctx->div_nodes_list)
3521 18 : gf_list_del(ctx->div_nodes_list);
3522 :
3523 111 : if (ctx->file_name) gf_free(ctx->file_name);
3524 111 : }
3525 :
3526 :
3527 3065 : static const char *txtin_probe_data(const u8 *data, u32 data_size, GF_FilterProbeScore *score)
3528 : {
3529 3065 : char *dst = NULL;
3530 : u8 *res;
3531 :
3532 3065 : res = gf_utf_get_utf8_string_from_bom((char *)data, data_size, &dst);
3533 3065 : if (res) data = res;
3534 :
3535 : #define PROBE_OK(_score, _mime) \
3536 : *score = _score;\
3537 : if (dst) gf_free(dst);\
3538 : return _mime; \
3539 :
3540 :
3541 3065 : if (!strncmp(data, "WEBVTT", 6)) {
3542 26 : PROBE_OK(GF_FPROBE_SUPPORTED, "subtitle/vtt")
3543 : }
3544 3039 : if (strstr(data, " --> ")) {
3545 61 : PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/srt")
3546 : }
3547 2978 : if (!strncmp(data, "FWS", 3) || !strncmp(data, "CWS", 3)) {
3548 5 : PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "application/x-shockwave-flash")
3549 : }
3550 :
3551 2973 : if ((data[0]=='{') && strstr(data, "}{")) {
3552 3 : PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/sub")
3553 :
3554 : }
3555 : /*XML formats*/
3556 2970 : if (!strstr(data, "?>") ) {
3557 2738 : if (dst) gf_free(dst);
3558 : return NULL;
3559 : }
3560 :
3561 232 : if (strstr(data, "<x-quicktime-tx3g") || strstr(data, "<text3GTrack")) {
3562 1 : PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "quicktime/text")
3563 : }
3564 231 : if (strstr(data, "TextStream")) {
3565 4 : PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/ttxt")
3566 : }
3567 227 : if (strstr(data, "<tt ") || strstr(data, ":tt ")) {
3568 18 : PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/ttml")
3569 : }
3570 :
3571 209 : if (dst) gf_free(dst);
3572 : return NULL;
3573 : }
3574 :
3575 : static const GF_FilterCapability TXTInCaps[] =
3576 : {
3577 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_STREAM_TYPE, GF_STREAM_FILE),
3578 : CAP_STRING(GF_CAPS_INPUT, GF_PROP_PID_FILE_EXT, "srt|ttxt|sub|vtt|txml|ttml|swf"),
3579 : CAP_STRING(GF_CAPS_INPUT, GF_PROP_PID_MIME, "x-subtitle/srt|subtitle/srt|text/srt|x-subtitle/sub|subtitle/sub|text/sub|x-subtitle/ttxt|subtitle/ttxt|text/ttxt|x-subtitle/vtt|subtitle/vtt|text/vtt|x-quicktime/text|quicktime/text|subtitle/ttml|text/ttml|application/xml+ttml|application/x-shockwave-flash"),
3580 : CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_STREAM_TYPE, GF_STREAM_TEXT),
3581 : CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_TX3G),
3582 : CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_SIMPLE_TEXT),
3583 : CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_WEBVTT),
3584 : CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_SUBS_XML),
3585 : };
3586 :
3587 : #define OFFS(_n) #_n, offsetof(GF_TXTIn, _n)
3588 :
3589 : static const GF_FilterArgs TXTInArgs[] =
3590 : {
3591 : { OFFS(webvtt), "force WebVTT import of SRT files", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
3592 : { OFFS(nodefbox), "skip default text box", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
3593 : { OFFS(noflush), "skip final sample flush for srt", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
3594 : { OFFS(fontname), "default font to use", GF_PROP_STRING, NULL, NULL, 0},
3595 : { OFFS(fontsize), "default font size", GF_PROP_UINT, "18", NULL, 0},
3596 : { OFFS(lang), "default language to use", GF_PROP_STRING, NULL, NULL, 0},
3597 : { OFFS(width), "default width of text area, set to 0 to resolve against visual PIDs", GF_PROP_UINT, "0", NULL, 0},
3598 : { OFFS(height), "default height of text area, set to 0 to resolve against visual PIDs", GF_PROP_UINT, "0", NULL, 0},
3599 : { OFFS(txtx), "default horizontal offset of text area: -1 (left), 0 (center) or 1 (right)", GF_PROP_UINT, "0", NULL, 0},
3600 : { OFFS(txty), "default vertical offset of text area: -1 (bottom), 0 (center) or 1 (top)", GF_PROP_UINT, "0", NULL, 0},
3601 : { OFFS(zorder), "default z-order of the PID", GF_PROP_SINT, "0", NULL, GF_FS_ARG_HINT_ADVANCED},
3602 : { OFFS(timescale), "default timescale of the PID", GF_PROP_UINT, "1000", NULL, GF_FS_ARG_HINT_ADVANCED},
3603 : { OFFS(ttml_dur), "force single sample mode - see filter help", GF_PROP_SINT, "-1", NULL, GF_FS_ARG_HINT_ADVANCED},
3604 : { OFFS(ttml_embed), "force embedding TTML resources - see filter help", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
3605 : { OFFS(ttml_zero), "set subtitle zero time for TTML - see filter help", GF_PROP_STRING, NULL, NULL, GF_FS_ARG_HINT_ADVANCED},
3606 : {0}
3607 : };
3608 :
3609 : GF_FilterRegister TXTInRegister = {
3610 : .name = "txtin",
3611 : GF_FS_SET_DESCRIPTION("Subtitle loader")
3612 : GF_FS_SET_HELP("This filter reads subtitle data from input file PID to produce subtitle frames on a single PID.\n"
3613 : "The filter supports the following formats:\n"
3614 : "- SRT: https://en.wikipedia.org/wiki/SubRip\n"
3615 : "- WebVTT: https://www.w3.org/TR/webvtt1/\n"
3616 : "- TTXT: https://wiki.gpac.io/TTXT-Format-Documentation\n"
3617 : "- QT 3GPP Text XML (TexML): Apple QT6, likely deprecated\n"
3618 : "- TTML: https://www.w3.org/TR/ttml2/\n"
3619 : "- SUB: one subtitle per line formatted as `{start_frame}{end_frame}text`\n"
3620 : "\n"
3621 : "Input files must be in UTF-8 or UTF-16 format, with or without BOM.\n"
3622 : "The internal frame format is: \n"
3623 : "- WebVTT (and srt if desired): ISO/IEC 14496-30 VTT cues\n"
3624 : "- TTML: ISO/IEC 14496-30 XML subtitles\n"
3625 : "- Others: 3GPP/QT Timed Text\n"
3626 : "\n"
3627 : "# TTML Support\n"
3628 : "The [-ttml_dur]() option controls how TTML are split into packets:\n"
3629 : "- if negative (default), TTML document is split in independent time segments by inspecting all overlapping subtitles in the body\n"
3630 : "- if 0, the input document is not split, forwarded as a single frame with `CTS` matching the first active time in document and a duration equal to the document duration\n"
3631 : "- if >0, the input document is not split, forwarded as a single frame with `CTS=0` and the specified duration in `timescale` units.\n"
3632 : "\n"
3633 : "By default, media resources are kept as declared in TTML2 documents.\n"
3634 : "[-ttml_embed]() can be used to embed inside the TTML sample the resources in `<head>` or `<body>`.\n"
3635 : "- for `<source>`, `<image>`, `<audio>`, `<font>`, local URIs indicated in `src` will be loaded and `src` rewritten.\n"
3636 : "- for `<data>` with base64 coding, the data will be decoded, `<data>` element removed and parent <source> rewritten with `src` attribute inserted.\n"
3637 : "\nThe embedded data is added as a subsample to the TTML frame, and the referring elements will use `src=urn:mpeg:14496-30:N` with `N` the index of the subsample.\n"
3638 : "\n"
3639 : "A `subtitle zero` may be specified using [-ttml_zero](). This will remove all subtitles before the given time `T0`, and rewrite each subtitle begin/end `T` to `T-T0` using millisecond accuracy.\n"
3640 : "Warning: Original time formatting (tick, frames/subframe ...) will be lost when this option is used, converted to `HH:MM:SS.ms`.\n"
3641 : "\n"
3642 : "The subtitle zero time **must** be prefixed with `T` when the option is not set as a global argument:\n"
3643 : "EX gpac -i test.ttml:ttml_zero=T10:00:00 [...]\n"
3644 : "EX MP4Box -add test.ttml:sopt:ttml_zero=T10:00:00 [...]\n"
3645 : "EX gpac -i test.ttml --ttml_zero=10:00:00 [...]\n"
3646 : "EX MP4Box -add test.ttml --ttml_zero=10:00:00 [...]\n"
3647 :
3648 :
3649 :
3650 : )
3651 :
3652 : .private_size = sizeof(GF_TXTIn),
3653 : .flags = GF_FS_REG_MAIN_THREAD,
3654 : .args = TXTInArgs,
3655 : SETCAPS(TXTInCaps),
3656 : .process = txtin_process,
3657 : .configure_pid = txtin_configure_pid,
3658 : .process_event = txtin_process_event,
3659 : .probe_data = txtin_probe_data,
3660 : .initialize = txtin_initialize,
3661 : .finalize = txtin_finalize
3662 : };
3663 :
3664 :
3665 2877 : const GF_FilterRegister *txtin_register(GF_FilterSession *session)
3666 : {
3667 2877 : return &TXTInRegister;
3668 : }
3669 :
3670 :
3671 : #else
3672 : const GF_FilterRegister *txtin_register(GF_FilterSession *session)
3673 : {
3674 : return NULL;
3675 : }
3676 : #endif // GPAC_DISABLE_ISOM_WRITE
3677 :
|