Line data Source code
1 : /*
2 : * GPAC - Multimedia Framework C SDK
3 : *
4 : * Authors: Jean Le Feuvre
5 : * Copyright (c) Telecom ParisTech 2000-2012
6 : * All rights reserved
7 : *
8 : * This file is part of GPAC / common tools sub-project
9 : *
10 : * GPAC is free software; you can redistribute it and/or modify
11 : * it under the terms of the GNU Lesser General Public License as published by
12 : * the Free Software Foundation; either version 2, or (at your option)
13 : * any later version.
14 : *
15 : * GPAC is distributed in the hope that it will be useful,
16 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 : * GNU Lesser General Public License for more details.
19 : *
20 : * You should have received a copy of the GNU Lesser General Public
21 : * License along with this library; see the file COPYING. If not, write to
22 : * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 : *
24 : */
25 :
26 : #include <gpac/network.h>
27 :
28 : /* the length of the URL separator ("://" || "|//") */
29 : #define URL_SEP_LENGTH 3
30 :
/* classification of a path or URL, as returned by URL_GetProtocolType() */
enum
{
	/* absolute path on the file system */
	GF_URL_TYPE_FILE_PATH = 0,

	/* absolute URI using the file:// scheme */
	GF_URL_TYPE_FILE_URI,

	/* relative path or relative URL */
	GF_URL_TYPE_RELATIVE,

	/* absolute URI using any other scheme */
	GF_URL_TYPE_ANY_URI,

	/* input that cannot be classified (NULL, or "file://" with nothing after it) */
	GF_URL_TYPE_INVALID,
};
49 :
50 : /*resolve the protocol type, for a std URL: http:// or ftp:// ...*/
51 20761 : static u32 URL_GetProtocolType(const char *pathName)
52 : {
53 : char *begin;
54 20761 : if (!pathName) return GF_URL_TYPE_INVALID;
55 :
56 : /* URL with the data scheme are not relative to avoid concatenation */
57 20730 : if (!strnicmp(pathName, "data:", 5)) return GF_URL_TYPE_ANY_URI;
58 :
59 :
60 : //conditions for a file path to be absolute:
61 : // - on posix: absolute iif starts with '/'
62 : // - on windows: absolute if
63 : // * starts with \ or / (current drive)
64 : // * OR starts with <LETTER>: and then \ or /
65 : // * OR starts with \\host\share\<path> [NOT HANDLED HERE]
66 : #ifndef WIN32
67 20730 : if (pathName[0] == '/')
68 : #else
69 : if ( (pathName[0] == '/') || (pathName[0] == '\\')
70 : || ( strlen(pathName)>2 && pathName[1]==':'
71 : && ((pathName[2] == '/') || (pathName[2] == '\\'))
72 : )
73 : )
74 : #endif
75 : return GF_URL_TYPE_FILE_PATH;
76 :
77 :
78 20629 : begin = strstr(pathName, "://");
79 20629 : if (!begin)
80 : return GF_URL_TYPE_RELATIVE;
81 :
82 554 : else if (!strnicmp(pathName, "file://", 7))
83 2 : return (strlen(pathName)>7 ? GF_URL_TYPE_FILE_URI : GF_URL_TYPE_INVALID);
84 :
85 : return GF_URL_TYPE_ANY_URI;
86 : }
87 :
88 : /*gets protocol type*/
89 : GF_EXPORT
90 11146 : Bool gf_url_is_local(const char *pathName)
91 : {
92 11146 : u32 mode = URL_GetProtocolType(pathName);
93 11146 : return (mode!=GF_URL_TYPE_INVALID && mode!=GF_URL_TYPE_ANY_URI) ? GF_TRUE : GF_FALSE;
94 : }
95 :
96 : GF_EXPORT
97 43 : char *gf_url_get_absolute_path(const char *pathName, const char *parentPath)
98 : {
99 : char* sep;
100 : u32 parent_type;
101 : char* res = NULL;
102 :
103 43 : u32 prot_type = URL_GetProtocolType(pathName);
104 :
105 43 : switch (prot_type) {
106 :
107 : // if it's already absolute, do nothing
108 0 : case GF_URL_TYPE_FILE_PATH:
109 : case GF_URL_TYPE_ANY_URI:
110 0 : res = gf_strdup(pathName);
111 0 : break;
112 :
113 : // if file URI, remove the scheme part
114 0 : case GF_URL_TYPE_FILE_URI:
115 :
116 0 : pathName += 6; // keep a slash in case it's forgotten
117 :
118 : /* Windows file URIs SHOULD BE in the form "file:///C:\..."
119 : * Unix file URIs SHOULD BE in the form "file:///home..."
120 : * anything before the 3rd '/' is a hostname
121 : */
122 0 : sep = strchr(pathName+1, '/');
123 0 : if (sep) {
124 : pathName = sep;
125 :
126 : // dirty way to say if windows
127 : // consume the third / in that case
128 0 : if (strlen(pathName) > 2 && pathName[2]==':')
129 0 : pathName++;
130 : }
131 0 : res = gf_strdup(pathName);
132 0 : break;
133 :
134 : // if it's relative, it depends on the parent
135 42 : case GF_URL_TYPE_RELATIVE:
136 42 : parent_type = URL_GetProtocolType(parentPath);
137 :
138 : // in this case the parent is of no help to find an absolute path so we do nothing
139 42 : if (parent_type == GF_URL_TYPE_RELATIVE || parent_type == GF_URL_TYPE_INVALID )
140 42 : res = gf_strdup(pathName);
141 : else
142 0 : res = gf_url_concatenate(parentPath, pathName);
143 :
144 : break;
145 :
146 : }
147 :
148 43 : return res;
149 :
150 : }
151 :
152 :
153 16613 : static char *gf_url_concatenate_ex(const char *parentName, const char *pathName, Bool relative_to_parent)
154 : {
155 : u32 pathSepCount, i, prot_type;
156 : Bool had_sep_count = GF_FALSE;
157 : char *outPath, *name, *rad, *tmp2;
158 : char tmp[GF_MAX_PATH];
159 :
160 16613 : if (!pathName && !parentName) return NULL;
161 16613 : if (!pathName) return gf_strdup(parentName);
162 16613 : if (!parentName || !strlen(parentName)) return gf_strdup(pathName);
163 :
164 9533 : if (!strncmp(pathName, "data:", 5)) return gf_strdup(pathName);
165 9533 : if (!strncmp(parentName, "gmem://", 7)) return NULL;
166 9533 : if (!strncmp(parentName, "gfio://", 7)) {
167 : GF_Err e;
168 3 : GF_FileIO *gfio = gf_fileio_from_url(parentName);
169 3 : GF_FileIO *gfio_new = gf_fileio_open_url(gfio, pathName, "url", &e);
170 3 : if (!gfio_new)
171 : return NULL;
172 3 : return gf_strdup( gf_fileio_url(gfio_new) );
173 : }
174 9530 : if ((strlen(parentName) > GF_MAX_PATH) || (strlen(pathName) > GF_MAX_PATH)) {
175 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CORE, ("URL too long for concatenation: \n%s\n", pathName));
176 : return NULL;
177 : }
178 :
179 9530 : while (!strncmp(parentName, "./.", 3) || !strncmp(parentName, ".\\.", 3)) {
180 0 : parentName += 2;
181 : }
182 9583 : while (!strncmp(pathName, "./.", 3) || !strncmp(pathName, ".\\.", 3)) {
183 53 : pathName += 2;
184 : }
185 :
186 9530 : prot_type = URL_GetProtocolType(pathName);
187 9530 : if (prot_type != GF_URL_TYPE_RELATIVE) {
188 : char *sep = NULL;
189 328 : if (pathName[0]=='/') sep = strstr(parentName, "://");
190 30 : if (sep) sep = strchr(sep+3, '/');
191 328 : if (sep) {
192 : u32 len;
193 30 : sep[0] = 0;
194 30 : len = (u32) strlen(parentName);
195 30 : outPath = (char*)gf_malloc(sizeof(char)*(len+1+strlen(pathName)));
196 : strcpy(outPath, parentName);
197 : strcat(outPath, pathName);
198 30 : sep[0] = '/';
199 : } else {
200 298 : outPath = gf_strdup(pathName);
201 : }
202 : goto check_spaces;
203 : }
204 :
205 : /*old upnp addressing a la Platinum*/
206 9202 : rad = strstr(parentName, "%3fpath=");
207 9202 : if (!rad) rad = strstr(parentName, "%3Fpath=");
208 9202 : if (!rad) rad = strstr(parentName, "?path=");
209 9202 : if (rad) {
210 : char *the_path;
211 0 : rad = strchr(rad, '=');
212 0 : rad[0] = 0;
213 0 : the_path = gf_strdup(rad+1);
214 : i=0;
215 : while (1) {
216 0 : if (the_path[i]==0) break;
217 0 : if (!strnicmp(the_path+i, "%5c", 3) || !strnicmp(the_path+i, "%2f", 3) ) {
218 0 : the_path[i] = '/';
219 0 : memmove(the_path+i+1, the_path+i+3, strlen(the_path+i+3)+1);
220 : }
221 0 : else if (!strnicmp(the_path+i, "%05c", 4) || !strnicmp(the_path+i, "%02f", 4) ) {
222 0 : the_path[i] = '/';
223 0 : memmove(the_path+i+1, the_path+i+4, strlen(the_path+i+4)+1);
224 : }
225 0 : i++;
226 : }
227 0 : name = gf_url_concatenate(the_path, pathName);
228 0 : outPath = (char*)gf_malloc(strlen(parentName) + strlen(name) + 2);
229 : sprintf(outPath, "%s=%s", parentName, name);
230 0 : rad[0] = '=';
231 0 : gf_free(name);
232 0 : gf_free(the_path);
233 0 : return outPath;
234 : }
235 :
236 : /*rewrite path to use / not % encoding*/
237 9202 : rad = strchr(parentName, '%');
238 9202 : if (rad && (!strnicmp(rad, "%5c", 3) || !strnicmp(rad, "%05c", 4) || !strnicmp(rad, "%2f", 3) || !strnicmp(rad, "%02f", 4))) {
239 0 : char *the_path = gf_strdup(parentName);
240 : i=0;
241 : while (1) {
242 0 : if (the_path[i]==0) break;
243 0 : if (!strnicmp(the_path+i, "%5c", 3) || !strnicmp(the_path+i, "%2f", 3) ) {
244 0 : the_path[i] = '/';
245 0 : memmove(the_path+i+1, the_path+i+3, strlen(the_path+i+3)+1);
246 : }
247 0 : else if (!strnicmp(the_path+i, "%05c", 4) || !strnicmp(the_path+i, "%02f", 4) ) {
248 0 : the_path[i] = '/';
249 0 : memmove(the_path+i+1, the_path+i+4, strlen(the_path+i+4)+1);
250 : }
251 0 : i++;
252 : }
253 0 : name = gf_url_concatenate(the_path, pathName);
254 0 : gf_free(the_path);
255 0 : return name;
256 : }
257 :
258 :
259 : pathSepCount = 0;
260 : name = NULL;
261 9202 : if (pathName[0] == '.') {
262 371 : if (!strcmp(pathName, "..")) {
263 : pathSepCount = 1;
264 : name = "";
265 : }
266 371 : if (!strcmp(pathName, "./")) {
267 : pathSepCount = 0;
268 : name = "";
269 : }
270 436 : for (i = 0; i< strlen(pathName) - 2; i++) {
271 : /*current dir*/
272 807 : if ( (pathName[i] == '.')
273 436 : && ( (pathName[i+1] == GF_PATH_SEPARATOR) || (pathName[i+1] == '/') ) ) {
274 : i++;
275 47 : continue;
276 : }
277 : /*parent dir*/
278 760 : if ( (pathName[i] == '.') && (pathName[i+1] == '.')
279 389 : && ( (pathName[i+2] == GF_PATH_SEPARATOR) || (pathName[i+2] == '/') )
280 : ) {
281 389 : pathSepCount ++;
282 : i+=2;
283 389 : name = (char *) &pathName[i+1];
284 : } else {
285 : name = (char *) &pathName[i];
286 : break;
287 : }
288 : }
289 : }
290 371 : if (!name) name = (char *) pathName;
291 :
292 : strcpy(tmp, parentName);
293 9202 : while (strchr(" \r\n\t", tmp[strlen(tmp)-1])) {
294 0 : tmp[strlen(tmp)-1] = 0;
295 : }
296 : //strip query part or fragment part
297 9202 : rad = strchr(tmp, '?');
298 9202 : if (rad) rad[0] = 0;
299 9202 : tmp2 = strrchr(tmp, '/');
300 9202 : if (!tmp2) tmp2 = strrchr(tmp, '\\');
301 9202 : if (!tmp2) tmp2 = tmp;
302 9202 : rad = strchr(tmp2, '#');
303 9202 : if (rad) rad[0] = 0;
304 :
305 9202 : if (pathSepCount)
306 : had_sep_count = GF_TRUE;
307 : /*remove the last /*/
308 118678 : for (i = (u32) strlen(tmp); i > 0; i--) {
309 : //break our path at each separator
310 109235 : if ((tmp[i-1] == GF_PATH_SEPARATOR) || (tmp[i-1] == '/')) {
311 9350 : tmp[i-1] = 0;
312 9350 : if (!pathSepCount) break;
313 389 : pathSepCount--;
314 : }
315 : }
316 : //if i==0, the parent path was relative, just return the pathName
317 9202 : if (!i) {
318 241 : tmp[i] = 0;
319 482 : while (pathSepCount) {
320 : strcat(tmp, "../");
321 0 : pathSepCount--;
322 : }
323 : }
324 : //path is relative to current dir
325 8961 : else if (!relative_to_parent && (pathName[0]=='.') && ((pathName[1]=='/') || (pathName[1]=='\\') ) ) {
326 : strcat(tmp, "/");
327 : }
328 : //parent is relative to current dir
329 8943 : else if (!had_sep_count && (pathName[0]=='.') && (tmp[0]=='.') && ((tmp[1]=='/') || (tmp[1]=='\\') ) ) {
330 : u32 nb_path_sep=0;
331 1 : u32 len = (u32) strlen(tmp);
332 15 : for (i=0; i<len; i++) {
333 14 : if ((tmp[i]=='/') || (tmp[i]=='\\') )
334 2 : nb_path_sep++;
335 : }
336 : strcpy(tmp, "");
337 3 : while (nb_path_sep--)
338 : strcat(tmp, "../");
339 : } else {
340 : strcat(tmp, "/");
341 : }
342 :
343 9202 : i = (u32) strlen(tmp);
344 9202 : outPath = (char *) gf_malloc(i + strlen(name) + 1);
345 : sprintf(outPath, "%s%s", tmp, name);
346 :
347 : /*cleanup paths sep for win32*/
348 509437 : for (i = 0; i<strlen(outPath); i++)
349 500235 : if (outPath[i]=='\\') outPath[i] = '/';
350 :
351 9202 : check_spaces:
352 : i=0;
353 537754 : while (outPath[i]) {
354 518694 : if (outPath[i] == '?') break;
355 :
356 518694 : if (outPath[i] != '%') {
357 518694 : i++;
358 518694 : continue;
359 : }
360 0 : if (!strnicmp(outPath+i, "%3f", 3)) break;
361 0 : if (!strnicmp(outPath+i, "%20", 3)) {
362 0 : outPath[i]=' ';
363 0 : memmove(outPath + i+1, outPath+i+3, strlen(outPath+i)-2);
364 : }
365 0 : i++;
366 : }
367 : return outPath;
368 : }
369 : GF_EXPORT
370 16589 : char *gf_url_concatenate(const char *parentName, const char *pathName)
371 : {
372 16589 : return gf_url_concatenate_ex(parentName, pathName, GF_FALSE);
373 : }
374 : GF_EXPORT
375 24 : char *gf_url_concatenate_parent(const char *parentName, const char *pathName)
376 : {
377 24 : return gf_url_concatenate_ex(parentName, pathName, GF_TRUE);
378 : }
379 :
380 : GF_EXPORT
381 2789 : void gf_url_to_fs_path(char *sURL)
382 : {
383 2789 : if (!strnicmp(sURL, "file://", 7)) {
384 : /*file:///C:\ scheme*/
385 0 : if ((sURL[7]=='/') && (sURL[9]==':')) {
386 0 : memmove(sURL, sURL+8, strlen(sURL)-7);
387 : } else {
388 0 : memmove(sURL, sURL+7, strlen(sURL)-6);
389 : }
390 : }
391 :
392 : while (1) {
393 2789 : char *sep = strstr(sURL, "%20");
394 2789 : if (!sep) break;
395 0 : sep[0] = ' ';
396 0 : memmove(sep+1, sep+3, strlen(sep)-2);
397 : }
398 2789 : }
399 :
400 : //TODO handle reserved characters
401 : const char *pce_special = " %";
402 : const char *pce_encoded = "0123456789ABCDEF";
403 :
404 1368 : char *gf_url_percent_encode(const char *path)
405 : {
406 : char *outpath;
407 : u32 i, count, len;
408 1368 : if (!path) return NULL;
409 :
410 1368 : len = (u32) strlen(path);
411 : count = 0;
412 89956 : for (i=0; i<len; i++) {
413 88588 : u8 c = path[i];
414 88588 : if (strchr(pce_special, c) != NULL) {
415 0 : if ((i+2<len) && ((strchr(pce_encoded, path[i+1]) == NULL) || (strchr(pce_encoded, path[i+2]) == NULL))) {
416 0 : count+=2;
417 : }
418 88588 : } else if (c>>7) {
419 0 : count+=2;
420 : }
421 : }
422 1368 : if (!count) return gf_strdup(path);
423 0 : outpath = (char*)gf_malloc(sizeof(char) * (len + count + 1));
424 : strcpy(outpath, path);
425 :
426 : count = 0;
427 0 : for (i=0; i<len; i++) {
428 : Bool do_enc = GF_FALSE;
429 0 : u8 c = path[i];
430 :
431 0 : if (strchr(pce_special, c) != NULL) {
432 0 : if ((i+2<len) && ((strchr(pce_encoded, path[i+1]) == NULL) || (strchr(pce_encoded, path[i+2]) == NULL))) {
433 : do_enc = GF_TRUE;
434 : }
435 0 : } else if (c>>7) {
436 : do_enc = GF_TRUE;
437 : }
438 :
439 : if (do_enc) {
440 : char szChar[3];
441 : sprintf(szChar, "%02X", c);
442 0 : outpath[i+count] = '%';
443 0 : outpath[i+count+1] = szChar[0];
444 0 : outpath[i+count+2] = szChar[1];
445 0 : count+=2;
446 : } else {
447 0 : outpath[i+count] = c;
448 : }
449 : }
450 0 : outpath[i+count] = 0;
451 0 : return outpath;
452 : }
453 :
454 216 : char *gf_url_percent_decode(const char *path)
455 : {
456 : char *outpath;
457 : u32 i, count, len;
458 216 : if (!path) return NULL;
459 :
460 216 : len = (u32) strlen(path);
461 : count = 0;
462 5203 : for (i=0; i<len; i++) {
463 4987 : u8 c = path[i];
464 4987 : if (c=='%') {
465 0 : i+= 2;
466 : }
467 4987 : count++;
468 : }
469 216 : if (!count) return gf_strdup(path);
470 216 : outpath = (char*)gf_malloc(sizeof(char) * (count + 1));
471 :
472 5203 : for (i=0; i<len; i++) {
473 4987 : u8 c = path[i];
474 4987 : if (c=='%') {
475 : u32 res;
476 : char szChar[3];
477 0 : szChar[0] = path[i+1];
478 0 : szChar[1] = path[i+1];
479 0 : szChar[2] = 0;
480 0 : sscanf(szChar, "%02X", &res);
481 0 : i += 2;
482 0 : outpath[i] = (char) res;
483 : } else {
484 4987 : outpath[i] = c;
485 : }
486 : }
487 216 : outpath[count] = 0;
488 216 : return outpath;
489 : }
490 :
491 : GF_EXPORT
492 638 : const char *gf_url_get_resource_name(const char *sURL)
493 : {
494 : char *sep;
495 638 : if (!sURL) return NULL;
496 638 : sep = strrchr(sURL, '/');
497 638 : if (!sep) sep = strrchr(sURL, '\\');
498 638 : if (sep) return sep+1;
499 : return sURL;
500 : }
501 :
502 : GF_EXPORT
503 6760 : const char *gf_url_get_path(const char *sURL)
504 : {
505 6760 : char *sep = strstr(sURL, "://");
506 6760 : if (!sep) return sURL;
507 1271 : sep = strchr(sep + 3, '/');
508 1271 : if (sep) return sep;
509 30 : return NULL;
510 : }
511 :
512 :
513 : #if 0 //unused
514 : Bool gf_url_remove_last_delimiter(const char *sURL, char *res_path)
515 : {
516 : strcpy(res_path, sURL);
517 : if (sURL[strlen(sURL)-1] == GF_PATH_SEPARATOR) {
518 : res_path[strlen(sURL)-1] = 0;
519 : return GF_TRUE;
520 : }
521 :
522 : return GF_FALSE;
523 : }
524 :
/*
 * Returns a pointer, inside sURL, to what follows the last '.'.
 * Returns an empty string if sURL is NULL, has no '.', or if its only '.'
 * is its first character. Nothing is allocated.
 */
const char* gf_url_get_resource_extension(const char *sURL) {
	const char *dot;

	if (!sURL) return "";

	dot = strrchr(sURL, '.');
	if (!dot || dot == sURL) return "";
	return dot + 1;
}
530 : #endif //unused
|