LCOV - code coverage report
Current view: top level - utils - unicode.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 45 45 100.0 %
Date: 2021-04-29 23:48:07 Functions: 1 1 100.0 %

          Line data    Source code
       1             : #include <gpac/utf.h>
       2             : /**
       3             :  * This code has been adapted from http://www.ietf.org/rfc/rfc2640.txt
       4             :  * Full Copyright Statement
       5             : 
       6             :    Copyright (C) The Internet Society (1999).  All Rights Reserved.
       7             : 
       8             :    This document and translations of it may be copied and furnished to
       9             :    others, and derivative works that comment on or otherwise explain it
      10             :    or assist in its implementation may be prepared, copied, published
      11             :    and distributed, in whole or in part, without restriction of any
      12             :    kind, provided that the above copyright notice and this paragraph are
      13             :    included on all such copies and derivative works.  However, this
      14             :    document itself may not be modified in any way, such as by removing
      15             :    the copyright notice or references to the Internet Society or other
      16             :    Internet organizations, except as needed for the purpose of
      17             :    developing Internet standards in which case the procedures for
      18             :    copyrights defined in the Internet Standards process must be
      19             :    followed, or as required to translate it into languages other than
      20             :    English.
      21             : 
      22             :    The limited permissions granted above are perpetual and will not be
      23             :    revoked by the Internet Society or its successors or assigns.
      24             : 
      25             :    This document and the information contained herein is provided on an
      26             :    "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
      27             :    TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
      28             :    BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
      29             :    HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
      30             :    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
      31             : 
      32             : Acknowledgement
      33             : 
      34             :    Funding for the RFC Editor function is currently provided by the
      35             :    Internet Society.
      36             :  */
      37             : 
      38             : GF_EXPORT
      39           7 : u32 utf8_to_ucs4(u32 * ucs4_buf, u32 utf8_len, unsigned char *utf8_buf)
      40             : {
      41           7 :         const unsigned char *utf8_endbuf = utf8_buf + utf8_len;
      42             :         u32             ucs_len = 0;
      43             :         assert( ucs4_buf );
      44             :         assert( utf8_buf );
      45             : 
      46          22 :         while (utf8_buf != utf8_endbuf) {
      47             : 
      48           9 :                 if ((*utf8_buf & 0x80) == 0x00) {
      49             :                         /* ASCII chars no
      50             :                                                                  * conversion needed */
      51           2 :                         *ucs4_buf++ = (u32) * utf8_buf;
      52           2 :                         utf8_buf++;
      53           2 :                         ucs_len++;
      54           7 :                 } else if ((*utf8_buf & 0xE0) == 0xC0)
      55             :                         //In the 2 byte utf - 8 range
      56             :                 {
      57           2 :                         *ucs4_buf++ = (u32) (((*utf8_buf - 0xC0) * 0x40)
      58           1 :                                              + (*(utf8_buf + 1) - 0x80));
      59           1 :                         utf8_buf += 2;
      60           1 :                         ucs_len++;
      61           6 :                 } else if ((*utf8_buf & 0xF0) == 0xE0) {
      62             :                         /* In the 3 byte utf-8
      63             :                                                                          * range */
      64           2 :                         *ucs4_buf++ = (u32) (((*utf8_buf - 0xE0) * 0x1000)
      65           1 :                                              + ((*(utf8_buf + 1) - 0x80) * 0x40)
      66           1 :                                              + (*(utf8_buf + 2) - 0x80));
      67             : 
      68           1 :                         utf8_buf += 3;
      69           1 :                         ucs_len++;
      70           5 :                 } else if ((*utf8_buf & 0xF8) == 0xF0) {
      71             :                         /* In the 4 byte utf-8
      72             :                                                                          * range */
      73           1 :                         *ucs4_buf++ = (u32)
      74           1 :                                       (((*utf8_buf - 0xF0) * 0x040000)
      75           1 :                                        + ((*(utf8_buf + 1) - 0x80) * 0x1000)
      76           1 :                                        + ((*(utf8_buf + 2) - 0x80) * 0x40)
      77           1 :                                        + (*(utf8_buf + 3) - 0x80));
      78           1 :                         utf8_buf += 4;
      79           1 :                         ucs_len++;
      80           4 :                 } else if ((*utf8_buf & 0xFC) == 0xF8) {
      81             :                         /* In the 5 byte utf-8
      82             :                                                                          * range */
      83           2 :                         *ucs4_buf++ = (u32)
      84           2 :                                       (((*utf8_buf - 0xF8) * 0x01000000)
      85           2 :                                        + ((*(utf8_buf + 1) - 0x80) * 0x040000)
      86           2 :                                        + ((*(utf8_buf + 2) - 0x80) * 0x1000)
      87           2 :                                        + ((*(utf8_buf + 3) - 0x80) * 0x40)
      88           2 :                                        + (*(utf8_buf + 4) - 0x80));
      89           2 :                         utf8_buf += 5;
      90           2 :                         ucs_len++;
      91           2 :                 } else if ((*utf8_buf & 0xFE) == 0xFC) {
      92             :                         /* In the 6 byte utf-8
      93             :                                                                          * range */
      94           1 :                         *ucs4_buf++ = (u32)
      95           1 :                                       (((*utf8_buf - 0xFC) * 0x40000000)
      96           1 :                                        + ((*(utf8_buf + 1) - 0x80) * 0x010000000)
      97           1 :                                        + ((*(utf8_buf + 2) - 0x80) * 0x040000)
      98           1 :                                        + ((*(utf8_buf + 3) - 0x80) * 0x1000)
      99           1 :                                        + ((*(utf8_buf + 4) - 0x80) * 0x40)
     100           1 :                                        + (*(utf8_buf + 5) - 0x80));
     101           1 :                         utf8_buf += 6;
     102           1 :                         ucs_len++;
     103             :                 } else {
     104             :                         return 0;
     105             :                 }
     106             :         }
     107             :         return (ucs_len);
     108             : }
     109             : 

Generated by: LCOV version 1.13