/* Unicode (UCS-2 / UCS-4) to UTF-8 conversion */

#include "stdio.h" 
#include "string.h"

/* Raw unsigned 8-bit byte; used for every encoded text buffer in this file. */
typedef unsigned char ubyte;
/* Signed 8-bit value. Spelled "signed char" because the signedness of plain
 * char is implementation-defined. */
typedef signed char sbyte;
typedef unsigned long ulong;

/*
 * Sample strings stored as 16-bit code units, low byte first (little endian).
 * Each array is 62 bytes; bytes past the initialisers are zero-filled.
 */

/* 9 characters; each pair is annotated with the code point it encodes. */
ubyte title[62]   = { 0xd9, 0x8f,/*U+8FD9*/ 0x2f, 0x66,/*U+662F*/ 0x00, 0x4e,/*U+4E00*/ 0x2a, 0x4e,/*U+4E2A*/ 0x8b, 0x4f,/*U+4F8B*/ 0x50, 0x5b,/*U+5B50*/ 0x5e, 0x97,/*U+975E*/ 0x38, 0x5e,/*U+5E38*/ 0x7d, 0x59 /*U+597D*/};
/* "Michael Jackson" (15 characters). The 'k' was previously stored as 0x63 ('c'). */
ubyte artist[62]  = { 0x4D, 0x00,/*M*/ 0x69, 0x00,/*i*/ 0x63, 0x00,/*c*/ 0x68, 0x00,/*h*/ 0x61, 0x00,/*a*/ 0x65, 0x00,/*e*/ 0x6C, 0x00,/*l*/ 0x20, 0x00,/* */ 0x4A, 0x00,/*J*/ 0x61, 0x00,/*a*/ 0x63, 0x00,/*c*/ 0x6B, 0x00,/*k*/ 0x73, 0x00,/*s*/ 0x6F, 0x00,/*o*/ 0x6E, 0x00/*n*/};
/* "Rock With You" (13 characters). */
ubyte album[62]   = { 0x52, 0x00,/*R*/ 0x6f, 0x00,/*o*/ 0x63, 0x00,/*c*/ 0x6b, 0x00,/*k*/ 0x20, 0x00,/* */ 0x57, 0x00,/*W*/ 0x69, 0x00,/*i*/ 0x74, 0x00,/*t*/ 0x68, 0x00,/*h*/ 0x20, 0x00,/* */ 0x59, 0x00,/*Y*/ 0x6f, 0x00,/*o*/ 0x75, 0x00 /*u*/};


/* Forward declarations: both converters are defined after main() in this file,
 * and each is called before its own definition appears. */
static int Impm_StrUnicodeToUtf8(ubyte *UnicBytes, int UnicChLen, ubyte *Utf8Buf, int BufSize, int *OutBytesSize);
static int Impm_UnicodeToUtf8(ulong unic, ubyte *pOutput, int outSize);
/*
 * Demo driver: converts the sample title to UTF-8 and prints the number of
 * UTF-8 bytes produced on stdout.
 * Returns 0 on success, 1 if the conversion reported an error (the error code
 * is written to stderr).
 */
int main(void)
{
    /* Number of 16-bit characters initialised in title[]. */
    enum { TITLE_CHAR_COUNT = 9 };

    ubyte out[62] = {0};
    int num = 0;
    int ret = 0;

    ret = Impm_StrUnicodeToUtf8(title, TITLE_CHAR_COUNT, out, (int)sizeof(out), &num);
    if (0 != ret)
    {
        /* Do not silently ignore a bad argument or a truncated conversion. */
        fprintf(stderr, "Impm_StrUnicodeToUtf8 failed with code %d\n", ret);
    }

    printf("%d\n", num);

    return (0 == ret) ? 0 : 1;
}

/*----------------------------------------------------------------------------
 * Name : Impm_StrUnicodeToUtf8
 * Role : Converts a string of 16-bit Unicode code units (UCS-2, little
 *        endian) to a UTF-8 byte sequence.
 * Interface :
 *   - IN  : UnicBytes    : input bytes, 2 bytes per character, low byte first
 *   - IN  : UnicChLen    : number of 16-bit characters to convert
 *   - OUT : Utf8Buf      : receives the UTF-8 bytes (no terminator is added)
 *   - IN  : BufSize      : capacity of Utf8Buf in bytes
 *   - OUT : OutBytesSize : number of bytes written to Utf8Buf
 * Return :
 *   0 : every character was converted
 *   1 : UnicBytes, Utf8Buf or OutBytesSize is NULL
 *   2 : Utf8Buf is too small; the output holds only the leading characters
 *       that fit completely
 * Pre-condition : UnicBytes holds at least UnicChLen*2 readable bytes.
 * Constraints   : exactly 2 bytes are read per character, so 4-byte (UCS-4)
 *                 input is not supported here, and surrogate pairs are not
 *                 combined - each 16-bit unit is encoded on its own.
 * Behavior :
 *   DO
 *     assemble each 16-bit value, encode it with Impm_UnicodeToUtf8 and
 *     append it to Utf8Buf; stop at the first character that does not fit
 *   OD
 *----------------------------------------------------------------------------*/
static int Impm_StrUnicodeToUtf8(ubyte *UnicBytes, int UnicChLen, ubyte *Utf8Buf, int BufSize, int *OutBytesSize)
{
  int ChCnt = 0;
  ulong UnicCh = 0;
  ubyte Utf8Ch[6] = {0};
  int BytesCnt = 0;
  int Utf8ChBytesLen = 0;
  int ret = 0;

  if(NULL==UnicBytes || NULL==Utf8Buf || NULL==OutBytesSize)
  {
    ret = 1;
    if(NULL != OutBytesSize)
    {
      /* Nothing was written; do not leave the caller's counter undefined. */
      *OutBytesSize = 0;
    }
  }
  else
  {
    /* The loop ends on the first overflow so the output is always a clean  */
    /* prefix of the input, never a string with characters missing inside.  */
    for(ChCnt=0; (ChCnt<UnicChLen) && (0==ret); ChCnt++)
    {
      /* Little endian: the high byte is the second byte of the pair. */
      UnicCh = ((ulong)UnicBytes[ChCnt*2+1] << 8) | (ulong)UnicBytes[ChCnt*2];

      /*Convert*/
      Utf8ChBytesLen = Impm_UnicodeToUtf8(UnicCh, Utf8Ch, (int)sizeof(Utf8Ch));

      /* Compare against the remaining space (cannot overflow an int). */
      if(Utf8ChBytesLen <= BufSize-BytesCnt)
      {
        memcpy(&Utf8Buf[BytesCnt], Utf8Ch, (size_t)Utf8ChBytesLen);
        BytesCnt += Utf8ChBytesLen;
      }
      else
      {
        ret = 2;
      }
    }
    *OutBytesSize = BytesCnt;
  }

  return ret;
}

/*----------------------------------------------------------------------------
 * Name : Impm_UnicodeToUtf8
 * Role : Encodes one Unicode value (UCS-2 or UCS-4) as UTF-8.
 * Interface :
 *   - IN  : unic    : value to encode, 0 .. 0x7FFFFFFF
 *   - OUT : pOutput : receives the encoded bytes (no terminator is added)
 *   - IN  : outSize : capacity of pOutput in bytes
 * Return :
 *   1..6 : number of bytes written to pOutput
 *   0    : pOutput is NULL, unic is above 0x7FFFFFFF, or outSize is smaller
 *          than the encoding needs (nothing is written in these cases)
 * Pre-condition : none
 * Constraints   : outSize only has to cover the bytes actually produced; a
 *                 6-byte buffer is always sufficient. The 5- and 6-byte forms
 *                 and surrogate values are encoded as-is, which standard
 *                 UTF-8 (RFC 3629) does not permit - callers needing strict
 *                 output must filter those values themselves.
 * Behavior :
 *   DO
 *     U-00000000 - U-0000007F:  0xxxxxxx
 *     U-00000080 - U-000007FF:  110xxxxx 10xxxxxx
 *     U-00000800 - U-0000FFFF:  1110xxxx 10xxxxxx 10xxxxxx
 *     U-00010000 - U-001FFFFF:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *     U-00200000 - U-03FFFFFF:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *     U-04000000 - U-7FFFFFFF:  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *                               10xxxxxx
 *   OD
 *----------------------------------------------------------------------------*/
static int Impm_UnicodeToUtf8(ulong unic, ubyte *pOutput, int outSize)
{
  int Len = 0;      /* bytes the encoding needs, 0 when unic is out of range */
  ulong Lead = 0;   /* length marker bits of the first byte */
  int Idx = 0;
  int Ret = 0;

  /* The ranges are tested in ascending order, so only upper bounds are needed. */
  if ( unic <= 0x0000007FUL )
  {
    Len = 1;
    Lead = 0x00UL;
  }
  else if ( unic <= 0x000007FFUL )
  {
    Len = 2;
    Lead = 0xC0UL;
  }
  else if ( unic <= 0x0000FFFFUL )
  {
    Len = 3;
    Lead = 0xE0UL;
  }
  else if ( unic <= 0x001FFFFFUL )
  {
    Len = 4;
    Lead = 0xF0UL;
  }
  else if ( unic <= 0x03FFFFFFUL )
  {
    Len = 5;
    Lead = 0xF8UL;
  }
  else if ( unic <= 0x7FFFFFFFUL )
  {
    Len = 6;
    Lead = 0xFCUL;
  }
  else
  {
    Len = 0;
  }

  if( (NULL != pOutput) && (Len > 0) && (outSize >= Len) )
  {
    /* Fill the continuation bytes from last to first, 6 payload bits each. */
    for(Idx = Len - 1; Idx > 0; Idx--)
    {
      pOutput[Idx] = (ubyte)((unic & 0x3FUL) | 0x80UL);
      unic >>= 6;
    }

    /* The range check above guarantees the remaining bits fit beside the marker. */
    pOutput[0] = (ubyte)(unic | Lead);
    Ret = Len;
  }

  return Ret;
}

/*
 * You may also like
 *
 * Reposted from blog.csdn.net/qq229596421/article/details/81004945
 * Today's recommendations
 */