version 0.3.33
[fms.git] / src / unicode / utfconversion.cpp
1 #include "../../include/unicode/utfconversion.h"\r
2 \r
3 #ifdef _WIN32\r
4 #include "../../include/pstdint.h"\r
5 #endif\r
6 \r
7 namespace UTFConversion\r
8 {\r
9 \r
10 const bool FromUTF8(const std::vector<std::string::value_type> &utf8string, std::wstring &wcstring)\r
11 {\r
12         if(utf8string.size()==0)\r
13         {\r
14                 wcstring.assign(L"");\r
15                 return true;\r
16         }\r
17 \r
18         std::vector<std::wstring::value_type> dest(utf8string.size(),0);                // dest will never be bigger than the input but could be smaller\r
19         \r
20         const UTF8 *sourcestart=reinterpret_cast<const UTF8 *>(&utf8string[0]);\r
21         const UTF8 *sourceend=sourcestart+utf8string.size();\r
22         \r
23         if(sizeof(std::wstring::value_type)==2 && sizeof(UTF16)==2)\r
24         {       \r
25                 UTF16 *deststart=reinterpret_cast<UTF16 *>(&dest[0]);\r
26                 UTF16 *destend=deststart+dest.size();\r
27                 \r
28                 ConversionResult rval=ConvertUTF8toUTF16(&sourcestart,sourceend,&deststart,destend,lenientConversion);\r
29                 \r
30                 if(rval!=conversionOK)\r
31                 {\r
32                         return false;   \r
33                 }\r
34                 \r
35                 wcstring.assign(dest.begin(),dest.end()-(destend-deststart));\r
36                 \r
37         }\r
38         else if(sizeof(std::wstring::value_type)==4 && sizeof(UTF32)==4)\r
39         {\r
40                 UTF32 *deststart=reinterpret_cast<UTF32 *>(&dest[0]);\r
41                 UTF32 *destend=deststart+dest.size();\r
42                 \r
43                 ConversionResult rval=ConvertUTF8toUTF32(&sourcestart,sourceend,&deststart,destend,lenientConversion);\r
44 \r
45                 if(rval!=conversionOK)\r
46                 {\r
47                         return false;\r
48                 }\r
49                 \r
50                 wcstring.assign(dest.begin(),dest.end()-(destend-deststart));\r
51                 \r
52         }\r
53         else\r
54         {\r
55                 std::vector<uint32_t> dest2(utf8string.size(),0);\r
56                 UTF32 *deststart=reinterpret_cast<UTF32 *>(&dest2[0]);\r
57                 UTF32 *destend=deststart+dest2.size();\r
58 \r
59                 ConversionResult rval=ConvertUTF8toUTF32(&sourcestart,sourceend,&deststart,destend,lenientConversion);\r
60 \r
61                 if(rval!=conversionOK)\r
62                 {\r
63                         return false;\r
64                 }\r
65 \r
66                 wcstring.assign(dest2.begin(),dest2.end()-(destend-deststart));\r
67 \r
68         }\r
69 \r
70         return true;\r
71 }\r
72 \r
73 const bool FromUTF8(const std::string &utf8string, std::wstring &wcstring)\r
74 {\r
75 \r
76         if(utf8string.size()>0)\r
77         {\r
78                 return FromUTF8(std::vector<std::string::value_type>(utf8string.begin(),utf8string.end()),wcstring);\r
79         }\r
80         else\r
81         {\r
82                 wcstring.assign(L"");\r
83                 return true;\r
84         }\r
85 \r
86 }\r
87 \r
88 const bool ToUTF8(const std::wstring &wcstring, std::string &utf8string)\r
89 {\r
90         if(wcstring.size()==0)\r
91         {\r
92                 utf8string.assign("");\r
93                 return true;\r
94         }\r
95 \r
96         std::vector<std::wstring::value_type> source(wcstring.begin(),wcstring.end());\r
97 \r
98         if(sizeof(std::wstring::value_type)==2 && sizeof(UTF16)==2)\r
99         {\r
100                 std::vector<std::string::value_type> dest(wcstring.size()*2,0);\r
101                 \r
102                 const UTF16 *sourcestart=reinterpret_cast<const UTF16 *>(&source[0]);\r
103                 const UTF16 *sourceend=sourcestart+source.size();\r
104                 \r
105                 UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]);\r
106                 UTF8 *destend=deststart+dest.size();\r
107                 \r
108                 ConversionResult rval=ConvertUTF16toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion);\r
109                 \r
110                 if(rval!=conversionOK)\r
111                 {\r
112                         return false;\r
113                 }\r
114                 \r
115                 utf8string.assign(dest.begin(),dest.end()-(destend-deststart));\r
116                 \r
117         }\r
118         else if(sizeof(std::wstring::value_type)==4 && sizeof(UTF32)==4)\r
119         {\r
120                 std::vector<std::string::value_type> dest(wcstring.size()*4,0);\r
121                 \r
122                 const UTF32 *sourcestart=reinterpret_cast<const UTF32 *>(&source[0]);\r
123                 const UTF32 *sourceend=sourcestart+source.size();\r
124                 \r
125                 UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]);\r
126                 UTF8 *destend=deststart+dest.size();\r
127                 \r
128                 ConversionResult rval=ConvertUTF32toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion);\r
129                 \r
130                 if(rval!=conversionOK)\r
131                 {\r
132                         return false;\r
133                 }\r
134                 \r
135                 utf8string.assign(dest.begin(),dest.end()-(destend-deststart));\r
136                 \r
137         }\r
138         else\r
139         {\r
140                 std::vector<uint32_t> source2(wcstring.begin(),wcstring.end());\r
141                 std::vector<std::string::value_type> dest(wcstring.size()*sizeof(std::wstring::value_type),0);\r
142                 \r
143                 const UTF32 *sourcestart=reinterpret_cast<const UTF32 *>(&source2[0]);\r
144                 const UTF32 *sourceend=sourcestart+source2.size();\r
145 \r
146                 UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]);\r
147                 UTF8 *destend=deststart+dest.size();\r
148 \r
149                 ConversionResult rval=ConvertUTF32toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion);\r
150 \r
151                 if(rval!=conversionOK)\r
152                 {\r
153                         return false;\r
154                 }\r
155 \r
156                 utf8string.assign(dest.begin(),dest.end()-(destend-deststart));\r
157 \r
158         }\r
159 \r
160         return true;\r
161 }\r
162 \r
163 }       // namespace\r