#include <iostream>
#include <string>
// Appends the UTF-8 encoding of the code point `cp` to `out`.
// The caller must pass a Unicode scalar value (at most 0x10FFFF and not a
// surrogate); this helper performs no validation of its own.
static void append_utf8(std::string& out, char32_t cp) {
    if (cp <= 0x7F) {            // 1 byte: 0xxxxxxx
        out.push_back(static_cast<char>(cp));
    } else if (cp <= 0x7FF) {    // 2 bytes: 110xxxxx 10xxxxxx
        out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp <= 0xFFFF) {   // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {                     // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

// Converts a wide string to UTF-8.
//
// Each element of `wstr` is interpreted as follows:
//   * a high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
//     (0xDC00-0xDFFF) is combined into one supplementary code point;
//   * any other surrogate (unpaired high, or stray low) is replaced by
//     U+FFFD REPLACEMENT CHARACTER, and the element after an unpaired high
//     surrogate is NOT consumed, so it is still converted normally;
//   * a value above 0x10FFFF (possible when wchar_t is wider than 16 bits, or
//     signed and negative) is also replaced by U+FFFD;
//   * every other value is taken as a code point as-is, which also covers
//     supplementary code points stored directly in a 32-bit wchar_t.
//
// @param wstr  Wide string holding UTF-16 code units or whole code points.
// @return      The UTF-8 encoded text; empty if `wstr` is empty.
std::string utf16_to_utf8(const std::wstring& wstr) {
    constexpr char32_t kReplacement = 0xFFFD;

    std::string result;
    result.reserve(wstr.size());  // lower bound: at least one byte per element

    for (std::size_t i = 0; i < wstr.size(); ++i) {
        // Go through an unsigned type so a signed wchar_t cannot yield a
        // negative value that would pass the "ASCII" range check.
        const char32_t unit = static_cast<char32_t>(wstr[i]);
        char32_t cp = unit;

        if (unit >= 0xD800 && unit <= 0xDBFF) {
            // High surrogate: peek at the next element without consuming it.
            const char32_t next =
                (i + 1 < wstr.size()) ? static_cast<char32_t>(wstr[i + 1]) : 0;
            if (next >= 0xDC00 && next <= 0xDFFF) {
                cp = 0x10000 + ((unit - 0xD800) << 10) + (next - 0xDC00);
                ++i;  // the low surrogate is now part of this code point
            } else {
                cp = kReplacement;  // unpaired high surrogate
            }
        } else if ((unit >= 0xDC00 && unit <= 0xDFFF) || unit > 0x10FFFF) {
            cp = kReplacement;  // stray low surrogate or out-of-range value
        }

        append_utf8(result, cp);
    }
    return result;
}
// Demo entry point: converts a short wide-character sample containing a
// non-ASCII character to UTF-8 and writes it to standard output.
int main() {
    // Sample text: ASCII letters around an en dash.
    const std::wstring sample{L"A – B"};

    // Convert first, then print, so the conversion result has a name.
    const std::string encoded = utf16_to_utf8(sample);
    std::cout << "UTF-8: " << encoded << std::endl;

    return 0;
}