fork download
  1. import java.util.*;
  2. import java.lang.*;
  3. import java.text.Normalizer;
  4. import java.nio.charset.*;
  5. import java.io.*;
  6.  
  7. class Ideone
  8. {
  9. public static void main (String[] args) throws java.lang.Exception
  10. {
  11. System.out.println(convertTo7BitAsciiWithCodepointNames("Hello µ \u03bc \u00b5 \uD83D\uDE00 \uD835\uDC2E frappé, naïve, soufflé"));
  12. }
  13.  
  14. public static String convertTo7BitAsciiWithCodepointNames(String input) {
  15. // Normalize to replace as much as possible with normal character forms
  16. input = Normalizer.normalize(input, Normalizer.Form.NFKD);
  17.  
  18. // Then substitute as required into the target charset
  19. CharsetEncoder encoder = StandardCharsets.ISO_8859_1.newEncoder();
  20. StringBuilder resultBuilder = new StringBuilder();
  21.  
  22. // Iterate through the string by codepoint
  23. for (int i = 0; i < input.length(); ) {
  24. int codePoint = input.codePointAt(i);
  25.  
  26. String utf16chars = Character.toString(codePoint);
  27. if (Character.getType(codePoint) == Character.NON_SPACING_MARK) {
  28. // ignore combining accent characters
  29. } else if (encoder.canEncode(utf16chars)) {
  30. resultBuilder.append(utf16chars);
  31. } else {
  32. // Character is NOT encodable, replace with its Unicode codepoint name
  33. resultBuilder.append('[');
  34. resultBuilder.append(Character.getName(codePoint));
  35. resultBuilder.append(']');
  36. }
  37.  
  38. i += utf16chars.length();
  39. }
  40. return resultBuilder.toString();
  41. }
  42. }
Success #stdin #stdout 0.1s 56248KB
stdin
Standard input is empty
stdout
Hello [GREEK SMALL LETTER MU] [GREEK SMALL LETTER MU] [GREEK SMALL LETTER MU] [GRINNING FACE] u frappe, naive, souffle