View Javadoc

1   package org.kuali.common.util;
2   
3   import java.nio.charset.Charset;
4   
5   import org.apache.commons.lang3.StringUtils;
6   import org.junit.Test;
7   
8   public class SimpleTest {
9   
10  	@Test
11  	public void testDefaultFileEncoding() {
12  		System.out.println("file.encoding=" + System.getProperty("file.encoding"));
13  		System.out.println("Charset.defaultCharset().name()=" + Charset.defaultCharset().name());
14  	}
15  
16  	@Test
17  	public void testEncoding() {
18  		String[] encodings = new String[] { "UTF-8", "UTF-16", "UTF-32" };
19  		String s1 = "123";
20  		String s2 = "𝟙𝟚𝟛";
21  		String s = s1 + s2;
22  		StringBuilder sb = new StringBuilder();
23  		sb.append(rpad("s=" + s, 15));
24  		sb.append(rpad("s.length()=" + s.length(), 15));
25  		sb.append(rpad("s.substring(4, 6)=" + s.substring(4, 6), 25));
26  		sb.append("\n\n");
27  		StringBuilder characters = new StringBuilder();
28  		for (String encoding : encodings) {
29  			byte[] bytes = s.getBytes(Charset.forName(encoding));
30  			char[] chars = s.toCharArray();
31  			int[] codePoints = getCodePoints(chars);
32  			sb.append(rpad("encoding=" + encoding, 20));
33  			sb.append(rpad("bytes=" + bytes.length, 15));
34  			sb.append(rpad("chars=" + chars.length, 15));
35  			sb.append(rpad(HexUtils.toHexString(bytes), 55));
36  			characters.append(getString(codePoints) + "\n");
37  			sb.append("\n");
38  		}
39  		System.out.println("\n" + sb + "\n" + characters);
40  	}
41  
42  	protected String getString(int[] codePoints) {
43  		StringBuilder sb = new StringBuilder();
44  		sb.append("[");
45  		for (int i = 0; i < codePoints.length; i++) {
46  			if (i != 0) {
47  				sb.append(",");
48  			}
49  			int cp = codePoints[i];
50  			int charCount = Character.charCount(cp);
51  			sb.append(cp + ":charCount=" + charCount);
52  		}
53  		sb.append("]");
54  		return sb.toString();
55  	}
56  
57  	protected int[] getCodePoints(char[] chars) {
58  		int[] codePoints = new int[chars.length];
59  		for (int i = 0; i < chars.length; i++) {
60  			int codePoint = Character.codePointAt(chars, i);
61  			codePoints[i] = codePoint;
62  		}
63  		return codePoints;
64  	}
65  
66  	protected String getHex(byte[] bytes) {
67  		int mask = 0x000000ff;
68  		StringBuilder sb = new StringBuilder();
69  		for (byte b : bytes) {
70  			int masked = mask & b;
71  			String hex = Integer.toHexString(masked).toUpperCase();
72  			String padded = StringUtils.leftPad(hex, 2, "0");
73  			sb.append(padded);
74  		}
75  		return sb.toString();
76  	}
77  
78  	protected String rpad(String s, int padding) {
79  		return StringUtils.rightPad(s, padding, " ");
80  	}
81  
82  }