|
ferencd@0
|
/*
punycode.h from RFC 3492
http://www.nicemice.net/idn/
Adam M. Costello
http://www.nicemice.net/amc/

This is ANSI C code (C89) implementing Punycode (RFC 3492).
*/
|
|
ferencd@0
|
10
|
|
ferencd@0
|
11 #include <limits.h>
|
|
ferencd@0
|
12
|
|
ferencd@0
|
/* Status codes returned by punycode_encode() and punycode_decode(). */
enum punycode_status {
  punycode_success,     /* Conversion completed successfully.         */
  punycode_bad_input,   /* Input is invalid.                          */
  punycode_big_output,  /* Output would exceed the space provided.    */
  punycode_overflow     /* Input needs wider integers to process.     */
};
|
|
ferencd@0
|
19
|
|
ferencd@0
|
/*
 * punycode_uint is the unsigned integer type used by this interface for
 * code points and lengths.  unsigned int is selected when it can hold
 * every value up to 2^26 - 1; otherwise unsigned long is used.
 */
#if UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif
|
|
ferencd@0
|
25
|
|
ferencd@0
|
26 enum punycode_status punycode_encode(
|
|
ferencd@0
|
27 punycode_uint input_length,
|
|
ferencd@0
|
28 const punycode_uint input[],
|
|
ferencd@0
|
29 const unsigned char case_flags[],
|
|
ferencd@0
|
30 punycode_uint *output_length,
|
|
ferencd@0
|
31 char output[] );
|
|
ferencd@0
|
32
|
|
ferencd@0
|
33 /* punycode_encode() converts Unicode to Punycode. The input */
|
|
ferencd@0
|
34 /* is represented as an array of Unicode code points (not code */
|
|
ferencd@0
|
35 /* units; surrogate pairs are not allowed), and the output */
|
|
ferencd@0
|
36 /* will be represented as an array of ASCII code points. The */
|
|
ferencd@0
|
37 /* output string is *not* null-terminated; it will contain */
|
|
ferencd@0
|
38 /* zeros if and only if the input contains zeros. (Of course */
|
|
ferencd@0
|
39 /* the caller can leave room for a terminator and add one if */
|
|
ferencd@0
|
40 /* needed.) The input_length is the number of code points in */
|
|
ferencd@0
|
41 /* the input. The output_length is an in/out argument: the */
|
|
ferencd@0
|
42 /* caller passes in the maximum number of code points that it */
|
|
ferencd@0
|
43 /* can receive, and on successful return it will contain the */
|
|
ferencd@0
|
44 /* number of code points actually output. The case_flags array */
|
|
ferencd@0
|
45 /* holds input_length boolean values, where nonzero suggests that */
|
|
ferencd@0
|
46 /* the corresponding Unicode character be forced to uppercase */
|
|
ferencd@0
|
47 /* after being decoded (if possible), and zero suggests that */
|
|
ferencd@0
|
48 /* it be forced to lowercase (if possible). ASCII code points */
|
|
ferencd@0
|
49 /* are encoded literally, except that ASCII letters are forced */
|
|
ferencd@0
|
50 /* to uppercase or lowercase according to the corresponding */
|
|
ferencd@0
|
51 /* uppercase flags. If case_flags is a null pointer then ASCII */
|
|
ferencd@0
|
52 /* letters are left as they are, and other code points are */
|
|
ferencd@0
|
53 /* treated as if their uppercase flags were zero. The return */
|
|
ferencd@0
|
54 /* value can be any of the punycode_status values defined above */
|
|
ferencd@0
|
55 /* except punycode_bad_input; if not punycode_success, then */
|
|
ferencd@0
|
56 /* output_size and output might contain garbage. */
|
|
ferencd@0
|
57
|
|
ferencd@0
|
58 enum punycode_status punycode_decode(
|
|
ferencd@0
|
59 punycode_uint input_length,
|
|
ferencd@0
|
60 const char input[],
|
|
ferencd@0
|
61 punycode_uint *output_length,
|
|
ferencd@0
|
62 punycode_uint output[],
|
|
ferencd@0
|
63 unsigned char case_flags[] );
|
|
ferencd@0
|
64
|
|
ferencd@0
|
65 /* punycode_decode() converts Punycode to Unicode. The input is */
|
|
ferencd@0
|
66 /* represented as an array of ASCII code points, and the output */
|
|
ferencd@0
|
67 /* will be represented as an array of Unicode code points. The */
|
|
ferencd@0
|
68 /* input_length is the number of code points in the input. The */
|
|
ferencd@0
|
69 /* output_length is an in/out argument: the caller passes in */
|
|
ferencd@0
|
70 /* the maximum number of code points that it can receive, and */
|
|
ferencd@0
|
71 /* on successful return it will contain the actual number of */
|
|
ferencd@0
|
72 /* code points output. The case_flags array needs room for at */
|
|
ferencd@0
|
73 /* least output_length values, or it can be a null pointer if the */
|
|
ferencd@0
|
74 /* case information is not needed. A nonzero flag suggests that */
|
|
ferencd@0
|
75 /* the corresponding Unicode character be forced to uppercase */
|
|
ferencd@0
|
76 /* by the caller (if possible), while zero suggests that it be */
|
|
ferencd@0
|
77 /* forced to lowercase (if possible). ASCII code points are */
|
|
ferencd@0
|
78 /* output already in the proper case, but their flags will be set */
|
|
ferencd@0
|
79 /* appropriately so that applying the flags would be harmless. */
|
|
ferencd@0
|
80 /* The return value can be any of the punycode_status values */
|
|
ferencd@0
|
81 /* defined above; if not punycode_success, then output_length, */
|
|
ferencd@0
|
82 /* output, and case_flags might contain garbage. On success, the */
|
|
ferencd@0
|
83 /* decoder will never need to write an output_length greater than */
|
|
ferencd@0
|
84 /* input_length, because of how the encoding is defined. */
|