/*
 * punycode.h from RFC 3492
 * http://www.nicemice.net/idn/
 * Adam M. Costello
 * http://www.nicemice.net/amc/
 *
 * This is ANSI C code (C89) implementing Punycode (RFC 3492).
 */

#ifndef PUNYCODE_H
#define PUNYCODE_H

#include <limits.h> /* UINT_MAX, needed to select punycode_uint below */

/* Status codes returned by punycode_encode() and punycode_decode(). */
enum punycode_status {
  punycode_success,
  punycode_bad_input,  /* Input is invalid.                       */
  punycode_big_output, /* Output would exceed the space provided. */
  punycode_overflow    /* Input needs wider integers to process.  */
};

/*
 * Unsigned integer type used for code points and lengths.  It is
 * unsigned int when that type can hold (1 << 26) - 1, and unsigned
 * long otherwise.
 */
#if UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif

/*
 * punycode_encode() converts Unicode to Punycode.
 *
 * input_length   Number of code points in the input.
 * input          Array of Unicode code points (not code units;
 *                surrogate pairs are not allowed).
 * case_flags     Array of input_length boolean values, or a null
 *                pointer.  A nonzero value suggests that the
 *                corresponding Unicode character be forced to
 *                uppercase after being decoded (if possible), and
 *                zero suggests that it be forced to lowercase (if
 *                possible).  ASCII code points are encoded literally,
 *                except that ASCII letters are forced to uppercase or
 *                lowercase according to the corresponding uppercase
 *                flags.  If case_flags is a null pointer then ASCII
 *                letters are left as they are, and other code points
 *                are treated as if their uppercase flags were zero.
 * output_length  In/out argument: the caller passes in the maximum
 *                number of code points that it can receive, and on
 *                successful return it will contain the number of code
 *                points actually output.
 * output         Receives the result as an array of ASCII code
 *                points.  The output string is *not* null-terminated;
 *                it will contain zeros if and only if the input
 *                contains zeros.  (Of course the caller can leave
 *                room for a terminator and add one if needed.)
 *
 * The return value can be any of the punycode_status values defined
 * above except punycode_bad_input; if not punycode_success, then
 * output_length and output might contain garbage.
 */
enum punycode_status punycode_encode(
  punycode_uint input_length,
  const punycode_uint input[],
  const unsigned char case_flags[],
  punycode_uint *output_length,
  char output[] );

/*
 * punycode_decode() converts Punycode to Unicode.
 *
 * input_length   Number of code points in the input.
 * input          Array of ASCII code points.
 * output_length  In/out argument: the caller passes in the maximum
 *                number of code points that it can receive, and on
 *                successful return it will contain the actual number
 *                of code points output.  On success, the decoder will
 *                never need to write an output_length greater than
 *                input_length, because of how the encoding is
 *                defined.
 * output         Receives the result as an array of Unicode code
 *                points.
 * case_flags     Needs room for at least output_length values, or it
 *                can be a null pointer if the case information is not
 *                needed.  A nonzero flag suggests that the
 *                corresponding Unicode character be forced to
 *                uppercase by the caller (if possible), while zero
 *                suggests that it be forced to lowercase (if
 *                possible).  ASCII code points are output already in
 *                the proper case, but their flags will be set
 *                appropriately so that applying the flags would be
 *                harmless.
 *
 * The return value can be any of the punycode_status values defined
 * above; if not punycode_success, then output_length, output, and
 * case_flags might contain garbage.
 */
enum punycode_status punycode_decode(
  punycode_uint input_length,
  const char input[],
  punycode_uint *output_length,
  punycode_uint output[],
  unsigned char case_flags[] );

#endif /* PUNYCODE_H */