diff options
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs')
-rw-r--r-- | gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs | 925 |
1 files changed, 0 insertions, 925 deletions
diff --git a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs b/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs deleted file mode 100644 index 4acff7fe490..00000000000 --- a/gnu/usr.bin/perl/cpan/Unicode-Normalize/Normalize.xs +++ /dev/null @@ -1,925 +0,0 @@ - -#define PERL_NO_GET_CONTEXT /* we want efficiency */ - -/* private functions which need pTHX_ and aTHX_ - pv_cat_decompHangul - sv_2pvunicode - pv_utf8_decompose - pv_utf8_reorder - pv_utf8_compose -*/ - -#include "EXTERN.h" -#include "perl.h" -#include "XSUB.h" - -/* These 5 files are prepared by mkheader */ -#include "unfcmb.h" -#include "unfcan.h" -#include "unfcpt.h" -#include "unfcmp.h" -#include "unfexc.h" - -/* The generated normalization tables since v5.20 are in native character set - * terms. Prior to that, they were in Unicode terms. So we use 'uvchr' for - * later perls, and redefine that to be 'uvuni' for earlier ones */ -#if PERL_VERSION < 20 -# undef uvchr_to_utf8 -# ifdef uvuni_to_utf8 -# define uvchr_to_utf8 uvuni_to_utf8 -# else /* Perl 5.6.1 */ -# define uvchr_to_utf8 uv_to_utf8 -# endif - -# undef utf8n_to_uvchr -# ifdef utf8n_to_uvuni -# define utf8n_to_uvchr utf8n_to_uvuni -# else /* Perl 5.6.1 */ -# define utf8n_to_uvchr utf8_to_uv -# endif -#endif - -/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ -#ifndef UTF8_ALLOW_BOM -#define UTF8_ALLOW_BOM (0) -#endif /* UTF8_ALLOW_BOM */ - -#ifndef UTF8_ALLOW_SURROGATE -#define UTF8_ALLOW_SURROGATE (0) -#endif /* UTF8_ALLOW_SURROGATE */ - -#ifndef UTF8_ALLOW_FE_FF -#define UTF8_ALLOW_FE_FF (0) -#endif /* UTF8_ALLOW_FE_FF */ - -#ifndef UTF8_ALLOW_FFFF -#define UTF8_ALLOW_FFFF (0) -#endif /* UTF8_ALLOW_FFFF */ - -#ifndef PERL_UNUSED_VAR -# define PERL_UNUSED_VAR(x) ((void)sizeof(x)) -#endif - -#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF) - -/* check if the string buffer is enough before uvchr_to_utf8(). */ -/* dstart, d, and dlen should be defined outside before. */ -#define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \ - if (dlen < curlen + (need)) { \ - dlen += (need); \ - Renew(dstart, dlen+1, U8); \ - d = dstart + curlen; \ - } - -/* if utf8n_to_uvchr() sets retlen to 0 (if broken?) */ -#define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character" - -/* utf8_hop() hops back before start. Maybe broken UTF-8 */ -#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start" - -/* At present, char > 0x10ffff are unaffected without complaint, right? */ -#define VALID_UTF_MAX (0x10ffff) -#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) - -/* size of array for combining characters */ -/* enough as an initial value? */ -#define CC_SEQ_SIZE (10) -#define CC_SEQ_STEP (5) - -/* HANGUL begin */ -#define Hangul_SBase 0xAC00 -#define Hangul_SFinal 0xD7A3 -#define Hangul_SCount 11172 - -#define Hangul_NCount 588 - -#define Hangul_LBase 0x1100 -#define Hangul_LFinal 0x1112 -#define Hangul_LCount 19 - -#define Hangul_VBase 0x1161 -#define Hangul_VFinal 0x1175 -#define Hangul_VCount 21 - -#define Hangul_TBase 0x11A7 -#define Hangul_TFinal 0x11C2 -#define Hangul_TCount 28 - -#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal)) -#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0) -#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u)) -#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal)) -#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal)) -#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal)) -/* HANGUL end */ - -/* this is used for canonical ordering of combining characters (c.c.). */ -typedef struct { - U8 cc; /* combining class */ - UV uv; /* codepoint */ - STRLEN pos; /* position */ -} UNF_cc; - -static int compare_cc(const void *a, const void *b) -{ - int ret_cc; - ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc; - if (ret_cc) - return ret_cc; - - return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos ) - - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos ); -} - -static U8* dec_canonical(UV uv) -{ - U8 ***plane, **row; - if (OVER_UTF_MAX(uv)) - return NULL; - plane = (U8***)UNF_canon[uv >> 16]; - if (! plane) - return NULL; - row = plane[(uv >> 8) & 0xff]; - return row ? row[uv & 0xff] : NULL; -} - -static U8* dec_compat(UV uv) -{ - U8 ***plane, **row; - if (OVER_UTF_MAX(uv)) - return NULL; - plane = (U8***)UNF_compat[uv >> 16]; - if (! plane) - return NULL; - row = plane[(uv >> 8) & 0xff]; - return row ? row[uv & 0xff] : NULL; -} - -static UV composite_uv(UV uv, UV uv2) -{ - UNF_complist ***plane, **row, *cell, *i; - - if (!uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) - return 0; - - if (Hangul_IsL(uv) && Hangul_IsV(uv2)) { - UV lindex = uv - Hangul_LBase; - UV vindex = uv2 - Hangul_VBase; - return(Hangul_SBase + (lindex * Hangul_VCount + vindex) * - Hangul_TCount); - } - if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) { - UV tindex = uv2 - Hangul_TBase; - return(uv + tindex); - } - plane = UNF_compos[uv >> 16]; - if (! plane) - return 0; - row = plane[(uv >> 8) & 0xff]; - if (! row) - return 0; - cell = row[uv & 0xff]; - if (! cell) - return 0; - for (i = cell; i->nextchar; i++) { - if (uv2 == i->nextchar) - return i->composite; - } - return 0; -} - -static U8 getCombinClass(UV uv) -{ - U8 **plane, *row; - if (OVER_UTF_MAX(uv)) - return 0; - plane = (U8**)UNF_combin[uv >> 16]; - if (! plane) - return 0; - row = plane[(uv >> 8) & 0xff]; - return row ? row[uv & 0xff] : 0; -} - -static U8* pv_cat_decompHangul(pTHX_ U8* d, UV uv) -{ - UV sindex = uv - Hangul_SBase; - UV lindex = sindex / Hangul_NCount; - UV vindex = (sindex % Hangul_NCount) / Hangul_TCount; - UV tindex = sindex % Hangul_TCount; - - if (! Hangul_IsS(uv)) - return d; - - d = uvchr_to_utf8(d, (lindex + Hangul_LBase)); - d = uvchr_to_utf8(d, (vindex + Hangul_VBase)); - if (tindex) - d = uvchr_to_utf8(d, (tindex + Hangul_TBase)); - return d; -} - -static char* sv_2pvunicode(pTHX_ SV *sv, STRLEN *lp) -{ - char *s; - STRLEN len; - s = SvPV(sv,len); - if (!SvUTF8(sv)) { - SV* tmpsv = sv_2mortal(newSVpvn(s, len)); - if (!SvPOK(tmpsv)) - s = SvPV_force(tmpsv,len); - sv_utf8_upgrade(tmpsv); - s = SvPV(tmpsv,len); - } - if (lp) - *lp = len; - return s; -} - -static -U8* pv_utf8_decompose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat) -{ - U8* p = s; - U8* e = s + slen; - U8* dstart = *dp; - U8* d = dstart; - - while (p < e) { - STRLEN retlen; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); - if (!retlen) - croak(ErrRetlenIsZero, "decompose"); - p += retlen; - - if (Hangul_IsS(uv)) { - Renew_d_if_not_enough_to(UTF8_MAXLEN * 3) - d = pv_cat_decompHangul(aTHX_ d, uv); - } - else { - U8* r = iscompat ? dec_compat(uv) : dec_canonical(uv); - - if (r) { - STRLEN len = (STRLEN)strlen((char *)r); - Renew_d_if_not_enough_to(len) - while (len--) - *d++ = *r++; - } - else { - Renew_d_if_not_enough_to(UTF8_MAXLEN) - d = uvchr_to_utf8(d, uv); - } - } - } - *dp = dstart; - return d; -} - -static -U8* pv_utf8_reorder(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen) -{ - U8* p = s; - U8* e = s + slen; - U8* dstart = *dp; - U8* d = dstart; - - UNF_cc seq_ary[CC_SEQ_SIZE]; - UNF_cc* seq_ptr = seq_ary; /* use array at the beginning */ - UNF_cc* seq_ext = NULL; /* extend if need */ - STRLEN seq_max = CC_SEQ_SIZE; - STRLEN cc_pos = 0; - - while (p < e) { - U8 curCC; - STRLEN retlen; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); - if (!retlen) - croak(ErrRetlenIsZero, "reorder"); - p += retlen; - - curCC = getCombinClass(uv); - - if (curCC != 0) { - if (seq_max < cc_pos + 1) { /* extend if need */ - seq_max = cc_pos + CC_SEQ_STEP; /* new size */ - if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */ - STRLEN i; - New(0, seq_ext, seq_max, UNF_cc); - for (i = 0; i < cc_pos; i++) - seq_ext[i] = seq_ary[i]; - } - else { - Renew(seq_ext, seq_max, UNF_cc); - } - seq_ptr = seq_ext; /* use seq_ext from now */ - } - - seq_ptr[cc_pos].cc = curCC; - seq_ptr[cc_pos].uv = uv; - seq_ptr[cc_pos].pos = cc_pos; - ++cc_pos; - - if (p < e) - continue; - } - - /* output */ - if (cc_pos) { - STRLEN i; - - if (cc_pos > 1) /* reordered if there are two c.c.'s */ - qsort((void*)seq_ptr, cc_pos, sizeof(UNF_cc), compare_cc); - - for (i = 0; i < cc_pos; i++) { - Renew_d_if_not_enough_to(UTF8_MAXLEN) - d = uvchr_to_utf8(d, seq_ptr[i].uv); - } - cc_pos = 0; - } - - if (curCC == 0) { - Renew_d_if_not_enough_to(UTF8_MAXLEN) - d = uvchr_to_utf8(d, uv); - } - } - if (seq_ext) - Safefree(seq_ext); - *dp = dstart; - return d; -} - -static -U8* pv_utf8_compose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscontig) -{ - U8* p = s; - U8* e = s + slen; - U8* dstart = *dp; - U8* d = dstart; - - UV uvS = 0; /* code point of the starter */ - bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */ - U8 preCC = 0; - - UV seq_ary[CC_SEQ_SIZE]; - UV* seq_ptr = seq_ary; /* use array at the beginning */ - UV* seq_ext = NULL; /* extend if need */ - STRLEN seq_max = CC_SEQ_SIZE; - STRLEN cc_pos = 0; - - while (p < e) { - U8 curCC; - STRLEN retlen; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); - if (!retlen) - croak(ErrRetlenIsZero, "compose"); - p += retlen; - - curCC = getCombinClass(uv); - - if (!valid_uvS) { - if (curCC == 0) { - uvS = uv; /* the first Starter is found */ - valid_uvS = TRUE; - if (p < e) - continue; - } - else { - Renew_d_if_not_enough_to(UTF8_MAXLEN) - d = uvchr_to_utf8(d, uv); - continue; - } - } - else { - bool composed; - - /* blocked */ - if ((iscontig && cc_pos) || /* discontiguous combination */ - (curCC != 0 && preCC == curCC) || /* blocked by same CC */ - (preCC > curCC)) /* blocked by higher CC: revised D2 */ - composed = FALSE; - - /* not blocked: - iscontig && cc_pos == 0 -- contiguous combination - curCC == 0 && preCC == 0 -- starter + starter - curCC != 0 && preCC < curCC -- lower CC */ - else { - /* try composition */ - UV uvComp = composite_uv(uvS, uv); - - if (uvComp && !isExclusion(uvComp)) { - uvS = uvComp; - composed = TRUE; - - /* preCC should not be changed to curCC */ - /* e.g. 1E14 = 0045 0304 0300 where CC(0304) == CC(0300) */ - if (p < e) - continue; - } - else - composed = FALSE; - } - - if (!composed) { - preCC = curCC; - if (curCC != 0 || !(p < e)) { - if (seq_max < cc_pos + 1) { /* extend if need */ - seq_max = cc_pos + CC_SEQ_STEP; /* new size */ - if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */ - New(0, seq_ext, seq_max, UV); - Copy(seq_ary, seq_ext, cc_pos, UV); - } - else { - Renew(seq_ext, seq_max, UV); - } - seq_ptr = seq_ext; /* use seq_ext from now */ - } - seq_ptr[cc_pos] = uv; - ++cc_pos; - } - if (curCC != 0 && p < e) - continue; - } - } - - /* output */ - { - Renew_d_if_not_enough_to(UTF8_MAXLEN) - d = uvchr_to_utf8(d, uvS); /* starter (composed or not) */ - } - - if (cc_pos) { - STRLEN i; - - for (i = 0; i < cc_pos; i++) { - Renew_d_if_not_enough_to(UTF8_MAXLEN) - d = uvchr_to_utf8(d, seq_ptr[i]); - } - cc_pos = 0; - } - - uvS = uv; - } - if (seq_ext) - Safefree(seq_ext); - *dp = dstart; - return d; -} - -MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize - -SV* -decompose(src, compat = &PL_sv_no) - SV * src - SV * compat - PROTOTYPE: $;$ - PREINIT: - SV* dst; - U8 *s, *d, *dend; - STRLEN slen, dlen; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&slen); - dst = newSVpvn("", 0); - dlen = slen; - New(0, d, dlen+1, U8); - dend = pv_utf8_decompose(aTHX_ s, slen, &d, dlen, (bool)SvTRUE(compat)); - sv_setpvn(dst, (char *)d, dend - d); - SvUTF8_on(dst); - Safefree(d); - RETVAL = dst; - OUTPUT: - RETVAL - - -SV* -reorder(src) - SV * src - PROTOTYPE: $ - PREINIT: - SV* dst; - U8 *s, *d, *dend; - STRLEN slen, dlen; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&slen); - dst = newSVpvn("", 0); - dlen = slen; - New(0, d, dlen+1, U8); - dend = pv_utf8_reorder(aTHX_ s, slen, &d, dlen); - sv_setpvn(dst, (char *)d, dend - d); - SvUTF8_on(dst); - Safefree(d); - RETVAL = dst; - OUTPUT: - RETVAL - - -SV* -compose(src) - SV * src - PROTOTYPE: $ - ALIAS: - composeContiguous = 1 - PREINIT: - SV* dst; - U8 *s, *d, *dend; - STRLEN slen, dlen; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&slen); - dst = newSVpvn("", 0); - dlen = slen; - New(0, d, dlen+1, U8); - dend = pv_utf8_compose(aTHX_ s, slen, &d, dlen, (bool)ix); - sv_setpvn(dst, (char *)d, dend - d); - SvUTF8_on(dst); - Safefree(d); - RETVAL = dst; - OUTPUT: - RETVAL - - -SV* -NFD(src) - SV * src - PROTOTYPE: $ - ALIAS: - NFKD = 1 - PREINIT: - SV *dst; - U8 *s, *t, *tend, *d, *dend; - STRLEN slen, tlen, dlen; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&slen); - - /* decompose */ - tlen = slen; - New(0, t, tlen+1, U8); - tend = pv_utf8_decompose(aTHX_ s, slen, &t, tlen, (bool)(ix==1)); - *tend = '\0'; - tlen = tend - t; /* no longer know real size of t */ - - /* reorder */ - dlen = tlen; - New(0, d, dlen+1, U8); - dend = pv_utf8_reorder(aTHX_ t, tlen, &d, dlen); - *dend = '\0'; - dlen = dend - d; /* no longer know real size of d */ - - /* return */ - dst = newSVpvn("", 0); - sv_setpvn(dst, (char *)d, dlen); - SvUTF8_on(dst); - - Safefree(t); - Safefree(d); - RETVAL = dst; - OUTPUT: - RETVAL - - -SV* -NFC(src) - SV * src - PROTOTYPE: $ - ALIAS: - NFKC = 1 - FCC = 2 - PREINIT: - SV *dst; - U8 *s, *t, *tend, *u, *uend, *d, *dend; - STRLEN slen, tlen, ulen, dlen; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&slen); - - /* decompose */ - tlen = slen; - New(0, t, tlen+1, U8); - tend = pv_utf8_decompose(aTHX_ s, slen, &t, tlen, (bool)(ix==1)); - *tend = '\0'; - tlen = tend - t; /* no longer know real size of t */ - - /* reorder */ - ulen = tlen; - New(0, u, ulen+1, U8); - uend = pv_utf8_reorder(aTHX_ t, tlen, &u, ulen); - *uend = '\0'; - ulen = uend - u; /* no longer know real size of u */ - - /* compose */ - dlen = ulen; - New(0, d, dlen+1, U8); - dend = pv_utf8_compose(aTHX_ u, ulen, &d, dlen, (bool)(ix==2)); - *dend = '\0'; - dlen = dend - d; /* no longer know real size of d */ - - /* return */ - dst = newSVpvn("", 0); - sv_setpvn(dst, (char *)d, dlen); - SvUTF8_on(dst); - - Safefree(t); - Safefree(u); - Safefree(d); - RETVAL = dst; - OUTPUT: - RETVAL - - -SV* -checkNFD(src) - SV * src - PROTOTYPE: $ - ALIAS: - checkNFKD = 1 - PREINIT: - STRLEN srclen, retlen; - U8 *s, *e, *p, curCC, preCC; - bool result = TRUE; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&srclen); - e = s + srclen; - - preCC = 0; - for (p = s; p < e; p += retlen) { - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); - if (!retlen) - croak(ErrRetlenIsZero, "checkNFD or -NFKD"); - - curCC = getCombinClass(uv); - if (preCC > curCC && curCC != 0) { /* canonical ordering violated */ - result = FALSE; - break; - } - if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) { - result = FALSE; - break; - } - preCC = curCC; - } - RETVAL = boolSV(result); - OUTPUT: - RETVAL - - -SV* -checkNFC(src) - SV * src - PROTOTYPE: $ - ALIAS: - checkNFKC = 1 - PREINIT: - STRLEN srclen, retlen; - U8 *s, *e, *p, curCC, preCC; - bool result = TRUE; - bool isMAYBE = FALSE; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&srclen); - e = s + srclen; - - preCC = 0; - for (p = s; p < e; p += retlen) { - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); - if (!retlen) - croak(ErrRetlenIsZero, "checkNFC or -NFKC"); - - curCC = getCombinClass(uv); - if (preCC > curCC && curCC != 0) { /* canonical ordering violated */ - result = FALSE; - break; - } - - /* get NFC/NFKC property */ - if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */ - ; /* YES */ - else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) { - result = FALSE; - break; - } - else if (isComp2nd(uv)) - isMAYBE = TRUE; - else if (ix) { - char *canon, *compat; - /* NFKC_NO when having compatibility mapping. */ - canon = (char *) dec_canonical(uv); - compat = (char *) dec_compat(uv); - if (compat && !(canon && strEQ(canon, compat))) { - result = FALSE; - break; - } - } /* end of get NFC/NFKC property */ - - preCC = curCC; - } - if (isMAYBE && result) /* NO precedes MAYBE */ - XSRETURN_UNDEF; - RETVAL = boolSV(result); - OUTPUT: - RETVAL - - -SV* -checkFCD(src) - SV * src - PROTOTYPE: $ - ALIAS: - checkFCC = 1 - PREINIT: - STRLEN srclen, retlen; - U8 *s, *e, *p, curCC, preCC; - bool result = TRUE; - bool isMAYBE = FALSE; - CODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&srclen); - e = s + srclen; - preCC = 0; - for (p = s; p < e; p += retlen) { - U8 *sCan; - UV uvLead; - STRLEN canlen = 0; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); - if (!retlen) - croak(ErrRetlenIsZero, "checkFCD or -FCC"); - - sCan = (U8*) dec_canonical(uv); - - if (sCan) { - STRLEN canret; - canlen = (STRLEN)strlen((char *) sCan); - uvLead = utf8n_to_uvchr(sCan, canlen, &canret, AllowAnyUTF); - if (!canret) - croak(ErrRetlenIsZero, "checkFCD or -FCC"); - } - else { - uvLead = uv; - } - - curCC = getCombinClass(uvLead); - - if (curCC != 0 && curCC < preCC) { /* canonical ordering violated */ - result = FALSE; - break; - } - - if (ix) { - if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) { - result = FALSE; - break; - } - else if (isComp2nd(uv)) - isMAYBE = TRUE; - } - - if (sCan) { - STRLEN canret; - UV uvTrail; - U8* eCan = sCan + canlen; - U8* pCan = utf8_hop(eCan, -1); - if (pCan < sCan) - croak(ErrHopBeforeStart); - uvTrail = utf8n_to_uvchr(pCan, eCan - pCan, &canret, AllowAnyUTF); - if (!canret) - croak(ErrRetlenIsZero, "checkFCD or -FCC"); - preCC = getCombinClass(uvTrail); - } - else { - preCC = curCC; - } - } - if (isMAYBE && result) /* NO precedes MAYBE */ - XSRETURN_UNDEF; - RETVAL = boolSV(result); - OUTPUT: - RETVAL - - -U8 -getCombinClass(uv) - UV uv - PROTOTYPE: $ - -bool -isExclusion(uv) - UV uv - PROTOTYPE: $ - -bool -isSingleton(uv) - UV uv - PROTOTYPE: $ - -bool -isNonStDecomp(uv) - UV uv - PROTOTYPE: $ - -bool -isComp2nd(uv) - UV uv - PROTOTYPE: $ - ALIAS: - isNFC_MAYBE = 1 - isNFKC_MAYBE = 2 - INIT: - PERL_UNUSED_VAR(ix); - -SV* -isNFD_NO(uv) - UV uv - PROTOTYPE: $ - ALIAS: - isNFKD_NO = 1 - PREINIT: - bool result = FALSE; - CODE: - if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) - result = TRUE; /* NFD_NO or NFKD_NO */ - RETVAL = boolSV(result); - OUTPUT: - RETVAL - - -SV* -isComp_Ex(uv) - UV uv - PROTOTYPE: $ - ALIAS: - isNFC_NO = 0 - isNFKC_NO = 1 - PREINIT: - bool result = FALSE; - CODE: - if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) - result = TRUE; /* NFC_NO or NFKC_NO */ - else if (ix) { - char *canon, *compat; - canon = (char *) dec_canonical(uv); - compat = (char *) dec_compat(uv); - if (compat && (!canon || strNE(canon, compat))) - result = TRUE; /* NFC_NO or NFKC_NO */ - } - RETVAL = boolSV(result); - OUTPUT: - RETVAL - -SV* -getComposite(uv, uv2) - UV uv - UV uv2 - PROTOTYPE: $$ - PREINIT: - UV composite; - CODE: - composite = composite_uv(uv, uv2); - RETVAL = composite ? newSVuv(composite) : &PL_sv_undef; - OUTPUT: - RETVAL - - - -SV* -getCanon(uv) - UV uv - PROTOTYPE: $ - ALIAS: - getCompat = 1 - CODE: - if (Hangul_IsS(uv)) { - U8 tmp[3 * UTF8_MAXLEN + 1]; - U8 *t = tmp; - U8 *e = pv_cat_decompHangul(aTHX_ t, uv); - RETVAL = newSVpvn((char *)t, e - t); - } else { - U8* rstr = ix ? dec_compat(uv) : dec_canonical(uv); - if (!rstr) - XSRETURN_UNDEF; - RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr)); - } - SvUTF8_on(RETVAL); - OUTPUT: - RETVAL - - -void -splitOnLastStarter(src) - SV * src - PREINIT: - SV *svp; - STRLEN srclen; - U8 *s, *e, *p; - PPCODE: - s = (U8*)sv_2pvunicode(aTHX_ src,&srclen); - e = s + srclen; - p = e; - while (s < p) { - UV uv; - p = utf8_hop(p, -1); - if (p < s) - croak(ErrHopBeforeStart); - uv = utf8n_to_uvchr(p, e - p, NULL, AllowAnyUTF); - if (getCombinClass(uv) == 0) /* Last Starter found */ - break; - } - - svp = sv_2mortal(newSVpvn((char*)s, p - s)); - SvUTF8_on(svp); - XPUSHs(svp); - - svp = sv_2mortal(newSVpvn((char*)p, e - p)); - SvUTF8_on(svp); - XPUSHs(svp); - |