// © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2003, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * * File colprobe.cpp * * Modification History: * * Date Name Description * 03/18/2003 weiv Creation. ******************************************************************************* */ #include "uoptions.h" #include "unicode/ucol.h" #include "unicode/ucoleitr.h" #include "unicode/ures.h" #include "unicode/uniset.h" #include "unicode/usetiter.h" #include "unicode/ustring.h" #include "unicode/uchar.h" #include "unicode/uscript.h" #include "uprops.h" #include "hash.h" #include "ucol_imp.h" #include "unicode/ustdio.h" #include "unicode/utrans.h" #include #include #include #include #include #include "colprobe.h" #ifdef WIN32 #include #else // // Stubs for Windows API functions when building on UNIXes. // typedef int DWORD; inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; #include unsigned long timeGetTime() { struct timeval t; gettimeofday(&t, 0); unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares. val += t.tv_usec / 1000; return val; }; inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; const int LCMAP_SORTKEY = 0; #define MAKELCID(a,b) 0 const int SORT_DEFAULT = 0; #endif #include "line.h" static UBool gVerbose = FALSE; static UBool gDebug = FALSE; static UBool gQuiet = FALSE; static UBool gExemplar = FALSE; DWORD gWinLCID; int gCount; Line **gICULines; UCollator *gCol; UCollator *gUCA; Line source; Line target; Line *gSource = &source; Line *gTarget = ⌖ Hashtable gElements(FALSE); Hashtable gExpansions(FALSE); CompareFn gComparer; const UChar separatorChar = 0x0030; UFILE *out = NULL; UFILE *err = NULL; UFILE *log = NULL; const char *progName = "colprobe"; const char *gLocale = NULL; //char platform[256]; int32_t platformIndex = -1; int32_t gPlatformNo = 0; int32_t gPlatformIndexes[10]; int32_t gLocaleNo = 0; const char* gLocales[100]; UBool gRulesStdin = FALSE; enum { HELP1, HELP2, VERBOSE, QUIET, VERSION, ICUDATADIR, COPYRIGHT, LOCALE, PLATFORM, DEBUG, EXEMPLAR, RULESSTDIN }; UOption options[]={ /*0*/ UOPTION_HELP_H, /*1*/ UOPTION_HELP_QUESTION_MARK, /*2*/ UOPTION_VERBOSE, /*3*/ UOPTION_QUIET, /*4*/ UOPTION_VERSION, /*5*/ UOPTION_ICUDATADIR, /*6*/ UOPTION_COPYRIGHT, /*7*/ UOPTION_DEF("locale", 'l', UOPT_REQUIRES_ARG), /*8*/ UOPTION_DEF("platform", 'p', UOPT_REQUIRES_ARG), /*9*/ UOPTION_DEF("debug", 'D', UOPT_NO_ARG), /*10*/ UOPTION_DEF("exemplar", 'E', UOPT_NO_ARG), /*11*/ UOPTION_DEF("rulesstdin", 'R', UOPT_NO_ARG) }; int Winstrcmp(const void *a, const void *b) { gCount++; int t; t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len); return t-2; } int ICUstrcmp(const void *a, const void *b) { gCount++; UCollationResult t; t = ucol_strcoll(gCol, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len); if (t == UCOL_LESS) return -1; if (t == UCOL_GREATER) return +1; return 0; } struct { const char* name; CompareFn comparer; } platforms[] = { { "icu", ICUstrcmp }, { "win", Winstrcmp} }; void deleteLineElement(void *line) { delete((Line *)line); } void stringToLower(char *string) { uint32_t i = 0; for(i = 0; i < strlen(string); i++) { string[i] = tolower(string[i]); } } void usage(const char *name) { u_fprintf(out, "Usage: %s --locale loc_name --platform platform\n", name); } void listKnownPlatforms() { int32_t i = 0; u_fprintf(err, "Known platforms:\n"); for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { u_fprintf(err, "\t%s\n", platforms[i]); } } void addPlatform(const char *platform) { int32_t i; //stringToLower(platform); int32_t oldPlatformNo = gPlatformNo; for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { if(strcmp(platform, platforms[i].name) == 0) { gPlatformIndexes[gPlatformNo++] = i; } } if(gPlatformNo == oldPlatformNo) { u_fprintf(err, "Unknown platform %s\n", platform); listKnownPlatforms(); } } void processArgs(int argc, char* argv[], UErrorCode &status) { int32_t i = 0; U_MAIN_INIT_ARGS(argc, argv); argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options); if(argc < 0) { u_fprintf(err, "Unknown option: %s\n", argv[-argc]); usage(progName); return; } if(options[0].doesOccur || options[1].doesOccur) { usage(progName); return; } if(options[VERBOSE].doesOccur) { gVerbose = TRUE; } if(options[DEBUG].doesOccur) { gDebug = TRUE; gVerbose = TRUE; } if(options[EXEMPLAR].doesOccur) { gExemplar = TRUE; } if(options[QUIET].doesOccur) { gQuiet = TRUE; } /* for(i = 8; i < 9; i++) { if(!options[i].doesOccur) { u_fprintf(err, "Option %s is required!\n", options[i].longName); usage(progName); status = U_ILLEGAL_ARGUMENT_ERROR; } if(options[i].value == NULL) { u_fprintf(err, "Option %s needs an argument!\n", options[i].longName); usage(progName); status = U_ILLEGAL_ARGUMENT_ERROR; } } */ // ASCII based options specified on the command line // this is for testing purposes, will allow to load // up ICU rules and then poke through them. // In that case, we test only ICU and don't need // a locale. if(options[RULESSTDIN].doesOccur) { gRulesStdin = TRUE; addPlatform("icu"); return; } if(options[LOCALE].doesOccur) { gLocale = options[LOCALE].value; } else { for(i = 1; i < argc; i++) { gLocales[gLocaleNo++] = argv[i]; } } if(options[PLATFORM].doesOccur) { //strcpy(platform, options[PLATFORM].value); //addPlatform("icu"); addPlatform(options[PLATFORM].value); } else { // there is a list of platforms u_fprintf(err, "Option %s is required!\n", options[i].longName); usage(progName); status = U_ILLEGAL_ARGUMENT_ERROR; } // // Set up a Windows LCID // gWinLCID = uloc_getLCID(gLocale); /* if (gLocale != 0) { gWinLCID = MAKELCID(gLocale, SORT_DEFAULT); } else { gWinLCID = uloc_getLCID(gLocale); } */ } void printRules(const UChar *name, int32_t len, UFILE *file) { // very rudimentary pretty rules print int32_t i = 0; UChar toPrint[16384]; int32_t toPrintIndex = 0; for(i = 0; i < len; i++) { if(name[i] == 0x0026) { if(toPrintIndex) { toPrint[toPrintIndex] = 0; u_fprintf(file, "%U\n", toPrint); toPrintIndex = 0; toPrint[toPrintIndex++] = name[i]; } else { toPrint[toPrintIndex++] = name[i]; } } else { toPrint[toPrintIndex++] = name[i]; } } if(toPrintIndex) { toPrint[toPrintIndex] = 0; u_fprintf(file, "%U\n", toPrint); toPrintIndex = 0; } } void escapeString(const UChar *name, int32_t len, UFILE *file) { u_fprintf(file, "%U", name); /* int32_t j = 0; for(j = 0; j < len; j++) { if(name[j] >= 0x20 && name[j] < 0x80) { u_fprintf(file, "%c", name[j]); } else { u_fprintf(file, "\\u%04X", name[j]); } } */ } void escapeALine(Line *line, UFILE *file) { escapeString(line->name, line->len, file); } void escapeExpansion(Line *line, UFILE *file) { escapeString(line->expansionString, line->expLen, file); } void showNames(Line *line, UFILE *file) { UErrorCode status = U_ZERO_ERROR; int32_t j = 0; char charName[256]; for(j = 0; j < line->len; j++) { u_charName(line->name[j], U_EXTENDED_CHAR_NAME, charName, 256, &status); u_fprintf(file, "%s ", charName); } } void setArray(Line **array, Line *contents, int32_t size) { int32_t i = 0; for(i = 0; i < size; i++) { array[i] = contents+i; } } // set an array from a Hashtable int32_t setArray(Line **array, Hashtable *table = &gElements) { int32_t size = table->count(); int32_t hashIndex = -1; const UHashElement *hashElement = NULL; int32_t count = 0; while((hashElement = table->nextElement(hashIndex)) != NULL) { array[count++] = (Line *)hashElement->value.pointer; } return size; } UBool trySwamped(Line **smaller, Line **greater, UChar chars[2], CompareFn comparer) { u_strcpy(gSource->name, (*smaller)->name); gSource->name[(*smaller)->len] = separatorChar; gSource->name[(*smaller)->len+1] = chars[0]; gSource->name[(*smaller)->len+2] = 0; gSource->len = (*smaller)->len+2; u_strcpy(gTarget->name, (*greater)->name); gTarget->name[(*greater)->len] = separatorChar; gTarget->name[(*greater)->len+1] = chars[1]; gTarget->name[(*greater)->len+2] = 0; gTarget->len = (*greater)->len+2; if(comparer(&gSource, &gTarget) > 0) { return TRUE; } else { return FALSE; } } UBool trySwamps(Line **smaller, Line **greater, UChar chars[2], CompareFn comparer) { gSource->name[0] = chars[0]; gSource->name[1] = separatorChar; u_strcpy(gSource->name+2, (*smaller)->name); gSource->len = (*smaller)->len+2; gTarget->name[0] = chars[1]; gTarget->name[1] = separatorChar; u_strcpy(gTarget->name+2, (*greater)->name); gTarget->len = (*greater)->len+2; if(comparer(&gSource, &gTarget) < 0) { return TRUE; } else { return FALSE; } } UColAttributeValue probeStrength(Line** prevLine, Line **currLine, CompareFn comparer) { // Primary swamps secondary // have pairs where [0] 2> [1] UChar primSwamps[][2] = { { 0x00E0, 0x0061 }, { 0x0450, 0x0435 }, { 0x31a3, 0x310d } }; // Secondary swamps tertiary // have pairs where [0] 3> [1] UChar secSwamps[][2] = { { 0x0053, 0x0073 }, { 0x0415, 0x0435 }, { 0x31b6, 0x310e } }; // Secondary is swamped by primary // have pairs where [0] 1> [1] UChar secSwamped[][2] = { { 0x0062, 0x0061 }, { 0x0436, 0x0454 }, { 0x310e, 0x310d } }; // Tertiary is swamped by secondary // have pairs where [0] 2> [1] UChar terSwamped[][2] = { { 0x00E0, 0x0061 }, { 0x0450, 0x0435 }, { 0x31a3, 0x310d } }; int32_t i = 0; // Tertiary swamps equal? int result = 0; // Choose the pair i = 0; /* if((*prevLine)->name[0] > 0xFF && (*currLine)->name[0] > 0xFF) { i = 0; } else if((*prevLine)->name[0] < 0x0400 && (*currLine)->name[0] < 0x0400) { i = 1; } else { i = 2; } */ // are they equal? if((result = comparer(prevLine, currLine)) == 0) { return UCOL_IDENTICAL; } else if(result > 0) { //fprintf(stderr, "lines should be ordered!"); return UCOL_OFF; } else if(trySwamps(prevLine, currLine, primSwamps[i], comparer)) { return UCOL_PRIMARY; } else if(trySwamps(prevLine, currLine, secSwamps[i], comparer)) { return UCOL_SECONDARY; } else if(trySwamped(prevLine, currLine, terSwamped[i], comparer)) { // is there a tertiary difference return UCOL_TERTIARY; } else { //fprintf(stderr, "Unknown strength!\n"); return UCOL_ON; } } // This function tries to probe the set of lines // (already sorted by qsort) and deduct the strengths void analyzeStrength(Line **lines, int32_t size, CompareFn comparer) { int32_t i = 0; for(i = 1; i < size; i++) { Line **prevLine = lines+i-1; Line **currLine = lines+i; (*currLine)->strength = probeStrength(prevLine, currLine, comparer); (*currLine)->sortedIndex = i; (*currLine)->previous = *prevLine; (*prevLine)->next = *currLine; } } void printStrength(UColAttributeValue strength, UFILE *file) { u_fprintf(file, " "); switch(strength) { case UCOL_IDENTICAL: u_fprintf(file, "="); break; case UCOL_TERTIARY: //u_fprintf(file, "<3"); u_fprintf(file, "<<<"); break; case UCOL_SECONDARY: //u_fprintf(file, "<2"); u_fprintf(file, "<<"); break; case UCOL_PRIMARY: //u_fprintf(file, "<1"); u_fprintf(file, "<"); break; case UCOL_OFF: u_fprintf(file, ">?"); default: u_fprintf(file, "?!"); break; } u_fprintf(file, " "); } void printStrength(Line *line, UFILE *file) { printStrength(line->strength, file); } void printLine(Line *line, UFILE *file) { escapeALine(line, file); if(line->isExpansion) { u_fprintf(file, "/"); escapeExpansion(line, file); } } void printOrdering(Line **lines, int32_t size, UFILE *file, UBool useLinks = FALSE) { int32_t i = 0; //printLine(*lines); //escapeALine(*lines); // Print first line Line *line = NULL; Line *previous = *lines; if(previous->isReset) { u_fprintf(file, "\n& "); escapeALine(previous, file); } else if(!previous->isRemoved) { printLine(previous, file); } i = 1; while(i < size && previous->next) { if(useLinks) { line = previous->next; } else { line = *(lines+i); } if(line->isReset) { u_fprintf(file, "\n& "); escapeALine(line, file); } else if(!line->isRemoved) { if(file == out) { u_fprintf(file, "\n"); } if(i > 0) { printStrength(line, file); } printLine(line, file); //escapeALine(line, file); } previous = line; i++; } u_fprintf(file, "\n"); } void setIndexes(Line **lines, int32_t size) { int32_t i = 0; (*lines)->sortedIndex = 0; for(i = 1; i < size; i++) { Line *line = *(lines+i); Line *prev = *(lines+i-1); line->previous = prev; prev->next = line; line->sortedIndex = i; } } // this seems to be a dead end void noteExpansion(Line **gLines, Line *line, int32_t size, CompareFn comparer) { UErrorCode status = U_ZERO_ERROR; UnicodeString key(line->name, line->len); //Line *toInsert = (Line *)gElements.get(key); Line *toInsert = (Line *)gExpansions.get(key); if(toInsert != NULL) { toInsert->isExpansion = TRUE; u_strcpy(toInsert->expansionString, line->expansionString); toInsert->expLen = line->expLen; toInsert->previous->next = toInsert->next; toInsert->next->previous = toInsert->previous; gElements.remove(key); } else { toInsert = new Line(*line); toInsert->isExpansion = TRUE; gElements.put(UnicodeString(toInsert->name, toInsert->len), toInsert, status); } int32_t i = 0; Line testLine; Line *l = &testLine; for(i = 0; i < size; i++) { u_strcpy(testLine.name, (*(gLines+i))->name); u_strcat(testLine.name, line->expansionString); testLine.len = (*(gLines+i))->len + line->expLen; if(comparer(&l, &line) > 0) { toInsert->previous = *(gLines+i-1); toInsert->next = *(gLines+i); toInsert->previous->next = toInsert; toInsert->next->previous = toInsert; break; } } if(gVerbose) { u_fprintf(log, "Adding expansion\n"); escapeALine(line, log); u_fprintf(log, "/"); escapeExpansion(line, log); u_fprintf(log, " "); } } void positionExpansions(Line **gLines, int32_t size, CompareFn comparer) { int result = 0; Line *line = NULL; Line *toMove = NULL; int32_t i = 0, j = 0; Line **sortedExpansions = new Line*[gExpansions.count()]; int32_t sortedExpansionsSize = setArray(sortedExpansions, &gExpansions); qsort(sortedExpansions, sortedExpansionsSize, sizeof(Line *), comparer); // Make a list of things in the vincinity of expansion candidate for(j = 0; j < sortedExpansionsSize; j++) { line = *(sortedExpansions+j); UnicodeString key(line->name, line->len); toMove = (Line *)gElements.get(key); int32_t i = 0; Line testLine, prevTestLine; Line *l = &testLine; Line *prevL = &prevTestLine; // This can be further optimized, since we now know that we have a // sorted list of expansions, so current can start from toMove, since all // the elements before it are already smaller. In the beginning it needs to // be on gLines, though. Line *current = *gLines; while(current) { if(current == toMove) { // we are wading through a sorted list // if we found ourselves, it means that we // are already in a right place, so no moving // is needed, but we need to make sure we have // the right strength. toMove->strength = probeStrength(&prevL, &toMove, comparer); if(0) { u_fprintf(log, "Positioned expansion without moving "); printLine(toMove, log); u_fprintf(log, " new ordering: \n"); printOrdering(gLines, size, log, TRUE); } break; } else { u_strcpy(testLine.name, current->name); if(!current->isExpansion) { u_strcat(testLine.name, line->expansionString); testLine.len = current->len + line->expLen; } else { testLine.len = current->len; } if(comparer(&l, &line) > 0) { // remove from chain if(toMove->next) { toMove->next->strength = probeStrength(&(toMove->previous), &(toMove->next), comparer); toMove->next->previous = toMove->previous; } if(toMove->previous) { toMove->previous->next = toMove->next; } // insert toMove->previous = current->previous; toMove->next = current; if(current->previous) { current->previous->next = toMove; } current->previous = toMove; toMove->strength = probeStrength(&prevL, &toMove, comparer); toMove->next->strength = probeStrength(&toMove, &l, comparer); if(0) { u_fprintf(log, "Positioned expansion "); printLine(toMove, log); u_fprintf(log, " new ordering: \n"); printOrdering(gLines, size, log, TRUE); } if(toMove->strength == UCOL_IDENTICAL) { // check for craziness such as s = ss/s // such line would consist of previous (or next) concatenated with the expansion value // make a test UChar fullString[256]; u_strcpy(fullString, toMove->previous->name); u_strcat(fullString, toMove->expansionString); if(u_strcmp(fullString, toMove->name) == 0) { toMove->previous->next = toMove->next; toMove->next->previous = toMove->previous; toMove->isRemoved = TRUE; u_fprintf(log, "Removed: "); printLine(toMove, log); u_fprintf(log, "\n"); } } else if(toMove->next->strength == UCOL_IDENTICAL) { UChar fullString[256]; u_strcpy(fullString, toMove->next->name); u_strcat(fullString, toMove->expansionString); if(u_strcmp(fullString, toMove->name) == 0) { toMove->next->strength = toMove->strength; toMove->previous->next = toMove->next; toMove->next->previous = toMove->previous; toMove->isRemoved = TRUE; u_fprintf(log, "Removed because of back: "); printLine(toMove, log); u_fprintf(log, "\n"); } } break; } prevTestLine = testLine; } current = current->next; } } delete[] sortedExpansions; } void noteExpansion(Line *line) { UErrorCode status = U_ZERO_ERROR; UnicodeString key(line->name, line->len); Line *el = (Line *)gElements.get(key); if(el != NULL) { el->isExpansion = TRUE; u_strcpy(el->expansionString, line->expansionString); el->expLen = line->expLen; } else { Line *toInsert = new Line(*line); toInsert->isExpansion = TRUE; gElements.put(UnicodeString(line->name, line->len), toInsert, status); } Line *el2 = (Line *)gExpansions.get(key); el2->isExpansion = TRUE; u_strcpy(el2->expansionString, line->expansionString); el2->expLen = line->expLen; if(gDebug) { u_fprintf(log, "Adding expansion\n"); printLine(line, log); u_fprintf(log, "\n"); } } void noteContraction(Line *line) { UErrorCode status = U_ZERO_ERROR; Line *toInsert = new Line(*line); toInsert->isContraction = TRUE; gElements.put(UnicodeString(line->name, line->len), toInsert, status); if(gVerbose) { u_fprintf(log, "Adding contraction\n"); escapeALine(line, log); u_fprintf(log, " "); } } void noteElement(Line *line) { UErrorCode status = U_ZERO_ERROR; Line *toInsert = new Line(*line); gElements.put(UnicodeString(line->name, line->len), toInsert, status); if(0) { //if(gDebug) escapeALine(line, log); u_fprintf(log, " "); } } // This function checks if a combination of characters has changed place with the // adjacent elements. If so, these are most probably contractions. // However, it still needs to be checked if these contractions are fake - the // test is simple - if xy is suspected contraction, if we get that x/y is expansion, then // xy is a fake contraction. int32_t analyzeContractions(Line** lines, int32_t size, CompareFn comparer) { int32_t i = 0, j = 0; int32_t outOfOrder = 0; UColAttributeValue strength = UCOL_OFF; UColAttributeValue currStrength = UCOL_OFF; Line **prevLine = lines; Line **currLine = NULL; Line **backupLine = NULL; UBool prevIsContraction = FALSE, currIsContraction = FALSE; // Problem here is detecting a contraction that is at the very end of the sorted list for(i = 1; i < size; i++) { currLine = lines+i; strength = probeStrength(prevLine, currLine, comparer); if(strength == UCOL_OFF || strength != (*currLine)->strength) { prevIsContraction = FALSE; currIsContraction = FALSE; if(!outOfOrder) { if(gVerbose) { u_fprintf(log, "Possible contractions: "); } } // now we have two elements that are different. The question is, // which one of them is the contraction - which one has moved. // Could be the previous, but could also be the current. outOfOrder++; // First, lets check whether the previous has jumped back j = i+1; // skip all the nexts that have smaller strength, they don't have an effect while(j < size && (*(lines+j))->strength > (*currLine)->strength) { j++; } // check if there are other elements of same or greater strength while(j < size && (strength = probeStrength(prevLine, (backupLine = lines+j), comparer)) == UCOL_OFF) { j++; // if we skipped more than one, it might be in fact a contraction prevIsContraction = TRUE; } if(prevIsContraction) { noteContraction(*prevLine); j = i-2; // add all the previous elements with smaller strength, since they also // will jump over and are contractions while(j >= 0 && (*(lines+j+1))->strength > (*currLine)->strength) { strength = probeStrength(lines+j, currLine, comparer); if(strength == UCOL_OFF) { noteContraction(*(lines+j)); } j--; } } // now we check if the current element is jumping forward, // the dance steps are analogous to above. j = i - 2; while(j >= 0 && (*(lines+j+1))->strength > (*currLine)->strength) { j--; } while(j >= 0 && (strength = probeStrength((backupLine = lines+j), currLine, comparer)) == UCOL_OFF) { j--; currIsContraction = TRUE; } if(currIsContraction) { if(gVerbose) { escapeALine(*currLine, log); u_fprintf(log, " "); } j = i+1; while(j < size && (*(lines+j))->strength > (*currLine)->strength) { strength = probeStrength(prevLine, lines+j, comparer); if(strength == UCOL_OFF) { noteContraction(*(lines+j)); } j++; } } // Not sure about either. List both and then check if(!(prevIsContraction || currIsContraction)) { noteContraction(*prevLine); noteContraction(*currLine); } } prevLine = currLine; } if(outOfOrder) { if(gVerbose) { u_fprintf(log, "\n"); } } return outOfOrder; } int32_t detectContractions(Line **gLines, Line *lines, int32_t size, CompareFn comparer) { int32_t i = 0, j = 0; int32_t noContractions = 0; // Create and compare doubles: Line *backupLines = new Line[size]; Line::copyArray(backupLines, lines, size); // detect contractions Line **gLinesBackup = NULL; //new Line*[size]; for(i = 0; i < size; i++) { // preserve index and previous Line::copyArray(lines, backupLines, size); for(j = 0; j < size; j++) { u_strcpy(lines[j].name, backupLines[i].name); u_strcat(lines[j].name, backupLines[j].name); lines[j].len = backupLines[i].len+backupLines[j].len; } if((noContractions += analyzeContractions(gLines, size, comparer)) && gDebug) { if(gLinesBackup == NULL) { gLinesBackup = new Line*[size]; } // Show the sorted doubles, for debugging setArray(gLinesBackup, lines, size); qsort(gLinesBackup, size, sizeof(Line *), comparer); //setIndexes(gLinesBackup, size); analyzeStrength(gLinesBackup, size, comparer); printOrdering(gLinesBackup, size, log); } if(!gQuiet) { u_fprintf(log, "."); } } if(!gQuiet) { u_fprintf(log, "\n"); } delete[] backupLines; if(gLinesBackup) { delete[] gLinesBackup; } return noContractions; } // gLines in this function is an array of sorted pointers. // Contractions are already included. int32_t detectExpansions(Line **gLines, int32_t size, CompareFn comparer) { UErrorCode status = U_ZERO_ERROR; // detect expansions UColAttributeValue startStrength = UCOL_OFF, endStrength = UCOL_OFF, strength = UCOL_OFF, previousStrength = UCOL_OFF; Line start, end, src; Line *startP = &start, *endP = &end, *srcP = &src; Line *current = NULL; memset(startP, 0, sizeof(Line)); memset(endP, 0, sizeof(Line)); memset(srcP, 0, sizeof(Line)); int32_t srcLen; int32_t i = 0, j = 0, k = 0; for(i = 0; i < size; i++) { u_strcpy(start.name, (*(gLines+i))->name); u_strcpy(end.name, (*(gLines+i))->name); srcLen = (*(gLines+i))->len; u_strcpy(start.name+srcLen, (*(gLines))->name); start.len = srcLen + (*(gLines))->len; u_strcpy(end.name+srcLen, (*(gLines+size-1))->name); end.len = srcLen + (*(gLines+size-1))->len; for(k = 0; k < size; k++) { // k is index of a thing that is not doubled current = *(gLines+k); // see if we have moved to front // has it moved to the very beginning if((startStrength = probeStrength((gLines+k), &startP, comparer)) != UCOL_OFF) { continue; // this one is in the front } // has it moved to the very end? if((endStrength = probeStrength(&endP, (gLines+k), comparer)) != UCOL_OFF) { continue; // this one is in the back } // Potential Expansion if(gDebug) { //gVerbose u_fprintf(log, "Possible expansion: "); escapeALine(*(gLines+k), log); u_fprintf(log, " "); } // Now we have to make sure that this is really an expansion // First, we have to find it u_strcpy(src.name, (*(gLines+i))->name); for(j = 0; j < size; j++) { u_strcpy(src.name+srcLen, (*(gLines+j))->name); src.len = srcLen + (*(gLines+j))->len; if((strength = probeStrength(&srcP, (gLines+k), comparer)) == UCOL_OFF) { strength = probeStrength((gLines+k), &srcP, comparer); // we found it *(gLines+j-1) is the element that is interesting // since gLines+j-1 < gLines+k < gLines+j if(gDebug) { //gVerbose u_fprintf(log, "i = %i, k = %i, j = %i ", i, k, j); escapeALine(*(gLines+i), log); escapeALine(*(gLines+j-1), log); printStrength(previousStrength, log); escapeALine(current, log); printStrength(strength, log); escapeALine(*(gLines+i), log); escapeALine(*(gLines+j), log); u_fprintf(log, "\n"); } // check whether it is a contraction that is the same as an expansion // or a multi character that doesn't do anything current->addExpansionHit(i, j); current->isExpansion = TRUE; current->expIndex = k; // cache expansion gExpansions.put(UnicodeString(current->name, current->len), current, status); //new Line(*current) break; } previousStrength = strength; } } if(!gQuiet) { u_fprintf(log, "."); } } if(!gQuiet) { u_fprintf(log, "\n"); } // now we have identified possible expansions. We need to find out how do they expand. // Let's iterate over expansions cache - it's easier. const UHashElement *el = NULL; int32_t hashIndex = -1; Line *doubles = new Line[size*10]; Line **sorter = new Line*[size*10]; int32_t currSize = 0; int32_t newSize = 0; Line *prev = NULL; Line *next = NULL; Line *origin = NULL; int result = 0; // Make a list of things in the vincinity of expansion candidate // in expansionPrefixes and expansionAfter we have stored the // prefixes of stuff that caused the detection of an expansion // and a position where the expansion was. // For example (icu, de__PHONEBOOK), we had: // aE <<< \u00E4 < af // AD < \u00E4 <<< Ae // From that we will construct the following sequence: // AD < aE <<< \u00E4/ <<< Ae < af // then we will take the vincinity of \u00E4: // aE <<< \u00E4/ <<< Ae // then we will choose the smallest expansion to be the expansion // part: 'e'. // if there is equality, we choose the equal part: // (win32, de__PHONEBOOK): // AD < \u00E4/ = ae <<< aE <<< Ae // we choose 'e'. while((el = gExpansions.nextElement(hashIndex)) != NULL) { newSize = 0; current = (Line *)el->value.pointer; currSize = size*current->expansionPrefixesSize; if(gDebug) { escapeALine(current, log); u_fprintf(log, " Number: %i\n", current->expansionPrefixesSize); } // construct the doubles for(i = 0; i < current->expansionPrefixesSize; i++) { doubles[newSize].suffix = current->expansionAfter[i]-1; doubles[newSize++].setToConcat(*(gLines+current->expansionPrefixes[i]), *(gLines+current->expansionAfter[i]-1)); doubles[newSize].suffix = current->expansionAfter[i]; doubles[newSize++].setToConcat(*(gLines+current->expansionPrefixes[i]), *(gLines+current->expansionAfter[i])); } // add the expansion we're observing doubles[newSize++] = *current; setArray(sorter, doubles, newSize); qsort(sorter, newSize, sizeof(Line*), comparer); analyzeStrength(sorter, newSize, comparer); if(gDebug) { printOrdering(sorter, newSize, log); } i = 0; while(**(sorter+i) != *current) { i++; } // find the two additions if((*(sorter+i))->strength == UCOL_IDENTICAL) { // if we ae id origin = *(gLines+((*(sorter+i-1))->suffix)); u_strcpy(current->expansionString, origin->name); current->expLen = origin->len; } else if(i < newSize-1 && (*(sorter+i+1))->strength == UCOL_IDENTICAL) { origin = *(gLines+((*(sorter+i+1))->suffix)); u_strcpy(current->expansionString, origin->name); current->expLen = origin->len; } else { if(i > 0) { prev = *(gLines+(*(sorter+i-1))->suffix); if(i < newSize-1) { next = *(gLines+(*(sorter+i+1))->suffix); result = comparer(&prev, &next); if(result <= 0) { u_strcpy(current->expansionString, prev->name); current->expLen = prev->len; } else { u_strcpy(current->expansionString, next->name); current->expLen = next->len; } } } if(0) { //if(gDebug) u_fprintf(log, "Expansion is: "); escapeALine(current, log); u_fprintf(log, "/"); escapeExpansion(current, log); u_fprintf(log, "\n"); } } noteExpansion(current); //noteExpansion(gLines, current, size, comparer); if(!gQuiet) { u_fprintf(log, "."); } } if(!gQuiet) { u_fprintf(log, "\n"); } delete[] doubles; delete[] sorter; return gExpansions.count(); } UBool isTailored(Line *line, UErrorCode &status) { UBool result = FALSE; UCollationElements *tailoring = ucol_openElements(gCol, line->name, line->len, &status); UCollationElements *uca = ucol_openElements(gUCA, line->name, line->len, &status); int32_t tailElement = UCOL_NULLORDER; int32_t ucaElement = UCOL_NULLORDER; do { do { tailElement = ucol_next(tailoring, &status); } while(tailElement == 0); do { ucaElement = ucol_next(uca, &status); } while(ucaElement == 0); if(tailElement != ucaElement) { result = TRUE; break; } } while (tailElement != UCOL_NULLORDER && ucaElement != UCOL_NULLORDER); ucol_closeElements(tailoring); ucol_closeElements(uca); return result; } void reduceUntailored(Line **gLines, int32_t size){ UErrorCode status = U_ZERO_ERROR; Line *current = *(gLines); Line *previous = NULL; while(current) { // if the current line is not tailored according to the UCA if(!isTailored(current, status)) { // we remove it current->isRemoved = TRUE; } else { // if it's tailored if(current->previous && current->previous->isRemoved == TRUE) { previous = current->previous; while(previous && (previous->strength > current->strength || previous->isExpansion || previous->isContraction) && previous->isRemoved) { if(previous->previous && previous->previous->isRemoved) { previous = previous->previous; } else { break; } } if(previous) { previous->isReset = TRUE; } else { (*(gLines))->isReset = TRUE; } } } current = current->next; } } void constructAndAnalyze(Line **gLines, Line *lines, int32_t size, CompareFn comparer) { int32_t i = 0, j = 0, k = 0; // setup our compare arrays to point to single set. // For contractions we need a block of data setArray(gLines, lines, size); //size = setArray(gLines); qsort(gLines, size, sizeof(Line *), comparer); // Establish who is previous according to the sort order //setIndexes(gLines, size); analyzeStrength(gLines, size, comparer); if(gVerbose) { u_fprintf(log, "Ordering:\n"); printOrdering(gLines, size, log); } //showDifferences(exemplarSetSize); //dumpData(exemplarSetSize); if(!gQuiet) { u_fprintf(log, "Detecting contractions?\n"); } int32_t noContractions = 0; noContractions = detectContractions(gLines, lines, size, comparer); if(!gQuiet) { u_fprintf(log, "Detected %i contractions\n", noContractions); } // now we have suspected contractions in the table // we have to re-sort the things size = setArray(gLines); qsort(gLines, size, sizeof(Line *), comparer); analyzeStrength(gLines, size, comparer); if(!gQuiet) { u_fprintf(log, "Detecting expansions\n"); } int32_t noExpansions = detectExpansions(gLines, size, comparer); if(!gQuiet) { u_fprintf(log, "Detected %i expansions\n", noExpansions); } positionExpansions(gLines, size, comparer); if(gVerbose) { u_fprintf(log, "After positioning expansions:\n"); printOrdering(gLines, size, log, TRUE); } //reduceUntailored(gLines, size); if(!gQuiet) { u_fprintf(out, "Final result\n"); } printOrdering(gLines, size, out, TRUE); printOrdering(gLines, size, log, TRUE); } // Check whether upper case comes before lower case or vice-versa int32_t checkCaseOrdering(void) { UChar stuff[][3] = { { 0x0061, separatorChar, 0x0061}, //"aa", { 0x0061, separatorChar, 0x0041 }, //"a\\u00E0", { 0x0041, separatorChar, 0x0061 }, //"\\u00E0a", { 0x0041, separatorChar, 0x0041 }, //"\\u00E0a", //{ 0x00E0, separatorChar, 0x00E0 } //"\\u00E0\\u00E0" }; const int32_t size = sizeof(stuff)/sizeof(stuff[0]); Line **sortedLines = new Line*[size]; Line lines[size]; int32_t i = 0; int32_t ordered = 0, reversed = 0; for(i = 0; i < size; i++) { lines[i].setName(stuff[i], 3); } setArray(sortedLines, lines, size); qsort(sortedLines, size, sizeof(Line*), gComparer); for(i = 0; i < size; i++) { if(*(sortedLines+i) == &lines[i]) { ordered++; } if(*(sortedLines+i) == &lines[size-i-1]) { reversed++; } } delete[] sortedLines; if(ordered == size) { return 0; // in normal order } else if(reversed == size) { return 1; // in reversed order } else { return -1; // unknown order } } // Check whether the secondaries are in the straight or reversed order int32_t checkSecondaryOrdering(void) { UChar stuff[][5] = { { 0x0061, separatorChar, 0x0061, separatorChar, 0x00E0 }, //"aa", { 0x0061, separatorChar, 0x00E0, separatorChar, 0x0061 }, //"a\\u00E0", { 0x00E0, separatorChar, 0x0061, separatorChar, 0x0061 }, //"\\u00E0a", //{ 0x00E0, separatorChar, 0x00E0 } //"\\u00E0\\u00E0" }; const int32_t size = sizeof(stuff)/sizeof(stuff[0]); Line **sortedLines = new Line*[size]; Line lines[size]; int32_t i = 0; int32_t ordered = 0, reversed = 0; for(i = 0; i < size; i++) { lines[i].setName(stuff[i], 5); } setArray(sortedLines, lines, size); qsort(sortedLines, size, sizeof(Line*), gComparer); for(i = 0; i < size; i++) { if(*(sortedLines+i) == &lines[i]) { ordered++; } if(*(sortedLines+i) == &lines[size-i-1]) { reversed++; } } delete[] sortedLines; if(ordered == size) { return 0; // in normal order } else if(reversed == size) { return 1; // in reversed order } else { return -1; // unknown order } } // We have to remove ignorable characters from the exemplar set, // otherwise, we get messed up results void removeIgnorableChars(UnicodeSet &exemplarUSet, CompareFn comparer, UErrorCode &status) { UnicodeSet ignorables, primaryIgnorables; UnicodeSetIterator exemplarUSetIter(exemplarUSet); exemplarUSetIter.reset(); Line empty; Line *emptyP = ∅ Line current; Line *currLine = ¤t; UColAttributeValue strength = UCOL_OFF; while(exemplarUSetIter.next()) { if(exemplarUSetIter.isString()) { // process a string u_memcpy(currLine->name, exemplarUSetIter.getString().getBuffer(), exemplarUSetIter.getString().length()); currLine->len = exemplarUSetIter.getString().length(); strength = probeStrength(&emptyP, &currLine, comparer); if(strength == UCOL_IDENTICAL) { ignorables.add(exemplarUSetIter.getString()); } else if(strength > UCOL_PRIMARY) { primaryIgnorables.add(exemplarUSetIter.getString()); } } else { // process code point UBool isError = FALSE; UChar32 codePoint = exemplarUSetIter.getCodepoint(); currLine->len = 0; U16_APPEND(currLine->name, currLine->len, 25, codePoint, isError); strength = probeStrength(&emptyP, &currLine, comparer); if(strength == UCOL_IDENTICAL) { ignorables.add(codePoint); } else if(strength > UCOL_PRIMARY) { primaryIgnorables.add(codePoint); } } } exemplarUSet.removeAll(ignorables); exemplarUSet.removeAll(primaryIgnorables); UnicodeString removedPattern; if(ignorables.size()) { u_fprintf(log, "Ignorables:\n"); ignorables.toPattern(removedPattern, TRUE); removedPattern.setCharAt(removedPattern.length(), 0); escapeString(removedPattern.getBuffer(), removedPattern.length(), log); u_fprintf(log, "\n"); } if(primaryIgnorables.size()) { u_fprintf(log, "Primary ignorables:\n"); primaryIgnorables.toPattern(removedPattern, TRUE); removedPattern.setCharAt(removedPattern.length(), 0); escapeString(removedPattern.getBuffer(), removedPattern.length(), log); u_fprintf(log, "\n"); } } // TODO: develop logic for choosing boundary characters - right now it is hardcoded // It should be a function of used scripts. Also, check whether we need to save // used script names void addUtilityChars(UnicodeSet &exemplarUSet, UErrorCode &status) { // in order to get nice rules, we need to add some characters to the // starting set. These are mostly parts of compatibility composed characters, // such as L-middle dot (middle dot is 0x00B7). If we don't add these, we would // get a reset at a funky character, such as L-middle dot. This list will probably // grow. exemplarUSet.add(0x00B7); // these things represent a script before the target script and // a script after. More logic should be added so that these characters are // chosen automatically exemplarUSet.add(0x0038); exemplarUSet.add(0x0039); //exemplarUSet.add(0x0433); //exemplarUSet.add(0x0436); exemplarUSet.add(0xfa29); exemplarUSet.add(0xfa28); } void getExemplars(const char *locale, UnicodeSet &exemplars, UErrorCode &status) { // first we fill out structures with exemplar characters. UResourceBundle *res = ures_open(NULL, locale, &status); int32_t exemplarLength = 0; UnicodeString exemplarString = ures_getUnicodeStringByKey(res, "ExemplarCharacters", &status); exemplars.clear(); exemplars.applyPattern(exemplarString, status); ures_close(res); } void prepareStartingSet(UnicodeSet &exemplarUSet, CompareFn comparer, UErrorCode &status) { int32_t i = 0; UnicodeString exemplarString; exemplarUSet.toPattern(exemplarString); // Produce case closure of exemplar characters // Then we want to figure out what is the script of the exemplar characters // just pick several and see their script const char* usedScriptNames[USCRIPT_CODE_LIMIT]; int32_t numberOfUsedScripts = 0; char scriptSetPattern[256]; UnicodeString pattern; // for debugging UChar32 exChar = -1; while(exemplarUSet.size() != 0 && (exChar = exemplarUSet.charAt(0)) != -1) { int32_t scriptNo = u_getIntPropertyValue(exChar, UCHAR_SCRIPT); usedScriptNames[numberOfUsedScripts] = u_getPropertyValueName(UCHAR_SCRIPT, scriptNo, U_SHORT_PROPERTY_NAME); sprintf(scriptSetPattern, "[:%s:]", usedScriptNames[numberOfUsedScripts]); numberOfUsedScripts++; UnicodeSet scriptSet(UnicodeString(scriptSetPattern, ""), status); exemplarUSet.removeAll(scriptSet); exemplarUSet.toPattern(pattern, TRUE); } exemplarUSet.clear(); // always add ASCII //exemplarUSet.addAll(UnicodeSet(UnicodeString("[\\u0020-\\u007f]", ""), status)); exemplarUSet.addAll(UnicodeSet(UnicodeString("[\\u0041-\\u005b]", ""), status)); if(gExemplar) { exemplarUSet.applyPattern(exemplarString, status); exemplarUSet.closeOver(USET_CASE); if(!gQuiet) { u_fprintf(out, "ICU exemplar characters:\n"); escapeString(exemplarString.getBuffer(), exemplarString.length(), out); u_fprintf(out, "\n"); } } else { if(!gQuiet) { u_fprintf(out, "Using scripts:\n"); } // add interesting scripts for(i = 0; i < numberOfUsedScripts; i++) { sprintf(scriptSetPattern, "[:%s:]", usedScriptNames[i]); exemplarUSet.addAll(UnicodeSet(UnicodeString(scriptSetPattern, ""), status)); if(!gQuiet) { u_fprintf(out, "%s\n", scriptSetPattern); } } } removeIgnorableChars(exemplarUSet, comparer, status); addUtilityChars(exemplarUSet, status); /* // try to check whether tailored set and exemplar characters match. USet *tailored = ucol_getTailoredSet(gCol, &status); UBool tailoredContained = exemplarUSet.containsAll(*((UnicodeSet *)tailored)); if(!tailoredContained) { ((UnicodeSet *)tailored)->removeAll(exemplarUSet); UnicodeString pattern; ((UnicodeSet *)tailored)->toPattern(pattern, TRUE); } uset_close(tailored); */ //return exemplarUSet; } void setOutputFile(const char *name, UErrorCode &status) { int32_t i = 0; char filename[256]; strcpy(filename, name); for(i = 0; i < gPlatformNo; i++) { strcat(filename, "_"); strcat(filename, platforms[gPlatformIndexes[i]].name); } if(gExemplar) { strcat(filename, "_exemplar"); } else { strcat(filename, "_script"); } strcat(filename, ".utf16.txt"); out = u_fopen(filename, "wb", "en", "utf-16"); } void processCollator(UCollator *col, UErrorCode &status) { int32_t i = 0; gCol = col; UChar ruleString[16384]; int32_t ruleStringLength = ucol_getRulesEx(gCol, UCOL_TAILORING_ONLY, ruleString, 16384); if(!gQuiet) { u_fprintf(out, "ICU rules:\n"); printRules(ruleString, ruleStringLength, out); printRules(ruleString, ruleStringLength, log); //escapeString(ruleString, ruleStringLength, out); u_fprintf(out, "\n"); } const char *locale = ucol_getLocale(gCol, ULOC_REQUESTED_LOCALE, &status); UnicodeSet exemplarUSet; if(locale) { getExemplars(locale, exemplarUSet, status); } else { exemplarUSet = *((UnicodeSet *)ucol_getTailoredSet(gCol, &status)); } for(i = 0; i < gPlatformNo; i++) { u_fprintf(out, "\nGenerating order for platform: %s\n", platforms[gPlatformIndexes[i]].name); gComparer = platforms[gPlatformIndexes[i]].comparer; prepareStartingSet(exemplarUSet, gComparer, status); int32_t itemLen = 0; // get the number of all the items from the set (both codepoints and strings) int32_t exemplarSetSize = exemplarUSet.size(); UnicodeSetIterator exemplarUSetIter(exemplarUSet); // allocate ICU lines gICULines = new Line*[exemplarSetSize*5]; int32_t j = 0; int32_t linesCount = 0; Line *lines = new Line[exemplarSetSize]; int32_t reversedSecondary = checkSecondaryOrdering(); if(reversedSecondary == 0) { u_fprintf(out, "Secondaries do not seem to be reversed\n"); } else if(reversedSecondary == 1) { u_fprintf(out, "Secondaries are reversed\n"); if(gComparer == ICUstrcmp) { ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); } } else { u_fprintf(out, "Cannot conclude if secondaries are reversed\n"); } int32_t reversedCase = checkCaseOrdering(); if(reversedCase == 0) { u_fprintf(out, "Case does not seem to be reversed\n"); } else if(reversedCase == 1) { u_fprintf(out, "Case is reversed\n"); if(gComparer == ICUstrcmp) { ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_OFF, &status); } } else { u_fprintf(out, "Cannot conclude if case is reversed\n"); } exemplarUSetIter.reset(); gElements.removeAll(); gExpansions.removeAll(); linesCount = 0; while(exemplarUSetIter.next()) { Line *currLine = lines+linesCount; if(exemplarUSetIter.isString()) { // process a string u_memcpy(currLine->name, exemplarUSetIter.getString().getBuffer(), exemplarUSetIter.getString().length()); currLine->len = exemplarUSetIter.getString().length(); } else { // process code point UBool isError = FALSE; currLine->len = 0; U16_APPEND(currLine->name, currLine->len, 25, exemplarUSetIter.getCodepoint(), isError); } currLine->name[currLine->len] = 0; // zero terminate, for our evil ways currLine->index = linesCount; linesCount++; noteElement(currLine); } constructAndAnalyze(gICULines, lines, exemplarSetSize, gComparer); delete[] lines; } // cleanup globals delete[] gICULines; u_fflush(out); u_fclose(out); ucol_close(gCol); } void processLocale(const char *locale, UErrorCode &status) { gWinLCID = uloc_getLCID(locale); UCollator *col = ucol_open(locale, &status); setOutputFile(locale, status); u_fprintf(out, "Locale %s (LCID:%06X)\n", locale, gWinLCID); processCollator(col, status); } UBool hasCollationElements(const char *locName) { UErrorCode status = U_ZERO_ERROR; UResourceBundle *ColEl = NULL; UResourceBundle *loc = ures_open(NULL, locName, &status);; if(U_SUCCESS(status)) { status = U_ZERO_ERROR; ColEl = ures_getByKey(loc, "CollationElements", ColEl, &status); if(status == U_ZERO_ERROR) { /* do the test - there are real elements */ ures_close(ColEl); ures_close(loc); return TRUE; } ures_close(ColEl); ures_close(loc); } return FALSE; } int main(int argc, char* argv[]) { UErrorCode status = U_ZERO_ERROR; err = u_finit(stderr, "en", "latin-1"); log = u_finit(stdout, "en", "latin-1"); /* USet *wsp = uprv_openRuleWhiteSpaceSet(&status); uset_add(wsp, 0x0041); uset_remove(wsp, 0x0041); UnicodeString pat; ((UnicodeSet *)wsp)->toPattern(pat, TRUE); pat.setCharAt(pat.length(), 0); escapeString(pat.getBuffer(), pat.length(), log); u_fflush(log); */ UTransliterator *anyHex = utrans_open("[^\\u000a\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status); u_fsettransliterator(log, U_WRITE, anyHex, &status); processArgs(argc, argv, status); int32_t i = 0; gElements.setValueDeleter(deleteLineElement); if(U_FAILURE(status) || gPlatformNo == 0) { return -1; } gUCA = ucol_open("root", &status); if(gRulesStdin) { char buffer[1024]; UChar ruleBuffer[16384]; UChar *rules = ruleBuffer; int32_t maxRuleLen = 16384; int32_t rLen = 0; while(gets(buffer)) { if(buffer[0] != '/' && buffer[1] != '/') { rLen = u_unescape(buffer, rules, maxRuleLen); rules += rLen; maxRuleLen -= rLen; } } UParseError parseError; //escapeString(ruleBuffer, rules-ruleBuffer, log);// u_fprintf(log, "%U\n", ruleBuffer); UCollator *col = ucol_openRules(ruleBuffer, rules-ruleBuffer, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); if(U_SUCCESS(status)) { setOutputFile("stdinRules", status); processCollator(col, status); } else { u_fprintf(err, "Error %s\n", u_errorName(status)); } } else { if(gLocale) { processLocale(gLocale, status); } else if(gLocaleNo) { for(i = 0; i < gLocaleNo; i++) { processLocale(gLocales[i], status); } } else { // do the loop through all the locales int32_t noOfLoc = uloc_countAvailable(); const char *locName = NULL; for(i = 0; i