Skip to content

Commit

Permalink
CLDR-14865 improve suggestion for fixed date pattern in case of bad f…
Browse files Browse the repository at this point in the history
…ield (#3749)
  • Loading branch information
pedberg-icu authored May 24, 2024
1 parent 3cc9e07 commit df5f77f
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 3 deletions.
141 changes: 138 additions & 3 deletions tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckDates.java
Original file line number Diff line number Diff line change
Expand Up @@ -859,13 +859,47 @@ private static long getDateTimeinMillis(
static long date4004BC = getDateTimeinMillis(-4004, 9, 23, 2, 0, 0);
static Random random = new Random(0);

// We extend VariableField to implement a proper equals() method so we can use
// List methods remove() and get().
private class MyVariableField extends DateTimePatternGenerator.VariableField {
public MyVariableField(String string) {
super(string);
}

@Override
public boolean equals(Object object) {
if (!(object instanceof DateTimePatternGenerator.VariableField)) {
return false;
}
return (this.toString().equals(object.toString()));
}

@Override
public int hashCode() {
return this.toString().hashCode();
}
}

// In a List, replace DateTimePatternGenerator.VariableField items with MyVariableField
private List<Object> updateVariableFieldInList(List<Object> items) {
for (int itemIndex = 0; itemIndex < items.size(); itemIndex++) {
Object object = items.get(itemIndex);
if (object instanceof DateTimePatternGenerator.VariableField) {
items.set(itemIndex, new MyVariableField(object.toString()));
}
}
return items;
}

private void checkPattern(
DateTimePatternType dateTypePatternType,
String path,
String fullPath,
String value,
List<CheckStatus> result)
throws ParseException {
// Map to skeleton including mapping to canonical pattern chars e.g. LLL -> MMM
// (ICU internal, for CLDR?)
String skeleton = dateTimePatternGenerator.getSkeletonAllowingDuplicates(value);
String skeletonCanonical =
dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(value);
Expand Down Expand Up @@ -905,6 +939,8 @@ private void checkPattern(

if (dateTypePatternType == DateTimePatternType.AVAILABLE
|| dateTypePatternType == DateTimePatternType.INTERVAL) {
// Map to skeleton including mapping to canonical pattern chars e.g. LLL -> MMM
// (ICU internal, for CLDR?)
String idCanonical =
dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(id);
if (skeleton.isEmpty()) {
Expand All @@ -922,9 +958,108 @@ private void checkPattern(
+ ".",
id,
value));
} else if (!dateTimePatternGenerator.skeletonsAreSimilar(
} else if (!dateTimePatternGenerator.skeletonsAreSimilar( // ICU internal for CLDR
idCanonical, skeletonCanonical)) {
// Adjust pattern to match skeleton, but only width and subtype within
// canonical categories e.g. MMM -> LLLL, H -> HH. Will not change across
// canonical categories e.g. m -> M
String fixedValue = dateTimePatternGenerator.replaceFieldTypes(value, id);
// check to see if that was enough; if not, may need to do more work.
String fixedValueCanonical =
dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(fixedValue);
String valueFromId = null;
if (!dateTimePatternGenerator.skeletonsAreSimilar(
idCanonical, fixedValueCanonical)) {
// Need to do more work. Try two things to get a reasonable suggestion:
// - Getting the winning pattern (valueFromId) from availableFormats for id,
// if it is not the same as the bad value we already have.
// - Replace a pattern field in fixedValue twhose type does not match the
// corresponding field from id.
//
// Here is the first thing, getting the winning pattern (valueFromId) from
// availableFormats for id.
String availableFormatPath =
"//ldml/dates/calendars/calendar[@type=\""
+ calendar
+ "\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\""
+ id
+ "\"]";
valueFromId =
getCldrFileToCheck().getWinningValueWithBailey(availableFormatPath);
if (valueFromId != null
&& (valueFromId.equals(value) || valueFromId.equals(fixedValue))) {
valueFromId = null; // not useful in this case
}
//
// Here is the second thing, replacing a pattern field that does not match.
// We compare FormatParser Lists for idCanonical and fixedValueCanonical
// and if a mismatch we update the FormatParser list for fixedValue and
// generate an updated string from the FormatParser.
DateTimePatternGenerator.FormatParser idCanonFormat =
new DateTimePatternGenerator.FormatParser();
idCanonFormat.set(idCanonical);
List<Object> idCanonItems = updateVariableFieldInList(idCanonFormat.getItems());
DateTimePatternGenerator.FormatParser fixedValueCanonFormat =
new DateTimePatternGenerator.FormatParser();
fixedValueCanonFormat.set(fixedValueCanonical);
List<Object> fixedValueCanonItems =
updateVariableFieldInList(fixedValueCanonFormat.getItems());
DateTimePatternGenerator.FormatParser fixedValueFormat =
new DateTimePatternGenerator.FormatParser();
fixedValueFormat.set(fixedValue);
List<Object> fixedValueItems =
updateVariableFieldInList(fixedValueFormat.getItems());
// For idCanonFormat and fixedValueCanonFormat we started with skeletons (no
// literal text), so the items we are comparing will all be MyVariableField. We
// iterate over idCanonItems stripping matching items from fixedValueCanonItems
// until we hopefully have one remaining item in each that do not match each
// other. Then in fixedValueItems we replace the mismatched item with the one
// from idCanonItems.
int itemIndex = idCanonItems.size();
while (--itemIndex >= 0) {
Object idCanonItem = idCanonItems.get(itemIndex);
if (fixedValueCanonItems.remove(idCanonItem)) {
// we have a match, removed it from fixedValueCanonItems, now remove
// it from idCanonItems (ok since we are iterating backwards).
idCanonItems.remove(itemIndex);
}
}
// Hopefully this leaves us with one item in each list, the mismatch to fix.
if (idCanonItems.size() == 1 && fixedValueCanonItems.size() == 1) {
// In fixedValueItems, replace all occurrences of the single item in
// fixedValueCanonItems (bad value) with the item in idCanonItems.
// There might be more than one for e.g. intervalFormats.
Object fixedValueCanonItem = fixedValueCanonItems.get(0); // replace this
Object idCanonItem = idCanonItems.get(0); // with this
boolean didUpdate = false;
while ((itemIndex = fixedValueItems.indexOf(fixedValueCanonItem)) >= 0) {
fixedValueItems.set(itemIndex, idCanonItem);
didUpdate = true;
}
if (didUpdate) {
// Now get the updated fixedValue with this replacement
fixedValue = fixedValueFormat.toString();
fixedValueCanonical =
dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(
fixedValue);
}
}
// If this replacement attempt did not work, we give up on fixedValue
if (!dateTimePatternGenerator.skeletonsAreSimilar(
idCanonical, fixedValueCanonical)) {
fixedValue = null;
}
}
// Now report problem and suggested fix
String suggestion = "(no suggestion)";
if (fixedValue != null) {
suggestion = "(" + fixedValue + ")";
if (valueFromId != null && !valueFromId.equals(fixedValue)) {
suggestion += " or (" + valueFromId + ")";
}
} else if (valueFromId != null) {
suggestion = "(" + valueFromId + ")";
}
result.add(
new CheckStatus()
.setCause(this)
Expand All @@ -934,13 +1069,13 @@ private void checkPattern(
// ({2}). " +
.setMessage(
"Your pattern ({2}) doesn't correspond to what is asked for. Yours would be right for an ID ({1}) but not for the ID ({0}). "
+ "Please change your pattern to match what was asked, such as ({3}), with the right punctuation and/or ordering for your language. See "
+ "Please change your pattern to match what was asked, such as {3}, with the right punctuation and/or ordering for your language. See "
+ CLDRURLS.DATE_TIME_PATTERNS_URL
+ ".",
id,
skeletonCanonical,
value,
fixedValue));
suggestion));
}
if (dateTypePatternType == DateTimePatternType.AVAILABLE) {
// index y+w+ must correpond to pattern containing only Y+ and w+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,46 @@ public void TestCheckDates() {
testFile.remove(path1);
testFile.remove(path2);
}
// Test for CLDR-14865
testFile = new CLDRFile(new SimpleXMLSource("fi"));
testFactory.addFile(testFile);
String availableFormatTestPath =
"//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"GyMd\"]";
String availableFormatValue = "d.m.y G"; // erroneous, should have M not m
testFile.add(availableFormatTestPath, availableFormatValue);
CheckCLDR c = new CheckDates(testFactory);
c.setCldrFileToCheck(testFile, options, result);
result.clear();
c.check(
availableFormatTestPath,
availableFormatTestPath,
availableFormatValue,
options,
result);
Subtype actualSubtype = Subtype.none;
String message = null;
for (CheckStatus status : result) {
actualSubtype = status.getSubtype();
message = status.getMessage();
break;
}
if (actualSubtype != Subtype.incorrectDatePattern
|| message == null
|| !message.contains("d.M.y G")) {
String errorMessage =
"fi generic availableFormat for id=GyMd with value "
+ availableFormatValue
+ ":";
if (actualSubtype != Subtype.incorrectDatePattern) {
errorMessage +=
" expected Subtype.incorrectDatePattern, got " + actualSubtype + " ;";
}
if (message == null || !message.contains("d.M.y G")) {
errorMessage += " expected message should contain suggested d.M.y G, got message: ";
errorMessage += (message == null) ? "(null)" : message;
}
errln(errorMessage);
}
}

/** Should be some CLDR locales, plus a locale specially allowed in limited submission */
Expand Down

0 comments on commit df5f77f

Please sign in to comment.