Skip to content

Commit

Permalink
more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
fhennig committed Jan 17, 2025
1 parent d87c684 commit a899740
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,20 @@ describe('ColumnMapping', () => {
});

it('should create a mapping from columns with sensible column mapping', () => {
const sourceColumns = ['state', 'geoLocAdmin2'];
const sourceColumns = ['state', 'geoLocAdmin', 'geoLocAdmin2'];
const inputFields = [
{ name: 'date' },
{ name: 'geoLocAdmin1', displayName: 'Collection subdivision level 1' },
{ name: 'geoLocAdmin2', displayName: 'Collection subdivision level 2' },
{ name: 'geoLocAdmin1', displayName: 'Collection subdivision level 1' },
];

const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields);
const entries = mapping.entries();

expect(entries).toEqual([
['state', null],
['geoLocAdmin2', 'geoLocAdmin2'],
['state', null],
['geoLocAdmin', 'geoLocAdmin1'],
]);
});

Expand Down Expand Up @@ -71,16 +72,16 @@ describe('ColumnMapping', () => {
const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields);
let entries = mapping.entries();
expect(entries).toEqual([
['loc', null],
['date', 'date'],
['loc', null],
]);

const updatedMapping = mapping.updateWith('loc', 'date');

entries = updatedMapping.entries();
expect(entries).toEqual([
['loc', 'date'],
['date', null],
['loc', 'date'],
]);
});

Expand All @@ -97,6 +98,6 @@ describe('ColumnMapping', () => {
const remappedFile = await updatedMapping.applyTo(new RawFile(tsvFile));
const remappedContent = await remappedFile.text();

expect(remappedContent).toBe('location\tdate\n' + '"U\nS\nA"\t2023-01-01\n' + 'Canada\t2023-01-02\n');
expect(remappedContent).toBe('date\tlocation\n' + '2023-01-01\t"U\nS\nA"\n' + '2023-01-02\tCanada\n');
});
});
20 changes: 17 additions & 3 deletions website/src/components/Submission/FileUpload/ColumnMapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export class ColumnMapping {
private constructor(private readonly map: ReadonlyMap<string, string | null>) {}

private static getBestMatchingTargetColumn(sourceColumn: string, inputFields: InputField[]): string | null {
if (inputFields.length === 0) return null;
const [bestMatch, score] = inputFields
.map((field): [string, number] => {
const score = Math.max(
Expand All @@ -25,24 +26,37 @@ export class ColumnMapping {
/**
 * Create a mapping from the given source columns to the given input fields.
 *
 * Matching happens in two passes:
 *  1. Source columns that are exactly equal to a field's `name` or `displayName`
 *     are mapped to that field's `name`.
 *  2. Every source column not matched in pass 1 is mapped to whatever
 *     `getBestMatchingTargetColumn` returns for the fields still available
 *     (this may be `null`).
 *
 * A field that has been assigned is removed from the pool, so each input field
 * is used as a target at most once. Entries are inserted in pass order
 * (exact matches first, then the remaining source columns).
 *
 * @param sourceColumns column names to map from
 * @param inputFields candidate target fields
 * @returns a new ColumnMapping with one entry per distinct source column
 */
public static fromColumns(sourceColumns: string[], inputFields: InputField[]): ColumnMapping {
    const mapping = new Map<string, string | null>();
    let availableFields = inputFields;
    let remainingSourceColumns = sourceColumns;
    // assign exact matches first
    sourceColumns.forEach((sourceColumn) => {
        const foundField = availableFields.find(
            (inputField) => inputField.name === sourceColumn || inputField.displayName === sourceColumn,
        );
        if (foundField) {
            mapping.set(sourceColumn, foundField.name);
            // Remove the matched field by its own name. The source column may have matched via
            // `displayName`, in which case comparing against `sourceColumn` would leave the field
            // in the pool and allow it to be assigned a second time below.
            availableFields = availableFields.filter((f) => f.name !== foundField.name);
            remainingSourceColumns = remainingSourceColumns.filter((f) => f !== sourceColumn);
        }
    });
    // do best effort matching second
    remainingSourceColumns.forEach((sourceColumn) => {
        const bestMatch = this.getBestMatchingTargetColumn(sourceColumn, availableFields);
        mapping.set(sourceColumn, bestMatch);
        // A `null` best match filters nothing out, since no field name equals null.
        availableFields = availableFields.filter((field) => field.name !== bestMatch);
    });
    return new ColumnMapping(mapping);
}

/* Update the mapping with new source and target columns, trying to keep as much of the
mapping intact as possible. */
/* Update the mapping with new source and target columns, keeping previously mapped values. */
public update(newSourceColumns: string[], newInputFields: InputField[]): ColumnMapping {
// keep entries that existed before
const newMapping = new Map(
newSourceColumns.map((newSourceCol) => {
const prevTargetCol = this.map.get(newSourceCol);
if (prevTargetCol && newInputFields.map((f) => f.name).includes(prevTargetCol)) {
return [newSourceCol, prevTargetCol];
} else {
return [newSourceCol, ColumnMapping.getBestMatchingTargetColumn(newSourceCol, newInputFields)];
return [newSourceCol, null];
}
}),
);
Expand Down

0 comments on commit a899740

Please sign in to comment.