Skip to content

Commit

Permalink
fix: hashfunction in full hash join
Browse files Browse the repository at this point in the history
  • Loading branch information
maartyman committed Jan 23, 2025
1 parent 7a7a90f commit 10c88f0
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 82 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import type { MediatorHashBindings } from '@comunica/bus-hash-bindings';
import type {
IActionRdfJoin,
IActorRdfJoinArgs,
Expand All @@ -13,16 +12,15 @@ import { passTestWithSideData } from '@comunica/core';
import type { IMediatorTypeJoinCoefficients } from '@comunica/mediatortype-join-coefficients';
import type { BindingsStream } from '@comunica/types';
import type { Bindings } from '@comunica/utils-bindings-factory';
import { bindingsToCompactString } from '@comunica/utils-bindings-factory';
import type { AsyncIterator } from 'asynciterator';
import { FullHashJoin } from './FullHashJoin';

/**
* A comunica Inner Full Hash RDF Join Actor.
*/
export class ActorRdfJoinInnerFullHash extends ActorRdfJoin {
public readonly mediatorHashBindings: MediatorHashBindings;

public constructor(args: IActorRdfJoinInnerFullHashArgs) {
public constructor(args: IActorRdfJoinArgs) {
super(args, {
logicalType: 'inner',
physicalName: 'full-hash',
Expand All @@ -34,14 +32,13 @@ export class ActorRdfJoinInnerFullHash extends ActorRdfJoin {
protected async getOutput(action: IActionRdfJoin): Promise<IActorRdfJoinOutputInner> {
const metadatas = await ActorRdfJoin.getMetadatas(action.entries);
const commonVariables = ActorRdfJoin.overlappingVariables(metadatas).map(v => v.variable);
const { hashFunction } = await this.mediatorHashBindings.mediate({ context: action.context });
const bindingsStream = <BindingsStream><any> new FullHashJoin(
<AsyncIterator<Bindings>><unknown>action.entries[0].output.bindingsStream,
<AsyncIterator<Bindings>><unknown>action.entries[1].output.bindingsStream,
<(...bindings: Bindings[]) => Bindings | null>ActorRdfJoin.joinBindings,
entry => hashFunction(entry, commonVariables),
entry => hashFunction(entry, metadatas[0].variables.map(v => v.variable)),
entry => hashFunction(entry, metadatas[1].variables.map(v => v.variable)),
entry => bindingsToCompactString(entry, commonVariables),
entry => bindingsToCompactString(entry, metadatas[0].variables.map(v => v.variable)),
entry => bindingsToCompactString(entry, metadatas[1].variables.map(v => v.variable)),
);
return {
result: {
Expand All @@ -68,6 +65,3 @@ export class ActorRdfJoinInnerFullHash extends ActorRdfJoin {
}, sideData);
}
}
export interface IActorRdfJoinInnerFullHashArgs extends IActorRdfJoinArgs {
mediatorHashBindings: MediatorHashBindings;
}
15 changes: 7 additions & 8 deletions packages/actor-rdf-join-inner-full-hash/lib/DualKeyHashMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,29 @@ export interface IMapObject<T> {
}

export class DualKeyHashMap<O extends { equals: (item: O) => boolean }> {
private readonly map = new Map<number, Map<number, IMapObject<O>>>();
private readonly map = new Map<string, Map<string, IMapObject<O>>>();

public set(mainKey: number, secondaryKey: number, value: O): void {
public set(mainKey: string, secondaryKey: string, value: O): void {
const mainMap = this.map.get(secondaryKey);
if (mainMap) {
const mapObject = mainMap.get(mainKey);
if (mapObject) {
if (value.equals(mapObject.value)) {
mapObject.count++;
} else {
// TODO [2025-01-01]: this can happen when using clashing hash functions
throw new Error(`Current value: ${JSON.stringify(mapObject.value)} and given value: ${JSON.stringify(value)} are different. With hash functions mainKey: ${mainKey}, secondary key: ${secondaryKey}!`);
}
} else {
mainMap.set(mainKey, { value, count: 1 });
}
} else {
const newMap = new Map<number, IMapObject<O>>();
const newMap = new Map<string, IMapObject<O>>();
newMap.set(mainKey, { value, count: 1 });
this.map.set(secondaryKey, newMap);
}
}

public delete(mainKey: number, secondaryKey: number): boolean {
public delete(mainKey: string, secondaryKey: string): boolean {
const mainMap = this.map.get(secondaryKey);
if (mainMap) {
const mapObject = mainMap.get(mainKey);
Expand All @@ -47,19 +46,19 @@ export class DualKeyHashMap<O extends { equals: (item: O) => boolean }> {
return false;
}

public get(mainKey: number, secondaryKey: number): IMapObject<O> | undefined {
public get(mainKey: string, secondaryKey: string): IMapObject<O> | undefined {
return this.map.get(secondaryKey)?.get(mainKey);
}

public getAll(secondaryKey: number): IterableIterator<IMapObject<O>> {
public getAll(secondaryKey: string): IterableIterator<IMapObject<O>> {
const mainMap = this.map.get(secondaryKey);
if (mainMap) {
return mainMap.values();
}
return [][Symbol.iterator]();
}

public has(secondaryKey: number): boolean {
public has(secondaryKey: string): boolean {
return this.map.has(secondaryKey);
}

Expand Down
16 changes: 8 additions & 8 deletions packages/actor-rdf-join-inner-full-hash/lib/FullHashJoin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ export class FullHashJoin extends InnerJoin {
private otherArray: IterableIterator<IMapObject<Bindings>> = [][Symbol.iterator]();
private otherElement: IMapObject<Bindings> | null = null;
private count = 0;
private readonly joinHash: (entry: Bindings) => number;
private readonly leftHash: (entry: Bindings) => number;
private readonly rightHash: (entry: Bindings) => number;
private readonly joinHash: (entry: Bindings) => string;
private readonly leftHash: (entry: Bindings) => string;
private readonly rightHash: (entry: Bindings) => string;

public constructor(
left: AsyncIterator<Bindings>,
right: AsyncIterator<Bindings>,
funJoin: (...bindings: Bindings[]) => Bindings | null,
joinHash: (entry: Bindings) => number,
leftHash: (entry: Bindings) => number,
rightHash: (entry: Bindings) => number,
joinHash: (entry: Bindings) => string,
leftHash: (entry: Bindings) => string,
rightHash: (entry: Bindings) => string,
) {
super(left, right, funJoin);
this.joinHash = joinHash;
Expand All @@ -44,9 +44,9 @@ export class FullHashJoin extends InnerJoin {

private addOrDeleteFromMemory(
item: Bindings,
joinHash: number,
joinHash: string,
memory: DualKeyHashMap<Bindings>,
hash: number,
hash: string,
): boolean {
const isAddition = item.getContextEntry(KeysBindings.isAddition) ?? true;
if (isAddition) {
Expand Down
1 change: 0 additions & 1 deletion packages/actor-rdf-join-inner-full-hash/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
"build:components": "componentsjs-generator"
},
"dependencies": {
"@comunica/bus-hash-bindings": "^4.0.2",
"@comunica/bus-rdf-join": "^4.0.1",
"@comunica/core": "^4.0.2",
"@comunica/mediatortype-join-coefficients": "^4.0.1",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import type { MediatorHashBindings } from '@comunica/bus-hash-bindings';
import type { IActionRdfJoin } from '@comunica/bus-rdf-join';
import { ActorRdfJoin } from '@comunica/bus-rdf-join';
import type { IActionRdfJoinSelectivity, IActorRdfJoinSelectivityOutput } from '@comunica/bus-rdf-join-selectivity';
Expand All @@ -11,7 +10,6 @@ import { KeysBindings } from '@incremunica/context-entries';
import {
createTestContextWithDataFactory,
createTestBindingsFactory,
createTestMediatorHashBindings,
} from '@incremunica/dev-tools';
import type * as RDF from '@rdfjs/types';
import { arrayifyStream } from 'arrayify-stream';
Expand Down Expand Up @@ -64,18 +62,15 @@ IActorRdfJoinSelectivityOutput
let action: IActionRdfJoin;
let variables0: { variable: RDF.Variable; canBeUndef: boolean }[];
let variables1: { variable: RDF.Variable; canBeUndef: boolean }[];
let mediatorHashBindings: MediatorHashBindings;

beforeEach(() => {
mediatorJoinSelectivity = <any> {
mediate: async() => ({ selectivity: 1 }),
};
mediatorHashBindings = createTestMediatorHashBindings();
actor = new ActorRdfJoinInnerFullHash({
name: 'actor',
bus,
mediatorJoinSelectivity,
mediatorHashBindings,
});
variables0 = [];
variables1 = [];
Expand Down
98 changes: 49 additions & 49 deletions packages/actor-rdf-join-inner-full-hash/test/DualKeyHashMap-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,77 +18,77 @@ describe('DualKeyHashMap', () => {

it('sets and gets a value', () => {
const value = new TestObject(10);
map.set(0, 0, value);
expect(map.get(0, 0)).toEqual({ value, count: 1 });
map.set('0', '0', value);
expect(map.get('0', '0')).toEqual({ value, count: 1 });
});

it('increments count when setting the same value', () => {
const value = new TestObject(10);
map.set(0, 0, value);
map.set(0, 0, value);
expect(map.get(0, 0)).toEqual({ value, count: 2 });
map.set('0', '0', value);
map.set('0', '0', value);
expect(map.get('0', '0')).toEqual({ value, count: 2 });
});

it('throws error when setting different value with same key', () => {
const value1 = new TestObject(10);
const value2 = new TestObject(20);
expect(() => {
map.set(0, 0, value1);
map.set(0, 0, value2);
map.set('0', '0', value1);
map.set('0', '0', value2);
}).toThrow(`Current value: ${JSON.stringify(value1)} and given value: ${JSON.stringify(value2)} are different. With hash functions mainKey: 0, secondary key: 0!`);
});

it('returns undefined when getting non-existing value', () => {
expect(map.get(0, 0)).toBeUndefined();
expect(map.get('0', '0')).toBeUndefined();
});

it('deletes value when count is greater than 1', () => {
const value = new TestObject(10);
map.set(0, 0, value);
map.set(0, 0, value);
expect(map.delete(0, 0)).toBe(true);
expect(map.get(0, 0)).toEqual({ value, count: 1 });
map.set('0', '0', value);
map.set('0', '0', value);
expect(map.delete('0', '0')).toBe(true);
expect(map.get('0', '0')).toEqual({ value, count: 1 });
});

it('deletes value when count is equal to 1', () => {
const value = new TestObject(10);
map.set(0, 0, value);
expect(map.delete(0, 0)).toBe(true);
expect(map.get(0, 0)).toBeUndefined();
map.set('0', '0', value);
expect(map.delete('0', '0')).toBe(true);
expect(map.get('0', '0')).toBeUndefined();
});

it('returns false when deleting non-existing value', () => {
expect(map.delete(0, 0)).toBe(false);
expect(map.delete('0', '0')).toBe(false);
});

it('returns an iterator with all values of a secondary key', () => {
const value1 = new TestObject(10);
const value2 = new TestObject(20);
map.set(0, 0, value1);
map.set(1, 0, value2);
expect([ ...<any>map.getAll(0) ]).toEqual([{ value: value1, count: 1 }, { value: value2, count: 1 }]);
map.set('0', '0', value1);
map.set('1', '0', value2);
expect([ ...<any>map.getAll('0') ]).toEqual([{ value: value1, count: 1 }, { value: value2, count: 1 }]);
});

it('returns true when the secondary key exists', () => {
map.set(0, 0, new TestObject(10));
expect(map.has(0)).toBe(true);
map.set('0', '0', new TestObject(10));
expect(map.has('0')).toBe(true);
});

it('returns false when the secondary key does not exist', () => {
expect(map.has(0)).toBe(false);
expect(map.has('0')).toBe(false);
});

it('returns an empty iterator;', () => {
expect([ ...<any>map.getAll(0) ]).toEqual([]);
expect([ ...<any>map.getAll('0') ]).toEqual([]);
});

it('should clear', () => {
const value1 = new TestObject(10);
const value2 = new TestObject(20);
map.set(0, 0, value1);
map.set(1, 0, value2);
map.set('0', '0', value1);
map.set('1', '0', value2);
map.clear();
expect([ ...<any>map.getAll(0) ]).toEqual([]);
expect([ ...<any>map.getAll('0') ]).toEqual([]);
});

describe('DualKeyHashMap with equality checks', () => {
Expand All @@ -110,17 +110,17 @@ describe('DualKeyHashMap', () => {
equals: (other: { num: number; str: string }) => other.num === 2 && other.str === 'bar',
};

map.set(0, 0, item1);
expect(map.get(0, 0)).toEqual({ value: item1, count: 1 });
expect(map.get(0, 1)).toBeUndefined();
map.set('0', '0', item1);
expect(map.get('0', '0')).toEqual({ value: item1, count: 1 });
expect(map.get('0', '1')).toBeUndefined();

map.set(0, 0, item1);
expect(map.get(0, 0)).toEqual({ value: item1, count: 2 });
map.set('0', '0', item1);
expect(map.get('0', '0')).toEqual({ value: item1, count: 2 });

map.set(0, 1, item2);
expect(map.get(0, 1)).toEqual({ value: item2, count: 1 });
map.set('0', '1', item2);
expect(map.get('0', '1')).toEqual({ value: item2, count: 1 });

expect([ ...<any>map.getAll(0) ]).toEqual([{ value: item1, count: 2 }]);
expect([ ...<any>map.getAll('0') ]).toEqual([{ value: item1, count: 2 }]);
});

it('deletes values with equality checks', () => {
Expand All @@ -135,24 +135,24 @@ describe('DualKeyHashMap', () => {
equals: (other: { num: number; str: string }) => other.num === 2 && other.str === 'bar',
};

map.set(0, 0, item1);
map.set(0, 1, item2);
map.set(1, 0, item1);
map.set(1, 1, item2);
map.set('0', '0', item1);
map.set('0', '1', item2);
map.set('1', '0', item1);
map.set('1', '1', item2);

expect(map.delete(0, 0)).toBe(true);
expect(map.has(0)).toBe(true);
expect(map.get(0, 0)).toBeUndefined();
expect(map.get(1, 0)).toEqual({ value: item1, count: 1 });
expect(map.delete('0', '0')).toBe(true);
expect(map.has('0')).toBe(true);
expect(map.get('0', '0')).toBeUndefined();
expect(map.get('1', '0')).toEqual({ value: item1, count: 1 });

expect(map.delete(0, 0)).toBe(false);
expect(map.get(0, 1)).toEqual({ value: item2, count: 1 });
expect(map.get(1, 1)).toEqual({ value: item2, count: 1 });
expect(map.delete('0', '0')).toBe(false);
expect(map.get('0', '1')).toEqual({ value: item2, count: 1 });
expect(map.get('1', '1')).toEqual({ value: item2, count: 1 });

expect(map.delete(0, 1)).toBe(true);
expect(map.has(1)).toBe(true);
expect(map.get(0, 1)).toBeUndefined();
expect([ ...<any>map.getAll(1) ]).toEqual([{ value: item2, count: 1 }]);
expect(map.delete('0', '1')).toBe(true);
expect(map.has('1')).toBe(true);
expect(map.get('0', '1')).toBeUndefined();
expect([ ...<any>map.getAll('1') ]).toEqual([{ value: item2, count: 1 }]);
});
});
});

0 comments on commit 10c88f0

Please sign in to comment.