From 10c88f0b1ab987369813c1569a4b7ce24ed3a287 Mon Sep 17 00:00:00 2001 From: maartenvandenbrande Date: Thu, 23 Jan 2025 15:21:41 +0100 Subject: [PATCH] fix: hashfunction in full hash join --- .../lib/ActorRdfJoinInnerFullHash.ts | 16 +-- .../lib/DualKeyHashMap.ts | 15 ++- .../lib/FullHashJoin.ts | 16 +-- .../package.json | 1 - .../test/ActorRdfJoinInnerFullHash-test.ts | 5 - .../test/DualKeyHashMap-test.ts | 98 +++++++++---------- 6 files changed, 69 insertions(+), 82 deletions(-) diff --git a/packages/actor-rdf-join-inner-full-hash/lib/ActorRdfJoinInnerFullHash.ts b/packages/actor-rdf-join-inner-full-hash/lib/ActorRdfJoinInnerFullHash.ts index edb485c4..2faadb2d 100644 --- a/packages/actor-rdf-join-inner-full-hash/lib/ActorRdfJoinInnerFullHash.ts +++ b/packages/actor-rdf-join-inner-full-hash/lib/ActorRdfJoinInnerFullHash.ts @@ -1,4 +1,3 @@ -import type { MediatorHashBindings } from '@comunica/bus-hash-bindings'; import type { IActionRdfJoin, IActorRdfJoinArgs, @@ -13,6 +12,7 @@ import { passTestWithSideData } from '@comunica/core'; import type { IMediatorTypeJoinCoefficients } from '@comunica/mediatortype-join-coefficients'; import type { BindingsStream } from '@comunica/types'; import type { Bindings } from '@comunica/utils-bindings-factory'; +import { bindingsToCompactString } from '@comunica/utils-bindings-factory'; import type { AsyncIterator } from 'asynciterator'; import { FullHashJoin } from './FullHashJoin'; @@ -20,9 +20,7 @@ import { FullHashJoin } from './FullHashJoin'; * A comunica Inner Full Hash RDF Join Actor. */ export class ActorRdfJoinInnerFullHash extends ActorRdfJoin { - public readonly mediatorHashBindings: MediatorHashBindings; - - public constructor(args: IActorRdfJoinInnerFullHashArgs) { + public constructor(args: IActorRdfJoinArgs) { super(args, { logicalType: 'inner', physicalName: 'full-hash', @@ -34,14 +32,13 @@ export class ActorRdfJoinInnerFullHash extends ActorRdfJoin { protected async getOutput(action: IActionRdfJoin): Promise { const metadatas = await ActorRdfJoin.getMetadatas(action.entries); const commonVariables = ActorRdfJoin.overlappingVariables(metadatas).map(v => v.variable); - const { hashFunction } = await this.mediatorHashBindings.mediate({ context: action.context }); const bindingsStream = new FullHashJoin( >action.entries[0].output.bindingsStream, >action.entries[1].output.bindingsStream, <(...bindings: Bindings[]) => Bindings | null>ActorRdfJoin.joinBindings, - entry => hashFunction(entry, commonVariables), - entry => hashFunction(entry, metadatas[0].variables.map(v => v.variable)), - entry => hashFunction(entry, metadatas[1].variables.map(v => v.variable)), + entry => bindingsToCompactString(entry, commonVariables), + entry => bindingsToCompactString(entry, metadatas[0].variables.map(v => v.variable)), + entry => bindingsToCompactString(entry, metadatas[1].variables.map(v => v.variable)), ); return { result: { @@ -68,6 +65,3 @@ export class ActorRdfJoinInnerFullHash extends ActorRdfJoin { }, sideData); } } -export interface IActorRdfJoinInnerFullHashArgs extends IActorRdfJoinArgs { - mediatorHashBindings: MediatorHashBindings; -} diff --git a/packages/actor-rdf-join-inner-full-hash/lib/DualKeyHashMap.ts b/packages/actor-rdf-join-inner-full-hash/lib/DualKeyHashMap.ts index af61e8bb..43b647c0 100644 --- a/packages/actor-rdf-join-inner-full-hash/lib/DualKeyHashMap.ts +++ b/packages/actor-rdf-join-inner-full-hash/lib/DualKeyHashMap.ts @@ -4,9 +4,9 @@ export interface IMapObject { } export class DualKeyHashMap boolean }> { - private readonly map = new Map>>(); + private readonly map = new Map>>(); - public set(mainKey: number, secondaryKey: number, value: O): void { + public set(mainKey: string, secondaryKey: string, value: O): void { const mainMap = this.map.get(secondaryKey); if (mainMap) { const mapObject = mainMap.get(mainKey); @@ -14,20 +14,19 @@ export class DualKeyHashMap boolean }> { if (value.equals(mapObject.value)) { mapObject.count++; } else { - // TODO [2025-01-01]: this can happen when using clashing hash functions throw new Error(`Current value: ${JSON.stringify(mapObject.value)} and given value: ${JSON.stringify(value)} are different. With hash functions mainKey: ${mainKey}, secondary key: ${secondaryKey}!`); } } else { mainMap.set(mainKey, { value, count: 1 }); } } else { - const newMap = new Map>(); + const newMap = new Map>(); newMap.set(mainKey, { value, count: 1 }); this.map.set(secondaryKey, newMap); } } - public delete(mainKey: number, secondaryKey: number): boolean { + public delete(mainKey: string, secondaryKey: string): boolean { const mainMap = this.map.get(secondaryKey); if (mainMap) { const mapObject = mainMap.get(mainKey); @@ -47,11 +46,11 @@ export class DualKeyHashMap boolean }> { return false; } - public get(mainKey: number, secondaryKey: number): IMapObject | undefined { + public get(mainKey: string, secondaryKey: string): IMapObject | undefined { return this.map.get(secondaryKey)?.get(mainKey); } - public getAll(secondaryKey: number): IterableIterator> { + public getAll(secondaryKey: string): IterableIterator> { const mainMap = this.map.get(secondaryKey); if (mainMap) { return mainMap.values(); @@ -59,7 +58,7 @@ export class DualKeyHashMap boolean }> { return [][Symbol.iterator](); } - public has(secondaryKey: number): boolean { + public has(secondaryKey: string): boolean { return this.map.has(secondaryKey); } diff --git a/packages/actor-rdf-join-inner-full-hash/lib/FullHashJoin.ts b/packages/actor-rdf-join-inner-full-hash/lib/FullHashJoin.ts index e769bccf..43ab3428 100644 --- a/packages/actor-rdf-join-inner-full-hash/lib/FullHashJoin.ts +++ b/packages/actor-rdf-join-inner-full-hash/lib/FullHashJoin.ts @@ -12,17 +12,17 @@ export class FullHashJoin extends InnerJoin { private otherArray: IterableIterator> = [][Symbol.iterator](); private otherElement: IMapObject | null = null; private count = 0; - private readonly joinHash: (entry: Bindings) => number; - private readonly leftHash: (entry: Bindings) => number; - private readonly rightHash: (entry: Bindings) => number; + private readonly joinHash: (entry: Bindings) => string; + private readonly leftHash: (entry: Bindings) => string; + private readonly rightHash: (entry: Bindings) => string; public constructor( left: AsyncIterator, right: AsyncIterator, funJoin: (...bindings: Bindings[]) => Bindings | null, - joinHash: (entry: Bindings) => number, - leftHash: (entry: Bindings) => number, - rightHash: (entry: Bindings) => number, + joinHash: (entry: Bindings) => string, + leftHash: (entry: Bindings) => string, + rightHash: (entry: Bindings) => string, ) { super(left, right, funJoin); this.joinHash = joinHash; @@ -44,9 +44,9 @@ export class FullHashJoin extends InnerJoin { private addOrDeleteFromMemory( item: Bindings, - joinHash: number, + joinHash: string, memory: DualKeyHashMap, - hash: number, + hash: string, ): boolean { const isAddition = item.getContextEntry(KeysBindings.isAddition) ?? true; if (isAddition) { diff --git a/packages/actor-rdf-join-inner-full-hash/package.json b/packages/actor-rdf-join-inner-full-hash/package.json index a16b0a43..aa443559 100644 --- a/packages/actor-rdf-join-inner-full-hash/package.json +++ b/packages/actor-rdf-join-inner-full-hash/package.json @@ -37,7 +37,6 @@ "build:components": "componentsjs-generator" }, "dependencies": { - "@comunica/bus-hash-bindings": "^4.0.2", "@comunica/bus-rdf-join": "^4.0.1", "@comunica/core": "^4.0.2", "@comunica/mediatortype-join-coefficients": "^4.0.1", diff --git a/packages/actor-rdf-join-inner-full-hash/test/ActorRdfJoinInnerFullHash-test.ts b/packages/actor-rdf-join-inner-full-hash/test/ActorRdfJoinInnerFullHash-test.ts index 31f27d2a..2bd3128e 100644 --- a/packages/actor-rdf-join-inner-full-hash/test/ActorRdfJoinInnerFullHash-test.ts +++ b/packages/actor-rdf-join-inner-full-hash/test/ActorRdfJoinInnerFullHash-test.ts @@ -1,4 +1,3 @@ -import type { MediatorHashBindings } from '@comunica/bus-hash-bindings'; import type { IActionRdfJoin } from '@comunica/bus-rdf-join'; import { ActorRdfJoin } from '@comunica/bus-rdf-join'; import type { IActionRdfJoinSelectivity, IActorRdfJoinSelectivityOutput } from '@comunica/bus-rdf-join-selectivity'; @@ -11,7 +10,6 @@ import { KeysBindings } from '@incremunica/context-entries'; import { createTestContextWithDataFactory, createTestBindingsFactory, - createTestMediatorHashBindings, } from '@incremunica/dev-tools'; import type * as RDF from '@rdfjs/types'; import { arrayifyStream } from 'arrayify-stream'; @@ -64,18 +62,15 @@ IActorRdfJoinSelectivityOutput let action: IActionRdfJoin; let variables0: { variable: RDF.Variable; canBeUndef: boolean }[]; let variables1: { variable: RDF.Variable; canBeUndef: boolean }[]; - let mediatorHashBindings: MediatorHashBindings; beforeEach(() => { mediatorJoinSelectivity = { mediate: async() => ({ selectivity: 1 }), }; - mediatorHashBindings = createTestMediatorHashBindings(); actor = new ActorRdfJoinInnerFullHash({ name: 'actor', bus, mediatorJoinSelectivity, - mediatorHashBindings, }); variables0 = []; variables1 = []; diff --git a/packages/actor-rdf-join-inner-full-hash/test/DualKeyHashMap-test.ts b/packages/actor-rdf-join-inner-full-hash/test/DualKeyHashMap-test.ts index 84bb8214..85715581 100644 --- a/packages/actor-rdf-join-inner-full-hash/test/DualKeyHashMap-test.ts +++ b/packages/actor-rdf-join-inner-full-hash/test/DualKeyHashMap-test.ts @@ -18,77 +18,77 @@ describe('DualKeyHashMap', () => { it('sets and gets a value', () => { const value = new TestObject(10); - map.set(0, 0, value); - expect(map.get(0, 0)).toEqual({ value, count: 1 }); + map.set('0', '0', value); + expect(map.get('0', '0')).toEqual({ value, count: 1 }); }); it('increments count when setting the same value', () => { const value = new TestObject(10); - map.set(0, 0, value); - map.set(0, 0, value); - expect(map.get(0, 0)).toEqual({ value, count: 2 }); + map.set('0', '0', value); + map.set('0', '0', value); + expect(map.get('0', '0')).toEqual({ value, count: 2 }); }); it('throws error when setting different value with same key', () => { const value1 = new TestObject(10); const value2 = new TestObject(20); expect(() => { - map.set(0, 0, value1); - map.set(0, 0, value2); + map.set('0', '0', value1); + map.set('0', '0', value2); }).toThrow(`Current value: ${JSON.stringify(value1)} and given value: ${JSON.stringify(value2)} are different. With hash functions mainKey: 0, secondary key: 0!`); }); it('returns undefined when getting non-existing value', () => { - expect(map.get(0, 0)).toBeUndefined(); + expect(map.get('0', '0')).toBeUndefined(); }); it('deletes value when count is greater than 1', () => { const value = new TestObject(10); - map.set(0, 0, value); - map.set(0, 0, value); - expect(map.delete(0, 0)).toBe(true); - expect(map.get(0, 0)).toEqual({ value, count: 1 }); + map.set('0', '0', value); + map.set('0', '0', value); + expect(map.delete('0', '0')).toBe(true); + expect(map.get('0', '0')).toEqual({ value, count: 1 }); }); it('deletes value when count is equal to 1', () => { const value = new TestObject(10); - map.set(0, 0, value); - expect(map.delete(0, 0)).toBe(true); - expect(map.get(0, 0)).toBeUndefined(); + map.set('0', '0', value); + expect(map.delete('0', '0')).toBe(true); + expect(map.get('0', '0')).toBeUndefined(); }); it('returns false when deleting non-existing value', () => { - expect(map.delete(0, 0)).toBe(false); + expect(map.delete('0', '0')).toBe(false); }); it('returns an iterator with all values of a secondary key', () => { const value1 = new TestObject(10); const value2 = new TestObject(20); - map.set(0, 0, value1); - map.set(1, 0, value2); - expect([ ...map.getAll(0) ]).toEqual([{ value: value1, count: 1 }, { value: value2, count: 1 }]); + map.set('0', '0', value1); + map.set('1', '0', value2); + expect([ ...map.getAll('0') ]).toEqual([{ value: value1, count: 1 }, { value: value2, count: 1 }]); }); it('returns true when the secondary key exists', () => { - map.set(0, 0, new TestObject(10)); - expect(map.has(0)).toBe(true); + map.set('0', '0', new TestObject(10)); + expect(map.has('0')).toBe(true); }); it('returns false when the secondary key does not exist', () => { - expect(map.has(0)).toBe(false); + expect(map.has('0')).toBe(false); }); it('returns an empty iterator;', () => { - expect([ ...map.getAll(0) ]).toEqual([]); + expect([ ...map.getAll('0') ]).toEqual([]); }); it('should clear', () => { const value1 = new TestObject(10); const value2 = new TestObject(20); - map.set(0, 0, value1); - map.set(1, 0, value2); + map.set('0', '0', value1); + map.set('1', '0', value2); map.clear(); - expect([ ...map.getAll(0) ]).toEqual([]); + expect([ ...map.getAll('0') ]).toEqual([]); }); describe('DualKeyHashMap with equality checks', () => { @@ -110,17 +110,17 @@ describe('DualKeyHashMap', () => { equals: (other: { num: number; str: string }) => other.num === 2 && other.str === 'bar', }; - map.set(0, 0, item1); - expect(map.get(0, 0)).toEqual({ value: item1, count: 1 }); - expect(map.get(0, 1)).toBeUndefined(); + map.set('0', '0', item1); + expect(map.get('0', '0')).toEqual({ value: item1, count: 1 }); + expect(map.get('0', '1')).toBeUndefined(); - map.set(0, 0, item1); - expect(map.get(0, 0)).toEqual({ value: item1, count: 2 }); + map.set('0', '0', item1); + expect(map.get('0', '0')).toEqual({ value: item1, count: 2 }); - map.set(0, 1, item2); - expect(map.get(0, 1)).toEqual({ value: item2, count: 1 }); + map.set('0', '1', item2); + expect(map.get('0', '1')).toEqual({ value: item2, count: 1 }); - expect([ ...map.getAll(0) ]).toEqual([{ value: item1, count: 2 }]); + expect([ ...map.getAll('0') ]).toEqual([{ value: item1, count: 2 }]); }); it('deletes values with equality checks', () => { @@ -135,24 +135,24 @@ describe('DualKeyHashMap', () => { equals: (other: { num: number; str: string }) => other.num === 2 && other.str === 'bar', }; - map.set(0, 0, item1); - map.set(0, 1, item2); - map.set(1, 0, item1); - map.set(1, 1, item2); + map.set('0', '0', item1); + map.set('0', '1', item2); + map.set('1', '0', item1); + map.set('1', '1', item2); - expect(map.delete(0, 0)).toBe(true); - expect(map.has(0)).toBe(true); - expect(map.get(0, 0)).toBeUndefined(); - expect(map.get(1, 0)).toEqual({ value: item1, count: 1 }); + expect(map.delete('0', '0')).toBe(true); + expect(map.has('0')).toBe(true); + expect(map.get('0', '0')).toBeUndefined(); + expect(map.get('1', '0')).toEqual({ value: item1, count: 1 }); - expect(map.delete(0, 0)).toBe(false); - expect(map.get(0, 1)).toEqual({ value: item2, count: 1 }); - expect(map.get(1, 1)).toEqual({ value: item2, count: 1 }); + expect(map.delete('0', '0')).toBe(false); + expect(map.get('0', '1')).toEqual({ value: item2, count: 1 }); + expect(map.get('1', '1')).toEqual({ value: item2, count: 1 }); - expect(map.delete(0, 1)).toBe(true); - expect(map.has(1)).toBe(true); - expect(map.get(0, 1)).toBeUndefined(); - expect([ ...map.getAll(1) ]).toEqual([{ value: item2, count: 1 }]); + expect(map.delete('0', '1')).toBe(true); + expect(map.has('1')).toBe(true); + expect(map.get('0', '1')).toBeUndefined(); + expect([ ...map.getAll('1') ]).toEqual([{ value: item2, count: 1 }]); }); }); });