Skip to content

Commit

Permalink
Implement Vector API rearrange operation
Browse files Browse the repository at this point in the history
  • Loading branch information
Quan Anh Mai committed Dec 20, 2024
1 parent d6ddf70 commit c68f5c0
Show file tree
Hide file tree
Showing 3 changed files with 323 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2327,7 +2327,11 @@ public static class VexRVMOp extends VexOp {
public static final VexRVMOp VSQRTSD = new VexRVMOp("VSQRTSD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x51, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
public static final VexRVMOp VSQRTSS = new VexRVMOp("VSQRTSS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x51, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0);

// Permutes whose selector comes from a vector register (all in the 66 / 0F38 encoding space).
// NOTE(review): going by the constant types, the arguments appear to be: mnemonic, SIMD prefix,
// opcode map, VEX.W, opcode byte, CPU-feature assertion, EVEX tuple type, EVEX.W -- confirm
// against the VexRVMOp constructor, which is not visible here.
// VPERMILPD is the only entry in this group whose VEX.W (W0) differs from its EVEX.W (W1); that
// is intentional and agrees with the Intel SDM (VEX.66.0F38.W0 0D vs. EVEX.66.0F38.W1 0D).
public static final VexRVMOp VPERMILPS = new VexRVMOp("VPERMILPS", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x0C, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, VEXPrefixConfig.W0);
public static final VexRVMOp VPERMD = new VexRVMOp("VPERMD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x36, VEXOpAssertion.AVX2_AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W0);
public static final VexRVMOp VPERMPS = new VexRVMOp("VPERMPS", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x16, VEXOpAssertion.AVX2_AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W0);
public static final VexRVMOp VPERMILPD = new VexRVMOp("VPERMILPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x0D, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, VEXPrefixConfig.W1);

public static final VexRVMOp VMOVSS = new VexRVMOp("VMOVSS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x10, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0);
public static final VexRVMOp VMOVSD = new VexRVMOp("VMOVSD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x10, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
public static final VexRVMOp VMOVHPD = new VexRVMOp("VMOVHPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x16, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
Expand Down Expand Up @@ -2431,8 +2435,14 @@ public static class VexRVMOp extends VexOp {
// E-prefixed entries constructed from a name plus an existing VEX op reuse that op's definition.
public static final VexRVMOp EVSQRTSD = new VexRVMOp("EVSQRTSD", VSQRTSD);
public static final VexRVMOp EVSQRTSS = new VexRVMOp("EVSQRTSS", VSQRTSS);

// Register-selector permutes, listed by element width (B, W, D/PS, Q/PD).
// EVPERMILPS / EVPERMD / EVPERMPS / EVPERMILPD are built from the VEX ops of the same name.
// EVPERMB / EVPERMW / EVPERMQ / EVPERMPD are instead declared with explicit encoding parameters.
// EVPERMB and EVPERMW share opcode 0x8D, EVPERMD and EVPERMQ share 0x36, and EVPERMPS and
// EVPERMPD share 0x16; within each pair the W bit (W0 vs. W1) distinguishes the element width.
// NOTE(review): the meaning of the trailing `true` argument is not visible in this hunk --
// presumably it marks the op as EVEX-only; confirm against the VexRVMOp constructor.
public static final VexRVMOp EVPERMB = new VexRVMOp("EVPERMB", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x8D, VEXOpAssertion.AVX512_VBMI_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true);
public static final VexRVMOp EVPERMW = new VexRVMOp("EVPERMW", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x8D, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
public static final VexRVMOp EVPERMILPS = new VexRVMOp("EVPERMILPS", VPERMILPS);
public static final VexRVMOp EVPERMD = new VexRVMOp("EVPERMD", VPERMD);
public static final VexRVMOp EVPERMPS = new VexRVMOp("EVPERMPS", VPERMPS);
public static final VexRVMOp EVPERMILPD = new VexRVMOp("EVPERMILPD", VPERMILPD);
public static final VexRVMOp EVPERMQ = new VexRVMOp("EVPERMQ", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x36, VEXOpAssertion.AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
public static final VexRVMOp EVPERMPD = new VexRVMOp("EVPERMPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x16, VEXOpAssertion.AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W1, true);

public static final VexRVMOp EVPBLENDMB = new VexRVMOp("EVPBLENDMB", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x66, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true);
public static final VexRVMOp EVPBLENDMW = new VexRVMOp("EVPBLENDMW", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x66, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;

import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;

/**
* This enum encapsulates AArch64 instructions which perform permutations.
Expand Down Expand Up @@ -102,4 +106,61 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {

}
}

/**
 * LIR instruction that selects elements of {@code source} according to {@code indices} and
 * stores the selection in {@code result}, using one NEON {@code tbl} byte-table lookup.
 *
 * <p>
 * {@code tbl} indexes bytes only. For wider elements the indices are first rewritten, one
 * halving step at a time, into byte indices; the two scratch vectors hold the intermediate
 * values. When the element kind is already {@link AArch64Kind#BYTE} no scratch is needed and
 * both temps are left as {@link Value#ILLEGAL}.
 */
public static class ASIMDPermuteOp extends AArch64LIRInstruction {
    private static final LIRInstructionClass<ASIMDPermuteOp> TYPE = LIRInstructionClass.create(ASIMDPermuteOp.class);

    @Def protected AllocatableValue result;
    @Alive protected AllocatableValue source;
    @Use protected AllocatableValue indices;
    // Scratch vectors for the index rewrite; ILLEGAL when the elements are bytes.
    @Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp1;
    @Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp2;

    /**
     * @param tool used to allocate the two scratch variables when the element kind is wider
     *            than a byte
     * @param result destination vector; its platform kind determines element and vector size
     * @param source vector used as the lookup table
     * @param indices vector of element indices into {@code source}
     */
    public ASIMDPermuteOp(LIRGeneratorTool tool, AllocatableValue result, AllocatableValue source, AllocatableValue indices) {
        super(TYPE);
        this.result = result;
        this.source = source;
        this.indices = indices;

        AArch64Kind elementKind = ((AArch64Kind) result.getPlatformKind()).getScalar();
        if (elementKind == AArch64Kind.BYTE) {
            // Byte indices feed tbl directly, so no scratch registers are requested.
            this.xtmp1 = Value.ILLEGAL;
            this.xtmp2 = Value.ILLEGAL;
        } else {
            this.xtmp1 = tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE));
            this.xtmp2 = tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE));
        }
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        AArch64Kind vectorKind = (AArch64Kind) result.getPlatformKind();
        int elementBytes = vectorKind.getScalar().getSizeInBytes();
        ASIMDSize size = ASIMDSize.fromVectorKind(vectorKind);

        Register widened = registerOrNone(xtmp1);
        Register spare = registerOrNone(xtmp2);
        Register lookup = asRegister(indices);

        // tbl can only look up bytes. A lane holding index v for a 2W-bit element is turned
        // into two W-bit lanes holding 2v (low half) and 2v + 1 (high half); the steps below
        // cascade 64 -> 32 -> 16 -> 8 bits so that every lane ends up as a byte index.
        if (elementBytes == AArch64Kind.QWORD.getSizeInBytes()) {
            emitDoubledIndexInBothHalves(masm, size, ElementSize.DoubleWord, widened, spare, lookup, Integer.SIZE);
            masm.neon.orrVI(size, ElementSize.DoubleWord, widened, 1L << Integer.SIZE);
            lookup = widened;
            elementBytes = AArch64Kind.DWORD.getSizeInBytes();
        }
        if (elementBytes == AArch64Kind.DWORD.getSizeInBytes()) {
            emitDoubledIndexInBothHalves(masm, size, ElementSize.Word, widened, spare, lookup, Short.SIZE);
            masm.neon.orrVI(size, ElementSize.Word, widened, 1 << Short.SIZE);
            lookup = widened;
            elementBytes = AArch64Kind.WORD.getSizeInBytes();
        }
        if (elementBytes == AArch64Kind.WORD.getSizeInBytes()) {
            emitDoubledIndexInBothHalves(masm, size, ElementSize.HalfWord, widened, spare, lookup, Byte.SIZE);
            masm.neon.orrVI(size, ElementSize.HalfWord, widened, 1 << Byte.SIZE);
            lookup = widened;
        }

        masm.neon.tblVVV(size, asRegister(result), asRegister(source), lookup);
    }

    /** Maps a temp operand to its register, or to {@link Register#None} if it is ILLEGAL. */
    private static Register registerOrNone(AllocatableValue temp) {
        if (temp.equals(Value.ILLEGAL)) {
            return Register.None;
        }
        return asRegister(temp);
    }

    /**
     * Emits {@code dst = (src << 1) | ((src << 1) << halfLaneBits)} per lane of width
     * {@code laneSize}, i.e. places the doubled index in both halves of each lane. The caller
     * then sets the lowest bit of the upper half to obtain the {@code 2v, 2v + 1} pair.
     */
    private static void emitDoubledIndexInBothHalves(AArch64MacroAssembler masm, ASIMDSize size, ElementSize laneSize, Register dst, Register scratch, Register src, int halfLaneBits) {
        masm.neon.shlVVI(size, laneSize, dst, src, 1);
        masm.neon.shlVVI(size, laneSize, scratch, dst, halfLaneBits);
        masm.neon.orrVVV(size, dst, dst, scratch);
    }
}
}
Loading

0 comments on commit c68f5c0

Please sign in to comment.