/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import java.util.EnumSet;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.SyncPort;
import org.graalvm.compiler.lir.SyncPorts;
import org.graalvm.compiler.lir.amd64.AMD64ComplexVectorOp;
import org.graalvm.compiler.lir.amd64.AMD64LIRHelper;
import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits AMD64 code computing the polynomial hash
 * {@code h = 31 * h + element} over a primitive array, starting from a caller-supplied initial
 * value.
 *
 * <p>
 * The emitted code has three stages:
 * <ol>
 * <li>if at least 32 elements are available, an unrolled vector loop that consumes 32 elements per
 * iteration using four YMM accumulators,</li>
 * <li>a scalar loop that consumes two elements per iteration,</li>
 * <li>a scalar step for a single trailing element.</li>
 * </ol>
 *
 * <p>
 * Supported element kinds are {@code Boolean}, {@code Byte}, {@code Short}, {@code Char} and
 * {@code Int}; any other kind makes code emission fail with a {@link GraalError}.
 */
@Opcode(value="VECTORIZED_HASHCODE")
@SyncPorts(value={@SyncPort(from="https://github.com/openjdk/jdk/blob/6ebea8973feb08a7443d8d86ff52f453dc4aec43/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp#L1697-L1796", sha1="a93850c44f7e34fcec05226bae95fd695b2ea2f7"), @SyncPort(from="https://github.com/openjdk/jdk/blob/6ebea8973feb08a7443d8d86ff52f453dc4aec43/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp#L1918-L1964", sha1="9cbba8bd6c4037427fa46f067abb722b15aca90c"), @SyncPort(from="https://github.com/openjdk/jdk/blob/6ebea8973feb08a7443d8d86ff52f453dc4aec43/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp#L3236-L3423", sha1="2457cf3f9d3ff89c1515fa5d95cc7c8437a5318b")})
public final class AMD64VectorizedHashCodeOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64VectorizedHashCodeOp> TYPE = LIRInstructionClass.create(AMD64VectorizedHashCodeOp.class);

    // Operand fields are intentionally non-final: they carry LIR operand annotations.
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value resultValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value arrayStart;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value length;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value initialValue;
    private final JavaKind arrayKind;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] temp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorTemp;

    /**
     * Table of 33 coefficients. The trailing entries are 31^4, 31^3, 31^2, 31 and 1, i.e. the table
     * holds decreasing powers of 31 wrapped to 32-bit integers (so the first entry is presumably
     * 31^32). Entry 0 is used as the per-iteration multiplier of the vector loop; entries 1..32 are
     * the per-lane weights applied after the loop.
     */
    private static final ArrayDataPointerConstant powersOf31 = AMD64LIRHelper.pointerConstant(16, new int[]{2111290369, -2010103841, 350799937, 11316127, 693101697, -254736545, 961614017, 31019807, -2077209343, -67006753, 1244764481, -2038056289, 211350913, -408824225, -844471871, -997072353, 1353309697, -510534177, 1507551809, -505558625, -293403007, 129082719, -1796951359, -196513505, -1807454463, 1742810335, 887503681, 28629151, 923521, 29791, 961, 31, 1});

    /**
     * Creates the instruction and allocates its temporaries: five QWORD general-purpose temps and
     * thirteen vector temps.
     *
     * @param tool LIR generator used to allocate temporaries
     * @param runtimeCheckedCPUFeatures CPU features forwarded to the superclass
     * @param result register receiving the hash code
     * @param arrayStart register holding the address of the first element
     * @param length register holding the number of elements
     * @param initialValue register holding the initial hash value
     * @param arrayKind element kind of the array
     */
    public AMD64VectorizedHashCodeOp(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, AllocatableValue result, AllocatableValue arrayStart, AllocatableValue length, AllocatableValue initialValue, JavaKind arrayKind) {
        super(TYPE, tool, runtimeCheckedCPUFeatures, AVXKind.AVXSize.YMM);
        this.resultValue = result;
        this.arrayStart = arrayStart;
        this.length = length;
        this.initialValue = initialValue;
        this.arrayKind = arrayKind;
        this.temp = this.allocateTempRegisters(tool, AMD64Kind.QWORD, 5);
        this.vectorTemp = this.allocateVectorRegisters(tool, JavaKind.Byte, 13);
    }

    /**
     * Emits a scalar load of one array element into {@code dst}, zero-extending {@code Boolean} and
     * {@code Char} and sign-extending {@code Byte} and {@code Short} to 32 bits.
     *
     * @throws GraalError if {@code eltype} is not one of the supported kinds
     */
    private static void arraysHashcodeElload(AMD64MacroAssembler masm, Register dst, AMD64Address src, JavaKind eltype) {
        switch (eltype) {
            case Boolean: {
                masm.movzbl(dst, src);
                break;
            }
            case Byte: {
                masm.movsbl(dst, src);
                break;
            }
            case Short: {
                masm.movswl(dst, src);
                break;
            }
            case Char: {
                masm.movzwl(dst, src);
                break;
            }
            case Int: {
                masm.movl(dst, src);
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported JavaKind " + eltype);
            }
        }
    }

    /**
     * Emits the zero-extending vector widening (VPMOVZX*) from {@code fromElemBt} lanes to
     * {@code toElemBt} lanes.
     *
     * @throws GraalError if the combination of element kinds is not supported
     */
    private static void vectorUnsignedCast(AMD64MacroAssembler masm, Register dst, Register src, AVXKind.AVXSize avxSize, JavaKind fromElemBt, JavaKind toElemBt) {
        switch (fromElemBt) {
            case Byte: {
                switch (toElemBt) {
                    case Short: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVZXBW, dst, src, avxSize);
                        break;
                    }
                    case Int: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVZXBD, dst, src, avxSize);
                        break;
                    }
                    case Long: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVZXBQ, dst, src, avxSize);
                        break;
                    }
                    default: {
                        throw GraalError.shouldNotReachHere("Unsupported unsigned vector cast from " + fromElemBt + " to " + toElemBt);
                    }
                }
                break;
            }
            case Short: {
                switch (toElemBt) {
                    case Int: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVZXWD, dst, src, avxSize);
                        break;
                    }
                    case Long: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVZXWQ, dst, src, avxSize);
                        break;
                    }
                    default: {
                        throw GraalError.shouldNotReachHere("Unsupported unsigned vector cast from " + fromElemBt + " to " + toElemBt);
                    }
                }
                break;
            }
            case Int: {
                GraalError.guarantee(toElemBt == JavaKind.Long, "Unsupported unsigned vector cast from %s to %s", fromElemBt, toElemBt);
                masm.emit(AMD64Assembler.VexRMOp.VPMOVZXDQ, dst, src, avxSize);
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported unsigned vector cast from " + fromElemBt + " to " + toElemBt);
            }
        }
    }

    /**
     * Emits the sign-extending vector widening (VPMOVSX*) from {@code fromElemBt} lanes to
     * {@code toElemBt} lanes.
     *
     * @throws GraalError if the combination of element kinds is not supported
     */
    private static void vectorSignedCast(AMD64MacroAssembler masm, Register dst, Register src, AVXKind.AVXSize avxSize, JavaKind fromElemBt, JavaKind toElemBt) {
        switch (fromElemBt) {
            case Byte: {
                switch (toElemBt) {
                    case Short: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVSXBW, dst, src, avxSize);
                        break;
                    }
                    case Int: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVSXBD, dst, src, avxSize);
                        break;
                    }
                    case Long: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVSXBQ, dst, src, avxSize);
                        break;
                    }
                    default: {
                        throw GraalError.shouldNotReachHere("Unsupported signed vector cast from " + fromElemBt + " to " + toElemBt);
                    }
                }
                break;
            }
            case Short: {
                switch (toElemBt) {
                    case Int: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVSXWD, dst, src, avxSize);
                        break;
                    }
                    case Long: {
                        masm.emit(AMD64Assembler.VexRMOp.VPMOVSXWQ, dst, src, avxSize);
                        break;
                    }
                    default: {
                        throw GraalError.shouldNotReachHere("Unsupported signed vector cast from " + fromElemBt + " to " + toElemBt);
                    }
                }
                break;
            }
            case Int: {
                GraalError.guarantee(toElemBt == JavaKind.Long, "Unsupported signed vector cast from %s to %s", fromElemBt, toElemBt);
                masm.emit(AMD64Assembler.VexRMOp.VPMOVSXDQ, dst, src, avxSize);
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported signed vector cast from " + fromElemBt + " to " + toElemBt);
            }
        }
    }

    /**
     * Widens, in place, the packed array elements held in {@code dst} to 32-bit lanes of a YMM
     * register. The signedness of the extension mirrors {@link #arraysHashcodeElload}; {@code Int}
     * elements need no conversion.
     *
     * @throws GraalError if {@code eltype} is not one of the supported kinds
     */
    private static void arraysHashcodeElvcast(AMD64MacroAssembler masm, Register dst, JavaKind eltype) {
        switch (eltype) {
            case Boolean: {
                AMD64VectorizedHashCodeOp.vectorUnsignedCast(masm, dst, dst, AVXKind.AVXSize.YMM, JavaKind.Byte, JavaKind.Int);
                break;
            }
            case Byte: {
                AMD64VectorizedHashCodeOp.vectorSignedCast(masm, dst, dst, AVXKind.AVXSize.YMM, JavaKind.Byte, JavaKind.Int);
                break;
            }
            case Short: {
                AMD64VectorizedHashCodeOp.vectorSignedCast(masm, dst, dst, AVXKind.AVXSize.YMM, JavaKind.Short, JavaKind.Int);
                break;
            }
            case Char: {
                AMD64VectorizedHashCodeOp.vectorUnsignedCast(masm, dst, dst, AVXKind.AVXSize.YMM, JavaKind.Short, JavaKind.Int);
                break;
            }
            case Int: {
                // Already 32-bit lanes: nothing to do.
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported vector cast from " + eltype);
            }
        }
    }

    /**
     * Emits a load of {@code vlenInBytes} bytes from {@code src} into the vector register
     * {@code dst}, picking the move instruction that matches the size.
     *
     * @throws GraalError if {@code vlenInBytes} is not 4, 8, 16, 32 or 64
     */
    private static void loadVector(AMD64MacroAssembler masm, Register dst, AMD64Address src, int vlenInBytes) {
        switch (vlenInBytes) {
            case 4: {
                masm.movdl(dst, src);
                break;
            }
            case 8: {
                masm.movq(dst, src);
                break;
            }
            case 16: {
                masm.movdqu(dst, src);
                break;
            }
            case 32: {
                masm.vmovdqu(dst, src);
                break;
            }
            case 64: {
                masm.emit((AMD64Assembler.VexRMOp)AMD64Assembler.VexMoveOp.VMOVDQU32, dst, src, AVXKind.AVXSize.ZMM);
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported vector load of size " + vlenInBytes);
            }
        }
    }

    /**
     * Emits a lane-wise vector addition {@code dst = src1 + src2} for the given element kind.
     *
     * @throws GraalError if {@code eleType} is not {@code Byte}, {@code Short} or {@code Int}
     */
    private static void reduce(AMD64MacroAssembler masm, AVXKind.AVXSize avxSize, JavaKind eleType, Register dst, Register src1, Register src2) {
        switch (eleType) {
            case Byte: {
                masm.emit(AMD64Assembler.VexRVMOp.VPADDB, dst, src1, src2, avxSize);
                break;
            }
            case Short: {
                masm.emit(AMD64Assembler.VexRVMOp.VPADDW, dst, src1, src2, avxSize);
                break;
            }
            case Int: {
                masm.emit(AMD64Assembler.VexRVMOp.VPADDD, dst, src1, src2, avxSize);
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported reduce type " + eleType);
            }
        }
    }

    /**
     * Emits {@code dst = src1 + (sum of the two low int lanes of src2)}.
     *
     * @param dst general-purpose destination register
     * @param src1 general-purpose register holding the scalar addend
     * @param src2 vector register holding the lanes to sum
     * @param vtmp1 vector scratch register; may alias {@code src2}
     * @param vtmp2 vector scratch register
     */
    private static void reduce2I(AMD64MacroAssembler masm, Register dst, Register src1, Register src2, Register vtmp1, Register vtmp2) {
        // Work on a copy of src2 in vtmp1; the move is only needed when they are different registers.
        if (!vtmp1.equals(src2)) {
            masm.movdqu(vtmp1, src2);
        }
        masm.emit(AMD64Assembler.VexRVMOp.VPHADDD, vtmp1, vtmp1, vtmp1, AVXKind.AVXSize.XMM);
        masm.movdl(vtmp2, src1);
        AMD64VectorizedHashCodeOp.reduce(masm, AVXKind.AVXSize.XMM, JavaKind.Int, vtmp1, vtmp1, vtmp2);
        masm.movdl(dst, vtmp1);
    }

    /**
     * Emits {@code dst = src1 + (sum of the four int lanes of the XMM register src2)}. Parameters
     * have the same roles as in {@link #reduce2I}.
     */
    private static void reduce4I(AMD64MacroAssembler masm, Register dst, Register src1, Register src2, Register vtmp1, Register vtmp2) {
        // Work on a copy of src2 in vtmp1; the move is only needed when they are different registers.
        if (!vtmp1.equals(src2)) {
            masm.movdqu(vtmp1, src2);
        }
        masm.emit(AMD64Assembler.VexRVMOp.VPHADDD, vtmp1, vtmp1, src2, AVXKind.AVXSize.XMM);
        AMD64VectorizedHashCodeOp.reduce2I(masm, dst, src1, vtmp1, vtmp1, vtmp2);
    }

    /**
     * Emits {@code dst = src1 + (sum of the eight int lanes of the YMM register src2)}: a
     * horizontal add within each 128-bit half, an add of the upper half onto the lower half, and a
     * final {@link #reduce2I} step.
     */
    private static void reduce8I(AMD64MacroAssembler masm, Register dst, Register src1, Register src2, Register vtmp1, Register vtmp2) {
        masm.emit(AMD64Assembler.VexRVMOp.VPHADDD, vtmp1, src2, src2, AVXKind.AVXSize.YMM);
        masm.emit(AMD64Assembler.VexMRIOp.VEXTRACTI128, vtmp2, vtmp1, 1, AVXKind.AVXSize.YMM);
        masm.emit(AMD64Assembler.VexRVMOp.VPADDD, vtmp1, vtmp1, vtmp2, AVXKind.AVXSize.YMM);
        AMD64VectorizedHashCodeOp.reduce2I(masm, dst, src1, vtmp1, vtmp1, vtmp2);
    }

    /**
     * Dispatches to the int-lane add-reduction matching {@code vlen} (number of int lanes).
     *
     * @throws GraalError if {@code vlen} is not 2, 4 or 8
     */
    private static void reduceI(AMD64MacroAssembler masm, int vlen, Register dst, Register src1, Register src2, Register vtmp1, Register vtmp2) {
        switch (vlen) {
            case 2: {
                AMD64VectorizedHashCodeOp.reduce2I(masm, dst, src1, src2, vtmp1, vtmp2);
                break;
            }
            case 4: {
                AMD64VectorizedHashCodeOp.reduce4I(masm, dst, src1, src2, vtmp1, vtmp2);
                break;
            }
            case 8: {
                AMD64VectorizedHashCodeOp.reduce8I(masm, dst, src1, src2, vtmp1, vtmp2);
                break;
            }
            default: {
                throw GraalError.shouldNotReachHere("Unsupported vector length " + vlen);
            }
        }
    }

    /**
     * Emits the hash computation. The input registers are first copied into temporaries, so the
     * emitted code only modifies the result register and the temporaries.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Label labelShortUnrolledBegin = new Label();
        Label labelShortUnrolledLoopBegin = new Label();
        Label labelShortUnrolledLoopExit = new Label();
        Label labelUnrolledVectorLoopBegin = new Label();
        Label labelEnd = new Label();

        Register result = ValueUtil.asRegister(this.resultValue);
        Register ary1 = ValueUtil.asRegister(this.temp[0]);
        Register cnt1 = ValueUtil.asRegister(this.temp[1]);
        Register tmp2 = ValueUtil.asRegister(this.temp[2]);
        Register tmp3 = ValueUtil.asRegister(this.temp[3]);
        Register index = ValueUtil.asRegister(this.temp[4]);

        // Copy the inputs so that the @Alive operands are never written.
        masm.movq(ary1, ValueUtil.asRegister(this.arrayStart));
        masm.movl(cnt1, ValueUtil.asRegister(this.length));
        masm.movl(result, ValueUtil.asRegister(this.initialValue));

        // Give the 13 vector temporaries readable names.
        Register vnext = ValueUtil.asRegister(this.vectorTemp[0]);
        Register[] vcoef = new Register[]{ValueUtil.asRegister(this.vectorTemp[1]), ValueUtil.asRegister(this.vectorTemp[2]), ValueUtil.asRegister(this.vectorTemp[3]), ValueUtil.asRegister(this.vectorTemp[4])};
        Register[] vresult = new Register[]{ValueUtil.asRegister(this.vectorTemp[5]), ValueUtil.asRegister(this.vectorTemp[6]), ValueUtil.asRegister(this.vectorTemp[7]), ValueUtil.asRegister(this.vectorTemp[8])};
        Register[] vtmp = new Register[]{ValueUtil.asRegister(this.vectorTemp[9]), ValueUtil.asRegister(this.vectorTemp[10]), ValueUtil.asRegister(this.vectorTemp[11]), ValueUtil.asRegister(this.vectorTemp[12])};

        Stride stride = Stride.fromJavaKind(this.arrayKind);
        int elsize = this.arrayKind.getByteCount();

        // Fewer than 32 elements: skip the vector loop entirely.
        masm.cmplAndJcc(cnt1, 32, AMD64Assembler.ConditionFlag.Less, labelShortUnrolledBegin, false);

        // ---- Unrolled vector loop: 4 accumulators x 8 int lanes = 32 elements per iteration ----
        masm.xorl(index, index);
        for (int idx = 0; idx < 4; ++idx) {
            // Zero the accumulators.
            masm.vpxor(vresult[idx], vresult[idx], vresult[idx], AVXKind.AVXSize.YMM);
        }

        // tmp2 and tmp3 are reused under different names in this stage.
        Register bound = tmp2;
        Register next = tmp3;

        // next = powersOf31[0]; vnext = broadcast(next).
        masm.leaq(tmp2, AMD64LIRHelper.recordExternalAddress(crb, powersOf31));
        masm.movl(next, new AMD64Address(tmp2));
        masm.movdl(vnext, next);
        masm.emit(AMD64Assembler.VexRMOp.VPBROADCASTD, vnext, vnext, AVXKind.AVXSize.YMM);

        // bound = cnt1 rounded down to a multiple of 32 (-32 == ~(32 - 1)).
        masm.movl(bound, cnt1);
        masm.andl(bound, -32);

        masm.bind(labelUnrolledVectorLoopBegin);
        // result *= next
        masm.imull(result, next);
        // Issue all four loads first, then do the arithmetic.
        for (int idx = 0; idx < 4; ++idx) {
            AMD64VectorizedHashCodeOp.loadVector(masm, vtmp[idx], new AMD64Address(ary1, index, stride, 8 * idx * elsize), elsize * 8);
        }
        // vresult[idx] = vresult[idx] * vnext + widen(elements)
        for (int idx = 0; idx < 4; ++idx) {
            masm.emit(AMD64Assembler.VexRVMOp.VPMULLD, vresult[idx], vresult[idx], vnext, AVXKind.AVXSize.YMM);
            AMD64VectorizedHashCodeOp.arraysHashcodeElvcast(masm, vtmp[idx], this.arrayKind);
            masm.emit(AMD64Assembler.VexRVMOp.VPADDD, vresult[idx], vresult[idx], vtmp[idx], AVXKind.AVXSize.YMM);
        }
        masm.addl(index, 32);
        masm.cmplAndJcc(index, bound, AMD64Assembler.ConditionFlag.Less, labelUnrolledVectorLoopBegin, false);

        // Advance the array pointer past the vectorized part and keep only the remainder count.
        masm.leaq(ary1, new AMD64Address(ary1, bound, stride));
        masm.subl(cnt1, bound);

        // Weight each lane with table entries 1..32 (byte offset 4 skips entry 0); this overwrites bound.
        masm.leaq(tmp2, AMD64LIRHelper.recordExternalAddress(crb, powersOf31));
        for (int idx = 0; idx < 4; ++idx) {
            AMD64VectorizedHashCodeOp.loadVector(masm, vcoef[idx], new AMD64Address(tmp2, 4 + idx * JavaKind.Int.getByteCount() * 8), JavaKind.Int.getByteCount() * 8);
            masm.emit(AMD64Assembler.VexRVMOp.VPMULLD, vresult[idx], vresult[idx], vcoef[idx], AVXKind.AVXSize.YMM);
        }
        // result += sum of all lanes of all four accumulators.
        for (int idx = 0; idx < 4; ++idx) {
            AMD64VectorizedHashCodeOp.reduceI(masm, AVXKind.AVXSize.YMM.getBytes() / JavaKind.Int.getByteCount(), result, result, vresult[idx], vtmp[(idx * 2 + 0) % 4], vtmp[(idx * 2 + 1) % 4]);
        }

        // ---- Scalar loop, two elements per iteration; index starts at 1 and addresses the second of each pair ----
        masm.bind(labelShortUnrolledBegin);
        masm.movl(index, 1);
        masm.cmplAndJcc(index, cnt1, AMD64Assembler.ConditionFlag.GreaterEqual, labelShortUnrolledLoopExit, false);

        masm.bind(labelShortUnrolledLoopBegin);
        // result = result * 961 + 31 * ary1[index - 1] + ary1[index]   (961 == 31 * 31)
        masm.movl(tmp3, 961);
        masm.imull(result, tmp3);
        AMD64VectorizedHashCodeOp.arraysHashcodeElload(masm, tmp2, new AMD64Address(ary1, index, stride, -elsize), this.arrayKind);
        // tmp3 = (tmp2 << 5) - tmp2 == 31 * tmp2
        masm.movl(tmp3, tmp2);
        masm.shll(tmp3, 5);
        masm.subl(tmp3, tmp2);
        masm.addl(result, tmp3);
        AMD64VectorizedHashCodeOp.arraysHashcodeElload(masm, tmp3, new AMD64Address(ary1, index, stride), this.arrayKind);
        masm.addl(result, tmp3);
        masm.addl(index, 2);
        masm.cmplAndJcc(index, cnt1, AMD64Assembler.ConditionFlag.Less, labelShortUnrolledLoopBegin, false);

        // ---- Single trailing element ----
        masm.bind(labelShortUnrolledLoopExit);
        // Both paths reaching this label come from a compare of index against cnt1, and this jump
        // consumes those flags (assuming cmplAndJcc emits a plain cmpl followed by the branch):
        // index > cnt1 means no element is left, index == cnt1 means exactly one remains.
        masm.jccb(AMD64Assembler.ConditionFlag.Greater, labelEnd);
        // result = 31 * result + ary1[index - 1]
        masm.movl(tmp2, result);
        masm.shll(result, 5);
        masm.subl(result, tmp2);
        AMD64VectorizedHashCodeOp.arraysHashcodeElload(masm, tmp3, new AMD64Address(ary1, index, stride, -elsize), this.arrayKind);
        masm.addl(result, tmp3);
        masm.bind(labelEnd);
    }
}

