/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.SyncPort;
import org.graalvm.compiler.lir.SyncPorts;
import org.graalvm.compiler.lir.amd64.AMD64LIRInstruction;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;

/**
 * LIR instruction that emits an AMD64 instruction sequence multiplying two arrays of 32-bit
 * words, {@code x} ({@code xlen} words) and {@code y} ({@code ylen} words), into {@code z}.
 *
 * <p>The {@code @SyncPorts} annotation below records the HotSpot sources
 * ({@code stubGenerator_x86_64.cpp} and {@code macroAssembler_x86.cpp}) this class is kept in
 * sync with; presumably it mirrors HotSpot's {@code multiply_to_len} stub used for
 * {@code java.math.BigInteger.multiplyToLen} (confirm against the referenced revisions).
 *
 * <p>Register contract enforced by the constructor: {@code x} in rdi, {@code xlen} in rax,
 * {@code y} in rsi, {@code ylen} in rcx, {@code z} in r8, {@code zlen} in r9. The same
 * registers are additionally listed as temporaries, so the emitted code overwrites them.
 *
 * <p>{@code x}, {@code y} and {@code z} are used directly as base addresses of element 0 (no
 * array-header offset is added in this class). Elements are addressed with a 4-byte stride.
 * Pairs of adjacent words are loaded as one 64-bit value and rotated by 32 bits, which swaps
 * the two 32-bit halves; presumably this is because the arrays hold the most significant word
 * first (as BigInteger magnitudes do) - verify against callers.
 */
@SyncPorts(value={@SyncPort(from="https://github.com/openjdk/jdk/blob/83d92672d4c2637fc37ddd873533c85a9b083904/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp#L2955-L3016", sha1="a4f29fea55385633aac2f71f50d57f9b378516d9"), @SyncPort(from="https://github.com/openjdk/jdk/blob/1fc726a8b34fcd41dae12a6d7c63232f9ccef3f4/src/hotspot/cpu/x86/macroAssembler_x86.cpp#L6244-L6701", sha1="6aac8a818c14df53d6201ac3df0bc35d3aaac9e4")})
public final class AMD64BigIntegerMultiplyToLenOp
extends AMD64LIRInstruction {
    public static final LIRInstructionClass<AMD64BigIntegerMultiplyToLenOp> TYPE = LIRInstructionClass.create(AMD64BigIntegerMultiplyToLenOp.class);
    // Base address of the first multiplicand (must be in rdi).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value xValue;
    // Number of 32-bit words in x (must be in rax).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value xlenValue;
    // Base address of the second multiplicand (must be in rsi).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value yValue;
    // Number of 32-bit words in y (must be in rcx).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value ylenValue;
    // Base address of the result array (must be in r8).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value zValue;
    // Length of z in 32-bit words (must be in r9); presumably xlen + ylen - confirm with caller.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value zlenValue;
    // Scratch register chosen in the constructor: r12, or r14 when r12 is the heap base register.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value tmp1Value;
    // Fixed registers declared as temporaries. The list includes the six input registers
    // (so they are both @Use and @Temp) plus rdx, rbx, r10, r11 and r13.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] tmpValues;

    /**
     * Creates the operation and verifies that every input already lives in its fixed register.
     *
     * @param xValue base address of x, must be rdi
     * @param xlenValue word count of x, must be rax
     * @param yValue base address of y, must be rsi
     * @param ylenValue word count of y, must be rcx
     * @param zValue base address of z, must be r8
     * @param zlenValue word count of z, must be r9
     * @param heapBaseRegister compared against r12 only; when it is r12, r14 is used as the
     *            first scratch register instead (presumably so the heap base is not clobbered)
     */
    public AMD64BigIntegerMultiplyToLenOp(Value xValue, Value xlenValue, Value yValue, Value ylenValue, Value zValue, Value zlenValue, Register heapBaseRegister) {
        super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
        // The emitted code relies on implicit operands (rax/rdx for mulq, rdx for mulxq), so
        // the inputs are pinned to specific registers rather than left to the allocator.
        GraalError.guarantee(ValueUtil.asRegister((Value)xValue).equals((Object)AMD64.rdi), "expect xValue at rdi, but was %s", (Object)xValue);
        GraalError.guarantee(ValueUtil.asRegister((Value)xlenValue).equals((Object)AMD64.rax), "expect xlenValue at rax, but was %s", (Object)xlenValue);
        GraalError.guarantee(ValueUtil.asRegister((Value)yValue).equals((Object)AMD64.rsi), "expect yValue at rsi, but was %s", (Object)yValue);
        GraalError.guarantee(ValueUtil.asRegister((Value)ylenValue).equals((Object)AMD64.rcx), "expect ylenValue at rcx, but was %s", (Object)ylenValue);
        GraalError.guarantee(ValueUtil.asRegister((Value)zValue).equals((Object)AMD64.r8), "expect zValue at r8, but was %s", (Object)zValue);
        GraalError.guarantee(ValueUtil.asRegister((Value)zlenValue).equals((Object)AMD64.r9), "expect zlenValue at r9, but was %s", (Object)zlenValue);
        this.xValue = xValue;
        this.xlenValue = xlenValue;
        this.yValue = yValue;
        this.ylenValue = ylenValue;
        this.zValue = zValue;
        this.zlenValue = zlenValue;
        this.tmp1Value = AMD64.r12.equals((Object)heapBaseRegister) ? AMD64.r14.asValue() : AMD64.r12.asValue();
        this.tmpValues = new Value[]{AMD64.rax.asValue(), AMD64.rcx.asValue(), AMD64.rdx.asValue(), AMD64.rbx.asValue(), AMD64.rsi.asValue(), AMD64.rdi.asValue(), AMD64.r8.asValue(), AMD64.r9.asValue(), AMD64.r10.asValue(), AMD64.r11.asValue(), AMD64.r13.asValue()};
    }

    /**
     * Validates operand kinds (addresses are QWORD, lengths are DWORD), resolves the registers
     * and emits the multiplication sequence.
     *
     * @param crb compilation result builder (not used by this method's body)
     * @param masm assembler receiving the emitted instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        GraalError.guarantee(this.xValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid xValue kind: %s", (Object)this.xValue);
        GraalError.guarantee(this.xlenValue.getPlatformKind().equals(AMD64Kind.DWORD), "Invalid xlenValue kind: %s", (Object)this.xlenValue);
        GraalError.guarantee(this.yValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid yValue kind: %s", (Object)this.yValue);
        GraalError.guarantee(this.ylenValue.getPlatformKind().equals(AMD64Kind.DWORD), "Invalid ylenValue kind: %s", (Object)this.ylenValue);
        GraalError.guarantee(this.zValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid zValue kind: %s", (Object)this.zValue);
        GraalError.guarantee(this.zlenValue.getPlatformKind().equals(AMD64Kind.DWORD), "Invalid zlenValue kind: %s", (Object)this.zlenValue);
        Register x = ValueUtil.asRegister((Value)this.xValue);
        Register xlen = ValueUtil.asRegister((Value)this.xlenValue);
        Register y = ValueUtil.asRegister((Value)this.yValue);
        Register ylen = ValueUtil.asRegister((Value)this.ylenValue);
        Register z = ValueUtil.asRegister((Value)this.zValue);
        Register zlen = ValueUtil.asRegister((Value)this.zlenValue);
        Register tmp1 = ValueUtil.asRegister((Value)this.tmp1Value);
        // tmp2..tmp5 are hard-wired; each of them appears in tmpValues set up by the constructor.
        Register tmp2 = AMD64.r13;
        Register tmp3 = AMD64.r11;
        Register tmp4 = AMD64.r10;
        Register tmp5 = AMD64.rbx;
        AMD64BigIntegerMultiplyToLenOp.multiplyToLen(masm, x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
    }

    /**
     * Emits {@code destHi:destLo += src1 + src2}: each 64-bit addend is added to
     * {@code destLo} and the resulting carry is propagated into {@code destHi}.
     */
    private static void add2WithCarry(AMD64MacroAssembler masm, Register destHi, Register destLo, Register src1, Register src2) {
        masm.addq(destLo, src1);
        masm.adcq(destHi, 0);
        masm.addq(destLo, src2);
        masm.adcq(destHi, 0);
    }

    /**
     * Emits the first pass: multiplies the lowest-indexed-from-the-end chunk of {@code x}
     * (one 64-bit pair of words, or a single word when only one is left) with every chunk of
     * {@code y}, walking {@code idx} downwards, and stores the products into {@code z} at
     * index {@code kdx}, which is lowered by 2 for every product written. Nothing is read
     * from {@code z} here; the running high half is kept in {@code carry}.
     *
     * <p>Uses the one-operand {@code mulq}, so the multiplicand must be in rax and the high
     * half of the result lands in rdx. The only caller passes {@code xlen} (pinned to rax by
     * the constructor) as {@code product}.
     *
     * @param xstart index into x; decremented once here before the 64-bit load
     * @param xAtXstart receives the current chunk of x
     * @param yAtIdx receives the current chunk of y
     * @param carry running carry; expected to be zero on entry (the caller clears it)
     * @param product low half of the product; see the rax requirement above
     * @param idx remaining word count of y on entry; counted down to below zero
     * @param kdx z index one past the next store position
     */
    private static void multiply64x64Loop(AMD64MacroAssembler masm, Register x, Register xstart, Register xAtXstart, Register y, Register yAtIdx, Register z, Register carry, Register product, Register idx, Register kdx) {
        Label labelFirstLoop = new Label();
        Label labelFirstLoopExit = new Label();
        Label labelOneX = new Label();
        Label labelOneY = new Label();
        Label labelMultiply = new Label();
        // If fewer than two words of x remain, fall back to a single 32-bit load (labelOneX).
        masm.declAndJcc(xstart, AMD64Assembler.ConditionFlag.Negative, labelOneX, false);
        // Load two adjacent words as one qword and swap its 32-bit halves.
        masm.movq(xAtXstart, new AMD64Address(x, xstart, Stride.S4, 0));
        masm.rorq(xAtXstart, 32);
        masm.bind(labelFirstLoop);
        // First decrement: no words of y left -> exit. Second decrement: only one word left.
        masm.declAndJcc(idx, AMD64Assembler.ConditionFlag.Negative, labelFirstLoopExit, false);
        masm.declAndJcc(idx, AMD64Assembler.ConditionFlag.Negative, labelOneY, false);
        masm.movq(yAtIdx, new AMD64Address(y, idx, Stride.S4, 0));
        masm.rorq(yAtIdx, 32);
        masm.bind(labelMultiply);
        // rdx:product = xAtXstart * yAtIdx + carry
        masm.movq(product, xAtXstart);
        masm.mulq(yAtIdx);
        masm.addq(product, carry);
        masm.adcq(AMD64.rdx, 0);
        masm.subl(kdx, 2);
        // Store the low 64 bits as two 32-bit words: low half at the higher index.
        masm.movl(new AMD64Address(z, kdx, Stride.S4, 4), product);
        masm.shrq(product, 32);
        masm.movl(new AMD64Address(z, kdx, Stride.S4, 0), product);
        masm.movq(carry, AMD64.rdx);
        masm.jmp(labelFirstLoop);
        // Out-of-line: a single remaining word of y is read from y[0] as a 32-bit value.
        masm.bind(labelOneY);
        masm.movl(yAtIdx, new AMD64Address(y));
        masm.jmp(labelMultiply);
        // Out-of-line: a single remaining word of x is read from x[0] as a 32-bit value.
        masm.bind(labelOneX);
        masm.movl(xAtXstart, new AMD64Address(x));
        masm.jmp(labelFirstLoop);
        masm.bind(labelFirstLoopExit);
    }

    /**
     * Emits one 64-bit multiply-accumulate step on the qword at word index {@code idx} plus
     * byte displacement {@code offset}:
     * {@code rdx:product = xAtXstart * y[..] + z[..] + carry}. The low 64 bits are written
     * back to the same position in {@code z}; the high 64 bits are left in rdx for the caller.
     *
     * <p>{@code yzAtIdx} is scratch: it first holds the y chunk, then the z chunk. As with
     * {@link #multiply64x64Loop}, {@code product} must be rax because {@code mulq} is used.
     */
    private static void multiplyAdd128x128(AMD64MacroAssembler masm, Register xAtXstart, Register y, Register z, Register yzAtIdx, Register idx, Register carry, Register product, int offset) {
        masm.movq(yzAtIdx, new AMD64Address(y, idx, Stride.S4, offset));
        masm.rorq(yzAtIdx, 32);
        masm.movq(product, xAtXstart);
        masm.mulq(yzAtIdx);
        // Reuse the scratch register for the current z chunk that is accumulated into.
        masm.movq(yzAtIdx, new AMD64Address(z, idx, Stride.S4, offset));
        masm.rorq(yzAtIdx, 32);
        AMD64BigIntegerMultiplyToLenOp.add2WithCarry(masm, AMD64.rdx, product, carry, yzAtIdx);
        masm.movl(new AMD64Address(z, idx, Stride.S4, offset + 4), product);
        masm.shrq(product, 32);
        masm.movl(new AMD64Address(z, idx, Stride.S4, offset), product);
    }

    /**
     * Emits the inner ("third") loop without BMI2: accumulates {@code xAtXstart * y} into
     * {@code z}, walking {@code idx} from the word count of y down to zero.
     *
     * <p>The main loop handles four words (two qwords) per iteration; the tail handles a
     * remaining qword (when 2 or 3 words are left) and then a remaining single word (when the
     * leftover count is odd). On exit {@code carry} holds the value to be stored above the
     * processed range by the caller.
     *
     * @param idx number of words of y on entry; modified
     * @param jdx scratch iteration counter (idx / 4)
     * @param carry2 scratch register carrying between the two qword steps of one iteration
     */
    private static void multiply128x128Loop(AMD64MacroAssembler masm, Register xAtXstart, Register y, Register z, Register yzAtIdx, Register idx, Register jdx, Register carry, Register product, Register carry2) {
        Label labelThirdLoop = new Label();
        Label labelThirdLoopExit = new Label();
        Label labelPostThirdLoopDone = new Label();
        Label labelCheck1 = new Label();
        // jdx = idx / 4: number of full four-word iterations.
        masm.movl(jdx, idx);
        masm.andl(jdx, -4);
        masm.shrl(jdx, 2);
        masm.bind(labelThirdLoop);
        masm.sublAndJcc(jdx, 1, AMD64Assembler.ConditionFlag.Negative, labelThirdLoopExit, false);
        masm.subl(idx, 4);
        // Process the qword at byte offset 8 first, then the one at offset 0, chaining the
        // high half of the first step into the second through carry2.
        AMD64BigIntegerMultiplyToLenOp.multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry, product, 8);
        masm.movq(carry2, AMD64.rdx);
        AMD64BigIntegerMultiplyToLenOp.multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry2, product, 0);
        masm.movq(carry, AMD64.rdx);
        masm.jmp(labelThirdLoop);
        masm.bind(labelThirdLoopExit);
        // Tail: idx & 3 words remain (0 -> done).
        masm.andlAndJcc(idx, 3, AMD64Assembler.ConditionFlag.Zero, labelPostThirdLoopDone, false);
        // With 2 or 3 words left, handle one more qword at the new idx (0 or 1).
        masm.sublAndJcc(idx, 2, AMD64Assembler.ConditionFlag.Negative, labelCheck1, false);
        AMD64BigIntegerMultiplyToLenOp.multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry, product, 0);
        masm.movq(carry, AMD64.rdx);
        masm.bind(labelCheck1);
        // Undo the subtraction and test the parity of the leftover count; an odd count means
        // one 32-bit word at index 0 is still pending (idx becomes 0), otherwise finish.
        masm.addl(idx, 2);
        masm.andl(idx, 1);
        masm.sublAndJcc(idx, 1, AMD64Assembler.ConditionFlag.Negative, labelPostThirdLoopDone, false);
        masm.movl(yzAtIdx, new AMD64Address(y, idx, Stride.S4, 0));
        masm.movq(product, xAtXstart);
        masm.mulq(yzAtIdx);
        masm.movl(yzAtIdx, new AMD64Address(z, idx, Stride.S4, 0));
        AMD64BigIntegerMultiplyToLenOp.add2WithCarry(masm, AMD64.rdx, product, yzAtIdx, carry);
        // Only 32 bits are stored, so the new carry is (rdx:product) >> 32.
        masm.movl(new AMD64Address(z, idx, Stride.S4, 0), product);
        masm.shrq(product, 32);
        masm.shlq(AMD64.rdx, 32);
        masm.orq(product, AMD64.rdx);
        masm.movq(carry, product);
        masm.bind(labelPostThirdLoopDone);
    }

    /**
     * BMI2 variant of {@link #multiply128x128Loop}. Has the same loop/tail structure but uses
     * {@code rorxq} and {@code mulxq}; the chunk of x is not passed as a parameter because
     * the caller loads it into rdx, which {@code mulxq} presumably uses as its implicit
     * multiplicand (standard MULX semantics - confirm against the assembler).
     *
     * <p>When ADX is also supported, the two additions per product are emitted as interleaved
     * {@code adcxq}/{@code adoxq} chains instead of {@link #add2WithCarry}.
     *
     * @param carry running carry in and out
     * @param carry2 scratch; high half of the second product in the main loop
     * @param idx number of words of y on entry; modified
     * @param jdx scratch iteration counter (idx / 4)
     * @param yzAtIdx1 scratch for the chunk at byte offset 8 (y, then z)
     * @param yzAtIdx2 scratch for the chunk at byte offset 0 (y, then z)
     * @param tmp scratch; low half of the second product in the main loop
     * @param tmp3 scratch; low half of the first product
     * @param tmp4 scratch; high half of the first product
     */
    private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm, Register y, Register z, Register carry, Register carry2, Register idx, Register jdx, Register yzAtIdx1, Register yzAtIdx2, Register tmp, Register tmp3, Register tmp4) {
        GraalError.guarantee(masm.supports(AMD64.CPUFeature.BMI2) && masm.supports(AMD64.CPUFeature.AVX), "should be used only when BMI2 is available");
        Label labelThirdLoop = new Label();
        Label labelThirdLoopExit = new Label();
        Label labelPostThirdLoopDone = new Label();
        Label labelCheck1 = new Label();
        // jdx = idx / 4: number of full four-word iterations.
        masm.movl(jdx, idx);
        masm.andl(jdx, -4);
        masm.shrl(jdx, 2);
        masm.bind(labelThirdLoop);
        masm.sublAndJcc(jdx, 1, AMD64Assembler.ConditionFlag.Negative, labelThirdLoopExit, false);
        masm.subl(idx, 4);
        // Load both y qwords of this iteration and swap their 32-bit halves.
        masm.movq(yzAtIdx1, new AMD64Address(y, idx, Stride.S4, 8));
        masm.rorxq(yzAtIdx1, yzAtIdx1, 32);
        masm.movq(yzAtIdx2, new AMD64Address(y, idx, Stride.S4, 0));
        masm.rorxq(yzAtIdx2, yzAtIdx2, 32);
        // tmp4:tmp3 = product for offset 8, carry2:tmp = product for offset 0.
        masm.mulxq(tmp4, tmp3, yzAtIdx1);
        masm.mulxq(carry2, tmp, yzAtIdx2);
        // The y scratch registers are now reused for the matching z qwords.
        masm.movq(yzAtIdx1, new AMD64Address(z, idx, Stride.S4, 8));
        masm.rorxq(yzAtIdx1, yzAtIdx1, 32);
        masm.movq(yzAtIdx2, new AMD64Address(z, idx, Stride.S4, 0));
        masm.rorxq(yzAtIdx2, yzAtIdx2, 32);
        if (masm.supports(AMD64.CPUFeature.ADX)) {
            // Two independent carry chains: adcx and adox (presumably CF and OF respectively).
            masm.adcxq(tmp3, carry);
            masm.adoxq(tmp3, yzAtIdx1);
            masm.adcxq(tmp4, tmp);
            masm.adoxq(tmp4, yzAtIdx2);
            // carry is zeroed with a move so that only the pending carries of both chains are
            // folded into carry2 (presumably a move is used because it leaves flags intact).
            masm.movl(carry, 0);
            masm.adcxq(carry2, carry);
            masm.adoxq(carry2, carry);
        } else {
            AMD64BigIntegerMultiplyToLenOp.add2WithCarry(masm, tmp4, tmp3, carry, yzAtIdx1);
            AMD64BigIntegerMultiplyToLenOp.add2WithCarry(masm, carry2, tmp4, tmp, yzAtIdx2);
        }
        masm.movq(carry, carry2);
        // Write back tmp3 (words at offsets 12 and 8) and tmp4 (words at offsets 4 and 0).
        masm.movl(new AMD64Address(z, idx, Stride.S4, 12), tmp3);
        masm.shrq(tmp3, 32);
        masm.movl(new AMD64Address(z, idx, Stride.S4, 8), tmp3);
        masm.movl(new AMD64Address(z, idx, Stride.S4, 4), tmp4);
        masm.shrq(tmp4, 32);
        masm.movl(new AMD64Address(z, idx, Stride.S4, 0), tmp4);
        masm.jmp(labelThirdLoop);
        masm.bind(labelThirdLoopExit);
        // Tail: idx & 3 words remain (0 -> done).
        masm.andlAndJcc(idx, 3, AMD64Assembler.ConditionFlag.Zero, labelPostThirdLoopDone, false);
        // With 2 or 3 words left, handle one more qword at the new idx (0 or 1).
        masm.sublAndJcc(idx, 2, AMD64Assembler.ConditionFlag.Negative, labelCheck1, false);
        masm.movq(yzAtIdx1, new AMD64Address(y, idx, Stride.S4, 0));
        masm.rorxq(yzAtIdx1, yzAtIdx1, 32);
        masm.mulxq(tmp4, tmp3, yzAtIdx1);
        masm.movq(yzAtIdx2, new AMD64Address(z, idx, Stride.S4, 0));
        masm.rorxq(yzAtIdx2, yzAtIdx2, 32);
        AMD64BigIntegerMultiplyToLenOp.add2WithCarry(masm, tmp4, tmp3, carry, yzAtIdx2);
        masm.movl(new AMD64Address(z, idx, Stride.S4, 4), tmp3);
        masm.shrq(tmp3, 32);
        masm.movl(new AMD64Address(z, idx, Stride.S4, 0), tmp3);
        masm.movq(carry, tmp4);
        masm.bind(labelCheck1);
        // Undo the subtraction and test the parity of the leftover count; an odd count means
        // one 32-bit word at index 0 is still pending (idx becomes 0), otherwise finish.
        masm.addl(idx, 2);
        masm.andl(idx, 1);
        masm.sublAndJcc(idx, 1, AMD64Assembler.ConditionFlag.Negative, labelPostThirdLoopDone, false);
        masm.movl(tmp4, new AMD64Address(y, idx, Stride.S4, 0));
        masm.mulxq(carry2, tmp3, tmp4);
        masm.movl(tmp4, new AMD64Address(z, idx, Stride.S4, 0));
        AMD64BigIntegerMultiplyToLenOp.add2WithCarry(masm, carry2, tmp3, tmp4, carry);
        // Only 32 bits are stored, so the new carry is (carry2:tmp3) >> 32.
        masm.movl(new AMD64Address(z, idx, Stride.S4, 0), tmp3);
        masm.shrq(tmp3, 32);
        masm.shlq(carry2, 32);
        masm.orq(tmp3, carry2);
        masm.movq(carry, tmp3);
        masm.bind(labelPostThirdLoopDone);
    }

    /**
     * Emits the complete multiplication.
     *
     * <p>Structure of the emitted code:
     * <ol>
     * <li>First loop ({@link #multiply64x64Loop}): the last chunk of x times all of y, stored
     * into the tail of z, followed by a store of the remaining carry.</li>
     * <li>Second loop: for every further chunk of x (two words at a time, one word at the
     * very end), run the third loop to multiply-accumulate into z and then store the carry
     * into the one or two words in front of the range just updated.</li>
     * </ol>
     *
     * <p>Several input registers double as scratch: {@code xlen} serves as {@code product},
     * {@code zlen} as {@code xAtXstart}, and {@code x}/{@code ylen} are handed to the third
     * loop as scratch, which is why they are saved on the stack around it.
     */
    private static void multiplyToLen(AMD64MacroAssembler masm, Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
        Register idx = tmp1;
        Register kdx = tmp2;
        Register xstart = tmp3;
        Register yAtIdx = tmp4;
        Register carry = tmp5;
        // Register reuse: xlen and zlen are copied into xstart/kdx below before being reused.
        Register product = xlen;
        Register xAtXstart = zlen;
        Label labelDone = new Label();
        Label labelSecondLoop = new Label();
        Label labelCarry = new Label();
        Label labelLastX = new Label();
        Label labelThirdLoopPrologue = new Label();
        boolean useBMI2Instructions = masm.supports(AMD64.CPUFeature.BMI2) && masm.supports(AMD64.CPUFeature.AVX);
        masm.movl(idx, ylen);
        masm.movl(kdx, zlen);
        masm.xorq(carry, carry);
        masm.movl(xstart, xlen);
        // xstart = xlen - 1; nothing to do when xlen is zero.
        masm.declAndJcc(xstart, AMD64Assembler.ConditionFlag.Negative, labelDone, false);
        AMD64BigIntegerMultiplyToLenOp.multiply64x64Loop(masm, x, xstart, xAtXstart, y, yAtIdx, z, carry, product, idx, kdx);
        // Store the carry left over from the first loop in front of the words just written:
        // nothing if kdx is 0, one word if exactly one slot is left, otherwise two words.
        masm.testlAndJcc(kdx, kdx, AMD64Assembler.ConditionFlag.Zero, labelSecondLoop, false);
        masm.sublAndJcc(kdx, 1, AMD64Assembler.ConditionFlag.Zero, labelCarry, false);
        masm.movl(new AMD64Address(z, kdx, Stride.S4, 0), carry);
        masm.shrq(carry, 32);
        masm.subl(kdx, 1);
        masm.bind(labelCarry);
        masm.movl(new AMD64Address(z, kdx, Stride.S4, 0), carry);
        // jdx aliases idx (both are tmp1); the first loop no longer needs idx.
        Register jdx = tmp1;
        masm.bind(labelSecondLoop);
        masm.xorq(carry, carry);
        masm.movl(jdx, ylen);
        masm.sublAndJcc(xstart, 1, AMD64Assembler.ConditionFlag.Negative, labelDone, false);
        // Save z and advance it so the third loop can index y and z with the same idx.
        masm.push(z);
        masm.leaq(z, new AMD64Address(z, xstart, Stride.S4, 4));
        // A second decrement tells whether a full two-word chunk of x is available.
        masm.sublAndJcc(xstart, 1, AMD64Assembler.ConditionFlag.Negative, labelLastX, false);
        if (useBMI2Instructions) {
            // The BMI2 loop takes the x chunk from rdx rather than from a parameter.
            masm.movq(AMD64.rdx, new AMD64Address(x, xstart, Stride.S4, 0));
            masm.rorxq(AMD64.rdx, AMD64.rdx, 32);
        } else {
            masm.movq(xAtXstart, new AMD64Address(x, xstart, Stride.S4, 0));
            masm.rorq(xAtXstart, 32);
        }
        masm.bind(labelThirdLoopPrologue);
        // x, xstart (tmp3) and ylen are clobbered inside the third loop; preserve them.
        masm.push(x);
        masm.push(xstart);
        masm.push(ylen);
        if (useBMI2Instructions) {
            // Scratch mapping: carry2 = x, jdx = ylen, yzAtIdx1 = product, yzAtIdx2 = tmp2,
            // tmp = xAtXstart.
            AMD64BigIntegerMultiplyToLenOp.multiply128x128BMI2Loop(masm, y, z, carry, x, jdx, ylen, product, tmp2, xAtXstart, tmp3, tmp4);
        } else {
            // Scratch mapping: yzAtIdx = yAtIdx, jdx = ylen, carry2 = x.
            AMD64BigIntegerMultiplyToLenOp.multiply128x128Loop(masm, xAtXstart, y, z, yAtIdx, jdx, ylen, carry, product, x);
        }
        masm.pop(ylen);
        // The saved xstart is popped into xlen and copied back into tmp3 (xstart) below.
        masm.pop(xlen);
        masm.pop(x);
        masm.pop(z);
        masm.movl(tmp3, xlen);
        // Store the carry into z[xstart + 1] and, unless xstart is negative, its upper 32
        // bits into z[xstart]. tmp3 ends up equal to xstart again for the next iteration.
        masm.addl(tmp3, 1);
        masm.movl(new AMD64Address(z, tmp3, Stride.S4, 0), carry);
        masm.sublAndJcc(tmp3, 1, AMD64Assembler.ConditionFlag.Negative, labelDone, false);
        masm.shrq(carry, 32);
        masm.movl(new AMD64Address(z, tmp3, Stride.S4, 0), carry);
        masm.jmp(labelSecondLoop);
        // Out-of-line: only one word of x is left; load x[0] as a 32-bit value.
        masm.bind(labelLastX);
        if (useBMI2Instructions) {
            masm.movl(AMD64.rdx, new AMD64Address(x));
        } else {
            masm.movl(xAtXstart, new AMD64Address(x));
        }
        masm.jmp(labelThirdLoopPrologue);
        masm.bind(labelDone);
    }
}

