/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.aarch64;

import java.util.Arrays;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler;
import org.graalvm.compiler.asm.aarch64.AArch64Address;
import org.graalvm.compiler.asm.aarch64.AArch64Assembler;
import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
import org.graalvm.compiler.code.DataSection;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.aarch64.AArch64ComplexVectorOp;
import org.graalvm.compiler.lir.aarch64.AArch64LIRInstruction;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

@Opcode(value="CALC_STRING_ATTRIBUTES")
public final class AArch64CalcStringAttributesOp
extends AArch64ComplexVectorOp {
    /** LIR instruction class descriptor created for this op. */
    public static final LIRInstructionClass<AArch64CalcStringAttributesOp> TYPE = LIRInstructionClass.create(AArch64CalcStringAttributesOp.class);
    /** Selects which of the emit* routines {@link #emitCode} dispatches to. */
    private final LIRGeneratorTool.CalcStringAttributesEncoding encoding;
    /** Copied from {@code encoding.stride} in the constructor; the emit routines assert its {@code log2}. */
    private final Stride stride;
    /** When set, the UTF-8 path only counts continuation bytes instead of running the table-based validation. */
    private final boolean assumeValid;
    /** Output register; a QWORD for UTF_8/UTF_16 and a DWORD for every other encoding (checked in the constructor). */
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value result;
    /** Base pointer of the data (QWORD). Declared @Alive - presumably so it never shares a register with a temp or the result. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value array;
    /** Offset added to {@link #array} to form the start address (QWORD). */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value offset;
    /** Length as an int (DWORD); BMP scales it by 2 to get a byte count, LATIN1 uses it as a byte count directly. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value length;
    /** General-purpose temporaries; their number comes from getNumberOfTempRegisters (may be zero). */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] temp;
    /** Vector temporaries; their number comes from getNumberOfRequiredVectorRegisters. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] vectorTemp;
    private static final byte TOO_SHORT = 1;
    private static final byte TOO_LONG = 2;
    private static final byte OVERLONG_3 = 4;
    private static final byte SURROGATE = 16;
    private static final byte OVERLONG_2 = 32;
    private static final byte TWO_CONTS = -128;
    private static final byte TOO_LARGE = 8;
    private static final byte TOO_LARGE_1000 = 64;
    private static final byte OVERLONG_4 = 64;
    private static final byte CARRY = -125;
    private static final byte[] UTF8_BYTE_1_HIGH_TABLE = new byte[]{2, 2, 2, 2, 2, 2, 2, 2, -128, -128, -128, -128, 33, 1, 21, 73};
    private static final byte[] UTF8_BYTE_1_LOW_TABLE = new byte[]{-25, -93, -125, -125, -117, -53, -53, -53, -53, -53, -53, -53, -53, -37, -53, -53};
    private static final byte[] UTF8_BYTE_2_HIGH_TABLE = new byte[]{1, 1, 1, 1, 1, 1, 1, 1, -26, -82, -70, -70, 1, 1, 1, 1};
    private static final byte[] UTF_8_STATE_MACHINE = new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12};

    /**
     * Creates the op, validates the operand kinds and reserves the temporaries the selected
     * encoding needs.
     *
     * @param tool used to allocate scalar and vector temporaries
     * @param encoding encoding whose attributes are computed; also supplies the stride
     * @param array base pointer, must be a QWORD
     * @param offset offset added to {@code array}, must be a QWORD
     * @param length length, must be a DWORD
     * @param result destination; QWORD for UTF_8 and UTF_16, DWORD for all other encodings
     * @param assumeValid selects the cheaper UTF-8 variant and influences the temporary counts
     */
    public AArch64CalcStringAttributesOp(LIRGeneratorTool tool, LIRGeneratorTool.CalcStringAttributesEncoding encoding, Value array, Value offset, Value length, Value result, boolean assumeValid) {
        super(TYPE);
        this.encoding = encoding;
        this.assumeValid = assumeValid;
        this.stride = encoding.stride;

        GraalError.guarantee(array.getPlatformKind() == AArch64Kind.QWORD, "pointer value expected");
        GraalError.guarantee(offset.getPlatformKind() == AArch64Kind.QWORD, "long value expected");
        GraalError.guarantee(length.getPlatformKind() == AArch64Kind.DWORD, "int value expected");

        // Only UTF_8 and UTF_16 produce a 64-bit result; everything else yields an int.
        final boolean wideResult = encoding == LIRGeneratorTool.CalcStringAttributesEncoding.UTF_8
                || encoding == LIRGeneratorTool.CalcStringAttributesEncoding.UTF_16;
        final AArch64Kind expectedResultKind = wideResult ? AArch64Kind.QWORD : AArch64Kind.DWORD;
        final String resultKindMessage = wideResult ? "long value expected" : "int value expected";
        GraalError.guarantee(result.getPlatformKind() == expectedResultKind, resultKindMessage);

        this.array = array;
        this.offset = offset;
        this.length = length;
        this.result = result;

        final int scalarTempCount = getNumberOfTempRegisters(encoding, assumeValid);
        this.temp = allocateTempRegisters(tool, scalarTempCount);

        final int vectorTempCount = getNumberOfRequiredVectorRegisters(encoding, assumeValid);
        if (needConsecutiveVectors(encoding, assumeValid)) {
            this.vectorTemp = allocateConsecutiveVectorRegisters(tool, vectorTempCount);
        } else {
            this.vectorTemp = allocateVectorRegisters(tool, vectorTempCount);
        }
    }

    /**
     * Tells whether the vector temporaries must be allocated as consecutive registers: always for
     * UTF_32, and for UTF_8 only when the input is not assumed valid. The validating UTF-8 path
     * loads four of its vector temporaries with one multi-register LD1, which is presumably the
     * reason for the requirement.
     */
    private static boolean needConsecutiveVectors(LIRGeneratorTool.CalcStringAttributesEncoding encoding, boolean assumeValid) {
        if (encoding == LIRGeneratorTool.CalcStringAttributesEncoding.UTF_32) {
            return true;
        }
        if (assumeValid) {
            return false;
        }
        return encoding == LIRGeneratorTool.CalcStringAttributesEncoding.UTF_8;
    }

    /**
     * Returns how many general-purpose temporaries the code for {@code encoding} needs:
     * 1 for LATIN1 and BMP, 2 for UTF_16, 2 or 5 for UTF_8 (assumed valid / validating), and 0
     * for any other encoding.
     */
    private static int getNumberOfTempRegisters(LIRGeneratorTool.CalcStringAttributesEncoding encoding, boolean assumeValid) {
        final int count;
        switch (encoding) {
            case UTF_8:
                count = assumeValid ? 2 : 5;
                break;
            case UTF_16:
                count = 2;
                break;
            case LATIN1:
            case BMP:
                count = 1;
                break;
            default:
                count = 0;
                break;
        }
        return count;
    }

    /**
     * Returns how many vector temporaries the code for {@code encoding} needs: 3 for LATIN1, 4 for
     * BMP, 8 or 17 for UTF_8 (assumed valid / validating), 8 for UTF_16 and 11 for UTF_32.
     *
     * @throws GraalError for an encoding not listed above
     */
    private static int getNumberOfRequiredVectorRegisters(LIRGeneratorTool.CalcStringAttributesEncoding encoding, boolean assumeValid) {
        final int required;
        switch (encoding) {
            case UTF_32:
                required = 11;
                break;
            case UTF_16:
                required = 8;
                break;
            case UTF_8:
                required = assumeValid ? 8 : 17;
                break;
            case BMP:
                required = 4;
                break;
            case LATIN1:
                required = 3;
                break;
            default:
                throw GraalError.shouldNotReachHereUnexpectedValue(encoding);
        }
        return required;
    }

    /**
     * Emits the machine code for this op: computes the start address {@code array + offset} and a
     * working copy of {@code length} in two scratch registers, dispatches to the encoding-specific
     * emitter, and finally binds the shared exit label that all emitters jump to.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler asm) {
        try (AArch64MacroAssembler.ScratchRegister addressScratch = asm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister lengthScratch = asm.getScratchRegister()) {
            final Register dataPtr = addressScratch.getRegister();
            final Register remaining = lengthScratch.getRegister();
            final Register resultReg = ValueUtil.asRegister(result);
            final Label exit = new Label();

            // dataPtr = array + offset; remaining = length (32-bit move)
            asm.add(64, dataPtr, ValueUtil.asRegister(array), ValueUtil.asRegister(offset));
            asm.mov(32, remaining, ValueUtil.asRegister(length));

            // temp[0] is only read inside the cases that actually reserve a scalar temporary.
            switch (encoding) {
                case LATIN1:
                    emitLatin1(asm, dataPtr, remaining, ValueUtil.asRegister(temp[0]), resultReg, exit,
                            ValueUtil.asRegister(vectorTemp[0]), ValueUtil.asRegister(vectorTemp[1]), ValueUtil.asRegister(vectorTemp[2]));
                    break;
                case BMP:
                    emitBMP(asm, dataPtr, remaining, ValueUtil.asRegister(temp[0]), resultReg, exit);
                    break;
                case UTF_8:
                    emitUTF8(crb, asm, dataPtr, remaining, ValueUtil.asRegister(temp[0]), resultReg, exit);
                    break;
                case UTF_16:
                    emitUTF16(crb, asm, dataPtr, remaining, ValueUtil.asRegister(temp[0]), resultReg, exit);
                    break;
                case UTF_32:
                    emitUTF32(asm, dataPtr, remaining, resultReg, exit);
                    break;
                default:
                    throw GraalError.shouldNotReachHereUnexpectedValue(encoding);
            }

            asm.align(16);
            asm.bind(exit);
        }
    }

    /**
     * Emits code that scans {@code len} bytes starting at {@code arr} and leaves in {@code ret}
     * 0 when no byte has bit 0x80 set and 1 otherwise. Falls through to, or jumps to, {@code end}.
     * <p>
     * Inputs of 32 bytes or more are processed in 32-byte vector steps followed by one overlapping
     * load of the last 32 bytes. Shorter inputs are covered by two overlapping loads of 16, 8, 4
     * or 2 bytes (first chunk and last chunk), or by a single byte load.
     *
     * @param asm assembler to emit into
     * @param arr register holding the start address; clobbered
     * @param len register holding the byte count; clobbered (reused as end address and as mask)
     * @param tmp scalar temporary
     * @param ret result register
     * @param end label the caller binds after this code
     * @param vecArray1 vector temporary for loaded data
     * @param vecArray2 vector temporary for loaded data
     * @param vecMask vector temporary that holds the 0x80 byte mask
     */
    static void emitLatin1(AArch64MacroAssembler asm, Register arr, Register len, Register tmp, Register ret, Label end, Register vecArray1, Register vecArray2, Register vecMask) {
        Label tailLessThan32 = new Label();
        Label tailLessThan16 = new Label();
        Label tailLessThan8 = new Label();
        Label tailLessThan4 = new Label();
        Label tailLessThan2 = new Label();
        Label tailLoaded = new Label();
        Label vectorLoop = new Label();
        // vecMask: byte immediate -128 (0x80), i.e. only the top bit of each byte lane.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask, -128L);
        // len -= 32; a negative result means there are fewer than 32 bytes in total.
        asm.subs(64, len, len, 32);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan32);
        // The len register is reused: it now holds arr + (len - 32), the address of the last 32-byte window.
        Register refAddress = len;
        asm.add(64, refAddress, arr, len);
        // Result delivered when the loop leaves early through `end`.
        asm.mov(ret, 1);
        asm.align(16);
        asm.bind(vectorLoop);
        // Load 32 bytes into two vectors; post-indexed addressing advances arr by 32.
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        // OR both vectors, mask with 0x80; the following branch treats NE as "some masked bit is set".
        AArch64CalcStringAttributesOp.vectorTest(asm, vecArray1, vecArray2, vecMask);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, end);
        // Keep looping while arr is still below the start of the last window.
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, vectorLoop);
        // Loop tail: re-test the last 32 bytes; this may overlap bytes that were already checked.
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createPairBaseRegisterOnlyAddress(128, arr));
        AArch64CalcStringAttributesOp.vectorTest(asm, vecArray1, vecArray2, vecMask);
        // ret = 1 if NE, else 0.
        asm.cset(64, ret, AArch64Assembler.ConditionFlag.NE);
        asm.jmp(end);
        asm.bind(tailLessThan32);
        // len becomes (byte count - 16); negative means fewer than 16 bytes.
        asm.adds(64, len, len, 16);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan16);
        // 16..31 bytes: first 16 bytes plus the (possibly overlapping) last 16 bytes at arr + len.
        asm.fldr(128, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(128, arr));
        asm.fldr(128, vecArray2, AArch64Address.createRegisterOffsetAddress(128, arr, len, false));
        AArch64CalcStringAttributesOp.vectorTest(asm, vecArray1, vecArray2, vecMask);
        assert (AArch64CalcStringAttributesOp.returnValueAssertions());
        asm.cset(64, ret, AArch64Assembler.ConditionFlag.NE);
        asm.jmp(end);
        asm.bind(tailLessThan16);
        // Scalar tails: each step loads the first and last chunk of the given size into ret and tmp
        // and jumps to tailLoaded, or falls to the next smaller size when the input is shorter.
        AArch64CalcStringAttributesOp.tailLoad(asm, arr, len, ret, tmp, null, tailLessThan8, tailLoaded, 8);
        AArch64CalcStringAttributesOp.tailLoad(asm, arr, len, ret, tmp, tailLessThan8, tailLessThan4, tailLoaded, 4);
        AArch64CalcStringAttributesOp.tailLoad(asm, arr, len, ret, tmp, tailLessThan4, tailLessThan2, tailLoaded, 2);
        asm.bind(tailLessThan2);
        // Zero or one byte left: ret = 0 so that the OR at tailLoaded only sees tmp.
        asm.mov(64, ret, AArch64.zr);
        // len becomes (byte count - 1); negative means empty input, which returns 0.
        asm.adds(64, len, len, 1);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, end);
        asm.ldr(8, tmp, AArch64Address.createBaseRegisterOnlyAddress(8, arr));
        asm.align(16);
        asm.bind(tailLoaded);
        // The len register is reused once more, now for the low 64 bits of the 0x80 mask.
        Register mask = len;
        asm.neon.moveFromIndex(AArch64ASIMDAssembler.ElementSize.DoubleWord, AArch64ASIMDAssembler.ElementSize.DoubleWord, mask, vecMask, 0);
        // Combine both loaded chunks and test them against the mask in one go.
        asm.orr(64, ret, ret, tmp);
        asm.tst(64, ret, mask);
        assert (AArch64CalcStringAttributesOp.returnValueAssertions());
        asm.cset(64, ret, AArch64Assembler.ConditionFlag.NE);
    }

    /**
     * Emits one step of the scalar tail cascade. Adds {@code chunkSize} to {@code len}; if the sum
     * is negative control continues at {@code nextTail}. Otherwise the first {@code chunkSize}
     * bytes at {@code arr} are loaded into {@code tmp1}, the {@code chunkSize} bytes at
     * {@code arr + len} into {@code tmp2}, and control jumps to {@code done}.
     *
     * @param entry label bound at the start of this step, or {@code null} if the caller already
     *            bound one
     * @param chunkSize load width in bytes
     */
    private static void tailLoad(AArch64MacroAssembler asm, Register arr, Register len, Register tmp1, Register tmp2, Label entry, Label nextTail, Label done, int chunkSize) {
        final int loadBits = chunkSize * Byte.SIZE;
        if (null != entry) {
            asm.bind(entry);
        }

        // Too short for this chunk size? Then try the next smaller one.
        asm.adds(64, len, len, chunkSize);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, nextTail);

        final AArch64Address firstChunk = AArch64Address.createBaseRegisterOnlyAddress(loadBits, arr);
        asm.ldr(loadBits, tmp1, firstChunk);
        final AArch64Address lastChunk = AArch64Address.createRegisterOffsetAddress(loadBits, arr, len, false);
        asm.ldr(loadBits, tmp2, lastChunk);

        asm.jmp(done);
    }

    /**
     * Emits code that scans {@code len} 16-bit elements starting at {@code arr} and leaves a code
     * in {@code ret}: 0 when no element has a bit selected by the "ascii" mask set, 1 when some
     * element does but all upper bytes are zero, and 2 when some upper byte is non-zero.
     * <p>
     * The structure is an "ascii" loop over 32-byte steps, a "latin" loop that takes over once
     * the ascii test fails, overlapping loads for the end of the input, and scalar tails for
     * inputs shorter than 16 bytes.
     * NOTE(review): the halfword mask built from -128 is presumably 0xff80 per lane, and 0xff00
     * after the shift by one - confirm against moveVI/shlVVI.
     *
     * @param asm assembler to emit into
     * @param arr register holding the start address; clobbered (finally reused as a mask)
     * @param len register holding the element count; clobbered
     * @param tmp scalar temporary
     * @param ret result register
     * @param end label the caller binds after this code
     */
    private void emitBMP(AArch64MacroAssembler asm, Register arr, Register len, Register tmp, Register ret, Label end) {
        assert (this.stride.log2 == 1);
        Label tailLessThan32 = new Label();
        Label tailLessThan16 = new Label();
        Label tailLessThan8 = new Label();
        Label tailLessThan4 = new Label();
        Label tailLoaded = new Label();
        Label asciiLoop = new Label();
        Label latinLoop = new Label();
        Label latinFound = new Label();
        Label latinContinue = new Label();
        Register vecArray1 = ValueUtil.asRegister((Value)this.vectorTemp[0]);
        Register vecArray2 = ValueUtil.asRegister((Value)this.vectorTemp[1]);
        Register vecMask = ValueUtil.asRegister((Value)this.vectorTemp[2]);
        Register vecTmp1 = ValueUtil.asRegister((Value)this.vectorTemp[3]);
        // Convert the element count into a byte count.
        asm.lsl(64, len, len, 1L);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.HalfWord, vecMask, -128L);
        // len -= 32; a negative result means fewer than 32 bytes in total.
        asm.subs(64, len, len, 32);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan32);
        // The len register is reused: it now holds the address of the last 32-byte window.
        Register refAddress = len;
        asm.add(64, refAddress, arr, len);
        asm.align(16);
        asm.bind(asciiLoop);
        // Load 32 bytes; post-indexed addressing advances arr by 32.
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        AArch64CalcStringAttributesOp.vectorTest(asm, AArch64ASIMDAssembler.ElementSize.HalfWord, vecTmp1, vecTmp1, vecArray1, vecArray2, vecMask);
        // Some element has a masked bit set: switch to the latin loop, which re-examines this block.
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, latinFound);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, asciiLoop);
        // Ascii tail: test the last 32 bytes (may overlap bytes that were already checked).
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        AArch64CalcStringAttributesOp.vectorTest(asm, AArch64ASIMDAssembler.ElementSize.HalfWord, vecTmp1, vecTmp1, vecArray1, vecArray2, vecMask);
        assert (AArch64CalcStringAttributesOp.returnValueAssertions());
        // ret = 1 if a masked bit was set, else 0.
        asm.cset(64, ret, AArch64Assembler.ConditionFlag.NE);
        // Gather the odd-indexed bytes of both vectors (presumably the upper byte of each element) and test them.
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecArray1, vecArray2);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecTmp1);
        // Keep ret on EQ, otherwise ret + 1.
        asm.csinc(64, ret, ret, ret, AArch64Assembler.ConditionFlag.EQ);
        asm.jmp(end);
        asm.align(16);
        asm.bind(latinFound);
        // Result delivered when the latin loop leaves early through `end`.
        asm.mov(ret, 2);
        // Skip the load: the block that failed the ascii test is still in vecArray1/vecArray2.
        asm.jmp(latinContinue);
        asm.align(16);
        asm.bind(latinLoop);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.bind(latinContinue);
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecArray1, vecArray2);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecTmp1);
        // A non-zero gathered byte ends the scan with ret == 2.
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, end);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, latinLoop);
        // Latin tail: last 32 bytes; start from 1 and bump to 2 if a gathered byte is non-zero.
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.mov(ret, 1);
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecArray1, vecArray2);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecTmp1);
        asm.csinc(64, ret, ret, ret, AArch64Assembler.ConditionFlag.EQ);
        asm.jmp(end);
        asm.bind(tailLessThan32);
        // len becomes (byte count - 16); negative means fewer than 16 bytes.
        asm.adds(64, len, len, 16);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan16);
        // 16..31 bytes: first 16 bytes plus the (possibly overlapping) last 16 bytes at arr + len.
        asm.fldr(128, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(128, arr));
        asm.fldr(128, vecArray2, AArch64Address.createRegisterOffsetAddress(128, arr, len, false));
        AArch64CalcStringAttributesOp.vectorTest(asm, AArch64ASIMDAssembler.ElementSize.HalfWord, vecTmp1, vecTmp1, vecArray1, vecArray2, vecMask);
        asm.cset(64, ret, AArch64Assembler.ConditionFlag.NE);
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecArray1, vecArray2);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecTmp1);
        asm.csinc(64, ret, ret, ret, AArch64Assembler.ConditionFlag.EQ);
        asm.jmp(end);
        // Scalar tails for 8..15 and 4..7 bytes: first and last chunk are loaded into ret and tmp.
        AArch64CalcStringAttributesOp.tailLoad(asm, arr, len, ret, tmp, tailLessThan16, tailLessThan8, tailLoaded, 8);
        AArch64CalcStringAttributesOp.tailLoad(asm, arr, len, ret, tmp, tailLessThan8, tailLessThan4, tailLoaded, 4);
        asm.bind(tailLessThan4);
        // Zero or one element left: ret = 0 so that the OR at tailLoaded only sees tmp.
        asm.mov(64, ret, AArch64.zr);
        // len becomes (byte count - 2); negative means empty input, which returns 0.
        asm.adds(64, len, len, 2);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, end);
        asm.ldr(16, tmp, AArch64Address.createBaseRegisterOnlyAddress(16, arr));
        asm.align(16);
        asm.bind(tailLoaded);
        // len and arr are no longer needed as length/address and are reused for the two scalar masks.
        Register maskAscii = len;
        Register maskLatin = arr;
        asm.neon.moveFromIndex(AArch64ASIMDAssembler.ElementSize.DoubleWord, AArch64ASIMDAssembler.ElementSize.DoubleWord, maskAscii, vecMask, 0);
        // Shift every halfword lane of the mask left by one to obtain the second mask.
        asm.neon.shlVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.HalfWord, vecMask, vecMask, 1);
        asm.neon.moveFromIndex(AArch64ASIMDAssembler.ElementSize.DoubleWord, AArch64ASIMDAssembler.ElementSize.DoubleWord, maskLatin, vecMask, 0);
        // Combine both loaded chunks, then classify: 0/1 from the first mask, +1 if the second mask hits.
        asm.orr(64, tmp, ret, tmp);
        asm.tst(64, tmp, maskAscii);
        asm.cset(64, ret, AArch64Assembler.ConditionFlag.NE);
        asm.tst(64, tmp, maskLatin);
        asm.csinc(64, ret, ret, ret, AArch64Assembler.ConditionFlag.EQ);
    }

    /**
     * In-place variant of the mask test: {@code vecArray1} serves as destination for both the OR
     * and the AND step, so its contents are overwritten.
     */
    private static void vectorTest(AArch64MacroAssembler asm, Register vecArray1, Register vecArray2, Register vecMask) {
        final Register scratch = vecArray1;
        vectorTest(asm, scratch, scratch, vecArray1, vecArray2, vecMask);
    }

    /**
     * Emits {@code vecDstOrr = vecArray1 | vecArray2}, then {@code vecDstAnd = vecDstOrr & vecMask},
     * and finally runs {@code vectorCheckZero} on {@code vecDstAnd}. Callers branch on NE/EQ
     * right after this sequence.
     */
    private static void vectorTest(AArch64MacroAssembler asm, Register vecDstOrr, Register vecDstAnd, Register vecArray1, Register vecArray2, Register vecMask) {
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        asm.neon.orrVVV(fullWidth, vecDstOrr, vecArray1, vecArray2);
        asm.neon.andVVV(fullWidth, vecDstAnd, vecDstOrr, vecMask);
        vectorCheckZero(asm, vecDstAnd, vecDstAnd);
    }

    /**
     * Element-size aware mask test: emits {@code vecDstOrr = vecArray1 | vecArray2}, a CMTST of
     * {@code vecDstOrr} against {@code vecMask} with lanes of {@code eSize} into
     * {@code vecDstCmtst}, and then the element-size variant of {@code vectorCheckZero} on that
     * result. Callers branch on NE/EQ right after this sequence.
     */
    private static void vectorTest(AArch64MacroAssembler asm, AArch64ASIMDAssembler.ElementSize eSize, Register vecDstOrr, Register vecDstCmtst, Register vecArray1, Register vecArray2, Register vecMask) {
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        asm.neon.orrVVV(fullWidth, vecDstOrr, vecArray1, vecArray2);
        asm.neon.cmtstVVV(fullWidth, eSize, vecDstCmtst, vecDstOrr, vecMask);
        vectorCheckZero(asm, eSize, vecDstCmtst, vecDstCmtst, true);
    }

    private void emitUTF8(CompilationResultBuilder crb, AArch64MacroAssembler asm, Register arr, Register len, Register tmp, Register ret, Label end) {
        assert (this.stride.log2 == 0);
        Label tailLessThan32 = new Label();
        Label tailLessThan32Continue = new Label();
        Label tailLessThan16 = new Label();
        Label tailLessThan8 = new Label();
        Label asciiLoop = new Label();
        Label multibyteFound = new Label();
        Label multibyteContinue = new Label();
        Label multibyteLoop = new Label();
        Label multibyteFoundTail = new Label();
        Label labelScalarTail = new Label();
        Register vecArray1 = ValueUtil.asRegister((Value)this.vectorTemp[0]);
        Register vecArray2 = ValueUtil.asRegister((Value)this.vectorTemp[1]);
        Register vecTmp1 = ValueUtil.asRegister((Value)this.vectorTemp[2]);
        Register vecTmp2 = ValueUtil.asRegister((Value)this.vectorTemp[3]);
        Register vecTmp3 = ValueUtil.asRegister((Value)this.vectorTemp[4]);
        Register vecMask0x80 = ValueUtil.asRegister((Value)this.vectorTemp[5]);
        Register vecMask0xC0 = ValueUtil.asRegister((Value)this.vectorTemp[6]);
        Register vecNCB = ValueUtil.asRegister((Value)this.vectorTemp[7]);
        Register tmp2 = ValueUtil.asRegister((Value)this.temp[1]);
        DataSection.Data maskTail = this.assumeValid ? this.createTailANDMask(crb, 32) : AArch64CalcStringAttributesOp.createTailShuffleMask(crb, 16);
        asm.mov(32, ret, len);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask0xC0, -64L);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask0x80, -128L);
        asm.subs(64, len, len, 32);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan32);
        Register refAddress = len;
        asm.add(64, refAddress, arr, len);
        asm.align(16);
        asm.bind(asciiLoop);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        AArch64CalcStringAttributesOp.vectorTest(asm, vecTmp1, vecTmp2, vecArray1, vecArray2, vecMask0x80);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, multibyteFound);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, asciiLoop);
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.bind(tailLessThan32Continue);
        AArch64CalcStringAttributesOp.vectorTest(asm, vecTmp1, vecTmp2, vecArray1, vecArray2, vecMask0x80);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, this.assumeValid ? multibyteFoundTail : multibyteFound);
        assert (AArch64CalcStringAttributesOp.returnValueAssertions());
        asm.lsl(64, ret, ret, 32L);
        asm.jmp(end);
        if (this.assumeValid) {
            asm.bind(multibyteFoundTail);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNCB, 0L);
            AArch64CalcStringAttributesOp.utf8CountContinuationBytes(asm, vecArray1, vecArray2, vecTmp1, vecTmp2, vecMask0xC0, vecMask0x80, vecNCB);
            asm.neon.uaddlvSV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNCB, vecNCB);
            asm.neon.moveFromIndex(AArch64ASIMDAssembler.ElementSize.DoubleWord, AArch64ASIMDAssembler.ElementSize.DoubleWord, tmp, vecNCB, 0);
            asm.sub(64, ret, ret, tmp);
            asm.mov(tmp, 11);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
            asm.align(16);
            asm.bind(multibyteFound);
            AArch64CalcStringAttributesOp.setCodepointCountOuterLoopRefAddress(asm, arr, refAddress, tmp2, 4064, 32);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNCB, 0L);
            asm.jmp(multibyteContinue);
            Label multibyteOuterLoop = AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopBegin(asm, arr, refAddress, tmp2, 4064, 32);
            asm.align(16);
            asm.bind(multibyteLoop);
            asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
            asm.bind(multibyteContinue);
            AArch64CalcStringAttributesOp.utf8CountContinuationBytes(asm, vecArray1, vecArray2, vecTmp1, vecTmp2, vecMask0xC0, vecMask0x80, vecNCB);
            asm.cmp(64, arr, tmp2);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.LS, multibyteLoop);
            AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopEnd(asm, tmp, ret, vecNCB, tmp2, refAddress, multibyteOuterLoop);
            Label skipTail = new Label();
            asm.mov(64, arr, refAddress);
            asm.and(32, len, ValueUtil.asRegister((Value)this.length), 31L);
            asm.cbz(32, len, skipTail);
            asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, maskTail);
            asm.add(64, tmp, tmp, len, AArch64Assembler.ExtendType.UXTW, 0);
            asm.fldp(128, vecTmp1, vecTmp2, AArch64Address.createPairBaseRegisterOnlyAddress(128, tmp));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray1, vecArray1, vecTmp1);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
            AArch64CalcStringAttributesOp.utf8CountContinuationBytes(asm, vecArray1, vecArray2, vecTmp1, vecTmp2, vecMask0xC0, vecMask0x80, vecNCB);
            asm.neon.uaddlvSV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNCB, vecNCB);
            asm.neon.moveFromIndex(AArch64ASIMDAssembler.ElementSize.DoubleWord, AArch64ASIMDAssembler.ElementSize.DoubleWord, tmp, vecNCB, 0);
            asm.sub(64, ret, ret, tmp);
            asm.bind(skipTail);
            asm.mov(tmp, 11);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
        } else {
            Label multibyteTail = new Label();
            Label multibyteNoFastPath = new Label();
            byte[] masks = new byte[64];
            Arrays.fill(masks, 0, 16, (byte)-1);
            masks[13] = -16;
            masks[14] = -32;
            masks[15] = -64;
            System.arraycopy(UTF8_BYTE_1_LOW_TABLE, 0, masks, 16, 16);
            System.arraycopy(UTF8_BYTE_1_HIGH_TABLE, 0, masks, 32, 16);
            System.arraycopy(UTF8_BYTE_2_HIGH_TABLE, 0, masks, 48, 16);
            DataSection.Data masksData = AArch64CalcStringAttributesOp.writeToDataSection(crb, masks);
            Register vecMask3ByteSeq = ValueUtil.asRegister((Value)this.vectorTemp[8]);
            Register vecMask4ByteSeq = ValueUtil.asRegister((Value)this.vectorTemp[9]);
            Register vecMask0x0F = ValueUtil.asRegister((Value)this.vectorTemp[10]);
            Register vecIsIncompleteMask = ValueUtil.asRegister((Value)this.vectorTemp[11]);
            Register vecLUTByte1Lo = ValueUtil.asRegister((Value)this.vectorTemp[12]);
            Register vecLUTByte1Hi = ValueUtil.asRegister((Value)this.vectorTemp[13]);
            Register vecLUTByte2Hi = ValueUtil.asRegister((Value)this.vectorTemp[14]);
            Register vecPrevIsIncomplete = ValueUtil.asRegister((Value)this.vectorTemp[15]);
            Register vecPrev = ValueUtil.asRegister((Value)this.vectorTemp[16]);
            Register vecResult = vecArray2;
            asm.align(16);
            asm.bind(multibyteFound);
            asm.sub(64, arr, arr, 16);
            asm.add(64, refAddress, refAddress, 16);
            asm.bind(multibyteFoundTail);
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, masksData);
            asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecIsIncompleteMask, vecLUTByte1Lo, vecLUTByte1Hi, vecLUTByte2Hi, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, tmp, 64));
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask4ByteSeq, -16L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask3ByteSeq, -32L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask0x0F, 15L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrevIsIncomplete, 0L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrev, 0L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecResult, 0L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNCB, 0L);
            AArch64CalcStringAttributesOp.setCodepointCountOuterLoopRefAddress(asm, arr, refAddress, tmp2, 4064, 16);
            asm.jmp(multibyteContinue);
            Label multibyteOuterLoop = AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopBegin(asm, arr, refAddress, tmp2, 4064, 16);
            asm.align(16);
            asm.bind(multibyteLoop);
            asm.fldr(128, vecArray1, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arr, 16));
            asm.bind(multibyteContinue);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecMask0x80);
            AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecTmp1);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, multibyteNoFastPath);
            asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecResult, vecResult, vecPrevIsIncomplete);
            asm.cmp(64, arr, tmp2);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.LS, multibyteLoop);
            asm.jmp(multibyteTail);
            asm.bind(multibyteNoFastPath);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecMask0xC0);
            asm.neon.cmeqVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecTmp1, vecMask0x80);
            asm.neon.usraVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNCB, vecTmp1, 7);
            asm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecPrev, vecArray1, 15);
            asm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp2, vecTmp1, 4);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecMask0x0F);
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecLUTByte1Hi, vecTmp2);
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecLUTByte1Lo, vecTmp1);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
            asm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp2, vecArray1, 4);
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecLUTByte2Hi, vecTmp2);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
            asm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecPrev, vecArray1, 14);
            asm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp3, vecPrev, vecArray1, 13);
            asm.neon.cmhsVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp2, vecTmp2, vecMask3ByteSeq);
            asm.neon.cmhsVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp3, vecTmp3, vecMask4ByteSeq);
            asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecTmp2, vecTmp3);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecTmp2, vecMask0x80);
            asm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
            asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecResult, vecResult, vecTmp1);
            asm.neon.cmhsVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrevIsIncomplete, vecArray1, vecIsIncompleteMask);
            asm.neon.moveVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecPrev, vecArray1);
            asm.cmp(64, arr, tmp2);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.LS, multibyteLoop);
            asm.bind(multibyteTail);
            Label done = AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopEnd(asm, tmp, ret, vecNCB, tmp2, refAddress, multibyteOuterLoop, arr, 16);
            asm.mov(64, arr, refAddress);
            asm.fldr(128, vecArray1, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arr, 16));
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, maskTail);
            asm.and(32, tmp2, ValueUtil.asRegister((Value)this.length), 15L);
            asm.add(64, tmp, tmp, 16);
            asm.neg(64, tmp2, tmp2);
            asm.fldr(128, vecTmp1, AArch64Address.createRegisterOffsetAddress(128, tmp, tmp2, false));
            asm.mov(64, tmp2, refAddress);
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray1, vecArray1, vecTmp1);
            asm.jmp(multibyteContinue);
            asm.bind(done);
            asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecResult, vecResult, vecPrevIsIncomplete);
            asm.mov(tmp, 11);
            AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecResult, vecResult);
            assert (AArch64CalcStringAttributesOp.returnValueAssertions());
            asm.csinc(64, tmp, tmp, tmp, AArch64Assembler.ConditionFlag.EQ);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
        }
        Label tailLessThan16Continue = new Label();
        asm.bind(tailLessThan32);
        if (this.assumeValid) {
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, maskTail);
        }
        asm.adds(64, len, len, 16);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan16);
        asm.fldr(128, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(128, arr));
        asm.fldr(128, vecArray2, AArch64Address.createRegisterOffsetAddress(128, arr, len, false));
        if (this.assumeValid) {
            asm.add(64, tmp, tmp, 16);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
            asm.jmp(tailLessThan32Continue);
        } else {
            asm.add(64, refAddress, arr, len);
            asm.add(64, arr, arr, 16);
            AArch64CalcStringAttributesOp.vectorTest(asm, vecTmp1, vecTmp2, vecArray1, vecArray2, vecMask0x80);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, multibyteFoundTail);
            assert (AArch64CalcStringAttributesOp.returnValueAssertions());
            asm.lsl(64, ret, ret, 32L);
            asm.jmp(end);
        }
        asm.bind(tailLessThan16);
        asm.adds(64, len, len, 8);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan8);
        asm.fldr(64, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(64, arr));
        asm.fldr(64, vecArray2, AArch64Address.createRegisterOffsetAddress(64, arr, len, false));
        if (this.assumeValid) {
            asm.add(64, tmp, tmp, 24);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
            asm.jmp(tailLessThan32Continue);
        } else {
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, AArch64CalcStringAttributesOp.createTailShuffleMask(crb, 8));
            asm.add(64, tmp, tmp, 8);
            asm.neg(64, len, len);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
            asm.neon.insXX(AArch64ASIMDAssembler.ElementSize.DoubleWord, vecArray1, 1, vecArray2, 0);
            asm.bind(tailLessThan16Continue);
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecMask0x80);
            asm.sub(64, refAddress, arr, 16);
            AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecTmp1);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, multibyteFoundTail);
            assert (AArch64CalcStringAttributesOp.returnValueAssertions());
            asm.lsl(64, ret, ret, 32L);
            asm.jmp(end);
        }
        asm.bind(tailLessThan8);
        asm.adds(64, len, len, 4);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, labelScalarTail);
        asm.fldr(32, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(32, arr));
        if (this.assumeValid) {
            asm.fldr(32, vecArray2, AArch64Address.createRegisterOffsetAddress(32, arr, len, false));
            asm.add(64, tmp, tmp, 28);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
            asm.jmp(tailLessThan32Continue);
        } else {
            asm.ldr(32, tmp, AArch64Address.createRegisterOffsetAddress(32, arr, len, false));
            asm.sub(64, len, len, 4);
            asm.neg(64, len, len, AArch64Assembler.ShiftType.LSL, 3);
            asm.lsr(64, tmp, tmp, len);
            asm.neon.insXG(AArch64ASIMDAssembler.ElementSize.Word, vecArray1, 1, tmp);
            asm.jmp(tailLessThan16Continue);
        }
        Label scalarAsciiLoop = new Label();
        Label scalarMultiByteFound = new Label();
        Label scalarMultiByteLoop = new Label();
        asm.bind(labelScalarTail);
        asm.mov(32, len, ValueUtil.asRegister((Value)this.length));
        asm.cbz(64, len, end);
        asm.mov(64, tmp2, AArch64.zr);
        asm.align(16);
        asm.bind(scalarAsciiLoop);
        asm.ldr(8, tmp, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arr, 1));
        asm.tst(64, tmp, 128L);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, scalarMultiByteFound);
        asm.subs(64, len, len, 1);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, scalarAsciiLoop);
        assert (AArch64CalcStringAttributesOp.returnValueAssertions());
        asm.lsl(64, ret, ret, 32L);
        asm.jmp(end);
        if (this.assumeValid) {
            asm.align(16);
            asm.bind(scalarMultiByteLoop);
            asm.ldr(8, tmp, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arr, 1));
            asm.bind(scalarMultiByteFound);
            asm.and(64, tmp, tmp, 192L);
            asm.compare(64, tmp, 128);
            asm.csinc(64, tmp2, tmp2, tmp2, AArch64Assembler.ConditionFlag.NE);
            asm.subs(64, len, len, 1);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, scalarMultiByteLoop);
            asm.sub(64, ret, ret, tmp2);
            asm.mov(tmp, 11);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
        } else {
            Register state = ValueUtil.asRegister((Value)this.temp[2]);
            Register type = ValueUtil.asRegister((Value)this.temp[3]);
            Label scalarMultibyteFoundContinue = new Label();
            asm.bind(scalarMultiByteFound);
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp2, AArch64CalcStringAttributesOp.writeToDataSection(crb, UTF_8_STATE_MACHINE));
            asm.mov(64, state, AArch64.zr);
            asm.neg(64, ret, ret);
            asm.jmp(scalarMultibyteFoundContinue);
            asm.align(16);
            asm.bind(scalarMultiByteLoop);
            asm.ldr(8, tmp, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arr, 1));
            asm.bind(scalarMultibyteFoundContinue);
            asm.ldr(8, type, AArch64Address.createRegisterOffsetAddress(8, tmp2, tmp, false));
            asm.add(64, type, type, state);
            asm.add(64, type, type, 256);
            asm.ldr(8, state, AArch64Address.createRegisterOffsetAddress(8, tmp2, type, false));
            asm.and(64, tmp, tmp, 192L);
            asm.compare(64, tmp, 128);
            asm.csinc(64, ret, ret, ret, AArch64Assembler.ConditionFlag.NE);
            asm.subs(64, len, len, 1);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, scalarMultiByteLoop);
            asm.neg(64, ret, ret);
            asm.mov(tmp, 11);
            asm.tst(64, state, state);
            asm.csinc(64, tmp, tmp, tmp, AArch64Assembler.ConditionFlag.EQ);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
        }
    }

    /**
     * Emits code that, for every byte lane of {@code vecArray1} and {@code vecArray2}, adds one to
     * the matching lane of {@code vecNCB} whenever {@code (b & vecMask0xC0) == vecMask0x80} holds.
     * Assuming the mask registers contain the constants their names suggest, this counts UTF-8
     * continuation bytes ({@code 10xxxxxx}) - TODO confirm the mask contents at the call sites.
     *
     * @param asm assembler receiving the instructions
     * @param vecArray1 first vector of input bytes (not modified)
     * @param vecArray2 second vector of input bytes (not modified)
     * @param vecTmp1 scratch vector, overwritten with the compare result for {@code vecArray1}
     * @param vecTmp2 scratch vector, overwritten with the compare result for {@code vecArray2}
     * @param vecMask0xC0 vector ANDed with the input before comparing
     * @param vecMask0x80 vector the masked input is compared against
     * @param vecNCB per-lane accumulator that is incremented for each match
     */
    private static void utf8CountContinuationBytes(AArch64MacroAssembler asm, Register vecArray1, Register vecArray2, Register vecTmp1, Register vecTmp2, Register vecMask0xC0, Register vecMask0x80, Register vecNCB) {
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize byteLanes = AArch64ASIMDAssembler.ElementSize.Byte;

        // Isolate the bits selected by the mask in both input vectors.
        asm.neon.andVVV(fullReg, vecTmp1, vecArray1, vecMask0xC0);
        asm.neon.andVVV(fullReg, vecTmp2, vecArray2, vecMask0xC0);

        // Lanes equal to the reference pattern become 0xFF, all other lanes 0x00.
        asm.neon.cmeqVVV(fullReg, byteLanes, vecTmp1, vecTmp1, vecMask0x80);
        asm.neon.cmeqVVV(fullReg, byteLanes, vecTmp2, vecTmp2, vecMask0x80);

        // Shift-right-and-accumulate by 7 turns 0xFF into 1 and adds it to the counter lane.
        asm.neon.usraVVI(fullReg, byteLanes, vecNCB, vecTmp1, 7);
        asm.neon.usraVVI(fullReg, byteLanes, vecNCB, vecTmp2, 7);
    }

    /**
     * Emits code that computes into {@code refAddressOuter} the address bounding the next run of
     * the inner counting loop: {@code arr + outerBlockSize - innerBlockSize} when
     * {@code arr + outerBlockSize} lies (unsigned) below {@code refAddressTail}, and
     * {@code refAddressTail} otherwise (including when the addition sets the overflow flag).
     * Presumably this caps the inner loop so that the byte-wide lane counters cannot wrap around
     * before they are flushed - TODO confirm.
     *
     * @param asm assembler receiving the instructions
     * @param arr register holding the current read address
     * @param refAddressTail register holding the reference address of the final block
     * @param refAddressOuter destination register for the computed bound
     * @param outerBlockSize number of bytes one outer-loop iteration may cover
     * @param innerBlockSize number of bytes consumed per inner-loop iteration
     */
    private static void setCodepointCountOuterLoopRefAddress(AArch64MacroAssembler asm, Register arr, Register refAddressTail, Register refAddressOuter, int outerBlockSize, int innerBlockSize) {
        assert outerBlockSize > 2 * innerBlockSize;
        final AArch64Assembler.ConditionFlag overflowed = AArch64Assembler.ConditionFlag.VS;
        final AArch64Assembler.ConditionFlag unsignedLower = AArch64Assembler.ConditionFlag.LO;

        // Tentative bound: arr + outerBlockSize, falling back to the tail address on overflow.
        asm.adds(64, refAddressOuter, arr, outerBlockSize);
        asm.csel(64, refAddressOuter, refAddressTail, refAddressOuter, overflowed);

        // Compare first, then subtract with the non-flag-setting sub so the final select still
        // sees the result of the comparison made before the subtraction.
        asm.cmp(64, refAddressOuter, refAddressTail);
        asm.sub(64, refAddressOuter, refAddressOuter, innerBlockSize);
        asm.csel(64, refAddressOuter, refAddressOuter, refAddressTail, unsignedLower);
    }

    /**
     * Emits the head of the outer counting loop: a 16-byte aligned, freshly bound label followed
     * by the computation of the inner-loop bound (see
     * {@link #setCodepointCountOuterLoopRefAddress}).
     *
     * @param asm assembler receiving the instructions
     * @param arr register holding the current read address
     * @param refAddressTail register holding the reference address of the final block
     * @param refAddressOuter register receiving the inner-loop bound
     * @param outerBlockSize number of bytes one outer-loop iteration may cover
     * @param innerBlockSize number of bytes consumed per inner-loop iteration
     * @return the bound label marking the outer loop head, to be passed to the matching
     *         {@code emitCodepointCountOuterLoopEnd} call
     */
    private static Label emitCodepointCountOuterLoopBegin(AArch64MacroAssembler asm, Register arr, Register refAddressTail, Register refAddressOuter, int outerBlockSize, int innerBlockSize) {
        Label outerLoopHead = new Label();
        asm.align(16);
        asm.bind(outerLoopHead);
        setCodepointCountOuterLoopRefAddress(asm, arr, refAddressTail, refAddressOuter, outerBlockSize, innerBlockSize);
        return outerLoopHead;
    }

    /**
     * Emits the end of the outer counting loop: sums the byte lanes of {@code vecNCB}, subtracts
     * that sum from {@code ret}, clears {@code vecNCB}, and branches back to {@code loopHead}
     * unless {@code refAddressOuter} equals {@code refAddressTail}.
     *
     * @param asm assembler receiving the instructions
     * @param tmp scratch general-purpose register, overwritten with the lane sum
     * @param ret register holding the running count that the lane sum is subtracted from
     * @param vecNCB per-lane accumulator; zeroed by the emitted code
     * @param refAddressOuter register holding the bound of the inner loop that just finished
     * @param refAddressTail register holding the reference address of the final block
     * @param loopHead label of the outer loop head to branch back to
     */
    private static void emitCodepointCountOuterLoopEnd(AArch64MacroAssembler asm, Register tmp, Register ret, Register vecNCB, Register refAddressOuter, Register refAddressTail, Label loopHead) {
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize byteLanes = AArch64ASIMDAssembler.ElementSize.Byte;
        final AArch64ASIMDAssembler.ElementSize dword = AArch64ASIMDAssembler.ElementSize.DoubleWord;

        // Horizontal add of all byte lanes, then move the total into a general-purpose register.
        asm.neon.uaddlvSV(fullReg, byteLanes, vecNCB, vecNCB);
        asm.neon.moveFromIndex(dword, dword, tmp, vecNCB, 0);

        // Reset the accumulator for the next outer iteration and apply the total to the result.
        asm.neon.moveVI(fullReg, byteLanes, vecNCB, 0L);
        asm.sub(64, ret, ret, tmp);

        // Another outer iteration is needed as long as the bound has not reached the tail address.
        asm.cmp(64, refAddressOuter, refAddressTail);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, loopHead);
    }

    /**
     * Variant of the outer-loop end with an additional exit: after flushing the lane counters
     * into {@code ret}, the emitted code jumps to the returned label when {@code arr} equals
     * {@code refAddressTail + innerLoopBlockSize}; otherwise it branches back to
     * {@code loopHead} unless {@code refAddressOuter} equals {@code refAddressTail}, in which
     * case execution falls through.
     *
     * @param asm assembler receiving the instructions
     * @param tmp scratch general-purpose register (lane sum, then the exit address)
     * @param ret register holding the running count that the lane sum is subtracted from
     * @param vecNCB per-lane accumulator; zeroed by the emitted code
     * @param refAddressOuter register holding the bound of the inner loop that just finished
     * @param refAddressTail register holding the reference address of the final block
     * @param loopHead label of the outer loop head to branch back to
     * @param arr register holding the current read address
     * @param innerLoopBlockSize number of bytes consumed per inner-loop iteration
     * @return an unbound label that the caller must bind at the code to run once the final block
     *         has been processed
     */
    private static Label emitCodepointCountOuterLoopEnd(AArch64MacroAssembler asm, Register tmp, Register ret, Register vecNCB, Register refAddressOuter, Register refAddressTail, Label loopHead, Register arr, int innerLoopBlockSize) {
        final Label allBlocksDone = new Label();
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize byteLanes = AArch64ASIMDAssembler.ElementSize.Byte;
        final AArch64ASIMDAssembler.ElementSize dword = AArch64ASIMDAssembler.ElementSize.DoubleWord;

        // Flush the per-lane counters: sum them, clear the accumulator, subtract the sum from ret.
        asm.neon.uaddlvSV(fullReg, byteLanes, vecNCB, vecNCB);
        asm.neon.moveFromIndex(dword, dword, tmp, vecNCB, 0);
        asm.neon.moveVI(fullReg, byteLanes, vecNCB, 0L);
        asm.sub(64, ret, ret, tmp);

        // Leave the loop when the read address sits exactly one inner block past the tail address.
        asm.add(64, tmp, refAddressTail, innerLoopBlockSize);
        asm.cmp(64, arr, tmp);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.EQ, allBlocksDone);

        // Otherwise iterate again while the inner-loop bound has not reached the tail address.
        asm.cmp(64, refAddressOuter, refAddressTail);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, loopHead);
        return allBlocksDone;
    }

    /**
     * Emits the UTF-16 variant of the string attribute calculation.
     *
     * The generated code leaves {@code (count << 32) | codeRange} in {@code ret}, where
     * {@code count} starts as the value of {@code len} and is reduced by the number of matched
     * surrogates / surrogate pairs, and {@code codeRange} is one of the small constants used
     * below (0, 1, 2, 11, or 12 when validation finds a mismatch). The input is scanned in
     * 32-byte blocks through a chain of loops; each loop only tests for the next "wider" class of
     * characters and hands over to the following loop as soon as one is found:
     * <ol>
     * <li>{@code asciiLoop}: any bit selected by the 0xFF80 halfword mask set?</li>
     * <li>{@code latinLoop}: any upper byte of a 16-bit value non-zero?</li>
     * <li>{@code bmpLoop}: any upper byte matching the surrogate pattern?</li>
     * <li>{@code surrogateLoop}: count (and, unless {@code assumeValid}, validate) surrogates.</li>
     * </ol>
     * Inputs shorter than 32 bytes are assembled into {@code vecArray1}/{@code vecArray2} by the
     * {@code tailLessThan*} sections at the end and then join the main path at
     * {@code tailLessThan32Continue}.
     *
     * @param crb compilation result builder, used to place mask tables in the data section
     * @param asm assembler receiving the instructions
     * @param arr register holding the address of the first element; advanced by the emitted loads
     * @param len register holding the element count on entry; it is converted to a byte count and
     *            later reused as the tail reference address, so its entry value is destroyed
     * @param tmp scratch general-purpose register
     * @param ret register receiving the result
     * @param end label the emitted code jumps to once {@code ret} is final
     */
    private void emitUTF16(CompilationResultBuilder crb, AArch64MacroAssembler asm, Register arr, Register len, Register tmp, Register ret, Label end) {
        assert (this.stride.log2 == 1);
        Label tailLessThan32 = new Label();
        Label tailLessThan32Continue = new Label();
        Label tailLessThan16 = new Label();
        Label tailLessThan8 = new Label();
        Label tailLessThan4 = new Label();
        Label asciiLoop = new Label();
        Label latinContinue = new Label();
        Label latinLoop = new Label();
        Label latinFoundTail = new Label();
        Label bmpContinue = new Label();
        Label bmpLoop = new Label();
        Label bmpFoundTail = new Label();
        Label surrogateFound = new Label();
        Label surrogateContinue = new Label();
        Label surrogateLoop = new Label();
        Label surrogateFoundTail = new Label();
        Register vecArray1 = ValueUtil.asRegister((Value)this.vectorTemp[0]);
        Register vecArray2 = ValueUtil.asRegister((Value)this.vectorTemp[1]);
        Register vecTmp1 = ValueUtil.asRegister((Value)this.vectorTemp[2]);
        Register vecTmp2 = ValueUtil.asRegister((Value)this.vectorTemp[3]);
        Register vecTmp3 = ValueUtil.asRegister((Value)this.vectorTemp[4]);
        Register vecMask = ValueUtil.asRegister((Value)this.vectorTemp[5]);
        Register vecMaskSurrogates = ValueUtil.asRegister((Value)this.vectorTemp[6]);
        Register vecNSurrogates = ValueUtil.asRegister((Value)this.vectorTemp[7]);
        Register tmp2 = ValueUtil.asRegister((Value)this.temp[1]);
        // Tail mask table: an AND mask when the input is assumed valid, a shuffle (TBL) mask otherwise.
        DataSection.Data maskTail = this.assumeValid ? this.createTailANDMask(crb, 16) : AArch64CalcStringAttributesOp.createTailShuffleMask(crb, 16);
        // ret starts out as the element count; matched surrogates are subtracted from it later.
        asm.mov(32, ret, len);
        // Convert the element count to a byte count (2 bytes per element).
        asm.lsl(64, len, len, 1L);
        // Every 16-bit lane = 0xFF80 (-128), i.e. all bits above the 7-bit ASCII range.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.HalfWord, vecMask, -128L);
        // Pattern compared against shifted upper bytes in utf16MatchSurrogates:
        // 54 == 0xD8 >> 2 when assumeValid, 27 == 0xD8 >> 3 otherwise.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMaskSurrogates, this.assumeValid ? 54L : 27L);
        // Fewer than 32 bytes in total: handled by the tail sections at the end of this method.
        asm.subs(64, len, len, 32);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan32);
        // NOTE: refAddress is an alias of len; from here on the register holds
        // arr + byteLength - 32, the address of the last full 32-byte block.
        Register refAddress = len;
        asm.add(64, refAddress, arr, len);
        asm.align(16);
        // ---- Loop 1: leave as soon as any bit selected by vecMask is set (NE after vectorTest).
        asm.bind(asciiLoop);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        AArch64CalcStringAttributesOp.vectorTest(asm, AArch64ASIMDAssembler.ElementSize.HalfWord, vecTmp1, vecTmp2, vecArray1, vecArray2, vecMask);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, latinContinue);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, asciiLoop);
        // Final block: reload the last 32 bytes of the input (may overlap bytes already checked).
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        // Short inputs (< 32 bytes) join here with vecArray1/vecArray2 prepared by the tail sections.
        asm.bind(tailLessThan32Continue);
        AArch64CalcStringAttributesOp.vectorTest(asm, AArch64ASIMDAssembler.ElementSize.HalfWord, vecTmp1, vecTmp2, vecArray1, vecArray2, vecMask);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, latinFoundTail);
        assert (AArch64CalcStringAttributesOp.returnValueAssertions());
        // Nothing found: result is (count << 32) with code range 0 in the low half.
        asm.lsl(64, ret, ret, 32L);
        asm.jmp(end);
        asm.align(16);
        // ---- Loop 2: leave as soon as any upper byte of a 16-bit value is non-zero.
        asm.bind(latinLoop);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.bind(latinContinue);
        // UZP2 on bytes gathers the odd-indexed bytes of both vectors into vecArray2.
        // NOTE(review): these are the upper bytes of each value assuming little-endian layout.
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecArray2);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, bmpContinue);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, latinLoop);
        // Final block of loop 2.
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.bind(latinFoundTail);
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp1, vecArray2);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, bmpFoundTail);
        // All upper bytes are zero: result is (count << 32) | 1.
        // NOTE(review): 1 is presumably the Latin-1 code range - confirm against returnValueAssertions().
        asm.mov(tmp, 1);
        asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
        asm.jmp(end);
        asm.align(16);
        // ---- Loop 3: leave as soon as an upper byte matches the surrogate pattern.
        asm.bind(bmpLoop);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
        asm.bind(bmpContinue);
        // Leaves the per-lane match result in vecTmp1; the surrogate paths below reuse it.
        this.utf16MatchSurrogatesAndTest(asm, vecArray2, vecTmp1, vecTmp2, vecMaskSurrogates);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, surrogateFound);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, bmpLoop);
        // Final block of loop 3.
        asm.mov(64, arr, refAddress);
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
        asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
        asm.bind(bmpFoundTail);
        this.utf16MatchSurrogatesAndTest(asm, vecArray2, vecTmp1, vecTmp2, vecMaskSurrogates);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, surrogateFoundTail);
        // No surrogate pattern anywhere: result is (count << 32) | 2.
        // NOTE(review): 2 is presumably the BMP code range - confirm.
        asm.mov(tmp, 2);
        asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
        asm.jmp(end);
        if (this.assumeValid) {
            // ---- Surrogate handling without validation: only count the matches
            // (upper byte >> 2 == 54) and subtract them from the count.
            // Matches were found in the final block only: vecTmp1 still holds the match mask.
            asm.bind(surrogateFoundTail);
            AArch64CalcStringAttributesOp.utf16SubtractMatched(asm, tmp, ret, vecTmp1);
            asm.mov(tmp, 11);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
            asm.align(16);
            // Matches were found inside loop 3: set up the inner-loop bound in tmp2, clear the
            // lane counters and enter the counting loop at the accumulate step, because vecTmp1
            // already holds the match mask of the current block.
            asm.bind(surrogateFound);
            AArch64CalcStringAttributesOp.setCodepointCountOuterLoopRefAddress(asm, arr, refAddress, tmp2, 4096, 32);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNSurrogates, 0L);
            asm.jmp(surrogateContinue);
            Label surrogateOuterLoop = AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopBegin(asm, arr, refAddress, tmp2, 4096, 32);
            asm.align(16);
            asm.bind(surrogateLoop);
            asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
            asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
            asm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecArray2, 2);
            asm.neon.cmeqVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecTmp1, vecTmp1, vecMaskSurrogates);
            asm.bind(surrogateContinue);
            // Matching lanes are 0xFF; shift-right-accumulate by 7 adds 1 per match to the lane counter.
            asm.neon.usraVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNSurrogates, vecTmp1, 7);
            asm.cmp(64, arr, tmp2);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.LS, surrogateLoop);
            // Flush the lane counters into ret and repeat the outer loop until the tail is reached.
            AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopEnd(asm, tmp, ret, vecNSurrogates, tmp2, refAddress, surrogateOuterLoop);
            Label skipTail = new Label();
            asm.mov(64, arr, refAddress);
            // len aliases refAddress, which was copied into arr just above, so it can be reused
            // for the number of elements left over after the last full block (length mod 16).
            asm.and(32, len, ValueUtil.asRegister((Value)this.length), 15L);
            asm.cbz(64, len, skipTail);
            // Load the last 32 bytes, mask the upper bytes with the tail AND mask selected by the
            // remainder, and count the matches in what is left.
            // NOTE(review): the mask presumably clears lanes already counted by the loop - confirm
            // against createTailANDMask.
            asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
            asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, maskTail);
            asm.fldr(128, vecTmp1, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp1);
            this.utf16MatchSurrogates(asm, vecArray2, vecTmp1, vecMaskSurrogates);
            AArch64CalcStringAttributesOp.utf16SubtractMatched(asm, tmp, ret, vecTmp1);
            asm.bind(skipTail);
            asm.mov(tmp, 11);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
        } else {
            // ---- Surrogate handling with validation: count pairs and record every lane where
            // the "previous is high surrogate" and "current is low surrogate" tests disagree.
            // vecPrev/vecResult reuse vecTmp2/vecTmp3.
            Register vecPrev = vecTmp2;
            Register vecResult = vecTmp3;
            asm.bind(surrogateFoundTail);
            // Surrogates first seen in the final block: make arr == refAddress + 32 so that the
            // code below processes only the block already in vecArray2 and then exits via "done".
            asm.add(64, arr, ValueUtil.asRegister((Value)this.array), ValueUtil.asRegister((Value)this.offset));
            asm.sub(64, refAddress, arr, 32);
            asm.align(16);
            asm.bind(surrogateFound);
            // Switch to the exact patterns: 54 == 0xD8 >> 2 and 55 == 0xDC >> 2.
            // vecMask is no longer needed for the 0xFF80 test and is reused for the second pattern.
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMaskSurrogates, 54L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecMask, 55L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrev, 0L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecResult, 0L);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNSurrogates, 0L);
            AArch64CalcStringAttributesOp.setCodepointCountOuterLoopRefAddress(asm, arr, refAddress, tmp2, 4096, 32);
            // The current block's upper bytes are already in vecArray2, so skip the load.
            asm.jmp(surrogateContinue);
            Label surrogateOuterLoop = AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopBegin(asm, arr, refAddress, tmp2, 4096, 32);
            asm.align(16);
            asm.bind(surrogateLoop);
            asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
            asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
            asm.bind(surrogateContinue);
            // vecArray1 = upper bytes shifted by one lane: lane i holds the upper byte of the
            // element preceding lane i of vecArray2 (lane 0 comes from the previous block).
            asm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray1, vecPrev, vecArray2, 15);
            asm.neon.moveVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecPrev, vecArray2);
            asm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray1, 2);
            asm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray2, 2);
            // vecArray1: previous element matches pattern 54; vecArray2: current element matches pattern 55.
            asm.neon.cmeqVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray1, vecMaskSurrogates);
            asm.neon.cmeqVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray2, vecMask);
            // AND = both tests hold (a pair); XOR = exactly one holds (a mismatch).
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
            asm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray1, vecArray1, vecArray2);
            // Count pairs per lane and accumulate mismatches into vecResult.
            asm.neon.usraVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecNSurrogates, vecTmp1, 7);
            asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecResult, vecResult, vecArray1);
            asm.cmp(64, arr, tmp2);
            asm.branchConditionally(AArch64Assembler.ConditionFlag.LS, surrogateLoop);
            // Flush the lane counters; jumps to "done" once arr == refAddress + 32.
            Label done = AArch64CalcStringAttributesOp.emitCodepointCountOuterLoopEnd(asm, tmp, ret, vecNSurrogates, tmp2, refAddress, surrogateOuterLoop, arr, 32);
            // Tail: reload the last 32 bytes and rearrange their upper bytes with the shuffle mask
            // found at maskTail + 16 - (length & 15).
            asm.mov(64, arr, refAddress);
            asm.fldp(128, vecArray1, vecArray2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arr, 32));
            asm.neon.uzp2VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, vecArray1, vecArray2);
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, maskTail);
            asm.and(32, tmp2, ValueUtil.asRegister((Value)this.length), 15L);
            asm.add(64, tmp, tmp, 16);
            asm.neg(64, tmp2, tmp2);
            asm.fldr(128, vecTmp1, AArch64Address.createRegisterOffsetAddress(128, tmp, tmp2, false));
            // tmp2 = refAddress makes the inner-loop test after surrogateContinue fail, so the
            // tail block is processed once and control then reaches "done" through the loop end.
            asm.mov(64, tmp2, refAddress);
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp1);
            asm.jmp(surrogateContinue);
            asm.bind(done);
            // The very last element has no successor: if its upper byte matches pattern 54,
            // record it as a mismatch as well.
            asm.neon.dupVX(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrev, vecPrev, 15);
            asm.neon.ushrVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrev, vecPrev, 2);
            asm.neon.cmeqVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecPrev, vecPrev, vecMaskSurrogates);
            asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecResult, vecResult, vecPrev);
            asm.mov(tmp, 11);
            AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecResult, vecResult);
            assert (AArch64CalcStringAttributesOp.returnValueAssertions());
            // Code range stays 11 when no mismatch was recorded (EQ), otherwise becomes 12.
            asm.csinc(64, tmp, tmp, tmp, AArch64Assembler.ConditionFlag.EQ);
            asm.orr(64, ret, tmp, ret, AArch64Assembler.ShiftType.LSL, 32);
            asm.jmp(end);
        }
        // ---- Inputs shorter than 32 bytes. On entry len == byteLength - 32 (negative); each
        // section adds back a power of two and branches on the sign to narrow down the size class.
        asm.bind(tailLessThan32);
        if (this.assumeValid) {
            // Base address of the AND-mask table shared by all assumeValid tail sections below.
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, this.createTailANDMask(crb, 32));
        }
        asm.adds(64, len, len, 16);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan16);
        // 16..31 bytes: first 16 bytes plus the (overlapping) last 16 bytes; len == byteLength - 16.
        asm.fldr(128, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(128, arr));
        asm.fldr(128, vecArray2, AArch64Address.createRegisterOffsetAddress(128, arr, len, false));
        if (this.assumeValid) {
            // AND with the table entry at offset 16 + len.
            // NOTE(review): presumably zeroes the bytes already present in vecArray1 - confirm.
            asm.add(64, tmp, tmp, 16);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
        } else {
            // Shuffle with the table entry at offset 16 - len of the shuffle-mask table.
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, maskTail);
            asm.add(64, tmp, tmp, 16);
            asm.neg(64, len, len);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
        }
        asm.jmp(tailLessThan32Continue);
        asm.bind(tailLessThan16);
        asm.adds(64, len, len, 8);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan8);
        // 8..15 bytes: first 8 bytes plus the (overlapping) last 8 bytes; len == byteLength - 8.
        asm.fldr(64, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(64, arr));
        asm.fldr(64, vecArray2, AArch64Address.createRegisterOffsetAddress(64, arr, len, false));
        if (this.assumeValid) {
            asm.add(64, tmp, tmp, 24);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
        } else {
            AArch64CalcStringAttributesOp.loadDataSectionAddress(crb, asm, tmp, AArch64CalcStringAttributesOp.createTailShuffleMask(crb, 8));
            asm.add(64, tmp, tmp, 8);
            asm.neg(64, len, len);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.tblVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
            // Pack both halves into vecArray1 (upper 64 bits = shuffled tail) and clear vecArray2.
            asm.neon.insXX(AArch64ASIMDAssembler.ElementSize.DoubleWord, vecArray1, 1, vecArray2, 0);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, 0L);
        }
        asm.jmp(tailLessThan32Continue);
        asm.bind(tailLessThan8);
        asm.adds(64, len, len, 4);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan4);
        // 4..7 bytes: first 4 bytes plus the (overlapping) last 4 bytes; len == byteLength - 4.
        asm.fldr(32, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(32, arr));
        if (this.assumeValid) {
            asm.fldr(32, vecArray2, AArch64Address.createRegisterOffsetAddress(32, arr, len, false));
            asm.add(64, tmp, tmp, 28);
            asm.fldr(128, vecTmp2, AArch64Address.createRegisterOffsetAddress(128, tmp, len, false));
            asm.neon.andVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecArray2, vecArray2, vecTmp2);
        } else {
            // Load the last 4 bytes into a general-purpose register and shift out the
            // (4 - len) * 8 low bits that overlap the first load, then place the remainder in
            // word lane 1 of vecArray1.
            asm.ldr(32, tmp, AArch64Address.createRegisterOffsetAddress(32, arr, len, false));
            asm.sub(64, len, len, 4);
            asm.neg(64, len, len, AArch64Assembler.ShiftType.LSL, 3);
            asm.lsr(64, tmp, tmp, len);
            asm.neon.insXG(AArch64ASIMDAssembler.ElementSize.Word, vecArray1, 1, tmp);
            asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, 0L);
        }
        asm.jmp(tailLessThan32Continue);
        asm.bind(tailLessThan4);
        asm.adds(64, len, len, 2);
        // Negative here means the input is empty: jump to end with ret as set at the top.
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, end);
        // Otherwise a single 16-bit load covers the input.
        asm.fldr(16, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(16, arr));
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, 0L);
        asm.jmp(tailLessThan32Continue);
    }

    /**
     * Emits code that reduces a per-byte match vector to a count and subtracts that count from
     * {@code ret}.
     *
     * <p>Each byte lane of {@code vecTmp1} is first reduced to its top bit, the sixteen lanes are
     * summed, and the low 64 bits of the sum are moved into {@code tmp} before the subtraction.
     * NOTE(review): the count is only meaningful if every lane is 0x00 or 0xFF on entry (as a
     * vector compare would produce) - confirm against the call sites.
     *
     * @param asm assembler receiving the instructions
     * @param tmp general purpose scratch register, overwritten with the lane count
     * @param ret accumulator register; decremented by the lane count
     * @param vecTmp1 vector register holding the match mask; overwritten
     */
    private static void utf16SubtractMatched(AArch64MacroAssembler asm, Register tmp, Register ret, Register vecTmp1) {
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize byteLanes = AArch64ASIMDAssembler.ElementSize.Byte;
        final AArch64ASIMDAssembler.ElementSize doubleWord = AArch64ASIMDAssembler.ElementSize.DoubleWord;

        // Keep only bit 7 of every byte lane, so each lane becomes 0 or 1.
        asm.neon.ushrVVI(fullReg, byteLanes, vecTmp1, vecTmp1, 7);
        // Horizontal add across all byte lanes.
        asm.neon.addvSV(fullReg, byteLanes, vecTmp1, vecTmp1);
        // Transfer the sum to the general purpose scratch register.
        asm.neon.moveFromIndex(doubleWord, doubleWord, tmp, vecTmp1, 0);
        asm.sub(64, ret, ret, tmp);
    }

    /**
     * Emits a per-byte comparison of {@code vecArray2}, shifted right, against
     * {@code vecMaskSurrogates}; the resulting lane mask is left in {@code vecTmp1}.
     *
     * <p>The shift amount is 2 when {@code assumeValid} is set and 3 otherwise.
     * NOTE(review): presumably {@code vecArray2} carries the high bytes of UTF-16 code units and
     * {@code vecMaskSurrogates} was initialised to match the chosen shift - both are set up outside
     * this method, confirm there.
     *
     * @param asm assembler receiving the instructions
     * @param vecArray2 vector register to inspect; not modified
     * @param vecTmp1 vector register receiving the comparison result
     * @param vecMaskSurrogates vector register holding the value each shifted byte is compared to
     */
    private void utf16MatchSurrogates(AArch64MacroAssembler asm, Register vecArray2, Register vecTmp1, Register vecMaskSurrogates) {
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize byteLanes = AArch64ASIMDAssembler.ElementSize.Byte;

        final int shift;
        if (assumeValid) {
            shift = 2;
        } else {
            shift = 3;
        }
        asm.neon.ushrVVI(fullReg, byteLanes, vecTmp1, vecArray2, shift);
        asm.neon.cmeqVVV(fullReg, byteLanes, vecTmp1, vecTmp1, vecMaskSurrogates);
    }

    /**
     * Emits the surrogate match of {@link #utf16MatchSurrogates} followed by a zero check of the
     * resulting mask.
     *
     * <p>The match mask ends up in {@code vecTmp1}; {@code vecTmp2} is handed to
     * {@code vectorCheckZero} as its first register argument. NOTE(review): that helper is defined
     * outside this chunk - presumably it uses {@code vecTmp2} as scratch and sets the condition
     * flags for the caller's following branch; confirm.
     *
     * @param asm assembler receiving the instructions
     * @param vecArray2 vector register to inspect
     * @param vecTmp1 vector register receiving the match mask
     * @param vecTmp2 vector register passed on to the zero check
     * @param vecMaskSurrogates vector register holding the comparison value
     */
    private void utf16MatchSurrogatesAndTest(AArch64MacroAssembler asm, Register vecArray2, Register vecTmp1, Register vecTmp2, Register vecMaskSurrogates) {
        // First build the lane mask ...
        utf16MatchSurrogates(asm, vecArray2, vecTmp1, vecMaskSurrogates);
        // ... then test whether any lane matched.
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, vecTmp2, vecTmp1);
    }

    /**
     * Emits the attribute scan for arrays of 4-byte elements (asserts {@code stride.log2 == 2}).
     *
     * <p>The emitted code ORs the loaded vectors together and tests the result against
     * {@code vecMask}, escalating through four phases named after their labels: ascii, latin, bmp
     * and astral. {@code vecMask} starts as 0xFFFFFF80 per word and is shifted left by 1 and later
     * by 8 as phases advance, i.e. to 0xFFFFFF00 and then 0xFFFF0000. Inputs of at least 64 bytes
     * are consumed in 64-byte chunks, finishing with a re-read of the last 64 bytes of the array;
     * shorter inputs take the {@code tailLessThan*} paths, which read the head and the (possibly
     * overlapping) tail of the array.
     *
     * <p>Values written to {@code ret}:
     * <ul>
     * <li>0 - no element has a bit set above 0x7f (also used for an empty array)</li>
     * <li>1 - some element exceeds 0x7f, none has a bit set above 0xff</li>
     * <li>2 - some element exceeds 0xff, none has a bit set above 0xffff, and no element lies in
     * 0xD800..0xDFFF</li>
     * <li>3 - some element exceeds 0xffff, and every element passed {@code utf32CheckInvalid}</li>
     * <li>4 - an element lies in 0xD800..0xDFFF or is greater than 0x10FFFF</li>
     * </ul>
     *
     * @param asm assembler receiving the instructions
     * @param arr register holding the address the loads start from; modified by the emitted code
     * @param len register holding the element count on entry; converted to a byte count and then
     *            reused as scratch (see {@code refAddress})
     * @param ret register receiving the result code
     * @param end label jumped to once {@code ret} is set; it is not bound here. The
     *            {@code returnBroken} path does not jump but runs off the end of the code emitted
     *            by this method, so presumably the caller binds {@code end} immediately afterwards
     *            - confirm at the call site.
     */
    private void emitUTF32(AArch64MacroAssembler asm, Register arr, Register len, Register ret, Label end) {
        assert (this.stride.log2 == 2);
        Label tailLessThan64 = new Label();
        Label tailLessThan32 = new Label();
        Label tailLessThan64Continue = new Label();
        Label tailLessThan16 = new Label();
        Label tailLessThan8 = new Label();
        Label asciiLoop = new Label();
        Label latinFound = new Label();
        Label latinContinue = new Label();
        Label latinLoop = new Label();
        Label latinFoundTail = new Label();
        Label bmpFound = new Label();
        Label bmpContinue = new Label();
        Label bmpLoop = new Label();
        Label bmpFoundTail = new Label();
        Label astralContinue = new Label();
        Label astralLoop = new Label();
        Label astralFoundTail = new Label();
        Label returnBroken = new Label();
        // vectorTemp[0..3]: the four data vectors of one 64-byte chunk; [4..7]: scratch;
        // [8..10]: constant / phase masks.
        Register vecArray1 = ValueUtil.asRegister((Value)this.vectorTemp[0]);
        Register vecArray2 = ValueUtil.asRegister((Value)this.vectorTemp[1]);
        Register vecArray3 = ValueUtil.asRegister((Value)this.vectorTemp[2]);
        Register vecArray4 = ValueUtil.asRegister((Value)this.vectorTemp[3]);
        Register vecTmp1 = ValueUtil.asRegister((Value)this.vectorTemp[4]);
        Register vecTmp2 = ValueUtil.asRegister((Value)this.vectorTemp[5]);
        Register vecTmp3 = ValueUtil.asRegister((Value)this.vectorTemp[6]);
        Register vecTmp4 = ValueUtil.asRegister((Value)this.vectorTemp[7]);
        Register vecMask = ValueUtil.asRegister((Value)this.vectorTemp[8]);
        Register vecMaskBroken = ValueUtil.asRegister((Value)this.vectorTemp[9]);
        Register vecMaskOutOfRange = ValueUtil.asRegister((Value)this.vectorTemp[10]);
        // Convert the element count into a byte count (4 bytes per element).
        asm.lsl(64, len, len, 2L);
        // -128 as a 32-bit word is 0xFFFFFF80: selects every bit above the 7-bit range.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMask, -128L);
        // 27 == 0x1b == 0xD800 >>> 11; compared against (element >>> 11) to detect 0xD800..0xDFFF.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMaskBroken, 27L);
        // Largest accepted element value; anything unsigned-greater is reported as broken.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMaskOutOfRange, 0x10FFFFL);
        // len = byteLength - 64; a negative result means less than one full chunk is available.
        asm.subs(64, len, len, 64);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan64);
        // The len register is recycled: from here on it holds arr + byteLength - 64, the address
        // of the last full 64-byte chunk.
        Register refAddress = len;
        asm.add(64, refAddress, arr, len);
        // ---- ascii phase: loop over 64-byte chunks while nothing above 0x7f shows up ----
        asm.align(16);
        asm.bind(asciiLoop);
        // Load four vectors and advance arr by 64.
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        // NOTE(review): vectorTest is defined outside this chunk. The latinFound path below reads
        // vecTmp1 expecting the OR of all four vectors, so vectorTest presumably combines
        // vecTmp1/vecTmp2 into vecTmp1 before testing against vecMask - confirm.
        AArch64CalcStringAttributesOp.vectorTest(asm, AArch64ASIMDAssembler.ElementSize.Word, vecTmp1, vecTmp2, vecTmp1, vecTmp2, vecMask);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, latinFound);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, asciiLoop);
        // Loop done: re-read the final 64 bytes of the array (may overlap the previous chunk).
        asm.mov(64, arr, refAddress);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
        // Entry point for the short-input paths: vecTmp1 holds the OR of all loaded data.
        asm.bind(tailLessThan64Continue);
        asm.neon.cmtstVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp1, vecMask);
        // Advance the mask to 0xFFFFFF00 for the latin test at latinFoundTail; done before the
        // check so the branch target sees the updated mask.
        asm.neon.shlVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMask, vecMask, 1);
        // vectorCheckZero is defined outside this chunk; the branch below treats NE as
        // "some lane was non-zero".
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp2, true);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, latinFoundTail);
        asm.mov(ret, 0);
        asm.jmp(end);
        // ---- latin phase: something above 0x7f was seen inside the ascii loop ----
        asm.align(16);
        asm.bind(latinFound);
        // Mask becomes 0xFFFFFF00; the current chunk is re-tested at latinContinue.
        asm.neon.shlVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMask, vecMask, 1);
        asm.jmp(latinContinue);
        asm.align(16);
        asm.bind(latinLoop);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
        asm.bind(latinContinue);
        asm.neon.cmtstVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp1, vecMask);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp2, true);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, bmpFound);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, latinLoop);
        // Loop done: re-read the final 64 bytes of the array.
        asm.mov(64, arr, refAddress);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
        // Final latin test, shared by the loop exit above and the short-input path.
        asm.bind(latinFoundTail);
        asm.neon.cmtstVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp1, vecMask);
        // Advance the mask to 0xFFFF0000 for the bmp test at bmpFoundTail.
        asm.neon.shlVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMask, vecMask, 8);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp2, true);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, bmpFoundTail);
        asm.mov(ret, 1);
        asm.jmp(end);
        // ---- bmp phase: something above 0xff was seen inside the latin loop ----
        asm.align(16);
        asm.bind(bmpFound);
        // Mask becomes 0xFFFF0000; the current chunk is re-tested at bmpContinue.
        asm.neon.shlVVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecMask, vecMask, 8);
        // NOTE(review): every path leaving this phase later writes 2, 3 or 4 into ret, so this
        // store looks redundant - confirm before removing.
        asm.mov(ret, 0);
        asm.jmp(bmpContinue);
        asm.align(16);
        asm.bind(bmpLoop);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
        asm.bind(bmpContinue);
        asm.neon.cmtstVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp1, vecMask);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp2, true);
        // Something above 0xffff: continue with the full validity check on this same chunk.
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, astralContinue);
        // Chunk stays within 16 bits: only the 0xD800..0xDFFF check is needed.
        AArch64CalcStringAttributesOp.utf32CheckInvalidBMP(asm, vecArray1, vecArray2, vecArray3, vecArray4, vecTmp1, vecTmp2, vecTmp3, vecTmp4, vecMaskBroken, returnBroken);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, bmpLoop);
        // Loop done: re-read the final 64 bytes of the array.
        asm.mov(64, arr, refAddress);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
        // Final bmp test, shared by the loop exit above and the short-input path.
        asm.bind(bmpFoundTail);
        asm.neon.cmtstVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp1, vecMask);
        AArch64CalcStringAttributesOp.vectorCheckZero(asm, AArch64ASIMDAssembler.ElementSize.Word, vecTmp2, vecTmp2, true);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, astralFoundTail);
        AArch64CalcStringAttributesOp.utf32CheckInvalidBMP(asm, vecArray1, vecArray2, vecArray3, vecArray4, vecTmp1, vecTmp2, vecTmp3, vecTmp4, vecMaskBroken, returnBroken);
        asm.mov(ret, 2);
        asm.jmp(end);
        // ---- astral phase: something above 0xffff was seen; only validity is left to check,
        // so no OR of the vectors is computed here ----
        asm.align(16);
        asm.bind(astralLoop);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.bind(astralContinue);
        AArch64CalcStringAttributesOp.utf32CheckInvalid(asm, vecArray1, vecArray2, vecArray3, vecArray4, vecTmp1, vecTmp2, vecTmp3, vecTmp4, vecMaskBroken, vecMaskOutOfRange, returnBroken);
        asm.cmp(64, arr, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, astralLoop);
        // Loop done: re-read the final 64 bytes of the array.
        asm.mov(64, arr, refAddress);
        asm.neon.ld1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray1, vecArray2, vecArray3, vecArray4, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, arr, 64));
        asm.bind(astralFoundTail);
        AArch64CalcStringAttributesOp.utf32CheckInvalid(asm, vecArray1, vecArray2, vecArray3, vecArray4, vecTmp1, vecTmp2, vecTmp3, vecTmp4, vecMaskBroken, vecMaskOutOfRange, returnBroken);
        asm.mov(ret, 3);
        asm.jmp(end);
        // ---- short inputs (fewer than 64 bytes); len still holds byteLength - 64 here ----
        asm.bind(tailLessThan64);
        // len = byteLength - 32; non-negative means 32..63 bytes.
        asm.adds(64, len, len, 32);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan32);
        // First 32 bytes, then the last 32 bytes (arr + byteLength - 32); the two may overlap.
        asm.fldp(128, vecArray1, vecArray2, AArch64Address.createPairBaseRegisterOnlyAddress(128, arr));
        asm.add(64, arr, arr, len);
        asm.fldp(128, vecArray3, vecArray4, AArch64Address.createPairBaseRegisterOnlyAddress(128, arr));
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp2, vecArray3, vecArray4);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecTmp1, vecTmp2);
        asm.jmp(tailLessThan64Continue);
        asm.bind(tailLessThan32);
        // len = byteLength - 16; non-negative means 16..31 bytes.
        asm.adds(64, len, len, 16);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan16);
        // First 16 bytes and last 16 bytes.
        asm.fldr(128, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(128, arr));
        asm.fldr(128, vecArray2, AArch64Address.createRegisterOffsetAddress(128, arr, len, false));
        // Zero the unused data vectors: the validity helpers reached from the *Tail labels read
        // all four.
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray3, 0L);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray4, 0L);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.jmp(tailLessThan64Continue);
        asm.bind(tailLessThan16);
        // len = byteLength - 8; non-negative means 8..15 bytes.
        asm.adds(64, len, len, 8);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan8);
        // First 8 bytes and last 8 bytes.
        asm.fldr(64, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(64, arr));
        asm.fldr(64, vecArray2, AArch64Address.createRegisterOffsetAddress(64, arr, len, false));
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray3, 0L);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray4, 0L);
        asm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1, vecArray2);
        asm.jmp(tailLessThan64Continue);
        asm.bind(tailLessThan8);
        // Result for the empty-array exit taken just below.
        asm.mov(64, ret, AArch64.zr);
        // len = byteLength - 4; negative means there is no element at all.
        asm.adds(64, len, len, 4);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, end);
        // Exactly one element: load its 4 bytes and zero the other data vectors.
        asm.fldr(32, vecArray1, AArch64Address.createBaseRegisterOnlyAddress(32, arr));
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray2, 0L);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray3, 0L);
        asm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, vecArray4, 0L);
        asm.neon.moveVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vecTmp1, vecArray1);
        asm.jmp(tailLessThan64Continue);
        // Target of the validity helpers. No jump to end follows: execution continues with
        // whatever is emitted after this method returns.
        asm.align(16);
        asm.bind(returnBroken);
        asm.mov(ret, 4);
    }

    /**
     * Emits a validity check over four vectors of 32-bit elements and a branch to
     * {@code returnBroken} when any element is rejected.
     *
     * <p>An element is rejected when it is unsigned-greater than the per-word value in
     * {@code vecMaskOutOfRange}, or when the element shifted right by 11 equals the per-word value
     * in {@code vecMaskBroken}. The emitted code overwrites {@code vecArray1}, {@code vecArray3}
     * and all four {@code vecTmp} registers.
     *
     * @param asm assembler receiving the instructions
     * @param vecArray1 first data vector; overwritten
     * @param vecArray2 second data vector
     * @param vecArray3 third data vector; overwritten
     * @param vecArray4 fourth data vector
     * @param vecTmp1 scratch vector
     * @param vecTmp2 scratch vector
     * @param vecTmp3 scratch vector
     * @param vecTmp4 scratch vector
     * @param vecMaskBroken value each {@code element >>> 11} is compared with for equality
     * @param vecMaskOutOfRange upper bound for element values
     * @param returnBroken branch target taken (condition NE) after the final zero check
     */
    private static void utf32CheckInvalid(AArch64MacroAssembler asm, Register vecArray1, Register vecArray2, Register vecArray3, Register vecArray4, Register vecTmp1, Register vecTmp2, Register vecTmp3, Register vecTmp4, Register vecMaskBroken, Register vecMaskOutOfRange, Label returnBroken) {
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize word = AArch64ASIMDAssembler.ElementSize.Word;
        final Register[] data = {vecArray1, vecArray2, vecArray3, vecArray4};
        final Register[] shifted = {vecTmp1, vecTmp2, vecTmp3, vecTmp4};

        // shifted[i] = data[i] >>> 11, taken before the data registers are clobbered below.
        for (int i = 0; i < data.length; i++) {
            asm.neon.ushrVVI(fullReg, word, shifted[i], data[i], 11);
        }

        // Range check: fold the four vectors into a lane-wise unsigned maximum, then compare it
        // against the upper bound.
        asm.neon.umaxVVV(fullReg, word, vecArray1, vecArray1, vecArray2);
        asm.neon.umaxVVV(fullReg, word, vecArray3, vecArray3, vecArray4);
        asm.neon.umaxVVV(fullReg, word, vecArray1, vecArray1, vecArray3);
        asm.neon.cmhiVVV(fullReg, word, vecArray1, vecArray1, vecMaskOutOfRange);

        // Equality check of every shifted vector against vecMaskBroken.
        for (Register lanes : shifted) {
            asm.neon.cmeqVVV(fullReg, word, lanes, lanes, vecMaskBroken);
        }

        // Merge all findings into vecArray1.
        asm.neon.orrVVV(fullReg, vecTmp1, vecTmp1, vecTmp2);
        asm.neon.orrVVV(fullReg, vecTmp3, vecTmp3, vecTmp4);
        asm.neon.orrVVV(fullReg, vecTmp1, vecTmp1, vecTmp3);
        asm.neon.orrVVV(fullReg, vecArray1, vecArray1, vecTmp1);

        AArch64CalcStringAttributesOp.vectorCheckZero(asm, word, vecArray1, vecArray1, true);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, returnBroken);
    }

    /**
     * Emits a reduced validity check over four vectors of 32-bit elements: only the comparison of
     * {@code element >>> 11} with {@code vecMaskBroken} is performed, without any upper-bound
     * test. Branches to {@code returnBroken} when any element matches.
     *
     * <p>The four data vectors are left untouched; all four {@code vecTmp} registers are
     * overwritten.
     *
     * @param asm assembler receiving the instructions
     * @param vecArray1 first data vector
     * @param vecArray2 second data vector
     * @param vecArray3 third data vector
     * @param vecArray4 fourth data vector
     * @param vecTmp1 scratch vector; holds the merged match mask afterwards
     * @param vecTmp2 scratch vector
     * @param vecTmp3 scratch vector
     * @param vecTmp4 scratch vector
     * @param vecMaskBroken value each {@code element >>> 11} is compared with for equality
     * @param returnBroken branch target taken (condition NE) after the final zero check
     */
    private static void utf32CheckInvalidBMP(AArch64MacroAssembler asm, Register vecArray1, Register vecArray2, Register vecArray3, Register vecArray4, Register vecTmp1, Register vecTmp2, Register vecTmp3, Register vecTmp4, Register vecMaskBroken, Label returnBroken) {
        final AArch64ASIMDAssembler.ASIMDSize fullReg = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize word = AArch64ASIMDAssembler.ElementSize.Word;
        final Register[] data = {vecArray1, vecArray2, vecArray3, vecArray4};
        final Register[] shifted = {vecTmp1, vecTmp2, vecTmp3, vecTmp4};

        // shifted[i] = data[i] >>> 11
        for (int i = 0; i < data.length; i++) {
            asm.neon.ushrVVI(fullReg, word, shifted[i], data[i], 11);
        }

        // Lane-wise equality against vecMaskBroken.
        for (Register lanes : shifted) {
            asm.neon.cmeqVVV(fullReg, word, lanes, lanes, vecMaskBroken);
        }

        // Merge the four match masks into vecTmp1.
        asm.neon.orrVVV(fullReg, vecTmp1, vecTmp1, vecTmp2);
        asm.neon.orrVVV(fullReg, vecTmp3, vecTmp3, vecTmp4);
        asm.neon.orrVVV(fullReg, vecTmp1, vecTmp1, vecTmp3);

        AArch64CalcStringAttributesOp.vectorCheckZero(asm, word, vecTmp1, vecTmp1, true);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.NE, returnBroken);
    }

    /**
     * Always returns {@code true}.
     *
     * <p>NOTE(review): the callers are outside this chunk; the name and the constant result
     * suggest it is meant to be used inside an {@code assert} statement. This file is decompiler
     * output, so any checks the original source performed here may have been folded away at
     * compile time - confirm against the original source.
     *
     * @return {@code true}, unconditionally
     */
    private static boolean returnValueAssertions() {
        return true;
    }
}

