/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.matrix.data;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockCSR;
import org.apache.sysds.runtime.data.SparseBlockFactory;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
import org.apache.sysds.runtime.data.SparseRow;
import org.apache.sysds.runtime.data.SparseRowVector;
import org.apache.sysds.runtime.functionobjects.And;
import org.apache.sysds.runtime.functionobjects.Builtin;
import org.apache.sysds.runtime.functionobjects.Divide;
import org.apache.sysds.runtime.functionobjects.Equals;
import org.apache.sysds.runtime.functionobjects.GreaterThan;
import org.apache.sysds.runtime.functionobjects.GreaterThanEquals;
import org.apache.sysds.runtime.functionobjects.LessThan;
import org.apache.sysds.runtime.functionobjects.LessThanEquals;
import org.apache.sysds.runtime.functionobjects.Minus;
import org.apache.sysds.runtime.functionobjects.MinusMultiply;
import org.apache.sysds.runtime.functionobjects.Multiply;
import org.apache.sysds.runtime.functionobjects.Multiply2;
import org.apache.sysds.runtime.functionobjects.NotEquals;
import org.apache.sysds.runtime.functionobjects.Plus;
import org.apache.sysds.runtime.functionobjects.PlusMultiply;
import org.apache.sysds.runtime.functionobjects.Power;
import org.apache.sysds.runtime.functionobjects.Power2;
import org.apache.sysds.runtime.functionobjects.ValueFunction;
import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
import org.apache.sysds.runtime.matrix.data.LibMatrixOuterAgg;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
import org.apache.sysds.runtime.util.CommonThreadPool;
import org.apache.sysds.runtime.util.DataConverter;
import org.apache.sysds.runtime.util.SortUtils;
import org.apache.sysds.runtime.util.UtilFunctions;

public class LibMatrixBincell {
    /** Class-wide logger, registered under the fully qualified class name. */
    private static final Log LOG = LogFactory.getLog((String)LibMatrixBincell.class.getName());
    /**
     * Threshold value 16384. The same value appears as the literal {@code 16384L} in the
     * size checks that decide between single-threaded and multi-threaded execution.
     * NOTE(review): presumably the decompiler inlined this constant at its use sites -- confirm
     * against the original source before removing it.
     */
    private static final long PAR_NUMCELL_THRESHOLD2 = 16384L;

    /** Private no-op constructor; prevents instantiation from outside this class. */
    private LibMatrixBincell() {
    }

    /**
     * Applies the unary operator {@code op} to every cell of {@code m1}.
     *
     * <p>When {@code m1} is non-sparse and non-empty, the operator requests more than one thread
     * and {@code m1.getLength()} exceeds 16384, the rows are split into balanced ranges that are
     * processed by {@code UncellTask}s on a thread pool. Otherwise the work is delegated to
     * {@code sparseUnaryOperations} (sparse-safe operators) or {@code denseUnaryOperations}.
     *
     * @param m1  input block
     * @param ret output block; replaced by {@code m1} itself when the operator is in-place
     *            (parallel path: and {@code m1} is not empty; sequential path: and {@code m1}
     *            is not in sparse format)
     * @param op  unary operator (function, thread count, in-place flag)
     * @return the block holding the result (either {@code ret} or {@code m1})
     * @throws DMLRuntimeException if a parallel task fails or the calling thread is interrupted
     */
    public static MatrixBlock uncellOp(MatrixBlock m1, MatrixBlock ret, UnaryOperator op) {
        if (!m1.sparse && !m1.isEmptyBlock(false) && op.getNumThreads() > 1 && m1.getLength() > 16384L) {
            if (!op.isInplace() || m1.isEmpty()) {
                ret.allocateDenseBlock(false);
            } else {
                ret = m1;
            }
            int k = op.getNumThreads();
            DenseBlock a = m1.getDenseBlock();
            DenseBlock c = ret.getDenseBlock();
            ExecutorService pool = CommonThreadPool.get(k);
            try {
                // one task per balanced row range [lb, lb + len)
                List<UncellTask> tasks = new ArrayList<>();
                ArrayList<Integer> blklens = UtilFunctions.getBalancedBlockSizesDefault(ret.rlen, k, false);
                int lb = 0;
                for (int i = 0; i < blklens.size(); ++i) {
                    int len = blklens.get(i);
                    tasks.add(new UncellTask(a, c, op, lb, lb + len));
                    lb += len;
                }
                // each task reports the number of non-zeros it produced
                long nnz = 0L;
                for (Future<Long> task : pool.invokeAll(tasks)) {
                    nnz += task.get();
                }
                ret.setNonZeros(nnz);
            }
            catch (InterruptedException ex) {
                // keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
                throw new DMLRuntimeException(ex);
            }
            catch (ExecutionException ex) {
                throw new DMLRuntimeException(ex);
            }
            finally {
                pool.shutdown();
            }
        } else {
            if (op.isInplace() && !m1.isInSparseFormat()) {
                ret = m1;
            }
            if (op.sparseSafe) {
                LibMatrixBincell.sparseUnaryOperations(m1, ret, op);
            } else {
                LibMatrixBincell.denseUnaryOperations(m1, ret, op);
            }
            ret.recomputeNonZeros();
        }
        return ret;
    }

    /**
     * Applies a matrix-scalar operator to {@code m1}.
     *
     * <p>The output is sparse only if {@code m1} is sparse and the operator is sparse-safe.
     * A multiplication by the constant 0 returns the freshly reset (or created) output without
     * further work. The parallel path is taken only for {@code k > 1}, a non-empty input, a
     * sparse-safe operator and an output length of at least 16384.
     *
     * @param m1  input block
     * @param ret output block to reset and fill, or {@code null} to allocate a new one
     * @param op  scalar operator
     * @param k   requested degree of parallelism
     * @return the output block
     */
    public static MatrixBlock bincellOpScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int k) {
        final boolean sparseOut = op.sparseSafe ? m1.sparse : false;
        if (ret != null) {
            ret.reset(m1.getNumRows(), m1.getNumColumns(), sparseOut, m1.nonZeros);
        } else {
            ret = new MatrixBlock(m1.getNumRows(), m1.getNumColumns(), sparseOut, m1.nonZeros);
        }

        // x * 0 shortcut: the output stays as reset/created above
        final boolean timesZero = op.fn instanceof Multiply && op.getConstant() == 0.0;
        if (timesZero) {
            return ret;
        }

        final boolean runParallel = k > 1 && !m1.isEmpty() && op.sparseSafe && ret.getLength() >= 16384L;
        if (runParallel) {
            LibMatrixBincell.bincellOpScalarParallel(m1, ret, op, k);
        } else {
            LibMatrixBincell.bincellOpScalarSingleThread(m1, ret, op);
        }

        if (ret.isEmptyBlock(false)) {
            ret.examSparsity(k);
        }
        return ret;
    }

    /**
     * Applies a binary cell-wise operator to {@code m1} and {@code m2}.
     *
     * <p>Validates the dimensions, optionally swaps the operator for its sparse-safe variant,
     * sizes the output from a sparsity estimate and then runs the single-threaded or the
     * multi-threaded kernel.
     *
     * @param m1  left input
     * @param m2  right input
     * @param ret output block to reset and fill, or {@code null} to allocate a new one
     * @param op  binary operator
     * @return the output block
     * @throws RuntimeException wrapping any exception raised while executing the operation
     */
    public static MatrixBlock bincellOp(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        try {
            LibMatrixBincell.isValidDimensionsBinary(m1, m2);
            op = LibMatrixBincell.replaceOpWithSparseSafeIfApplicable(m1, m2, op);

            // output shape: outer products take their column count from the right-hand side
            final BinaryAccessType accessType = LibMatrixBincell.getBinaryAccessType(m1, m2);
            final boolean isOuter = accessType == BinaryAccessType.OUTER_VECTOR_VECTOR;
            final int outRows = m1.getNumRows();
            final int outCols;
            if (isOuter) {
                outCols = m2.getNumColumns();
            } else {
                outCols = m1.getNumColumns();
            }

            final SparsityEstimate estimate = LibMatrixBincell.estimateSparsityOnBinary(m1, m2, op);
            if (ret != null) {
                ret.reset(outRows, outCols, estimate.sparse, estimate.estimatedNonZeros);
            } else {
                ret = new MatrixBlock(outRows, outCols, estimate.sparse, estimate.estimatedNonZeros);
            }

            // operators that skip empty inputs yield an empty output if either side is empty
            final boolean skipsEmpty = LibMatrixBincell.shouldSkipEmpty(m2, op);
            final boolean leftEmpty = m1.isEmpty();
            final boolean rightEmpty = m2.isEmpty();
            if (skipsEmpty && (leftEmpty || rightEmpty)) {
                return ret;
            }

            ret.allocateBlock();
            final int k = op.getNumThreads();
            final boolean sequential = k <= 1
                || m1.isEmpty()
                || m2.isEmpty()
                || ret.getLength() < 16384L
                || LibMatrixBincell.isSafeBinaryMcVDenseSparseMult(m1, m2, ret, op)
                || !CommonThreadPool.useParallelismOnThread();
            if (sequential) {
                LibMatrixBincell.bincellOpMatrixSingle(m1, m2, ret, op, accessType);
            } else {
                LibMatrixBincell.bincellOpMatrixParallel(m1, m2, ret, op, accessType, k);
            }

            if (ret.isEmptyBlock(false)) {
                ret.examSparsity(k);
            }
            return ret;
        }
        catch (Exception e) {
            throw new RuntimeException("Failed to perform binary operation", e);
        }
    }

    /**
     * In-place binary operation; forwards unchanged to {@code bincellOpInPlaceRight}.
     *
     * @param m1ret left input, overwritten with the result
     * @param m2    right input
     * @param op    binary operator
     * @return the block returned by {@code bincellOpInPlaceRight}
     */
    public static MatrixBlock bincellOpInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final MatrixBlock result = LibMatrixBincell.bincellOpInPlaceRight(m1ret, m2, op);
        return result;
    }

    /**
     * Computes {@code m1ret = op(m1ret, m2)} in place.
     *
     * <p>After validating dimensions the method short-circuits for skippable empty inputs
     * (result reset to zeros) and for two empty inputs (result filled with {@code op(0, 0)}).
     * Otherwise {@code m1ret} is allocated or converted to the estimated output format before
     * the safe or unsafe in-place kernel runs.
     *
     * @param m1ret left input, overwritten with the result
     * @param m2    right input
     * @param op    binary operator
     * @return {@code m1ret}
     */
    public static MatrixBlock bincellOpInPlaceRight(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        LibMatrixBincell.isValidDimensionsBinary(m1ret, m2);
        op = LibMatrixBincell.replaceOpWithSparseSafeIfApplicable(m1ret, m2, op);

        final boolean skipsEmpty = LibMatrixBincell.shouldSkipEmpty(m2, op);
        final boolean leftEmpty = m1ret.isEmpty();
        final boolean rightEmpty = m2.isEmpty();
        if (skipsEmpty && (leftEmpty || rightEmpty)) {
            m1ret.reset(m1ret.rlen, m1ret.clen, 0.0);
            return m1ret;
        }

        final SparsityEstimate estimate = LibMatrixBincell.estimateSparsityOnBinary(m1ret, m2, op);
        if (leftEmpty && rightEmpty) {
            // both inputs are all-zero: every output cell is op(0, 0)
            final double constant = op.fn.execute(0.0, 0.0);
            m1ret.fill(constant);
            return m1ret;
        }

        // bring m1ret into the estimated output format
        if (leftEmpty) {
            if (estimate.sparse) {
                m1ret.allocateSparseRowsBlock();
            } else {
                m1ret.allocateDenseBlock();
            }
        } else if (estimate.sparse) {
            if (!m1ret.sparse) {
                m1ret.denseToSparse();
            }
        } else if (m1ret.sparse) {
            m1ret.sparseToDense();
        }

        final long nnz;
        if (op.sparseSafe || LibMatrixBincell.isSparseSafeDivideOrPow(op, m2)) {
            nnz = LibMatrixBincell.safeBinaryInPlace(m1ret, m2, op);
        } else {
            nnz = LibMatrixBincell.unsafeBinaryInPlace(m1ret, m2, op);
        }
        m1ret.setNonZeros(nnz);

        if (m1ret.isEmptyBlock(false)) {
            m1ret.examSparsity();
        }
        return m1ret;
    }

    /**
     * Computes {@code m1ret = op(m2, m1ret)} in place, i.e. with {@code m1ret} as the
     * right-hand operand.
     *
     * <p>If {@code m1ret} is in sparse format the result is materialized through
     * {@link #bincellOp} using a shallow copy of the old content. Otherwise the dense values of
     * {@code m1ret} are updated directly, after which the non-zero count is recomputed so that
     * it matches the new cell values.
     *
     * <p>NOTE(review): the dense paths read {@code getDenseBlockValues()} without a null check,
     * so they appear to expect an allocated dense block on {@code m1ret} (and on a dense
     * {@code m2}) -- confirm with callers.
     *
     * @param m1ret right operand, overwritten with the result
     * @param m2    left operand
     * @param op    binary operator
     * @return {@code m1ret}
     * @throws NotImplementedException for a sparse {@code m2} combined with a non-sparse-safe
     *                                 operator
     */
    public static MatrixBlock bincellOpInPlaceLeft(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        int nRows = m1ret.getNumRows();
        int nCols = m1ret.getNumColumns();
        op = LibMatrixBincell.replaceOpWithSparseSafeIfApplicable(m1ret, m2, op);
        if (m1ret.isInSparseFormat()) {
            LOG.warn((Object)"Inefficient bincell op in place left, because output is materialized in new matrix");
            MatrixBlock right = new MatrixBlock(nRows, nCols, true);
            right.copyShallow(m1ret);
            m1ret.cleanupBlock(true, true);
            LibMatrixBincell.bincellOp(m2, right, m1ret, op);
            return m1ret;
        }
        double[] retV = m1ret.getDenseBlockValues();
        ValueFunction f = op.fn;
        if (m2.isInSparseFormat() && op.sparseSafe) {
            SparseBlock sb = m2.getSparseBlock();
            // a missing sparse block carries no entries, so no cell is touched
            if (sb != null) {
                for (int row = 0; row < nRows; ++row) {
                    if (sb.isEmpty(row)) continue;
                    int apos = sb.pos(row);
                    int alen = sb.size(row) + apos;
                    int[] aix = sb.indexes(row);
                    double[] aval = sb.values(row);
                    int offsetV = row * nCols;
                    for (int j = apos; j < alen; ++j) {
                        int idx = offsetV + aix[j];
                        retV[idx] = f.execute(aval[j], retV[idx]);
                    }
                }
                // cell values changed in place: refresh the cached non-zero count
                m1ret.recomputeNonZeros();
            }
        } else {
            if (m2.isInSparseFormat()) {
                throw new NotImplementedException("Not implemented left bincell in place unsafe operations");
            }
            double[] m2V = m2.getDenseBlockValues();
            int size = nRows * nCols;
            for (int i = 0; i < size; ++i) {
                retV[i] = f.execute(m2V[i], retV[i]);
            }
            // refresh the non-zero count first so the emptiness check below sees current data
            m1ret.recomputeNonZeros();
            if (m1ret.isEmptyBlock(false)) {
                m1ret.examSparsity();
            }
        }
        return m1ret;
    }

    /**
     * Classifies how {@code m2} is combined with {@code m1}, based on their dimensions only.
     *
     * <p>Checked in this order: equal shapes, equal rows with a one-column {@code m2}, equal
     * columns with a one-row {@code m2}, one-column {@code m1} with one-row {@code m2}.
     *
     * @param m1 left input
     * @param m2 right input
     * @return the matching access type, or {@code INVALID} if none applies
     */
    public static BinaryAccessType getBinaryAccessType(MatrixBlock m1, MatrixBlock m2) {
        final boolean sameRows = m1.rlen == m2.rlen;
        final boolean sameCols = m1.clen == m2.clen;

        if (sameRows && sameCols) {
            return BinaryAccessType.MATRIX_MATRIX;
        }
        if (sameRows && m2.clen == 1) {
            return BinaryAccessType.MATRIX_COL_VECTOR;
        }
        if (sameCols && m2.rlen == 1) {
            return BinaryAccessType.MATRIX_ROW_VECTOR;
        }
        final boolean outer = m1.clen == 1 && m2.rlen == 1;
        return outer ? BinaryAccessType.OUTER_VECTOR_VECTOR : BinaryAccessType.INVALID;
    }

    /**
     * Like {@link #getBinaryAccessType} but additionally recognizes a vector on the left-hand
     * side ({@code COL_VECTOR_MATRIX}, {@code ROW_VECTOR_MATRIX}).
     *
     * @param m1 left input
     * @param m2 right input
     * @return the matching access type, or {@code INVALID} if none applies
     */
    public static BinaryAccessType getBinaryAccessTypeExtended(MatrixBlock m1, MatrixBlock m2) {
        final int r1 = m1.rlen;
        final int r2 = m2.rlen;
        final int c1 = m1.clen;
        final int c2 = m2.clen;
        final boolean rowsMatch = r1 == r2;
        final boolean colsMatch = c1 == c2;

        if (rowsMatch && colsMatch) {
            return BinaryAccessType.MATRIX_MATRIX;
        }
        if (rowsMatch) {
            // same row count: one side may be a column vector
            if (c1 == 1 && c1 < c2) {
                return BinaryAccessType.COL_VECTOR_MATRIX;
            }
            return c2 == 1 ? BinaryAccessType.MATRIX_COL_VECTOR : BinaryAccessType.INVALID;
        }
        if (colsMatch) {
            // same column count: one side may be a row vector
            if (r1 == 1 && r1 < r2) {
                return BinaryAccessType.ROW_VECTOR_MATRIX;
            }
            return r2 == 1 ? BinaryAccessType.MATRIX_ROW_VECTOR : BinaryAccessType.INVALID;
        }
        if (c1 == 1 && r2 == 1) {
            return BinaryAccessType.OUTER_VECTOR_VECTOR;
        }
        return BinaryAccessType.INVALID;
    }

    /**
     * Verifies that the dimensions of {@code m1} and {@code m2} fit one of the supported
     * combinations: matrix-matrix, matrix with column vector, matrix with row vector, or
     * column vector with row vector (outer).
     *
     * @param m1 left input
     * @param m2 right input
     * @throws DMLRuntimeException if none of the combinations matches
     */
    public static void isValidDimensionsBinary(MatrixBlock m1, MatrixBlock m2) {
        final int rlen1 = m1.rlen;
        final int clen1 = m1.clen;
        final int rlen2 = m2.rlen;
        final int clen2 = m2.clen;

        final boolean matrixMatrix = rlen1 == rlen2 && clen1 == clen2;
        final boolean matrixColVector = rlen1 == rlen2 && clen1 > 1 && clen2 == 1;
        final boolean matrixRowVector = clen1 == clen2 && rlen1 > 1 && rlen2 == 1;
        final boolean outerVectors = clen1 == 1 && rlen2 == 1;

        if (matrixMatrix || matrixColVector || matrixRowVector || outerVectors) {
            return;
        }
        throw new DMLRuntimeException("Block sizes are not matched for binary cell operations: " + rlen1 + "x" + clen1 + " vs " + rlen2 + "x" + clen2);
    }

    /**
     * Replaces a {@code LOG} builtin by {@code LOG_NZ} when at least one input has a sparsity
     * below 1.0; every other operator is returned unchanged.
     *
     * @param m1 left input
     * @param m2 right input
     * @param op operator to inspect
     * @return {@code op}, or a new {@code LOG_NZ} operator with the same thread count
     */
    public static BinaryOperator replaceOpWithSparseSafeIfApplicable(MatrixBlock m1, MatrixBlock m2, BinaryOperator op) {
        final boolean anyZeros = m1.getSparsity() < 1.0 || m2.getSparsity() < 1.0;
        if (!anyZeros) {
            return op;
        }
        if (!(op.fn instanceof Builtin)) {
            return op;
        }
        if (((Builtin)op.fn).bFunc != Builtin.BuiltinCode.LOG) {
            return op;
        }
        return new BinaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.LOG_NZ), op.getNumThreads());
    }

    /**
     * Single-threaded matrix-scalar kernel: runs the sparse-safe or the unsafe variant, stores
     * the returned non-zero count in {@code ret} and re-examines sparsity if the result is empty.
     */
    private static void bincellOpScalarSingleThread(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) {
        final long count;
        if (op.sparseSafe) {
            count = LibMatrixBincell.safeBinaryScalar(m1, ret, op, 0, m1.rlen);
        } else {
            count = LibMatrixBincell.unsafeBinaryScalar(m1, ret, op);
        }
        ret.nonZeros = count;

        if (ret.isEmptyBlock(false)) {
            ret.examSparsity();
        }
    }

    /**
     * Multi-threaded matrix-scalar kernel.
     *
     * <p>Rows are cut into chunks of {@code max(rows / k, 1000 / columns, 1)} rows, each handled
     * by a {@code BincellScalarTask}; the per-task non-zero counts are summed into {@code ret}.
     *
     * @param m1  input block
     * @param ret output block (allocated here)
     * @param op  scalar operator
     * @param k   number of threads requested from the pool
     * @throws DMLRuntimeException if a task fails or the calling thread is interrupted
     */
    private static void bincellOpScalarParallel(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int k) {
        ret.allocateBlock();
        ExecutorService pool = CommonThreadPool.get(k);
        try {
            List<BincellScalarTask> tasks = new ArrayList<>();
            int rMax = m1.getNumRows();
            int blkLen = Math.max(Math.max(rMax / k, 1000 / ret.getNumColumns()), 1);
            for (int i = 0; i < rMax; i += blkLen) {
                tasks.add(new BincellScalarTask(m1, ret, op, i, Math.min(rMax, i + blkLen)));
            }
            long nnz = 0L;
            for (Future<Long> task : pool.invokeAll(tasks)) {
                nnz += task.get();
            }
            ret.nonZeros = nnz;
        }
        catch (InterruptedException ex) {
            // keep the interrupt visible to callers further up the stack
            Thread.currentThread().interrupt();
            throw new DMLRuntimeException(ex);
        }
        catch (ExecutionException ex) {
            throw new DMLRuntimeException(ex);
        }
        finally {
            pool.shutdown();
        }
        if (ret.isEmptyBlock(false)) {
            ret.examSparsity();
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Multi-threaded matrix-matrix / matrix-vector kernel: one {@code BincellTask} per balanced
     * row range, with the per-task non-zero counts summed into {@code ret}.
     *
     * @throws Exception any failure raised while submitting or awaiting the tasks
     */
    private static void bincellOpMatrixParallel(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, BinaryAccessType atype, int k) throws Exception {
        final ExecutorService pool = CommonThreadPool.get(k);
        try {
            final ArrayList<BincellTask> tasks = new ArrayList<>();
            final ArrayList<Integer> rangeSizes = UtilFunctions.getBalancedBlockSizesDefault(ret.rlen, k, false);
            int rowStart = 0;
            for (int t = 0; t < rangeSizes.size(); t++) {
                final int rowEnd = rowStart + rangeSizes.get(t);
                tasks.add(new BincellTask(m1, m2, ret, op, atype, rowStart, rowEnd));
                rowStart = rowEnd;
            }

            final List<Future<Long>> results = pool.invokeAll(tasks);
            long total = 0L;
            for (Future<Long> result : results) {
                total += result.get();
            }
            ret.nonZeros = total;

            if (ret.isEmptyBlock(false)) {
                ret.examSparsity(k);
            }
        }
        finally {
            pool.shutdown();
        }
    }

    /** Single-threaded binary kernel over all rows of {@code m1}; stores the non-zero count in {@code ret}. */
    private static void bincellOpMatrixSingle(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, BinaryAccessType atype) {
        final long count = LibMatrixBincell.binCellOpExecute(m1, m2, ret, op, atype, 0, m1.rlen);
        ret.setNonZeros(count);
    }

    /**
     * Runs the binary operation on rows {@code [rl, ru)}, choosing the safe kernel for
     * sparse-safe operators (or sparse-safe divide/power) and the unsafe kernel otherwise.
     *
     * @return the non-zero count reported by the chosen kernel
     */
    private static long binCellOpExecute(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, BinaryAccessType atype, int rl, int ru) {
        if (op.sparseSafe || LibMatrixBincell.isSparseSafeDivideOrPow(op, m2)) {
            return LibMatrixBincell.safeBinary(m1, m2, ret, op, atype, rl, ru);
        }
        return LibMatrixBincell.unsafeBinary(m1, m2, ret, op, rl, ru);
    }

    /**
     * Returns true if {@code op} is a divide or power whose right-hand side has as many
     * non-zeros as it has cells.
     */
    private static boolean isSparseSafeDivideOrPow(BinaryOperator op, MatrixBlock rhs) {
        final boolean divideOrPower = op.fn instanceof Divide || op.fn instanceof Power;
        if (!divideOrPower) {
            return false;
        }
        return rhs.getNonZeros() == (long)rhs.getNumRows() * (long)rhs.getNumColumns();
    }

    /**
     * Unary kernel for operators that are not sparse-safe.
     *
     * <p>If {@code op(0)} is non-zero, the output is pre-filled with that value: for an empty
     * input this is the complete result, for a sparse input the stored entries are then
     * overwritten by {@code sparseUnaryOperations}.
     */
    private static void denseUnaryOperations(MatrixBlock m1, MatrixBlock ret, UnaryOperator op) {
        final double zeroImage = op.fn.execute(0.0);
        final int rows = m1.rlen;
        final int cols = m1.clen;
        final boolean zeroMapsToNonZero = zeroImage != 0.0;

        if (m1.isEmptyBlock(false)) {
            if (zeroMapsToNonZero) {
                ret.reset(rows, cols, zeroImage);
            }
            return;
        }

        if (m1.sparse && zeroMapsToNonZero) {
            // pre-fill so the cells without a stored entry already hold op(0)
            ret.reset(rows, cols, zeroImage);
            ret.nonZeros = (long)rows * (long)cols;
        }
        LibMatrixBincell.sparseUnaryOperations(m1, ret, op);
    }

    /**
     * Applies {@code op} to the stored cells of {@code m1} and writes them to {@code ret},
     * handling sparse-to-sparse, sparse-to-dense and dense-to-dense. An empty input is a no-op.
     * The non-zero count of {@code ret} is updated at the end.
     */
    private static void sparseUnaryOperations(MatrixBlock m1, MatrixBlock ret, UnaryOperator op) {
        if (m1.isEmptyBlock(false)) {
            return;
        }
        final int rows = m1.rlen;
        final int cols = m1.clen;
        long count;

        if (m1.sparse && ret.sparse) {
            ret.allocateSparseRowsBlock();
            final SparseBlock in = m1.sparseBlock;
            final SparseBlock out = ret.sparseBlock;
            count = 0L;
            for (int r = 0; r < rows; r++) {
                if (in.isEmpty(r)) {
                    continue;
                }
                final int start = in.pos(r);
                final int len = in.size(r);
                final int[] colIdx = in.indexes(r);
                final double[] vals = in.values(r);
                out.allocate(r, len);
                final int end = start + len;
                for (int p = start; p < end; p++) {
                    final double v = op.fn.execute(vals[p]);
                    out.append(r, colIdx[p], v);
                    if (v != 0.0) {
                        count++;
                    }
                }
            }
        } else if (m1.sparse) {
            ret.allocateDenseBlock(false);
            final SparseBlock in = m1.sparseBlock;
            final DenseBlock out = ret.denseBlock;
            // a positive nnz on ret means it was pre-filled: count the cells not stored in m1
            count = 0L;
            if (ret.nonZeros > 0L) {
                count = (long)rows * (long)cols - in.size();
            }
            for (int r = 0; r < rows; r++) {
                if (in.isEmpty(r)) {
                    continue;
                }
                final int start = in.pos(r);
                final int len = in.size(r);
                final int[] colIdx = in.indexes(r);
                final double[] vals = in.values(r);
                final double[] outVals = out.values(r);
                final int outPos = out.pos(r);
                final int end = start + len;
                for (int p = start; p < end; p++) {
                    final double v = op.fn.execute(vals[p]);
                    outVals[outPos + colIdx[p]] = v;
                    if (v != 0.0) {
                        count++;
                    }
                }
            }
        } else {
            if (m1 != ret) {
                ret.allocateDenseBlock(false);
            }
            final DenseBlock in = m1.getDenseBlock();
            final DenseBlock out = ret.getDenseBlock();
            count = 0L;
            for (int blk = 0; blk < in.numBlocks(); blk++) {
                final double[] src = in.valuesAt(blk);
                final double[] dst = out.valuesAt(blk);
                final int len = in.size(blk);
                for (int p = 0; p < len; p++) {
                    final double v = op.fn.execute(src[p]);
                    dst[p] = v;
                    if (v != 0.0) {
                        count++;
                    }
                }
            }
        }
        ret.nonZeros = count;
    }

    /**
     * Dispatches a sparse-safe binary operation on rows {@code [rl, ru)} by access type:
     * matrix-vector, outer vector-vector, or matrix-matrix. Two empty inputs yield 0 directly.
     *
     * @return the non-zero count reported by the selected kernel
     */
    private static long safeBinary(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, BinaryAccessType atype, int rl, int ru) {
        final boolean bothEmpty = m1.isEmptyBlock(false) && m2.isEmptyBlock(false);
        if (bothEmpty) {
            return 0L;
        }

        final long nnz;
        if (atype.isMatrixVector()) {
            nnz = LibMatrixBincell.safeBinaryMV(m1, m2, ret, op, atype, rl, ru);
        } else if (atype == BinaryAccessType.OUTER_VECTOR_VECTOR) {
            nnz = LibMatrixBincell.safeBinaryVVGeneric(m1, m2, ret, op, rl, ru);
        } else {
            nnz = LibMatrixBincell.safeBinaryMM(m1, m2, ret, op, rl, ru);
        }
        return nnz;
    }

    /**
     * Matrix-matrix dispatcher for sparse-safe operations on rows {@code [rl, ru)}.
     *
     * <p>Order of checks: shallow copy when one side is empty and the operator permits it,
     * sparse-sparse, sparse/dense into dense output for additive operators (or multiply with a
     * dense right side), all-dense, skip-empty with a sparse side, and finally the generic kernel.
     *
     * @return the non-zero count of the produced rows (or of the copied block)
     */
    private static long safeBinaryMM(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        // plus / minus / plus-multiply / minus-multiply: an empty right side leaves m1 unchanged
        final boolean additive = op.fn instanceof Plus || op.fn instanceof Minus || op.fn instanceof PlusMultiply || op.fn instanceof MinusMultiply;
        final boolean plusOnly = op.fn instanceof Plus;

        if (additive && m2.isEmpty()) {
            ret.copyShallow(m1);
            return ret.getNonZeros();
        }
        if (plusOnly && m1.isEmpty()) {
            ret.copyShallow(m2);
            return ret.getNonZeros();
        }

        if (m1.sparse && m2.sparse) {
            return LibMatrixBincell.safeBinaryMMSparseSparse(m1, m2, ret, op, rl, ru);
        }

        final boolean denseOutOneSparseIn = !ret.sparse && (m1.sparse || m2.sparse);
        if (denseOutOneSparseIn && (additive || op.fn instanceof Multiply && !m2.sparse)) {
            return LibMatrixBincell.safeBinaryMMSparseDenseDense(m1, m2, ret, op, rl, ru);
        }

        if (!ret.sparse && !m1.isInSparseFormat() && !m2.isInSparseFormat() && !m1.isEmpty() && !m2.isEmpty()) {
            return LibMatrixBincell.safeBinaryMMDenseDenseDense(m1, m2, ret, op, rl, ru);
        }

        if (LibMatrixBincell.shouldSkipEmpty(m2, op) && (m1.sparse || m2.sparse)) {
            return LibMatrixBincell.safeBinaryMMSparseDenseSkip(m1, m2, ret, op, rl, ru);
        }
        return LibMatrixBincell.safeBinaryMMGeneric(m1, m2, ret, op, rl, ru);
    }

    /**
     * Matrix-vector dispatcher for sparse-safe operations on rows {@code [rl, ru)}: all-dense,
     * sparse matrix with dense row vector into dense output, other sparse-left cases, the
     * dense-sparse multiply special case, and the generic kernel.
     *
     * @return the non-zero count reported by the selected kernel
     */
    private static long safeBinaryMV(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, BinaryAccessType atype, int rl, int ru) {
        final boolean allDense = !m1.sparse && !m2.sparse && !ret.sparse;
        if (allDense) {
            return LibMatrixBincell.safeBinaryMVDense(m1, m2, ret, op, rl, ru);
        }

        if (m1.sparse) {
            final boolean denseRowVector = !m2.sparse && !m2.isEmpty() && !ret.sparse && atype == BinaryAccessType.MATRIX_ROW_VECTOR;
            if (denseRowVector) {
                return LibMatrixBincell.safeBinaryMVSparseDenseRow(m1, m2, ret, op, rl, ru);
            }
            return LibMatrixBincell.safeBinaryMVSparseLeft(m1, m2, ret, op, rl, ru);
        }

        if (LibMatrixBincell.isSafeBinaryMcVDenseSparseMult(m1, m2, ret, op)) {
            // this kernel takes no row range; the count is read back from ret
            LibMatrixBincell.safeBinaryMcVDenseSparseMult(m1, m2, ret, op);
            return ret.getNonZeros();
        }
        return LibMatrixBincell.safeBinaryMVGeneric(m1, m2, ret, op, rl, ru);
    }

    /**
     * Returns true for operators whose kernels may skip empty inputs: multiply, the
     * {@code LOG_NZ} builtin, and sparse-safe divide/power with respect to {@code m2}.
     */
    private static boolean shouldSkipEmpty(MatrixBlock m2, BinaryOperator op) {
        if (op.fn instanceof Multiply) {
            return true;
        }
        if (op.fn instanceof Builtin && ((Builtin)op.fn).bFunc == Builtin.BuiltinCode.LOG_NZ) {
            return true;
        }
        return LibMatrixBincell.isSparseSafeDivideOrPow(op, m2);
    }

    /**
     * Dense matrix-vector kernel entry: a multiply with an empty operand yields 0 non-zeros
     * without touching the output; otherwise dispatches to the column-vector or row-vector kernel.
     */
    private static long safeBinaryMVDense(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean multiplyWithEmpty = op.fn instanceof Multiply && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false));
        if (multiplyWithEmpty) {
            return 0L;
        }
        final BinaryAccessType accessType = LibMatrixBincell.getBinaryAccessType(m1, m2);
        return accessType == BinaryAccessType.MATRIX_COL_VECTOR
            ? LibMatrixBincell.safeBinaryMVDenseColVector(m1, m2, ret, op, rl, ru)
            : LibMatrixBincell.safeBinaryMVDenseRowVector(m1, m2, ret, op, rl, ru);
    }

    /**
     * Dense matrix with column vector: picks the specialized multiply or divide kernel, or the
     * generic one for all other operators.
     */
    private static long safeBinaryMVDenseColVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int cols = m1.clen;
        final DenseBlock left = m1.getDenseBlock();
        final DenseBlock out = ret.getDenseBlock();
        final double[] vector = m2.getDenseBlockValues();

        final long nnz;
        if (op.fn instanceof Multiply) {
            nnz = LibMatrixBincell.safeBinaryMVDenseColVectorMultiply(left, vector, out, cols, rl, ru);
        } else if (op.fn instanceof Divide) {
            nnz = LibMatrixBincell.safeBinaryMVDenseColVectorDivide(left, vector, out, cols, rl, ru);
        } else {
            nnz = LibMatrixBincell.safeBinaryMVDenseColVectorGeneric(left, vector, out, cols, op, rl, ru);
        }
        return nnz;
    }

    /** Generic dense column-vector kernel; a {@code null} vector array is handled as an all-zero vector. */
    private static long safeBinaryMVDenseColVectorGeneric(DenseBlock da, double[] b, DenseBlock dc, int clen, BinaryOperator op, int rl, int ru) {
        return b == null
            ? LibMatrixBincell.safeBinaryMVDenseColVectorGenericEmptyVector(da, dc, clen, op, rl, ru)
            : LibMatrixBincell.safeBinaryMVDenseColVectorGenericDenseVector(da, b, dc, clen, op, rl, ru);
    }

    /**
     * Computes {@code c[i][j] = op(a[i][j], 0)} for rows {@code [rl, ru)}.
     *
     * @return number of non-zero results written
     */
    private static long safeBinaryMVDenseColVectorGenericEmptyVector(DenseBlock da, DenseBlock dc, int clen, BinaryOperator op, int rl, int ru) {
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            final double[] src = da.values(row);
            final double[] dst = dc.values(row);
            // the input row offset is used for the output row as well
            final int off = da.pos(row);
            for (int col = 0; col < clen; col++) {
                final double result = op.fn.execute(src[off + col], 0.0);
                dst[off + col] = result;
                if (result != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Computes {@code c[i][j] = op(a[i][j], b[i])} for rows {@code [rl, ru)}.
     *
     * @return number of non-zero results written
     */
    private static long safeBinaryMVDenseColVectorGenericDenseVector(DenseBlock da, double[] b, DenseBlock dc, int clen, BinaryOperator op, int rl, int ru) {
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            final double[] src = da.values(row);
            final double[] dst = dc.values(row);
            // the input row offset is used for the output row as well
            final int off = da.pos(row);
            final double rhs = b[row];
            for (int col = 0; col < clen; col++) {
                final double result = op.fn.execute(src[off + col], rhs);
                dst[off + col] = result;
                if (result != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Computes {@code c[i][j] = a[i][j] * b[i]} for rows {@code [rl, ru)}.
     *
     * <p>A {@code null} vector returns 0 without writing; rows whose vector entry is 0 are not
     * written either. A vector entry of 1 copies the row without multiplying.
     *
     * @return number of non-zero results written
     */
    private static long safeBinaryMVDenseColVectorMultiply(DenseBlock da, double[] b, DenseBlock dc, int clen, int rl, int ru) {
        if (b == null) {
            return 0L;
        }
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            final double[] src = da.values(row);
            final double[] dst = dc.values(row);
            final int begin = da.pos(row);
            final int end = begin + clen;
            final double factor = b[row];

            if (factor == 0.0) {
                // output row is left as it is
                continue;
            }
            if (factor == 1.0) {
                for (int p = begin; p < end; p++) {
                    dst[p] = src[p];
                    if (dst[p] != 0.0) {
                        count++;
                    }
                }
            } else {
                for (int p = begin; p < end; p++) {
                    dst[p] = src[p] * factor;
                    if (dst[p] != 0.0) {
                        count++;
                    }
                }
            }
        }
        return count;
    }

    /**
     * Computes {@code c[i][j] = a[i][j] / b[i]} for rows {@code [rl, ru)}.
     *
     * <p>A {@code null} vector array is treated as all zeros: the whole output block is filled
     * with NaN and the full cell count is returned (independent of {@code rl}/{@code ru}).
     *
     * @param da   dense input values
     * @param b    divisor per row, or {@code null}
     * @param dc   dense output values
     * @param clen number of columns per row
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @return number of non-zero cells written, summed over the processed rows
     */
    private static long safeBinaryMVDenseColVectorDivide(DenseBlock da, double[] b, DenseBlock dc, int clen, int rl, int ru) {
        if (b == null) {
            dc.fill(Double.NaN);
            return (long)dc.getDim(0) * (long)dc.getDim(1);
        }
        long nnz = 0L;
        for (int i = rl; i < ru; ++i) {
            double[] a = da.values(i);
            double[] c = dc.values(i);
            int ix = da.pos(i);
            double v2 = b[i];
            // accumulate the per-row count; dropping it made the method always report 0
            nnz += LibMatrixBincell.processRowMVDenseDivide(a, c, ix, clen, v2);
        }
        return nnz;
    }

    /**
     * Divides one row by a scalar: {@code c[ix .. ix+clen) = a[ix .. ix+clen) / v2}.
     *
     * <p>A divisor of 0 fills the row with NaN and counts every cell as non-zero; a divisor of 1
     * copies the row.
     * NOTE(review): the zero-divisor path writes NaN even for non-zero numerators, where plain
     * IEEE division would give an infinity -- confirm this is intended.
     *
     * @param a    array holding the input row
     * @param c    array receiving the output row
     * @param ix   offset of the row's first cell in both arrays
     * @param clen number of cells in the row
     * @param v2   divisor
     * @return number of non-zero cells written
     */
    private static long processRowMVDenseDivide(double[] a, double[] c, int ix, int clen, double v2) {
        final int end = ix + clen;
        long nnz = 0L;
        if (v2 == 0.0) {
            // Arrays.fill takes an exclusive END index, not a length
            Arrays.fill(c, ix, end, Double.NaN);
            nnz += (long)clen;
        } else if (v2 == 1.0) {
            for (int j = ix; j < end; ++j) {
                c[j] = a[j];
                nnz += c[j] != 0.0 ? 1L : 0L;
            }
        } else {
            for (int j = ix; j < end; ++j) {
                c[j] = a[j] / v2;
                nnz += c[j] != 0.0 ? 1L : 0L;
            }
        }
        return nnz;
    }

    /**
     * Dense matrix with row vector: {@code c[i][j] = op(a[i][j], b[j])} for rows {@code [rl, ru)}.
     *
     * <p>Unallocated inputs are treated as all zeros:
     * <ul>
     *   <li>both missing: every cell in the row range becomes {@code op(0, 0)};</li>
     *   <li>only the matrix missing: one result row {@code op(0, b[j])} is computed once and
     *       stored into every row of the range;</li>
     *   <li>otherwise: cell-wise evaluation, using 0 for a missing vector.</li>
     * </ul>
     *
     * @return number of non-zero cells written in rows {@code [rl, ru)}
     */
    private static long safeBinaryMVDenseRowVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        int clen = m1.clen;
        DenseBlock da = m1.getDenseBlock();
        DenseBlock dc = ret.getDenseBlock();
        long nnz = 0L;
        double[] b = m2.getDenseBlockValues();
        if (da == null && b == null) {
            // use the double overload, as every other zero-operand evaluation in this class does
            double val = op.fn.execute(0.0, 0.0);
            dc.set(rl, ru, 0, clen, val);
            nnz += val != 0.0 ? (long)(ru - rl) * (long)clen : 0L;
        } else if (da == null) {
            // Build the shared result row in a scratch array. Writing straight into
            // dc.values(rl) at offset 0 ignores dc.pos(rl) and hits the wrong row when rl > 0.
            double[] row = new double[clen];
            for (int j = 0; j < clen; ++j) {
                row[j] = op.fn.execute(0.0, b[j]);
                nnz += row[j] != 0.0 ? (long)(ru - rl) : 0L;
            }
            for (int i = rl; i < ru; ++i) {
                dc.set(i, row);
            }
        } else {
            for (int i = rl; i < ru; ++i) {
                double[] a = da.values(i);
                double[] c = dc.values(i);
                int ix = da.pos(i);
                for (int j = 0; j < clen; ++j) {
                    double val = op.fn.execute(a[ix + j], b != null ? b[j] : 0.0);
                    c[ix + j] = val;
                    nnz += val != 0.0 ? 1L : 0L;
                }
            }
        }
        return nnz;
    }

    /**
     * Sparse matrix with dense row vector into a dense output, for rows {@code [rl, ru)}.
     *
     * <p>For non-multiply operators, the output row {@code rl} is first filled with
     * {@code op(0, b[j])} and used as a template that is copied to the remaining rows; the
     * stored entries of each sparse row are then overwritten with {@code op(a[i][j], b[j])}.
     * For multiply, rows without stored entries are skipped entirely.
     *
     * <p>Statement order matters: the template lives in the output row {@code rl} itself, so it
     * must be copied to the other rows before row {@code rl} receives its own sparse entries.
     *
     * @return number of non-zeros in the processed output rows (or of {@code ret} after the
     *         copy shortcut)
     */
    private static long safeBinaryMVSparseDenseRow(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        int i;
        boolean isMultiply;
        // only multiply skips empty rows in this kernel
        boolean skipEmpty = isMultiply = op.fn instanceof Multiply;
        int clen = m1.clen;
        SparseBlock a = m1.sparseBlock;
        double[] b = m2.getDenseBlockValues();
        DenseBlock c = ret.getDenseBlock();
        // multiply with an empty operand: nothing is written, zero non-zeros
        if (skipEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false))) {
            return 0L;
        }
        // plus/minus with an empty vector: the result equals m1; only taken when the range starts at row 0
        if (!skipEmpty && m2.isEmptyBlock(false) && rl == 0 && (op.fn instanceof Minus || op.fn instanceof Plus)) {
            ret.copy(m1);
            return ret.nonZeros;
        }
        // tmp/tpos address output row rl, which doubles as the template row
        double[] tmp = c.values(rl);
        int tpos = c.pos(rl);
        if (!skipEmpty) {
            // template: result for a zero matrix cell in each column
            for (i = 0; i < clen; ++i) {
                tmp[tpos + i] = op.fn.execute(0.0, b[i]);
            }
        }
        // replicate the template into the remaining rows (must precede the overwrite loop below)
        for (i = rl + 1; i < ru; ++i) {
            if (skipEmpty && (a == null || a.isEmpty(i))) continue;
            System.arraycopy(tmp, tpos, c.values(i), c.pos(i), clen);
        }
        long nnz = 0L;
        for (int i2 = rl; i2 < ru; ++i2) {
            if (skipEmpty && (a == null || a.isEmpty(i2))) continue;
            double[] cvals = c.values(i2);
            int cpos = c.pos(i2);
            if (a != null && !a.isEmpty(i2)) {
                int apos = a.pos(i2);
                int alen = a.size(i2);
                int[] aix = a.indexes(i2);
                double[] avals = a.values(i2);
                // overwrite the cells that have a stored entry in the sparse row
                for (int j = apos; j < apos + alen; ++j) {
                    cvals[cpos + aix[j]] = op.fn.execute(avals[j], b[aix[j]]);
                }
            }
            // count non-zeros of the finished output row
            nnz += (long)UtilFunctions.computeNnz(cvals, cpos, clen);
        }
        return nnz;
    }

    /**
     * Matrix-vector kernel for a sparse left input on rows {@code [rl, ru)}.
     *
     * <p>Multiply and sparse-safe divide/power return 0 immediately if either input is empty.
     * Otherwise the column-vector or row-vector kernel runs (no kernel for any other access
     * type) and the non-zeros of rows {@code rl} to {@code ru - 1} are recomputed on {@code ret}.
     *
     * @return the recomputed non-zero count for the row range
     */
    private static long safeBinaryMVSparseLeft(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean skipsEmpty = op.fn instanceof Multiply || LibMatrixBincell.isSparseSafeDivideOrPow(op, m2);
        final BinaryAccessType accessType = LibMatrixBincell.getBinaryAccessType(m1, m2);

        if (skipsEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false))) {
            return 0L;
        }

        if (accessType == BinaryAccessType.MATRIX_COL_VECTOR) {
            LibMatrixBincell.safeBinaryMVSparseLeftColVector(m1, m2, ret, op, rl, ru);
        } else if (accessType == BinaryAccessType.MATRIX_ROW_VECTOR) {
            LibMatrixBincell.safeBinaryMVSparseLeftRowVector(m1, m2, ret, op, rl, ru);
        }

        final int lastRow = ru - 1;
        return ret.recomputeNonZeros(rl, lastRow);
    }

    /**
     * Sparse matrix (left) with column vector (right): row {@code i} of {@code m1} is combined
     * with the single value {@code m2.get(i, 0)}.
     *
     * <p>Per row, one of three strategies is chosen:
     * <ul>
     * <li>multiply by exactly 1.0: the input row is appended to {@code ret} unchanged;</li>
     * <li>{@code op(0, v2) == 0}: only the stored entries of the row are computed;</li>
     * <li>otherwise: stored entries are computed and every other column gets {@code op(0, v2)}.</li>
     * </ul>
     *
     * @param m1 left operand; its {@code sparseBlock} may be null
     * @param m2 right operand, read at column 0
     * @param ret output block, written in sparse or dense form depending on its current format
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     */
    private static void safeBinaryMVSparseLeftColVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean multiply = op.fn instanceof Multiply;
        final boolean canSkipEmpty = multiply || LibMatrixBincell.isSparseSafeDivideOrPow(op, m2);
        final int numCols = m1.clen;
        final SparseBlock left = m1.sparseBlock;
        final boolean noLeftBlock = left == null;

        if (canSkipEmpty && noLeftBlock) {
            return;
        }

        if (ret.isInSparseFormat()) {
            final SparseBlockMCSR out = (SparseBlockMCSR) ret.getSparseBlock();
            for (int row = rl; row < ru; ++row) {
                final double rhs = m2.get(row, 0);
                final boolean rowEmpty = noLeftBlock || left.isEmpty(row);
                final boolean rhsZero = rhs == 0.0;
                // When skipping is allowed one empty side suffices; otherwise both must be empty.
                if (canSkipEmpty ? (rowEmpty || rhsZero) : (rowEmpty && rhsZero)) {
                    continue;
                }
                final double zeroResult = op.fn.execute(0.0, rhs);
                if (multiply && rhs == 1.0) {
                    ret.appendRow(row, left.get(row));
                } else if (zeroResult != 0.0) {
                    LibMatrixBincell.safeBinaryMVSparseColVectorRowWithFill(left, row, out, zeroResult, rhs, numCols, rowEmpty, op);
                } else {
                    LibMatrixBincell.safeBinaryMVSparseColVectorRowNoFill(left, row, out, rhs, rowEmpty, op);
                }
            }
        } else {
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; ++row) {
                final double rhs = m2.get(row, 0);
                final boolean rowEmpty = noLeftBlock || left.isEmpty(row);
                final boolean rhsZero = rhs == 0.0;
                if (canSkipEmpty ? (rowEmpty || rhsZero) : (rowEmpty && rhsZero)) {
                    continue;
                }
                final double zeroResult = op.fn.execute(0.0, rhs);
                if (multiply && rhs == 1.0) {
                    ret.appendRow(row, left.get(row));
                } else if (zeroResult != 0.0) {
                    LibMatrixBincell.safeBinaryMVSparseColVectorRowWithFill(left, row, out, zeroResult, rhs, numCols, rowEmpty, op);
                } else {
                    LibMatrixBincell.safeBinaryMVSparseColVectorRowNoFill(left, row, out, rhs, rowEmpty, op);
                }
            }
        }
    }

    /**
     * Computes {@code op(a[i, j], v2)} for every stored entry of row {@code i} and appends the
     * results to the sparse output row. Nothing is written for an empty input row.
     *
     * @param a sparse input block
     * @param i row index
     * @param rb sparse output block
     * @param v2 right-hand scalar for this row
     * @param emptyRow whether row {@code i} of {@code a} has no entries
     * @param op binary operator
     */
    private static final void safeBinaryMVSparseColVectorRowNoFill(SparseBlock a, int i, SparseBlockMCSR rb, double v2, boolean emptyRow, BinaryOperator op) {
        if (emptyRow) {
            return;
        }
        final int begin = a.pos(i);
        final int count = a.size(i);
        final int[] cols = a.indexes(i);
        final double[] vals = a.values(i);
        final int end = begin + count;
        rb.allocate(i, count);
        for (int k = begin; k < end; k++) {
            rb.append(i, cols[k], op.fn.execute(vals[k], v2));
        }
    }

    /**
     * Computes {@code op(a[i, j], v2)} for every stored entry of row {@code i} and stores the
     * results at the same columns of the dense output. Nothing is written for an empty input row.
     *
     * @param a sparse input block
     * @param i row index
     * @param rb dense output block
     * @param v2 right-hand scalar for this row
     * @param emptyRow whether row {@code i} of {@code a} has no entries
     * @param op binary operator
     */
    private static final void safeBinaryMVSparseColVectorRowNoFill(SparseBlock a, int i, DenseBlock rb, double v2, boolean emptyRow, BinaryOperator op) {
        if (emptyRow) {
            return;
        }
        final int begin = a.pos(i);
        final int end = begin + a.size(i);
        final int[] cols = a.indexes(i);
        final double[] vals = a.values(i);
        for (int k = begin; k < end; k++) {
            rb.set(i, cols[k], op.fn.execute(vals[k], v2));
        }
    }

    /**
     * Writes a complete output row of {@code clen} columns into the sparse output: stored
     * entries of row {@code i} yield {@code op(a[i, j], v2)}, every other column yields
     * {@code vz}.
     *
     * @param a sparse input block (not dereferenced when {@code emptyRow} is true)
     * @param i row index
     * @param rb sparse output block
     * @param vz value written at columns without a stored input entry
     * @param v2 right-hand scalar for this row
     * @param clen number of columns
     * @param emptyRow whether row {@code i} of {@code a} has no entries
     * @param op binary operator
     */
    private static final void safeBinaryMVSparseColVectorRowWithFill(SparseBlock a, int i, SparseBlockMCSR rb, double vz, double v2, int clen, boolean emptyRow, BinaryOperator op) {
        // First column that has not been written yet.
        int nextCol = 0;
        if (emptyRow) {
            rb.allocate(i, clen);
        } else {
            final int begin = a.pos(i);
            final int end = begin + a.size(i);
            final int[] cols = a.indexes(i);
            final double[] vals = a.values(i);
            rb.allocate(i, clen);
            for (int k = begin; k < end; k++) {
                final int col = cols[k];
                LibMatrixBincell.fillZeroValuesScalar(vz, rb, i, nextCol, col);
                rb.append(i, col, op.fn.execute(vals[k], v2));
                nextCol = col + 1;
            }
        }
        // Trailing gap (or the whole row when the input row is empty).
        LibMatrixBincell.fillZeroValuesScalar(vz, rb, i, nextCol, clen);
    }

    /**
     * Writes a complete output row of {@code clen} columns into the dense output: stored
     * entries of row {@code i} yield {@code op(a[i, j], v2)}, every other column yields
     * {@code vz}.
     *
     * @param a sparse input block (not dereferenced when {@code emptyRow} is true)
     * @param i row index
     * @param rb dense output block
     * @param vz value written at columns without a stored input entry
     * @param v2 right-hand scalar for this row
     * @param clen number of columns
     * @param emptyRow whether row {@code i} of {@code a} has no entries
     * @param op binary operator
     */
    private static final void safeBinaryMVSparseColVectorRowWithFill(SparseBlock a, int i, DenseBlock rb, double vz, double v2, int clen, boolean emptyRow, BinaryOperator op) {
        // First column that has not been written yet.
        int nextCol = 0;
        if (!emptyRow) {
            final int begin = a.pos(i);
            final int end = begin + a.size(i);
            final int[] cols = a.indexes(i);
            final double[] vals = a.values(i);
            for (int k = begin; k < end; k++) {
                final int col = cols[k];
                LibMatrixBincell.fillZeroValuesScalar(vz, rb, i, nextCol, col);
                rb.set(i, col, op.fn.execute(vals[k], v2));
                nextCol = col + 1;
            }
        }
        // Trailing gap (or the whole row when the input row is empty).
        LibMatrixBincell.fillZeroValuesScalar(vz, rb, i, nextCol, clen);
    }

    /**
     * Appends {@code v} to row {@code rpos} of the sparse block for every column in
     * {@code [cpos, len)}.
     *
     * @param v value to append
     * @param ret sparse output block
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive (despite the name, this is not a count)
     */
    private static final void fillZeroValuesScalar(double v, SparseBlock ret, int rpos, int cpos, int len) {
        int col = cpos;
        while (col < len) {
            ret.append(rpos, col, v);
            col++;
        }
    }

    /**
     * Sets the dense output to {@code v} over the single-row range starting at row
     * {@code rpos}, passing {@code cpos} and {@code len} as the column bounds.
     *
     * @param v value to write
     * @param ret dense output block
     * @param rpos row index
     * @param cpos lower column bound
     * @param len upper column bound
     */
    private static final void fillZeroValuesScalar(double v, DenseBlock ret, int rpos, int cpos, int len) {
        final int rowEnd = rpos + 1;
        ret.set(rpos, rowEnd, cpos, len, v);
    }

    /**
     * Sparse matrix (left) with row vector (right): entry {@code (i, j)} of {@code m1} is
     * combined with {@code m2.get(0, j)}.
     *
     * <p>For each row the stored entries are walked in column order; the gaps between them
     * (and after the last one) are handed to {@code fillZeroValues}, which does nothing when
     * empty cells may be skipped.
     *
     * @param m1 left operand; its {@code sparseBlock} may be null
     * @param m2 right operand, read at row 0
     * @param ret output block, written in sparse or dense form depending on its current format
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     */
    private static void safeBinaryMVSparseLeftRowVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean canSkipEmpty = op.fn instanceof Multiply || LibMatrixBincell.isSparseSafeDivideOrPow(op, m2);
        final int numCols = m1.clen;
        final SparseBlock left = m1.sparseBlock;

        if (ret.isInSparseFormat()) {
            final SparseBlock out = ret.getSparseBlock();
            for (int row = rl; row < ru; ++row) {
                final boolean hasEntries = left != null && !left.isEmpty(row);
                if (canSkipEmpty && !hasEntries) {
                    continue;
                }
                if (canSkipEmpty && ret.sparse) {
                    ret.sparseBlock.allocate(row, left.size(row));
                }
                // First column that has not been handled yet.
                int nextCol = 0;
                if (hasEntries) {
                    final int begin = left.pos(row);
                    final int end = begin + left.size(row);
                    final int[] cols = left.indexes(row);
                    final double[] vals = left.values(row);
                    for (int k = begin; k < end; ++k) {
                        final int col = cols[k];
                        LibMatrixBincell.fillZeroValues(op, m2, ret, canSkipEmpty, row, nextCol, col);
                        final double rhs = m2.get(0, col);
                        out.append(row, col, op.fn.execute(vals[k], rhs));
                        nextCol = col + 1;
                    }
                }
                LibMatrixBincell.fillZeroValues(op, m2, ret, canSkipEmpty, row, nextCol, numCols);
            }
        } else {
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; ++row) {
                final boolean hasEntries = left != null && !left.isEmpty(row);
                if (canSkipEmpty && !hasEntries) {
                    continue;
                }
                if (canSkipEmpty && ret.sparse) {
                    ret.sparseBlock.allocate(row, left.size(row));
                }
                // First column that has not been handled yet.
                int nextCol = 0;
                if (hasEntries) {
                    final int begin = left.pos(row);
                    final int end = begin + left.size(row);
                    final int[] cols = left.indexes(row);
                    final double[] vals = left.values(row);
                    for (int k = begin; k < end; ++k) {
                        final int col = cols[k];
                        LibMatrixBincell.fillZeroValues(op, m2, out, canSkipEmpty, row, nextCol, col);
                        final double rhs = m2.get(0, col);
                        out.set(row, col, op.fn.execute(vals[k], rhs));
                        nextCol = col + 1;
                    }
                }
                LibMatrixBincell.fillZeroValues(op, m2, out, canSkipEmpty, row, nextCol, numCols);
            }
        }
    }

    /**
     * Handles the columns {@code [cpos, len)} of row {@code rpos} for which the left operand
     * has no stored entry, dispatching on the representation of {@code m2}
     * (empty, sparse or dense). Does nothing when {@code skipEmpty} is set.
     *
     * @param op binary operator
     * @param m2 right operand (row vector)
     * @param ret output matrix block
     * @param skipEmpty whether empty cells may be skipped
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive
     */
    private static void fillZeroValues(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        if (skipEmpty) {
            return;
        }
        if (m2.isEmpty()) {
            LibMatrixBincell.fillZeroValuesEmpty(op, m2, ret, skipEmpty, rpos, cpos, len);
            return;
        }
        if (m2.isInSparseFormat()) {
            LibMatrixBincell.fillZeroValuesSparse(op, m2, ret, skipEmpty, rpos, cpos, len);
            return;
        }
        LibMatrixBincell.fillZeroValuesDense(op, m2, ret, skipEmpty, rpos, cpos, len);
    }

    /**
     * Handles the columns {@code [cpos, len)} of row {@code rpos} for which the left operand
     * has no stored entry, dispatching on the representation of {@code m2}
     * (empty, sparse or dense). Does nothing when {@code skipEmpty} is set.
     *
     * @param op binary operator
     * @param m2 right operand (row vector)
     * @param ret dense output block
     * @param skipEmpty whether empty cells may be skipped
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive
     */
    private static void fillZeroValues(BinaryOperator op, MatrixBlock m2, DenseBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        if (skipEmpty) {
            return;
        }
        if (m2.isEmpty()) {
            LibMatrixBincell.fillZeroValuesEmpty(op, m2, ret, skipEmpty, rpos, cpos, len);
            return;
        }
        if (m2.isInSparseFormat()) {
            LibMatrixBincell.fillZeroValuesSparse(op, m2, ret, skipEmpty, rpos, cpos, len);
            return;
        }
        LibMatrixBincell.fillZeroValuesDense(op, m2, ret, skipEmpty, rpos, cpos, len);
    }

    /**
     * Fill step for an empty right operand and a matrix-block output. Writes nothing; it only
     * verifies that {@code op(0, 0)} is zero.
     *
     * @param op binary operator
     * @param m2 right operand (unused)
     * @param ret output block (unused)
     * @param skipEmpty unused
     * @param rpos unused
     * @param cpos unused
     * @param len unused
     * @throws RuntimeException if {@code op(0, 0)} is not zero
     */
    private static void fillZeroValuesEmpty(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double zeroResult = op.fn.execute(0.0, 0.0);
        if (zeroResult == 0.0) {
            return;
        }
        throw new RuntimeException("invalid safe fill");
    }

    /**
     * Fill step for an empty right operand and a dense output. When {@code op(0, 0)} is not
     * zero, that value is written to the single-row range at {@code rpos} with column bounds
     * {@code cpos} and {@code len}; otherwise the output is left untouched.
     *
     * @param op binary operator
     * @param m2 right operand (unused)
     * @param ret dense output block
     * @param skipEmpty unused
     * @param rpos row index
     * @param cpos lower column bound
     * @param len upper column bound
     */
    private static void fillZeroValuesEmpty(BinaryOperator op, MatrixBlock m2, DenseBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double zeroResult = op.fn.execute(0.0, 0.0);
        if (zeroResult == 0.0) {
            return;
        }
        ret.set(rpos, rpos + 1, cpos, len, zeroResult);
    }

    /**
     * Fill step for a dense right operand and a matrix-block output: for every column
     * {@code k} in {@code [cpos, len)} the value {@code op(0, m2[0, k])} is appended to row
     * {@code rpos}.
     *
     * <p>When the output is sparse and backed by a {@link SparseBlockMCSR}, the row is
     * allocated once and values are appended directly to it; otherwise
     * {@code ret.appendValue} is used per cell.
     *
     * @param op binary operator
     * @param m2 right operand in dense format
     * @param ret output matrix block
     * @param skipEmpty unused
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive
     */
    private static void fillZeroValuesDense(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double[] rhs = m2.getDenseBlock().values(0);
        final SparseBlock target = ret.getSparseBlock();
        final boolean directRowAppend = ret.isInSparseFormat() && target instanceof SparseBlockMCSR;

        if (!directRowAppend) {
            for (int col = cpos; col < len; ++col) {
                ret.appendValue(rpos, col, op.fn.execute(0.0, rhs[col]));
            }
            return;
        }

        final SparseBlockMCSR rows = (SparseBlockMCSR) target;
        rows.allocate(rpos, cpos, len);
        final SparseRow dst = rows.get(rpos);
        for (int col = cpos; col < len; ++col) {
            dst.append(col, op.fn.execute(0.0, rhs[col]));
        }
    }

    /**
     * Fill step for a dense right operand and a dense output: for every column {@code k} in
     * {@code [cpos, len)} the cell {@code (rpos, k)} is set to {@code op(0, m2[0, k])}.
     *
     * @param op binary operator
     * @param m2 right operand in dense format
     * @param ret dense output block
     * @param skipEmpty unused
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive
     */
    private static void fillZeroValuesDense(BinaryOperator op, MatrixBlock m2, DenseBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double[] rhs = m2.getDenseBlock().values(0);
        int col = cpos;
        while (col < len) {
            ret.set(rpos, col, op.fn.execute(0.0, rhs[col]));
            col++;
        }
    }

    /**
     * Fill step for a sparse right operand and a matrix-block output: for every stored entry
     * of row 0 of {@code m2} whose column lies in {@code [cpos, len)}, the value
     * {@code op(0, m2[0, k])} is appended to row {@code rpos} of {@code ret}.
     *
     * <p>Only operators with {@code op(0, 0) == 0} are supported, because columns that are
     * empty on both sides are never written. The previous code rejected such operators at
     * three separate points, all reached before any write; the check is now made once up
     * front.
     *
     * <p>Fix: the row start offset is now read before it is used to compute the end offset.
     * The earlier form {@code alen = sb.size(0) + apos} read {@code apos} while it was still
     * unassigned.
     *
     * @param op binary operator
     * @param m2 right operand in sparse format
     * @param ret output matrix block
     * @param skipEmpty unused
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive
     * @throws RuntimeException if {@code op(0, 0)} is not zero
     */
    private static void fillZeroValuesSparse(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final boolean zeroIsZero = op.fn.execute(0.0, 0.0) == 0.0;
        final SparseBlock sb = m2.getSparseBlock();
        final boolean emptyVector = sb.isEmpty(0);
        if (!zeroIsZero) {
            throw new RuntimeException("invalid fill zeros");
        }
        if (emptyVector) {
            return;
        }

        int apos = sb.pos(0);
        final int alen = sb.size(0) + apos;
        final int[] aix = sb.indexes(0);
        final double[] vals = sb.values(0);

        // Skip stored entries that lie left of the requested column range.
        while (apos < alen && aix[apos] < len && cpos > aix[apos]) {
            apos++;
        }
        // Emit op(0, value) for every stored entry inside the range.
        for (; apos < alen && aix[apos] < len; apos++) {
            ret.appendValue(rpos, aix[apos], op.fn.execute(0.0, vals[apos]));
        }
    }

    /**
     * Fill step for a sparse right operand and a dense output: for every stored entry of row
     * 0 of {@code m2} whose column lies in {@code [cpos, len)}, the cell {@code (rpos, k)} of
     * {@code ret} is set to {@code op(0, m2[0, k])}.
     *
     * <p>Only operators with {@code op(0, 0) == 0} are supported, because columns that are
     * empty on both sides are never written. The previous code rejected such operators at
     * three separate points, all reached before any write; the check is now made once up
     * front.
     *
     * <p>Fix: the row start offset is now read before it is used to compute the end offset.
     * The earlier form {@code alen = sb.size(0) + apos} read {@code apos} while it was still
     * unassigned.
     *
     * @param op binary operator
     * @param m2 right operand in sparse format
     * @param ret dense output block
     * @param skipEmpty unused
     * @param rpos row index
     * @param cpos first column, inclusive
     * @param len end column, exclusive
     * @throws RuntimeException if {@code op(0, 0)} is not zero
     */
    private static void fillZeroValuesSparse(BinaryOperator op, MatrixBlock m2, DenseBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final boolean zeroIsZero = op.fn.execute(0.0, 0.0) == 0.0;
        final SparseBlock sb = m2.getSparseBlock();
        final boolean emptyVector = sb.isEmpty(0);
        if (!zeroIsZero) {
            throw new RuntimeException("invalid fill zeros");
        }
        if (emptyVector) {
            return;
        }

        int apos = sb.pos(0);
        final int alen = sb.size(0) + apos;
        final int[] aix = sb.indexes(0);
        final double[] vals = sb.values(0);

        // Skip stored entries that lie left of the requested column range.
        while (apos < alen && aix[apos] < len && cpos > aix[apos]) {
            apos++;
        }
        // Emit op(0, value) for every stored entry inside the range.
        for (; apos < alen && aix[apos] < len; apos++) {
            ret.set(rpos, aix[apos], op.fn.execute(0.0, vals[apos]));
        }
    }

    /**
     * Decides whether the specialised dense-times-column-vector kernel with sparse output
     * ({@code safeBinaryMcVDenseSparseMult}) may be used.
     *
     * <p>Requires a sparse-safe multiply, dense inputs, a sparse output and column-vector
     * access. In addition the number of cells of {@code m1} must fit into an {@code int}:
     * the kernel addresses the values of {@code m1} as one array using {@code int} offsets of
     * the form {@code row * m1.clen + col}. The previous guard only bounded
     * {@code m1.rlen * m2.clen}, which does not bound those offsets; that check is kept and
     * the cell-count bound is added, so the set of accepted inputs can only shrink.
     *
     * @param m1 left operand
     * @param m2 right operand
     * @param ret output block
     * @param op binary operator
     * @return true if the specialised kernel is applicable
     */
    private static boolean isSafeBinaryMcVDenseSparseMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        BinaryAccessType atype = LibMatrixBincell.getBinaryAccessType(m1, m2);
        return op.sparseSafe && !m1.sparse && !m2.sparse && ret.sparse && op.fn instanceof Multiply
            && atype == BinaryAccessType.MATRIX_COL_VECTOR
            && (long)m1.rlen * (long)m2.clen < Integer.MAX_VALUE
            // The kernel indexes m1's values with int arithmetic over rlen * clen cells.
            && (long)m1.rlen * (long)m1.clen < Integer.MAX_VALUE;
    }

    /**
     * Multiplies each row {@code i} of the dense matrix {@code m1} by the scalar {@code b[i]}
     * of the dense column vector {@code m2} and stores the result as a CSR sparse block.
     *
     * <p>Two passes: the first counts the non-zeros of all rows whose vector entry is
     * non-zero so the CSR arrays can be sized exactly; the second writes column indexes and
     * products. Rows with a zero vector entry stay empty.
     *
     * @param m1 dense left operand
     * @param m2 dense right operand (one value per row)
     * @param ret output block; its {@code sparseBlock} and non-zero count are replaced
     * @param op binary operator (not consulted; plain multiplication is applied)
     */
    private static void safeBinaryMcVDenseSparseMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final int numRows = m1.rlen;
        final int numCols = m1.clen;
        final double[] lhs = m1.getDenseBlockValues();
        final double[] rhs = m2.getDenseBlockValues();

        // Pass 1: exact size of the result.
        int total = 0;
        for (int row = 0, off = 0; row < numRows; row++, off += numCols) {
            if (rhs[row] != 0.0) {
                total += UtilFunctions.countNonZeros(lhs, off, numCols);
            }
        }

        // Pass 2: populate the CSR arrays; rowPtr[0] stays 0 from array initialisation.
        final int[] rowPtr = new int[numRows + 1];
        final int[] colIdx = new int[total];
        final double[] values = new double[total];
        int next = 0;
        for (int row = 0, off = 0; row < numRows; row++, off += numCols) {
            final double scale = rhs[row];
            if (scale != 0.0) {
                for (int col = 0; col < numCols; ++col) {
                    final double cell = lhs[off + col];
                    if (cell != 0.0) {
                        colIdx[next] = col;
                        values[next] = cell * scale;
                        next++;
                    }
                }
            }
            rowPtr[row + 1] = next;
        }

        ret.sparseBlock = new SparseBlockCSR(rowPtr, colIdx, values, total);
        ret.setNonZeros(total);
    }

    /**
     * Generic matrix-vector binary operation over rows {@code [rl, ru)}.
     *
     * <ul>
     * <li>Column-vector access: processed row by row via {@code safeBinaryMcVGenericRow}.</li>
     * <li>Otherwise, multiply with sparse {@code m2} and sparse {@code ret}: only the stored
     *     columns of row 0 of {@code m2} are visited.</li>
     * <li>Otherwise: every cell is computed via {@code safeBinaryMrVGeneric}.</li>
     * </ul>
     *
     * @param m1 left operand
     * @param m2 right operand (vector)
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return the non-zero count reported by the chosen kernel; see the note on the sparse
     *         multiply branch, which returns 0
     */
    private static long safeBinaryMVGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean multiply = op.fn instanceof Multiply;
        final int numCols = m1.clen;
        final BinaryAccessType accessType = LibMatrixBincell.getBinaryAccessType(m1, m2);

        if (accessType == BinaryAccessType.MATRIX_COL_VECTOR) {
            long count = 0L;
            for (int row = rl; row < ru; ++row) {
                // For this method "skip empty" coincides with "is multiply".
                count += LibMatrixBincell.safeBinaryMcVGenericRow(m1, m2, ret, op, multiply, multiply, numCols, row);
            }
            return count;
        }

        final boolean sparseMultiply = m2.sparse && ret.sparse && multiply;
        if (!sparseMultiply) {
            return LibMatrixBincell.safeBinaryMrVGeneric(m1, m2, ret, op, numCols, rl, ru);
        }

        final SparseBlock rhs = m2.sparseBlock;
        final SparseBlock out = ret.sparseBlock;
        if (rhs.isEmpty(0)) {
            return 0L;
        }
        // Row 0 is read from array offset 0; rhs.pos(0) is not consulted.
        final int rhsLen = rhs.size(0);
        final int[] rhsCols = rhs.indexes(0);
        final double[] rhsVals = rhs.values(0);
        for (int row = rl; row < ru; ++row) {
            out.allocate(row, rhsLen);
            for (int k = 0; k < rhsLen; ++k) {
                final int col = rhsCols[k];
                out.append(row, col, m1.get(row, col) * rhsVals[k]);
            }
        }
        // NOTE(review): this branch records the count on ret directly and returns 0, unlike
        // the other branches which return their count - confirm callers do not overwrite it.
        ret.setNonZeros(out.size());
        return 0L;
    }

    /**
     * Cell-by-cell matrix with row-vector operation: {@code ret[i, j] = op(m1[i, j], m2[0, j])}
     * for rows {@code [rl, ru)} and columns {@code [0, clen)}.
     *
     * @param m1 left operand
     * @param m2 right operand, read at row 0
     * @param ret output block (sparse rows are appended, dense cells are set)
     * @param op binary operator
     * @param clen number of columns
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return for sparse output the sum of the resulting row sizes, for dense output the
     *         number of results different from zero
     */
    private static long safeBinaryMrVGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int clen, int rl, int ru) {
        long count = 0L;
        if (!ret.isInSparseFormat()) {
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; ++row) {
                for (int col = 0; col < clen; ++col) {
                    final double lhs = m1.get(row, col);
                    final double rhs = m2.get(0, col);
                    final double result = op.fn.execute(lhs, rhs);
                    out.set(row, col, result);
                    if (result != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }

        final SparseBlock out = ret.getSparseBlock();
        for (int row = rl; row < ru; ++row) {
            for (int col = 0; col < clen; ++col) {
                final double lhs = m1.get(row, col);
                final double rhs = m2.get(0, col);
                out.append(row, col, op.fn.execute(lhs, rhs));
            }
            count += (long)out.size(row);
        }
        return count;
    }

    /**
     * Processes one row of a matrix with column-vector operation:
     * {@code ret[i, j] = op(m1[i, j], m2[i, 0])} for all columns {@code [0, clen)}.
     *
     * <p>The row is skipped when {@code skipEmpty} is set and the vector value is zero. For
     * a multiply by exactly 1.0 the input values are copied without invoking the operator.
     *
     * @param m1 left operand
     * @param m2 right operand, read at column 0
     * @param ret output block (sparse rows are appended, dense cells are set)
     * @param op binary operator
     * @param isMultiply whether the operator is a multiplication
     * @param skipEmpty whether rows with a zero vector value may be skipped
     * @param clen number of columns
     * @param i row index
     * @return 0 for a skipped row; otherwise the sparse row size or, for dense output, the
     *         recomputed non-zero count of row {@code i}
     */
    private static long safeBinaryMcVGenericRow(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, boolean isMultiply, boolean skipEmpty, int clen, int i) {
        final double rhs = m2.get(i, 0);
        if (skipEmpty && rhs == 0.0) {
            return 0L;
        }
        // x * 1.0 == x, so the operator call can be bypassed.
        final boolean copyOnly = isMultiply && rhs == 1.0;

        if (ret.isInSparseFormat()) {
            final SparseBlock sparseOut = ret.getSparseBlock();
            for (int col = 0; col < clen; ++col) {
                final double lhs = m1.get(i, col);
                sparseOut.append(i, col, copyOnly ? lhs : op.fn.execute(lhs, rhs));
            }
            return (long)sparseOut.size(i);
        }

        final DenseBlock denseOut = ret.getDenseBlock();
        for (int col = 0; col < clen; ++col) {
            final double lhs = m1.get(i, col);
            denseOut.set(i, col, copyOnly ? lhs : op.fn.execute(lhs, rhs));
        }
        return ret.recomputeNonZeros(i, i);
    }

    /**
     * Outer vector-vector operation: {@code ret[r, c] = op(m1[r, 0], m2[0, c])} for rows
     * {@code [rl, ru)} and all columns of {@code m2}.
     *
     * <p>Comparison operators on a sorted {@code m2} with more than 16 columns are delegated
     * to {@code performBinOuterOperation}.
     *
     * @param m1 left operand, read at column 0
     * @param m2 right operand, read at row 0
     * @param ret output block (sparse rows are appended, dense cells are set)
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return the number of computed results different from zero, or the value returned by
     *         the delegated outer operation
     */
    private static long safeBinaryVVGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int numCols = m2.clen;
        if (LibMatrixOuterAgg.isCompareOperator(op) && m2.getNumColumns() > 16 && SortUtils.isSorted(m2)) {
            return LibMatrixBincell.performBinOuterOperation(m1, m2, ret, op, rl, ru);
        }

        long count = 0L;
        if (!ret.isInSparseFormat()) {
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; ++row) {
                final double lhs = m1.get(row, 0);
                for (int col = 0; col < numCols; ++col) {
                    final double result = op.fn.execute(lhs, m2.get(0, col));
                    out.set(row, col, result);
                    if (result != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }

        final SparseBlock out = ret.getSparseBlock();
        for (int row = rl; row < ru; ++row) {
            final double lhs = m1.get(row, 0);
            for (int col = 0; col < numCols; ++col) {
                final double result = op.fn.execute(lhs, m2.get(0, col));
                out.append(row, col, result);
                if (result != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Matrix-matrix operation on two sparse-format inputs, dispatching on which of the two
     * sparse blocks are allocated.
     *
     * <ul>
     * <li>Both allocated: aligned kernel if {@code ret} is sparse and the blocks are aligned,
     *     generic merge kernel otherwise.</li>
     * <li>Only the right block allocated: right-only kernel.</li>
     * <li>Otherwise: left-only kernel.</li>
     * </ul>
     *
     * @param m1 left operand
     * @param m2 right operand
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return the non-zero count reported by the chosen kernel
     */
    private static long safeBinaryMMSparseSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final SparseBlock left = m1.sparseBlock;
        final SparseBlock right = m2.sparseBlock;

        if (left == null || right == null) {
            if (right != null) {
                return LibMatrixBincell.safeBinaryMMSparseSparseNullRight(m1, m2, ret, op, rl, ru);
            }
            return LibMatrixBincell.safeBinaryMMSparseSparseNullLeft(m1, m2, ret, op, rl, ru);
        }

        final boolean aligned = ret.sparse && left.isAligned(right);
        return aligned
            ? LibMatrixBincell.safeBinaryMMSparseSparseAligned(m1, m2, ret, op, rl, ru)
            : LibMatrixBincell.safeBinaryMMSparseSparseGeneric(m1, m2, ret, op, rl, ru);
    }

    /**
     * Sparse-sparse kernel for aligned inputs: both blocks are indexed with the positions and
     * column indexes of the left block, so values can be combined position by position
     * without merging index lists.
     *
     * @param m1 left operand (sparse block allocated)
     * @param m2 right operand (sparse block allocated)
     * @param ret sparse output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return sum of the resulting output row sizes
     */
    private static long safeBinaryMMSparseSparseAligned(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final SparseBlock left = m1.sparseBlock;
        final SparseBlock right = m2.sparseBlock;
        final SparseBlock out = ret.sparseBlock;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            if (!left.isEmpty(row)) {
                final int len = left.size(row);
                final int begin = left.pos(row);
                final int[] cols = left.indexes(row);
                final double[] leftVals = left.values(row);
                final double[] rightVals = right.values(row);
                final int end = begin + len;
                out.allocate(row, len);
                for (int k = begin; k < end; ++k) {
                    out.append(row, cols[k], op.fn.execute(leftVals[k], rightVals[k]));
                }
                count += (long)out.size(row);
            }
        }
        return count;
    }

    /**
     * Sparse-sparse kernel for inputs that are not aligned. Per row, depending on which side
     * has entries, the row is merged, taken from the left only, taken from the right only,
     * or skipped.
     *
     * @param m1 left operand (sparse block allocated)
     * @param m2 right operand (sparse block allocated)
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return sum of the counts returned by the per-row helpers
     */
    private static long safeBinaryMMSparseSparseGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final SparseBlock left = m1.sparseBlock;
        final SparseBlock right = m2.sparseBlock;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            final boolean leftEmpty = left.isEmpty(row);
            final boolean rightEmpty = right.isEmpty(row);
            if (leftEmpty && rightEmpty) {
                continue;
            }
            if (leftEmpty) {
                count += LibMatrixBincell.appendRightForSparseBinary(op, right.values(row), right.indexes(row), right.pos(row), right.size(row), row, ret);
            } else if (rightEmpty) {
                count += LibMatrixBincell.appendLeftForSparseBinary(op, left.values(row), left.indexes(row), left.pos(row), left.size(row), row, ret);
            } else {
                count += LibMatrixBincell.mergeForSparseBinary(op,
                    left.values(row), left.indexes(row), left.pos(row), left.size(row),
                    right.values(row), right.indexes(row), right.pos(row), right.size(row),
                    row, ret);
            }
        }
        return count;
    }

    /**
     * Sparse-sparse kernel that reads only the right operand's sparse block: every non-empty
     * row of {@code m2} is passed to {@code appendRightForSparseBinary}.
     *
     * @param m1 left operand (not read)
     * @param m2 right operand (sparse block allocated)
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return sum of the counts returned by the per-row helper
     */
    private static long safeBinaryMMSparseSparseNullRight(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final SparseBlock right = m2.sparseBlock;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            if (!right.isEmpty(row)) {
                count += LibMatrixBincell.appendRightForSparseBinary(op, right.values(row), right.indexes(row), right.pos(row), right.size(row), row, ret);
            }
        }
        return count;
    }

    /**
     * Sparse-sparse kernel that reads only the left operand's sparse block: every non-empty
     * row of {@code m1} is passed to {@code appendLeftForSparseBinary}.
     *
     * @param m1 left operand; its sparse block is dereferenced without a null check
     * @param m2 right operand (not read)
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return sum of the counts returned by the per-row helper
     */
    private static long safeBinaryMMSparseSparseNullLeft(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final SparseBlock left = m1.sparseBlock;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            if (!left.isEmpty(row)) {
                count += LibMatrixBincell.appendLeftForSparseBinary(op, left.values(row), left.indexes(row), left.pos(row), left.size(row), row, ret);
            }
        }
        return count;
    }

    /**
     * Matrix-matrix operation with dense output in two phases: the left operand is first
     * copied into the output, then the right operand is applied to the output in place.
     *
     * @param m1 left operand (sparse or dense)
     * @param m2 right operand (sparse or dense)
     * @param ret dense output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return the recomputed non-zero count of rows {@code rl} to {@code ru - 1} when
     *         {@code m2} is empty, otherwise the count reported by the post-processing step
     */
    private static long safeBinaryMMSparseDenseDense(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int numCols = ret.clen;
        final DenseBlock out = ret.getDenseBlock();

        // Phase 1: bring the left operand into the output.
        if (!m1.isEmpty()) {
            if (m1.isInSparseFormat()) {
                LibMatrixBincell.safeMMLSparsePreProcess(m1, rl, ru, out);
            } else {
                LibMatrixBincell.safeMMLDensePreProcess(m1, rl, ru, numCols, out);
            }
        }

        // Phase 2: apply the right operand in place.
        if (m2.isEmpty()) {
            return ret.recomputeNonZeros(rl, ru - 1);
        }
        if (m2.isInSparseFormat()) {
            return LibMatrixBincell.safeMMRSparsePostProcess(m2, ret, op, rl, ru, out, 0L);
        }
        return LibMatrixBincell.safeMMRDensePostProcess(m1, m2, ret, op, rl, ru, numCols, out, 0L);
    }

    /**
     * Applies a dense right operand to the output in place:
     * {@code c[j] = op(c[j], a[j])} for {@code n} cells per row, using the row offset of
     * {@code m2}'s dense block for both arrays.
     *
     * <p>If {@code m2} is an empty block, a multiply zeroes the whole dense output and the
     * incoming count is returned; any other operator returns {@code m1.nonZeros}.
     *
     * @param m1 left operand (only its non-zero count is read)
     * @param m2 dense right operand
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param n number of cells per row
     * @param dc dense block of the output
     * @param lnnz starting value of the non-zero counter
     * @return the updated non-zero count
     */
    private static long safeMMRDensePostProcess(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru, int n, DenseBlock dc, long lnnz) {
        if (m2.isEmptyBlock(false)) {
            if (op.fn instanceof Multiply) {
                ret.denseBlock.set(0.0);
                return lnnz;
            }
            return m1.nonZeros;
        }

        final DenseBlock rhs = m2.getDenseBlock();
        long count = lnnz;
        for (int row = rl; row < ru; ++row) {
            final double[] rhsVals = rhs.values(row);
            final double[] outVals = dc.values(row);
            final int begin = rhs.pos(row);
            final int end = begin + n;
            for (int k = begin; k < end; ++k) {
                outVals[k] = op.fn.execute(outVals[k], rhsVals[k]);
            }
            count += ret.recomputeNonZeros(row, row);
        }
        return count;
    }

    /**
     * Applies a sparse right operand to the dense output in place: for every stored entry
     * {@code (i, j)} of {@code m2}, {@code c[i, j] = op(c[i, j], m2[i, j])}. After each row
     * the row's non-zero count is recomputed and accumulated.
     *
     * @param m2 sparse right operand
     * @param ret output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param dc dense block of the output
     * @param lnnz starting value of the non-zero counter
     * @return the updated non-zero count
     */
    private static long safeMMRSparsePostProcess(MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru, DenseBlock dc, long lnnz) {
        final SparseBlock rhs = m2.sparseBlock;
        long count = lnnz;
        for (int row = rl; row < ru; ++row) {
            final double[] outVals = dc.values(row);
            final int outOff = dc.pos(row);
            if (!rhs.isEmpty(row)) {
                final int begin = rhs.pos(row);
                final int end = begin + rhs.size(row);
                final int[] cols = rhs.indexes(row);
                final double[] vals = rhs.values(row);
                for (int k = begin; k < end; ++k) {
                    final int target = outOff + cols[k];
                    outVals[target] = op.fn.execute(outVals[target], vals[k]);
                }
            }
            count += ret.recomputeNonZeros(row, row);
        }
        return count;
    }

    /**
     * Copies rows {@code [rl, ru)} of the dense left operand into the dense output. When the
     * first and last row map to the same block index of the output, all rows are copied with
     * a single array copy; otherwise row by row. An empty {@code m1} zeroes the output.
     *
     * @param m1 dense left operand
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param n number of cells per row
     * @param dc dense block of the output
     */
    private static void safeMMLDensePreProcess(MatrixBlock m1, int rl, int ru, int n, DenseBlock dc) {
        if (m1.isEmptyBlock(false)) {
            dc.set(0.0);
            return;
        }

        final int firstBlock = dc.index(rl);
        final int lastBlock = dc.index(ru - 1);
        final DenseBlock src = m1.getDenseBlock();
        if (firstBlock != lastBlock) {
            for (int row = rl; row < ru; ++row) {
                System.arraycopy(src.values(row), src.pos(row), dc.values(row), dc.pos(row), n);
            }
            return;
        }
        // Whole range lives in one block: copy it in one go.
        System.arraycopy(src.valuesAt(firstBlock), src.pos(rl), dc.valuesAt(firstBlock), dc.pos(rl), (ru - rl) * n);
    }

    /**
     * Scatters the stored entries of rows {@code [rl, ru)} of the sparse left operand into
     * the dense output. Cells without a stored entry are not touched.
     *
     * @param m1 sparse left operand
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param dc dense block of the output
     */
    private static void safeMMLSparsePreProcess(MatrixBlock m1, int rl, int ru, DenseBlock dc) {
        final SparseBlock src = m1.getSparseBlock();
        for (int row = rl; row < ru; ++row) {
            final double[] outVals = dc.values(row);
            final int outOff = dc.pos(row);
            if (!src.isEmpty(row)) {
                final int begin = src.pos(row);
                final int end = begin + src.size(row);
                final int[] cols = src.indexes(row);
                final double[] vals = src.values(row);
                for (int k = begin; k < end; ++k) {
                    outVals[outOff + cols[k]] = vals[k];
                }
            }
        }
    }

    /**
     * Dense-dense-dense matrix-matrix operation, choosing among four kernels:
     * <ol>
     * <li>plus/minus-multiply with at least 64 columns: vectorised row kernel;</li>
     * <li>all three blocks contiguous and plus-multiply: flat plus-multiply kernel;</li>
     * <li>all three blocks contiguous otherwise: flat contiguous kernel;</li>
     * <li>anything else: generic row kernel.</li>
     * </ol>
     *
     * @param m1 dense left operand
     * @param m2 dense right operand
     * @param ret dense output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @return the non-zero count reported by the chosen kernel
     */
    private static long safeBinaryMMDenseDenseDense(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int numCols = m1.clen;
        final boolean plusOrMinusMultiply = op.fn instanceof PlusMultiply || op.fn instanceof MinusMultiply;
        final DenseBlock lhs = m1.getDenseBlock();
        final DenseBlock rhs = m2.getDenseBlock();
        final DenseBlock out = ret.getDenseBlock();

        if (plusOrMinusMultiply && numCols >= 64) {
            return LibMatrixBincell.safeBinaryMMDenseDenseDensePM_Vec(lhs, rhs, out, op, rl, ru, numCols);
        }

        final boolean allContiguous = lhs.isContiguous() && rhs.isContiguous() && out.isContiguous();
        if (!allContiguous) {
            return LibMatrixBincell.safeBinaryMMDenseDenseDenseGeneric(lhs, rhs, out, op, rl, ru, numCols);
        }
        return op.fn instanceof PlusMultiply
            ? LibMatrixBincell.safeBinaryMMDenseDenseDensePM(lhs, rhs, out, op, rl, ru, numCols)
            : LibMatrixBincell.safeBinaryMMDenseDenseDenseContiguous(m1, m2, ret, op, rl, ru, numCols);
    }

    /**
     * Row-wise plus/minus-multiply kernel: each output row is initialised with the matching
     * row of {@code da} and then updated through {@code LibMatrixMult.vectMultiplyAdd} with
     * the row of {@code db} and the operator's constant (negated for minus-multiply).
     *
     * @param da dense left block
     * @param db dense right block
     * @param dc dense output block
     * @param op operator whose function is a {@code PlusMultiply} or {@code MinusMultiply}
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param clen number of columns
     * @return sum of the per-row non-zero counts of the output
     */
    private static final long safeBinaryMMDenseDenseDensePM_Vec(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        final double factor;
        if (op.fn instanceof PlusMultiply) {
            factor = ((PlusMultiply)op.fn).getConstant();
        } else {
            factor = -1.0 * ((MinusMultiply)op.fn).getConstant();
        }

        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            final double[] lhsVals = da.values(row);
            final double[] rhsVals = db.values(row);
            final double[] outVals = dc.values(row);
            // The offset of the left block is used for all three arrays.
            final int off = da.pos(row);
            System.arraycopy(lhsVals, off, outVals, off, clen);
            LibMatrixMult.vectMultiplyAdd(factor, rhsVals, outVals, off, off, clen);
            count += (long)UtilFunctions.computeNnz(outVals, off, clen);
        }
        return count;
    }

    /**
     * Flat plus-multiply kernel: {@code c[k] = a[k] + constant * b[k]} for all array
     * positions from {@code da.pos(rl)} up to, but excluding, {@code da.pos(ru)}, reading
     * the value arrays obtained with {@code values(0)}.
     *
     * @param da dense left block
     * @param db dense right block
     * @param dc dense output block
     * @param op operator whose function is a {@code PlusMultiply}
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param clen number of columns (unused)
     * @return number of results different from zero
     */
    private static final long safeBinaryMMDenseDenseDensePM(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        final double[] lhsVals = da.values(0);
        final double[] rhsVals = db.values(0);
        final double[] outVals = dc.values(0);
        final double factor = ((PlusMultiply)op.fn).getConstant();
        final int begin = da.pos(rl);
        final int end = da.pos(ru);

        long count = 0L;
        for (int k = begin; k < end; ++k) {
            final double result = lhsVals[k] + factor * rhsVals[k];
            outVals[k] = result;
            if (result != 0.0) {
                count++;
            }
        }
        return count;
    }

    /**
     * Flat kernel for three contiguous dense blocks, covering array positions from
     * {@code da.pos(rl)} up to, but excluding, {@code da.pos(ru)}.
     *
     * <p>When both inputs report a sparsity of 1.0 and the operator is a multiplication, the
     * unrolled multiply kernel is used and every processed cell is assumed to be non-zero.
     * Otherwise the generic flat kernel computes and counts each cell.
     *
     * <p>Fix: the multiply shortcut used to return {@code m1.rlen * m1.clen}, the cell count
     * of the whole matrix, even though only rows {@code [rl, ru)} are processed. The generic
     * path in this same method counts only that row range, so the shortcut now returns
     * {@code (ru - rl) * m1.clen}. For a full-range call ({@code rl == 0},
     * {@code ru == m1.rlen}) the value is unchanged.
     *
     * @param m1 dense left operand
     * @param m2 dense right operand
     * @param ret dense output block
     * @param op binary operator
     * @param rl first row, inclusive
     * @param ru last row, exclusive
     * @param clen number of columns (unused)
     * @return non-zero count for the processed row range
     */
    private static final long safeBinaryMMDenseDenseDenseContiguous(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru, int clen) {
        DenseBlock da = m1.getDenseBlock();
        DenseBlock db = m2.getDenseBlock();
        DenseBlock dc = ret.getDenseBlock();
        double[] a = da.values(0);
        double[] b = db.values(0);
        double[] c = dc.values(0);
        int end = da.pos(ru);
        if (m1.getSparsity() == 1.0 && m2.getSparsity() == 1.0 && op.fn instanceof Multiply) {
            LibMatrixBincell.safeBinaryMMDDDCMult(a, b, c, da.pos(rl), end);
            // Count only the rows handled by this call, matching the generic path below.
            return (long)(ru - rl) * (long)m1.clen;
        }
        return LibMatrixBincell.safeBinaryMMDDDCG(op, da, a, b, c, da.pos(rl), end);
    }

    /**
     * Generic flat kernel: {@code c[k] = op(a[k], b[k])} for {@code k} in
     * {@code [start, end)}.
     *
     * @param op binary operator
     * @param da dense left block (unused)
     * @param a left values
     * @param b right values
     * @param c output values
     * @param start first array position, inclusive
     * @param end last array position, exclusive
     * @return number of results different from zero
     */
    private static long safeBinaryMMDDDCG(BinaryOperator op, DenseBlock da, double[] a, double[] b, double[] c, int start, int end) {
        long count = 0L;
        int k = start;
        while (k < end) {
            final double result = op.fn.execute(a[k], b[k]);
            c[k] = result;
            if (result != 0.0) {
                count++;
            }
            k++;
        }
        return count;
    }

    /**
     * Element-wise multiplication {@code c[k] = a[k] * b[k]} for {@code k} in
     * {@code [start, end)}. The first {@code (end - start) % 8} cells are handled one at a
     * time so that the remainder can be processed in groups of eight.
     *
     * @param a left values
     * @param b right values
     * @param c output values
     * @param start first array position, inclusive
     * @param end last array position, exclusive
     */
    private static void safeBinaryMMDDDCMult(double[] a, double[] b, double[] c, int start, int end) {
        final int remainder = (end - start) % 8;
        final int unrolledStart = start + remainder;

        // Leading cells that do not fill a group of eight.
        for (int k = start; k < unrolledStart; ++k) {
            c[k] = a[k] * b[k];
        }
        // Remaining cells, eight per step.
        for (int k = unrolledStart; k < end; k += 8) {
            LibMatrixBincell.by8(a, b, c, k);
        }
    }

    /**
     * Multiplies eight consecutive cells: {@code c[i + k] = a[i + k] * b[i + k]} for {@code k} in 0..7.
     *
     * @param a left values
     * @param b right values
     * @param c output values
     * @param i index of the first of the eight cells
     */
    private static void by8(double[] a, double[] b, double[] c, int i) {
        for (int off = 0; off < 8; off++) {
            final int ix = i + off;
            c[ix] = a[ix] * b[ix];
        }
    }

    /**
     * Row-wise generic kernel over three dense blocks. For each row in {@code [rl, ru)} the value
     * arrays of all blocks are fetched and indexed with the row offset of {@code da}.
     *
     * @param da   left input
     * @param db   right input
     * @param dc   output
     * @param op   binary operator
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @param clen number of columns
     * @return number of non-zero results
     */
    private static final long safeBinaryMMDenseDenseDenseGeneric(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        final ValueFunction fn = op.fn;
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            final double[] lhs = da.values(row);
            final double[] rhs = db.values(row);
            final double[] out = dc.values(row);
            final int begin = da.pos(row);
            final int stop = begin + clen;
            for (int ix = begin; ix < stop; ix++) {
                final double res = fn.execute(lhs[ix], rhs[ix]);
                out[ix] = res;
                if (res != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Binary operation that only visits the non-zero cells of the sparse operand. The sparse block
     * is taken from {@code m1} if {@code m1.sparse} is set, otherwise from {@code m2}; the other
     * matrix is read cell by cell.
     *
     * @param m1  left input
     * @param m2  right input
     * @param ret output
     * @param op  binary operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive); additionally capped by the row count of the sparse block
     * @return accumulated non-zero count of the processed rows
     */
    private static long safeBinaryMMSparseDenseSkip(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean firstIsSparse = m1.sparse;
        final SparseBlock a = firstIsSparse ? m1.sparseBlock : m2.sparseBlock;
        final MatrixBlock b = firstIsSparse ? m2 : m1;
        // tells the row kernel on which side of the operator the sparse values go
        final boolean left = a == m1.sparseBlock;
        final int rowEnd = Math.min(ru, a.numRows());
        long count = 0L;
        for (int row = rl; row < rowEnd; row++) {
            if (!a.isEmpty(row)) {
                final int apos = a.pos(row);
                final int alen = a.size(row);
                final int[] aix = a.indexes(row);
                final double[] avals = a.values(row);
                count += LibMatrixBincell.safeMMSparseDenseSkipRow(ret, op, b, left, row, apos, alen, aix, avals);
            }
        }
        return count;
    }

    /**
     * Dispatches one sparse row to the kernel matching the operand side ({@code left}) and the
     * output format ({@code ret.sparse}).
     *
     * @param ret   output
     * @param op    binary operator
     * @param b     the non-skipping operand, read cell by cell
     * @param left  true if the sparse values are the left operand
     * @param i     row index
     * @param apos  start position of the row in {@code aix}/{@code avals}
     * @param alen  number of entries of the row
     * @param aix   column indexes of the sparse row
     * @param avals values of the sparse row
     * @return value returned by the selected row kernel
     */
    private static long safeMMSparseDenseSkipRow(MatrixBlock ret, BinaryOperator op, MatrixBlock b, boolean left, int i, int apos, int alen, int[] aix, double[] avals) {
        if (ret.sparse) {
            return left
                ? LibMatrixBincell.safeMMSparseDenseSkipRowLeftSparseRet(op, b, i, apos, alen, aix, avals, ret.getSparseBlock())
                : LibMatrixBincell.safeMMSparseDenseSkipRowRightSparseRet(op, b, i, apos, alen, aix, avals, ret.getSparseBlock());
        }
        return left
            ? LibMatrixBincell.safeMMSparseDenseSkipRowLeftDenseRet(op, b, i, apos, alen, aix, avals, ret.getDenseBlock())
            : LibMatrixBincell.safeMMSparseDenseSkipRowRightDenseRet(op, b, i, apos, alen, aix, avals, ret.getDenseBlock());
    }

    /**
     * Row kernel: sparse values on the left, dense output. Computes {@code fn(avals[k], b[i, aix[k]])}
     * for every entry of the row and stores it at the same column of {@code db}.
     *
     * @param op    binary operator
     * @param b     right operand
     * @param i     row index
     * @param apos  start position of the row
     * @param alen  number of entries of the row
     * @param aix   column indexes
     * @param avals values
     * @param db    dense output block
     * @return number of non-zero results
     */
    private static long safeMMSparseDenseSkipRowLeftDenseRet(BinaryOperator op, MatrixBlock b, int i, int apos, int alen, int[] aix, double[] avals, DenseBlock db) {
        final int stop = apos + alen;
        long count = 0L;
        for (int p = apos; p < stop; p++) {
            final double other = b.get(i, aix[p]);
            final double res = op.fn.execute(avals[p], other);
            if (res != 0.0) {
                count++;
            }
            db.set(i, aix[p], res);
        }
        return count;
    }

    /**
     * Row kernel: sparse values on the left, sparse output. Results are appended to row {@code i}
     * of {@code sb}; the row is pre-allocated with {@code alen} entries when {@code b} is not sparse.
     *
     * @param op    binary operator
     * @param b     right operand
     * @param i     row index
     * @param apos  start position of the row
     * @param alen  number of entries of the row
     * @param aix   column indexes
     * @param avals values
     * @param sb    sparse output block
     * @return size of output row {@code i} after appending
     */
    private static long safeMMSparseDenseSkipRowLeftSparseRet(BinaryOperator op, MatrixBlock b, int i, int apos, int alen, int[] aix, double[] avals, SparseBlock sb) {
        if (!b.sparse) {
            sb.allocate(i, alen);
        }
        final int stop = apos + alen;
        for (int p = apos; p < stop; p++) {
            final int col = aix[p];
            final double res = op.fn.execute(avals[p], b.get(i, col));
            sb.append(i, col, res);
        }
        return sb.size(i);
    }

    /**
     * Row kernel: sparse values on the right, dense output. Computes {@code fn(b[i, aix[k]], avals[k])}
     * for every entry of the row and stores it at the same column of {@code db}.
     *
     * @param op    binary operator
     * @param b     left operand
     * @param i     row index
     * @param apos  start position of the row
     * @param alen  number of entries of the row
     * @param aix   column indexes
     * @param avals values
     * @param db    dense output block
     * @return number of non-zero results
     */
    private static long safeMMSparseDenseSkipRowRightDenseRet(BinaryOperator op, MatrixBlock b, int i, int apos, int alen, int[] aix, double[] avals, DenseBlock db) {
        final int stop = apos + alen;
        long count = 0L;
        for (int p = apos; p < stop; p++) {
            final double other = b.get(i, aix[p]);
            final double res = op.fn.execute(other, avals[p]);
            if (res != 0.0) {
                count++;
            }
            db.set(i, aix[p], res);
        }
        return count;
    }

    /**
     * Row kernel: sparse values on the right, sparse output. Results are appended to row {@code i}
     * of {@code sb}; the row is pre-allocated with {@code alen} entries when {@code b} is not sparse.
     *
     * @param op    binary operator
     * @param b     left operand
     * @param i     row index
     * @param apos  start position of the row
     * @param alen  number of entries of the row
     * @param aix   column indexes
     * @param avals values
     * @param sb    sparse output block
     * @return size of output row {@code i} after appending
     */
    private static long safeMMSparseDenseSkipRowRightSparseRet(BinaryOperator op, MatrixBlock b, int i, int apos, int alen, int[] aix, double[] avals, SparseBlock sb) {
        if (!b.sparse) {
            sb.allocate(i, alen);
        }
        final int stop = apos + alen;
        for (int p = apos; p < stop; p++) {
            final double other = b.get(i, aix[p]);
            final double res = op.fn.execute(other, avals[p]);
            sb.append(i, aix[p], res);
        }
        return sb.size(i);
    }

    /**
     * Format-agnostic fallback: reads both inputs cell by cell through {@code get(r, c)} and skips
     * cells where both inputs are zero. Results go to the sparse or dense block of {@code ret},
     * depending on its format.
     *
     * @param m1  left input
     * @param m2  right input (its column count bounds the column loop)
     * @param ret output
     * @param op  binary operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     */
    private static long safeBinaryMMGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int cols = m2.clen;
        long count = 0L;
        if (!ret.isInSparseFormat()) {
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; row++) {
                for (int col = 0; col < cols; col++) {
                    final double left = m1.get(row, col);
                    final double right = m2.get(row, col);
                    if (left != 0.0 || right != 0.0) {
                        final double res = op.fn.execute(left, right);
                        if (res != 0.0) {
                            count++;
                        }
                        out.set(row, col, res);
                    }
                }
            }
            return count;
        }
        final SparseBlock out = ret.getSparseBlock();
        for (int row = rl; row < ru; row++) {
            out.allocate(row);
            for (int col = 0; col < cols; col++) {
                final double left = m1.get(row, col);
                final double right = m2.get(row, col);
                if (left != 0.0 || right != 0.0) {
                    final double res = op.fn.execute(left, right);
                    if (res != 0.0) {
                        count++;
                    }
                    out.append(row, col, res);
                }
            }
        }
        return count;
    }

    /**
     * Outer comparison of a column vector {@code m1} against the vector {@code m2}, using binary
     * search instead of evaluating every cell. For each row the value {@code m1[i, 0]} is located
     * in {@code m2} (converted to a double array) and the matching column range of the dense
     * output row is filled with 1.0.
     * <p>
     * The search relies on {@link Arrays#binarySearch(double[], double)}, so the contents of
     * {@code m2} must be sorted ascending. The output cells outside the filled range are not
     * written here.
     *
     * @param m1  left input; only column 0 is read
     * @param m2  right input
     * @param ret dense output
     * @param bOp comparison operator (less-than, less-equal, greater-than, greater-equal, equals, not-equals)
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of cells set to 1.0
     */
    private static long performBinOuterOperation(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator bOp, int rl, int ru) {
        int clen = ret.clen;
        double[] b = DataConverter.convertToDoubleVector(m2);
        DenseBlock dc = ret.getDenseBlock();
        boolean lt = bOp.fn instanceof LessThan;
        boolean lte = bOp.fn instanceof LessThanEquals;
        boolean gt = bOp.fn instanceof GreaterThan;
        boolean gte = bOp.fn instanceof GreaterThanEquals;
        boolean neq = bOp.fn instanceof NotEquals;
        boolean eqNeq = bOp.fn instanceof Equals || neq;
        // scanType1: move ixPos1 past the run of equal values; scanType2: move ixPos2 to its start
        boolean scanType1 = lt || gte || eqNeq;
        boolean scanType2 = lte || gt || eqNeq;
        long lnnz = 0L;
        for (int i = rl; i < ru; ++i) {
            double[] cvals = dc.values(i);
            int pos = dc.pos(i);
            double value = m1.get(i, 0);
            int ixPos1 = Arrays.binarySearch(b, value);
            int ixPos2 = ixPos1;
            if (ixPos1 >= 0) {
                if (scanType1) {
                    while (ixPos1 < b.length && value == b[ixPos1]) {
                        ++ixPos1;
                    }
                }
                if (scanType2) {
                    while (ixPos2 > 0 && value == b[ixPos2 - 1]) {
                        --ixPos2;
                    }
                }
            } else {
                // not found: binarySearch returns -(insertion point) - 1
                ixPos2 = ixPos1 = Math.abs(ixPos1) - 1;
            }
            // Both bounds are assigned on every path; a nested assignment inside the ternary would
            // leave them unset for the less-than / greater-than operators.
            int start = lt ? ixPos1 : (lte || eqNeq ? ixPos2 : 0);
            int end = gt ? ixPos2 : (gte || eqNeq ? ixPos1 : clen);
            if (neq) {
                // everything outside the run of equal values is 1
                Arrays.fill(cvals, pos, pos + start, 1.0);
                Arrays.fill(cvals, pos + end, pos + clen, 1.0);
                lnnz += (long)(start + (clen - end));
                continue;
            }
            if (start >= end) {
                continue;
            }
            Arrays.fill(cvals, pos + start, pos + end, 1.0);
            lnnz += (long)(end - start);
        }
        return lnnz;
    }

    /**
     * Entry point for binary operations that visit every cell: selects the kernel according to the
     * access type reported for {@code (m1, m2)}.
     *
     * @param m1  left input
     * @param m2  right input
     * @param ret output
     * @param op  binary operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return non-zero count reported by the selected kernel
     */
    private static long unsafeBinary(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int cols = m1.clen;
        final BinaryAccessType access = LibMatrixBincell.getBinaryAccessType(m1, m2);
        final long nnz;
        if (access == BinaryAccessType.MATRIX_COL_VECTOR) {
            nnz = LibMatrixBincell.unsafeBinaryMcV(m1, m2, ret, op, rl, ru, cols);
        } else if (access == BinaryAccessType.MATRIX_ROW_VECTOR) {
            nnz = LibMatrixBincell.unsafeBinaryMrV(m1, m2, ret, op, rl, ru, cols);
        } else if (access == BinaryAccessType.OUTER_VECTOR_VECTOR) {
            nnz = LibMatrixBincell.unsafeBinaryVoV(m1, m2, ret, op, rl, ru);
        } else {
            nnz = LibMatrixBincell.unsafeBinaryMM(m1, m2, ret, op, rl, ru, cols);
        }
        return nnz;
    }

    /**
     * Matrix-matrix binary operation over every cell of rows {@code [rl, ru)}.
     * Three paths: a direct array loop when {@code m1} has a single column and both inputs are
     * dense and non-empty; otherwise a cell-wise loop writing to the dense or to the sparse output.
     *
     * @param m1   left input
     * @param m2   right input
     * @param ret  output; the needed block is allocated here
     * @param op   binary operator
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @param clen number of columns
     * @return number of non-zero results
     */
    private static long unsafeBinaryMM(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru, int clen) {
        long count = 0L;
        final boolean denseColumnVectors = m1.clen == 1 && !m1.sparse && !m1.isEmptyBlock(false) && !m2.sparse && !m2.isEmptyBlock(false);
        if (denseColumnVectors) {
            ret.allocateDenseBlock();
            final double[] lhs = m1.getDenseBlockValues();
            final double[] rhs = m2.getDenseBlockValues();
            final double[] out = ret.getDenseBlockValues();
            for (int ix = rl; ix < ru; ix++) {
                out[ix] = op.fn.execute(lhs[ix], rhs[ix]);
                if (out[ix] != 0.0) {
                    count++;
                }
            }
            return count;
        }
        if (ret.isInSparseFormat()) {
            ret.allocateSparseRowsBlock();
            final SparseBlock out = ret.getSparseBlock();
            for (int row = rl; row < ru; row++) {
                for (int col = 0; col < clen; col++) {
                    final double left = m1.get(row, col);
                    final double right = m2.get(row, col);
                    final double res = op.fn.execute(left, right);
                    out.append(row, col, res);
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }
        ret.allocateDenseBlock();
        final DenseBlock out = ret.getDenseBlock();
        for (int row = rl; row < ru; row++) {
            for (int col = 0; col < clen; col++) {
                final double left = m1.get(row, col);
                final double right = m2.get(row, col);
                final double res = op.fn.execute(left, right);
                out.set(row, col, res);
                if (res != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Outer vector-vector operation: combines {@code m1[r, 0]} with {@code m2[0, c]} for every
     * cell of rows {@code [rl, ru)}. Comparison operators over a sorted {@code m2} with more than
     * 16 columns are delegated to the binary-search based kernel.
     *
     * @param m1  left input (column 0 is read)
     * @param m2  right input (row 0 is read)
     * @param ret output; the needed block is allocated on the cell-wise paths
     * @param op  binary operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     */
    private static long unsafeBinaryVoV(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int cols = m2.clen;
        if (LibMatrixOuterAgg.isCompareOperator(op) && m2.getNumColumns() > 16 && SortUtils.isSorted(m2)) {
            return LibMatrixBincell.performBinOuterOperation(m1, m2, ret, op, rl, ru);
        }
        long count = 0L;
        if (!ret.isInSparseFormat()) {
            ret.allocateDenseBlock();
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; row++) {
                final double left = m1.get(row, 0);
                for (int col = 0; col < cols; col++) {
                    final double right = m2.get(0, col);
                    final double res = op.fn.execute(left, right);
                    if (res != 0.0) {
                        count++;
                    }
                    out.set(row, col, res);
                }
            }
            return count;
        }
        ret.allocateSparseRowsBlock();
        final SparseBlock out = ret.getSparseBlock();
        for (int row = rl; row < ru; row++) {
            final double left = m1.get(row, 0);
            for (int col = 0; col < cols; col++) {
                final double right = m2.get(0, col);
                final double res = op.fn.execute(left, right);
                if (res != 0.0) {
                    count++;
                }
                out.append(row, col, res);
            }
        }
        return count;
    }

    /**
     * Matrix / row-vector operation: combines {@code m1[r, c]} with {@code m2[0, c]} for every
     * cell of rows {@code [rl, ru)}, writing to the sparse or dense output.
     *
     * @param m1   matrix input
     * @param m2   row vector input (row 0 is read)
     * @param ret  output; the needed block is allocated here
     * @param op   binary operator
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @param clen number of columns
     * @return number of non-zero results
     */
    private static long unsafeBinaryMrV(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru, int clen) {
        long count = 0L;
        if (!ret.isInSparseFormat()) {
            ret.allocateDenseBlock();
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; row++) {
                for (int col = 0; col < clen; col++) {
                    final double left = m1.get(row, col);
                    final double right = m2.get(0, col);
                    final double res = op.fn.execute(left, right);
                    out.set(row, col, res);
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }
        ret.allocateSparseRowsBlock();
        final SparseBlock out = ret.getSparseBlock();
        for (int row = rl; row < ru; row++) {
            for (int col = 0; col < clen; col++) {
                final double left = m1.get(row, col);
                final double right = m2.get(0, col);
                final double res = op.fn.execute(left, right);
                out.append(row, col, res);
                if (res != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Matrix / column-vector operation: combines {@code m1[r, c]} with {@code m2[r, 0]} for every
     * cell of rows {@code [rl, ru)}, writing to the sparse or dense output.
     *
     * @param m1   matrix input
     * @param m2   column vector input (column 0 is read)
     * @param ret  output; the needed block is allocated here
     * @param op   binary operator
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @param clen number of columns
     * @return number of non-zero results
     */
    private static long unsafeBinaryMcV(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru, int clen) {
        long count = 0L;
        if (!ret.isInSparseFormat()) {
            ret.allocateDenseBlock();
            final DenseBlock out = ret.getDenseBlock();
            for (int row = rl; row < ru; row++) {
                final double right = m2.get(row, 0);
                for (int col = 0; col < clen; col++) {
                    final double left = m1.get(row, col);
                    final double res = op.fn.execute(left, right);
                    out.set(row, col, res);
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }
        ret.allocateSparseRowsBlock();
        final SparseBlock out = ret.getSparseBlock();
        for (int row = rl; row < ru; row++) {
            final double right = m2.get(row, 0);
            for (int col = 0; col < clen; col++) {
                final double left = m1.get(row, col);
                final double res = op.fn.execute(left, right);
                out.append(row, col, res);
                if (res != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Matrix-scalar operation that only touches the non-zero cells of the input. An empty input
     * yields 0 without any work; input and output must share the same representation.
     *
     * @param m1  input
     * @param ret output
     * @param op  scalar operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     * @throws DMLRuntimeException if {@code m1.sparse} differs from {@code ret.sparse}
     */
    private static long safeBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        if (m1.isEmptyBlock(false)) {
            return 0L;
        }
        final boolean sparseInput = m1.sparse;
        if (sparseInput != ret.sparse) {
            throw new DMLRuntimeException("Unsupported safe binary scalar operations over different input/output representation: " + m1.sparse + " " + ret.sparse + "  " + op);
        }
        return sparseInput
            ? LibMatrixBincell.safeBinaryScalarSparseSparse(m1, ret, op, rl, ru)
            : LibMatrixBincell.safeBinaryScalarDenseDense(m1, ret, op, rl, ru);
    }

    /**
     * Sparse-to-sparse matrix-scalar operation. Picks one of three kernels:
     * <ul>
     * <li>"copy ones" for {@code != 0},</li>
     * <li>the exact-allocation kernel for multiply, multiply2, power2, max and min when the output
     * block is an MCSR block,</li>
     * <li>the generic append kernel otherwise.</li>
     * </ul>
     *
     * @param m1  sparse input
     * @param ret sparse output; its sparse rows block is allocated here
     * @param op  scalar operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     */
    private static long safeBinaryScalarSparseSparse(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        ret.allocateSparseRowsBlock();
        if (op.fn instanceof NotEquals && op.getConstant() == 0.0) {
            return LibMatrixBincell.safeBinaryScalarSparseSparseCopyOnes(m1, ret, op, rl, ru);
        }
        final boolean exactAlloc = op.fn instanceof Multiply || op.fn instanceof Multiply2 || op.fn instanceof Power2 || Builtin.isBuiltinCode(op.fn, Builtin.BuiltinCode.MAX) || Builtin.isBuiltinCode(op.fn, Builtin.BuiltinCode.MIN);
        final boolean useExactKernel = exactAlloc && ret.sparseBlock instanceof SparseBlockMCSR;
        return useExactKernel
            ? LibMatrixBincell.safeBinaryScalarSparseSparseExact(m1, ret, op, rl, ru)
            : LibMatrixBincell.safeBinaryScalarSparseSparseGeneric(m1, ret, op, rl, ru);
    }

    /**
     * Sparse-to-MCSR matrix-scalar kernel that builds each output row in arrays sized by the input
     * row. For power2, and for multiply with a non-zero constant, the row kernel that keeps every
     * entry is used; otherwise the row kernel that drops zero results is used.
     *
     * @param m1  sparse input
     * @param ret output whose sparse block is cast to {@link SparseBlockMCSR}
     * @param op  scalar operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return sum of the per-row counts
     */
    private static long safeBinaryScalarSparseSparseExact(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        final SparseBlock in = m1.sparseBlock;
        final SparseBlockMCSR out = (SparseBlockMCSR)ret.sparseBlock;
        final boolean keepsAllEntries = op.fn instanceof Power2 || (op.fn instanceof Multiply && op.getConstant() != 0.0);
        long total = 0L;
        for (int row = rl; row < ru; row++) {
            if (!in.isEmpty(row)) {
                final int apos = in.pos(row);
                final int alen = in.size(row);
                final int[] aix = in.indexes(row);
                final double[] avals = in.values(row);
                total += keepsAllEntries
                    ? LibMatrixBincell.safeBinaryScalarSparseSparseExactRowNoZero(apos, alen, aix, avals, row, out, op)
                    : LibMatrixBincell.safeBinaryScalarSparseSparseExactRow(apos, alen, aix, avals, row, out, op);
            }
        }
        return total;
    }

    /**
     * Applies the scalar operator to one sparse row, keeps only the non-zero results, and installs
     * them as row {@code r} of {@code c}. The backing arrays are sized by the input row length.
     *
     * @param apos  start position of the input row
     * @param alen  number of entries of the input row
     * @param aix   input column indexes
     * @param avals input values
     * @param r     output row index
     * @param c     output block
     * @param op    scalar operator
     * @return number of entries kept
     */
    private static long safeBinaryScalarSparseSparseExactRow(int apos, int alen, int[] aix, double[] avals, int r, SparseBlockMCSR c, ScalarOperator op) {
        final int[] cols = new int[alen];
        final double[] vals = new double[alen];
        final int stop = apos + alen;
        int kept = 0;
        for (int p = apos; p < stop; p++) {
            final double res = op.executeScalar(avals[p]);
            if (res != 0.0) {
                cols[kept] = aix[p];
                vals[kept] = res;
                kept++;
            }
        }
        c.set(r, new SparseRowVector(vals, cols, kept), false);
        return kept;
    }

    /**
     * Applies the scalar operator to one sparse row and keeps every entry without testing the
     * result for zero; the column indexes are copied verbatim. The new row is installed as row
     * {@code r} of {@code c}.
     *
     * @param apos  start position of the input row
     * @param alen  number of entries of the input row
     * @param aix   input column indexes
     * @param avals input values
     * @param r     output row index
     * @param c     output block
     * @param op    scalar operator
     * @return number of entries written, i.e. {@code alen}
     */
    private static long safeBinaryScalarSparseSparseExactRowNoZero(int apos, int alen, int[] aix, double[] avals, int r, SparseBlockMCSR c, ScalarOperator op) {
        final int[] cols = new int[alen];
        System.arraycopy(aix, apos, cols, 0, alen);
        final double[] vals = new double[alen];
        int written = 0;
        while (written < alen) {
            vals[written] = op.executeScalar(avals[apos + written]);
            written++;
        }
        c.set(r, new SparseRowVector(vals, cols, written), false);
        return written;
    }

    /**
     * Generic sparse-to-sparse matrix-scalar kernel: every input entry is transformed and appended
     * to the same row of the output. For multiply, multiply2, power2, max and min the output row
     * is pre-allocated with the input row length.
     *
     * @param m1  sparse input
     * @param ret sparse output
     * @param op  scalar operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     */
    private static long safeBinaryScalarSparseSparseGeneric(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        final boolean preallocate = op.fn instanceof Multiply || op.fn instanceof Multiply2 || op.fn instanceof Power2 || Builtin.isBuiltinCode(op.fn, Builtin.BuiltinCode.MAX) || Builtin.isBuiltinCode(op.fn, Builtin.BuiltinCode.MIN);
        final SparseBlock in = m1.sparseBlock;
        final SparseBlock out = ret.sparseBlock;
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            if (!in.isEmpty(row)) {
                final int apos = in.pos(row);
                final int alen = in.size(row);
                final int[] aix = in.indexes(row);
                final double[] avals = in.values(row);
                if (preallocate) {
                    out.allocate(row, alen);
                }
                final int stop = apos + alen;
                for (int p = apos; p < stop; p++) {
                    final double res = op.executeScalar(avals[p]);
                    out.append(row, aix[p], res);
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
        }
        return count;
    }

    /**
     * Kernel for {@code X != 0} on sparse data: every output row reuses the column indexes of the
     * input row and holds 1.0 for each of them. The operator itself is never evaluated.
     * Also stores the resulting count in {@code ret.nonZeros}.
     *
     * @param m1  sparse input
     * @param ret sparse output
     * @param op  scalar operator (not used by this kernel)
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of entries written for the processed rows
     */
    private static long safeBinaryScalarSparseSparseCopyOnes(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        final SparseBlock in = m1.sparseBlock;
        final SparseBlock out = ret.sparseBlock;
        long total = 0L;
        for (int row = rl; row < ru; row++) {
            if (!in.isEmpty(row)) {
                final int offset = in.pos(row);
                final int len = in.size(row);
                final int[] cols = in.indexes(row);
                final SparseRowVector ones = new SparseRowVector(len);
                ones.setSize(len);
                System.arraycopy(cols, offset, ones.indexes(), 0, len);
                Arrays.fill(ones.values(), 0, len, 1.0);
                out.set(row, ones, false);
                total += (long)len;
            }
        }
        ret.nonZeros = total;
        return total;
    }

    /**
     * Dense-to-dense matrix-scalar operation; forwards to the shared dense scalar kernel.
     *
     * @param m1  dense input
     * @param ret dense output
     * @param op  scalar operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     */
    private static long safeBinaryScalarDenseDense(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        final long nonZeros = LibMatrixBincell.denseBinaryScalar(m1, ret, op, rl, ru);
        return nonZeros;
    }

    /**
     * Matrix-scalar operation that visits every cell. For an empty input the operator is evaluated
     * once on 0.0: a non-zero result resets the whole output to that value, a zero result leaves
     * the output untouched. Non-empty inputs are dispatched by output format.
     *
     * @param m1  input
     * @param ret output
     * @param op  scalar operator
     * @return number of non-zero results
     */
    private static long unsafeBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) {
        if (!m1.isEmptyBlock(false)) {
            return ret.sparse
                ? LibMatrixBincell.unsafeBinaryScalarSparseOut(m1, ret, op)
                : LibMatrixBincell.unsafeBinaryScalarDenseOut(m1, ret, op);
        }
        final double fill = op.executeScalar(0.0);
        if (fill == 0.0) {
            return 0L;
        }
        ret.reset(ret.rlen, ret.clen, fill);
        return ret.getLength();
    }

    /**
     * Matrix-scalar operation over every cell with a dense output.
     * <p>
     * Sparse input: the output is pre-filled with {@code op(0)} when that value is non-zero, then
     * only the non-zero input cells are overwritten. The non-zero count is tracked incrementally
     * and also stored in {@code ret.nonZeros}.
     * <p>
     * Dense input: delegates to the dense scalar kernel over all rows; for {@link Multiply} the
     * returned count is the non-zero count of the input.
     *
     * @param m1  input
     * @param ret dense output
     * @param op  scalar operator
     * @return number of non-zero results
     */
    private static long unsafeBinaryScalarDenseOut(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) {
        int m = m1.rlen;
        int n = m1.clen;
        long lnnz = 0L;
        if (m1.sparse) {
            ret.allocateDenseBlock();
            SparseBlock a = m1.sparseBlock;
            DenseBlock dc = ret.getDenseBlock();
            double val0 = op.executeScalar(0.0);
            boolean lsparseSafe = val0 == 0.0;
            if (!lsparseSafe) {
                dc.set(val0);
            }
            // widen before multiplying so the cell count cannot overflow int
            long nnz = lsparseSafe ? 0L : (long)m * (long)n;
            for (int bi = 0; bi < dc.numBlocks(); ++bi) {
                double[] c = dc.valuesAt(bi);
                // Global row range covered by this block. The offset into the block-local array
                // restarts at 0 for every block.
                int rowStart = bi * dc.blockSize();
                int rowEnd = Math.min(m, rowStart + dc.blockSize(bi));
                for (int i = rowStart, cix = 0; i < rowEnd; ++i, cix += n) {
                    if (a.isEmpty(i)) {
                        continue;
                    }
                    int apos = a.pos(i);
                    int alen = a.size(i);
                    int[] aix = a.indexes(i);
                    double[] avals = a.values(i);
                    for (int j = apos; j < apos + alen; ++j) {
                        double val = op.executeScalar(avals[j]);
                        c[cix + aix[j]] = val;
                        // sparse-safe: count new non-zeros; otherwise start from "all non-zero"
                        // and subtract the cells that turned into zero
                        nnz += lsparseSafe ? (long)(val != 0.0 ? 1 : 0) : (long)(val == 0.0 ? -1 : 0);
                    }
                }
            }
            lnnz = ret.nonZeros = nnz;
        } else {
            lnnz = LibMatrixBincell.denseBinaryScalar(m1, ret, op, 0, m);
            if (op.fn instanceof Multiply) {
                lnnz = m1.getNonZeros();
            }
        }
        return lnnz;
    }

    /**
     * Matrix-scalar operation over every cell with a sparse output. Each cell result is appended
     * to the output row in column order; the returned count is the sum of the output row sizes.
     * <p>
     * For sparse input, cells missing from the input row receive {@code op(0)}.
     *
     * @param m1  input
     * @param ret sparse output; its sparse rows block is allocated here
     * @param op  scalar operator
     * @return sum of the sizes of all output rows
     * @throws NotImplementedException if the input is sparse and {@code op(0)} is 0.0
     */
    private static long unsafeBinaryScalarSparseOut(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) {
        final int rows = m1.rlen;
        final int cols = m1.clen;
        long total = 0L;
        ret.allocateSparseRowsBlock();
        final SparseBlock out = ret.getSparseBlock();
        if (!m1.isInSparseFormat()) {
            final DenseBlock in = m1.getDenseBlock();
            for (int row = 0; row < rows; row++) {
                final double[] vals = in.values(row);
                final int offset = in.pos(row);
                for (int col = 0; col < cols; col++) {
                    out.append(row, col, op.executeScalar(vals[offset + col]));
                }
                total += (long)out.size(row);
            }
            return total;
        }
        final SparseBlock in = m1.getSparseBlock();
        final double zeroResult = op.executeScalar(0.0);
        if (zeroResult == 0.0) {
            throw new NotImplementedException("Not implemented unsafe binary where op returns 0 on zero");
        }
        for (int row = 0; row < rows; row++) {
            if (in.isEmpty(row)) {
                for (int col = 0; col < cols; col++) {
                    out.append(row, col, zeroResult);
                }
            } else {
                final int[] aix = in.indexes(row);
                final double[] avals = in.values(row);
                int cursor = in.pos(row);
                final int stop = cursor + in.size(row);
                // walk all columns; consume the next stored entry whenever its index matches
                for (int col = 0; col < cols; col++) {
                    if (cursor < stop && aix[cursor] == col) {
                        out.append(row, col, op.executeScalar(avals[cursor]));
                        cursor++;
                    } else {
                        out.append(row, col, zeroResult);
                    }
                }
            }
            total += (long)out.size(row);
        }
        return total;
    }

    /**
     * Dense matrix-scalar kernel dispatcher. Multiplication has dedicated kernels (single column,
     * contiguous multi-column, block-wise multi-column); every other function uses the generic
     * single-column or multi-column kernel.
     *
     * @param m1  dense input
     * @param ret output; its dense block is allocated here
     * @param op  scalar operator
     * @param rl  first row (inclusive)
     * @param ru  last row (exclusive)
     * @return number of non-zero results
     */
    private static long denseBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        ret.allocateDenseBlock(true);
        final DenseBlock in = m1.getDenseBlock();
        final DenseBlock out = ret.getDenseBlock();
        final int cols = m1.clen;
        if (op.fn instanceof Multiply) {
            if (cols == 1) {
                return LibMatrixBincell.denseBinaryScalarMultiplySingleCol(in.valuesAt(0), out.valuesAt(0), op.getConstant(), rl, ru);
            }
            if (in.isContiguous()) {
                return LibMatrixBincell.denseBinaryScalarMultiplyMultiColContiguous(in, out, op.getConstant(), cols, rl, ru);
            }
            return LibMatrixBincell.denseBinaryScalarMultiplyMultiCol(in, out, op.getConstant(), cols, rl, ru);
        }
        if (cols == 1) {
            return LibMatrixBincell.denseBinaryScalarSingleCol(in.valuesAt(0), out.valuesAt(0), op, rl, ru);
        }
        return LibMatrixBincell.denseBinaryScalarMultiCol(in, out, op, cols, rl, ru);
    }

    /**
     * Applies the scalar operator to {@code a[i]} for {@code i} in {@code [rl, ru)} and stores the
     * result in {@code c[i]}.
     *
     * @param a  input values
     * @param c  output values
     * @param op scalar operator
     * @param rl first index (inclusive)
     * @param ru last index (exclusive)
     * @return number of non-zero results
     */
    private static long denseBinaryScalarSingleCol(double[] a, double[] c, ScalarOperator op, int rl, int ru) {
        long count = 0L;
        int ix = rl;
        while (ix < ru) {
            c[ix] = op.executeScalar(a[ix]);
            if (c[ix] != 0.0) {
                count++;
            }
            ix++;
        }
        return count;
    }

    /**
     * Row-wise dense scalar kernel: for each row in {@code [rl, ru)} the operator is applied to
     * {@code clen} cells starting at the row offsets of the input and output block.
     *
     * @param da   input block
     * @param dc   output block
     * @param op   scalar operator
     * @param clen number of columns
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @return number of non-zero results
     */
    private static long denseBinaryScalarMultiCol(DenseBlock da, DenseBlock dc, ScalarOperator op, int clen, int rl, int ru) {
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            final double[] in = da.values(row);
            final double[] out = dc.values(row);
            final int inOff = da.pos(row);
            final int outOff = dc.pos(row);
            for (int col = 0; col < clen; col++) {
                final int target = outOff + col;
                out[target] = op.executeScalar(in[inOff + col]);
                if (out[target] != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Scales a single column: {@code c[i] = b * a[i]} for {@code i} in {@code [rl, ru)}.
     *
     * @param a  input values
     * @param c  output values
     * @param b  scalar factor
     * @param rl first index (inclusive)
     * @param ru last index (exclusive)
     * @return number of non-zero products
     */
    private static long denseBinaryScalarMultiplySingleCol(double[] a, double[] c, double b, int rl, int ru) {
        long count = 0L;
        int ix = rl;
        while (ix < ru) {
            c[ix] = b * a[ix];
            if (0.0 != c[ix]) {
                count++;
            }
            ix++;
        }
        return count;
    }

    /**
     * Scales the cells {@code [rl * clen, ru * clen)} of the first value array of {@code da} into
     * the first value array of {@code dc}. Cells are processed in groups of eight, followed by a
     * scalar loop for the trailing {@code cells % 8} cells.
     *
     * @param da   input block
     * @param dc   output block
     * @param b    scalar factor
     * @param clen number of columns
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @return number of non-zero products
     */
    private static long denseBinaryScalarMultiplyMultiColContiguous(DenseBlock da, DenseBlock dc, double b, int clen, int rl, int ru) {
        final double[] in = da.values(0);
        final double[] out = dc.values(0);
        final int first = rl * clen;
        final int last = ru * clen;
        final int tailStart = last - (last - first) % 8;
        long count = 0L;
        int ix = first;
        while (ix < tailStart) {
            count += LibMatrixBincell.unroll8Multiply(in, b, out, ix);
            ix += 8;
        }
        for (int tail = tailStart; tail < last; tail++) {
            out[tail] = b * in[tail];
            if (0.0 != out[tail]) {
                count++;
            }
        }
        return count;
    }

    /**
     * Scales eight consecutive cells: {@code c[i + k] = b * a[i + k]} for {@code k} in 0..7.
     *
     * @param a input values
     * @param b scalar factor
     * @param c output values
     * @param i index of the first of the eight cells
     * @return number of non-zero products among the eight cells
     */
    private static long unroll8Multiply(double[] a, double b, double[] c, int i) {
        long count = 0L;
        for (int off = 0; off < 8; off++) {
            final int ix = i + off;
            final double prod = b * a[ix];
            c[ix] = prod;
            if (0.0 != prod) {
                count++;
            }
        }
        return count;
    }

    /**
     * Row-wise scaling kernel: for each row in {@code [rl, ru)} multiplies {@code clen} cells by
     * {@code b}, using the per-row value arrays and offsets of the input and output block.
     *
     * @param da   input block
     * @param dc   output block
     * @param b    scalar factor
     * @param clen number of columns
     * @param rl   first row (inclusive)
     * @param ru   last row (exclusive)
     * @return number of non-zero products
     */
    private static long denseBinaryScalarMultiplyMultiCol(DenseBlock da, DenseBlock dc, double b, int clen, int rl, int ru) {
        long count = 0L;
        for (int row = rl; row < ru; row++) {
            final double[] in = da.values(row);
            final double[] out = dc.values(row);
            final int inOff = da.pos(row);
            final int outOff = dc.pos(row);
            for (int col = 0; col < clen; col++) {
                final int target = outOff + col;
                out[target] = b * in[inOff + col];
                if (0.0 != out[target]) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * In-place binary operation {@code m1ret = m1ret op m2} for operators that may skip zero cells.
     * <p>
     * Shortcuts taken before any cell is visited:
     * <ul>
     * <li>both operands empty, or plus/minus with an empty right operand: nothing to compute;
     * for equals / less-equal / greater-equal the result is reset to all ones instead;</li>
     * <li>multiply or and with an empty right operand: the result is reset to all zeros.</li>
     * </ul>
     * Everything else is dispatched to the matrix / row-vector or the matrix / matrix kernel.
     *
     * @param m1ret left operand and result
     * @param m2    right operand
     * @param op    binary operator
     * @return number of non-zero cells of {@code m1ret} after the operation
     */
    private static long safeBinaryInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        boolean plusOrMinus = op.fn instanceof Plus || op.fn instanceof Minus;
        if (m1ret.isEmpty() && m2.isEmpty() || plusOrMinus && m2.isEmpty()) {
            boolean isEquals = op.fn instanceof Equals || op.fn instanceof LessThanEquals || op.fn instanceof GreaterThanEquals;
            if (isEquals) {
                m1ret.reset(m1ret.rlen, m1ret.clen, 1.0);
                return (long)m1ret.rlen * (long)m1ret.clen;
            }
            // The result is m1ret unchanged. Report its current non-zero count rather than 0,
            // which would be wrong for a non-empty left operand combined with an empty right one.
            return m1ret.isEmpty() ? 0L : m1ret.getNonZeros();
        }
        if (m2.isEmpty() && (op.fn instanceof Multiply || op.fn instanceof And)) {
            m1ret.reset(m1ret.rlen, m1ret.clen, 0.0);
            return 0L;
        }
        if (m1ret.getNumRows() > 1 && m2.getNumRows() == 1) {
            return LibMatrixBincell.safeBinaryInPlaceMatrixRowVector(m1ret, m2, op);
        }
        return LibMatrixBincell.safeBinaryInPlaceMatrixMatrix(m1ret, m2, op);
    }

    /**
     * In-place operation of a matrix with a row vector. An empty vector is handled as the constant
     * 0.0; a non-empty vector must be dense. For a dense, not yet allocated result the block is
     * allocated first (with a warning).
     *
     * @param m1ret left operand and result
     * @param m2    row vector operand
     * @param op    binary operator
     * @return recomputed non-zero count of {@code m1ret}
     * @throws DMLRuntimeException     if the result is sparse, the vector is in sparse format and
     *                                 the operator is not row-safe on the left for it
     * @throws NotImplementedException if the vector is non-empty and sparse
     */
    private static long safeBinaryInPlaceMatrixRowVector(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (m1ret.sparse) {
            if (m2.isInSparseFormat() && !op.isRowSafeLeft(m2)) {
                throw new DMLRuntimeException("Invalid row safety of in place row operation: " + op);
            }
            if (m2.isEmpty()) {
                LibMatrixBincell.safeBinaryInPlaceSparseConst(m1ret, 0.0, op);
            } else if (m2.sparse) {
                throw new NotImplementedException("Not made sparse vector in place to sparse " + op);
            } else {
                LibMatrixBincell.safeBinaryInPlaceSparseVector(m1ret, m2, op);
            }
            return m1ret.recomputeNonZeros();
        }
        if (!m1ret.isAllocated()) {
            LOG.warn((Object)"Allocating in place output dense block");
            m1ret.allocateBlock();
        }
        if (m2.isEmpty()) {
            LibMatrixBincell.safeBinaryInPlaceDenseConst(m1ret, 0.0, op);
        } else if (m2.sparse) {
            throw new NotImplementedException("Not made sparse vector in place to dense " + op);
        } else {
            LibMatrixBincell.safeBinaryInPlaceDenseVector(m1ret, m2, op);
        }
        return m1ret.recomputeNonZeros();
    }

    /**
     * In-place operation of two matrices. Adding into an empty, unallocated result is a plain copy
     * of the right operand; otherwise the kernel is chosen by the sparse flags of both operands,
     * with a dedicated kernel for plus/minus of a sparse right operand into a dense result and a
     * generic fallback for the remaining combinations.
     *
     * @param m1ret left operand and result
     * @param m2    right operand
     * @param op    binary operator
     * @return recomputed non-zero count of {@code m1ret}
     */
    private static long safeBinaryInPlaceMatrixMatrix(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final boolean plus = op.fn instanceof Plus;
        if (plus && m1ret.isEmpty() && !m1ret.isAllocated()) {
            m1ret.copy(m2);
        } else if (m1ret.sparse == m2.sparse) {
            // same representation on both sides
            if (m1ret.sparse) {
                LibMatrixBincell.safeBinaryInPlaceSparse(m1ret, m2, op);
            } else {
                LibMatrixBincell.safeBinaryInPlaceDense(m1ret, m2, op);
            }
        } else if (m2.sparse && (plus || op.fn instanceof Minus)) {
            LibMatrixBincell.safeBinaryInPlaceDenseSparseAdd(m1ret, m2, op);
        } else {
            LibMatrixBincell.safeBinaryInPlaceGeneric(m1ret, m2, op);
        }
        return m1ret.recomputeNonZeros();
    }

    /**
     * In-place binary operation for two matrices flagged as sparse; the result is written into the
     * sparse block of {@code m1ret} and its non-zero count is updated.
     * <p>
     * Three cases are distinguished by which sparse blocks exist: both present (row-wise merge),
     * only the right one present (rows are appended into a freshly created block), and only the
     * left one present (every non-empty row is combined with an all-zero right side).
     *
     * @param m1ret left operand and result
     * @param m2    right operand
     * @param op    binary operator
     */
    private static void safeBinaryInPlaceSparse(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (m1ret.sparseBlock != null) {
            m1ret.allocateSparseRowsBlock(false);
        }
        // The row replacement below uses c.get(r) / c.set(r, ...), so the result block is converted
        // to MCSR when it has another type.
        // NOTE(review): this is also reached with a null block; presumably copySparseBlock passes
        // null through, since the c == null case is handled further down -- confirm.
        if (!(m1ret.sparseBlock instanceof SparseBlockMCSR)) {
            m1ret.sparseBlock = SparseBlockFactory.copySparseBlock(SparseBlock.Type.MCSR, m1ret.sparseBlock, false);
        }
        if (m2.sparseBlock != null) {
            m2.allocateSparseRowsBlock(false);
        }
        SparseBlock c = m1ret.sparseBlock;
        SparseBlock b = m2.sparseBlock;
        int rlen = m1ret.rlen;
        int clen = m1ret.clen;
        boolean compact = LibMatrixBincell.shouldCompact(null, op);
        boolean mcsr = c instanceof SparseBlockMCSR;
        long nnz = 0L;
        if (c != null && b != null) {
            // both sides have data: handle each row according to which side is empty
            for (int r = 0; r < rlen; ++r) {
                if (c.isEmpty(r) && b.isEmpty(r)) continue;
                if (b.isEmpty(r)) {
                    LibMatrixBincell.zeroRightForSparseBinary(op, r, m1ret);
                } else if (c.isEmpty(r)) {
                    nnz += LibMatrixBincell.appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
                } else {
                    // both rows are non-empty here: detach the old left row, install an empty row
                    // sized for at most clen entries, and merge old-left with right into it
                    int estimateSize = Math.min(clen, (!c.isEmpty(r) ? c.size(r) : 0) + (!b.isEmpty(r) ? b.size(r) : 0));
                    SparseRow old = c.get(r);
                    c.set(r, new SparseRowVector(estimateSize), false);
                    nnz += LibMatrixBincell.mergeForSparseBinary(op, old.values(), old.indexes(), 0, old.size(), b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
                }
                if (!compact || !mcsr || c.isEmpty(r)) continue;
                c.get(r).compact();
            }
            // NOTE(review): after the MCSR conversion above, a non-null c appears to always be an
            // MCSR block, which would make this CSR branch unreachable -- confirm.
            if (compact && !mcsr) {
                SparseBlockCSR sbcsr = (SparseBlockCSR)c;
                sbcsr.compact();
            }
            // NOTE(review): nnz only accumulates the append/merge return values; rows handled by
            // zeroRightForSparseBinary add nothing to it -- verify this is the intended count.
            m1ret.setNonZeros(nnz);
        } else if (c == null && b != null) {
            // result has no sparse block yet: create one and append every non-empty right row
            m1ret.sparseBlock = SparseBlockFactory.createSparseBlock(rlen);
            nnz = 0L;
            for (int r = 0; r < rlen; ++r) {
                if (b.isEmpty(r)) continue;
                nnz += LibMatrixBincell.appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
            }
            m1ret.setNonZeros(nnz);
        } else if (c != null) {
            // right operand has no sparse block: combine every non-empty left row with zeros and
            // recount the non-zeros afterwards
            for (int r = 0; r < rlen; ++r) {
                if (c.isEmpty(r)) continue;
                LibMatrixBincell.zeroRightForSparseBinary(op, r, m1ret);
            }
            m1ret.recomputeNonZeros(op.getNumThreads());
        }
    }

    /**
     * Applies {@code op.fn.execute(value, m2)} in place to every stored entry of the sparse
     * block of {@code m1ret}. Cells that are not stored are left untouched, and the non-zero
     * count of {@code m1ret} is not updated here.
     *
     * @param m1ret sparse block to update in place
     * @param m2 constant right-hand operand
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceSparseConst(MatrixBlock m1ret, double m2, BinaryOperator op) {
        SparseBlock sb = m1ret.getSparseBlock();
        if (sb == null) {
            // no sparse block allocated means there are no stored entries to update
            return;
        }
        int rlen = m1ret.rlen;
        for (int r = 0; r < rlen; ++r) {
            if (sb.isEmpty(r)) continue;
            int apos = sb.pos(r);
            int alen = sb.size(r) + apos;
            double[] avals = sb.values(r);
            for (int k = apos; k < alen; ++k) {
                avals[k] = op.fn.execute(avals[k], m2);
            }
        }
    }

    /**
     * Applies a binary operation in place between the stored entries of a sparse block and a
     * dense vector that is indexed by column: each stored value at column {@code j} becomes
     * {@code op.fn.execute(value, b[j])}.
     *
     * <p>When {@link #shouldCompact} says zeros may have been introduced, the affected rows
     * (MCSR) or the whole block (otherwise cast to CSR) are compacted. The non-zero count of
     * {@code m1ret} is set before returning.
     *
     * @param m1ret sparse block to update in place; returns immediately if it is empty
     * @param m2 block whose dense values are read by column index
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceSparseVector(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (m1ret.isEmpty()) {
            return;
        }
        SparseBlock sb = m1ret.getSparseBlock();
        double[] b = m2.getDenseBlockValues();
        int rlen = m1ret.rlen;
        boolean compact = LibMatrixBincell.shouldCompact(m2, op);
        boolean mcsr = sb instanceof SparseBlockMCSR;
        long nnz = 0L;
        for (int r = 0; r < rlen; ++r) {
            if (sb.isEmpty(r)) continue;
            int apos = sb.pos(r);
            int alen = sb.size(r) + apos;
            double[] avals = sb.values(r);
            int[] aix = sb.indexes(r);
            long rowNnz = 0L;
            for (int k = apos; k < alen; ++k) {
                avals[k] = op.fn.execute(avals[k], b[aix[k]]);
                if (avals[k] != 0.0) {
                    ++rowNnz;
                }
            }
            if (!compact) {
                // without compaction nothing else counts this row; previously the total stayed
                // at 0 on this path and setNonZeros(0) was written for a non-empty result
                nnz += rowNnz;
                continue;
            }
            if (!mcsr) continue;
            SparseRow sr = sb.get(r);
            if (sr instanceof SparseRowVector) {
                // NOTE(review): this widens the row to the full capacity of the value array
                // before compacting; confirm slack entries are guaranteed to be zero
                ((SparseRowVector)sr).setSize(avals.length);
            }
            sr.compact();
            nnz += (long)sr.size();
        }
        if (compact && !mcsr) {
            SparseBlockCSR sbcsr = (SparseBlockCSR)sb;
            sbcsr.compact();
            nnz = sbcsr.size();
        }
        m1ret.setNonZeros(nnz);
    }

    /**
     * Decides whether a sparse result should be compacted after applying {@code op}.
     *
     * <p>Returns {@code true} only if the operator function is {@code Multiply}, {@code And}
     * or the {@code LOG_NZ} builtin, and additionally either no right-hand block is given or
     * {@code op.isIntroducingZerosRight(m2)} holds.
     *
     * @param m2 right-hand block, or {@code null} to skip the right-hand check
     * @param op binary operator being applied
     * @return whether compaction should be performed
     */
    private static boolean shouldCompact(MatrixBlock m2, BinaryOperator op) {
        boolean candidateFn;
        if (op.fn instanceof Multiply || op.fn instanceof And) {
            candidateFn = true;
        } else {
            candidateFn = op.fn instanceof Builtin && ((Builtin)op.fn).bFunc == Builtin.BuiltinCode.LOG_NZ;
        }
        if (!candidateFn) {
            return false;
        }
        return m2 == null || op.isIntroducingZerosRight(m2);
    }

    /**
     * Applies a binary operation in place on two dense blocks, allocating the dense block of
     * the target first if it is not allocated. Addition uses a dedicated kernel; every other
     * operator uses the generic cell-wise kernel.
     *
     * @param m1ret left-hand operand; overwritten with the result
     * @param m2 right-hand operand
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceDense(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (!m1ret.isAllocated()) {
            m1ret.allocateDenseBlock();
        }
        final boolean isPlus = op.fn instanceof Plus;
        if (!isPlus) {
            LibMatrixBincell.safeBinaryInPlaceDenseGeneric(m1ret, m2, op);
            return;
        }
        LibMatrixBincell.safeBinaryInPlaceDensePlus(m1ret, m2, op);
    }

    /**
     * Adds the dense block of {@code m2} onto the dense block of {@code m1ret} in place and
     * sets the non-zero count of {@code m1ret} to the number of non-zero sums.
     *
     * <p>If both dense blocks are contiguous, the first {@code rlen * clen} cells are added in
     * a single pass; otherwise the addition is done row by row.
     *
     * @param m1ret left-hand operand; overwritten with the sum
     * @param m2 right-hand operand
     * @param op binary operator (not consulted; the operation is always addition)
     */
    private static void safeBinaryInPlaceDensePlus(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        DenseBlock a = m1ret.getDenseBlock();
        DenseBlock b = m2.getDenseBlock();
        int rlen = m1ret.rlen;
        int clen = m1ret.clen;
        long lnnz = 0L;
        if (a.isContiguous() && b.isContiguous()) {
            double[] avals = a.values(0);
            double[] bvals = b.values(0);
            // iterate over the logical cell count rather than the backing array length: the
            // arrays may be larger than the matrix, which would read past bvals or count
            // slack cells as non-zeros
            int len = rlen * clen;
            for (int i = 0; i < len; ++i) {
                double sum = avals[i] + bvals[i];
                avals[i] = sum;
                lnnz += sum == 0.0 ? 0L : 1L;
            }
        } else {
            for (int r = 0; r < rlen; ++r) {
                int aix = a.pos(r);
                int bix = b.pos(r);
                double[] avals = a.values(r);
                double[] bvals = b.values(r);
                LibMatrixMult.vectAdd(bvals, avals, bix, aix, clen);
                lnnz += (long)UtilFunctions.computeNnz(avals, aix, clen);
            }
        }
        m1ret.setNonZeros(lnnz);
    }

    /**
     * Applies {@code op.fn} cell by cell on two dense blocks, writing each result back into
     * {@code m1ret} and setting its non-zero count to the number of non-zero results.
     *
     * @param m1ret left-hand operand; overwritten with the result
     * @param m2 right-hand operand
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceDenseGeneric(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        DenseBlock a = m1ret.getDenseBlock();
        DenseBlock b = m2.getDenseBlock();
        int rlen = m1ret.rlen;
        int clen = m1ret.clen;
        long lnnz = 0L;
        for (int r = 0; r < rlen; ++r) {
            double[] avals = a.values(r);
            double[] bvals = b.values(r);
            // each block supplies its own row offset (as the Plus kernel does) instead of
            // assuming b is laid out exactly like a
            int aix = a.pos(r);
            int bix = b.pos(r);
            for (int c = 0; c < clen; ++c) {
                double tmp = op.fn.execute(avals[aix + c], bvals[bix + c]);
                avals[aix + c] = tmp;
                lnnz += tmp != 0.0 ? 1L : 0L;
            }
        }
        m1ret.setNonZeros(lnnz);
    }

    /**
     * Applies {@code op.fn.execute(cell, m2)} in place to every cell of the dense block of
     * {@code m1ret} and sets its non-zero count to the number of non-zero results.
     *
     * @param m1ret block to update in place; its dense block is allocated first
     * @param m2 constant right-hand operand
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceDenseConst(MatrixBlock m1ret, double m2, BinaryOperator op) {
        m1ret.allocateDenseBlock();
        final DenseBlock dense = m1ret.getDenseBlock();
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        long nonZeros = 0L;
        for (int row = 0; row < numRows; row++) {
            final double[] vals = dense.values(row);
            final int start = dense.pos(row);
            final int stop = start + numCols;
            for (int idx = start; idx < stop; idx++) {
                final double updated = op.fn.execute(vals[idx], m2);
                vals[idx] = updated;
                if (updated != 0.0) {
                    nonZeros++;
                }
            }
        }
        m1ret.setNonZeros(nonZeros);
    }

    /**
     * Applies a binary operation in place between a dense block and the dense values of
     * {@code m2}, where the right-hand value for a cell is taken from index
     * {@code ix % clen} ({@code ix} being the cell's index in the row's value array).
     * The non-zero count of {@code m1ret} is set to the number of non-zero results.
     *
     * @param m1ret block to update in place; its dense block is allocated first
     * @param m2 block providing the right-hand dense values
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceDenseVector(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        m1ret.allocateDenseBlock();
        final DenseBlock dense = m1ret.getDenseBlock();
        final double[] vector = m2.getDenseBlockValues();
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        long nonZeros = 0L;
        for (int row = 0; row < numRows; row++) {
            final double[] vals = dense.values(row);
            final int start = dense.pos(row);
            final int stop = start + numCols;
            for (int idx = start; idx < stop; idx++) {
                final double updated = op.fn.execute(vals[idx], vector[idx % numCols]);
                vals[idx] = updated;
                if (updated != 0.0) {
                    nonZeros++;
                }
            }
        }
        m1ret.setNonZeros(nonZeros);
    }

    /**
     * Applies {@code op.fn} in place between a dense target and a sparse right-hand block,
     * visiting only the entries stored in the sparse block. The non-zero count of
     * {@code m1ret} is adjusted incrementally for every cell that changes between zero and
     * non-zero.
     *
     * <p>If the right-hand sparse block is not allocated there are no entries to visit and
     * the method returns without touching {@code m1ret}. If the target has no dense block yet,
     * one is allocated before the update.
     *
     * @param m1ret dense left-hand operand; overwritten with the result
     * @param m2 sparse right-hand operand
     * @param op binary operator to apply (the visible caller passes Plus or Minus)
     */
    private static void safeBinaryInPlaceDenseSparseAdd(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        SparseBlock b = m2.sparseBlock;
        if (b == null) {
            // an unallocated sparse block has no stored entries, so the loop below would do nothing
            return;
        }
        if (m1ret.denseBlock == null) {
            // the target may not be allocated yet; previously this led to a NullPointerException
            m1ret.allocateDenseBlock();
        }
        int rlen = m1ret.rlen;
        DenseBlock a = m1ret.denseBlock;
        long nnz = m1ret.getNonZeros();
        for (int r = 0; r < rlen; ++r) {
            if (b.isEmpty(r)) continue;
            int apos = a.pos(r);
            int bpos = b.pos(r);
            int blen = b.size(r);
            int[] bix = b.indexes(r);
            double[] avals = a.values(r);
            double[] bvals = b.values(r);
            for (int k = bpos; k < bpos + blen; ++k) {
                double vold = avals[apos + bix[k]];
                double vnew = op.fn.execute(vold, bvals[k]);
                // +1 when a zero cell becomes non-zero, -1 for the opposite transition
                nnz += vold == 0.0 && vnew != 0.0 ? 1L : (vold != 0.0 && vnew == 0.0 ? -1L : 0L);
                avals[apos + bix[k]] = vnew;
            }
        }
        m1ret.setNonZeros(nnz);
    }

    /**
     * Format-independent fallback: reads every cell of both blocks through {@code get},
     * applies {@code op.fn} and writes the result back into {@code m1ret} through {@code set}.
     *
     * @param m1ret left-hand operand; overwritten with the result
     * @param m2 right-hand operand
     * @param op binary operator to apply
     */
    private static void safeBinaryInPlaceGeneric(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        for (int row = 0; row < numRows; row++) {
            for (int col = 0; col < numCols; col++) {
                final double left = m1ret.get(row, col);
                final double right = m2.get(row, col);
                m1ret.set(row, col, op.fn.execute(left, right));
            }
        }
    }

    /**
     * Applies {@code op.fn} to every cell of {@code m1ret} in place using cell-wise
     * {@code get}/{@code set}, taking the right-hand value from {@code m2} according to the
     * binary access type: one value per row for a column vector, one value per column for a
     * row vector, and the matching cell otherwise.
     *
     * @param m1ret left-hand operand; overwritten with the result
     * @param m2 right-hand operand
     * @param op binary operator to apply
     * @return number of non-zero results written
     */
    private static long unsafeBinaryInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        final BinaryAccessType accessType = LibMatrixBincell.getBinaryAccessType(m1ret, m2);
        final boolean colVector = accessType == BinaryAccessType.MATRIX_COL_VECTOR;
        final boolean rowVector = !colVector && accessType == BinaryAccessType.MATRIX_ROW_VECTOR;
        long nonZeros = 0L;
        for (int row = 0; row < numRows; row++) {
            // a column vector contributes a single value per row, read once before the row is modified
            final double rowOperand = colVector ? m2.get(row, 0) : 0.0;
            for (int col = 0; col < numCols; col++) {
                final double left = m1ret.get(row, col);
                final double right;
                if (colVector) {
                    right = rowOperand;
                } else if (rowVector) {
                    right = m2.get(0, col);
                } else {
                    right = m2.get(row, col);
                }
                final double out = op.fn.execute(left, right);
                m1ret.set(row, col, out);
                if (out != 0.0) {
                    nonZeros++;
                }
            }
        }
        return nonZeros;
    }

    /**
     * Merges two sparse rows under {@code op} into row {@code resultRow} of {@code result},
     * choosing the kernel by output format and operator. The incoming lengths are converted
     * to exclusive end positions before delegating.
     *
     * @param op binary operator to apply
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 start position of the left row in its arrays
     * @param size1 number of entries of the left row
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 start position of the right row in its arrays
     * @param size2 number of entries of the right row
     * @param resultRow row index in the result
     * @param result output block
     * @return the value returned by the selected merge kernel
     */
    private static long mergeForSparseBinary(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, double[] values2, int[] cols2, int pos2, int size2, int resultRow, MatrixBlock result) {
        final int end1 = pos1 + size1;
        final int end2 = pos2 + size2;
        if (!result.isInSparseFormat()) {
            return LibMatrixBincell.mergeForSparseBinaryDenseOut(op, values1, cols1, pos1, end1, values2, cols2, pos2, end2, resultRow, result);
        }
        if (op.fn instanceof Multiply) {
            return LibMatrixBincell.mergeForSparseBinaryMultiply(op, values1, cols1, pos1, end1, values2, cols2, pos2, end2, resultRow, result);
        }
        return LibMatrixBincell.mergeForSparseBinaryGeneric(op, values1, cols1, pos1, end1, values2, cols2, pos2, end2, resultRow, result);
    }

    /**
     * Merge kernel that only emits entries for columns present in both input rows and stores
     * the new row in the MCSR result block.
     *
     * @param op binary operator applied to each matching pair
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 start position of the left row
     * @param size1 exclusive end position of the left row
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 start position of the right row
     * @param size2 exclusive end position of the right row
     * @param resultRow row index in the result
     * @param result output block; its sparse block is cast to {@code SparseBlockMCSR}
     * @return size of the stored result row
     */
    private static long mergeForSparseBinaryMultiply(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, double[] values2, int[] cols2, int pos2, int size2, int resultRow, MatrixBlock result) {
        final SparseBlockMCSR target = (SparseBlockMCSR)result.getSparseBlock();
        final int len1 = size1 - pos1;
        final int len2 = size2 - pos2;
        final SparseRowVector merged = new SparseRowVector(Math.min(len1, len2), Math.max(len1, len2));
        int i = pos1;
        int j = pos2;
        while (i < size1 && j < size2) {
            final int col1 = cols1[i];
            final int col2 = cols2[j];
            if (col1 < col2) {
                // column only on the left: skipped
                i++;
            } else if (col1 > col2) {
                // column only on the right: skipped
                j++;
            } else {
                merged.append(col1, op.fn.execute(values1[i], values2[j]));
                i++;
                j++;
            }
        }
        target.set(resultRow, merged, false);
        return target.size(resultRow);
    }

    /**
     * Merge kernel that walks both sorted input rows, applies {@code op.fn} to every column
     * present in either row (substituting {@code 0.0} for the missing side) and stores the
     * new row in the MCSR result block.
     *
     * @param op binary operator to apply
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 start position of the left row
     * @param size1 exclusive end position of the left row
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 start position of the right row
     * @param size2 exclusive end position of the right row
     * @param resultRow row index in the result
     * @param result output block; its sparse block is cast to {@code SparseBlockMCSR}
     * @return size of the stored result row
     */
    private static long mergeForSparseBinaryGeneric(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, double[] values2, int[] cols2, int pos2, int size2, int resultRow, MatrixBlock result) {
        final SparseBlockMCSR target = (SparseBlockMCSR)result.getSparseBlock();
        final int len1 = size1 - pos1;
        final int len2 = size2 - pos2;
        final SparseRowVector merged = new SparseRowVector(Math.max(len1, len2), len1 + len2);
        int i = pos1;
        int j = pos2;
        // interleave while both rows still have entries
        while (i < size1 && j < size2) {
            final int col1 = cols1[i];
            final int col2 = cols2[j];
            if (col1 == col2) {
                merged.append(col1, op.fn.execute(values1[i], values2[j]));
                i++;
                j++;
            } else if (col1 < col2) {
                merged.append(col1, op.fn.execute(values1[i], 0.0));
                i++;
            } else {
                merged.append(col2, op.fn.execute(0.0, values2[j]));
                j++;
            }
        }
        // remainder of the left row
        for (; i < size1; i++) {
            merged.append(cols1[i], op.fn.execute(values1[i], 0.0));
        }
        // remainder of the right row
        for (; j < size2; j++) {
            merged.append(cols2[j], op.fn.execute(0.0, values2[j]));
        }
        target.set(resultRow, merged, false);
        return target.size(resultRow);
    }

    /**
     * Merge kernel for a non-sparse output: walks both sorted input rows, applies
     * {@code op.fn} to every column present in either row (substituting {@code 0.0} for the
     * missing side) and writes each result through {@code result.set}.
     *
     * @param op binary operator to apply
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 start position of the left row
     * @param size1 exclusive end position of the left row
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 start position of the right row
     * @param size2 exclusive end position of the right row
     * @param resultRow row index in the result
     * @param result output block
     * @return the value returned by {@code result.recomputeNonZeros(resultRow, resultRow)}
     */
    private static long mergeForSparseBinaryDenseOut(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, double[] values2, int[] cols2, int pos2, int size2, int resultRow, MatrixBlock result) {
        int i = pos1;
        int j = pos2;
        // interleave while both rows still have entries
        while (i < size1 && j < size2) {
            final int col1 = cols1[i];
            final int col2 = cols2[j];
            if (col1 == col2) {
                result.set(resultRow, col1, op.fn.execute(values1[i], values2[j]));
                i++;
                j++;
            } else if (col1 < col2) {
                result.set(resultRow, col1, op.fn.execute(values1[i], 0.0));
                i++;
            } else {
                result.set(resultRow, col2, op.fn.execute(0.0, values2[j]));
                j++;
            }
        }
        // remainder of the left row
        for (; i < size1; i++) {
            result.set(resultRow, cols1[i], op.fn.execute(values1[i], 0.0));
        }
        // remainder of the right row
        for (; j < size2; j++) {
            result.set(resultRow, cols2[j], op.fn.execute(0.0, values2[j]));
        }
        return result.recomputeNonZeros(resultRow, resultRow);
    }

    /**
     * Writes {@code op.fn.execute(v, 0.0)} for every entry of a left-hand sparse row into
     * row {@code resultRow} of the result, appending to its sparse block or setting cells of
     * its dense block depending on the result format.
     *
     * @param op binary operator to apply
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 start position of the row in its arrays
     * @param size1 number of entries of the row
     * @param resultRow row index in the result
     * @param result output block
     * @return for sparse output the size of the result row, for dense output the number of
     *         non-zero values written
     */
    private static long appendLeftForSparseBinary(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, int resultRow, MatrixBlock result) {
        final int end = pos1 + size1;
        if (!result.isInSparseFormat()) {
            final DenseBlock dense = result.getDenseBlock();
            long written = 0L;
            for (int k = pos1; k < end; k++) {
                final double out = op.fn.execute(values1[k], 0.0);
                dense.set(resultRow, cols1[k], out);
                if (out != 0.0) {
                    written++;
                }
            }
            return written;
        }
        final SparseBlock sparse = result.getSparseBlock();
        for (int k = pos1; k < end; k++) {
            sparse.append(resultRow, cols1[k], op.fn.execute(values1[k], 0.0));
        }
        return sparse.size(resultRow);
    }

    /**
     * Writes {@code op.fn.execute(0.0, v)} for every entry of a right-hand sparse row into
     * row {@code r} of the result, appending to its sparse block or setting cells of its
     * dense block depending on the result format.
     *
     * @param op binary operator to apply
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 start position of the row in its arrays
     * @param size2 number of entries of the row
     * @param r row index in the result
     * @param result output block
     * @return for sparse output the size of the result row, for dense output the number of
     *         non-zero values written
     */
    private static long appendRightForSparseBinary(BinaryOperator op, double[] values2, int[] cols2, int pos2, int size2, int r, MatrixBlock result) {
        final int end = pos2 + size2;
        if (!result.isInSparseFormat()) {
            final DenseBlock dense = result.getDenseBlock();
            long written = 0L;
            for (int k = pos2; k < end; k++) {
                final double out = op.fn.execute(0.0, values2[k]);
                dense.set(r, cols2[k], out);
                if (out != 0.0) {
                    written++;
                }
            }
            return written;
        }
        final SparseBlock sparse = result.getSparseBlock();
        for (int k = pos2; k < end; k++) {
            sparse.append(r, cols2[k], op.fn.execute(0.0, values2[k]));
        }
        return sparse.size(r);
    }

    /**
     * Updates the stored entries of sparse row {@code r} in place with
     * {@code op.fn.execute(value, 0.0)} and compacts the row if any result is zero.
     * For {@code Plus} and {@code Minus} the row is left untouched.
     *
     * @param op binary operator to apply
     * @param r row index
     * @param ret block whose sparse block is updated
     */
    private static void zeroRightForSparseBinary(BinaryOperator op, int r, MatrixBlock ret) {
        final boolean additive = op.fn instanceof Plus || op.fn instanceof Minus;
        if (additive) {
            return;
        }
        final SparseBlock rows = ret.sparseBlock;
        final int begin = rows.pos(r);
        final int end = begin + rows.size(r);
        final double[] vals = rows.values(r);
        boolean sawZero = false;
        int k = begin;
        while (k < end) {
            final double updated = op.fn.execute(vals[k], 0.0);
            vals[k] = updated;
            if (updated == 0.0) {
                sawZero = true;
            }
            k++;
        }
        if (sawZero) {
            rows.compact(r);
        }
    }

    /**
     * Estimates the output format and number of non-zeros of a binary operation.
     *
     * <p>Non-zero counts that are not positive are recomputed first. An operator that is not
     * sparse-safe (and is not a division by a fully dense right side) yields a dense estimate
     * of all cells. A non-outer division by a fully dense right side keeps the format and
     * count of the left input. Otherwise the estimate comes from {@code OptimizerUtils}.
     *
     * @param m1 left-hand input
     * @param m2 right-hand input
     * @param op binary operator
     * @return estimated sparse flag and non-zero count
     */
    private static SparsityEstimate estimateSparsityOnBinary(MatrixBlock m1, MatrixBlock m2, BinaryOperator op) {
        long nnzLeft = m1.getNonZeros();
        long nnzRight = m2.getNonZeros();
        if (nnzLeft <= 0L) {
            nnzLeft = m1.recomputeNonZeros(op.getNumThreads());
        }
        if (nnzRight <= 0L) {
            nnzRight = m2.recomputeNonZeros(op.getNumThreads());
        }

        final BinaryAccessType atype = LibMatrixBincell.getBinaryAccessType(m1, m2);
        final boolean outer = atype == BinaryAccessType.OUTER_VECTOR_VECTOR;
        final long rows = m1.getNumRows();
        final long cols = outer ? (long)m2.getNumColumns() : (long)m1.getNumColumns();

        if (!op.sparseSafe && !(op.fn instanceof Divide && m2.getSparsity() == 1.0)) {
            return new SparsityEstimate(false, rows * cols);
        }
        if (!outer && op.fn instanceof Divide && m2.getSparsity() == 1.0) {
            return new SparsityEstimate(m1.sparse, nnzLeft);
        }

        final long estimated;
        if (outer) {
            estimated = OptimizerUtils.getOuterNonZeros(rows, cols, nnzLeft, nnzRight, op.getBinaryOperatorOpOp2());
        } else {
            // a vector operand is scaled up to the full matrix shape before estimating
            if (atype == BinaryAccessType.MATRIX_COL_VECTOR) {
                nnzRight *= cols;
            } else if (atype == BinaryAccessType.MATRIX_ROW_VECTOR) {
                nnzRight *= rows;
            }
            final double spLeft = OptimizerUtils.getSparsity(rows, cols, nnzLeft);
            final double spRight = OptimizerUtils.getSparsity(rows, cols, nnzRight);
            final double spOut = OptimizerUtils.getBinaryOpSparsity(spLeft, spRight, op.getBinaryOperatorOpOp2(), true);
            estimated = UtilFunctions.toLong(spOut * (double)rows * (double)cols);
        }
        return new SparsityEstimate(MatrixBlock.evalSparseFormatInMemory(rows, cols, estimated), estimated);
    }

    /**
     * Immutable pair of an estimated output format and an estimated number of non-zeros.
     */
    private static class SparsityEstimate {
        /** Whether the output is estimated to be in sparse format. */
        protected final boolean sparse;
        /** Estimated number of non-zero values in the output. */
        protected final long estimatedNonZeros;

        /**
         * @param sp estimated sparse-format flag
         * @param nnz estimated number of non-zeros
         */
        protected SparsityEstimate(boolean sp, long nnz) {
            sparse = sp;
            estimatedNonZeros = nnz;
        }
    }

    /**
     * Task that applies a unary operator to the rows {@code [rl, ru)} of a dense input block,
     * writes the results to the same positions of a dense output block and returns the number
     * of non-zero results.
     */
    private static class UncellTask
    implements Callable<Long> {
        private final DenseBlock _a;
        private final DenseBlock _c;
        private final UnaryOperator _op;
        private final int _rl;
        private final int _ru;

        /**
         * @param a input dense block
         * @param c output dense block
         * @param op unary operator to apply
         * @param rl first row (inclusive)
         * @param ru last row (exclusive)
         */
        protected UncellTask(DenseBlock a, DenseBlock c, UnaryOperator op, int rl, int ru) {
            _a = a;
            _c = c;
            _op = op;
            _rl = rl;
            _ru = ru;
        }

        @Override
        public Long call() throws Exception {
            long count = 0L;
            if (!_a.isContiguous(_rl, _ru)) {
                // row range spans several underlying arrays: process one row at a time
                final int ncol = _a.getDim(1);
                for (int row = _rl; row < _ru; row++) {
                    final double[] in = _a.values(row);
                    final double[] out = _c.values(row);
                    final int off = _a.pos(row);
                    final int stop = off + ncol;
                    for (int k = off; k < stop; k++) {
                        out[k] = _op.fn.execute(in[k]);
                        if (out[k] != 0.0) {
                            count++;
                        }
                    }
                }
                return count;
            }
            // contiguous row range: a single pass over the index range of the rows
            final double[] in = _a.values(_rl);
            final double[] out = _c.values(_rl);
            final int first = _a.pos(_rl);
            final int last = _a.pos(_ru);
            for (int k = first; k < last; k++) {
                out[k] = _op.fn.execute(in[k]);
                if (out[k] != 0.0) {
                    count++;
                }
            }
            return count;
        }
    }

    /**
     * Task that runs {@code LibMatrixBincell.safeBinaryScalar} on the row range
     * {@code [rl, ru)} and returns its result.
     */
    private static class BincellScalarTask
    implements Callable<Long> {
        private final MatrixBlock _m1;
        private final MatrixBlock _ret;
        private final ScalarOperator _sop;
        private final int _rl;
        private final int _ru;

        /**
         * @param m1 input block
         * @param ret output block
         * @param sop scalar operator to apply
         * @param rl lower row bound passed to the kernel
         * @param ru upper row bound passed to the kernel
         */
        protected BincellScalarTask(MatrixBlock m1, MatrixBlock ret, ScalarOperator sop, int rl, int ru) {
            _m1 = m1;
            _ret = ret;
            _sop = sop;
            _rl = rl;
            _ru = ru;
        }

        @Override
        public Long call() throws Exception {
            final long result = LibMatrixBincell.safeBinaryScalar(_m1, _ret, _sop, _rl, _ru);
            return result;
        }
    }

    /**
     * Task that runs {@code LibMatrixBincell.binCellOpExecute} on the row range
     * {@code [rl, ru)} and returns its result.
     */
    private static class BincellTask
    implements Callable<Long> {
        private final MatrixBlock _m1;
        private final MatrixBlock _m2;
        private final MatrixBlock _ret;
        private final BinaryOperator _bop;
        BinaryAccessType _atype;
        private final int _rl;
        private final int _ru;

        /**
         * @param m1 left-hand input block
         * @param m2 right-hand input block
         * @param ret output block
         * @param bop binary operator to apply
         * @param atype access type passed through to the kernel
         * @param rl lower row bound passed to the kernel
         * @param ru upper row bound passed to the kernel
         */
        protected BincellTask(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator bop, BinaryAccessType atype, int rl, int ru) {
            _m1 = m1;
            _m2 = m2;
            _ret = ret;
            _bop = bop;
            _atype = atype;
            _rl = rl;
            _ru = ru;
        }

        @Override
        public Long call() throws Exception {
            final long result = LibMatrixBincell.binCellOpExecute(_m1, _m2, _ret, _bop, _atype, _rl, _ru);
            return result;
        }
    }

    /**
     * Access patterns of a binary operation with respect to the shapes of its two operands.
     */
    public static enum BinaryAccessType {
        MATRIX_MATRIX,
        MATRIX_COL_VECTOR,
        MATRIX_ROW_VECTOR,
        COL_VECTOR_MATRIX,
        ROW_VECTOR_MATRIX,
        OUTER_VECTOR_VECTOR,
        INVALID;

        /**
         * Indicates whether this access type is one of the four matrix-vector or
         * vector-matrix combinations.
         *
         * @return {@code true} for {@code MATRIX_COL_VECTOR}, {@code MATRIX_ROW_VECTOR},
         *         {@code COL_VECTOR_MATRIX} and {@code ROW_VECTOR_MATRIX}; {@code false} otherwise
         */
        public boolean isMatrixVector() {
            switch (this) {
                case MATRIX_COL_VECTOR:
                case MATRIX_ROW_VECTOR:
                case COL_VECTOR_MATRIX:
                case ROW_VECTOR_MATRIX:
                    return true;
                default:
                    return false;
            }
        }
    }
}

