From ee9e94f555c3efe82f575b0520f1f9f804d69e2b Mon Sep 17 00:00:00 2001
From: fjobs
Date: Wed, 7 Jan 2026 13:39:06 +0100
Subject: [PATCH 01/18] Second skeleton for ColGroupDDCLZW, using the
 IMapToDataGroup interface and extending APreAgg like ColGroupDDC for easier
 implementation.

Idea: store only the compressed version of the _data vector plus important
metadata. If decompression is needed, we reconstruct the _data vector from
the metadata and the compressed _data vector. Decompression takes place at
most once. This is just one idea; there are other ways of implementing it.
---
 .../compress/colgroup/ColGroupDDCLZW.java | 1283 +++++++++++++++++
 1 file changed, 1283 insertions(+)
 create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
new file mode 100644
index 00000000000..01a87aafd7b
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -0,0 +1,1283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.colgroup;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.DMLCompressionException;
+import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary;
+import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
+import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
+import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex;
+import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
+import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
+import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme;
+import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
+import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
+import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
+import org.apache.sysds.runtime.compress.estim.EstimationFactors;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
+import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.SparseBlock;
+import org.apache.sysds.runtime.data.SparseBlockMCSR;
+import org.apache.sysds.runtime.data.SparseRow;
+import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.Minus;
+import org.apache.sysds.runtime.functionobjects.Plus;
+import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
+import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
+import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
+import org.jboss.netty.handler.codec.compression.CompressionException;
+import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
+import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
+
+/**
+ * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
+ * mapping vector is additionally lzw compressed.
+ *

+ * Idea:
+ * - The dictionary (_dict) is stored exactly like a normal DDC group (in the super class APreAgg).
+ * - The mapping vector (row -> dictionary-id) is NOT stored directly as an AMapToData field permanently.
+ * - Instead, the mapping is stored in compressed form as an int[] (_dataLZW).
+ * - When an operation needs the mapping, it is decoded on-demand into a transient cached AMapToData (_data).
+ */
+public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup {
+	private static final long serialVersionUID = -5769772089913918987L;
+
+	private transient volatile AMapToData _data; // Decoded mapping cache
+	private final int[] _dataLZW; // LZW compressed representation of the mapping (TODO optimize!)
+
+	private final int _nRows; // Number of rows in the mapping vector
+	private final int _nUnique; // Number of unique values in the mapping vector
+
+	// Compresses a decoded mapping (AMapToData) into an LZW-compressed int array.
+	private static int[] compress(final AMapToData data) {
+		if (data == null)
+			throw new IllegalArgumentException("Invalid input: data is null");
+
+		final int nRows = data.size();
+		if (nRows <= 0) {
+			throw new IllegalArgumentException("Invalid input: data has no rows");
+		}
+
+		final int nUnique = data.getUnique();
+		if (nUnique <= 0) {
+			throw new IllegalArgumentException("Invalid input: data has no unique values");
+		}
+
+		// LZW dictionary. Maps (prefixCode, nextSymbol) to a new code.
+		// Using fastutil keeps lookups fast.
+		final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16);
+		dict.defaultReturnValue(-1);
+
+		// Codes {0,...,nUnique - 1} are reserved for the original symbols.
+		int nextCode = nUnique;
+
+		// Output buffer.
+		IntArrayList out = new IntArrayList();
+
+		// Initialize w with the first input symbol.
+		int w = data.getIndex(0);
+
+		// Process the remaining input symbols.
+		for (int i = 1; i < nRows; i++) {
+			int k = data.getIndex(i); // next input symbol
+			long key = packKey(w, k); // encode (w,k) into long key
+
+			int wk = dict.get(key); // look if wk exists in dict
+			if (wk != -1) {
+				w = wk; // wk exists in dict so replace w by wk and continue.
+			} else {
+				// wk does not exist in dict.
+				out.add(w);
+				dict.put(key, nextCode++);
+				w = k; // Start new phrase with k
+			}
+		}
+
+		out.add(w);
+		return out.toIntArray();
+	}
+
+	/**
+	 * Builds a packed 64-bit key for (prefixCode, nextSymbol) pairs used in the LZW dictionary.
+	 * Upper 32 bits: prefixCode (current pattern code w)
+	 * Lower 32 bits: nextSymbol (k)
+	 */
+	private static long packKey(int prefixCode, int nextSymbol) {
+		return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL);
+	}
+
+	// Lazily decode the mapping from _dataLZW into an AMapToData instance and cache it in _data.
+	private AMapToData decompress() {
+		AMapToData d = _data;
+		if (d == null) {
+			synchronized (this) {
+				d = _data;
+				if (d == null) {
+					d = decode(_dataLZW, _nRows, _nUnique);
+					_data = d;
+				}
+			}
+		}
+		return d;
+	}
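+
+	// NOTE: decompress() above references a decode(...) helper that does not exist yet; the following
+	// is a minimal sketch of it (an assumption, not necessarily the final representation), mirroring
+	// compress(): codes < nUnique are raw symbols, larger codes expand through (prefixCode, nextSymbol)
+	// pairs, and the classic "code == next free code" corner case is the previous phrase plus its own
+	// first symbol. Example: the mapping 0,1,0,1 with nUnique = 2 compresses to [0, 1, 2], where code
+	// 2 = (0,1). MapToFactory.create(size, nUnique) and AMapToData.set(i, v) are used as elsewhere here.
+	private static AMapToData decode(int[] dataLZW, int nRows, int nUnique) {
+		final AMapToData map = MapToFactory.create(nRows, nUnique);
+		// Rebuild the same phrase dictionary that compress() built, as (prefix, symbol) pairs.
+		final int[] prefix = new int[nUnique + dataLZW.length];
+		final int[] symbol = new int[nUnique + dataLZW.length];
+		final int[] stack = new int[nRows]; // scratch to expand one phrase back-to-front
+		int nextCode = nUnique;
+		int prev = -1;
+		int pos = 0; // write position in the output mapping
+		for (int i = 0; i < dataLZW.length; i++) {
+			final int c = dataLZW[i];
+			// Special LZW case: code not in the dictionary yet; it is prev-phrase + its first symbol.
+			final boolean special = c == nextCode;
+			int cur = special ? prev : c;
+			int sp = 0;
+			while (cur >= nUnique) { // walk prefix links; symbols come out back-to-front
+				stack[sp++] = symbol[cur];
+				cur = prefix[cur];
+			}
+			final int first = cur; // first symbol of the decoded phrase
+			map.set(pos++, first);
+			while (sp > 0)
+				map.set(pos++, stack[--sp]);
+			if (special)
+				map.set(pos++, first); // the phrase ends with its own first symbol
+			if (prev != -1) { // register the next code, exactly as compress() would
+				prefix[nextCode] = prev;
+				symbol[nextCode] = first;
+				nextCode++;
+			}
+			prev = c;
+		}
+		if (pos != nRows)
+			throw new DMLCompressionException("Invalid LZW decode length: " + pos + " expected " + nRows);
+		return map;
+	}
+
+	// Build Constructor: Used when creating a new DDCLZW instance during compression/build time. 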
(TODO) + private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + + // Derive metadata + _nRows = data.size(); + _nUnique = dict.getNumberOfValues(colIndexes.size()); + + // Compress mapping to LZW + _dataLZW = compress(data); + + if (CompressedMatrixBlock.debug) { + _data = data; + if (getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if (data.size() == 0) + throw new DMLCompressionException("Invalid length of the data. is zero"); + + if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if (c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + data.verify(); + } else { + _data = null; + } + } + + // Read Constructor: Used when creating this group from a serialized form (e.g., reading a compressed matrix from disk/memory stream). (TODO) + private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + + _dataLZW = dataLZW; + _data = null; + _nRows = nRows; + _nUnique = nUnique; + + if (CompressedMatrixBlock.debug) { + final AMapToData d = decompress(); + if (getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if (d.size() == 0) + throw new DMLCompressionException("Invalid length of the data. is zero"); + + if (d.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + d.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if (c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + d.verify(); + } + } + + // Factory method for creating a column group. 
+ public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if (dict == null) + return new ColGroupEmpty(colIndexes); + else if (data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else + return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); + } + + public CompressionType getCompType() { + return CompressionType.DDCLZW; // TODO add new compression type DDCLZW + } + + public ColGroupType getColGroupType() { + return ColGroupType.DDCLZW; // TODO add new ColGroup type DDCLZW + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + AMapToData d = decompress(); + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = d.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); + } + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int idxSize = _colIndexes.size(); + if (db.isContiguous()) { + final int nColOut = db.getDim(1); + if (idxSize == 1 && nColOut == 1) + decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == 1) + decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == nColOut) // offC == 0 implied + decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); + else if (offC == 0 && offR == 0) + decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); + else if (offC == 0) + decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + + private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + AMapToData d = decompress(); + final double[] c = db.values(0); + final int nCols = db.getDim(1); + final int colOff = _colIndexes.get(0) + offC; + for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) + c[offT] += values[d.getIndex(i)]; + + } + + // TODO: implement necessary logic in decompress to ensure correctness of getMapToData() + @Override + public AMapToData getMapToData() { + return decompress(); + } + + private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + AMapToData d = decompress(); + final double[] c = db.values(0); + decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, d); + } + + private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, + double[] values, AMapToData data) { + data.decompressToRange(c, rl, ru, offR, values); + + } + + private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol) { + AMapToData d = decompress(); + final double[] c = db.values(0); + for (int r = rl; r < ru; r++) { + final int start = d.getIndex(r) * nCol; + final int offStart = (offR + r) * nCol; + LibMatrixMult.vectAdd(values, c, start, offStart, nCol); + } + 
} + + private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol, int colOut) { + AMapToData d = decompress(); + int off = (rl + offR) * colOut; + for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { + final double[] c = db.values(offT); + final int rowIndex = d.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { + AMapToData d = decompress(); + final int nCol = _colIndexes.size(); + final int nColU = db.getDim(1); + final double[] c = db.values(0); + for (int i = rl; i < ru; i++) { + final int off = i * nColU; + final int rowIndex = d.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + AMapToData d = decompress(); + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int rowIndex = d.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + AMapToData d = decompress(); + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = d.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) + ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); + } + + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + AMapToData d = decompress(); + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int rowIndex = d.getIndex(i) * nCol; + for (int j = 0; j < nCol; j++) + ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + AMapToData d = decompress(); + for (int i = rl; i < ru; i++) { + final int vr = d.getIndex(i); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) { + final int rowOut = _colIndexes.get(aix[j]); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + c[off + i] += aval[j]; + } + } + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + AMapToData d = decompress(); + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + for (int i = rl; i < ru; i++) { + final double v = dict[d.getIndex(i) * nCol + j]; + c[off + i] += v; + } + } + } + + @Override + 
protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + AMapToData d = decompress(); + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + for (int i = 0; i < d.size(); i++) { + int di = d.getIndex(i); + if (sb.isEmpty(di)) + continue; + + final int apos = sb.pos(di); + final int alen = sb.size(di) + apos; + final int[] aix = sb.indexes(di); + final double[] aval = sb.values(di); + + for (int j = apos; j < alen; j++) { + sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); + } + } + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + AMapToData d = decompress(); + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + SparseRow r = sbr.get(rowOut); + + for (int i = 0; i < d.size(); i++) { + final double v = dict[d.getIndex(i) * nCol + j]; + r = r.append(i, v); + } + sbr.set(rowOut, r, false); + } + } + + @Override + public double getIdx(int r, int colIdx) { + AMapToData d = decompress(); + return _dict.getValue(d.getIndex(r), colIdx, _colIndexes.size()); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + AMapToData d = decompress(); + for (int rix = rl; rix < ru; rix++) + c[rix] += preAgg[d.getIndex(rix)]; + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + AMapToData d = decompress(); + for (int i = rl; i < ru; i++) + c[i] = builtin.execute(c[i], preAgg[d.getIndex(i)]); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + AMapToData d = decompress(); + for (int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[d.getIndex(rix)]; + } + + @Override + public int[] getCounts(int[] counts) { + return decompress().getCounts(counts); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (_colIndexes.size() == 1) + leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); + else + lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock retV = result.getDenseBlock(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
+ if (matrix.isEmpty()) + return; + else if (matrix.isInSparseFormat()) { + AMapToData d = decompress(); + if (cl != 0 || cu != d.size()) + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); + } else if (!matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + else + lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru) { + + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + // final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = 
decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, + int nColRet, double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; + } + } + + private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + if (matrix.isInSparseFormat()) + lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final DenseBlock db = result.getDenseBlock(); + final SparseBlock sb = matrix.getSparseBlock(); + + if (cl != 0 || cu != d.size()) { + // sub part + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final double[] retV = db.values(r); + final int pos = db.pos(r); + lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); + } + } else { + for (int r = rl; r < ru; r++) + d.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); + } + } + + private final void 
lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { + AMapToData d = decompress(); + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + return; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + _dict.multiplyScalar(aval[i], retV, offR, d.getIndex(aix[i]), _colIndexes); + } + + private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + AMapToData d = decompress(); + final double[] retV = result.getDenseBlockValues(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] mV = matrix.getDenseBlockValues(); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); + } + } + + private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, + int cu) { + AMapToData d = decompress(); + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); + } + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + d.preAggregateDense(m, preAgg, rl, ru, cl, cu); + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + DenseBlock db = that.getDenseBlock(); + DenseBlock retDB = ret.getDenseBlock(); + for (int i = rl; i < ru; i++) + leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { + if (_dict instanceof IdentityDictionary) + identityRightDecompressingMult(right, ret, rl, ru, crl, cru); + else + defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); + } + + private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); // TODO: SPECIES Vector in ColGroupDDC. What do ? 
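+		// NOTE (assumption): SPECIES is not declared in this class; ColGroupDDC keeps its own species
+		// constant for the incubator vector API. One option, if that constant cannot be shared, is:
+		// private static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;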
+ final int vLen = SPECIES.length(); + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + AMapToData d = decompress(); + for (int i = rl; i < ru; i++) { + int k = d.getIndex(i); + final int offOut = i * jd + crl; + final double aa = 1; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + + private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + AMapToData d = decompress(); + final double[] a = _dict.getValues(); + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int kd = _colIndexes.size(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + + final int blkzI = 32; + final int blkzK = 24; + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for (int bi = rl; bi < ru; bi += blkzI) { + final int bie = Math.min(ru, bi + blkzI); + for (int bk = 0; bk < kd; bk += blkzK) { + final int bke = Math.min(kd, bk + blkzK); + for (int i = bi; i < bie; i++) { + int offi = d.getIndex(i) * kd; + final int offOut = i * jd + crl; + for (int k = bk; k < bke; k++) { + final double aa = a[offi + k]; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + } + } + } + + final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { + vVec = vVec.broadcast(aa); + final int offj = k * jd; + final int end = endT + offj; + for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { + DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); + DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); + res = vVec.fma(bVec, res); + res.intoArray(c, offOut); + } + for (int j = end; j < cru + offj; j++, offOut++) { + double bb = b[j]; + c[offOut] += bb * aa; + } + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + if (cl != 0 || cu != d.size()) { + throw new NotImplementedException(); + } + d.preAggregateSparse(sb, preAgg, rl, ru); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + AMapToData d = decompress(); + try { + + d.preAggregateDDC_DDC(that.d, that._dict, ret, that._colIndexes.size()); + } catch (Exception e) { + throw new CompressionException(that.toString(), e); + } + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + AMapToData d = decompress(); + d.preAggregateDDC_SDCZ(that.d, that._dict, that._indexes, ret, that._colIndexes.size()); + } + + @Override + public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + final AOffsetIterator itThat = that._indexes.getOffsetIterator(); + final int nCol = that._colIndexes.size(); + final int finalOff = that._indexes.getOffsetToLast(); + final double[] v = ret.getValues(); + AMapToData d = decompress(); + while (true) { + final int to = d.getIndex(itThat.value()); + that._dict.addToEntry(v, 0, to, nCol); + if (itThat.value() == finalOff) + break; + itThat.next(); + } + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + AMapToData d = decompress(); + d.preAggregateDDC_RLE(that._ptr, that.d, that._dict, ret, that._colIndexes.size()); + } + + 
@Override + public boolean sameIndexStructure(AColGroupCompressed that) { + AMapToData d = decompress(); + return that instanceof ColGroupDDC && ((ColGroupDDC) that).d == d; + } + + @Override + public long estimateInMemorySize() { + AMapToData d = decompress(); + long size = super.estimateInMemorySize(); + size += d.getInMemorySize(); + return size; + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + AMapToData d = decompress(); + if ((op.fn instanceof Plus || op.fn instanceof Minus)) { + final double v0 = op.executeScalar(0); + if (v0 == 0) + return this; + final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); + return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); + } + return create(_colIndexes, _dict.applyScalarOp(op), d, getCachedCounts()); + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + AMapToData d = decompress(); + return create(_colIndexes, _dict.applyUnaryOp(op), d, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); + AMapToData d = decompress(); + return create(_colIndexes, ret, d, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && + ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { + AMapToData d = decompress(); + final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); + } + final IDictionary ret; + if (_colIndexes.size() == 1) + ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); + else + ret = _dict.binOpRight(op, v, _colIndexes); + AMapToData d = decompress(); + return create(_colIndexes, ret, d, getCachedCounts()); + } + + // TODO + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + AMapToData d = decompress(); + d.write(out); + } + + // TODO + public static ColGroupDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDDC(cols, dict, data, null); + } + + @Override + public long getExactSizeOnDisk() { + AMapToData d = decompress(); + long ret = super.getExactSizeOnDisk(); + ret += d.getExactSizeOnDisk(); + return ret; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); + } + + @Override + protected int numRowsToMultiply() { + AMapToData d = decompress(); + return d.size(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + if (preAgg != null) { + AMapToData d = decompress(); + return create(colIndexes, preAgg, d, getCachedCounts()); + } 
else + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + AMapToData d = decompress(); + return ColGroupDDC.create(_colIndexes, _dict, d.slice(rl, ru), null); + } catch (Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); + } + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + AMapToData d = decompress(); + return create(colIndexes, newDictionary, d, getCachedCounts()); + } + + @Override + public AColGroup append(AColGroup g) { + if (g instanceof ColGroupDDC) { + if (g.getColIndices().equals(_colIndexes)) { + + ColGroupDDC gDDC = (ColGroupDDC) g; + if (gDDC._dict.equals(_dict)) { + AMapToData d = decompress(); + AMapToData nd = d.append(gDDC.d); + return create(_colIndexes, _dict, nd, null); + } else + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + } else + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); + } else + LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { + for (int i = 1; i < g.length; i++) { + if (!_colIndexes.equals(g[i]._colIndexes)) { + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); + return null; + } + + if (!(g[i] instanceof ColGroupDDC)) { + LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + final ColGroupDDC gDDC = (ColGroupDDC) g[i]; + if (!gDDC._dict.equals(_dict)) { + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + return null; + } + } + AMapToData d = decompress(); + AMapToData nd = d.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); + return create(_colIndexes, _dict, nd, null); + } + + @Override + public ICLAScheme getCompressionScheme() { + throw new NotImplementedException(); + } + + @Override + public AColGroup recompress() { + throw new NotImplementedException(); + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + try { + + IEncode enc = getEncoding(); + AMapToData d = decompress(); + EstimationFactors ef = new EstimationFactors(d.getUnique(), d.size(), d.size(), + _dict.getSparsity()); + return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); + } catch (Exception e) { + throw new DMLCompressionException(this.toString(), e); + } + } + + @Override + public IEncode getEncoding() { + AMapToData d = decompress(); + return EncodingFactory.create(d); + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + AMapToData d = decompress(); + return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), d, getCachedCounts()); + } + + @Override + public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + final SparseBlock sb = selection.getSparseBlock(); + final SparseBlock retB = ret.getSparseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, 
MatrixBlock ret, int rl, int ru) { + // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; + final SparseBlock sb = selection.getSparseBlock(); + final DenseBlock retB = ret.getDenseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, + int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + if (_colIndexes instanceof RangeIndex) + leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); + else { + + pos += cl; // left side matrix position offset. + if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = _data.getIndex(rc); + if (idx != nVal) + values2[pos2 + _colIndexes.get(idx)] += values[pos]; + } + } else { + AMapToData d = decompress(); + for (int rc = cl; rc < cu; rc++, pos++) + values2[pos2 + _colIndexes.get(d.getIndex(rc))] += values[pos]; + } + } + } + + private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, + int cl, int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + + final int firstCol = pos2 + _colIndexes.get(0); + pos += cl; // left side matrix position offset. + AMapToData d = decompress(); + if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = d.getIndex(rc); + if (idx != nVal) + values2[firstCol + idx] += values[pos]; + } + } else { + for (int rc = cl; rc < cu; rc++, pos++) + values2[firstCol + d.getIndex(rc)] += values[pos]; + } + } + + @Override + public AColGroup morph(CompressionType ct, int nRow) { + // return this; + if (ct == getCompType()) + return this; + else if (ct == CompressionType.SDC) { + // return this; + AMapToData d = decompress(); + int[] counts = getCounts(); + int maxId = maxIndex(counts); + double[] def = _dict.getRow(maxId, _colIndexes.size()); + + int offsetSize = nRow - counts[maxId]; + int[] offsets = new int[offsetSize]; + AMapToData reducedData = MapToFactory.create(offsetSize, d.getUnique()); + int o = 0; + for (int i = 0; i < nRow; i++) { + int v = d.getIndex(i); + if (v != maxId) { + offsets[o] = i; + reducedData.set(o, v); + o++; + } + } + + return ColGroupSDC.create(_colIndexes, d.size(), _dict, def, OffsetFactory.createOffset(offsets), + reducedData, null); + } else if (ct == CompressionType.CONST) { + // if(1 < getNumValues()) { + String thisS = this.toString(); + if (thisS.length() > 10000) + thisS = thisS.substring(0, 10000) + "..."; + LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); + return this; + // } + } else if (ct == CompressionType.DDCFOR) + return this; // it does not make sense to change to FOR. 
+ else + return super.morph(ct, nRow); + } + + private static int maxIndex(int[] counts) { + int id = 0; + for (int i = 1; i < counts.length; i++) { + if (counts[i] > counts[id]) { + id = i; + } + } + return id; + } + + @Override + public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { + final IDictionary combined = combineDictionaries(nCol, right); + final IColIndex combinedColIndex = combineColIndexes(nCol, right); + AMapToData d = decompress(); + return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts()); + } + + @Override + public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { + IDictionary b = ((ColGroupDDC) right).getDictionary(); + IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); + IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); + AMapToData d = decompress(); + return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts()); + } + + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + AMapToData d = decompress(); + AMapToData[] maps = d.splitReshapeDDC(multiplier); + AColGroup[] res = new AColGroup[multiplier]; + for (int i = 0; i < multiplier; i++) { + final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); + res[i] = create(ci, _dict, maps[i], null); + } + return res; + } + + @Override + public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) + throws Exception { + AMapToData d = decompress(); + AMapToData[] maps = d.splitReshapeDDCPushDown(multiplier, pool); + AColGroup[] res = new AColGroup[multiplier]; + for (int i = 0; i < multiplier; i++) { + final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); + res[i] = create(ci, _dict, maps[i], null); + } + return res; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append(String.format("\n%15s", "Data: ")); + AMapToData d = decompress(); + sb.append(d); + return sb.toString(); + } + + @Override + protected boolean allowShallowIdentityRightMult() { + return true; + } +} From 007611c60c2bfadf2d4ed13e89c1eb01b734b814 Mon Sep 17 00:00:00 2001 From: fjobs Date: Thu, 8 Jan 2026 15:48:50 +0100 Subject: [PATCH 02/18] Idea: * - DDCLZW stores the mapping vector exclusively in compressed form. * - No persistent MapToData cache is maintained. * - Sequential operations decode on-the-fly, while operations requiring random access explicitly materialize and fall back to DDC. */ --- .../compress/colgroup/ColGroupDDCLZW.java | 1120 +---------------- 1 file changed, 31 insertions(+), 1089 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 01a87aafd7b..d83120e1345 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -74,24 +74,27 @@ /** * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose * mapping vector is additionally lzw compressed. - *

 * Idea:
- * - The dictionary (_dict) is stored exactly like a normal DDC group (in the super class APreAgg).
- * - The mapping vector (row -> dictionary-id) is NOT stored directly as an AMapToData field permanently.
- * - Instead, the mapping is stored in compressed form as an int[] (_dataLZW).
- * - When an operation needs the mapping, it is decoded on-demand into a transient cached AMapToData (_data).
+ * - DDCLZW stores the mapping vector exclusively in compressed form.
+ * - No persistent MapToData cache is maintained.
+ * - Sequential operations decode on-the-fly, while operations requiring random access explicitly materialize and fall back to DDC.
  */
 public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup {
 	private static final long serialVersionUID = -5769772089913918987L;
 
-	private transient volatile AMapToData _data; // Decoded mapping cache
-	private final int[] _dataLZW; // LZW compressed representation of the mapping (TODO optimize!)
+	private final int[] _dataLZW; // LZW compressed representation of the mapping
 
 	private final int _nRows; // Number of rows in the mapping vector
 	private final int _nUnique; // Number of unique values in the mapping vector
 
-	// Compresses a decoded mapping (AMapToData) into an LZW-compressed int array.
-	private static int[] compress(final AMapToData data) {
+
+	// Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. (TODO)
+	private static long packKey(int prefixCode, int nextSymbol) {
+		return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL);
+	}
+
+	// Compresses a mapping (AMapToData) into an LZW-compressed int array. (TODO)
+	private static int[] compress(AMapToData data) {
 		if (data == null)
 			throw new IllegalArgumentException("Invalid input: data is null");
@@ -145,35 +148,18 @@ private static int[] compress(final AMapToData data) {
 		return out.toIntArray();
 	}
 
-	/**
-	 * Builds a packed 64-bit key for (prefixCode, nextSymbol) pairs used in the LZW dictionary.
-	 * Upper 32 bits: prefixCode (current pattern code w)
-	 * Lower 32 bits: nextSymbol (k)
-	 */
-	private static long packKey(int prefixCode, int nextSymbol) {
-		return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL);
-	}
-
-	// Lazily decode the mapping from _dataLZW into an AMapToData instance and cache it in _data.
-	private AMapToData decompress() {
-		AMapToData d = _data;
-		if (d == null) {
-			synchronized (this) {
-				d = _data;
-				if (d == null) {
-					d = decode(_dataLZW, _nRows, _nUnique);
-					_data = d;
-				}
-			}
-		}
-		return d;
-	}
+	// Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. (TODO)
+	private AMapToData decompress(int[] dataLZW) {
+		return null;
+	}
 
@@ -181,12 +167,10 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data,
 		_dataLZW = compress(data);
 
 		if (CompressedMatrixBlock.debug) {
-			_data = data;
 			if (getNumValues() == 0)
 				throw new DMLCompressionException("Invalid construction with empty dictionary");
-			if (data.size() == 0)
+			if (_nRows == 0)
 				throw new DMLCompressionException("Invalid length of the data. is zero");
-
 			if (data.getUnique() != dict.getNumberOfValues(colIndexes.size()))
 				throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has "
 					+ dict.getNumberOfValues(colIndexes.size()));
@@ -194,8 +178,6 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data,
 			int[] c = getCounts();
 			if (c.length != dict.getNumberOfValues(colIndexes.size()))
 				throw new DMLCompressionException("Invalid DDC Construction");
 			data.verify();
-		} else {
-			_data = null;
 		}
 	}
 
 	// Read Constructor: Used when creating this group from a serialized form (e.g., reading a compressed matrix from disk/memory stream). (TODO)
 	private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique, int[] cachedCounts) {
 		super(colIndexes, dict, cachedCounts);
 
 		_dataLZW = dataLZW;
-		_data = null;
 		_nRows = nRows;
 		_nUnique = nUnique;
 
 		if (CompressedMatrixBlock.debug) {
-			final AMapToData d = decompress();
 			if (getNumValues() == 0)
 				throw new DMLCompressionException("Invalid construction with empty dictionary");
-			if (d.size() == 0)
+			if (_nRows <= 0)
 				throw new DMLCompressionException("Invalid length of the data. is zero");
-
-			if (d.getUnique() != dict.getNumberOfValues(colIndexes.size()))
-				throw new DMLCompressionException("Invalid map to dict Map has:" + d.getUnique() + " while dict has "
+			if (_nUnique != dict.getNumberOfValues(colIndexes.size()))
+				throw new DMLCompressionException("Invalid map to dict Map has:" + _nUnique + " while dict has "
 					+ dict.getNumberOfValues(colIndexes.size()));
 			int[] c = getCounts();
 			if (c.length != dict.getNumberOfValues(colIndexes.size()))
 				throw new DMLCompressionException("Invalid DDC Construction");
-			d.verify();
+
+			// Optional: validate that decoding works (expensive)
+			// AMapToData decoded = decode(_dataLZW, _nRows, _nUnique);
+			// decoded.verify();
 		}
 	}
 
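+	// NOTE (assumption, sketch only): the commit message says sequential operations decode on-the-fly.
+	// One possible shape is a forward-only reader over _dataLZW that emits one mapping entry per call
+	// without materializing the full AMapToData. It mirrors the decode(...) helper referenced above;
+	// the name LZWDecodeIterator and its methods are hypothetical, not part of the final design.
+	private final class LZWDecodeIterator {
+		private final int[] prefix = new int[_nUnique + _dataLZW.length];
+		private final int[] symbol = new int[_nUnique + _dataLZW.length];
+		private final int[] stack = new int[_nRows]; // scratch: one phrase, first symbol on top
+		private int sp = 0; // stack fill
+		private int nextCode = _nUnique;
+		private int prev = -1; // previously read code
+		private int in = 0; // read position in _dataLZW
+		private int emitted = 0; // mapping entries returned so far
+
+		boolean hasNext() {
+			return emitted < _nRows;
+		}
+
+		// Returns the dictionary id of the next row, in row order.
+		int next() {
+			if (sp == 0)
+				expandNextCode();
+			emitted++;
+			return stack[--sp];
+		}
+
+		private void expandNextCode() {
+			final int c = _dataLZW[in++];
+			// Special LZW case: code not yet in the dictionary; it is prev-phrase + its first symbol.
+			final boolean special = c == nextCode;
+			int cur = special ? prev : c;
+			final int mark = sp;
+			if (special)
+				stack[sp++] = -1; // reserved for the trailing symbol, patched below
+			while (cur >= _nUnique) { // walk prefix links; symbols come out back-to-front
+				stack[sp++] = symbol[cur];
+				cur = prefix[cur];
+			}
+			stack[sp++] = cur; // first symbol of the phrase ends up on top
+			if (special)
+				stack[mark] = cur; // the phrase ends with its own first symbol
+			if (prev != -1) { // register the next code, mirroring compress()
+				prefix[nextCode] = prev;
+				symbol[nextCode] = cur;
+				nextCode++;
+			}
+			prev = c;
+		}
+	}
+	// A sequential kernel could then stream, e.g.: for(int r = 0; it.hasNext(); r++) c[r] += preAgg[it.next()];
+
-	// Factory method for creating a column group.
+	// Factory method for creating a column group. 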
(AColGroup g = ColGroupDDCLZW.create(...);) public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { if (dict == null) return new ColGroupEmpty(colIndexes); @@ -235,1049 +217,9 @@ else if (data.getUnique() == 1) return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); } - public CompressionType getCompType() { - return CompressionType.DDCLZW; // TODO add new compression type DDCLZW - } - - public ColGroupType getColGroupType() { - return ColGroupType.DDCLZW; // TODO add new ColGroup type DDCLZW - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - AMapToData d = decompress(); - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = d.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); - } - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int idxSize = _colIndexes.size(); - if (db.isContiguous()) { - final int nColOut = db.getDim(1); - if (idxSize == 1 && nColOut == 1) - decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == 1) - decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == nColOut) // offC == 0 implied - decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); - else if (offC == 0 && offR == 0) - decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); - else if (offC == 0) - decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); - else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - - private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - AMapToData d = decompress(); - final double[] c = db.values(0); - final int nCols = db.getDim(1); - final int colOff = _colIndexes.get(0) + offC; - for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) - c[offT] += values[d.getIndex(i)]; - - } - - // TODO: implement necessary logic in decompress to ensure correctness of getMapToData() - @Override - public AMapToData getMapToData() { - return decompress(); - } - - private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - AMapToData d = decompress(); - final double[] c = db.values(0); - decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, d); - } - - private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, - double[] values, AMapToData data) { - data.decompressToRange(c, rl, ru, offR, values); - - } - - private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol) { - AMapToData d = decompress(); - final double[] c = db.values(0); - for (int r = rl; r < ru; r++) { - final int start = d.getIndex(r) * nCol; - final int offStart = (offR + r) * nCol; - LibMatrixMult.vectAdd(values, c, start, offStart, nCol); - } - } - 
- private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol, int colOut) { - AMapToData d = decompress(); - int off = (rl + offR) * colOut; - for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { - final double[] c = db.values(offT); - final int rowIndex = d.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { - AMapToData d = decompress(); - final int nCol = _colIndexes.size(); - final int nColU = db.getDim(1); - final double[] c = db.values(0); - for (int i = rl; i < ru; i++) { - final int off = i * nColU; - final int rowIndex = d.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - AMapToData d = decompress(); - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - final int rowIndex = d.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - AMapToData d = decompress(); - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = d.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) - ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); - } - } - - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); - } - - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - AMapToData d = decompress(); - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final int rowIndex = d.getIndex(i) * nCol; - for (int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); - } - } - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { - AMapToData d = decompress(); - for (int i = rl; i < ru; i++) { - final int vr = d.getIndex(i); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) { - final int rowOut = _colIndexes.get(aix[j]); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - c[off + i] += aval[j]; - } - } - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - AMapToData d = decompress(); - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - for (int i = rl; i < ru; i++) { - final double v = dict[d.getIndex(i) * nCol + j]; - c[off + i] += v; - } - } - } - - @Override - protected 
void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { - AMapToData d = decompress(); - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - for (int i = 0; i < d.size(); i++) { - int di = d.getIndex(i); - if (sb.isEmpty(di)) - continue; - - final int apos = sb.pos(di); - final int alen = sb.size(di) + apos; - final int[] aix = sb.indexes(di); - final double[] aval = sb.values(di); - - for (int j = apos; j < alen; j++) { - sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); - } - } - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { - AMapToData d = decompress(); - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - SparseRow r = sbr.get(rowOut); - - for (int i = 0; i < d.size(); i++) { - final double v = dict[d.getIndex(i) * nCol + j]; - r = r.append(i, v); - } - sbr.set(rowOut, r, false); - } - } - - @Override - public double getIdx(int r, int colIdx) { - AMapToData d = decompress(); - return _dict.getValue(d.getIndex(r), colIdx, _colIndexes.size()); - } - - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - AMapToData d = decompress(); - for (int rix = rl; rix < ru; rix++) - c[rix] += preAgg[d.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - AMapToData d = decompress(); - for (int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAgg[d.getIndex(i)]); - } - - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - AMapToData d = decompress(); - for (int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[d.getIndex(rix)]; - } - - @Override - public int[] getCounts(int[] counts) { - return decompress().getCounts(counts); - } - - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (_colIndexes.size() == 1) - leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); - else - lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final DenseBlock retV = result.getDenseBlock(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
- if (matrix.isEmpty()) - return; - else if (matrix.isInSparseFormat()) { - AMapToData d = decompress(); - if (cl != 0 || cu != d.size()) - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); - } else if (!matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, - cu); - else - lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru) { - - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - // final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = 
decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, - int nColRet, double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; - } - } - - private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - - if (matrix.isInSparseFormat()) - lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final DenseBlock db = result.getDenseBlock(); - final SparseBlock sb = matrix.getSparseBlock(); - - if (cl != 0 || cu != d.size()) { - // sub part - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final double[] retV = db.values(r); - final int pos = db.pos(r); - lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); - } - } else { - for (int r = rl; r < ru; r++) - d.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); - } - } - - private final void 
lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { - AMapToData d = decompress(); - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - return; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - _dict.multiplyScalar(aval[i], retV, offR, d.getIndex(aix[i]), _colIndexes); - } - - private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - AMapToData d = decompress(); - final double[] retV = result.getDenseBlockValues(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] mV = matrix.getDenseBlockValues(); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); - } - } - - private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, - int cu) { - AMapToData d = decompress(); - final double[] retV = result.getDenseBlockValues(); - final int nColRet = result.getNumColumns(); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); - } - } - - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - d.preAggregateDense(m, preAgg, rl, ru, cl, cu); - } - - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - DenseBlock db = that.getDenseBlock(); - DenseBlock retDB = ret.getDenseBlock(); - for (int i = rl; i < ru; i++) - leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); - } - - @Override - public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { - if (_dict instanceof IdentityDictionary) - identityRightDecompressingMult(right, ret, rl, ru, crl, cru); - else - defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); - } - - private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); // TODO: SPECIES Vector in ColGroupDDC. What do ? 
- final int vLen = SPECIES.length(); - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - AMapToData d = decompress(); - for (int i = rl; i < ru; i++) { - int k = d.getIndex(i); - final int offOut = i * jd + crl; - final double aa = 1; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - - private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - AMapToData d = decompress(); - final double[] a = _dict.getValues(); - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int kd = _colIndexes.size(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - - final int blkzI = 32; - final int blkzK = 24; - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for (int bi = rl; bi < ru; bi += blkzI) { - final int bie = Math.min(ru, bi + blkzI); - for (int bk = 0; bk < kd; bk += blkzK) { - final int bke = Math.min(kd, bk + blkzK); - for (int i = bi; i < bie; i++) { - int offi = d.getIndex(i) * kd; - final int offOut = i * jd + crl; - for (int k = bk; k < bke; k++) { - final double aa = a[offi + k]; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - } - } - } - - final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { - vVec = vVec.broadcast(aa); - final int offj = k * jd; - final int end = endT + offj; - for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { - DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); - res = vVec.fma(bVec, res); - res.intoArray(c, offOut); - } - for (int j = end; j < cru + offj; j++, offOut++) { - double bb = b[j]; - c[offOut] += bb * aa; - } - } - - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - if (cl != 0 || cu != d.size()) { - throw new NotImplementedException(); - } - d.preAggregateSparse(sb, preAgg, rl, ru); - } - - @Override - public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - AMapToData d = decompress(); - try { - - d.preAggregateDDC_DDC(that.d, that._dict, ret, that._colIndexes.size()); - } catch (Exception e) { - throw new CompressionException(that.toString(), e); - } - } - - @Override - public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - AMapToData d = decompress(); - d.preAggregateDDC_SDCZ(that.d, that._dict, that._indexes, ret, that._colIndexes.size()); - } - - @Override - public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - final AOffsetIterator itThat = that._indexes.getOffsetIterator(); - final int nCol = that._colIndexes.size(); - final int finalOff = that._indexes.getOffsetToLast(); - final double[] v = ret.getValues(); - AMapToData d = decompress(); - while (true) { - final int to = d.getIndex(itThat.value()); - that._dict.addToEntry(v, 0, to, nCol); - if (itThat.value() == finalOff) - break; - itThat.next(); - } - } - - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - AMapToData d = decompress(); - d.preAggregateDDC_RLE(that._ptr, that.d, that._dict, ret, that._colIndexes.size()); - } - - 
@Override - public boolean sameIndexStructure(AColGroupCompressed that) { - AMapToData d = decompress(); - return that instanceof ColGroupDDC && ((ColGroupDDC) that).d == d; - } - - @Override - public long estimateInMemorySize() { - AMapToData d = decompress(); - long size = super.estimateInMemorySize(); - size += d.getInMemorySize(); - return size; - } - - @Override - public AColGroup scalarOperation(ScalarOperator op) { - AMapToData d = decompress(); - if ((op.fn instanceof Plus || op.fn instanceof Minus)) { - final double v0 = op.executeScalar(0); - if (v0 == 0) - return this; - final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); - return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); - } - return create(_colIndexes, _dict.applyScalarOp(op), d, getCachedCounts()); - } - - @Override - public AColGroup unaryOperation(UnaryOperator op) { - AMapToData d = decompress(); - return create(_colIndexes, _dict.applyUnaryOp(op), d, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); - AMapToData d = decompress(); - return create(_colIndexes, ret, d, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && - ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { - AMapToData d = decompress(); - final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); - return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); - } - final IDictionary ret; - if (_colIndexes.size() == 1) - ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); - else - ret = _dict.binOpRight(op, v, _colIndexes); - AMapToData d = decompress(); - return create(_colIndexes, ret, d, getCachedCounts()); - } - - // TODO - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - AMapToData d = decompress(); - d.write(out); - } - - // TODO - public static ColGroupDDC read(DataInput in) throws IOException { - IColIndex cols = ColIndexFactory.read(in); - IDictionary dict = DictionaryFactory.read(in); - AMapToData data = MapToFactory.readIn(in); - return new ColGroupDDC(cols, dict, data, null); - } - - @Override - public long getExactSizeOnDisk() { - AMapToData d = decompress(); - long ret = super.getExactSizeOnDisk(); - ret += d.getExactSizeOnDisk(); - return ret; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - protected int numRowsToMultiply() { - AMapToData d = decompress(); - return d.size(); - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - if (preAgg != null) { - AMapToData d = decompress(); - return create(colIndexes, preAgg, d, getCachedCounts()); - } 
else - return null; - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - AMapToData d = decompress(); - return ColGroupDDC.create(_colIndexes, _dict, d.slice(rl, ru), null); - } catch (Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); - } - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - AMapToData d = decompress(); - return create(colIndexes, newDictionary, d, getCachedCounts()); - } - - @Override - public AColGroup append(AColGroup g) { - if (g instanceof ColGroupDDC) { - if (g.getColIndices().equals(_colIndexes)) { - - ColGroupDDC gDDC = (ColGroupDDC) g; - if (gDDC._dict.equals(_dict)) { - AMapToData d = decompress(); - AMapToData nd = d.append(gDDC.d); - return create(_colIndexes, _dict, nd, null); - } else - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - } else - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); - } else - LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - @Override - public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { - for (int i = 1; i < g.length; i++) { - if (!_colIndexes.equals(g[i]._colIndexes)) { - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); - return null; - } - - if (!(g[i] instanceof ColGroupDDC)) { - LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - final ColGroupDDC gDDC = (ColGroupDDC) g[i]; - if (!gDDC._dict.equals(_dict)) { - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - return null; - } - } - AMapToData d = decompress(); - AMapToData nd = d.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); - return create(_colIndexes, _dict, nd, null); - } - - @Override - public ICLAScheme getCompressionScheme() { - throw new NotImplementedException(); - } - - @Override - public AColGroup recompress() { - throw new NotImplementedException(); - } - - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - try { - - IEncode enc = getEncoding(); - AMapToData d = decompress(); - EstimationFactors ef = new EstimationFactors(d.getUnique(), d.size(), d.size(), - _dict.getSparsity()); - return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); - } catch (Exception e) { - throw new DMLCompressionException(this.toString(), e); - } - } - - @Override - public IEncode getEncoding() { - AMapToData d = decompress(); - return EncodingFactory.create(d); - } - - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - AMapToData d = decompress(); - return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), d, getCachedCounts()); - } - - @Override - public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - final SparseBlock sb = selection.getSparseBlock(); - final SparseBlock retB = ret.getSparseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - @Override - protected void denseSelection(MatrixBlock selection, P[] points, 
MatrixBlock ret, int rl, int ru) { - // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; - final SparseBlock sb = selection.getSparseBlock(); - final DenseBlock retB = ret.getDenseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, - int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - if (_colIndexes instanceof RangeIndex) - leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); - else { - - pos += cl; // left side matrix position offset. - if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if (idx != nVal) - values2[pos2 + _colIndexes.get(idx)] += values[pos]; - } - } else { - AMapToData d = decompress(); - for (int rc = cl; rc < cu; rc++, pos++) - values2[pos2 + _colIndexes.get(d.getIndex(rc))] += values[pos]; - } - } - } - - private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, - int cl, int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - - final int firstCol = pos2 + _colIndexes.get(0); - pos += cl; // left side matrix position offset. - AMapToData d = decompress(); - if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = d.getIndex(rc); - if (idx != nVal) - values2[firstCol + idx] += values[pos]; - } - } else { - for (int rc = cl; rc < cu; rc++, pos++) - values2[firstCol + d.getIndex(rc)] += values[pos]; - } - } - - @Override - public AColGroup morph(CompressionType ct, int nRow) { - // return this; - if (ct == getCompType()) - return this; - else if (ct == CompressionType.SDC) { - // return this; - AMapToData d = decompress(); - int[] counts = getCounts(); - int maxId = maxIndex(counts); - double[] def = _dict.getRow(maxId, _colIndexes.size()); - - int offsetSize = nRow - counts[maxId]; - int[] offsets = new int[offsetSize]; - AMapToData reducedData = MapToFactory.create(offsetSize, d.getUnique()); - int o = 0; - for (int i = 0; i < nRow; i++) { - int v = d.getIndex(i); - if (v != maxId) { - offsets[o] = i; - reducedData.set(o, v); - o++; - } - } - - return ColGroupSDC.create(_colIndexes, d.size(), _dict, def, OffsetFactory.createOffset(offsets), - reducedData, null); - } else if (ct == CompressionType.CONST) { - // if(1 < getNumValues()) { - String thisS = this.toString(); - if (thisS.length() > 10000) - thisS = thisS.substring(0, 10000) + "..."; - LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); - return this; - // } - } else if (ct == CompressionType.DDCFOR) - return this; // it does not make sense to change to FOR. 
-	else
-		return super.morph(ct, nRow);
-	}
-
-	private static int maxIndex(int[] counts) {
-		int id = 0;
-		for (int i = 1; i < counts.length; i++) {
-			if (counts[i] > counts[id]) {
-				id = i;
-			}
-		}
-		return id;
-	}
-
-	@Override
-	public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List<AColGroup> right) {
-		final IDictionary combined = combineDictionaries(nCol, right);
-		final IColIndex combinedColIndex = combineColIndexes(nCol, right);
-		AMapToData d = decompress();
-		return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts());
-	}
-
-	@Override
-	public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) {
-		IDictionary b = ((ColGroupDDC) right).getDictionary();
-		IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols());
-		IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol));
-		AMapToData d = decompress();
-		return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts());
-	}
-
-	@Override
-	public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
-		AMapToData d = decompress();
-		AMapToData[] maps = d.splitReshapeDDC(multiplier);
-		AColGroup[] res = new AColGroup[multiplier];
-		for (int i = 0; i < multiplier; i++) {
-			final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
-			res[i] = create(ci, _dict, maps[i], null);
-		}
-		return res;
-	}
-
-	@Override
-	public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool)
-		throws Exception {
-		AMapToData d = decompress();
-		AMapToData[] maps = d.splitReshapeDDCPushDown(multiplier, pool);
-		AColGroup[] res = new AColGroup[multiplier];
-		for (int i = 0; i < multiplier; i++) {
-			final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
-			res[i] = create(ci, _dict, maps[i], null);
-		}
-		return res;
-	}
-
-	@Override
-	public String toString() {
-		StringBuilder sb = new StringBuilder();
-		sb.append(super.toString());
-		sb.append(String.format("\n%15s", "Data: "));
-		AMapToData d = decompress();
-		sb.append(d);
-		return sb.toString();
-	}
-
-	@Override
-	protected boolean allowShallowIdentityRightMult() {
-		return true;
-	}
+	/*
+	 * TODO: Operations with complex access patterns should be decompressed to the DDC format.
+	 * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are
+	 * suitable for sequential access and which are not. For those that are not, we materialize and fall back to DDC.
+	 * */
 }
From b1bf90696636a5d36c7579ad69beb19adecd0664 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Fri, 9 Jan 2026 10:06:39 +0100
Subject: [PATCH 03/18] More TODOs written and cleaned up the project.

---
 .../sysds/runtime/compress/colgroup/ColGroupDDCLZW.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index d83120e1345..12ac52186a9 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -114,8 +114,10 @@ private static int[] compress(AMapToData data) {
 		dataIntVals[i] = data.getIndex(i);
 	}
 
+	// TODO: Pre-fill the dictionary with the unique values.
+
 	// LZW dictionary. Maps (prefixCode, nextSymbol) to a new code.
-	// Using fastutil keeps lookups fast.
+	// Using fastutil keeps lookups fast. (TODO Dictionary)
 	final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16);
 	dict.defaultReturnValue(-1);
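For reference, each (prefixCode, nextSymbol) pair is folded into a single long so it can serve as one key in this map. The packKey helper itself is introduced earlier in the series and is not visible in this excerpt; a minimal sketch of what it presumably looks like, assuming the 32/32 bit split implied by the unpackfirst/unpacksecond helpers added in PATCH 04:

	// Hypothetical sketch (not part of the patches shown here): prefix code in the
	// upper 32 bits, next symbol in the lower 32 bits, so that
	// (int) (key >>> 32) recovers prefixCode and (int) key recovers nextSymbol.
	private static long packKey(int prefixCode, int nextSymbol) {
		return ((long) prefixCode << 32) | (nextSymbol & 0xFFFFFFFFL);
	}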
From 80274581bb1353a103ffe248696818a0ab5d4ab1 Mon Sep 17 00:00:00 2001
From: Annika Lehmann
Date: Sat, 10 Jan 2026 08:46:40 +0100
Subject: Dictionary initialization for compress and a rudimentary
 implementation of decompress

---
 .../compress/colgroup/ColGroupDDCLZW.java | 132 +++++++++++++++++-
 1 file changed, 126 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 12ac52186a9..9cc25cdb99d 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -70,6 +70,9 @@
 import org.jboss.netty.handler.codec.compression.CompressionException;
 import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
 import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Stack;
 
 /**
  * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
@@ -114,8 +117,26 @@ private static int[] compress(AMapToData data) {
 		dataIntVals[i] = data.getIndex(i);
 	}
 
+	// Output buffer.
+	IntArrayList out = new IntArrayList();
+	out.add(nUnique);
+
 	// TODO: Pre-fill the dictionary with the unique values.
 
 	// LZW dictionary. Maps (prefixCode, nextSymbol) to a new code.
 	// Using fastutil keeps lookups fast. (TODO Dictionary)
 	final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16);
 	dict.defaultReturnValue(-1);
 
+	// Fill the dictionary
+	// Store the symbols in the output stream
+	int index = 0;
+	for (int i = 0; i < nRows; i++) {
+		if (index == nUnique){
+			break;
+		}
+		int ct = dict.get(dataIntVals[i]);
+		if (ct == -1) {
+			dict.put(dataIntVals[i], index++);
+			out.add(dataIntVals[i]);
+		}
+	}
+	if (index != nUnique) {
+		throw new IllegalArgumentException("Not enough symbols found for number of unique values");
+	}
+
 	// Codes {0,...,nUnique - 1} are reserved for the original symbols.
 	int nextCode = nUnique;
 
-	// Output buffer.
-	IntArrayList out = new IntArrayList();
-
 	// Initialize w with the first input symbol.
 	int w = data.getIndex(0);
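With this header, the code array produced by compress begins with the alphabet size, then the unique symbols in order of first occurrence, then the actual LZW codes (this matches what the decompress added below reads back: code[0], code[1..alphabetSize], and codes from code[1+alphabetSize] on). For example, with illustrative values only, a mapping [2,0,2,3,0,2,1,0,2] with nUnique = 4 would start with:

	// [ 4,   2, 0, 3, 1,   <LZW codes ...> ]
	//   |    \_________/
	//   |    unique symbols in first-seen order
	//   alphabet size (nUnique)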
@@ -150,10 +171,108 @@ private static int[] compress(AMapToData data) {
 	return out.toIntArray();
 }
 
+private static int unpackfirst(long key){
+	return (int)(key >>> 32);
+}
+
+private static int unpacksecond(long key){
+	return (int)(key);
+}
+
 // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. (TODO)
-private AMapToData decompress(int[] _dataLZW) {
-	AMapToData d = null;
-	return null;
+private static int[] packint(int[] arr, int last){
+	int[] result = Arrays.copyOf(arr, arr.length+1);
+	result[arr.length] = last;
+	return result;
+}
+
+private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) {
+
+	Stack<Integer> stack = new Stack<>();
+
+	int c = code;
+
+	while (c >= alphabetSize) {
+		long key = dict.get(c);
+		int symbol = unpacksecond(key);
+		stack.push(symbol);
+		c = unpackfirst(key);
+	}
+
+	// Base symbol
+	stack.push(c);
+	int [] outarray = new int[stack.size()];
+	int i = 0;
+	// write to the output in the correct order
+	while (!stack.isEmpty()) {
+		outarray[i++] = stack.pop();
+	}
+	return outarray;
+}
+
+private static void addtoOutput(IntArrayList outarray, int[] code) {
+	for (int i = 0; i < code.length; i++) {
+		outarray.add(code[i]);
+	}
+}
+
+private static IntArrayList decompress(int[] code) { //TODO: return AMapToData
+
+	Map<Integer, Long> dict = new HashMap<>();
+
+	//HashMap<Integer, Long> dict = new HashMap<>();
+	int alphabetSize = code[0];
+	//int nextCode = 0;
+
+
+	// Fill dictionary with the base symbols
+	for (int i = 0; i < alphabetSize; i++) {
+		//dict.put(i, new int[]{code[1+i]}); // TODO: automatically take the number itself if it is < alphabetSize?
+		//_dict.put(List.of(i), nextCode++);
+		dict.put(i, packKey(-1, code[i]));
+	}
+
+	// Result of the decompression
+	IntArrayList o = new IntArrayList();
+	//List<Integer> o = new ArrayList<>();
+
+	int old = code[1+alphabetSize];
+	//long next = dict.get(old);
+	int[] next = unpack(old, alphabetSize, dict);
+	addtoOutput(o, next);
+	int c = next[0];
+
+
+	for (int i = alphabetSize+2; i < code.length; i++) {
+		int key = code[i];
+		if (! dict.containsKey(key)) {
+			int[] oldnext = unpack(old, alphabetSize, dict);
+			int first = oldnext[0];
+			next = packint(oldnext, first);
+		} else {
+			next = unpack(key, alphabetSize, dict);
+		}
+		for (int inh : next){ // TODO: separate method
+			o.add(inh);
+		}
+		int first = next[0];
+		long s = packKey(old, first);
+		dict.put(alphabetSize+i, s); // count instead of alphabet
+		//count++;
+		old = key;
+	}
+	return o;
+	/*AMapToData d = _data;
+	if (d == null) {
+		synchronized (this) {
+			d = _data;
+			if (d == null) {
+				d = decode(_dataLZW, _nRows, _nUnique);
+				_data = d;
+			}
+		}
+	}*/
+	//return null;
 }
@@ -225,3 +344,4 @@ else if (data.getUnique() == 1)
  * suitable for sequential access and which are not. For those that are not, we materialize and fall back to DDC.
  * */
 }
+
From ef3b8347b7974ccccdc160d2a315306abae82e51 Mon Sep 17 00:00:00 2001
From: Annika Lehmann
Date: Sat, 10 Jan 2026 08:54:56 +0100
Subject: [PATCH 05/18] Improved readability

---
 .../compress/colgroup/ColGroupDDCLZW.java | 29 ++++---------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 9cc25cdb99d..383a90d4cd2 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -121,7 +121,6 @@ private static int[] compress(AMapToData data) {
 	IntArrayList out = new IntArrayList();
 	out.add(nUnique);
 
-	// TODO: Pre-fill the dictionary with the unique values.
 
 	// LZW dictionary. Maps (prefixCode, nextSymbol) to a new code.
 	// Using fastutil keeps lookups fast. (TODO Dictionary)
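The containsKey branch in the decompress introduced in PATCH 04 handles the classic KwKwK corner case of LZW: the decoder may receive a code it has not defined yet, namely when the encoder emits a phrase that was added in the immediately preceding step. A minimal trace, ignoring the header layout for brevity and assuming a one-symbol alphabet {0}: the input [0,0,0] encodes to the code sequence [0,1]; when the decoder reads code 1 it is not in the dictionary yet, so the phrase is reconstructed as the previous phrase plus its own first symbol, [0] + 0 = [0,0], yielding the correct output [0,0,0].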
@@ -217,27 +216,23 @@ private static void addtoOutput(IntArrayList outarray, int[] code) {
 }
 
 private static IntArrayList decompress(int[] code) { //TODO: return AMapToData
-
+	// Dictionary
 	Map<Integer, Long> dict = new HashMap<>();
 
-	//HashMap<Integer, Long> dict = new HashMap<>();
+	// Extract alphabet size
 	int alphabetSize = code[0];
-	//int nextCode = 0;
-
 
-	// Fill dictionary with the base symbols
+	// Dictionary initialization
 	for (int i = 0; i < alphabetSize; i++) {
-		//dict.put(i, new int[]{code[1+i]}); // TODO: automatically take the number itself if it is < alphabetSize?
-		//_dict.put(List.of(i), nextCode++);
 		dict.put(i, packKey(-1, code[i]));
 	}
 
 	// Result of the decompression
 	IntArrayList o = new IntArrayList();
-	//List<Integer> o = new ArrayList<>();
 
+	// Decompression
 	int old = code[1+alphabetSize];
-	//long next = dict.get(old);
 	int[] next = unpack(old, alphabetSize, dict);
 	addtoOutput(o, next);
 	int c = next[0];
@@ -252,27 +247,15 @@ private static IntArrayList decompress(int[] code) { //TODO: return AMapToData
 		} else {
 			next = unpack(key, alphabetSize, dict);
 		}
-		for (int inh : next){ // TODO: separate method
+		for (int inh : next){ // TODO: efficiency
 			o.add(inh);
 		}
 		int first = next[0];
 		long s = packKey(old, first);
-		dict.put(alphabetSize+i, s); // count instead of alphabet
-		//count++;
+		dict.put(alphabetSize+i, s);
 		old = key;
 	}
 	return o;
-	/*AMapToData d = _data;
-	if (d == null) {
-		synchronized (this) {
-			d = _data;
-			if (d == null) {
-				d = decode(_dataLZW, _nRows, _nUnique);
-				_data = d;
-			}
-		}
-	}*/
-	//return null;
 }
From 988682114c176b64abc41abc93b18cdd4736487d Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 11:25:24 +0100
Subject: [PATCH 06/18] Minor error fixing. Redesigned compress method.

---
 .../compress/colgroup/ColGroupDDCLZW.java | 40 +++++--------------
 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 383a90d4cd2..4ad6fff6feb 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -111,38 +111,18 @@ private static int[] compress(AMapToData data) {
 		throw new IllegalArgumentException("Invalid input: data has no unique values");
 	}
 
-	// Extract _data values as int array.
-	final int[] dataIntVals = new int[nRows];
-	for (int i = 0; i < nRows; i++) {
-		dataIntVals[i] = data.getIndex(i);
-	}
-
-	// Output buffer.
-	IntArrayList out = new IntArrayList();
-	out.add(nUnique);
+	// Fast-path: single symbol
+	if (nRows == 1)
+		return new int[]{data.getIndex(0)};
 
-	// LZW dictionary. Maps (prefixCode, nextSymbol) to a new code.
+	// LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code).
 	// Using fastutil keeps lookups fast. (TODO Dictionary)
 	final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16);
 	dict.defaultReturnValue(-1);
 
-	// Fill the dictionary
-	// Store the symbols in the output stream
-	int index = 0;
-	for (int i = 0; i < nRows; i++) {
-		if (index == nUnique){
-			break;
-		}
-		int ct = dict.get(dataIntVals[i]);
-		if (ct == -1) {
-			dict.put(dataIntVals[i], index++);
-			out.add(dataIntVals[i]);
-		}
-	}
-	if (index != nUnique) {
-		throw new IllegalArgumentException("Not enough symbols found for number of unique values");
-	}
+	// Output buffer (heuristic capacity; avoids frequent reallocs)
+	final IntArrayList out = new IntArrayList(Math.max(16, nRows / 2));
 
 	// Codes {0,...,nUnique - 1} are reserved for the original symbols.
 	int nextCode = nUnique;
 
 	// Initialize w with the first input symbol.
 	int w = data.getIndex(0);
 
 	// Process the remaining input symbols.
+	// Example: _data = [2,0,2,3,0,2,1,0,2].
 	for (int i = 1; i < nRows; i++) {
-		int k = data.getIndex(i); // next input symbol
-		long key = packKey(w, k); // encode (w,k) into long key
+		final int k = data.getIndex(i); // next input symbol
+		final long key = packKey(w, k); // encode (w,k) into long key
 		int wk = dict.get(key); // look if wk exists in dict
 
 		if (wk != -1) {
 			w = wk; // wk exists in dict so replace w by wk and continue.
 		} else {
-			// wk does not exist in dict.
+			// wk does not exist in dict. output current phrase, add new phrase, restart at k
 			out.add(w);
 			dict.put(key, nextCode++);
 			w = k; // Start new phrase with k
 		}
 	}
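Tracing the redesigned compress on the example from the comment, with nUnique = 4 and _data = [2,0,2,3,0,2,1,0,2], and assuming the final phrase w is flushed before returning as in the original compress; codes 4 and up are assigned to phrases as they are discovered:

	// w=2, k=0: (2,0) miss -> emit 2, dict[(2,0)]=4, w=0
	// w=0, k=2: (0,2) miss -> emit 0, dict[(0,2)]=5, w=2
	// w=2, k=3: (2,3) miss -> emit 2, dict[(2,3)]=6, w=3
	// w=3, k=0: (3,0) miss -> emit 3, dict[(3,0)]=7, w=0
	// w=0, k=2: (0,2) hit  -> w=5
	// w=5, k=1: (5,1) miss -> emit 5, dict[(5,1)]=8, w=1
	// w=1, k=0: (1,0) miss -> emit 1, dict[(1,0)]=9, w=0
	// w=0, k=2: (0,2) hit  -> w=5
	// end: emit w=5  =>  out = [2,0,2,3,5,1,5]  (9 symbols -> 7 codes)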
From e0d5d75d99880b53b799d370dbf698587a3158b4 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 11:30:58 +0100
Subject: [PATCH 07/18] Added read/write methods to serialize and deserialize
 from stream.

---
 .../compress/colgroup/ColGroupDDCLZW.java | 36 +++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 4ad6fff6feb..4b182c8b653 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -117,7 +117,7 @@ private static int[] compress(AMapToData data) {
 
 	// LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code).
-	// Using fastutil keeps lookups fast. (TODO Dictionary)
+	// Using fastutil keeps lookups fast. (TODO improve time/space complexity)
 	final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16);
 	dict.defaultReturnValue(-1);
 
@@ -308,5 +307,38 @@ else if (data.getUnique() == 1)
  * TODO: Operations with complex access patterns should be decompressed to the DDC format.
  * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are
  * suitable for sequential access and which are not. For those that are not, we materialize and fall back to DDC.
  * */
+
+	// Deserialize a ColGroupDDCLZW object from a binary stream.
+	public static ColGroupDDCLZW read(DataInput in) throws IOException {
+		final IColIndex colIndexes = ColIndexFactory.read(in);
+		final IDictionary dict = DictionaryFactory.read(in);
+
+		// Metadata for lzw mapping.
+		final int nRows = in.readInt();
+		final int nUnique = in.readInt();
+
+		// Read compressed mapping array.
+		final int len = in.readInt();
+		if (len < 0)
+			throw new IOException("Invalid LZW data length: " + len);
+
+		final int[] dataLZW = new int[len];
+		for (int i = 0; i < len; i++)
+			dataLZW[i] = in.readInt();
+
+		// cachedCounts currently not serialized (mirror ColGroupDDC.read which passes null)
+		return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null);
+	}
+
+	// Serialize a ColGroupDDCLZW object into a binary stream.
+	@Override
+	public void write(DataOutput out) throws IOException {
+		_colIndexes.write(out);
+		_dict.write(out);
+		out.writeInt(_nRows);
+		out.writeInt(_nUnique);
+		out.writeInt(_dataLZW.length);
+		for (int i : _dataLZW) out.writeInt(i);
+	}
 }
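These two methods can be exercised together with the standard java.io stream classes; a minimal round-trip sketch (test-style code, not part of the patch, assuming an already constructed ColGroupDDCLZW instance g):

	ByteArrayOutputStream bos = new ByteArrayOutputStream();
	g.write(new DataOutputStream(bos)); // serialize
	DataInput in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
	ColGroupDDCLZW g2 = ColGroupDDCLZW.read(in); // deserialize
	// g2 should hold the same column indexes, dictionary, and compressed mapping,
	// since read consumes the fields in exactly the order write produces them.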
From beb4613b990e513f85ac6e35360d00fc9fe7d13a Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 12:16:24 +0100
Subject: [PATCH 08/18] Commented code, error handling for compress. Next
 step: make compress and decompress and their data structures compatible.

---
 .../runtime/compress/colgroup/ColGroupDDCLZW.java | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 4b182c8b653..8e8bc7dfbb1 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -128,12 +128,18 @@ private static int[] compress(AMapToData data) {
 	int nextCode = nUnique;
 
 	// Initialize w with the first input symbol.
+	// AMapToData stores dictionary indices, not actual data values.
+	// Since indices reference positions in an IDictionary, they are always in the valid index range 0 ... nUnique-1;
 	int w = data.getIndex(0);
 
 	// Process the remaining input symbols.
 	// Example: _data = [2,0,2,3,0,2,1,0,2].
 	for (int i = 1; i < nRows; i++) {
 		final int k = data.getIndex(i); // next input symbol
+
+		if(k < 0 || k >= nUnique)
+			throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")");
+
 		final long key = packKey(w, k); // encode (w,k) into long key
 		int wk = dict.get(key); // look if wk exists in dict
 
@@ -151,18 +157,24 @@ private static int[] compress(AMapToData data) {
 	return out.toIntArray();
 }
 
+// Unpack upper 32 bits (w) of (w,k) key pair.
 private static int unpackfirst(long key){
 	return (int)(key >>> 32);
 }
 
+// Unpack lower 32 bits (k) of (w,k) key pair.
 private static int unpacksecond(long key){
 	return (int)(key);
 }
 
-// Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. (TODO)
+// Append symbol to end of int-array.
 private static int[] packint(int[] arr, int last){
 	int[] result = Arrays.copyOf(arr, arr.length+1);
 	result[arr.length] = last;
 	return result;
 }
 
+// Reconstruct the phrase for an LZW code.
 private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) {
 
+// Decompresses an LZW-compressed vector into its pre-compressed AMapToData form.
+// TODO: Compatibility with compress() and used data structures. Improve time/space complexity.
 private static IntArrayList decompress(int[] code) { //TODO: return AMapToData
 	// Dictionary
 	Map<Integer, Long> dict = new HashMap<>();
From 620e03aa0abc953af90f05480e053d2d03928b17 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 18:18:56 +0100
Subject: [PATCH 09/18] Added first stages of tests. Improved compression and
 decompression algorithms and made them compatible.

---
 .../compress/colgroup/ColGroupDDCLZW.java | 122 +++---
 .../compress/colgroup/ColGroupDDCTest.java | 363 +++++++++---------
 2 files changed, 261 insertions(+), 224 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 8e8bc7dfbb1..80fc69a7371 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -70,6 +70,8 @@
 import org.jboss.netty.handler.codec.compression.CompressionException;
 import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
 import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
+
+
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Stack;
@@ -136,8 +138,8 @@ private static int[] compress(AMapToData data) {
 	for (int i = 1; i < nRows; i++) {
 		final int k = data.getIndex(i); // next input symbol
-
-		if(k < 0 || k >= nUnique)
+
+		if (k < 0 || k >= nUnique)
 			throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")");
 
 // Unpack upper 32 bits (w) of (w,k) key pair.
-private static int unpackfirst(long key){
-	return (int)(key >>> 32);
+private static int unpackfirst(long key) {
+	return (int) (key >>> 32);
 }
 
 // Unpack lower 32 bits (k) of (w,k) key pair.
-private static int unpacksecond(long key){
-	return (int)(key);
+private static int unpacksecond(long key) {
+	return (int) (key);
 }
 
 // Append symbol to end of int-array.
-private static int[] packint(int[] arr, int last){
-	int[] result = Arrays.copyOf(arr, arr.length+1);
+private static int[] packint(int[] arr, int last) {
+	int[] result = Arrays.copyOf(arr, arr.length + 1);
 	result[arr.length] = last;
 	return result;
 }
 
 // Reconstruct the phrase for an LZW code.
-private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) {
+private static int[] unpack(int code, int nUnique, Map<Integer, Long> dict) {
+	// Base symbol (implicit alphabet)
+	if (code < nUnique)
+		return new int[]{code};
 
 	Stack<Integer> stack = new Stack<>();
-
 	int c = code;
-
-	while (c >= alphabetSize) {
-		long key = dict.get(c);
+	while (c >= nUnique) {
+		Long key = dict.get(c);
+		if (key == null)
+			throw new IllegalStateException("Missing dictionary entry for code: " + c);
+
 		int symbol = unpacksecond(key);
 		stack.push(symbol);
 		c = unpackfirst(key);
 	}
 
 	// Base symbol
 	stack.push(c);
-	int [] outarray = new int[stack.size()];
+	int[] outarray = new int[stack.size()];
 	int i = 0;
 	// write to the output in the correct order
 	while (!stack.isEmpty()) {
 		outarray[i++] = stack.pop();
 	}
 	return outarray;
 }
 
-// Append phrase to output.
-private static void addtoOutput(IntArrayList outarray, int[] code) {
-	for (int i = 0; i < code.length; i++) {
-		outarray.add(code[i]);
-	}
-}
-
 // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form.
 // TODO: Compatibility with compress() and used data structures. Improve time/space complexity.
-private static IntArrayList decompress(int[] code) { //TODO: return AMapToData
-	// Dictionary
-	Map<Integer, Long> dict = new HashMap<>();
-
-	// Extract alphabet size
-	int alphabetSize = code[0];
-
-	// Dictionary initialization
-	for (int i = 0; i < alphabetSize; i++) {
-		dict.put(i, packKey(-1, code[i]));
-	}
-
-	// Result of the decompression
-	IntArrayList o = new IntArrayList();
-
-	// Decompression
-	int old = code[1+alphabetSize];
-	int[] next = unpack(old, alphabetSize, dict);
-	addtoOutput(o, next);
-	int c = next[0];
-
-	for (int i = alphabetSize+2; i < code.length; i++) {
-		int key = code[i];
-		if (! dict.containsKey(key)) {
-			int[] oldnext = unpack(old, alphabetSize, dict);
-			int first = oldnext[0];
-			next = packint(oldnext, first);
-		} else {
-			next = unpack(key, alphabetSize, dict);
-		}
-		for (int inh : next){ // TODO: efficiency
-			o.add(inh);
-		}
-		int first = next[0];
-		long s = packKey(old, first);
-		dict.put(alphabetSize+i, s);
-		old = key;
-	}
-	return o;
-}
+private static AMapToData decompress(int[] codes, int nUnique, int nRows) {
+	// Validate input arguments.
+	if (codes == null)
+		throw new IllegalArgumentException("codes is null");
+	if (codes.length == 0)
+		throw new IllegalArgumentException("codes is empty");
+	if (nUnique <= 0)
+		throw new IllegalArgumentException("Invalid alphabet size: " + nUnique);
+	if (nRows <= 0) {
+		throw new IllegalArgumentException("Invalid nRows: " + nRows);
+	}
+
+	// Maps: code -> packKey(prefixCode, lastSymbolOfPhrase).
+	// Base symbols (0..nUnique-1) are implicit and not stored here.
+	final Map<Integer, Long> dict = new HashMap<>();
+
+	// Output mapping that will be reconstructed.
+	AMapToData out = MapToFactory.create(nRows, nUnique);
+	int outPos = 0; // Current write position in the output mapping.
+
+	// Decode the first code. The first code always expands to a valid phrase without needing
+	// any dictionary entries.
+	int old = codes[0];
+	int[] oldPhrase = unpack(old, nUnique, dict);
+	for (int v : oldPhrase)
+		out.set(outPos++, v);
+
+	// Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols.
+	int nextCode = nUnique;
+
+	// Process remaining codes.
+	for (int i = 1; i < codes.length; i++) {
+		int key = codes[i];
+
+		int[] next;
+		if (key < nUnique || dict.containsKey(key)) {
+			// Normal case: The code is either a base symbol or already present in the dictionary.
+			next = unpack(key, nUnique, dict);
+		} else {
+			// KwKwK special case: The current code refers to a phrase that is being defined right now.
+			// next = oldPhrase + first(oldPhrase).
+			int first = oldPhrase[0];
+			next = packint(oldPhrase, first);
+		}
+
+		// Append the reconstructed phrase to the output mapping.
+		for (int v : next)
+			out.set(outPos++, v);
+
+		// Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)).
+		int first = next[0];
+		dict.put(nextCode++, packKey(old, first));
+
+		// Advance.
+		old = key;
+		oldPhrase = next;
+	}
+
+	// Safety check: decoder must produce exactly nRows symbols.
+	if (outPos != nRows)
+		throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + nRows);
+
+	// Return the reconstructed mapping.
+ return out; } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index 0f04cfc9c27..f3b1350cdc0 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW; import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -38,179 +39,193 @@ public class ColGroupDDCTest { - protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); - - @Test - public void testConvertToDeltaDDCBasic() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(11.0, mb.get(1, 0), 0.0); - assertEquals(21.0, mb.get(1, 1), 0.0); - assertEquals(12.0, mb.get(2, 0), 0.0); - assertEquals(22.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCSingleColumn() { - IColIndex colIndexes = ColIndexFactory.create(1); - double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(5, 5); - for(int i = 0; i < 5; i++) - data.set(i, i); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(5, 1, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); - - assertEquals(1.0, mb.get(0, 0), 0.0); - assertEquals(2.0, mb.get(1, 0), 0.0); - assertEquals(3.0, mb.get(2, 0), 0.0); - assertEquals(4.0, mb.get(3, 0), 0.0); - assertEquals(5.0, mb.get(4, 0), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithRepeatedValues() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = 
(ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(10.0, mb.get(1, 0), 0.0); - assertEquals(20.0, mb.get(1, 1), 0.0); - assertEquals(10.0, mb.get(2, 0), 0.0); - assertEquals(20.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithNegativeDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(8.0, mb.get(1, 0), 0.0); - assertEquals(15.0, mb.get(1, 1), 0.0); - assertEquals(12.0, mb.get(2, 0), 0.0); - assertEquals(25.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithZeroDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(5.0, mb.get(0, 0), 0.0); - assertEquals(0.0, mb.get(0, 1), 0.0); - assertEquals(5.0, mb.get(1, 0), 0.0); - assertEquals(0.0, mb.get(1, 1), 0.0); - assertEquals(0.0, mb.get(2, 0), 0.0); - assertEquals(5.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCMultipleUniqueDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(4, 4); - for(int i = 0; i < 4; i++) - data.set(i, i); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(4, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); - - assertEquals(1.0, mb.get(0, 0), 0.0); - assertEquals(2.0, mb.get(0, 1), 0.0); - assertEquals(3.0, mb.get(1, 0), 0.0); - assertEquals(4.0, mb.get(1, 1), 0.0); - assertEquals(5.0, mb.get(2, 0), 0.0); - assertEquals(6.0, mb.get(2, 1), 0.0); - assertEquals(7.0, mb.get(3, 0), 0.0); - assertEquals(8.0, mb.get(3, 1), 0.0); - } + protected static final Log LOG = 
LogFactory.getLog(ColGroupDDCTest.class.getName()); + + @Test + public void testLZWRoundTripMapping() throws Exception { + // Build a mapping with repetition to actually exercise LZW + // Example: [2,0,2,3,0,2,1,0,2] + final int nRows = 9; + final int nUnique = 4; + AMapToData data = MapToFactory.create(nRows, nUnique); + int[] src = new int[]{2, 0, 2, 3, 0, 2, 1, 0, 2}; + for (int i = 0; i < nRows; i++) + data.set(i, src[i]); + + // TODO: Write tests for ColGroupDDCLZW. + } + + @Test + public void testConvertToDeltaDDCBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(11.0, mb.get(1, 0), 0.0); + assertEquals(21.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(22.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCSingleColumn() { + IColIndex colIndexes = ColIndexFactory.create(1); + double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(5, 5); + for (int i = 0; i < 5; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(5, 1, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(1, 0), 0.0); + assertEquals(3.0, mb.get(2, 0), 0.0); + assertEquals(4.0, mb.get(3, 0), 0.0); + assertEquals(5.0, mb.get(4, 0), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithRepeatedValues() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(10.0, mb.get(1, 0), 0.0); + assertEquals(20.0, mb.get(1, 1), 0.0); + assertEquals(10.0, mb.get(2, 0), 0.0); + assertEquals(20.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithNegativeDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new 
double[]{10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(8.0, mb.get(1, 0), 0.0); + assertEquals(15.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(25.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithZeroDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(5.0, mb.get(0, 0), 0.0); + assertEquals(0.0, mb.get(0, 1), 0.0); + assertEquals(5.0, mb.get(1, 0), 0.0); + assertEquals(0.0, mb.get(1, 1), 0.0); + assertEquals(0.0, mb.get(2, 0), 0.0); + assertEquals(5.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCMultipleUniqueDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(4, 4); + for (int i = 0; i < 4; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(0, 1), 0.0); + assertEquals(3.0, mb.get(1, 0), 0.0); + assertEquals(4.0, mb.get(1, 1), 0.0); + assertEquals(5.0, mb.get(2, 0), 0.0); + assertEquals(6.0, mb.get(2, 1), 0.0); + assertEquals(7.0, mb.get(3, 0), 0.0); + assertEquals(8.0, mb.get(3, 1), 0.0); + } } From b7911d7492c7ba832c5d66ca1b50d6e4cab5d0ca Mon Sep 17 00:00:00 2001 From: fjobs Date: Mon, 12 Jan 2026 11:37:28 +0100 Subject: [PATCH 10/18] Added convertToDDCLZW() method to ColGroupDDC Class. Added convertToDDC test for ColGroupDDCTest. Improved compress/decompress methods in LZW class. 
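
The intended contract for these methods is a lossless round trip:
decompress(compress(m), nUnique, nRows) must reproduce the original mapping m
exactly. A minimal sketch of such a round-trip check in Java (the static
compress(AMapToData) signature below is hypothetical; decompress(int[], int, int)
is the decoder from this series):

	// Mapping with repeated phrases so LZW actually builds dictionary entries.
	int nRows = 9, nUnique = 4;
	AMapToData in = MapToFactory.create(nRows, nUnique);
	int[] src = new int[] {2, 0, 2, 3, 0, 2, 1, 0, 2};
	for (int i = 0; i < nRows; i++)
		in.set(i, src[i]);

	// compress(...) is assumed here; decompress(...) is defined in ColGroupDDCLZW.
	int[] codes = ColGroupDDCLZW.compress(in);
	AMapToData out = ColGroupDDCLZW.decompress(codes, nUnique, nRows);

	for (int i = 0; i < nRows; i++)
		assertEquals(in.getIndex(i), out.getIndex(i));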
--- .../compress/colgroup/ColGroupDDC.java | 2164 ++++++++--------- .../compress/colgroup/ColGroupDDCLZW.java | 260 +- .../compress/colgroup/ColGroupDDCTest.java | 67 +- 3 files changed, 1394 insertions(+), 1097 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index ac4defcabd5..c0d78e11783 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -75,1091 +75,1081 @@ * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC). */ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { - private static final long serialVersionUID = -5769772089913918987L; - - protected final AMapToData _data; - - static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - - protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - _data = data; - - if(CompressedMatrixBlock.debug) { - if(getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if(data.size() == 0) - throw new DMLCompressionException("Invalid length of the data. is zero"); - - if(data.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if(c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - data.verify(); - } - } - - public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if(data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else if(dict == null) - return new ColGroupEmpty(colIndexes); - else - return new ColGroupDDC(colIndexes, dict, data, cachedCounts); - } - - public AColGroup sparsifyFOR() { - return ColGroupDDCFOR.sparsifyFOR(this); - } - - public CompressionType getCompType() { - return CompressionType.DDC; - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if(sb.isEmpty(vr)) - continue; - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); - } - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int idxSize = _colIndexes.size(); - if(db.isContiguous()) { - final int nColOut = db.getDim(1); - if(idxSize == 1 && nColOut == 1) - decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); - else if(idxSize == 1) - decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); - else if(idxSize == nColOut) // offC == 0 implied - decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); - else if(offC == 0 && offR == 0) - decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); - else if(offC == 0) - decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); - else - 
decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - - private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - final int nCols = db.getDim(1); - final int colOff = _colIndexes.get(0) + offC; - for(int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) - c[offT] += values[_data.getIndex(i)]; - - } - - @Override - public AMapToData getMapToData() { - return _data; - } - - private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); - } - - private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, - double[] values, AMapToData data) { - data.decompressToRange(c, rl, ru, offR, values); - - } - - private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol) { - final double[] c = db.values(0); - for(int r = rl; r < ru; r++) { - final int start = _data.getIndex(r) * nCol; - final int offStart = (offR + r) * nCol; - LibMatrixMult.vectAdd(values, c, start, offStart, nCol); - } - } - - private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol, int colOut) { - int off = (rl + offR) * colOut; - for(int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { - final double[] c = db.values(offT); - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { - final int nCol = _colIndexes.size(); - final int nColU = db.getDim(1); - final double[] c = db.values(0); - for(int i = rl; i < ru; i++) { - final int off = i * nColU; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if(sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for(int j = apos; j < alen; j++) - ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); - } - } - - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); - } - - protected void 
decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final int rowIndex = _data.getIndex(i) * nCol; - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); - } - } - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { - for(int i = rl; i < ru; i++) { - final int vr = _data.getIndex(i); - if(sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for(int j = apos; j < alen; j++) { - final int rowOut = _colIndexes.get(aix[j]); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - c[off + i] += aval[j]; - } - } - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - final int nCol = _colIndexes.size(); - for(int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - for(int i = rl; i < ru; i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - c[off + i] += v; - } - } - } - - @Override - protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { - - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for(int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - for(int i = 0; i < _data.size(); i++) { - int di = _data.getIndex(i); - if(sb.isEmpty(di)) - continue; - - final int apos = sb.pos(di); - final int alen = sb.size(di) + apos; - final int[] aix = sb.indexes(di); - final double[] aval = sb.values(di); - - for(int j = apos; j < alen; j++) { - sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); - } - } - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for(int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - final int nCol = _colIndexes.size(); - for(int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - SparseRow r = sbr.get(rowOut); - - for(int i = 0; i < _data.size(); i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - r = r.append(i, v); - } - sbr.set(rowOut, r, false); - } - } - - @Override - public double getIdx(int r, int colIdx) { - return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); - } - - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - for(int rix = rl; rix < ru; rix++) - c[rix] += preAgg[_data.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - for(int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); - } - - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - for(int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[_data.getIndex(rix)]; - } - - @Override - public int[] getCounts(int[] counts) { - return _data.getCounts(counts); - } - - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if(_colIndexes.size() == 1) - 
leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); - else - lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final DenseBlock retV = result.getDenseBlock(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. - if(matrix.isEmpty()) - return; - else if(matrix.isInSparseFormat()) { - if(cl != 0 || cu != _data.size()) - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); - } - else if(!matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, - cu); - else - lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru) { - - if(retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - - for(int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for(int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if(retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final 
double[] retV = ret.values(r); - // final int offR = r * nColRet; - for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if(retV.isContiguous()) - lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, - int nColRet, double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for(int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for(int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for(int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for(int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for(int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for(int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - - if(matrix.isInSparseFormat()) - lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - final DenseBlock db = result.getDenseBlock(); - final SparseBlock sb = matrix.getSparseBlock(); - - if(cl != 0 || cu != _data.size()) { - // sub part - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final double[] retV = db.values(r); - final 
int pos = db.pos(r); - lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); - } - } - else { - for(int r = rl; r < ru; r++) - _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); - } - } - - private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) - return; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); - } - - private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if(matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] mV = matrix.getDenseBlockValues(); - for(int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for(int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColRet = result.getNumColumns(); - for(int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = r * nColRet; - for(int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); - } - - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - DenseBlock db = that.getDenseBlock(); - DenseBlock retDB = ret.getDenseBlock(); - for(int i = rl; i < ru; i++) - leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); - } - - @Override - public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { - if(_dict instanceof IdentityDictionary) - identityRightDecompressingMult(right, ret, rl, ru, crl, cru); - else - defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); - } - - private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for(int i = rl; i < ru; i++) { - int k = _data.getIndex(i); - final int offOut = i * jd + crl; - final double aa = 1; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, 
vVec); - } - } - - private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] a = _dict.getValues(); - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int kd = _colIndexes.size(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - - final int blkzI = 32; - final int blkzK = 24; - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for(int bi = rl; bi < ru; bi += blkzI) { - final int bie = Math.min(ru, bi + blkzI); - for(int bk = 0; bk < kd; bk += blkzK) { - final int bke = Math.min(kd, bk + blkzK); - for(int i = bi; i < bie; i++) { - int offi = _data.getIndex(i) * kd; - final int offOut = i * jd + crl; - for(int k = bk; k < bke; k++) { - final double aa = a[offi + k]; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - } - } - } - - final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { - vVec = vVec.broadcast(aa); - final int offj = k * jd; - final int end = endT + offj; - for(int j = offj + crl; j < end; j += vLen, offOut += vLen) { - DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); - res = vVec.fma(bVec, res); - res.intoArray(c, offOut); - } - for(int j = end; j < cru + offj; j++, offOut++) { - double bb = b[j]; - c[offOut] += bb * aa; - } - } - - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - if(cl != 0 || cu != _data.size()) { - throw new NotImplementedException(); - } - _data.preAggregateSparse(sb, preAgg, rl, ru); - } - - @Override - public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - try { - - _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); - } - catch(Exception e) { - throw new CompressionException(that.toString(), e); - } - } - - @Override - public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); - } - - @Override - public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - final AOffsetIterator itThat = that._indexes.getOffsetIterator(); - final int nCol = that._colIndexes.size(); - final int finalOff = that._indexes.getOffsetToLast(); - final double[] v = ret.getValues(); - while(true) { - final int to = _data.getIndex(itThat.value()); - that._dict.addToEntry(v, 0, to, nCol); - if(itThat.value() == finalOff) - break; - itThat.next(); - } - } - - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); - } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; - } - - @Override - public ColGroupType getColGroupType() { - return ColGroupType.DDC; - } - - @Override - public long estimateInMemorySize() { - long size = super.estimateInMemorySize(); - size += _data.getInMemorySize(); - return size; - } - - @Override - public AColGroup scalarOperation(ScalarOperator op) { - if((op.fn instanceof Plus || op.fn instanceof 
Minus)) { - final double v0 = op.executeScalar(0); - if(v0 == 0) - return this; - final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup unaryOperation(UnaryOperator op) { - return create(_colIndexes, _dict.applyUnaryOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - if((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && - ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { - final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - final IDictionary ret; - if(_colIndexes.size() == 1) - ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); - else - ret = _dict.binOpRight(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - _data.write(out); - } - - public static ColGroupDDC read(DataInput in) throws IOException { - IColIndex cols = ColIndexFactory.read(in); - IDictionary dict = DictionaryFactory.read(in); - AMapToData data = MapToFactory.readIn(in); - return new ColGroupDDC(cols, dict, data, null); - } - - @Override - public long getExactSizeOnDisk() { - long ret = super.getExactSizeOnDisk(); - ret += _data.getExactSizeOnDisk(); - return ret; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - protected int numRowsToMultiply() { - return _data.size(); - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - if(preAgg != null) - return create(colIndexes, preAgg, _data, getCachedCounts()); - else - return null; - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); - } - catch(Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); - } - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return create(colIndexes, newDictionary, _data, getCachedCounts()); - } - - @Override - public AColGroup append(AColGroup g) { - if(g instanceof ColGroupDDC) { - if(g.getColIndices().equals(_colIndexes)) { - - ColGroupDDC gDDC = (ColGroupDDC) g; - if(gDDC._dict.equals(_dict)) { - AMapToData nd = _data.append(gDDC._data); - return 
create(_colIndexes, _dict, nd, null); - } - else - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - } - else - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); - } - else - LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - @Override - public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { - for(int i = 1; i < g.length; i++) { - if(!_colIndexes.equals(g[i]._colIndexes)) { - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); - return null; - } - - if(!(g[i] instanceof ColGroupDDC)) { - LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - final ColGroupDDC gDDC = (ColGroupDDC) g[i]; - if(!gDDC._dict.equals(_dict)) { - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - return null; - } - } - AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); - return create(_colIndexes, _dict, nd, null); - } - - @Override - public ICLAScheme getCompressionScheme() { - return DDCScheme.create(this); - } - - @Override - public AColGroup recompress() { - return this; - } - - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - try { - - IEncode enc = getEncoding(); - EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), - _dict.getSparsity()); - return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); - } - catch(Exception e) { - throw new DMLCompressionException(this.toString(), e); - } - } - - @Override - public IEncode getEncoding() { - return EncodingFactory.create(_data); - } - - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); - } - - @Override - public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - final SparseBlock sb = selection.getSparseBlock(); - final SparseBlock retB = ret.getSparseBlock(); - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - @Override - protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; - final SparseBlock sb = selection.getSparseBlock(); - final DenseBlock retB = ret.getDenseBlock(); - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, - int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - if(_colIndexes instanceof RangeIndex) - leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); - else { - - pos += cl; // left side matrix position offset. 
- if(a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for(int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if(idx != nVal) - values2[pos2 + _colIndexes.get(idx)] += values[pos]; - } - } - else { - for(int rc = cl; rc < cu; rc++, pos++) - values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos]; - } - } - } - - private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, - int cl, int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - - final int firstCol = pos2 + _colIndexes.get(0); - pos += cl; // left side matrix position offset. - if(a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for(int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if(idx != nVal) - values2[firstCol + idx] += values[pos]; - } - } - else { - for(int rc = cl; rc < cu; rc++, pos++) - values2[firstCol + _data.getIndex(rc)] += values[pos]; - } - } - - @Override - public AColGroup morph(CompressionType ct, int nRow) { - // return this; - if(ct == getCompType()) - return this; - else if(ct == CompressionType.SDC) { - // return this; - int[] counts = getCounts(); - int maxId = maxIndex(counts); - double[] def = _dict.getRow(maxId, _colIndexes.size()); - - int offsetSize = nRow - counts[maxId]; - int[] offsets = new int[offsetSize]; - AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique()); - int o = 0; - for(int i = 0; i < nRow; i++) { - int v = _data.getIndex(i); - if(v != maxId) { - offsets[o] = i; - reducedData.set(o, v); - o++; - } - } - - return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets), - reducedData, null); - } - else if(ct == CompressionType.CONST) { - // if(1 < getNumValues()) { - String thisS = this.toString(); - if(thisS.length() > 10000) - thisS = thisS.substring(0, 10000) + "..."; - LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); - return this; - // } - } - else if(ct == CompressionType.DDCFOR) - return this; // it does not make sense to change to FOR. - else - return super.morph(ct, nRow); - } - - private static int maxIndex(int[] counts) { - int id = 0; - for(int i = 1; i < counts.length; i++) { - if(counts[i] > counts[id]) { - id = i; - } - } - return id; - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { - final IDictionary combined = combineDictionaries(nCol, right); - final IColIndex combinedColIndex = combineColIndexes(nCol, right); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { - IDictionary b = ((ColGroupDDC) right).getDictionary(); - IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); - IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - AMapToData[] maps = _data.splitReshapeDDC(multiplier); - AColGroup[] res = new AColGroup[multiplier]; - for(int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? 
_colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) - throws Exception { - AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool); - AColGroup[] res = new AColGroup[multiplier]; - for(int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(String.format("\n%15s", "Data: ")); - sb.append(_data); - return sb.toString(); - } - - @Override - protected boolean allowShallowIdentityRightMult() { - return true; - } - - public AColGroup convertToDeltaDDC() { - int numCols = _colIndexes.size(); - int numRows = _data.size(); - - DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); - double[] rowDelta = new double[numCols]; - double[] prevRow = new double[numCols]; - DblArray dblArray = new DblArray(rowDelta); - int[] rowToDictId = new int[numRows]; - - double[] dictVals = _dict.getValues(); - - for(int i = 0; i < numRows; i++) { - int dictIdx = _data.getIndex(i); - int off = dictIdx * numCols; - for(int j = 0; j < numCols; j++) { - double val = dictVals[off + j]; - if(i == 0) { - rowDelta[j] = val; - prevRow[j] = val; - } else { - rowDelta[j] = val - prevRow[j]; - prevRow[j] = val; - } - } - - rowToDictId[i] = map.increment(dblArray); - } - - if(map.size() == 0) - return new ColGroupEmpty(_colIndexes); - - ACount[] vals = map.extractValues(); - final int nVals = vals.length; - final double[] dictValues = new double[nVals * numCols]; - final int[] oldIdToNewId = new int[map.size()]; - int idx = 0; - for(int i = 0; i < nVals; i++) { - final ACount dac = vals[i]; - final double[] arrData = dac.key().getData(); - System.arraycopy(arrData, 0, dictValues, idx, numCols); - oldIdToNewId[dac.id] = i; - idx += numCols; - } - - DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); - AMapToData newData = MapToFactory.create(numRows, nVals); - for(int i = 0; i < numRows; i++) { - newData.set(i, oldIdToNewId[rowToDictId[i]]); - } - return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); - } - + private static final long serialVersionUID = -5769772089913918987L; + + protected final AMapToData _data; + + static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; + + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + _data = data; + + if (CompressedMatrixBlock.debug) { + if (getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if (data.size() == 0) + throw new DMLCompressionException("Invalid length of the data. 
is zero"); + + if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if (c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + data.verify(); + } + } + + public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if (data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else if (dict == null) + return new ColGroupEmpty(colIndexes); + else + return new ColGroupDDC(colIndexes, dict, data, cachedCounts); + } + + public AColGroup sparsifyFOR() { + return ColGroupDDCFOR.sparsifyFOR(this); + } + + public CompressionType getCompType() { + return CompressionType.DDC; + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); + } + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int idxSize = _colIndexes.size(); + if (db.isContiguous()) { + final int nColOut = db.getDim(1); + if (idxSize == 1 && nColOut == 1) + decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == 1) + decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == nColOut) // offC == 0 implied + decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); + else if (offC == 0 && offR == 0) + decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); + else if (offC == 0) + decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + + private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + final int nCols = db.getDim(1); + final int colOff = _colIndexes.get(0) + offC; + for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) + c[offT] += values[_data.getIndex(i)]; + + } + + @Override + public AMapToData getMapToData() { + return _data; + } + + private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); + } + + private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, + double[] values, AMapToData data) { + data.decompressToRange(c, rl, ru, offR, values); + + } + + private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol) { + final double[] c = db.values(0); + for (int r = rl; r < ru; r++) { + final int start = 
_data.getIndex(r) * nCol; + final int offStart = (offR + r) * nCol; + LibMatrixMult.vectAdd(values, c, start, offStart, nCol); + } + } + + private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol, int colOut) { + int off = (rl + offR) * colOut; + for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { + final double[] c = db.values(offT); + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { + final int nCol = _colIndexes.size(); + final int nColU = db.getDim(1); + final double[] c = db.values(0); + for (int i = rl; i < ru; i++) { + final int off = i * nColU; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) + ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); + } + + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int rowIndex = _data.getIndex(i) * nCol; + for (int j = 0; j < nCol; j++) + ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + for (int i = rl; i < ru; i++) { + final int vr = _data.getIndex(i); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) { + final int rowOut = _colIndexes.get(aix[j]); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + c[off + i] += aval[j]; + } + } + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + for (int i = rl; i < ru; i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + c[off + i] += v; + } + } + } + + @Override + protected void 
decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + for (int i = 0; i < _data.size(); i++) { + int di = _data.getIndex(i); + if (sb.isEmpty(di)) + continue; + + final int apos = sb.pos(di); + final int alen = sb.size(di) + apos; + final int[] aix = sb.indexes(di); + final double[] aval = sb.values(di); + + for (int j = apos; j < alen; j++) { + sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); + } + } + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + SparseRow r = sbr.get(rowOut); + + for (int i = 0; i < _data.size(); i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + r = r.append(i, v); + } + sbr.set(rowOut, r, false); + } + } + + @Override + public double getIdx(int r, int colIdx) { + return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + for (int rix = rl; rix < ru; rix++) + c[rix] += preAgg[_data.getIndex(rix)]; + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + for (int i = rl; i < ru; i++) + c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + for (int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[_data.getIndex(rix)]; + } + + @Override + public int[] getCounts(int[] counts) { + return _data.getCounts(counts); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (_colIndexes.size() == 1) + leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); + else + lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock retV = result.getDenseBlock(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
+ if (matrix.isEmpty()) + return; + else if (matrix.isInSparseFormat()) { + if (cl != 0 || cu != _data.size()) + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); + } else if (!matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + else + lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru) { + + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + // final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = 
sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, + int nColRet, double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + if (matrix.isInSparseFormat()) + lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + final DenseBlock db = result.getDenseBlock(); + final SparseBlock sb = matrix.getSparseBlock(); + + if (cl != 0 || cu != _data.size()) { + // sub part + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final double[] retV = db.values(r); + final int pos = db.pos(r); + lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); + } + } else { + for (int r = rl; r < ru; r++) + _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); + } + } + + private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + return; + 
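+		// Entries in [cl, cu) exist for this row: scale each mapped dictionary row by its value and accumulate.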
final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); + } + + private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] mV = matrix.getDenseBlockValues(); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + DenseBlock db = that.getDenseBlock(); + DenseBlock retDB = ret.getDenseBlock(); + for (int i = rl; i < ru; i++) + leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { + if (_dict instanceof IdentityDictionary) + identityRightDecompressingMult(right, ret, rl, ru, crl, cru); + else + defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); + } + + private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for (int i = rl; i < ru; i++) { + int k = _data.getIndex(i); + final int offOut = i * jd + crl; + final double aa = 1; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + + private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] a = _dict.getValues(); + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int kd = _colIndexes.size(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + + final int blkzI = 32; + 
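+		// Cache-blocking tile sizes: blkzI rows of the mapping by blkzK dictionary columns per tile.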
final int blkzK = 24; + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for (int bi = rl; bi < ru; bi += blkzI) { + final int bie = Math.min(ru, bi + blkzI); + for (int bk = 0; bk < kd; bk += blkzK) { + final int bke = Math.min(kd, bk + blkzK); + for (int i = bi; i < bie; i++) { + int offi = _data.getIndex(i) * kd; + final int offOut = i * jd + crl; + for (int k = bk; k < bke; k++) { + final double aa = a[offi + k]; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + } + } + } + + final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { + vVec = vVec.broadcast(aa); + final int offj = k * jd; + final int end = endT + offj; + for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { + DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); + DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); + res = vVec.fma(bVec, res); + res.intoArray(c, offOut); + } + for (int j = end; j < cru + offj; j++, offOut++) { + double bb = b[j]; + c[offOut] += bb * aa; + } + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + if (cl != 0 || cu != _data.size()) { + throw new NotImplementedException(); + } + _data.preAggregateSparse(sb, preAgg, rl, ru); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + try { + + _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); + } catch (Exception e) { + throw new CompressionException(that.toString(), e); + } + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); + } + + @Override + public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + final AOffsetIterator itThat = that._indexes.getOffsetIterator(); + final int nCol = that._colIndexes.size(); + final int finalOff = that._indexes.getOffsetToLast(); + final double[] v = ret.getValues(); + while (true) { + final int to = _data.getIndex(itThat.value()); + that._dict.addToEntry(v, 0, to, nCol); + if (itThat.value() == finalOff) + break; + itThat.next(); + } + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; + } + + @Override + public ColGroupType getColGroupType() { + return ColGroupType.DDC; + } + + @Override + public long estimateInMemorySize() { + long size = super.estimateInMemorySize(); + size += _data.getInMemorySize(); + return size; + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + if ((op.fn instanceof Plus || op.fn instanceof Minus)) { + final double v0 = op.executeScalar(0); + if (v0 == 0) + return this; + final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return create(_colIndexes, 
_dict.applyUnaryOp(op), _data, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && + ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { + final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + final IDictionary ret; + if (_colIndexes.size() == 1) + ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); + else + ret = _dict.binOpRight(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + _data.write(out); + } + + public static ColGroupDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDDC(cols, dict, data, null); + } + + @Override + public long getExactSizeOnDisk() { + long ret = super.getExactSizeOnDisk(); + ret += _data.getExactSizeOnDisk(); + return ret; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); + } + + @Override + protected int numRowsToMultiply() { + return _data.size(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + if (preAgg != null) + return create(colIndexes, preAgg, _data, getCachedCounts()); + else + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); + } catch (Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); + } + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return create(colIndexes, newDictionary, _data, getCachedCounts()); + } + + @Override + public AColGroup append(AColGroup g) { + if (g instanceof ColGroupDDC) { + if (g.getColIndices().equals(_colIndexes)) { + + ColGroupDDC gDDC = (ColGroupDDC) g; + if (gDDC._dict.equals(_dict)) { + AMapToData nd = _data.append(gDDC._data); + return create(_colIndexes, _dict, nd, null); + } else + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + } else + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); + } else + LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int 
blen, int rlen) { + for (int i = 1; i < g.length; i++) { + if (!_colIndexes.equals(g[i]._colIndexes)) { + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); + return null; + } + + if (!(g[i] instanceof ColGroupDDC)) { + LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + final ColGroupDDC gDDC = (ColGroupDDC) g[i]; + if (!gDDC._dict.equals(_dict)) { + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + return null; + } + } + AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); + return create(_colIndexes, _dict, nd, null); + } + + @Override + public ICLAScheme getCompressionScheme() { + return DDCScheme.create(this); + } + + @Override + public AColGroup recompress() { + return this; + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + try { + + IEncode enc = getEncoding(); + EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), + _dict.getSparsity()); + return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); + } catch (Exception e) { + throw new DMLCompressionException(this.toString(), e); + } + } + + @Override + public IEncode getEncoding() { + return EncodingFactory.create(_data); + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); + } + + @Override + public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + final SparseBlock sb = selection.getSparseBlock(); + final SparseBlock retB = ret.getSparseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; + final SparseBlock sb = selection.getSparseBlock(); + final DenseBlock retB = ret.getDenseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, + int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + if (_colIndexes instanceof RangeIndex) + leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); + else { + + pos += cl; // left side matrix position offset. 
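+			// An identity dictionary with an empty row reserves the last code (nVal) for the all-zero row; skip it.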
+ if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = _data.getIndex(rc); + if (idx != nVal) + values2[pos2 + _colIndexes.get(idx)] += values[pos]; + } + } else { + for (int rc = cl; rc < cu; rc++, pos++) + values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos]; + } + } + } + + private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, + int cl, int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + + final int firstCol = pos2 + _colIndexes.get(0); + pos += cl; // left side matrix position offset. + if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = _data.getIndex(rc); + if (idx != nVal) + values2[firstCol + idx] += values[pos]; + } + } else { + for (int rc = cl; rc < cu; rc++, pos++) + values2[firstCol + _data.getIndex(rc)] += values[pos]; + } + } + + @Override + public AColGroup morph(CompressionType ct, int nRow) { + // return this; + if (ct == getCompType()) + return this; + else if (ct == CompressionType.SDC) { + // return this; + int[] counts = getCounts(); + int maxId = maxIndex(counts); + double[] def = _dict.getRow(maxId, _colIndexes.size()); + + int offsetSize = nRow - counts[maxId]; + int[] offsets = new int[offsetSize]; + AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique()); + int o = 0; + for (int i = 0; i < nRow; i++) { + int v = _data.getIndex(i); + if (v != maxId) { + offsets[o] = i; + reducedData.set(o, v); + o++; + } + } + + return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets), + reducedData, null); + } else if (ct == CompressionType.CONST) { + // if(1 < getNumValues()) { + String thisS = this.toString(); + if (thisS.length() > 10000) + thisS = thisS.substring(0, 10000) + "..."; + LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); + return this; + // } + } else if (ct == CompressionType.DDCFOR) + return this; // it does not make sense to change to FOR. + else + return super.morph(ct, nRow); + } + + private static int maxIndex(int[] counts) { + int id = 0; + for (int i = 1; i < counts.length; i++) { + if (counts[i] > counts[id]) { + id = i; + } + } + return id; + } + + @Override + public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { + final IDictionary combined = combineDictionaries(nCol, right); + final IColIndex combinedColIndex = combineColIndexes(nCol, right); + return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); + } + + @Override + public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { + IDictionary b = ((ColGroupDDC) right).getDictionary(); + IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); + IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); + return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); + } + + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + AMapToData[] maps = _data.splitReshapeDDC(multiplier); + AColGroup[] res = new AColGroup[multiplier]; + for (int i = 0; i < multiplier; i++) { + final IColIndex ci = i == 0 ? 
_colIndexes : _colIndexes.shift(i * nColOrg); + res[i] = create(ci, _dict, maps[i], null); + } + return res; + } + + @Override + public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) + throws Exception { + AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool); + AColGroup[] res = new AColGroup[multiplier]; + for (int i = 0; i < multiplier; i++) { + final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); + res[i] = create(ci, _dict, maps[i], null); + } + return res; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append(String.format("\n%15s", "Data: ")); + sb.append(_data); + return sb.toString(); + } + + @Override + protected boolean allowShallowIdentityRightMult() { + return true; + } + + public AColGroup convertToDeltaDDC() { + int numCols = _colIndexes.size(); + int numRows = _data.size(); + + DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); + double[] rowDelta = new double[numCols]; + double[] prevRow = new double[numCols]; + DblArray dblArray = new DblArray(rowDelta); + int[] rowToDictId = new int[numRows]; + + double[] dictVals = _dict.getValues(); + + for (int i = 0; i < numRows; i++) { + int dictIdx = _data.getIndex(i); + int off = dictIdx * numCols; + for (int j = 0; j < numCols; j++) { + double val = dictVals[off + j]; + if (i == 0) { + rowDelta[j] = val; + prevRow[j] = val; + } else { + rowDelta[j] = val - prevRow[j]; + prevRow[j] = val; + } + } + + rowToDictId[i] = map.increment(dblArray); + } + + if (map.size() == 0) + return new ColGroupEmpty(_colIndexes); + + ACount[] vals = map.extractValues(); + final int nVals = vals.length; + final double[] dictValues = new double[nVals * numCols]; + final int[] oldIdToNewId = new int[map.size()]; + int idx = 0; + for (int i = 0; i < nVals; i++) { + final ACount dac = vals[i]; + final double[] arrData = dac.key().getData(); + System.arraycopy(arrData, 0, dictValues, idx, numCols); + oldIdToNewId[dac.id] = i; + idx += numCols; + } + + DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); + AMapToData newData = MapToFactory.create(numRows, nVals); + for (int i = 0; i < numRows; i++) { + newData.set(i, oldIdToNewId[rowToDictId[i]]); + } + return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); + } + + public AColGroup convertToDDCLZW() { + return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 80fc69a7371..d403b6e124d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -88,11 +88,9 @@ public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup { private static final long serialVersionUID = -5769772089913918987L; private final int[] _dataLZW; // LZW compressed representation of the mapping - private final int _nRows; // Number of rows in the mapping vector private final int _nUnique; // Number of unique values in the mapping vector - // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. 
(TODO) private static long packKey(int prefixCode, int nextSymbol) { return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); @@ -273,7 +271,6 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { return out; } - // Build Constructor: Used when creating a new DDCLZW instance during compression/build time. (TODO) private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); @@ -342,6 +339,28 @@ else if (data.getUnique() == 1) * suitable for sequential and which arent. those who arent then we shall materialize and fall back to ddc * */ + public AColGroup convertToDDC() { + final AMapToData map = decompress(_dataLZW, _nUnique, _nRows); + final int[] counts = getCounts(); // may be null depending on your group + return ColGroupDDC.create(_colIndexes, _dict, map, counts); + } + + + // Temporary getters for testing ! Remove before PR! + /*public int[] get_dataLZW() { + return _dataLZW; + } + + public int get_nRows() { + return _nRows; + } + + public int get_nUnique() { + return _nUnique; + }*/ + // Temporary getters for testing ! Remove before PR! + + // Deserialize ColGroupDDCLZW object in binary stream. public static ColGroupDDCLZW read(DataInput in) throws IOException { final IColIndex colIndexes = ColIndexFactory.read(in); @@ -364,6 +383,46 @@ public static ColGroupDDCLZW read(DataInput in) throws IOException { return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null); } + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { + + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + + } + + @Override + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { + + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + + } + // Serialize a ColGroupDDC-object into binary stream. 
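+	// On-disk layout: colIndexes, dictionary, nRows, nUnique, number of LZW codes, then the codes themselves.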
@Override public void write(DataOutput out) throws IOException { @@ -374,5 +433,200 @@ public void write(DataOutput out) throws IOException { out.writeInt(_dataLZW.length); for (int i : _dataLZW) out.writeInt(i); } + + @Override + public double getIdx(int r, int colIdx) { + return 0; + } + + @Override + public CompressionType getCompType() { + return null; + } + + @Override + protected ColGroupType getColGroupType() { + return null; + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + return null; + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + return null; + } + + @Override + public boolean containsValue(double pattern) { + return false; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + return 0; + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return null; + } + + @Override + public AColGroup append(AColGroup g) { + return null; + } + + @Override + protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { + return null; + } + + @Override + public ICLAScheme getCompressionScheme() { + return null; + } + + @Override + public AColGroup recompress() { + return null; + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + return null; + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return null; + } + + @Override + protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + + } + + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + return new AColGroup[0]; + } + + @Override + protected boolean allowShallowIdentityRightMult() { + return false; + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + return null; + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return null; + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + + } + + @Override + protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + + } + + @Override + protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + + } + + @Override + protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + + } + + @Override + protected int numRowsToMultiply() { + return 0; + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + + } + + @Override + protected int[] getCounts(int[] out) { + return new int[0]; + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return 0; 
+ } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return false; + } + + @Override + public AMapToData getMapToData() { + return null; + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index f3b1350cdc0..cd36b31e86d 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -42,17 +42,70 @@ public class ColGroupDDCTest { protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); @Test - public void testLZWRoundTripMapping() throws Exception { - // Build a mapping with repetition to actually exercise LZW - // Example: [2,0,2,3,0,2,1,0,2] - final int nRows = 9; - final int nUnique = 4; + public void testConvertToDDCLZWBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + + int[] src = new int[]{ + // repeating base pattern + 2, 0, 2, 1, 0, 2, 1, 0, 2, + 2, 0, 2, 1, 0, 2, 1, 0, 2, + + // variation / shifted pattern + 1, 0, 1, 2, 0, 1, 2, 0, 1, + 1, 0, 1, 2, 0, 1, 2, 0, 1, + + // longer runs (good for phrase growth) + 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, + + // mixed noise + 2, 1, 0, 2, 1, 0, 2, 1, 0, + 0, 2, 1, 0, 2, 1, 0, 2, 1,1,1,1,1,1,1, + + // repeating tail (tests dictionary reuse) + 2, 0, 2, 1, 0, 2, 1, 0, 2, + 2, 0, 2, 1, 0, 2, 1, 0, 2,0,0,0,0,0,1 + }; + + final int nRows = src.length; + final int nUnique = 3; AMapToData data = MapToFactory.create(nRows, nUnique); - int[] src = new int[]{2, 0, 2, 3, 0, 2, 1, 0, 2}; for (int i = 0; i < nRows; i++) data.set(i, src[i]); - // TODO: Write tests for ColGroupDDCLZW. 
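+		// Round-trip check: DDC -> DDCLZW -> DDC must reproduce the original mapping exactly.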
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDDCLZW();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDDCLZW);
+
+		ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result;
+		AColGroup ddclzwDecompressed = ddclzw.convertToDDC();
+
+		assertNotNull(ddclzwDecompressed);
+		assertTrue(ddclzwDecompressed instanceof ColGroupDDC);
+
+		ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed;
+
+		AMapToData d1 = ddc.getMapToData();
+		AMapToData d2 = ddc2.getMapToData();
+
+		assertEquals(d1.size(), d2.size());
+		assertEquals(d1.getUnique(), d2.getUnique());
+		for (int i = 0; i < d1.size(); i++)
+			assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i));
+
+		assertEquals(d1.size(), d2.size());
+		assertEquals(d1.getUnique(), d2.getUnique());
+
+		for (int i = 0; i < d1.size(); i++) {
+			assertEquals(d1.getIndex(i), d2.getIndex(i));
+		}
+
+		assertEquals(ddc.getColIndices(), ddc2.getColIndices());
 	}
 
 	@Test

From 1dfe91ee42afdb1d55e5aed7f52e4d80e9865e01 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Mon, 12 Jan 2026 12:33:29 +0100
Subject: [PATCH 11/18] Started working on ColGroupDDCLZW's other methods that
 need to be implemented from its interface.

---
 .../compress/colgroup/ColGroupDDCLZW.java     | 152 ++++++++----------
 1 file changed, 69 insertions(+), 83 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index d403b6e124d..ced9d315cc9 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -345,22 +345,6 @@ public AColGroup convertToDDC() {
 		return ColGroupDDC.create(_colIndexes, _dict, map, counts);
 	}
 
-
-	// Temporary getters for testing ! Remove before PR!
-	/*public int[] get_dataLZW() {
-		return _dataLZW;
-	}
-
-	public int get_nRows() {
-		return _nRows;
-	}
-
-	public int get_nUnique() {
-		return _nUnique;
-	}*/
-	// Temporary getters for testing ! Remove before PR!
-
-
 	// Deserialize ColGroupDDCLZW object in binary stream.
 	public static ColGroupDDCLZW read(DataInput in) throws IOException {
 		final IColIndex colIndexes = ColIndexFactory.read(in);
@@ -383,70 +367,117 @@ public static ColGroupDDCLZW read(DataInput in) throws IOException {
 		return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null);
 	}
 
+	// Serialize a ColGroupDDCLZW object into a binary stream.
@Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { + public void write(DataOutput out) throws IOException { + _colIndexes.write(out); + _dict.write(out); + out.writeInt(_nRows); + out.writeInt(_nUnique); + out.writeInt(_dataLZW.length); + for (int i : _dataLZW) out.writeInt(i); + } + @Override + public double getIdx(int r, int colIdx) { + return 0; } @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + public CompressionType getCompType() { + return null; + } + @Override + protected ColGroupType getColGroupType() { + return null; } @Override - protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); } @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + public ICLAScheme getCompressionScheme() { + throw new NotImplementedException(); + } + @Override + protected int numRowsToMultiply() { + return _nRows; } @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return new ColGroupDDCLZW(colIndexes, newDictionary, _dataLZW, _nRows, _nUnique, getCachedCounts()); + } + @Override + public AMapToData getMapToData() { + throw new NotImplementedException(); // or decompress and return data... } @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW; + } + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); } @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { } @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { } - // Serialize a ColGroupDDC-object into binary stream. 
@Override - public void write(DataOutput out) throws IOException { - _colIndexes.write(out); - _dict.write(out); - out.writeInt(_nRows); - out.writeInt(_nUnique); - out.writeInt(_dataLZW.length); - for (int i : _dataLZW) out.writeInt(i); + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { + } @Override - public double getIdx(int r, int colIdx) { - return 0; + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + } @Override - public CompressionType getCompType() { - return null; + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { + } @Override - protected ColGroupType getColGroupType() { - return null; + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + } @Override @@ -474,16 +505,6 @@ public AColGroup sliceRows(int rl, int ru) { return null; } - @Override - public boolean containsValue(double pattern) { - return false; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - return 0; - } - @Override public AColGroup unaryOperation(UnaryOperator op) { return null; @@ -499,11 +520,6 @@ protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { return null; } - @Override - public ICLAScheme getCompressionScheme() { - return null; - } - @Override public AColGroup recompress() { return null; @@ -544,11 +560,6 @@ protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex col return null; } - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return null; - } - @Override public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { @@ -579,11 +590,6 @@ protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { } - @Override - protected int numRowsToMultiply() { - return 0; - } - @Override public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { @@ -594,16 +600,6 @@ protected int[] getCounts(int[] out) { return new int[0]; } - @Override - protected double computeMxx(double c, Builtin builtin) { - return 0; - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - - } - @Override protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { @@ -618,15 +614,5 @@ protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return false; - } - - @Override - public AMapToData getMapToData() { - return null; - } } From 31568637f6f7ea783b66cc924ee18dd7454fba6a Mon Sep 17 00:00:00 2001 From: fjobs Date: Tue, 13 Jan 2026 11:27:24 +0100 Subject: [PATCH 12/18] test commit --- .../test/component/compress/colgroup/ColGroupDDCTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index cd36b31e86d..5b6a19848a4 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -63,11 +63,11 @@ public void testConvertToDDCLZWBasic() { // mixed noise 2, 1, 0, 2, 1, 0, 2, 1, 0, - 0, 2, 1, 0, 2, 1, 0, 2, 1,1,1,1,1,1,1, + 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, // repeating tail (tests dictionary reuse) 2, 0, 2, 1, 0, 2, 1, 0, 2, - 2, 0, 2, 1, 0, 2, 1, 0, 2,0,0,0,0,0,1 + 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1 }; final int nRows = src.length; From 10d577690dbaaab00362b729e725d929b3f34d4a Mon Sep 17 00:00:00 2001 From: fjobs Date: Tue, 13 Jan 2026 13:38:30 +0100 Subject: [PATCH 13/18] [SYSTEMDS-3779] Added new Compression and ColGroup Types DDCLZW. --- .../sysds/runtime/compress/colgroup/AColGroup.java | 4 ++-- .../runtime/compress/colgroup/ColGroupDDCLZW.java | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 003703f86a4..eb2a5a356b7 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -65,7 +65,7 @@ public abstract class AColGroup implements Serializable { /** Public super types of compression ColGroups supported */ public static enum CompressionType { - UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional; + UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, DDCLZW, LinearFunctional; public boolean isDense() { return this == DDC || this == CONST || this == DDCFOR || this == DDCFOR; @@ -86,7 +86,7 @@ public boolean isSDC() { * Protected such that outside the ColGroup package it should be unknown which specific subtype is used. 
*/ protected static enum ColGroupType { - UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, SDCFOR, DDCFOR, DeltaDDC, + UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, SDCFOR, DDCFOR, DDCLZW, DeltaDDC, LinearFunctional; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index ced9d315cc9..6a73fcb3cda 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -385,12 +385,12 @@ public double getIdx(int r, int colIdx) { @Override public CompressionType getCompType() { - return null; + return CompressionType.DDCLZW; } @Override protected ColGroupType getColGroupType() { - return null; + return ColGroupType.DDCLZW; } @Override @@ -440,6 +440,11 @@ protected void computeColMxx(double[] c, Builtin builtin) { _dict.aggregateCols(c, builtin, _colIndexes); } + @Override + public AColGroup sliceRows(int rl, int ru) { + return null; + } + @Override protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { @@ -500,11 +505,6 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa return null; } - @Override - public AColGroup sliceRows(int rl, int ru) { - return null; - } - @Override public AColGroup unaryOperation(UnaryOperator op) { return null; From 3c9e2ed9447d5b792b91b8af6eb932dedee27a3f Mon Sep 17 00:00:00 2001 From: fjobs Date: Tue, 13 Jan 2026 13:52:44 +0100 Subject: [PATCH 14/18] [SYSTEMDS-3779] Introduce initial ColGroupDDCLZW with LZW-compressed mapping This commit adds an initial implementation of ColGroupDDCLZW, a new column group that stores the mapping vector in LZW-compressed form instead of materializing MapToData explicitly. The design focuses on enabling sequential access directly on the compressed representation, while complex access patterns are intended to fall back to DDC. No cache or lazy decompression mechanism is introduced at this stage. 
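For illustration, the compression side is textbook LZW restricted to the
mapping alphabet 0..nUnique-1, with (prefixCode, nextSymbol) pairs packed
into long keys. A minimal standalone sketch of the idea (illustrative class
and method names, not part of this patch):

	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;

	public class MapLzwEncodeSketch {
		private static long key(int prefix, int symbol) {
			return (((long) prefix) << 32) | (symbol & 0xffffffffL);
		}

		// Codes 0..nUnique-1 are implicit base symbols; new phrases are
		// assigned codes from nUnique upwards. Assumes map.length >= 1.
		public static int[] encode(int[] map, int nUnique) {
			final Map<Long, Integer> dict = new HashMap<>();
			final List<Integer> out = new ArrayList<>();
			int nextCode = nUnique;
			int w = map[0];
			for (int i = 1; i < map.length; i++) {
				final int k = map[i];
				final Integer wk = dict.get(key(w, k));
				if (wk != null)
					w = wk; // the extended phrase is known; keep growing it
				else {
					out.add(w); // emit the longest known phrase ...
					dict.put(key(w, k), nextCode++); // ... and register phrase+k
					w = k;
				}
			}
			out.add(w); // flush the last phrase
			return out.stream().mapToInt(Integer::intValue).toArray();
		}

		public static void main(String[] args) {
			// [2,0,2,1,0,2,1,0,2] over 3 symbols encodes to [2, 0, 2, 1, 4, 6, 2]
			System.out.println(java.util.Arrays.toString(
				encode(new int[] {2, 0, 2, 1, 0, 2, 1, 0, 2}, 3)));
		}
	}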
--- .../apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 6a73fcb3cda..b706b7a5e59 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -445,6 +445,7 @@ public AColGroup sliceRows(int rl, int ru) { return null; } + @Override protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { From a8df1fe7fa85cf1e52fd498d22e570f81099563f Mon Sep 17 00:00:00 2001 From: Annika Lehmann Date: Thu, 15 Jan 2026 13:18:51 +0100 Subject: [PATCH 15/18] Decompression to a specific index --- .../compress/colgroup/ColGroupDDCLZW.java | 28 ++++++++++++++----- .../compress/colgroup/ColGroupDDCTest.java | 18 ++++++++---- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index b706b7a5e59..ca68bd7b3ce 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -206,7 +206,7 @@ private static int[] unpack(int code, int nUnique, Map dict) { // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. - private static AMapToData decompress(int[] codes, int nUnique, int nRows) { + private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) { // Validate input arguments. if (codes == null) throw new IllegalArgumentException("codes is null"); @@ -217,21 +217,26 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { if (nRows <= 0) { throw new IllegalArgumentException("Invalid nRows: " + nRows); } + if (index > nRows){ + throw new IllegalArgumentException("Index is larger than Data Length: " + index); + } // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). // Base symbols (0..nUnique-1) are implicit and not stored here. final Map dict = new HashMap<>(); // Output mapping that will be reconstructed. - AMapToData out = MapToFactory.create(nRows, nUnique); + AMapToData out = MapToFactory.create(index, nUnique); int outPos = 0; // Current write position in the output mapping. // Decode the first code. The first code always expands to a valid phrase without needing // any dictionary entries. int old = codes[0]; int[] oldPhrase = unpack(old, nUnique, dict); - for (int v : oldPhrase) + for (int v : oldPhrase){ + if (outPos == index) break; out.set(outPos++, v); + } // Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols. int nextCode = nUnique; @@ -252,7 +257,10 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { } // Append the reconstructed phrase to the output mapping. - for (int v : next) out.set(outPos++, v); + for (int v : next) { + if (outPos == index) break; + out.set(outPos++, v); + } // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). int first = next[0]; @@ -264,8 +272,8 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { } // Safety check: decoder must produce exactly nRows symbols. 
-		if (outPos != nRows)
-			throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + nRows);
+		if (outPos != index)
+			throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + index);
 
 		// Return the reconstructed mapping.
 		return out;
@@ -340,7 +348,13 @@ else if (data.getUnique() == 1)
 	 * 
 	 */
 	public AColGroup convertToDDC() {
-		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows);
+		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows);
+		final int[] counts = getCounts(); // may be null depending on your group
+		return ColGroupDDC.create(_colIndexes, _dict, map, counts);
+	}
+
+	public AColGroup convertToDDC(int index) {
+		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, index);
 		final int[] counts = getCounts(); // may be null depending on your group
 		return ColGroupDDC.create(_colIndexes, _dict, map, counts);
 	}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
index 5b6a19848a4..5aab22059fc 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
@@ -98,14 +98,20 @@ public void testConvertToDDCLZWBasic() {
 		for (int i = 0; i < d1.size(); i++)
 			assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i));
 
-		assertEquals(d1.size(), d2.size());
-		assertEquals(d1.getUnique(), d2.getUnique());
+		assertEquals(ddc.getColIndices(), ddc2.getColIndices());
 
-		for (int i = 0; i < d1.size(); i++) {
-			assertEquals(d1.getIndex(i), d2.getIndex(i));
-		}
+		// Test partial decompression:
+		// index is the number of symbols to decode (rows 0 to index-1)
+		int index = 10;
+		ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index);
 
-		assertEquals(ddc.getColIndices(), ddc2.getColIndices());
+		AMapToData d3 = ddcIndex.getMapToData();
+		assertEquals(index, d3.size());
+		assertEquals(ddc.getColIndices(), ddcIndex.getColIndices());
+
+		for(int i = 0; i < index; i++){
+			assertEquals(d1.getIndex(i), d3.getIndex(i));
+		}
 	}
 
 	@Test

From 96cb6e9f914a5747e025ca1f9a0ee09e2bd90afc Mon Sep 17 00:00:00 2001
From: Annika Lehmann
Date: Fri, 16 Jan 2026 09:04:52 +0100
Subject: [PATCH 16/18] sliceRows

---
 .../compress/colgroup/ColGroupDDCLZW.java     | 15 ++++++++---
 .../compress/colgroup/ColGroupDDCTest.java    | 25 ++++++++++++++++---
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index ca68bd7b3ce..d2a7dfbce62 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -394,7 +394,9 @@ public void write(DataOutput out) throws IOException {
 
 	@Override
 	public double getIdx(int r, int colIdx) {
-		return 0;
+		// TODO: should be fast; avoid decompressing a full prefix per lookup
+		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r + 1);
+		return _dict.getValue(map.getIndex(r), colIdx, _colIndexes.size());
 	}
 
 	@Override
@@ -421,6 +424,7 @@ public double getCost(ComputationCostEstimator e, int nRows) {
 
 	@Override
 	public ICLAScheme getCompressionScheme() {
+		// TODO: not implemented in ColGroupDDCFOR either - should we add it? Contents: nCols as in DDC
 		throw new NotImplementedException();
 	}
 
@@ -436,7 +440,7 @@ public AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) {
 
 	@Override
 	public AMapToData getMapToData() {
-		throw new NotImplementedException(); // or decompress and return data...
+		throw new NotImplementedException(); // or decompress and return data... decompress(_dataLZW, _nUnique, _nRows, _nRows)
 	}
 
 	@Override
@@ -456,7 +460,12 @@ protected void computeColMxx(double[] c, Builtin builtin) {
 
 	@Override
 	public AColGroup sliceRows(int rl, int ru) {
-		return null;
+		try{
+			AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru);
+			return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null);
+		} catch(Exception e){
+			throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e);
+		}
 	}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
index 5aab22059fc..e7d73edde4f 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
@@ -25,10 +25,7 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.compress.colgroup.AColGroup;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC;
+import org.apache.sysds.runtime.compress.colgroup.*;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
 import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
 import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
@@ -43,6 +40,7 @@ public class ColGroupDDCTest {
 
 	@Test
 	public void testConvertToDDCLZWBasic() {
+		// TODO: add a helper method for this comparison
 		IColIndex colIndexes = ColIndexFactory.create(2);
 		double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
 		Dictionary dict = Dictionary.create(dictValues);
@@ -112,6 +110,25 @@ public void testConvertToDDCLZWBasic() {
 		for(int i = 0; i < index; i++){
 			assertEquals(d1.getIndex(i), d3.getIndex(i));
 		}
+
+		// Test sliceRows
+		int low = 3;
+		int high = 10;
+		AColGroup slice = ddclzw.sliceRows(low, high);
+		if(slice instanceof ColGroupDDCLZW ddclzwslice){
+			ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC();
+			ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high);
+
+			AMapToData d4 = ddcSlice.getMapToData();
+			AMapToData d5 = ddcSlice2.getMapToData();
+
+			assertEquals(d5.size(), d4.size());
+			assertEquals(d5.getUnique(), d4.getUnique());
+
+			for (int i = 0; i < d4.size(); i++)
+				assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i));
+		}
+
 	}
 
 	@Test

From a30cc91150b9698d63a915e82868f59c55605a Mon Sep 17 00:00:00 2001
From: fjobs
Date: Fri, 16 Jan 2026 11:41:16 +0100
Subject: [PATCH 17/18] [SYSTEMDS-3779] Add immediate stop after reaching a
 given index in decompress(). Decompress will now return an empty map if the
 index is zero.
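As exercised by the updated test, the early exit gives decompress(codes,
nUnique, nRows, index) a simple prefix contract: exactly the first index
mapping entries are materialized, and index zero yields an empty map. A
hedged usage sketch in the style of ColGroupDDCTest (hypothetical assertions,
not part of this patch):

	ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
	ColGroupDDC head = (ColGroupDDC) ddclzw.convertToDDC(10); // rows 0..9 only
	assertEquals(10, head.getMapToData().size());
	for (int i = 0; i < 10; i++)
		assertEquals(ddc.getMapToData().getIndex(i), head.getMapToData().getIndex(i));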
--- .../compress/colgroup/ColGroupDDCLZW.java | 24 ++++++++++++------- .../compress/colgroup/ColGroupDDCTest.java | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index d2a7dfbce62..50c37936943 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -71,7 +71,6 @@ import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList; import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap; - import java.util.Map; import java.util.HashMap; import java.util.Stack; @@ -204,7 +203,7 @@ private static int[] unpack(int code, int nUnique, Map dict) { return outarray; } - // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. + // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form, up to index entries. // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) { // Validate input arguments. @@ -217,10 +216,14 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int in if (nRows <= 0) { throw new IllegalArgumentException("Invalid nRows: " + nRows); } - if (index > nRows){ + if (index > nRows) { throw new IllegalArgumentException("Index exceeds data length: " + index); } + // Return an empty map if the index is zero. + if (index == 0) + return MapToFactory.create(0, nUnique); + // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). // Base symbols (0..nUnique-1) are implicit and not stored here. final Map dict = new HashMap<>(); @@ -233,7 +236,8 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int in // any dictionary entries. int old = codes[0]; int[] oldPhrase = unpack(old, nUnique, dict); - for (int v : oldPhrase){ + + for (int v : oldPhrase) { if (outPos == index) break; out.set(outPos++, v); } @@ -258,12 +262,14 @@ // Append the reconstructed phrase to the output mapping. for (int v : next) { - if (outPos == index) break; out.set(outPos++, v); + if (outPos == index) + // Stop immediately once done. + return out; } // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). - int first = next[0]; + final int first = next[0]; dict.put(nextCode++, packKey(old, first)); // Advance. @@ -318,7 +324,7 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, in throw new DMLCompressionException("Invalid construction with empty dictionary"); if (_nRows <= 0) throw new DMLCompressionException("Invalid length of the data. 
is zero"); - if (_nUnique <= dict.getNumberOfValues(colIndexes.size())) + if (_nUnique != dict.getNumberOfValues(colIndexes.size())) throw new DMLCompressionException("Invalid map to dict Map has:" + _nUnique + " while dict has " + dict.getNumberOfValues(colIndexes.size())); int[] c = getCounts(); @@ -460,10 +466,10 @@ protected void computeColMxx(double[] c, Builtin builtin) { @Override public AColGroup sliceRows(int rl, int ru) { - try{ + try { AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); - } catch(Exception e){ + } catch (Exception e) { throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index e7d73edde4f..4f02ce97ae7 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -47,7 +47,7 @@ public void testConvertToDDCLZWBasic() { int[] src = new int[]{ // repeating base pattern - 2, 0, 2, 1, 0, 2, 1, 0, 2, + 0,0,2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, // variation / shifted pattern From d39fad0e324f9622d431d74b50c5b642f45b83bc Mon Sep 17 00:00:00 2001 From: fjobs Date: Fri, 16 Jan 2026 16:26:36 +0100 Subject: [PATCH 18/18] [SYSTEMDS-3779] Reverted formatting of ColGroupDDC,ColGroupDDCLZW,ColGroupDDCTest back to correct formatting. Added LZWMappingIterator to decompress values on the fly without having to allocate full compression map [WIP]. Added Test class ColGroupDDCLZWTest. --- .../compress/colgroup/ColGroupDDC.java | 2174 +++++++++-------- .../compress/colgroup/ColGroupDDCLZW.java | 1131 +++++---- .../compress/colgroup/ColGroupDDCLZWTest.java | 76 + .../compress/colgroup/ColGroupDDCTest.java | 527 ++-- 4 files changed, 2038 insertions(+), 1870 deletions(-) create mode 100644 src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index c0d78e11783..140fde5af16 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -75,1081 +75,1101 @@ * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC). */ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { - private static final long serialVersionUID = -5769772089913918987L; - - protected final AMapToData _data; - - static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - - protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - _data = data; - - if (CompressedMatrixBlock.debug) { - if (getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (data.size() == 0) - throw new DMLCompressionException("Invalid length of the data. 
is zero"); - - if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if (c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - data.verify(); - } - } - - public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if (data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else if (dict == null) - return new ColGroupEmpty(colIndexes); - else - return new ColGroupDDC(colIndexes, dict, data, cachedCounts); - } - - public AColGroup sparsifyFOR() { - return ColGroupDDCFOR.sparsifyFOR(this); - } - - public CompressionType getCompType() { - return CompressionType.DDC; - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); - } - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int idxSize = _colIndexes.size(); - if (db.isContiguous()) { - final int nColOut = db.getDim(1); - if (idxSize == 1 && nColOut == 1) - decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == 1) - decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == nColOut) // offC == 0 implied - decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); - else if (offC == 0 && offR == 0) - decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); - else if (offC == 0) - decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); - else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - - private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - final int nCols = db.getDim(1); - final int colOff = _colIndexes.get(0) + offC; - for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) - c[offT] += values[_data.getIndex(i)]; - - } - - @Override - public AMapToData getMapToData() { - return _data; - } - - private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); - } - - private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, - double[] values, AMapToData data) { - data.decompressToRange(c, rl, ru, offR, values); - - } - - private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol) { - final double[] c = db.values(0); - for (int r = rl; r < ru; r++) { - final int start = 
_data.getIndex(r) * nCol; - final int offStart = (offR + r) * nCol; - LibMatrixMult.vectAdd(values, c, start, offStart, nCol); - } - } - - private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol, int colOut) { - int off = (rl + offR) * colOut; - for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { - final double[] c = db.values(offT); - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { - final int nCol = _colIndexes.size(); - final int nColU = db.getDim(1); - final double[] c = db.values(0); - for (int i = rl; i < ru; i++) { - final int off = i * nColU; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) - ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); - } - } - - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); - } - - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final int rowIndex = _data.getIndex(i) * nCol; - for (int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); - } - } - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { - for (int i = rl; i < ru; i++) { - final int vr = _data.getIndex(i); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) { - final int rowOut = _colIndexes.get(aix[j]); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - c[off + i] += aval[j]; - } - } - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - for (int i = rl; i < ru; i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - c[off + i] += v; - } - } - } - - @Override - protected void 
decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { - - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - for (int i = 0; i < _data.size(); i++) { - int di = _data.getIndex(i); - if (sb.isEmpty(di)) - continue; - - final int apos = sb.pos(di); - final int alen = sb.size(di) + apos; - final int[] aix = sb.indexes(di); - final double[] aval = sb.values(di); - - for (int j = apos; j < alen; j++) { - sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); - } - } - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - SparseRow r = sbr.get(rowOut); - - for (int i = 0; i < _data.size(); i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - r = r.append(i, v); - } - sbr.set(rowOut, r, false); - } - } - - @Override - public double getIdx(int r, int colIdx) { - return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); - } - - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - for (int rix = rl; rix < ru; rix++) - c[rix] += preAgg[_data.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - for (int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); - } - - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - for (int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[_data.getIndex(rix)]; - } - - @Override - public int[] getCounts(int[] counts) { - return _data.getCounts(counts); - } - - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (_colIndexes.size() == 1) - leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); - else - lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final DenseBlock retV = result.getDenseBlock(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
- if (matrix.isEmpty()) - return; - else if (matrix.isInSparseFormat()) { - if (cl != 0 || cu != _data.size()) - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); - } else if (!matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, - cu); - else - lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru) { - - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - // final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = 
sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, - int nColRet, double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - - if (matrix.isInSparseFormat()) - lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - final DenseBlock db = result.getDenseBlock(); - final SparseBlock sb = matrix.getSparseBlock(); - - if (cl != 0 || cu != _data.size()) { - // sub part - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final double[] retV = db.values(r); - final int pos = db.pos(r); - lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); - } - } else { - for (int r = rl; r < ru; r++) - _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); - } - } - - private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - return; - 
final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); - } - - private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] mV = matrix.getDenseBlockValues(); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColRet = result.getNumColumns(); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); - } - - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - DenseBlock db = that.getDenseBlock(); - DenseBlock retDB = ret.getDenseBlock(); - for (int i = rl; i < ru; i++) - leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); - } - - @Override - public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { - if (_dict instanceof IdentityDictionary) - identityRightDecompressingMult(right, ret, rl, ru, crl, cru); - else - defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); - } - - private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for (int i = rl; i < ru; i++) { - int k = _data.getIndex(i); - final int offOut = i * jd + crl; - final double aa = 1; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - - private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] a = _dict.getValues(); - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int kd = _colIndexes.size(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - - final int blkzI = 32; - 
final int blkzK = 24; - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for (int bi = rl; bi < ru; bi += blkzI) { - final int bie = Math.min(ru, bi + blkzI); - for (int bk = 0; bk < kd; bk += blkzK) { - final int bke = Math.min(kd, bk + blkzK); - for (int i = bi; i < bie; i++) { - int offi = _data.getIndex(i) * kd; - final int offOut = i * jd + crl; - for (int k = bk; k < bke; k++) { - final double aa = a[offi + k]; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - } - } - } - - final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { - vVec = vVec.broadcast(aa); - final int offj = k * jd; - final int end = endT + offj; - for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { - DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); - res = vVec.fma(bVec, res); - res.intoArray(c, offOut); - } - for (int j = end; j < cru + offj; j++, offOut++) { - double bb = b[j]; - c[offOut] += bb * aa; - } - } - - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - if (cl != 0 || cu != _data.size()) { - throw new NotImplementedException(); - } - _data.preAggregateSparse(sb, preAgg, rl, ru); - } - - @Override - public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - try { - - _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); - } catch (Exception e) { - throw new CompressionException(that.toString(), e); - } - } - - @Override - public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); - } - - @Override - public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - final AOffsetIterator itThat = that._indexes.getOffsetIterator(); - final int nCol = that._colIndexes.size(); - final int finalOff = that._indexes.getOffsetToLast(); - final double[] v = ret.getValues(); - while (true) { - final int to = _data.getIndex(itThat.value()); - that._dict.addToEntry(v, 0, to, nCol); - if (itThat.value() == finalOff) - break; - itThat.next(); - } - } - - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); - } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; - } - - @Override - public ColGroupType getColGroupType() { - return ColGroupType.DDC; - } - - @Override - public long estimateInMemorySize() { - long size = super.estimateInMemorySize(); - size += _data.getInMemorySize(); - return size; - } - - @Override - public AColGroup scalarOperation(ScalarOperator op) { - if ((op.fn instanceof Plus || op.fn instanceof Minus)) { - final double v0 = op.executeScalar(0); - if (v0 == 0) - return this; - final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup unaryOperation(UnaryOperator op) { - return create(_colIndexes, 
_dict.applyUnaryOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && - ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { - final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - final IDictionary ret; - if (_colIndexes.size() == 1) - ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); - else - ret = _dict.binOpRight(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - _data.write(out); - } - - public static ColGroupDDC read(DataInput in) throws IOException { - IColIndex cols = ColIndexFactory.read(in); - IDictionary dict = DictionaryFactory.read(in); - AMapToData data = MapToFactory.readIn(in); - return new ColGroupDDC(cols, dict, data, null); - } - - @Override - public long getExactSizeOnDisk() { - long ret = super.getExactSizeOnDisk(); - ret += _data.getExactSizeOnDisk(); - return ret; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - protected int numRowsToMultiply() { - return _data.size(); - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - if (preAgg != null) - return create(colIndexes, preAgg, _data, getCachedCounts()); - else - return null; - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); - } catch (Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); - } - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return create(colIndexes, newDictionary, _data, getCachedCounts()); - } - - @Override - public AColGroup append(AColGroup g) { - if (g instanceof ColGroupDDC) { - if (g.getColIndices().equals(_colIndexes)) { - - ColGroupDDC gDDC = (ColGroupDDC) g; - if (gDDC._dict.equals(_dict)) { - AMapToData nd = _data.append(gDDC._data); - return create(_colIndexes, _dict, nd, null); - } else - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - } else - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); - } else - LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - @Override - public AColGroup appendNInternal(AColGroup[] g, int 
blen, int rlen) { - for (int i = 1; i < g.length; i++) { - if (!_colIndexes.equals(g[i]._colIndexes)) { - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); - return null; - } - - if (!(g[i] instanceof ColGroupDDC)) { - LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - final ColGroupDDC gDDC = (ColGroupDDC) g[i]; - if (!gDDC._dict.equals(_dict)) { - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - return null; - } - } - AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); - return create(_colIndexes, _dict, nd, null); - } - - @Override - public ICLAScheme getCompressionScheme() { - return DDCScheme.create(this); - } - - @Override - public AColGroup recompress() { - return this; - } - - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - try { - - IEncode enc = getEncoding(); - EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), - _dict.getSparsity()); - return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); - } catch (Exception e) { - throw new DMLCompressionException(this.toString(), e); - } - } - - @Override - public IEncode getEncoding() { - return EncodingFactory.create(_data); - } - - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); - } - - @Override - public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - final SparseBlock sb = selection.getSparseBlock(); - final SparseBlock retB = ret.getSparseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - @Override - protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; - final SparseBlock sb = selection.getSparseBlock(); - final DenseBlock retB = ret.getDenseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, - int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - if (_colIndexes instanceof RangeIndex) - leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); - else { - - pos += cl; // left side matrix position offset. 
- if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if (idx != nVal) - values2[pos2 + _colIndexes.get(idx)] += values[pos]; - } - } else { - for (int rc = cl; rc < cu; rc++, pos++) - values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos]; - } - } - } - - private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, - int cl, int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - - final int firstCol = pos2 + _colIndexes.get(0); - pos += cl; // left side matrix position offset. - if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if (idx != nVal) - values2[firstCol + idx] += values[pos]; - } - } else { - for (int rc = cl; rc < cu; rc++, pos++) - values2[firstCol + _data.getIndex(rc)] += values[pos]; - } - } - - @Override - public AColGroup morph(CompressionType ct, int nRow) { - // return this; - if (ct == getCompType()) - return this; - else if (ct == CompressionType.SDC) { - // return this; - int[] counts = getCounts(); - int maxId = maxIndex(counts); - double[] def = _dict.getRow(maxId, _colIndexes.size()); - - int offsetSize = nRow - counts[maxId]; - int[] offsets = new int[offsetSize]; - AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique()); - int o = 0; - for (int i = 0; i < nRow; i++) { - int v = _data.getIndex(i); - if (v != maxId) { - offsets[o] = i; - reducedData.set(o, v); - o++; - } - } - - return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets), - reducedData, null); - } else if (ct == CompressionType.CONST) { - // if(1 < getNumValues()) { - String thisS = this.toString(); - if (thisS.length() > 10000) - thisS = thisS.substring(0, 10000) + "..."; - LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); - return this; - // } - } else if (ct == CompressionType.DDCFOR) - return this; // it does not make sense to change to FOR. - else - return super.morph(ct, nRow); - } - - private static int maxIndex(int[] counts) { - int id = 0; - for (int i = 1; i < counts.length; i++) { - if (counts[i] > counts[id]) { - id = i; - } - } - return id; - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { - final IDictionary combined = combineDictionaries(nCol, right); - final IColIndex combinedColIndex = combineColIndexes(nCol, right); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { - IDictionary b = ((ColGroupDDC) right).getDictionary(); - IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); - IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - AMapToData[] maps = _data.splitReshapeDDC(multiplier); - AColGroup[] res = new AColGroup[multiplier]; - for (int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? 
_colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) - throws Exception { - AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool); - AColGroup[] res = new AColGroup[multiplier]; - for (int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(String.format("\n%15s", "Data: ")); - sb.append(_data); - return sb.toString(); - } - - @Override - protected boolean allowShallowIdentityRightMult() { - return true; - } - - public AColGroup convertToDeltaDDC() { - int numCols = _colIndexes.size(); - int numRows = _data.size(); - - DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); - double[] rowDelta = new double[numCols]; - double[] prevRow = new double[numCols]; - DblArray dblArray = new DblArray(rowDelta); - int[] rowToDictId = new int[numRows]; - - double[] dictVals = _dict.getValues(); - - for (int i = 0; i < numRows; i++) { - int dictIdx = _data.getIndex(i); - int off = dictIdx * numCols; - for (int j = 0; j < numCols; j++) { - double val = dictVals[off + j]; - if (i == 0) { - rowDelta[j] = val; - prevRow[j] = val; - } else { - rowDelta[j] = val - prevRow[j]; - prevRow[j] = val; - } - } - - rowToDictId[i] = map.increment(dblArray); - } - - if (map.size() == 0) - return new ColGroupEmpty(_colIndexes); - - ACount[] vals = map.extractValues(); - final int nVals = vals.length; - final double[] dictValues = new double[nVals * numCols]; - final int[] oldIdToNewId = new int[map.size()]; - int idx = 0; - for (int i = 0; i < nVals; i++) { - final ACount dac = vals[i]; - final double[] arrData = dac.key().getData(); - System.arraycopy(arrData, 0, dictValues, idx, numCols); - oldIdToNewId[dac.id] = i; - idx += numCols; - } - - DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); - AMapToData newData = MapToFactory.create(numRows, nVals); - for (int i = 0; i < numRows; i++) { - newData.set(i, oldIdToNewId[rowToDictId[i]]); - } - return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); - } - - public AColGroup convertToDDCLZW() { - return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null); - } + private static final long serialVersionUID = -5769772089913918987L; + + protected final AMapToData _data; + + static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; + + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + _data = data; + + if(CompressedMatrixBlock.debug) { + if(getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if(data.size() == 0) + throw new DMLCompressionException("Invalid length of the data. 
is zero"); + + if(data.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException( + "Invalid map to dict Map has:" + data.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if(c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + data.verify(); + } + } + + public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if(data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else if(dict == null) + return new ColGroupEmpty(colIndexes); + else + return new ColGroupDDC(colIndexes, dict, data, cachedCounts); + } + + public AColGroup sparsifyFOR() { + return ColGroupDDCFOR.sparsifyFOR(this); + } + + public CompressionType getCompType() { + return CompressionType.DDC; + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if(sb.isEmpty(vr)) + continue; + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); + } + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int idxSize = _colIndexes.size(); + if(db.isContiguous()) { + final int nColOut = db.getDim(1); + if(idxSize == 1 && nColOut == 1) + decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); + else if(idxSize == 1) + decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); + else if(idxSize == nColOut) // offC == 0 implied + decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); + else if(offC == 0 && offR == 0) + decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); + else if(offC == 0) + decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + + private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + final int nCols = db.getDim(1); + final int colOff = _colIndexes.get(0) + offC; + for(int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) + c[offT] += values[_data.getIndex(i)]; + + } + + @Override + public AMapToData getMapToData() { + return _data; + } + + private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); + } + + private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, + int offR, double[] values, AMapToData data) { + data.decompressToRange(c, rl, ru, offR, values); + + } + + private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol) { + final double[] c = db.values(0); + for(int r = rl; r < ru; r++) { + final int start = 
_data.getIndex(r) * nCol; + final int offStart = (offR + r) * nCol; + LibMatrixMult.vectAdd(values, c, start, offStart, nCol); + } + } + + private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol, int colOut) { + int off = (rl + offR) * colOut; + for(int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { + final double[] c = db.values(offT); + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { + final int nCol = _colIndexes.size(); + final int nColU = db.getDim(1); + final double[] c = db.values(0); + for(int i = rl; i < ru; i++) { + final int off = i * nColU; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if(sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for(int j = apos; j < alen; j++) + ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); + } + + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int rowIndex = _data.getIndex(i) * nCol; + for(int j = 0; j < nCol; j++) + ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + for(int i = rl; i < ru; i++) { + final int vr = _data.getIndex(i); + if(sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for(int j = apos; j < alen; j++) { + final int rowOut = _colIndexes.get(aix[j]); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + c[off + i] += aval[j]; + } + } + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + final int nCol = _colIndexes.size(); + for(int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + for(int i = rl; i < ru; i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + c[off + i] += v; + } + } + } + + @Override + protected void 
decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for(int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + for(int i = 0; i < _data.size(); i++) { + int di = _data.getIndex(i); + if(sb.isEmpty(di)) + continue; + + final int apos = sb.pos(di); + final int alen = sb.size(di) + apos; + final int[] aix = sb.indexes(di); + final double[] aval = sb.values(di); + + for(int j = apos; j < alen; j++) { + sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); + } + } + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for(int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + final int nCol = _colIndexes.size(); + for(int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + SparseRow r = sbr.get(rowOut); + + for(int i = 0; i < _data.size(); i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + r = r.append(i, v); + } + sbr.set(rowOut, r, false); + } + } + + @Override + public double getIdx(int r, int colIdx) { + return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + for(int rix = rl; rix < ru; rix++) + c[rix] += preAgg[_data.getIndex(rix)]; + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + for(int i = rl; i < ru; i++) + c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + for(int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[_data.getIndex(rix)]; + } + + @Override + public int[] getCounts(int[] counts) { + return _data.getCounts(counts); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if(_colIndexes.size() == 1) + leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); + else + lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock retV = result.getDenseBlock(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
+ if(matrix.isEmpty()) + return; + else if(matrix.isInSparseFormat()) { + if(cl != 0 || cu != _data.size()) + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + else + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); + } + else if(!matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, + cl, cu); + else + lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru) { + + if(retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + + for(int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for(int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if(retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + // final int offR = r * nColRet; + for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + 
final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if(retV.isContiguous()) + lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, + int nColRet, double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for(int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for(int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for(int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for(int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for(int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for(int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + if(matrix.isInSparseFormat()) + lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock db = result.getDenseBlock(); + final SparseBlock sb = matrix.getSparseBlock(); + + if(cl != 0 || cu != _data.size()) { + // sub part + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final double[] retV = db.values(r); + final int pos = db.pos(r); + lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); + } + } + else { + for(int r = rl; r < ru; r++) + _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); + } + } + + private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) + return; + final int alen = 
sb.size(r) + apos; + final double[] aval = sb.values(r); + for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); + } + + private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if(matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] mV = matrix.getDenseBlockValues(); + for(int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for(int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + for(int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = r * nColRet; + for(int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + DenseBlock db = that.getDenseBlock(); + DenseBlock retDB = ret.getDenseBlock(); + for(int i = rl; i < ru; i++) + leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, + int cru) { + if(_dict instanceof IdentityDictionary) + identityRightDecompressingMult(right, ret, rl, ru, crl, cru); + else + defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); + } + + private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for(int i = rl; i < ru; i++) { + int k = _data.getIndex(i); + final int offOut = i * jd + crl; + final double aa = 1; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + + private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] a = _dict.getValues(); + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int kd = _colIndexes.size(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + + final int blkzI = 32; + final int blkzK = 24; + 
final int lenJ = cru - crl;
+ final int end = cru - (lenJ % vLen);
+ // Cache-block over rows (blkzI) and dictionary columns (blkzK) so the touched
+ // dictionary and right-hand-side tiles stay cache-resident while vectMM streams
+ // SIMD FMAs over the output row segment.
+ for(int bi = rl; bi < ru; bi += blkzI) {
+ final int bie = Math.min(ru, bi + blkzI);
+ for(int bk = 0; bk < kd; bk += blkzK) {
+ final int bke = Math.min(kd, bk + blkzK);
+ for(int i = bi; i < bie; i++) {
+ int offi = _data.getIndex(i) * kd;
+ final int offOut = i * jd + crl;
+ for(int k = bk; k < bke; k++) {
+ final double aa = a[offi + k];
+ final int k_right = _colIndexes.get(k);
+ vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec);
+ }
+ }
+ }
+ }
+ }
+
+ final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k,
+ int vLen, DoubleVector vVec) {
+ vVec = vVec.broadcast(aa);
+ final int offj = k * jd;
+ final int end = endT + offj;
+ for(int j = offj + crl; j < end; j += vLen, offOut += vLen) {
+ DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut);
+ DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j);
+ res = vVec.fma(bVec, res);
+ res.intoArray(c, offOut);
+ }
+ for(int j = end; j < cru + offj; j++, offOut++) {
+ double bb = b[j];
+ c[offOut] += bb * aa;
+ }
+ }
+
+ @Override
+ public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) {
+ if(cl != 0 || cu != _data.size()) {
+ throw new NotImplementedException();
+ }
+ _data.preAggregateSparse(sb, preAgg, rl, ru);
+ }
+
+ @Override
+ public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) {
+ try {
+ _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size());
+ }
+ catch(Exception e) {
+ throw new DMLCompressionException(that.toString(), e);
+ }
+ }
+
+ @Override
+ public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) {
+ _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size());
+ }
+
+ @Override
+ public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) {
+ final AOffsetIterator itThat = that._indexes.getOffsetIterator();
+ final int nCol = that._colIndexes.size();
+ final int finalOff = that._indexes.getOffsetToLast();
+ final double[] v = ret.getValues();
+ while(true) {
+ final int to = _data.getIndex(itThat.value());
+ that._dict.addToEntry(v, 0, to, nCol);
+ if(itThat.value() == finalOff)
+ break;
+ itThat.next();
+ }
+ }
+
+ @Override
+ protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) {
+ _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size());
+ }
+
+ @Override
+ public boolean sameIndexStructure(AColGroupCompressed that) {
+ return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data;
+ }
+
+ @Override
+ public ColGroupType getColGroupType() {
+ return ColGroupType.DDC;
+ }
+
+ @Override
+ public long estimateInMemorySize() {
+ long size = super.estimateInMemorySize();
+ size += _data.getInMemorySize();
+ return size;
+ }
+
+ @Override
+ public AColGroup scalarOperation(ScalarOperator op) {
+ if((op.fn instanceof Plus || op.fn instanceof Minus)) {
+ final double v0 = op.executeScalar(0);
+ if(v0 == 0)
+ return this;
+ final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0);
+ return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference);
+ }
+ return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts());
+ }
+
+ @Override
+ public AColGroup unaryOperation(UnaryOperator op) {
+ return create(_colIndexes, _dict.applyUnaryOp(op), _data, getCachedCounts());
+ }
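+
+ // Illustrative note (not in the original patch): for row-safe scalar ops like
+ // X + 3, scalarOperation above leaves the dictionary untouched and moves the
+ // constant into a FOR reference vector instead, e.g. dict = {1.0, 2.5}, op = (+3)
+ // -> DDCFOR(dict = {1.0, 2.5}, reference = [3.0]), so each decompressed cell is
+ // dictionary value + reference, without materializing a new dictionary.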
+ + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + if((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && + ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { + final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + final IDictionary ret; + if(_colIndexes.size() == 1) + ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); + else + ret = _dict.binOpRight(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + _data.write(out); + } + + public static ColGroupDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDDC(cols, dict, data, null); + } + + @Override + public long getExactSizeOnDisk() { + long ret = super.getExactSizeOnDisk(); + ret += _data.getExactSizeOnDisk(); + return ret; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); + } + + @Override + protected int numRowsToMultiply() { + return _data.size(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + if(preAgg != null) + return create(colIndexes, preAgg, _data, getCachedCounts()); + else + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); + } + catch(Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); + } + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return create(colIndexes, newDictionary, _data, getCachedCounts()); + } + + @Override + public AColGroup append(AColGroup g) { + if(g instanceof ColGroupDDC) { + if(g.getColIndices().equals(_colIndexes)) { + + ColGroupDDC gDDC = (ColGroupDDC) g; + if(gDDC._dict.equals(_dict)) { + AMapToData nd = _data.append(gDDC._data); + return create(_colIndexes, _dict, nd, null); + } + else + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + } + else + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); + } + else + LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { + for(int i = 1; i < g.length; i++) { + 
if(!_colIndexes.equals(g[i]._colIndexes)) { + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); + return null; + } + + if(!(g[i] instanceof ColGroupDDC)) { + LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + final ColGroupDDC gDDC = (ColGroupDDC) g[i]; + if(!gDDC._dict.equals(_dict)) { + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + return null; + } + } + AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); + return create(_colIndexes, _dict, nd, null); + } + + @Override + public ICLAScheme getCompressionScheme() { + return DDCScheme.create(this); + } + + @Override + public AColGroup recompress() { + return this; + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + try { + + IEncode enc = getEncoding(); + EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), + _dict.getSparsity()); + return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); + } + catch(Exception e) { + throw new DMLCompressionException(this.toString(), e); + } + } + + @Override + public IEncode getEncoding() { + return EncodingFactory.create(_data); + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); + } + + @Override + public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + final SparseBlock sb = selection.getSparseBlock(); + final SparseBlock retB = ret.getSparseBlock(); + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; + final SparseBlock sb = selection.getSparseBlock(); + final DenseBlock retB = ret.getDenseBlock(); + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, + int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + if(_colIndexes instanceof RangeIndex) + leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); + else { + + pos += cl; // left side matrix position offset. 
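+ // With an IdentityDictionary, dictionary row idx is the idx-th unit vector, so
+ // this left multiply reduces to scattering each input value into output column
+ // _colIndexes.get(idx); withEmpty() marks a trailing all-zero dictionary entry
+ // whose index (nVal) must be skipped.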
+ if(a.withEmpty()) {
+ final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1;
+ for(int rc = cl; rc < cu; rc++, pos++) {
+ final int idx = _data.getIndex(rc);
+ if(idx != nVal)
+ values2[pos2 + _colIndexes.get(idx)] += values[pos];
+ }
+ }
+ else {
+ for(int rc = cl; rc < cu; rc++, pos++)
+ values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos];
+ }
+ }
+ }
+
+ private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2,
+ int pos2, int cl, int cu) {
+ IdentityDictionary a = (IdentityDictionary) _dict;
+
+ final int firstCol = pos2 + _colIndexes.get(0);
+ pos += cl; // left side matrix position offset.
+ if(a.withEmpty()) {
+ final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1;
+ for(int rc = cl; rc < cu; rc++, pos++) {
+ final int idx = _data.getIndex(rc);
+ if(idx != nVal)
+ values2[firstCol + idx] += values[pos];
+ }
+ }
+ else {
+ for(int rc = cl; rc < cu; rc++, pos++)
+ values2[firstCol + _data.getIndex(rc)] += values[pos];
+ }
+ }
+
+ @Override
+ public AColGroup morph(CompressionType ct, int nRow) {
+ if(ct == getCompType())
+ return this;
+ else if(ct == CompressionType.SDC) {
+ int[] counts = getCounts();
+ int maxId = maxIndex(counts);
+ double[] def = _dict.getRow(maxId, _colIndexes.size());
+
+ int offsetSize = nRow - counts[maxId];
+ int[] offsets = new int[offsetSize];
+ AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique());
+ int o = 0;
+ for(int i = 0; i < nRow; i++) {
+ int v = _data.getIndex(i);
+ if(v != maxId) {
+ offsets[o] = i;
+ reducedData.set(o, v);
+ o++;
+ }
+ }
+
+ return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets),
+ reducedData, null);
+ }
+ else if(ct == CompressionType.CONST) {
+ String thisS = this.toString();
+ if(thisS.length() > 10000)
+ thisS = thisS.substring(0, 10000) + "...";
+ LOG.warn("Tried to morph to const from DDC but impossible: " + thisS);
+ return this;
+ }
+ else if(ct == CompressionType.DDCFOR)
+ return this; // it does not make sense to change to FOR.
+ else
+ return super.morph(ct, nRow);
+ }
+
+ private static int maxIndex(int[] counts) {
+ int id = 0;
+ for(int i = 1; i < counts.length; i++) {
+ if(counts[i] > counts[id]) {
+ id = i;
+ }
+ }
+ return id;
+ }
+
+ @Override
+ public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List<AColGroup> right) {
+ final IDictionary combined = combineDictionaries(nCol, right);
+ final IColIndex combinedColIndex = combineColIndexes(nCol, right);
+ return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts());
+ }
+
+ @Override
+ public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) {
+ IDictionary b = ((ColGroupDDC) right).getDictionary();
+ IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols());
+ IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol));
+ return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts());
+ }
+
+ @Override
+ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
+ AMapToData[] maps = _data.splitReshapeDDC(multiplier);
+ AColGroup[] res = new AColGroup[multiplier];
+ for(int i = 0; i < multiplier; i++) {
+ final IColIndex ci = i == 0 ?
_colIndexes : _colIndexes.shift(i * nColOrg);
+ res[i] = create(ci, _dict, maps[i], null);
+ }
+ return res;
+ }
+
+ @Override
+ public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool)
+ throws Exception {
+ AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool);
+ AColGroup[] res = new AColGroup[multiplier];
+ for(int i = 0; i < multiplier; i++) {
+ final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
+ res[i] = create(ci, _dict, maps[i], null);
+ }
+ return res;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(super.toString());
+ sb.append(String.format("\n%15s", "Data: "));
+ sb.append(_data);
+ return sb.toString();
+ }
+
+ @Override
+ protected boolean allowShallowIdentityRightMult() {
+ return true;
+ }
+
+ public AColGroup convertToDeltaDDC() {
+ int numCols = _colIndexes.size();
+ int numRows = _data.size();
+
+ DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64));
+ double[] rowDelta = new double[numCols];
+ double[] prevRow = new double[numCols];
+ DblArray dblArray = new DblArray(rowDelta);
+ int[] rowToDictId = new int[numRows];
+
+ double[] dictVals = _dict.getValues();
+
+ for(int i = 0; i < numRows; i++) {
+ int dictIdx = _data.getIndex(i);
+ int off = dictIdx * numCols;
+ for(int j = 0; j < numCols; j++) {
+ double val = dictVals[off + j];
+ if(i == 0) {
+ rowDelta[j] = val;
+ prevRow[j] = val;
+ }
+ else {
+ rowDelta[j] = val - prevRow[j];
+ prevRow[j] = val;
+ }
+ }
+
+ rowToDictId[i] = map.increment(dblArray);
+ }
+
+ if(map.size() == 0)
+ return new ColGroupEmpty(_colIndexes);
+
+ ACount<DblArray>[] vals = map.extractValues();
+ final int nVals = vals.length;
+ final double[] dictValues = new double[nVals * numCols];
+ final int[] oldIdToNewId = new int[map.size()];
+ int idx = 0;
+ for(int i = 0; i < nVals; i++) {
+ final ACount<DblArray> dac = vals[i];
+ final double[] arrData = dac.key().getData();
+ System.arraycopy(arrData, 0, dictValues, idx, numCols);
+ oldIdToNewId[dac.id] = i;
+ idx += numCols;
+ }
+
+ DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols);
+ AMapToData newData = MapToFactory.create(numRows, nVals);
+ for(int i = 0; i < numRows; i++) {
+ newData.set(i, oldIdToNewId[rowToDictId[i]]);
+ }
+ return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null);
+ }
+
+ public AColGroup convertToDDCLZW() {
+ return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null);
+ }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 50c37936943..a8c279828fb 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -22,9 +22,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
+import java.util.*;
 import java.util.concurrent.ExecutorService;

 import jdk.incubator.vector.DoubleVector;
@@ -71,578 +69,659 @@
 import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
 import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;

-import java.util.Map;
 import java.util.HashMap;
-import java.util.Stack;

 /**
  * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
- * mapping vector is
additionally lzw compressed. - * Idea: - * - DDCLZW stores the mapping vector exclusively in compressed form. - * - No persistent MapToData cache is maintained. - * - Sequential operations decode on-the-fly, while operations requiring random access explicitly materialize and fall back to DDC. + * mapping vector is additionally lzw compressed. Idea: - DDCLZW stores the mapping vector exclusively in compressed + * form. - No persistent MapToData cache is maintained. - Sequential operations decode on-the-fly, while operations + * requiring random access explicitly materialize and fall back to DDC. */ public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup { - private static final long serialVersionUID = -5769772089913918987L; - - private final int[] _dataLZW; // LZW compressed representation of the mapping - private final int _nRows; // Number of rows in the mapping vector - private final int _nUnique; // Number of unique values in the mapping vector - - // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. (TODO) - private static long packKey(int prefixCode, int nextSymbol) { - return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); - } - - // Compresses a mapping (AMapToData) into an LZW-compressed byte/integer/? array. (TODO) - private static int[] compress(AMapToData data) { - if (data == null) - throw new IllegalArgumentException("Invalid input: data is null"); - - final int nRows = data.size(); - if (nRows <= 0) { - throw new IllegalArgumentException("Invalid input: data has no rows"); - } - - final int nUnique = data.getUnique(); - if (nUnique <= 0) { - throw new IllegalArgumentException("Invalid input: data has no unique values"); - } - - // Fast-path: single symbol - if (nRows == 1) - return new int[]{data.getIndex(0)}; - - - // LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code). - // Using fastutil keeps lookups fast. (TODO improve time/space complexity) - final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); - dict.defaultReturnValue(-1); - - // Output buffer (heuristic capacity; avoids frequent reallocs) - final IntArrayList out = new IntArrayList(Math.max(16, nRows / 2)); - - // Codes {0,...,nUnique - 1} are reserved for the original symbols. - int nextCode = nUnique; - - // Initialize w with the first input symbol. - // AMapToData stores dictionary indices, not actual data values. - // Since indices reference positions in an IDictionary, they are always in the valid index range 0 … nUnique−1; - int w = data.getIndex(0); - - // Process the remaining input symbols. - // Example: _data = [2,0,2,3,0,2,1,0,2]. - for (int i = 1; i < nRows; i++) { - final int k = data.getIndex(i); // next input symbol - - if (k < 0 || k >= nUnique) - throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")"); - - final long key = packKey(w, k); // encode (w,k) into long key - - int wk = dict.get(key); // look if wk exists in dict - if (wk != -1) { - w = wk; // wk exists in dict so replace w by wk and continue. - } else { - // wk does not exist in dict. output current phrase, add new phrase, restart at k - out.add(w); - dict.put(key, nextCode++); - w = k; // Start new phrase with k - } - } - - out.add(w); - return out.toIntArray(); - } - - // Unpack upper 32 bits (w) of (w,k) key pair. - private static int unpackfirst(long key) { - return (int) (key >>> 32); - } - - // Unpack lower 32 bits (k) of (w,k) key pair. 
- private static int unpacksecond(long key) { - return (int) (key); - } - - // Append symbol to end of int-array. - private static int[] packint(int[] arr, int last) { - int[] result = Arrays.copyOf(arr, arr.length + 1); - result[arr.length] = last; - return result; - } - - // Reconstruct phrase to lzw-code. - private static int[] unpack(int code, int nUnique, Map dict) { - // Base symbol (implicit alphabet) - if (code < nUnique) - return new int[]{code}; - - Stack stack = new Stack<>(); - int c = code; - - while (c >= nUnique) { - Long key = dict.get(c); - if (key == null) - throw new IllegalStateException("Missing dictionary entry for code: " + c); - - int symbol = unpacksecond(key); - stack.push(symbol); - c = unpackfirst(key); - } - - // Basissymbol - stack.push(c); - int[] outarray = new int[stack.size()]; - int i = 0; - // korrekt ins Output schreiben - while (!stack.isEmpty()) { - outarray[i++] = stack.pop(); - } - return outarray; - } - - // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form until index. - // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. - private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) { - // Validate input arguments. - if (codes == null) - throw new IllegalArgumentException("codes is null"); - if (codes.length == 0) - throw new IllegalArgumentException("codes is empty"); - if (nUnique <= 0) - throw new IllegalArgumentException("Invalid alphabet size: " + nUnique); - if (nRows <= 0) { - throw new IllegalArgumentException("Invalid nRows: " + nRows); - } - if (index > nRows) { - throw new IllegalArgumentException("Index is larger than Data Length: " + index); - } - - // Return empty Map if index is zero. - if (index == 0) - return MapToFactory.create(0, nUnique); - - // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). - // Base symbols (0..nUnique-1) are implicit and not stored here. - final Map dict = new HashMap<>(); - - // Output mapping that will be reconstructed. - AMapToData out = MapToFactory.create(index, nUnique); - int outPos = 0; // Current write position in the output mapping. - - // Decode the first code. The first code always expands to a valid phrase without needing - // any dictionary entries. - int old = codes[0]; - int[] oldPhrase = unpack(old, nUnique, dict); - - for (int v : oldPhrase) { - if (outPos == index) break; - out.set(outPos++, v); - } - - // Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols. - int nextCode = nUnique; - - // Process remaining codes. - for (int i = 1; i < codes.length; i++) { - int key = codes[i]; - - int[] next; - if (key < nUnique || dict.containsKey(key)) { - // Normal case: The code is either a base symbol or already present in the dictionary. - next = unpack(key, nUnique, dict); - } else { - // KwKwK special case: The current code refers to a phrase that is being defined right now. - // next = oldPhrase + first(oldPhrase). - int first = oldPhrase[0]; - next = packint(oldPhrase, first); - } - - // Append the reconstructed phrase to the output mapping. - for (int v : next) { - out.set(outPos++, v); - if (outPos == index) - // Stop immediately once done. - return out; - } - - // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). - final int first = next[0]; - dict.put(nextCode++, packKey(old, first)); - - // Advance. - old = key; - oldPhrase = next; - } - - // Safety check: decoder must produce exactly nRows symbols. 
- if (outPos != index) - throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + index); - - // Return the reconstructed mapping. - return out; - } - - // Build Constructor: Used when creating a new DDCLZW instance during compression/build time. (TODO) - private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - - // Derive metadadata - _nRows = data.size(); - _nUnique = dict.getNumberOfValues(colIndexes.size()); - - // Compress mapping to LZW - _dataLZW = compress(data); - - if (CompressedMatrixBlock.debug) { - if (getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (_nRows == 0) - throw new DMLCompressionException("Invalid length of the data. is zero"); - if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if (c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - data.verify(); - } - } - - // Read Constructor: Used when creating this group from a serialized form (e.g., reading a compressed matrix from disk/memory stream). (TODO) - private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - - _dataLZW = dataLZW; - _nRows = nRows; - _nUnique = nUnique; - - if (CompressedMatrixBlock.debug) { - if (getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (_nRows <= 0) - throw new DMLCompressionException("Invalid length of the data. is zero"); - if (_nUnique != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + _nUnique + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if (c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - - // Optional: validate that decoding works (expensive) - // AMapToData decoded = decode(_dataLZW, _nRows, _nUnique); - // decoded.verify(); - } - } - - // Factory method for creating a column group. (AColGroup g = ColGroupDDCLZW.create(...);) - public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if (dict == null) - return new ColGroupEmpty(colIndexes); - else if (data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else - return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); - } - - /* - * TODO: Operations with complex access patterns shall be uncompressed to ddc format. - * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are - * suitable for sequential and which arent. 
those who arent then we shall materialize and fall back to ddc - * */ - - public AColGroup convertToDDC() { - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows); - final int[] counts = getCounts(); // may be null depending on your group - return ColGroupDDC.create(_colIndexes, _dict, map, counts); - } - - public AColGroup convertToDDC(int index) { - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, index); - final int[] counts = getCounts(); // may be null depending on your group - return ColGroupDDC.create(_colIndexes, _dict, map, counts); - } - - // Deserialize ColGroupDDCLZW object in binary stream. - public static ColGroupDDCLZW read(DataInput in) throws IOException { - final IColIndex colIndexes = ColIndexFactory.read(in); - final IDictionary dict = DictionaryFactory.read(in); - - // Metadata for lzw mapping. - final int nRows = in.readInt(); - final int nUnique = in.readInt(); - - // Read compressed mapping array. - final int len = in.readInt(); - if (len < 0) - throw new IOException("Invalid LZW data length: " + len); - - final int[] dataLZW = new int[len]; - for (int i = 0; i < len; i++) - dataLZW[i] = in.readInt(); - - // cachedCounts currently not serialized (mirror ColGroupDDC.read which passes null) - return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null); - } - - // Serialize a ColGroupDDC-object into binary stream. - @Override - public void write(DataOutput out) throws IOException { - _colIndexes.write(out); - _dict.write(out); - out.writeInt(_nRows); - out.writeInt(_nUnique); - out.writeInt(_dataLZW.length); - for (int i : _dataLZW) out.writeInt(i); - } - - @Override - public double getIdx(int r, int colIdx) { - // TODO: soll schnell sein - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r); - // TODO: ColumnIndex - return map.getIndex(r); - } - - @Override - public CompressionType getCompType() { - return CompressionType.DDCLZW; - } - - @Override - protected ColGroupType getColGroupType() { - return ColGroupType.DDCLZW; - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - public ICLAScheme getCompressionScheme() { - //TODO: in ColGroupDDCFor nicht implementiert - sollen wir das erstellen? Inhalt: ncols wie DDC - throw new NotImplementedException(); - } - - @Override - protected int numRowsToMultiply() { - return _nRows; - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return new ColGroupDDCLZW(colIndexes, newDictionary, _dataLZW, _nRows, _nUnique, getCachedCounts()); - } - - @Override - public AMapToData getMapToData() { - throw new NotImplementedException(); // or decompress and return data... 
decompress(_dataLZW, _nUnique, _nRows, _nRows) - } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW; - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); - return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); - } catch (Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); - } - } - - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { - - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - - } - - @Override - protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { - - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { - - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + private static final long serialVersionUID = -5769772089913918987L; + + private final int[] _dataLZW; // LZW compressed representation of the mapping + private final int _nRows; // Number of rows in the mapping vector + private final int _nUnique; // Number of unique values in the mapping vector + + // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. (TODO) + private static long packKey(int prefixCode, int nextSymbol) { + return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); + } + + // Compresses a mapping (AMapToData) into an LZW-compressed byte/integer/? array. + private static int[] compress(AMapToData data) { + if(data == null) + throw new IllegalArgumentException("Invalid input: data is null"); + + final int nRows = data.size(); + if(nRows <= 0) { + throw new IllegalArgumentException("Invalid input: data has no rows"); + } + + final int nUnique = data.getUnique(); + if(nUnique <= 0) { + throw new IllegalArgumentException("Invalid input: data has no unique values"); + } + + // Fast-path: single symbol + if(nRows == 1) + return new int[] {data.getIndex(0)}; + + // LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code). + // Using fastutil keeps lookups fast. (TODO improve time/space complexity) + final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); + dict.defaultReturnValue(-1); + + // Output buffer (heuristic capacity; avoids frequent reallocs) + final IntArrayList out = new IntArrayList(Math.max(16, nRows / 2)); + + // Codes {0,...,nUnique - 1} are reserved for the original symbols. + int nextCode = nUnique; + + // Initialize w with the first input symbol. + // AMapToData stores dictionary indices, not actual data values. 
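+ // (The LZW alphabet is therefore exactly {0, ..., nUnique-1}.)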
+ // Since indices reference positions in an IDictionary, they are always in the valid index range 0 ... nUnique-1.
+ int w = data.getIndex(0);
+
+ // Process the remaining input symbols.
+ // Example: _data = [2,0,2,3,0,2,1,0,2].
+ for(int i = 1; i < nRows; i++) {
+ final int k = data.getIndex(i); // next input symbol
+
+ if(k < 0 || k >= nUnique)
+ throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")");
+
+ final long key = packKey(w, k); // encode (w,k) into long key
+
+ int wk = dict.get(key); // look up whether the phrase (w,k) already exists in dict
+ if(wk != -1) {
+ w = wk; // (w,k) exists in dict, so replace w by its code and continue.
+ }
+ else {
+ // (w,k) does not exist in dict: output current phrase, add new phrase, restart at k.
+ out.add(w);
+ dict.put(key, nextCode++);
+ w = k; // Start new phrase with k
+ }
+ }
+
+ out.add(w);
+ return out.toIntArray();
+ }
+
+ // Unpack upper 32 bits (w) of a (w,k) key pair.
+ private static int unpackfirst(long key) {
+ return (int) (key >>> 32);
+ }
+
+ // Unpack lower 32 bits (k) of a (w,k) key pair.
+ private static int unpacksecond(long key) {
+ return (int) (key);
+ }
+
+ // Append a symbol to the end of an int array.
+ private static int[] packint(int[] arr, int last) {
+ int[] result = Arrays.copyOf(arr, arr.length + 1);
+ result[arr.length] = last;
+ return result;
+ }
+
+ // Reconstruct the phrase encoded by an LZW code.
+ private static int[] unpack(int code, int nUnique, Map<Integer, Long> dict) {
+ // Base symbol (implicit alphabet)
+ if(code < nUnique)
+ return new int[] {code};
+
+ Stack<Integer> stack = new Stack<>();
+ int c = code;
+
+ while(c >= nUnique) {
+ Long key = dict.get(c);
+ if(key == null)
+ throw new IllegalStateException("Missing dictionary entry for code: " + c);
+
+ int symbol = unpacksecond(key);
+ stack.push(symbol);
+ c = unpackfirst(key);
+ }
+
+ // Base symbol.
+ stack.push(c);
+ int[] outarray = new int[stack.size()];
+ int i = 0;
+ // Write the symbols to the output in front-to-back order.
+ while(!stack.isEmpty()) {
+ outarray[i++] = stack.pop();
+ }
+ return outarray;
+ }
+
+ // Decompresses the full LZW-compressed vector into its original AMapToData form.
+ private static AMapToData decompressFull(int[] codes, int nUnique, int nRows) {
+ return decompress(codes, nUnique, nRows, nRows);
+ }
+
+ private final class LZWMappingIterator {
+ private final Map<Integer, Long> dict = new HashMap<>(); // LZW-dictionary. Maps code -> (prefixCode, nextSymbol).
+ private int lzwIndex = 0; // Current position in the LZW-compressed mapping (_dataLZW).
+ private int mapIndex = 0; // Number of mapping symbols returned so far.
+ private int nextCode = _nUnique; // Next free LZW code.
+ private int[] currentPhrase = null; // Current phrase being decoded from the LZW-compressed mapping.
+ private int currentPhraseIndex = 0; // Next position in the current phrase to return.
+ private int[] oldPhrase = null; // Previous phrase.
+ private int oldCode = -1; // Previous code.
+
+ LZWMappingIterator() {
+ lzwIndex = 1; // First code consumed during initialization.
+ oldCode = _dataLZW[0]; // Decode the first code into the initial phrase.
+ oldPhrase = unpack(oldCode, _nUnique, dict);
+ currentPhrase = oldPhrase;
+ currentPhraseIndex = 0;
+ mapIndex = 0; // No mapping symbols have been returned yet.
+ }
+
+ // True if there are more mapping symbols to decode.
+ boolean hasNext() {
+ return mapIndex < _nRows;
+ }
+
+ int next() {
+ if(!hasNext())
+ throw new NoSuchElementException();
+
+ // If the current phrase still has symbols, return the next symbol from it.
+ if(currentPhraseIndex < currentPhrase.length) {
+ mapIndex++;
+ return currentPhrase[currentPhraseIndex++];
+ }
+
+ // Otherwise, decode the next code into a new phrase.
+ if(lzwIndex >= _dataLZW.length)
+ throw new IllegalStateException("Invalid LZW index: " + lzwIndex);
+
+ final int key = _dataLZW[lzwIndex++];
+
+ final int[] next;
+ if(key < _nUnique || dict.containsKey(key)) {
+ next = unpack(key, _nUnique,
+ dict); // Normal case: The code is either a base symbol or already present in the dictionary.
+ }
+ else {
+ next = packint(oldPhrase, oldPhrase[0]); // KwKwK special case: next = oldPhrase + first(oldPhrase).
+ }
+
+ // Add new phrase to dictionary: nextCode -> (oldCode, firstSymbol(next)).
+ dict.put(nextCode++, packKey(oldCode, next[0]));
+
+ // Advance decoder state.
+ oldCode = key;
+ oldPhrase = next;
+
+ // Start returning symbols from the newly decoded phrase.
+ currentPhrase = next;
+ currentPhraseIndex = 0;
+
+ mapIndex++;
+ return currentPhrase[currentPhraseIndex++];
+ }
+ }
+
+ // Decompresses an LZW-compressed vector into its original AMapToData form, up to (exclusive) the given index.
+ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) {
+ // Validate input arguments.
+ if(codes == null)
+ throw new IllegalArgumentException("codes is null");
+ if(codes.length == 0)
+ throw new IllegalArgumentException("codes is empty");
+ if(nUnique <= 0)
+ throw new IllegalArgumentException("Invalid alphabet size: " + nUnique);
+ if(nRows <= 0) {
+ throw new IllegalArgumentException("Invalid nRows: " + nRows);
+ }
+ if(index > nRows) {
+ throw new IllegalArgumentException("Index is larger than the data length: " + index);
+ }
+
+ // Return an empty map if index is zero.
+ if(index == 0)
+ return MapToFactory.create(0, nUnique);
+
+ // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase).
+ // Base symbols (0..nUnique-1) are implicit and not stored here.
+ final Map<Integer, Long> dict = new HashMap<>();
+
+ // Output mapping that will be reconstructed.
+ AMapToData out = MapToFactory.create(index, nUnique);
+ int outPos = 0; // Current write position in the output mapping.
+
+ // Decode the first code. The first code always expands to a valid phrase without needing
+ // any dictionary entries.
+ int old = codes[0];
+ int[] oldPhrase = unpack(old, nUnique, dict);
+
+ for(int v : oldPhrase) {
+ if(outPos == index)
+ break;
+ out.set(outPos++, v);
+ }
+
+ // Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols.
+ int nextCode = nUnique;
+
+ // Process remaining codes.
+ for(int i = 1; i < codes.length; i++) {
+ int key = codes[i];
+
+ int[] next;
+ if(key < nUnique || dict.containsKey(key)) {
+ // Normal case: The code is either a base symbol or already present in the dictionary.
+ next = unpack(key, nUnique, dict);
+ }
+ else {
+ // KwKwK special case: The current code refers to a phrase that is being defined right now.
+ // next = oldPhrase + first(oldPhrase).
+ int first = oldPhrase[0];
+ next = packint(oldPhrase, first);
+ }
+
+ // Append the reconstructed phrase to the output mapping.
+ for(int v : next) {
+ out.set(outPos++, v);
+ if(outPos == index)
+ // Stop immediately once done.
+ return out;
+ }
+
+ // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)).
+ final int first = next[0];
+ dict.put(nextCode++, packKey(old, first));
+
+ // Advance.
+ old = key;
+ oldPhrase = next;
+ }
+
+ // Safety check: the decoder must produce exactly `index` symbols.
+ if(outPos != index)
+ throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + index);
+
+ // Return the reconstructed mapping.
+ return out;
+ }
+
+ // Build constructor: Used when creating a new DDCLZW instance during compression/build time. (TODO)
+ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) {
+ super(colIndexes, dict, cachedCounts);
+
+ // Derive metadata.
+ _nRows = data.size();
+ _nUnique = dict.getNumberOfValues(colIndexes.size());
+
+ // Compress the mapping to LZW.
+ _dataLZW = compress(data);
+
+ if(CompressedMatrixBlock.debug) {
+ if(getNumValues() == 0)
+ throw new DMLCompressionException("Invalid construction with empty dictionary");
+ if(_nRows == 0)
+ throw new DMLCompressionException("Invalid data length: zero rows");
+ if(data.getUnique() != dict.getNumberOfValues(colIndexes.size()))
+ throw new DMLCompressionException(
+ "Invalid map to dict: map has " + data.getUnique() + " while dict has "
+ + dict.getNumberOfValues(colIndexes.size()));
+ int[] c = getCounts();
+ if(c.length != dict.getNumberOfValues(colIndexes.size()))
+ throw new DMLCompressionException("Invalid DDC Construction");
+ data.verify();
+ }
+ }
+
+ // Read constructor: Used when creating this group from a serialized form (e.g., reading a compressed matrix from a disk/memory stream). (TODO)
+ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique,
+ int[] cachedCounts) {
+ super(colIndexes, dict, cachedCounts);
+
+ _dataLZW = dataLZW;
+ _nRows = nRows;
+ _nUnique = nUnique;
+
+ if(CompressedMatrixBlock.debug) {
+ if(getNumValues() == 0)
+ throw new DMLCompressionException("Invalid construction with empty dictionary");
+ if(_nRows <= 0)
+ throw new DMLCompressionException("Invalid data length: zero rows");
+ if(_nUnique != dict.getNumberOfValues(colIndexes.size()))
+ throw new DMLCompressionException("Invalid map to dict: map has " + _nUnique + " while dict has "
+ + dict.getNumberOfValues(colIndexes.size()));
+ int[] c = getCounts();
+ if(c.length != dict.getNumberOfValues(colIndexes.size()))
+ throw new DMLCompressionException("Invalid DDC Construction");
+ }
+ }
+
+ // Factory method for creating a column group. (AColGroup g = ColGroupDDCLZW.create(...);)
+ public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) {
+ if(dict == null)
+ return new ColGroupEmpty(colIndexes);
+ else if(data.getUnique() == 1)
+ return ColGroupConst.create(colIndexes, dict);
+ else
+ return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts);
+ }
+
+ /*
+ * TODO: Operations with complex access patterns shall be decompressed to DDC format, i.e.,
+ * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are
+ * suitable for sequential access and which are not; those that are not shall materialize the
+ * mapping and fall back to DDC.
+ */
+
+ public AColGroup convertToDDC() {
+ final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows);
+ final int[] counts = getCounts(); // may be null depending on the group
+ return ColGroupDDC.create(_colIndexes, _dict, map, counts);
+ }
+
+ public AColGroup convertToDDC(int index) {
+ final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, index);
+ final int[] counts = getCounts(); // may be null depending on the group
+ return ColGroupDDC.create(_colIndexes, _dict, map, counts);
+ }
+
+ // Deserialize a ColGroupDDCLZW object from a binary stream.
+ public static ColGroupDDCLZW read(DataInput in) throws IOException {
+ final IColIndex colIndexes = ColIndexFactory.read(in);
+ final IDictionary dict = DictionaryFactory.read(in);
+
+ // Metadata for the LZW mapping.
+ final int nRows = in.readInt();
+ final int nUnique = in.readInt();
+
+ // Read the compressed mapping array.
+ final int len = in.readInt();
+ if(len < 0)
+ throw new IOException("Invalid LZW data length: " + len);
+
+ final int[] dataLZW = new int[len];
+ for(int i = 0; i < len; i++)
+ dataLZW[i] = in.readInt();
+
+ // cachedCounts currently not serialized (mirror ColGroupDDC.read which passes null)
+ return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null);
+ }
+
+ // Serialize a ColGroupDDCLZW object into a binary stream.
+ @Override
+ public void write(DataOutput out) throws IOException {
+ _colIndexes.write(out);
+ _dict.write(out);
+ out.writeInt(_nRows);
+ out.writeInt(_nUnique);
+ out.writeInt(_dataLZW.length); // TODO: verify this matches getExactSizeOnDisk().
+ for(int i : _dataLZW)
+ out.writeInt(i);
+ }
+
+ @Override
+ public double getIdx(int r, int colIdx) {
+ // TODO: should be fast; currently this decompresses the mapping prefix [0, r].
+ final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r + 1);
+ return _dict.getValue(map.getIndex(r), colIdx, _colIndexes.size());
+ }
+
+ @Override
+ public CompressionType getCompType() {
+ return CompressionType.DDCLZW;
+ }
+
+ @Override
+ protected ColGroupType getColGroupType() {
+ return ColGroupType.DDCLZW;
+ }
+
+ @Override
+ public boolean containsValue(double pattern) {
+ return _dict.containsValue(pattern);
+ }
+
+ @Override
+ public double getCost(ComputationCostEstimator e, int nRows) {
+ final int nVals = getNumValues();
+ final int nCols = getNumCols();
+ return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity());
+ }
+
+ @Override
+ public ICLAScheme getCompressionScheme() {
+ // TODO: not implemented in ColGroupDDCFOR either - should we create one? Content: nCols as in DDC.
+ throw new NotImplementedException();
+ }
+
+ @Override
+ protected int numRowsToMultiply() {
+ return _nRows;
+ }
+
+ @Override
+ protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) {
+ return new ColGroupDDCLZW(colIndexes, newDictionary, _dataLZW, _nRows, _nUnique, getCachedCounts());
+ }
+
+ @Override
+ public long getExactSizeOnDisk() {
+ long ret = super.getExactSizeOnDisk();
+ ret += 4; // _nRows
+ ret += 4; // _nUnique
+ ret += 4; // _dataLZW.length
+ ret += (long) _dataLZW.length * 4; // lzw codes
+ return ret;
+ }
+
+ @Override
+ public AMapToData getMapToData() {
+ throw new NotImplementedException(); // or decompress and return data...
decompress(_dataLZW, _nUnique, _nRows, _nRows) + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW; + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); + return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); + } + catch(Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { + + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + + } + + @Override + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, + int nColOut) { + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { - } + } - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { - } + } - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { - } + } - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - } + } - @Override - public AColGroup scalarOperation(ScalarOperator op) { - return null; - } + @Override + public AColGroup scalarOperation(ScalarOperator op) { + return null; + } - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - return null; - } + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - return null; - } + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } - @Override - public AColGroup unaryOperation(UnaryOperator op) { - return null; - } + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return null; + } - @Override - public AColGroup append(AColGroup g) { - return null; - } + @Override + public AColGroup append(AColGroup g) { + return null; + } - @Override - protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { - return null; 
- } + @Override + protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { + return null; + } - @Override - public AColGroup recompress() { - return null; - } + @Override + public AColGroup recompress() { + return null; + } - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - return null; - } + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + return null; + } - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return null; - } + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return null; + } - @Override - protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + @Override + protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - } + } - @Override - protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - } + } - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - return new AColGroup[0]; - } + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + return new AColGroup[0]; + } - @Override - protected boolean allowShallowIdentityRightMult() { - return false; - } + @Override + protected boolean allowShallowIdentityRightMult() { + return false; + } - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - return null; - } + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + return null; + } - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - } + } - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - } + } - @Override - protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + @Override + protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - } + } - @Override - protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + @Override + protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - } + } - @Override - protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + @Override + protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - } + } - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - } + } - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - } + } - @Override - protected int[] getCounts(int[] out) { - return new int[0]; - } + @Override + protected int[] getCounts(int[] out) { + return new int[0]; 
+		// If this threw an exception, the test would not work.
+	}
-	@Override
-	protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) {
+	@Override
+	protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) {
-	}
+	}
-	@Override
-	protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) {
+	@Override
+	protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) {
-	}
+	}
-	@Override
-	protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
+	@Override
+	protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
-	}
+	}
 }
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java
new file mode 100644
index 00000000000..dfc83673a90
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.colgroup;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.EnumSet;
+
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.CompressionSettingsBuilder;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup;
+import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC;
+import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory;
+import org.apache.sysds.runtime.compress.colgroup.ColGroupIO;
+import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
+import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
+import org.apache.sysds.runtime.compress.estim.ComEstExact;
+import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo;
+import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
+import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.Divide;
+import org.apache.sysds.runtime.functionobjects.Equals;
+import org.apache.sysds.runtime.functionobjects.Multiply;
+import org.apache.sysds.runtime.functionobjects.GreaterThan;
+import org.apache.sysds.runtime.functionobjects.Minus;
+import org.apache.sysds.runtime.functionobjects.Plus;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
+import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
+import org.apache.sysds.runtime.util.DataConverter;
+import org.junit.Test;
+
+public class ColGroupDDCLZWTest {
+	protected static final Log LOG = LogFactory.getLog(ColGroupDDCLZWTest.class.getName());
+
+	// TODO: use csb instead of create.
+	/*CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0)
+		.setValidCompressions(EnumSet.of(AColGroup.CompressionType.DDCLZW))
+		.setTransposeInput("false");
+	CompressionSettings cs = csb.create();
+
+	final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes);
+	CompressedSizeInfo csi = new CompressedSizeInfo(cgi);
+	AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0);*/
+
+}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
index 4f02ce97ae7..dd06226e093 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java
@@ -36,272 +36,265 @@ public class ColGroupDDCTest {
-	protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName());
-
-	@Test
-	public void testConvertToDDCLZWBasic() {
-		// TODO: new method for comparison
-		IColIndex colIndexes = ColIndexFactory.create(2);
-		double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
-		Dictionary dict = Dictionary.create(dictValues);
-
-		int[] src = new int[]{
-			// repeating base pattern
-			0,0,2, 0, 2, 1, 0, 2, 1, 0, 2,
-			2, 0, 2, 1, 0, 2, 1, 0, 2,
-
-			// variation / shifted pattern
-			1, 0, 1, 2, 0, 1, 2, 0, 1,
-			1, 0, 1, 2, 0, 1, 2, 0, 1,
-
-			// longer runs (good for phrase growth)
-			2, 2, 2, 2, 2,
-			0, 0, 0, 0, 0,
-			1, 1, 1, 1, 1,
-
-			// mixed noise
-			2, 1, 0, 2, 1, 0, 2, 1, 0,
-			0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1,
-
-			// repeating tail (tests dictionary reuse)
-			2, 0, 2, 1, 0, 2, 1, 0, 2,
-			2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1
-		};
-
-		final int nRows = src.length;
-		final int nUnique = 3;
-		AMapToData data = MapToFactory.create(nRows, nUnique);
-		for (int i = 0; i < nRows; i++)
-			data.set(i, src[i]);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDDCLZW();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDDCLZW);
-
-		ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result;
-		AColGroup ddclzwDecompressed = ddclzw.convertToDDC();
-
-		assertNotNull(ddclzwDecompressed);
-		assertTrue(ddclzwDecompressed instanceof ColGroupDDC);
-
-		ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed;
-
-		AMapToData d1 = ddc.getMapToData();
-		AMapToData d2 = ddc2.getMapToData();
-
-		assertEquals(d1.size(), d2.size());
-		assertEquals(d1.getUnique(), d2.getUnique());
-		for (int i = 0; i < d1.size(); i++)
-			assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i));
-
-		assertEquals(ddc.getColIndices(), ddc2.getColIndices());
-
-		// Test partial decompression:
-		// index is the number of entries to decode (rows 0 to index-1)
-		int index = 10;
-		ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index);
-
-		AMapToData d3 = ddcIndex.getMapToData();
-		assertEquals(index, d3.size());
-		assertEquals(ddc.getColIndices(), ddcIndex.getColIndices());
-
-		for(int i = 0; i < index; i++){
-			assertEquals(d1.getIndex(i), d3.getIndex(i));
-		}
-
-		// Test sliceRows
-		int low = 3;
-		int high = 10;
-		AColGroup slice = ddclzw.sliceRows(low, high);
-		if(slice instanceof ColGroupDDCLZW ddclzwslice){
-			ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC();
-			ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high);
-
-			AMapToData d4 = ddcSlice.getMapToData();
-			AMapToData d5 = ddcSlice2.getMapToData();
-
-			assertEquals(d5.size(), d4.size());
-			assertEquals(d5.getUnique(), d4.getUnique());
-
-			for (int i = 0; i < d4.size(); i++)
-				assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i));
-		}
-
-	}
-
-	@Test
-	public void testConvertToDeltaDDCBasic() {
-		IColIndex colIndexes = ColIndexFactory.create(2);
-		double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
-		Dictionary dict = Dictionary.create(dictValues);
-		AMapToData data = MapToFactory.create(3, 3);
-		data.set(0, 0);
-		data.set(1, 1);
-		data.set(2, 2);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDeltaDDC();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDeltaDDC);
-		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
-
-		MatrixBlock mb = new MatrixBlock(3, 2, false);
-		mb.allocateDenseBlock();
-		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
-
-		assertEquals(10.0, mb.get(0, 0), 0.0);
-		assertEquals(20.0, mb.get(0, 1), 0.0);
-		assertEquals(11.0, mb.get(1, 0), 0.0);
-		assertEquals(21.0, mb.get(1, 1), 0.0);
-		assertEquals(12.0, mb.get(2, 0), 0.0);
-		assertEquals(22.0, mb.get(2, 1), 0.0);
-	}
-
-	@Test
-	public void testConvertToDeltaDDCSingleColumn() {
-		IColIndex colIndexes = ColIndexFactory.create(1);
-		double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0};
-		Dictionary dict = Dictionary.create(dictValues);
-		AMapToData data = MapToFactory.create(5, 5);
-		for (int i = 0; i < 5; i++)
-			data.set(i, i);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDeltaDDC();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDeltaDDC);
-		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
-
-		MatrixBlock mb = new MatrixBlock(5, 1, false);
-		mb.allocateDenseBlock();
-		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5);
-
-		assertEquals(1.0, mb.get(0, 0), 0.0);
-		assertEquals(2.0, mb.get(1, 0), 0.0);
-		assertEquals(3.0, mb.get(2, 0), 0.0);
-		assertEquals(4.0, mb.get(3, 0), 0.0);
-		assertEquals(5.0, mb.get(4, 0), 0.0);
-	}
-
-	@Test
-	public void testConvertToDeltaDDCWithRepeatedValues() {
-		IColIndex colIndexes = ColIndexFactory.create(2);
-		double[] dictValues = new double[]{10.0, 20.0, 10.0, 20.0, 10.0, 20.0};
-		Dictionary dict = Dictionary.create(dictValues);
-		AMapToData data = MapToFactory.create(3, 3);
-		data.set(0, 0);
-		data.set(1, 1);
-		data.set(2, 2);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDeltaDDC();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDeltaDDC);
-		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
-
-		MatrixBlock mb = new MatrixBlock(3, 2, false);
-		mb.allocateDenseBlock();
-		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
-
-		assertEquals(10.0, mb.get(0, 0), 0.0);
-		assertEquals(20.0, mb.get(0, 1), 0.0);
-		assertEquals(10.0, mb.get(1, 0), 0.0);
-		assertEquals(20.0, mb.get(1, 1), 0.0);
-		assertEquals(10.0, mb.get(2, 0), 0.0);
-		assertEquals(20.0, mb.get(2, 1), 0.0);
-	}
-
-	@Test
-	public void testConvertToDeltaDDCWithNegativeDeltas() {
-		IColIndex colIndexes = ColIndexFactory.create(2);
-		double[] dictValues = new double[]{10.0, 20.0, 8.0, 15.0, 12.0, 25.0};
-		Dictionary dict = Dictionary.create(dictValues);
-		AMapToData data = MapToFactory.create(3, 3);
-		data.set(0, 0);
-		data.set(1, 1);
-		data.set(2, 2);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDeltaDDC();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDeltaDDC);
-		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
-
-		MatrixBlock mb = new MatrixBlock(3, 2, false);
-		mb.allocateDenseBlock();
-		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
-
-		assertEquals(10.0, mb.get(0, 0), 0.0);
-		assertEquals(20.0, mb.get(0, 1), 0.0);
-		assertEquals(8.0, mb.get(1, 0), 0.0);
-		assertEquals(15.0, mb.get(1, 1), 0.0);
-		assertEquals(12.0, mb.get(2, 0), 0.0);
-		assertEquals(25.0, mb.get(2, 1), 0.0);
-	}
-
-	@Test
-	public void testConvertToDeltaDDCWithZeroDeltas() {
-		IColIndex colIndexes = ColIndexFactory.create(2);
-		double[] dictValues = new double[]{5.0, 0.0, 5.0, 0.0, 0.0, 5.0};
-		Dictionary dict = Dictionary.create(dictValues);
-		AMapToData data = MapToFactory.create(3, 3);
-		data.set(0, 0);
-		data.set(1, 1);
-		data.set(2, 2);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDeltaDDC();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDeltaDDC);
-		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
-
-		MatrixBlock mb = new MatrixBlock(3, 2, false);
-		mb.allocateDenseBlock();
-		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
-
-		assertEquals(5.0, mb.get(0, 0), 0.0);
-		assertEquals(0.0, mb.get(0, 1), 0.0);
-		assertEquals(5.0, mb.get(1, 0), 0.0);
-		assertEquals(0.0, mb.get(1, 1), 0.0);
-		assertEquals(0.0, mb.get(2, 0), 0.0);
-		assertEquals(5.0, mb.get(2, 1), 0.0);
-	}
-
-	@Test
-	public void testConvertToDeltaDDCMultipleUniqueDeltas() {
-		IColIndex colIndexes = ColIndexFactory.create(2);
-		double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
-		Dictionary dict = Dictionary.create(dictValues);
-		AMapToData data = MapToFactory.create(4, 4);
-		for (int i = 0; i < 4; i++)
-			data.set(i, i);
-
-		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-		AColGroup result = ddc.convertToDeltaDDC();
-
-		assertNotNull(result);
-		assertTrue(result instanceof ColGroupDeltaDDC);
-		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
-
-		MatrixBlock mb = new MatrixBlock(4, 2, false);
-		mb.allocateDenseBlock();
-		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4);
-
-		assertEquals(1.0, mb.get(0, 0), 0.0);
-		assertEquals(2.0, mb.get(0, 1), 0.0);
-		assertEquals(3.0, mb.get(1, 0), 0.0);
-		assertEquals(4.0, mb.get(1, 1), 0.0);
-		assertEquals(5.0, mb.get(2, 0), 0.0);
-		assertEquals(6.0, mb.get(2, 1), 0.0);
-		assertEquals(7.0, mb.get(3, 0), 0.0);
-		assertEquals(8.0, mb.get(3, 1), 0.0);
-	}
+	protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName());
+
+	@Test
+	public void testConvertToDDCLZWBasic() {
+		// TODO: new method for comparison
+		IColIndex colIndexes = ColIndexFactory.create(2);
+		double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
+		Dictionary dict = Dictionary.create(dictValues);
+
+		int[] src = new int[] {
+			// repeating base pattern
+			0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2,
+
+			// variation / shifted pattern
+			1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1,
+
+			// longer runs (good for phrase growth)
+			2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+
+			// mixed noise
+			2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1,
+
+			// repeating tail (tests dictionary reuse)
+			2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1};
+
+		final int nRows = src.length;
+		final int nUnique = 3;
+		AMapToData data = MapToFactory.create(nRows, nUnique);
+		for(int i = 0; i < nRows; i++)
+			data.set(i, src[i]);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDDCLZW();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDDCLZW);
+
+		ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result;
+		AColGroup ddclzwDecompressed = ddclzw.convertToDDC();
+
+		assertNotNull(ddclzwDecompressed);
+		assertTrue(ddclzwDecompressed instanceof ColGroupDDC);
+
+		ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed;
+
+		AMapToData d1 = ddc.getMapToData();
+		AMapToData d2 = ddc2.getMapToData();
+
+		assertEquals(d1.size(), d2.size());
+		assertEquals(d1.getUnique(), d2.getUnique());
+		for(int i = 0; i < d1.size(); i++)
+			assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i));
+
+		assertEquals(ddc.getColIndices(), ddc2.getColIndices());
+
+		// Test partial decompression:
+		// index is the number of entries to decode (rows 0 to index-1)
+		int index = 10;
+		ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index);
+
+		AMapToData d3 = ddcIndex.getMapToData();
+		assertEquals(index, d3.size());
+		assertEquals(ddc.getColIndices(), ddcIndex.getColIndices());
+
+		for(int i = 0; i < index; i++) {
+			assertEquals(d1.getIndex(i), d3.getIndex(i));
+		}
+
+		// Test sliceRows
+		int low = 3;
+		int high = 10;
+		AColGroup slice = ddclzw.sliceRows(low, high);
+		if(slice instanceof ColGroupDDCLZW ddclzwslice) {
+			ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC();
+			ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high);
+
+			AMapToData d4 = ddcSlice.getMapToData();
+			AMapToData d5 = ddcSlice2.getMapToData();
+
+			assertEquals(d5.size(), d4.size());
+			assertEquals(d5.getUnique(), d4.getUnique());
+
+			for(int i = 0; i < d4.size(); i++)
+				assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i));
+		}
+
+	}
+
+	@Test
+	public void testConvertToDeltaDDCBasic() {
+		IColIndex colIndexes = ColIndexFactory.create(2);
+		double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
+		Dictionary dict = Dictionary.create(dictValues);
+		AMapToData data = MapToFactory.create(3, 3);
+		data.set(0, 0);
+		data.set(1, 1);
+		data.set(2, 2);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDeltaDDC();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDeltaDDC);
+		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
+
+		MatrixBlock mb = new MatrixBlock(3, 2, false);
+		mb.allocateDenseBlock();
+		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
+
+		assertEquals(10.0, mb.get(0, 0), 0.0);
+		assertEquals(20.0, mb.get(0, 1), 0.0);
+		assertEquals(11.0, mb.get(1, 0), 0.0);
+		assertEquals(21.0, mb.get(1, 1), 0.0);
+		assertEquals(12.0, mb.get(2, 0), 0.0);
+		assertEquals(22.0, mb.get(2, 1), 0.0);
+	}
+
+	@Test
+	public void testConvertToDeltaDDCSingleColumn() {
+		IColIndex colIndexes = ColIndexFactory.create(1);
+		double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0};
+		Dictionary dict = Dictionary.create(dictValues);
+		AMapToData data = MapToFactory.create(5, 5);
+		for(int i = 0; i < 5; i++)
+			data.set(i, i);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDeltaDDC();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDeltaDDC);
+		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
+
+		MatrixBlock mb = new MatrixBlock(5, 1, false);
+		mb.allocateDenseBlock();
+		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5);
+
+		assertEquals(1.0, mb.get(0, 0), 0.0);
+		assertEquals(2.0, mb.get(1, 0), 0.0);
+		assertEquals(3.0, mb.get(2, 0), 0.0);
+		assertEquals(4.0, mb.get(3, 0), 0.0);
+		assertEquals(5.0, mb.get(4, 0), 0.0);
+	}
+
+	@Test
+	public void testConvertToDeltaDDCWithRepeatedValues() {
+		IColIndex colIndexes = ColIndexFactory.create(2);
+		double[] dictValues = new double[] {10.0, 20.0, 10.0, 20.0, 10.0, 20.0};
+		Dictionary dict = Dictionary.create(dictValues);
+		AMapToData data = MapToFactory.create(3, 3);
+		data.set(0, 0);
+		data.set(1, 1);
+		data.set(2, 2);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDeltaDDC();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDeltaDDC);
+		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
+
+		MatrixBlock mb = new MatrixBlock(3, 2, false);
+		mb.allocateDenseBlock();
+		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
+
+		assertEquals(10.0, mb.get(0, 0), 0.0);
+		assertEquals(20.0, mb.get(0, 1), 0.0);
+		assertEquals(10.0, mb.get(1, 0), 0.0);
+		assertEquals(20.0, mb.get(1, 1), 0.0);
+		assertEquals(10.0, mb.get(2, 0), 0.0);
+		assertEquals(20.0, mb.get(2, 1), 0.0);
+	}
+
+	@Test
+	public void testConvertToDeltaDDCWithNegativeDeltas() {
+		IColIndex colIndexes = ColIndexFactory.create(2);
+		double[] dictValues = new double[] {10.0, 20.0, 8.0, 15.0, 12.0, 25.0};
+		Dictionary dict = Dictionary.create(dictValues);
+		AMapToData data = MapToFactory.create(3, 3);
+		data.set(0, 0);
+		data.set(1, 1);
+		data.set(2, 2);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDeltaDDC();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDeltaDDC);
+		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
+
+		MatrixBlock mb = new MatrixBlock(3, 2, false);
+		mb.allocateDenseBlock();
+		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
+
+		assertEquals(10.0, mb.get(0, 0), 0.0);
+		assertEquals(20.0, mb.get(0, 1), 0.0);
+		assertEquals(8.0, mb.get(1, 0), 0.0);
+		assertEquals(15.0, mb.get(1, 1), 0.0);
+		assertEquals(12.0, mb.get(2, 0), 0.0);
+		assertEquals(25.0, mb.get(2, 1), 0.0);
+	}
+
+	@Test
+	public void testConvertToDeltaDDCWithZeroDeltas() {
+		IColIndex colIndexes = ColIndexFactory.create(2);
+		double[] dictValues = new double[] {5.0, 0.0, 5.0, 0.0, 0.0, 5.0};
+		Dictionary dict = Dictionary.create(dictValues);
+		AMapToData data = MapToFactory.create(3, 3);
+		data.set(0, 0);
+		data.set(1, 1);
+		data.set(2, 2);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDeltaDDC();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDeltaDDC);
+		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
+
+		MatrixBlock mb = new MatrixBlock(3, 2, false);
+		mb.allocateDenseBlock();
+		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3);
+
+		assertEquals(5.0, mb.get(0, 0), 0.0);
+		assertEquals(0.0, mb.get(0, 1), 0.0);
+		assertEquals(5.0, mb.get(1, 0), 0.0);
+		assertEquals(0.0, mb.get(1, 1), 0.0);
+		assertEquals(0.0, mb.get(2, 0), 0.0);
+		assertEquals(5.0, mb.get(2, 1), 0.0);
+	}
+
+	@Test
+	public void testConvertToDeltaDDCMultipleUniqueDeltas() {
+		IColIndex colIndexes = ColIndexFactory.create(2);
+		double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
+		Dictionary dict = Dictionary.create(dictValues);
+		AMapToData data = MapToFactory.create(4, 4);
+		for(int i = 0; i < 4; i++)
+			data.set(i, i);
+
+		ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+		AColGroup result = ddc.convertToDeltaDDC();
+
+		assertNotNull(result);
+		assertTrue(result instanceof ColGroupDeltaDDC);
+		ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result;
+
+		MatrixBlock mb = new MatrixBlock(4, 2, false);
+		mb.allocateDenseBlock();
+		deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4);
+
+		assertEquals(1.0, mb.get(0, 0), 0.0);
+		assertEquals(2.0, mb.get(0, 1), 0.0);
+		assertEquals(3.0, mb.get(1, 0), 0.0);
+		assertEquals(4.0, mb.get(1, 1), 0.0);
+		assertEquals(5.0, mb.get(2, 0), 0.0);
+		assertEquals(6.0, mb.get(2, 1), 0.0);
+		assertEquals(7.0, mb.get(3, 0), 0.0);
+		assertEquals(8.0, mb.get(3, 1), 0.0);
+	}
 }
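
A note on the round trip exercised by testConvertToDDCLZWBasic above: convertToDDCLZW/convertToDDC amount to plain LZW over the integer mapping vector, with the nUnique dictionary indices as the initial alphabet. The following is a minimal, self-contained sketch of that idea on raw int[] data; the class name LzwMappingSketch and the int[]-based signatures are invented here for illustration and are not this patch's API (the real code operates on AMapToData and, judging by the imports, would key phrases in a Long2IntLinkedOpenHashMap rather than a plain java.util.HashMap). The decoder mirrors the decompress(_dataLZW, _nUnique, _nRows, ru) shape used in sliceRows and stops after ru entries, which is the property convertToDDC(index) relies on.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class LzwMappingSketch {

	// Encode a mapping vector whose entries are dictionary indices in [0, nUnique).
	// Codes below nUnique denote single symbols; composite phrases get codes from
	// nUnique upwards and are keyed as (prefixCode, nextSymbol) packed into one long.
	static int[] compress(int[] data, int nUnique) {
		Map<Long, Integer> phrases = new HashMap<>();
		List<Integer> out = new ArrayList<>();
		int nextCode = nUnique;
		int w = data[0]; // code of the phrase read so far (data assumed non-empty)
		for(int i = 1; i < data.length; i++) {
			int k = data[i];
			long wk = ((long) w << 32) | k;
			Integer code = phrases.get(wk);
			if(code != null)
				w = code; // known phrase: keep extending
			else {
				out.add(w); // emit the current phrase ...
				phrases.put(wk, nextCode++); // ... learn phrase w + k ...
				w = k; // ... and restart from symbol k
			}
		}
		out.add(w);
		return out.stream().mapToInt(Integer::intValue).toArray();
	}

	// Decode the first ru entries (ru <= nRows) of the original mapping vector.
	static int[] decompress(int[] codes, int nUnique, int nRows, int ru) {
		// A composite code c is stored as prefix[c] (a code) plus last[c] (its final symbol).
		int[] prefix = new int[nUnique + codes.length];
		int[] last = new int[nUnique + codes.length];
		int nextCode = nUnique;
		int[] out = new int[ru];
		int[] buf = new int[nRows]; // scratch for one phrase, written back to front
		int n = 0, prev = -1;
		for(int ci = 0; ci < codes.length && n < ru; ci++) {
			int c = codes[ci];
			if(prev >= 0) {
				// learn phrase(prev) + first symbol; c == nextCode is the KwKwK corner case
				last[nextCode] = firstSymbol(c == nextCode ? prev : c, prefix, nUnique);
				prefix[nextCode] = prev;
				nextCode++;
			}
			int len = 0, p = c;
			while(p >= nUnique) { // expand the phrase for c in reverse
				buf[len++] = last[p];
				p = prefix[p];
			}
			buf[len++] = p;
			for(int j = len - 1; j >= 0 && n < ru; j--)
				out[n++] = buf[j]; // copy forward, truncating at ru
			prev = c;
		}
		return out;
	}

	private static int firstSymbol(int code, int[] prefix, int nUnique) {
		while(code >= nUnique)
			code = prefix[code];
		return code;
	}

	public static void main(String[] args) {
		int[] map = {0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1};
		int[] codes = compress(map, 3);
		int[] full = decompress(codes, 3, map.length, map.length);
		int[] head = decompress(codes, 3, map.length, 10); // partial decode, as in convertToDDC(10)
		System.out.println(java.util.Arrays.equals(map, full)); // true
		System.out.println(head.length); // 10
	}
}

Since every emitted code adds exactly one phrase to the decoder's table, decoding is a single linear pass over the code stream, and a prefix of the rows can be materialized without expanding the remainder; the encoder pays one hash lookup per input symbol.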