Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
ee9e94f
Second skeleton for ColGroupDDCLZW using the IMapToDataInterface and …
florian-jobs Jan 7, 2026
007611c
Idea:
florian-jobs Jan 8, 2026
b1bf906
More TODOS written and cleaned up project.
florian-jobs Jan 9, 2026
8027458
Dictionary initialisierung für Compress und rudimentäre Implementieru…
Jan 10, 2026
ef3b834
Uebersichtlichkeit verbessert
Jan 10, 2026
9886821
Minor error fixing. Redesigned compress method.
florian-jobs Jan 11, 2026
e0d5d75
Added read/write methods to serialize and deserialize from stream.
florian-jobs Jan 11, 2026
beb4613
Commented code, error handling for compress. next step make compress …
florian-jobs Jan 11, 2026
620e03a
Added first stages of tests. improved compression and decompression a…
florian-jobs Jan 11, 2026
b7911d7
Added convertToDDCLZW() method to ColGroupDDC Class. Added convertToD…
florian-jobs Jan 12, 2026
1dfe91e
Started working on ColGroupDDCLZW's other methods that need to be imp…
florian-jobs Jan 12, 2026
3156863
test commit
florian-jobs Jan 13, 2026
10d5776
[SYSTEMDS-3779] Added new Compression and ColGroup Types DDCLZW.
florian-jobs Jan 13, 2026
3c9e2ed
[SYSTEMDS-3779] Introduce initial ColGroupDDCLZW with LZW-compressed …
florian-jobs Jan 13, 2026
a8df1fe
Decompression to a specific index
Jan 15, 2026
96cb6e9
slice Rows
Jan 16, 2026
a30cc91
[SYSTEMDS-3779] Add immediate stop after a certain index in decom…
florian-jobs Jan 16, 2026
d39fad0
[SYSTEMDS-3779] Reverted formatting of ColGroupDDC,ColGroupDDCLZW,Col…
florian-jobs Jan 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public abstract class AColGroup implements Serializable {

/** Public super types of compression ColGroups supported */
public static enum CompressionType {
UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional;
UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, DDCLZW, LinearFunctional;

public boolean isDense() {
return this == DDC || this == CONST || this == DDCFOR || this == DDCFOR;
Expand All @@ -86,7 +86,7 @@ public boolean isSDC() {
* Protected such that outside the ColGroup package it should be unknown which specific subtype is used.
*/
protected static enum ColGroupType {
UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, SDCFOR, DDCFOR, DeltaDDC,
UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, SDCFOR, DDCFOR, DDCLZW, DeltaDDC,
LinearFunctional;
}

Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert the indentation to tabs again, to avoid changing the DDC base class.

Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, i
throw new DMLCompressionException("Invalid length of the data. is zero");

if(data.getUnique() != dict.getNumberOfValues(colIndexes.size()))
throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has "
+ dict.getNumberOfValues(colIndexes.size()));
throw new DMLCompressionException(
"Invalid map to dict Map has:" + data.getUnique() + " while dict has " +
dict.getNumberOfValues(colIndexes.size()));
int[] c = getCounts();
if(c.length != dict.getNumberOfValues(colIndexes.size()))
throw new DMLCompressionException("Invalid DDC Construction");
Expand Down Expand Up @@ -175,8 +176,8 @@ private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBl
decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data);
}

private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR,
double[] values, AMapToData data) {
private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru,
int offR, double[] values, AMapToData data) {
data.decompressToRange(c, rl, ru, offR, values);

}
Expand Down Expand Up @@ -375,15 +376,17 @@ private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock r
return;
else if(matrix.isInSparseFormat()) {
if(cl != 0 || cu != _data.size())
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu);
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl,
cu);
else
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru);
}
else if(!matrix.getDenseBlock().isContiguous())
lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl,
cu);
lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru,
cl, cu);
else
lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu);
lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl,
cu);
}

private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals,
Expand Down Expand Up @@ -538,7 +541,8 @@ private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, in
lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu);
}

private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) {
private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl,
int cu) {
final DenseBlock db = result.getDenseBlock();
final SparseBlock sb = matrix.getSparseBlock();

Expand Down Expand Up @@ -618,7 +622,8 @@ public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, i
}

@Override
public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) {
public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl,
int cru) {
if(_dict instanceof IdentityDictionary)
identityRightDecompressingMult(right, ret, rl, ru, crl, cru);
else
Expand Down Expand Up @@ -672,7 +677,8 @@ private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, i
}
}

final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) {
final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k,
int vLen, DoubleVector vVec) {
vVec = vVec.broadcast(aa);
final int offj = k * jd;
final int end = endT + offj;
Expand Down Expand Up @@ -985,8 +991,8 @@ private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos,
}
}

private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2,
int cl, int cu) {
private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2,
int pos2, int cl, int cu) {
IdentityDictionary a = (IdentityDictionary) _dict;

final int firstCol = pos2 + _colIndexes.get(0);
Expand Down Expand Up @@ -1112,13 +1118,13 @@ protected boolean allowShallowIdentityRightMult() {
public AColGroup convertToDeltaDDC() {
int numCols = _colIndexes.size();
int numRows = _data.size();

DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64));
double[] rowDelta = new double[numCols];
double[] prevRow = new double[numCols];
DblArray dblArray = new DblArray(rowDelta);
int[] rowToDictId = new int[numRows];

double[] dictVals = _dict.getValues();

for(int i = 0; i < numRows; i++) {
Expand All @@ -1129,18 +1135,19 @@ public AColGroup convertToDeltaDDC() {
if(i == 0) {
rowDelta[j] = val;
prevRow[j] = val;
} else {
}
else {
rowDelta[j] = val - prevRow[j];
prevRow[j] = val;
}
}

rowToDictId[i] = map.increment(dblArray);
}

if(map.size() == 0)
return new ColGroupEmpty(_colIndexes);

ACount<DblArray>[] vals = map.extractValues();
final int nVals = vals.length;
final double[] dictValues = new double[nVals * numCols];
Expand All @@ -1153,7 +1160,7 @@ public AColGroup convertToDeltaDDC() {
oldIdToNewId[dac.id] = i;
idx += numCols;
}

DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols);
AMapToData newData = MapToFactory.create(numRows, nVals);
for(int i = 0; i < numRows; i++) {
Expand All @@ -1162,4 +1169,7 @@ public AColGroup convertToDeltaDDC() {
return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null);
}

	/**
	 * Convert this DDC column group into a DDCLZW column group, reusing this group's column indexes,
	 * dictionary, and mapping data unchanged (the cached counts argument is passed as null, so counts
	 * are recomputed lazily by the new group if needed).
	 *
	 * @return a ColGroupDDCLZW encoding of the same columns and values
	 */
	public AColGroup convertToDDCLZW() {
		return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null);
	}
}
Loading