Skip to content

Commit

Permalink
libzstd 1.4.5 with dict training fix
Browse files Browse the repository at this point in the history
ZDICT_optimizeTrainFromBuffer_[fast]Cover fails to select dictionary on some inputs: facebook/zstd#2371
  • Loading branch information
dscheg committed Nov 28, 2020
1 parent ce98e63 commit c901529
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 31 deletions.
12 changes: 10 additions & 2 deletions ZstdNet.Tests/Binding_Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -473,10 +473,18 @@ public void CompressAndDecomress_throwsDstSizeTooSmall_Over2GB([Values(false, tr
// Stress test shown as a diff: removed and added lines of the commit are interleaved below
// (the hunk header reports 10 additions and 2 deletions).
// The added version trains one dictionary from a fixed set of samples, checks its size bounds,
// then retrains from the same samples many times in parallel and asserts that every result is
// byte-for-byte equal to the first dictionary.
[Test, Explicit("stress")]
public void TrainDictionaryParallel()
{
// Removed by this commit: the reference dictionary previously came from BuildDictionary().
var dict = BuildDictionary();
// Added by this commit: 100000 bytes where byte i is the low 8 bits of i * i.
// unchecked: i * i exceeds int.MaxValue for large i and the cast to byte truncates.
var buffer = Enumerable.Range(0, 100000).Select(i => unchecked((byte)(i * i))).ToArray();
// 100 samples; sample i starts at offset i of the buffer and is 200 - i bytes long.
var samples = Enumerable.Range(0, 100)
.Select(i => buffer.Skip(i).Take(200 - i).ToArray())
.ToArray();

// Reference dictionary: must be non-empty and no larger than the default capacity.
var dict = DictBuilder.TrainFromBuffer(samples);
Assert.Greater(dict.Length, 0);
Assert.LessOrEqual(dict.Length, DictBuilder.DefaultDictCapacity);

// 100000 iterations, with the degree of parallelism set to four times the processor count.
Enumerable.Range(0, 100000)
.AsParallel().WithDegreeOfParallelism(Environment.ProcessorCount * 4)
// Removed by this commit: compared against a fresh BuildDictionary() result.
.ForAll(_ => Assert.IsTrue(dict.SequenceEqual(BuildDictionary())));
// Added by this commit: compares against a fresh training run over the same samples.
.ForAll(_ => Assert.IsTrue(dict.SequenceEqual(DictBuilder.TrainFromBuffer(samples))));
}

private static byte[] BuildDictionary()
Expand Down
58 changes: 29 additions & 29 deletions ZstdNet/DictBuilder.cs
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
// Diff view of ZstdNet/DictBuilder.cs: removed and added lines of the commit are interleaved.
// The hunk header reports 29 additions and 29 deletions over a 33-line file, so only four
// lines are unchanged context. Most removed/added pairs look identical here (presumably a
// whitespace or line-ending change that this view does not show - TODO confirm).
// The visible token changes are ms.ToArray() -> ms.GetBuffer() and "if (" -> "if(".
using System;
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using size_t = System.UIntPtr;

namespace ZstdNet
{
public static class DictBuilder
{
// Removed version of TrainFromBuffer (old side of the diff).
public static byte[] TrainFromBuffer(IEnumerable<byte[]> samples, int dictCapacity = DefaultDictCapacity)
{
var ms = new MemoryStream();
var samplesSizes = samples.Select(sample =>
{
ms.Write(sample, 0, sample.Length);
return (size_t)sample.Length;
}).ToArray();

var dictBuffer = new byte[dictCapacity];
var dictSize = (int)ExternMethods
// Old call: ms.ToArray() passes a copy of exactly the bytes written to the stream.
.ZDICT_trainFromBuffer(dictBuffer, (size_t)dictCapacity, ms.ToArray(), samplesSizes, (uint)samplesSizes.Length)
// Added version of the file (new side of the diff) starts here.
using System.Linq;
using size_t = System.UIntPtr;

namespace ZstdNet
{
public static class DictBuilder
{
// Trains a dictionary from the given samples and returns it as a byte array.
// samples: the sample buffers; they are concatenated in enumeration order.
// dictCapacity: size of the buffer allocated for the dictionary (defaults to DefaultDictCapacity).
// Returns: the dictionary buffer, resized to the size reported by the native call.
public static byte[] TrainFromBuffer(IEnumerable<byte[]> samples, int dictCapacity = DefaultDictCapacity)
{
var ms = new MemoryStream();
// The Select is lazy; ToArray() forces it, so every sample is appended to ms and
// its length recorded before the native call below.
var samplesSizes = samples.Select(sample =>
{
ms.Write(sample, 0, sample.Length);
return (size_t)sample.Length;
}).ToArray();

var dictBuffer = new byte[dictCapacity];
var dictSize = (int)ExternMethods
// New call: ms.GetBuffer() passes the stream's internal array without copying. That array
// can be longer than the bytes written; presumably the native side reads only the
// lengths listed in samplesSizes - TODO confirm against zdict.h.
.ZDICT_trainFromBuffer(dictBuffer, (size_t)dictCapacity, ms.GetBuffer(), samplesSizes, (uint)samplesSizes.Length)
// NOTE(review): EnsureZdictSuccess is defined elsewhere; presumably it throws on a ZDICT error code - confirm.
.EnsureZdictSuccess();

// Trim the buffer when the reported dictionary size differs from the capacity.
// The first "if" line is the removed one, the second is the added one (spacing change only).
if (dictCapacity != dictSize)
if(dictCapacity != dictSize)
Array.Resize(ref dictBuffer, dictSize);

// Removed tail of the file (old side of the diff).
return dictBuffer;
}

public const int DefaultDictCapacity = 112640; // Used by zstd utility by default
}
}

// Added tail of the file (new side of the diff).
return dictBuffer;
}

// 112640 bytes = 110 * 1024.
public const int DefaultDictCapacity = 112640; // Used by zstd utility by default
}
}
Binary file modified ZstdNet/build/x64/libzstd.dll
Binary file not shown.
Binary file modified ZstdNet/build/x86/libzstd.dll
Binary file not shown.

0 comments on commit c901529

Please sign in to comment.