From 7cdc84189b796cf888274a26aa35c4889602ccfe Mon Sep 17 00:00:00 2001 From: Rostislav Kirillov Date: Thu, 18 Jan 2024 03:40:49 +0400 Subject: [PATCH] feat: better unpacker --- src/SyncFaction.Toolbox/Archiver.cs | 177 +++++++++++------- src/SyncFaction.Toolbox/Args/Get.cs | 2 +- src/SyncFaction.Toolbox/Args/Unpack.cs | 50 ++++- .../Models/UnpackSettings.cs | 2 +- 4 files changed, 149 insertions(+), 82 deletions(-) diff --git a/src/SyncFaction.Toolbox/Archiver.cs b/src/SyncFaction.Toolbox/Archiver.cs index c03ea54..4f99d01 100644 --- a/src/SyncFaction.Toolbox/Archiver.cs +++ b/src/SyncFaction.Toolbox/Archiver.cs @@ -55,32 +55,59 @@ public async Task Unpack(UnpackSettings settings, CancellationToken token) var sw = Stopwatch.StartNew(); using var cts = CancellationTokenSource.CreateLinkedTokenSource(token); var metadata = new Metadata(); - var tasks = archivePaths.Select(archivePath => new FileInfo(archivePath)).Select(x => UnpackArchive(x, output, matcher, settings, string.Empty, cts.Token)).ToList(); + var unpackArgsQueue = archivePaths.Select(x => new FileInfo(x)).Select(x => + { + var extension = x.Name.ToLowerInvariant().Split('.').Last(); + var isArchive = KnownArchiveExtensions.Contains(extension); + var isTextureArchive = KnownTextureArchiveExtensions.Contains(extension); + if (isArchive) + { + return new UnpackArgs(ArchiveType.Vpp, x, output, matcher, settings, string.Empty); + } + + if (isTextureArchive) + { + // unpack explicitly given pegs, even if -t is not specified + var (cpu, gpu) = pegArchiver.GetPairFiles(x); + if (cpu is not null && gpu is not null) + { + return new UnpackArgs(ArchiveType.Peg, cpu, output, matcher, settings, string.Empty); + } + } - //var batchSize = Environment.ProcessorCount; - var batchSize = 1; - while (tasks.Any()) + throw new InvalidOperationException($"Unknown archive type [{x.FullName}]"); + }).ToList(); + var runningTasks = new Dictionary>(); + var batchSize = settings.Parallel; + while (unpackArgsQueue.Any()) { token.ThrowIfCancellationRequested(); try { - var batch = tasks.Take(batchSize).ToList(); - var completed = await Task.WhenAny(batch); + var batch = unpackArgsQueue.Take(batchSize).ToList(); + foreach (var x in batch.Where(x => !runningTasks.ContainsKey(x))) + { + runningTasks.Add(x, UnpackArchive(x, cts.Token)); + log.LogDebug("Started new task, running {running}, args {args}", runningTasks.Count, x); + } + var completed = await Task.WhenAny(runningTasks.Values); var result = await completed; metadata.Add(result.RelativePath, result.ArchiveMetadata); - tasks.AddRange(result.MoreTasks); - tasks.Remove(completed); + unpackArgsQueue.AddRange(result.More); + unpackArgsQueue.Remove(result.Args); + runningTasks.Remove(result.Args); } catch (Exception e) { cts.Cancel(); - tasks.Clear(); + unpackArgsQueue.Clear(); log.LogError("Tasks canceled because of exception"); throw; } } - await Task.WhenAll(tasks); + + await Task.WhenAll(runningTasks.Values); if (settings.Metadata) { @@ -109,33 +136,34 @@ private string Serialize(Metadata metadata) return sb.ToString(); } - private async Task UnpackArchive(FileInfo archive, DirectoryInfo output, Matcher matcher, UnpackSettings settings, string relativePath, CancellationToken token) + record UnpackArgs(ArchiveType Type, FileInfo Archive, DirectoryInfo Output, Matcher Matcher, UnpackSettings Settings, string RelativePath); + + enum ArchiveType { - try - { - return await UnpackArchiveInternal(archive, output, matcher, settings, relativePath, token); - } - catch (Exception e) - { - throw new Exception($"Failed {nameof(UnpackArchive)}({archive.FullName}, {output.FullName}, {matcher}, {settings}, {relativePath}, token)", e); - } + Vpp, Peg } - private async Task UnpackTextures(FileInfo archive, DirectoryInfo output, Matcher matcher, UnpackSettings settings, string relativePath, CancellationToken token) + private async Task UnpackArchive(UnpackArgs args, CancellationToken token) { try { - return await UnpackTexturesInternal(archive, output, matcher, settings, relativePath, token); + return args.Type switch + { + ArchiveType.Vpp => await UnpackArchiveInternal(args, token), + ArchiveType.Peg => await UnpackTexturesInternal(args, token), + _ => throw new ArgumentOutOfRangeException() + }; } catch (Exception e) { - throw new Exception($"Failed {nameof(UnpackTextures)}({archive.FullName}, {output.FullName}, {matcher}, {settings}, {relativePath}, token)", e); + throw new Exception($"Failed {nameof(UnpackArchive)}({args.Type}, {args.Archive.FullName}, {args.Output.FullName}, {args.Settings}, {args.RelativePath})", e); } } - private async Task UnpackArchiveInternal(FileInfo archive, DirectoryInfo output, Matcher matcher, UnpackSettings settings, string relativePath, CancellationToken token) + private async Task UnpackArchiveInternal(UnpackArgs args, CancellationToken token) { token.ThrowIfCancellationRequested(); + var (_, archive, output, matcher, settings, relativePath) = args; var outputDir = new DirectoryInfo(Path.Combine(output.FullName, archive.Name)); if (outputDir.Exists) { @@ -159,7 +187,7 @@ private async Task UnpackArchiveInternal(FileInfo archive, Directo var matchedFiles = matcher.Match(vpp.LogicalFiles.Select(x => x.Name)).Files.Select(x => x.Path).ToHashSet(); log.LogInformation("[{archive}]: [{fileGlob}] matched {count} files", archive.Name, settings.FileGlob, matchedFiles.Count); - var tasks = new List>(); + var result = new List(); var metaEntries = new MetaEntries(); foreach (var logicalFile in vpp.LogicalFiles.Where(x => matchedFiles.Contains(x.Name))) { @@ -179,35 +207,34 @@ private async Task UnpackArchiveInternal(FileInfo archive, Directo var isTextureArchive = KnownTextureArchiveExtensions.Contains(extension); await ExtractFile(logicalFile, isXml, outputFile, settings, token); - outputFile.Refresh(); var eHash = await ComputeHash(outputFile); metaEntries.Add(logicalFile.Name, new EntryMetadata(logicalFile.Name, logicalFile.Order, logicalFile.Offset, (ulong) logicalFile.Content.Length, logicalFile.CompressedSize, eHash)); var innerOutputDir = new DirectoryInfo(Path.Combine(outputFile.Directory.FullName, DefaultDir)); if (settings.Recursive && isArchive) { - var task = UnpackArchive(outputFile, innerOutputDir, matcher, settings, archiveRelativePath, token); - tasks.Add(task); + result.Add(new UnpackArgs(ArchiveType.Vpp, outputFile, innerOutputDir, matcher, settings, archiveRelativePath)); } - if (settings.Textures && isTextureArchive) + if (settings.Textures.Any() && isTextureArchive) { - // TODO possible race condition and unpack starts twice? + // NOTE: no race condition because key is always "cpu" file from the pair and tasks are created per unique args key var (cpu, gpu) = pegArchiver.GetPairFiles(outputFile); if (cpu is not null && gpu is not null) { - var task = UnpackTextures(outputFile, innerOutputDir, matcher, settings, archiveRelativePath, token); - tasks.Add(task); + result.Add(new UnpackArgs(ArchiveType.Peg, cpu, innerOutputDir, matcher, settings, archiveRelativePath)); } } } var archiveMetadata = new ArchiveMetadata(vpp.Name, vpp.Mode.ToString(), (ulong) archive.Length, (ulong) matchedFiles.Count, hash, metaEntries); - return new UnpackResult(archiveRelativePath, archiveMetadata, tasks); + return new UnpackResult(archiveRelativePath, archiveMetadata, args, result); } - private async Task UnpackTexturesInternal(FileInfo archive, DirectoryInfo output, Matcher matcher, UnpackSettings settings, string relativePath, CancellationToken token) + private async Task UnpackTexturesInternal(UnpackArgs args, CancellationToken token) { + token.ThrowIfCancellationRequested(); + var (_, archive, output, matcher, settings, relativePath) = args; var outputDir = new DirectoryInfo(Path.Combine(output.FullName, archive.Name)); var (cpu, gpu) = pegArchiver.GetPairFiles(archive); if (cpu is null || gpu is null) @@ -240,37 +267,21 @@ private async Task UnpackTexturesInternal(FileInfo archive, Direct var matchedFiles = matcher.Match(peg.LogicalTextures.Select(x => x.Name)).Files.Select(x => x.Path).ToHashSet(); log.LogInformation("[{archive}]: [{fileGlob}] matched {count} files", archive.Name, settings.FileGlob, matchedFiles.Count); - var tasks = new List>(); + // NOTE: peg containers are not expected to have nested stuff + var result = new List(); var metaEntries = new MetaEntries(); foreach (var logicalTexture in peg.LogicalTextures.Where(x => matchedFiles.Contains(x.Name))) { - var fileName = Path.GetFileNameWithoutExtension(logicalTexture.Name); - // NOTE: names are non-unique - var name = $"{logicalTexture.Order:D4} {fileName}"; - var outputFile = new FileInfo($"{Path.Combine(outputDir.FullName, name)}.dds"); - if (outputFile.Exists) - { - throw new InvalidOperationException($"File [{outputFile.FullName}] exists, can not unpack. Duplicate entries in archive?"); - } - - await ExtractRawTexture(logicalTexture, outputFile, token); - outputFile.Refresh(); - var dHash = await ComputeHash(outputFile); - metaEntries.Add(name, new EntryMetadata(outputFile.Name, logicalTexture.Order, (ulong)logicalTexture.DataOffset, (ulong) logicalTexture.Data.Length, 0, dHash)); - - logicalTexture.Data.Seek(0, SeekOrigin.Begin); - var pngFile = new FileInfo($"{Path.Combine(outputDir.FullName, name)}.png"); - if (pngFile.Exists) + foreach (var textureFormat in args.Settings.Textures) { - throw new InvalidOperationException($"File [{pngFile.FullName}] exists, can not unpack. Duplicate entries in archive?"); + token.ThrowIfCancellationRequested(); + var outputFile = await ExtractTexture(logicalTexture, textureFormat, outputDir, token); + var hash = await ComputeHash(outputFile); + metaEntries.Add(outputFile.Name, new EntryMetadata(outputFile.Name, logicalTexture.Order, (ulong)logicalTexture.DataOffset, (ulong) logicalTexture.Data.Length, 0, hash)); } - await ExtractPngTexture(logicalTexture, pngFile, token); - pngFile.Refresh(); - var pHash = await ComputeHash(pngFile); - metaEntries.Add(pngFile.Name, new EntryMetadata(pngFile.Name, logicalTexture.Order, 0, (ulong) pngFile.Length, 0, pHash)); } - var archiveMetadata = new ArchiveMetadata(peg.Name, "texture", (ulong) archive.Length, (ulong) matchedFiles.Count, $"{cpuHash}_{gpuHash}", metaEntries); - return new UnpackResult(archiveRelativePath, archiveMetadata, tasks); + var archiveMetadata = new ArchiveMetadata(peg.Name, "peg", (ulong) archive.Length, (ulong) matchedFiles.Count, $"{cpuHash}_{gpuHash}", metaEntries); + return new UnpackResult(archiveRelativePath, archiveMetadata, args, result); } private async Task ExtractFile(LogicalFile logicalFile, bool isXml, FileInfo outputFile, UnpackSettings settings, CancellationToken token) @@ -290,21 +301,40 @@ private async Task ExtractFile(LogicalFile logicalFile, bool isXml, FileInfo out { await logicalFile.Content.CopyToAsync(fileStream, token); } + outputFile.Refresh(); } - private async Task ExtractRawTexture(LogicalTexture logicalTexture, FileInfo outputFile, CancellationToken token) - { - await using var fileStream = outputFile.OpenWrite(); - var header = await imageConverter.BuildHeader(logicalTexture, token); - await header.CopyToAsync(fileStream, token); - await logicalTexture.Data.CopyToAsync(fileStream, token); - } - - private async Task ExtractPngTexture(LogicalTexture logicalTexture, FileInfo outputFile, CancellationToken token) + private async Task ExtractTexture(LogicalTexture logicalTexture, TextureFormat format, DirectoryInfo outputDir, CancellationToken token) { - await using var fileStream = outputFile.OpenWrite(); - var image = imageConverter.DecodeFirstFrame(logicalTexture); - await imageConverter.WritePngFile(image, fileStream, token); + var fileName = Path.GetFileNameWithoutExtension(logicalTexture.Name); + // NOTE: names are non-unique + var name = $"{logicalTexture.Order:D4} {fileName}"; + var outputFile = new FileInfo($"{Path.Combine(outputDir.FullName, name)}.{format.ToString().ToLowerInvariant()}"); + if (outputFile.Exists) + { + throw new InvalidOperationException($"File [{outputFile.FullName}] exists, can not unpack. Duplicate entries in archive?"); + } + await using var output = outputFile.OpenWrite(); + switch(format) + { + case TextureFormat.DDS: + var header = await imageConverter.BuildHeader(logicalTexture, token); + await header.CopyToAsync(output, token); + await logicalTexture.Data.CopyToAsync(output, token); + break; + case TextureFormat.PNG: + var image = imageConverter.DecodeFirstFrame(logicalTexture); + await imageConverter.WritePngFile(image, output, token); + break; + case TextureFormat.RAW: + await logicalTexture.Data.CopyToAsync(output, token); + break; + default: + throw new ArgumentOutOfRangeException(nameof(format), format, null); + } + logicalTexture.Data.Seek(0, SeekOrigin.Begin); + outputFile.Refresh(); + return outputFile; } public static async Task ComputeHash(FileInfo file) @@ -341,5 +371,10 @@ public static string ComputeHash(Stream stream) "gvbm_pc", }.ToImmutableHashSet(); - private record UnpackResult(string RelativePath, ArchiveMetadata ArchiveMetadata, IReadOnlyList> MoreTasks); + private record UnpackResult(string RelativePath, ArchiveMetadata ArchiveMetadata, UnpackArgs Args, IReadOnlyList More); + + public enum TextureFormat + { + DDS, PNG, RAW + } } diff --git a/src/SyncFaction.Toolbox/Args/Get.cs b/src/SyncFaction.Toolbox/Args/Get.cs index 83825df..dcfe644 100644 --- a/src/SyncFaction.Toolbox/Args/Get.cs +++ b/src/SyncFaction.Toolbox/Args/Get.cs @@ -41,7 +41,7 @@ public Get() : base(nameof(Get).ToLowerInvariant(), "Extract certain file from v private async Task Handle(string archive, string file, string output, bool xmlFormat, bool force, InvocationContext context, CancellationToken token) { - var settings = new UnpackSettings(archive, file, output, xmlFormat, false, false, false, force); + var settings = new UnpackSettings(archive, file, output, xmlFormat, false, new List(), false, force, 1); var archiver = context.GetHost().Services.GetRequiredService(); await archiver.Unpack(settings, token); return 0; diff --git a/src/SyncFaction.Toolbox/Args/Unpack.cs b/src/SyncFaction.Toolbox/Args/Unpack.cs index ff53339..c3ecf15 100644 --- a/src/SyncFaction.Toolbox/Args/Unpack.cs +++ b/src/SyncFaction.Toolbox/Args/Unpack.cs @@ -2,6 +2,7 @@ using System.CommandLine.Hosting; using System.CommandLine.Invocation; using System.CommandLine.NamingConventionBinder; +using System.Text.Json; using Microsoft.Extensions.DependencyInjection; using SyncFaction.Toolbox.Models; @@ -9,7 +10,8 @@ namespace SyncFaction.Toolbox.Args; public class Unpack : Command { - private readonly Argument archiveArg = new("archive", "vpp_pc to unpack, globs allowed"); + private readonly Argument archiveArg = new("archive", "vpp or peg archive to unpack, globs allowed"); + private readonly Argument fileArg = new("file", () => "*", "file inside archive to extract, globs allowed"); private readonly Argument outputArg = new("output", () => Archiver.DefaultDir, "output path"); private readonly Option xmlFormat = new(new[] @@ -17,28 +19,33 @@ public class Unpack : Command "-x", "--xml-format" }, - "format xml file"); + "format xml-like files (.xtbl .dtdox .gtdox) for readability, some files will become unusable in game"); private readonly Option recursive = new(new[] { "-r", "--recursive" }, - $"unpack nested archives recursively in default subfolder ({Archiver.DefaultDir})"); + $"unpack nested archives (typically .str2_pc) recursively in {Archiver.DefaultDir} subfolder"); - private readonly Option textures = new(new[] + private readonly Option> textures = new(new[] { "-t", "--textures" }, - $"unpack cpeg_pc/cvbm_pc/gpeg_pc/gvbm_pc texture containers"); + () => new List(), + $"unpack textures from containers (.cpeg_pc .cvbm_pc .gpeg_pc .gvbm_pc) in {Archiver.DefaultDir} subfolder. Specify one or more supported formats: dds png raw") + { + ArgumentHelpName = "formats", + AllowMultipleArgumentsPerToken = true + }; private readonly Option metadata = new(new[] { "-m", "--metadata" }, - $"write file with archive information ({Archiver.MetadataFile})"); + $"write {Archiver.MetadataFile} file with archive information: entries, sizes, hashes"); private readonly Option force = new(new[] { @@ -47,21 +54,46 @@ public class Unpack : Command }, "overwrite output if exists"); - public Unpack() : base(nameof(Unpack).ToLowerInvariant(), "Extract vpp_pc to dir") + private readonly Option parallel = new(new[] + { + "-p", + "--parallel" + }, + "number of parallel tasks. Defaults to processor core count. Use 1 for lower RAM usage") { + ArgumentHelpName = "N" + }; + + public override string? Description => @"Extract archive to dir +Supported formats: " + string.Join(" ", Archiver.KnownArchiveExtensions.Concat(Archiver.KnownTextureArchiveExtensions)); + + public Unpack() : base(nameof(Unpack).ToLowerInvariant()) + { + AddArgument(archiveArg); + AddArgument(fileArg); AddArgument(outputArg); AddOption(xmlFormat); AddOption(recursive); AddOption(textures); AddOption(metadata); AddOption(force); + AddOption(parallel); Handler = CommandHandler.Create(Handle); } - private async Task Handle(string archive, string output, bool xmlFormat, bool recursive, bool textures, bool metadata, bool force, InvocationContext context, CancellationToken token) + private async Task Handle(InvocationContext context, CancellationToken token) { - var settings = new UnpackSettings(archive, "*", output, xmlFormat, recursive, textures, metadata, force); + var archive = context.ParseResult.GetValueForArgument(archiveArg); + var file = context.ParseResult.GetValueForArgument(fileArg); + var output = context.ParseResult.GetValueForArgument(outputArg); + var xmlFormat = context.ParseResult.GetValueForOption(this.xmlFormat); + var recursive = context.ParseResult.GetValueForOption(this.recursive); + var textures = context.ParseResult.GetValueForOption(this.textures); + var metadata = context.ParseResult.GetValueForOption(this.metadata); + var force = context.ParseResult.GetValueForOption(this.force); + var parallel = context.ParseResult.GetValueForOption(this.parallel); + var settings = new UnpackSettings(archive, file, output, xmlFormat, recursive, textures, metadata, force, parallel < 1 ? Environment.ProcessorCount : parallel); var archiver = context.GetHost().Services.GetRequiredService(); await archiver.Unpack(settings, token); return 0; diff --git a/src/SyncFaction.Toolbox/Models/UnpackSettings.cs b/src/SyncFaction.Toolbox/Models/UnpackSettings.cs index 36e5a44..9af73a0 100644 --- a/src/SyncFaction.Toolbox/Models/UnpackSettings.cs +++ b/src/SyncFaction.Toolbox/Models/UnpackSettings.cs @@ -1,3 +1,3 @@ namespace SyncFaction.Toolbox.Models; -public record UnpackSettings(string ArchiveGlob, string FileGlob, string OutputPath, bool XmlFormat, bool Recursive, bool Textures, bool Metadata, bool Force); +public record UnpackSettings(string ArchiveGlob, string FileGlob, string OutputPath, bool XmlFormat, bool Recursive, List Textures, bool Metadata, bool Force, int Parallel);