Files
ExPkg/src/Pipeline/ExtractPipeline.cpp

529 lines
21 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "Core/Logger.h"
#include "Core/ProgressBar.h"
#include "Decoder/DxtDecoder.h"
#include "Decoder/PkgExtractor.h"
#include "Decoder/TexDecoder.h"
#include "Encoder/ImageEncoder.h"
#include "ExtractPipeline.h"
#include "IO/StreamReader.h"
#include "IO/StreamWriter.h"
#include "TaskScheduler.h"
#include <algorithm>
#include <chrono>
namespace PKG {
// ─── Scoped timer ────────────────────────────────────────────────
// Captures a steady-clock timestamp on construction and, on destruction,
// logs the elapsed time through Logger: whole milliseconds below one second,
// fractional seconds otherwise.
class ScopedTimer {
    using Clock = std::chrono::steady_clock;

public:
    ScopedTimer() : m_Start(Clock::now()) {}

    ~ScopedTimer() {
        const auto elapsed = Clock::now() - m_Start;
        const auto elapsedMs =
            std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
        // Pick the unit first, then emit a single log line.
        const std::string figure = (elapsedMs < 1000)
                                       ? std::to_string(elapsedMs) + " ms"
                                       : std::to_string(elapsedMs / 1000.0) + " s";
        Logger::Instance().Info("Time: " + figure);
    }

private:
    Clock::time_point m_Start; // timestamp taken in the constructor
};
// ─── Helper functions ────────────────────────────────────────────
// Decompresses a DXT1/DXT3/DXT5 mipmap in place and retags it as RGBA8888.
// Returns true when decompression was performed; for any other format the
// mipmap is left untouched and false is returned.
static bool DecompressDxtInPlace(TexMipMap &mipmap) {
    // Shared tail of every compressed case: decode with the given flag, retag.
    const auto inflate = [&mipmap](auto dxtFlag) {
        DxtDecoder::DecompressImage(mipmap.Width, mipmap.Height, mipmap.Data, dxtFlag);
        mipmap.Format = MipmapFormat::RGBA8888;
        return true;
    };

    const auto sourceFormat = mipmap.Format;
    if (sourceFormat == MipmapFormat::CompressedDXT5)
        return inflate(DXTFlags::DXT5);
    if (sourceFormat == MipmapFormat::CompressedDXT3)
        return inflate(DXTFlags::DXT3);
    if (sourceFormat == MipmapFormat::CompressedDXT1)
        return inflate(DXTFlags::DXT1);
    return false;
}
// Sniffs the leading magic bytes of an encoded image and returns the matching
// file extension without the dot ("png", "jpg", "gif", "bmp", "webp"), or an
// empty string when no signature matches. Used to choose an output extension
// for payloads whose texture format was not recognised, so they are not
// written with a placeholder extension.
//
// PNG and GIF are matched against their complete signatures (8 and 6 bytes),
// so data that merely begins with "\x89PNG" or "GIF" is not misreported.
static std::string DetectExtensionByMagic(const std::vector<uint8_t> &data) {
    static constexpr uint8_t kPng[] = {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A};
    static constexpr uint8_t kJpeg[] = {0xFF, 0xD8, 0xFF};
    static constexpr uint8_t kGif87[] = {'G', 'I', 'F', '8', '7', 'a'};
    static constexpr uint8_t kGif89[] = {'G', 'I', 'F', '8', '9', 'a'};
    static constexpr uint8_t kBmp[] = {'B', 'M'};
    static constexpr uint8_t kRiff[] = {'R', 'I', 'F', 'F'};
    static constexpr uint8_t kWebp[] = {'W', 'E', 'B', 'P'};

    // True when `data` contains every byte of `signature` starting at `offset`.
    const auto hasBytesAt = [&data](size_t offset, const auto &signature) {
        const size_t length = sizeof(signature); // array of single-byte elements
        return data.size() >= offset + length &&
               std::equal(signature, signature + length, data.data() + offset);
    };

    if (hasBytesAt(0, kPng))
        return "png";
    if (hasBytesAt(0, kJpeg))
        return "jpg";
    if (hasBytesAt(0, kGif87) || hasBytesAt(0, kGif89))
        return "gif";
    if (hasBytesAt(0, kBmp))
        return "bmp";
    // RIFF container: "RIFF" <4-byte size> "WEBP"
    if (hasBytesAt(0, kRiff) && hasBytesAt(8, kWebp))
        return "webp";
    return {};
}
// Reports whether `ext` (with leading dot, compared case-sensitively) names an
// already-encoded image type that can be copied out as-is without transcoding.
static bool IsRawImageFile(const std::string &ext) {
    static constexpr const char *kPassthroughExts[] = {".gif", ".jpg", ".png", ".jpeg", ".webp"};
    for (const char *candidate : kPassthroughExts) {
        if (ext == candidate)
            return true;
    }
    return false;
}
// ─── ExtractPipeline public interface ─────────────────────────────
// Defaulted constructor.
ExtractPipeline::ExtractPipeline() = default;
// Constructs a pipeline whose m_Config is initialised from `config`.
ExtractPipeline::ExtractPipeline(const ExtractConfig &config)
: m_Config(config) {}
// Entry point: extracts `inputPath` into `outDir/<input file stem>/`, so that
// outputs of different inputs land in separate sub-directories.
//   .pkg / .mpkg -> RunPkg
//   .tex         -> RunTex
//   anything else fails with ErrorCode::UnsupportedFormat.
// When `stats` is non-null its startTime, inputFileSize (0 if the size cannot
// be queried) and endTime are filled in; endTime is set here as well so it is
// valid even when the run fails early. The total run time is logged by the
// ScopedTimer when this function returns.
Result<void> ExtractPipeline::Run(const std::filesystem::path &inputPath,
                                  const std::filesystem::path &outDir,
                                  bool showProgress,
                                  ExtractStats *stats) {
    ScopedTimer timer;

    if (stats != nullptr) {
        stats->startTime = std::chrono::steady_clock::now();
        std::error_code sizeErr;
        const uint64_t inputBytes = std::filesystem::file_size(inputPath, sizeErr);
        stats->inputFileSize = sizeErr ? 0 : inputBytes; // avoid a garbage value on error
    }

    const std::filesystem::path targetDir = outDir / inputPath.stem();
    const std::string suffix = inputPath.extension().string();

    // Route to the handler matching the input extension.
    const auto dispatch = [&]() -> Result<void> {
        if (suffix == ".pkg" || suffix == ".mpkg")
            return RunPkg(inputPath, targetDir, showProgress, stats);
        if (suffix == ".tex")
            return RunTex(inputPath, targetDir, stats);
        return Fail(ErrorCode::UnsupportedFormat, "Unsupported file extension: " + suffix);
    };
    Result<void> outcome = dispatch();

    if (stats != nullptr)
        stats->endTime = std::chrono::steady_clock::now();
    return outcome;
}
// ─── PKG/MPKG package extraction ──────────────────────────────────
// Returns true when `relative`, joined onto an output directory, cannot
// resolve outside of it: it carries no root name/directory and its lexically
// normalised form does not start with "..".
static bool StaysInsideOutputDir(const std::filesystem::path &relative) {
    if (relative.has_root_path())
        return false;
    const std::filesystem::path normal = relative.lexically_normal();
    return normal.empty() || *normal.begin() != std::filesystem::path("..");
}

// Extracts every entry of the package at `inputPath` into `outDir`.
//
// Steps: parse the index, validate entry paths, pre-create output directories,
// then process each entry as a task on a thread pool and aggregate failures.
//
// Returns Ok() when every entry succeeded. Fails with
//   - ErrorCode::FileOpen   if the package cannot be opened or an output
//                           directory cannot be created,
//   - the index parser's error if the index is invalid,
//   - ErrorCode::ReadFailed if an entry path would escape `outDir`, or if any
//                           entry failed (message lists each failing entry).
// When `stats` is non-null, totalEntries, per-entry failures and endTime are
// recorded.
Result<void> ExtractPipeline::RunPkg(const std::filesystem::path &inputPath,
                                     const std::filesystem::path &outDir,
                                     bool showProgress,
                                     ExtractStats *stats) {
    StreamReader reader(inputPath);
    if (!reader.IsOpen())
        return Fail(ErrorCode::FileOpen, "Failed to open file: " + inputPath.string());

    // Parse the package index.
    PkgExtractor extractor;
    auto entriesRes = extractor.ParseIndex(reader);
    if (!entriesRes)
        return Fail(entriesRes.error.code, entriesRes.error.message);
    // Reader position after the index; entry offsets are added to it.
    uint32_t offsetPosition = static_cast<uint32_t>(reader.tellg());

    // Entry paths come from the package itself. Reject absolute paths and
    // ".." traversal up front, before anything is written, so a crafted
    // package cannot place files outside outDir.
    for (const auto &entry : *entriesRes) {
        if (!StaysInsideOutputDir(entry.FullPath))
            return Fail(ErrorCode::ReadFailed,
                        "Unsafe entry path in package: " + entry.FullPath.string());
    }

    // Pre-create all directories so worker threads never race on creation.
    // The error_code overload keeps failures inside the Result channel.
    for (const auto &entry : *entriesRes) {
        auto parentDir = (outDir / entry.FullPath).parent_path();
        if (parentDir.empty())
            continue;
        std::error_code dirErr;
        std::filesystem::create_directories(parentDir, dirErr);
        if (dirErr)
            return Fail(ErrorCode::FileOpen,
                        "Failed to create directory: " + parentDir.string() + " (" + dirErr.message() + ")");
    }

    // Thread count: configured value (0 = hardware concurrency), capped at the
    // number of entries to avoid idle threads, and never below one.
    uint32_t threadCount = m_Config.threadCount;
    if (threadCount == 0)
        threadCount = std::thread::hardware_concurrency();
    threadCount = std::min(threadCount, static_cast<uint32_t>(entriesRes->size()));
    if (threadCount == 0)
        threadCount = 1;
    TaskScheduler scheduler(threadCount);
    Logger::Instance().Info("Using " + std::to_string(scheduler.ThreadCount()) + " threads");

    // Limit concurrent .tex processing: decode + DXT decompression + PNG
    // encoding is memory hungry, and letting every thread handle a large
    // texture at once would push peak memory too high.
    Semaphore texSem(4);

    // The progress bar is created after the index is parsed (total is known)
    // and registered with the Logger so log output is coordinated with it.
    ProgressBar progress(showProgress ? entriesRes->size() : 0);
    if (showProgress)
        Logger::Instance().SetProgressBar(&progress);
    if (stats)
        stats->totalEntries.store(entriesRes->size(), std::memory_order_relaxed);

    // Submit one task per entry.
    std::vector<std::future<Result<void>>> futures;
    futures.reserve(entriesRes->size());
    for (const auto &entry : *entriesRes) {
        auto future = scheduler.Submit(
            [this, entry, pkgPath = inputPath, offsetPosition, outDir, &texSem, &progress, stats]() -> Result<void> {
                auto res = ProcessEntry(entry, pkgPath, offsetPosition, outDir, texSem, stats);
                progress.Increment();
                if (!res && stats)
                    stats->AddFailure();
                return res;
            });
        futures.push_back(std::move(future));
    }
    scheduler.WaitAll();
    Logger::Instance().SetProgressBar(nullptr);
    if (stats)
        stats->endTime = std::chrono::steady_clock::now();

    // Collect per-entry errors; futures are in the same order as the entries.
    std::vector<std::string> errors;
    for (size_t i = 0; i < futures.size(); i++) {
        auto res = futures[i].get();
        if (!res)
            errors.push_back((*entriesRes)[i].FullPath.string() + ": " + res.error.message);
    }
    if (!errors.empty()) {
        std::string msg = "Completed with " + std::to_string(errors.size()) + " errors:\n";
        for (const auto &e : errors)
            msg += " " + e + "\n";
        return Fail(ErrorCode::ReadFailed, msg);
    }
    return Ok();
}
// ─── Single TEX file extraction ───────────────────────────────────
// Decodes the standalone .tex file at `inputPath` and writes the converted
// result into `outDir` as <input name with ".tex" replaced by the new
// extension>.
//
// Returns the first failure encountered (open, decode, GIF frame table,
// encode) or the encoder's result. When `stats` is non-null, totalEntries is
// set to 1 once decoding succeeded, endTime is stamped after encoding, and an
// encode failure is counted via AddFailure().
Result<void> ExtractPipeline::RunTex(const std::filesystem::path &inputPath,
                                     const std::filesystem::path &outDir,
                                     ExtractStats *stats) {
    StreamReader reader(inputPath);
    if (!reader.IsOpen())
        return Fail(ErrorCode::FileOpen, "Failed to open file: " + inputPath.string());

    TexDecoder texDecoder;
    auto texRes = texDecoder.Decode(reader);
    if (!texRes)
        return Fail(texRes.error.code, texRes.error.message);
    auto &tex = *texRes;

    // GIF textures carry an extra frame-info table after the TEX data.
    if (tex.IsGif) {
        auto frameRes = ReadGifFrameInfo(tex, reader);
        if (!frameRes)
            return frameRes;
    }
    if (stats)
        stats->totalEntries.store(1, std::memory_order_relaxed);

    // Make sure the output directory exists. Best effort: the error code is
    // not inspected here.
    std::error_code ec;
    std::filesystem::create_directories(outDir, ec);

    // Hand EncodeTex the full file name rather than the stem: EncodeTex calls
    // replace_extension(), which must replace ".tex" itself. With the stem, a
    // name such as "a.b.tex" would lose ".b" and be written as "a.png".
    std::filesystem::path outPath = outDir / inputPath.filename();
    auto res = EncodeTex(tex, outPath, stats);
    if (stats)
        stats->endTime = std::chrono::steady_clock::now();
    if (!res && stats)
        stats->AddFailure();
    return res;
}
// ─── GIF frame-info reading ───────────────────────────────────────
// Reads the frame-info block that follows the TEX data of an animated texture
// and stores it in tex.FrameInfoContainer (magic, optional canvas size, and
// one TexFrameInfo per frame: image id, frame time, position and extents).
//
// Layout differences handled here:
//   - "TEXS0003" additionally stores the GIF canvas width/height.
//   - "TEXS0001" stores frame coordinates as 32-bit integers; every other
//     magic is read with 32-bit float coordinates.
//
// If the canvas size is still zero afterwards it falls back to the first
// frame's size. A block without frames leaves it untouched.
// Returns the first read error, otherwise Ok().
Result<void> ExtractPipeline::ReadGifFrameInfo(Tex &tex, StreamReader &reader) {
    auto &container = tex.FrameInfoContainer;

    auto magicRes = reader.ReadNString(16);
    if (!magicRes)
        return Fail(magicRes.error.code, magicRes.error.message);
    container.Magic = *magicRes;

    auto frameCountRes = reader.ReadInt32();
    if (!frameCountRes)
        return Fail(frameCountRes.error.code, frameCountRes.error.message);

    // TEXS0003 additionally carries the GIF canvas size.
    if (container.Magic == "TEXS0003") {
        auto widthRes = reader.ReadInt32();
        if (!widthRes)
            return Fail(widthRes.error.code, widthRes.error.message);
        container.GifWidth = *widthRes;
        auto heightRes = reader.ReadInt32();
        if (!heightRes)
            return Fail(heightRes.error.code, heightRes.error.message);
        container.GifHeight = *heightRes;
    }

    // Reads one coordinate into `field`: integer-encoded for TEXS0001,
    // float-encoded otherwise.
    const bool integerCoords = (container.Magic == "TEXS0001");
    const auto readCoord = [&reader, integerCoords](auto &field) -> Result<void> {
        if (integerCoords) {
            auto value = reader.ReadInt32();
            if (!value)
                return Fail(value.error.code, value.error.message);
            field = static_cast<float>(*value);
        } else {
            auto value = reader.ReadSingle();
            if (!value)
                return Fail(value.error.code, value.error.message);
            field = *value;
        }
        return Ok();
    };

    // Frame records, in file order. A non-positive count reads nothing.
    for (int i = 0; i < *frameCountRes; i++) {
        TexFrameInfo frameInfo{};

        auto imageIdRes = reader.ReadInt32();
        if (!imageIdRes)
            return Fail(imageIdRes.error.code, imageIdRes.error.message);
        frameInfo.ImageId = *imageIdRes;

        auto frametimeRes = reader.ReadSingle();
        if (!frametimeRes)
            return Fail(frametimeRes.error.code, frametimeRes.error.message);
        frameInfo.Frametime = *frametimeRes;

        // Field order is part of the file format; do not reorder.
        if (auto r = readCoord(frameInfo.PosX); !r)
            return r;
        if (auto r = readCoord(frameInfo.PosY); !r)
            return r;
        if (auto r = readCoord(frameInfo.Width); !r)
            return r;
        if (auto r = readCoord(frameInfo.WidthY); !r)
            return r;
        if (auto r = readCoord(frameInfo.HeightX); !r)
            return r;
        if (auto r = readCoord(frameInfo.Height); !r)
            return r;

        container.Frames.push_back(frameInfo);
    }

    // Fallback: if the canvas size is unset, take the first frame's size.
    // Guarded so that a block with no frames never indexes an empty list.
    if (!container.Frames.empty() && (container.GifWidth == 0 || container.GifHeight == 0)) {
        container.GifWidth = static_cast<int>(container.Frames[0].Width);
        container.GifHeight = static_cast<int>(container.Frames[0].Height);
    }
    return Ok();
}
// ─── TEX encoding output ──────────────────────────────────────────
// Writes a decoded texture to disk, replacing the extension of `outPath`
// according to the texture kind:
//   - GIF animation         -> EncodeGif (frame by frame), ".gif"
//   - video texture         -> raw bytes written as-is, extension from
//                              GetFileExtension (MP4 header is only checked
//                              and logged, never rejected)
//   - DXT / raw pixel data  -> DXT is decompressed first, then PNG-encoded
//   - anything else         -> treated as an already-encoded image: extension
//                              detected from magic bytes, falling back to
//                              GetFileExtension, bytes written unchanged
// Only the first mipmap of the first image is exported. A texture without
// images or without mipmaps produces no output and returns Ok().
// On success the written file is reported to `stats` (if non-null).
Result<void> ExtractPipeline::EncodeTex(Tex &tex, const std::filesystem::path &outPath, ExtractStats *stats) {
    if (tex.ImageContainer.Images.empty())
        return Ok();

    // Reports a written file to stats without ever throwing: the size query
    // uses the error_code overload (0 on failure) and an extension-less name
    // is recorded as "bin".
    const auto recordOutput = [stats](const std::filesystem::path &written) {
        if (!stats)
            return;
        std::error_code sizeErr;
        const auto bytes = std::filesystem::file_size(written, sizeErr);
        const std::string ext = written.extension().string();
        stats->AddOutput(ext.empty() ? "bin" : ext.substr(1), sizeErr ? 0 : bytes);
    };

    // GIF animation: delegate to EncodeGif.
    if (tex.IsGif) {
        std::filesystem::path gifOutPath = outPath;
        gifOutPath.replace_extension("gif");
        Logger::Instance().Info("Convert: " + gifOutPath.string());
        ImageEncoder encoder;
        auto res = encoder.EncodeGif(tex, gifOutPath);
        if (res)
            recordOutput(gifOutPath);
        return res;
    }

    auto &firstImage = tex.ImageContainer.Images[0];
    if (firstImage.Mipmaps.empty())
        return Ok(); // nothing to export; same outcome as an empty image list
    auto &sourceMipmap = firstImage.Mipmaps[0];
    MipmapFormat format = tex.IsVideoTexture ? MipmapFormat::VideoMp4 : sourceMipmap.Format;

    // Video texture: sanity-check the MP4 header, then write the data as-is.
    if (tex.IsVideoTexture) {
        if (sourceMipmap.Data.size() >= 12) {
            // Bytes 4..11 hold the "ftyp" box tag followed by the brand.
            std::string mp4Magic = std::string(reinterpret_cast<const char *>(&sourceMipmap.Data[4]), 8);
            if (mp4Magic != "ftypisom" && mp4Magic != "ftypmsnv" && mp4Magic != "ftypmp42")
                Logger::Instance().Error("Warning: Bad MP4 magic header");
        } else {
            Logger::Instance().Error("Warning: MP4 data too short");
        }
        std::filesystem::path imgOutPath = outPath;
        imgOutPath.replace_extension(GetFileExtension(format));
        Logger::Instance().Info("Convert: " + imgOutPath.string());
        ImageEncoder encoder;
        auto res = encoder.EncodeRaw(sourceMipmap, imgOutPath);
        if (res)
            recordOutput(imgOutPath);
        return res;
    }

    // Neither GIF nor video: decompress DXT data if that is what we have.
    DecompressDxtInPlace(sourceMipmap);

    // Formats 1..3 are PNG-encoded. NOTE(review): presumably the uncompressed
    // R8 / RG88 / RGBA8888 values of MipmapFormat; confirm against the enum.
    if (static_cast<int>(sourceMipmap.Format) >= 1 && static_cast<int>(sourceMipmap.Format) <= 3) {
        std::filesystem::path imgOutPath = outPath;
        imgOutPath.replace_extension("png");
        Logger::Instance().Info("Convert: " + imgOutPath.string());
        ImageEncoder encoder;
        auto res = encoder.EncodePng(sourceMipmap, imgOutPath);
        if (res)
            recordOutput(imgOutPath);
        return res;
    }

    // The payload may already be an encoded image (PNG/JPEG/GIF/BMP/WEBP):
    // pick the extension from its magic bytes, else fall back to the format's.
    std::filesystem::path imgOutPath = outPath;
    std::string detectedExt = DetectExtensionByMagic(sourceMipmap.Data);
    if (!detectedExt.empty())
        imgOutPath.replace_extension(detectedExt);
    else
        imgOutPath.replace_extension(GetFileExtension(format));
    Logger::Instance().Info("Convert: " + imgOutPath.string());

    // The writer lives only inside this lambda, so it is destroyed before the
    // file size is queried for stats (any buffered bytes should be on disk).
    auto res = [&] {
        StreamWriter writer(imgOutPath, std::ios::binary);
        return writer.WriteBytes(reinterpret_cast<const char *>(sourceMipmap.Data.data()),
                                 static_cast<uint32_t>(sourceMipmap.Data.size()));
    }();
    if (res)
        recordOutput(imgOutPath);
    return res;
}
// ─── Single-entry processing (thread-pool task) ───────────────────
// Extracts one package entry to `outDir / entry.FullPath`. Each call opens
// the package file itself and seeks to `entry.Offset + offsetPosition`, so no
// StreamReader is shared between tasks.
//   - ".tex" entries are decoded and re-encoded via EncodeTex while holding
//     `texSem`, which bounds how many textures are processed at once.
//   - Already-encoded images (see IsRawImageFile) are copied byte for byte.
//   - Everything else (.json/.frag/.vert/.mdl, ...) is read with
//     ReadStringFileData and written with WriteString.
// Returns the first failure (open, read, decode, write) or Ok(); on success
// the written file is reported to `stats` (if non-null).
Result<void> ExtractPipeline::ProcessEntry(const Entry &entry,
                                           const std::filesystem::path &pkgPath,
                                           uint32_t offsetPosition,
                                           const std::filesystem::path &outDir,
                                           Semaphore &texSem,
                                           ExtractStats *stats) {
    // Reports a written file to stats without ever throwing: the size query
    // uses the error_code overload (0 on failure) and an extension-less name
    // is recorded as "bin".
    const auto recordOutput = [stats](const std::filesystem::path &written) {
        if (!stats)
            return;
        std::error_code sizeErr;
        const auto bytes = std::filesystem::file_size(written, sizeErr);
        const std::string ext = written.extension().string();
        stats->AddOutput(ext.empty() ? "bin" : ext.substr(1), sizeErr ? 0 : bytes);
    };

    std::filesystem::path outPath = outDir / entry.FullPath;
    StreamReader reader(pkgPath);
    if (!reader.IsOpen())
        return Fail(ErrorCode::FileOpen, "Failed to open pkg: " + pkgPath.string());
    reader.seekg(entry.Offset + offsetPosition);

    // .tex: decode + encode (memory hungry, throttled by the semaphore).
    if (entry.Type == ".tex") {
        SemaphoreGuard guard(texSem);
        // Read the entry into memory, then move the buffer into an in-memory
        // reader for decoding (no extra copy of the data).
        std::string texData;
        auto readRes = reader.ReadData(texData, entry.Length);
        if (!readRes)
            return readRes;
        StreamReader texReader(std::move(texData));
        TexDecoder texDecoder;
        auto texRes = texDecoder.Decode(texReader);
        if (!texRes)
            return Fail(texRes.error.code, texRes.error.message);
        auto &tex = *texRes;
        if (tex.IsGif) {
            auto frameRes = ReadGifFrameInfo(tex, texReader);
            if (!frameRes)
                return frameRes;
        }
        return EncodeTex(tex, outPath, stats);
    }

    // Already-encoded image: copy the bytes straight through.
    if (IsRawImageFile(entry.Type)) {
        Logger::Instance().Info("Extract: " + outPath.string());
        std::string data;
        auto readRes = reader.ReadData(data, entry.Length);
        if (!readRes)
            return readRes;
        // The writer lives only inside this lambda, so it is destroyed before
        // the file size is queried for stats.
        auto res = [&] {
            StreamWriter writer(outPath, std::ios::binary);
            return writer.WriteBytes(data.data(), static_cast<uint32_t>(data.size()));
        }();
        if (res)
            recordOutput(outPath);
        return res;
    }

    // Other (text) files: .json/.frag/.vert/.mdl etc.
    Logger::Instance().Info("Extract: " + outPath.string());
    auto dataRes = reader.ReadStringFileData(entry.Length);
    if (!dataRes)
        return Fail(dataRes.error.code, dataRes.error.message);
    auto res = [&] {
        StreamWriter writer(outPath);
        return writer.WriteString(*dataRes);
    }();
    if (res)
        recordOutput(outPath);
    return res;
}
} // namespace PKG