From d8002f42b3295383edea486bf5eb07eb9b87819b Mon Sep 17 00:00:00 2001 From: devseed Date: Wed, 28 Feb 2024 18:32:34 +0900 Subject: [PATCH] remake ftextcvt, improve log format and optimize code --- .github/workflows/build_pyexe.yml | 3 +- .github/workflows/build_pysrc.yml | 55 ++ README.md | 46 +- project/pyexe_bintext/build_nuitka.bat | 1 - project/pyexe_bintext/build_pyinstaller.bat | 4 +- project/pyexe_bintext/build_viapyenv.bat | 11 - project/pyexe_bintext/requirements.txt | 2 + project/pyexe_ftextcvt/build_nuitka.bat | 10 +- project/pyexe_ftextcvt/build_pyinstaller.bat | 4 +- project/pyexe_ftextcvt/build_viapyenv.bat | 12 - project/pyexe_ftextcvt/requirements.txt | 3 + project/pysrc_all/build_pysrc.sh | 38 +- project/pysrc_all/test_pycli.sh | 30 ++ src/ftextcvt.py | 510 +++++++------------ src/ftextpack.h | 460 ++++++++--------- src/ftextpack.py | 27 +- src/libtext.py | 153 ++++-- src/libutil.py | 422 ++++++++------- test/test_ftextcvt.py | 39 ++ 19 files changed, 959 insertions(+), 871 deletions(-) create mode 100644 .github/workflows/build_pysrc.yml delete mode 100644 project/pyexe_bintext/build_viapyenv.bat create mode 100644 project/pyexe_bintext/requirements.txt delete mode 100644 project/pyexe_ftextcvt/build_viapyenv.bat create mode 100644 project/pyexe_ftextcvt/requirements.txt create mode 100644 project/pysrc_all/test_pycli.sh create mode 100644 test/test_ftextcvt.py diff --git a/.github/workflows/build_pyexe.yml b/.github/workflows/build_pyexe.yml index 2de5470..67e6153 100644 --- a/.github/workflows/build_pyexe.yml +++ b/.github/workflows/build_pyexe.yml @@ -25,8 +25,7 @@ jobs: - name: make python enviroment run: | - python -m pip install nuitka zstandard - python -m pip install python-docx + python -m pip install -r .\project\pyexe_${{ matrix.pyexe_name }}\requirements.txt - name: build single pyexe by nuitka run: | diff --git a/.github/workflows/build_pysrc.yml b/.github/workflows/build_pysrc.yml new file mode 100644 index 0000000..bacfe97 --- 
/dev/null +++ b/.github/workflows/build_pysrc.yml @@ -0,0 +1,55 @@ +name: build_pysrc +on: + push: {tags: ['v*'] } # Push events to matching v*, i.e. v1.0, v20.15.10 + pull_request: + +permissions: + contents: write + +jobs: + test_pysrc: + runs-on: ubuntu-22.04 + steps: + - name: pull and init + uses: actions/checkout@v3 + with: {submodules: true} + + - name: make environment + run: | + pip install python-docx pillow numpy numba + + - name: test pysrc + run: | + cd project/pysrc_all && chmod +x *.sh && ./test_pysrc.sh + + - name: test pycli + run: | + cd project/pysrc_all && chmod +x *.sh && ./test_pycli.sh + + build_pysrc: + runs-on: ubuntu-22.04 + steps: + - name: pull and init + uses: actions/checkout@v3 + with: {submodules: true} + + - name: make environment + run: | + sudo apt-get update && sudo apt-get install p7zip-full + pip install python-docx pillow numpy numba + + - name: build pysrc + run: | + cd project/pysrc_all + chmod +x *.sh && bash ./build_pysrc.sh + archive_name=LocalizationPytool_${{ github.ref_name }} + 7z a ${archive_name}.7z build + 7z rn ${archive_name}.7z build ${archive_name} + + - name: create a release + uses: ncipollo/release-action@v1 + if: github.event_name == 'push' + with: + artifacts: "project/pysrc_all/*.7z" + allowUpdates: "true" + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 68b4850..4f0d758 100644 --- a/README.md +++ b/README.md @@ -25,20 +25,56 @@ See also, [GalgameReverse](https://github.com/YuriSizuku/GalgameReverse) for spe ## CLI Example +Using ">" to load or save files in zip, such as `path1/file1.zip>path2/file2` +For these examples, you need `mkdir -p project/pyexe_bintext/build` before. + ### bintext You can also replace `python src/libtext.py` with `cbintext.exe` in command line. 
```shell + +# insert ftext (save direct or in gz file) python src/libtext.py insert test/sample/COM001 test/sample/COM001.txt --refer test/sample/COM001 -t test/sample/COM001.tbl -o project/pyexe_bintext/build/COM001_rebuild.bin --log_level info --bytes_padding "2020" --bytes_fallback "815A" --insert_shorter --insert_longer --text_replace "季" "季季季" --text_replace "煌びやかな光" "你你你你你" +python src/libtext.py insert test/sample/COM001 test/sample/COM001.txt --refer test/sample/COM001 -t test/sample/COM001.tbl -o project/pyexe_bintext/build/COM001_rebuild.bin.gz --log_level info + +# extract ftext from bin file (save direct or in zip file) python src/libtext.py extract project/pyexe_bintext/build/COM001_rebuild.bin -o project/pyexe_bintext/build/COM001_rebuild.txt --log_level info -e sjis --has_cjk --min_len 4 --skip 0x16 --size 1024 +python src/libtext.py extract project/pyexe_bintext/build/COM001_rebuild.bin -o "project/pyexe_bintext/build/COM001.zip>COM001/COM001_rebuild.txt" --log_level info -e sjis --has_cjk --min_len 4 --skip 0x16 --size 1024 + +# check ftext (direct or in zip file) python src/libtext.py check project/pyexe_bintext/build/COM001_rebuild.txt --refer project/pyexe_bintext/build/COM001_rebuild.bin -o "project/pyexe_bintext/build/COM001_rebuild_check.txt" --log_level info -e sjis +python src/libtext.py check "project/pyexe_bintext/build/COM001.zip>COM001/COM001_rebuild.txt" --refer project/pyexe_bintext/build/COM001_rebuild.bin -o "project/pyexe_bintext/build/COM001.zip>COM001/COM001_rebuild_check.txt" --log_level info -e sjis ``` ### ftextpack ```shell -python src/ftextpack.py test/sample/COM001 test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.fp01 -t test/sample/COM001.tbl --pack_compact +# pack both of origin and new text in fp01 file +python src/ftextpack.py test/sample/COM001 test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.fp01 -t test/sample/COM001.tbl --pack_org + +# pack compact mode in zip file +python src/ftextpack.py 
test/sample/COM001 test/sample/COM001.txt -o "project/pyexe_bintext/build/COM001.zip>COM001/COM001.fp01" -t test/sample/COM001.tbl --pack_compact +``` + +### ftextcvt + +``` shell +# json convert +python src/ftextcvt.py test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.json +python src/ftextcvt.py project/pyexe_bintext/build/COM001.json -o project/pyexe_bintext/build/COM001.json.txt + +# csv convert +python src/ftextcvt.py test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.csv +python src/ftextcvt.py project/pyexe_bintext/build/COM001.csv -o project/pyexe_bintext/build/COM001.csv.txt + +# docx convert +python src/ftextcvt.py test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.docx +python src/ftextcvt.py project/pyexe_bintext/build/COM001.docx -o project/pyexe_bintext/build/COM001.docx.txt + +# pretty ftext format +python src/ftextcvt.py project/pyexe_bintext/build/COM001.json.txt -o project/pyexe_bintext/build/COM001.json.txt + ``` ## File Formats @@ -133,3 +169,11 @@ v0.1, initial version with data.fp01 v0.1.1, add allow_compat for smaller memory use v0.2, remake according to libtext v0.6 ``` + +* `ftextcvt.py` + +```shell +v0.1, initial version with formatftext, docx2ftext, ftext2docx +v0.2, add support for csv and json, compatiable with paratranz.cn +v0.3, remake according to libtext v0.6 +``` diff --git a/project/pyexe_bintext/build_nuitka.bat b/project/pyexe_bintext/build_nuitka.bat index 62bade4..3f9a505 100644 --- a/project/pyexe_bintext/build_nuitka.bat +++ b/project/pyexe_bintext/build_nuitka.bat @@ -1,5 +1,4 @@ @echo off call %~dp0_env.bat -python -m pip install nuitka nuitka --standalone --onefile --full-compat --show-progress "%PYSRC_PATH%" --windows-icon-from-ico="%ICON_PATH%" --output-dir="%OUT_DIR%" -o "c%TARGET_NAME%.exe" --assume-yes-for-downloads \ No newline at end of file diff --git a/project/pyexe_bintext/build_pyinstaller.bat b/project/pyexe_bintext/build_pyinstaller.bat index ba07f09..cb2404f 100644 --- 
a/project/pyexe_bintext/build_pyinstaller.bat +++ b/project/pyexe_bintext/build_pyinstaller.bat @@ -1,4 +1,4 @@ -:: build by pyinstaller +@echo off call %~dp0_env.bat -python -m pip install pyinstaller + pyinstaller -F "%PYSRC_PATH%" --name "%TARGET_NAME%.exe" --distpath="%OUT_DIR%" --workpath="%OUT_DIR%/obj/pyinstaller" --specpath="%OUT_DIR%/obj/pyinstaller" --icon="%ICON_PATH%" --exclude-module=numpy --exclude-module=PIL --console --clean --noupx -y \ No newline at end of file diff --git a/project/pyexe_bintext/build_viapyenv.bat b/project/pyexe_bintext/build_viapyenv.bat deleted file mode 100644 index d9ce1d0..0000000 --- a/project/pyexe_bintext/build_viapyenv.bat +++ /dev/null @@ -1,11 +0,0 @@ -::@echo off -:: use as build_viapyenv.bat path/to/xxx.bat - -call %~dp0_env.bat -if not exist "%PYENV_DIR%" mkdir "%PYENV_DIR%" - -pushd "%PYENV_DIR%" -python -m venv %PYENV_NAME% -cd %PYENV_NAME%\Scripts -call %1 -popd \ No newline at end of file diff --git a/project/pyexe_bintext/requirements.txt b/project/pyexe_bintext/requirements.txt new file mode 100644 index 0000000..2276485 --- /dev/null +++ b/project/pyexe_bintext/requirements.txt @@ -0,0 +1,2 @@ +zstandard +nuitka \ No newline at end of file diff --git a/project/pyexe_ftextcvt/build_nuitka.bat b/project/pyexe_ftextcvt/build_nuitka.bat index b9bc2bc..f416126 100644 --- a/project/pyexe_ftextcvt/build_nuitka.bat +++ b/project/pyexe_ftextcvt/build_nuitka.bat @@ -1,4 +1,8 @@ -:: build single files +@echo off call %~dp0_env.bat -python -m pip install nuitka -nuitka --standalone --onefile --full-compat --show-progress "%PYSRC_PATH%" --windows-icon-from-ico="%ICON_PATH%" --output-dir="%OUT_DIR%" -o "c%TARGET_NAME%.exe" --assume-yes-for-downloads \ No newline at end of file + +for /f "tokens=* USEBACKQ" %%f IN (`python -c "import os, docx;print(os.path.dirname(docx.__file__))"`) DO ( + set DOCX_DIR=%%f +) + +nuitka --standalone --onefile --full-compat --show-progress "%PYSRC_PATH%" --windows-icon-from-ico="%ICON_PATH%" 
--output-dir="%OUT_DIR%" -o "c%TARGET_NAME%.exe" --include-data-dir="%DOCX_DIR%\templates=docx/templates" --assume-yes-for-downloads \ No newline at end of file diff --git a/project/pyexe_ftextcvt/build_pyinstaller.bat b/project/pyexe_ftextcvt/build_pyinstaller.bat index ba07f09..cb2404f 100644 --- a/project/pyexe_ftextcvt/build_pyinstaller.bat +++ b/project/pyexe_ftextcvt/build_pyinstaller.bat @@ -1,4 +1,4 @@ -:: build by pyinstaller +@echo off call %~dp0_env.bat -python -m pip install pyinstaller + pyinstaller -F "%PYSRC_PATH%" --name "%TARGET_NAME%.exe" --distpath="%OUT_DIR%" --workpath="%OUT_DIR%/obj/pyinstaller" --specpath="%OUT_DIR%/obj/pyinstaller" --icon="%ICON_PATH%" --exclude-module=numpy --exclude-module=PIL --console --clean --noupx -y \ No newline at end of file diff --git a/project/pyexe_ftextcvt/build_viapyenv.bat b/project/pyexe_ftextcvt/build_viapyenv.bat deleted file mode 100644 index 06439b4..0000000 --- a/project/pyexe_ftextcvt/build_viapyenv.bat +++ /dev/null @@ -1,12 +0,0 @@ -::@echo off -:: use as build_viapyenv.bat path/to/xxx.bat - -call %~dp0_env.bat -if not exist "%PYENV_DIR%" mkdir "%PYENV_DIR%" - -pushd "%PYENV_DIR%" -python -m venv %PYENV_NAME% -cd %PYENV_NAME%\Scripts -python -m pip install python-docx -call %1 -popd \ No newline at end of file diff --git a/project/pyexe_ftextcvt/requirements.txt b/project/pyexe_ftextcvt/requirements.txt new file mode 100644 index 0000000..9296a5e --- /dev/null +++ b/project/pyexe_ftextcvt/requirements.txt @@ -0,0 +1,3 @@ +zstandard +nuitka +python-docx \ No newline at end of file diff --git a/project/pysrc_all/build_pysrc.sh b/project/pysrc_all/build_pysrc.sh index b7c8147..cc2ff36 100644 --- a/project/pysrc_all/build_pysrc.sh +++ b/project/pysrc_all/build_pysrc.sh @@ -1,19 +1,19 @@ -build_pysrc() -{ - src_dir=$1 - dst_dir=$2 - module_name=$3 - module_ver=$(python -c "import sys, os; sys.path.append(r'$src_dir'); import $module_name as l; print(l.__version__)") - echo build 
${module_name}_v${module_ver}.py - cp -f $src_dir/${module_name}.py $dst_dir/${module_name}_v${module_ver}.py -} - -if ! [ -d build ]; then mkdir build; fi - -build_pysrc ../../src ./build libutil -build_pysrc ../../src ./build libtext -# build_pysrc ../../src ./build libfont -# build_pysrc ../../src ./build libimage -# build_pysrc ../../src ./build libalg -# build_pysrc ../../src ./build ftextcvt -build_pysrc ../../src ./build ftextpack +build_pysrc() +{ + src_dir=$1 + dst_dir=$2 + module_name=$3 + module_ver=$(python -c "import sys, os; sys.path.append(r'$src_dir'); import $module_name as l; print(l.__version__)") + echo build ${module_name}_v${module_ver}.py + cp -f $src_dir/${module_name}.py $dst_dir/${module_name}_v${module_ver}.py +} + +if ! [ -d build ]; then mkdir build; fi + +build_pysrc ../../src ./build libutil +build_pysrc ../../src ./build libtext +# build_pysrc ../../src ./build libfont +# build_pysrc ../../src ./build libimage +# build_pysrc ../../src ./build libalg +build_pysrc ../../src ./build ftextcvt +build_pysrc ../../src ./build ftextpack diff --git a/project/pysrc_all/test_pycli.sh b/project/pysrc_all/test_pycli.sh new file mode 100644 index 0000000..07dba75 --- /dev/null +++ b/project/pysrc_all/test_pycli.sh @@ -0,0 +1,30 @@ +pushd ../.. 
+mkdir -p project/pyexe_bintext/build +rm -rf project/pyexe_bintext/build/COM001.zip + +echo "## test libtext insert" +python src/libtext.py insert test/sample/COM001 test/sample/COM001.txt --refer test/sample/COM001 -t test/sample/COM001.tbl -o project/pyexe_bintext/build/COM001_rebuild.bin --log_level info --bytes_padding "2020" --bytes_fallback "815A" --insert_shorter --insert_longer --text_replace "季" "季季季" --text_replace "煌びやかな光" "你你你你你" +python src/libtext.py insert test/sample/COM001 test/sample/COM001.txt --refer test/sample/COM001 -t test/sample/COM001.tbl -o project/pyexe_bintext/build/COM001_rebuild.bin.gz --log_level info + +echo "## test libtext extract" +python src/libtext.py extract project/pyexe_bintext/build/COM001_rebuild.bin -o project/pyexe_bintext/build/COM001_rebuild.txt --log_level info -e sjis --has_cjk --min_len 4 --skip 0x16 --size 1024 +python src/libtext.py extract project/pyexe_bintext/build/COM001_rebuild.bin -o "project/pyexe_bintext/build/COM001.zip>COM001/COM001_rebuild.txt" --log_level info -e sjis --has_cjk --min_len 4 --skip 0x16 --size 1024 + +echo "## test libtext check" +python src/libtext.py check project/pyexe_bintext/build/COM001_rebuild.txt --refer project/pyexe_bintext/build/COM001_rebuild.bin -o "project/pyexe_bintext/build/COM001_rebuild_check.txt" --log_level info -e sjis +python src/libtext.py check "project/pyexe_bintext/build/COM001.zip>COM001/COM001_rebuild.txt" --refer project/pyexe_bintext/build/COM001_rebuild.bin -o "project/pyexe_bintext/build/COM001.zip>COM001/COM001_rebuild_check.txt" --log_level info -e sjis + +echo "## test ftextpack" +python src/ftextpack.py test/sample/COM001 test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.fp01 -t test/sample/COM001.tbl --pack_org +python src/ftextpack.py test/sample/COM001 test/sample/COM001.txt -o "project/pyexe_bintext/build/COM001.zip>COM001/COM001.fp01" -t test/sample/COM001.tbl --pack_compact + +echo "## test ftextcvt" +python src/ftextcvt.py 
test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.json +python src/ftextcvt.py project/pyexe_bintext/build/COM001.json -o project/pyexe_bintext/build/COM001.json.txt +python src/ftextcvt.py test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.csv +python src/ftextcvt.py project/pyexe_bintext/build/COM001.csv -o project/pyexe_bintext/build/COM001.csv.txt +python src/ftextcvt.py test/sample/COM001.txt -o project/pyexe_bintext/build/COM001.docx +python src/ftextcvt.py project/pyexe_bintext/build/COM001.docx -o project/pyexe_bintext/build/COM001.docx.txt +python src/ftextcvt.py project/pyexe_bintext/build/COM001.json.txt -o project/pyexe_bintext/build/COM001.json.txt + +popd \ No newline at end of file diff --git a/src/ftextcvt.py b/src/ftextcvt.py index 09880bc..465fd98 100644 --- a/src/ftextcvt.py +++ b/src/ftextcvt.py @@ -1,327 +1,185 @@ -""" -This is a tool to change or adjust format -in ftext made by bintext.py - v0.2, developed by devseed -""" - -import os -import re -import codecs -import argparse -import json -from io import StringIO -from csv import DictWriter, DictReader -from docx import Document # pip install python-docx -from docx.shared import Pt -from typing import Union, List, Dict - -FTEXT_VERSION = 200 - -# util functions -def dump_ftext(ftexts1:List[Dict[str,Union[int,str]]], - ftexts2: List[Dict[str, Union[int, str]]], - outpath: str="", *, num_width=5, - addr_width=6, size_width=3) -> List[str]: - """ - ftexts1, ftexts2 -> ftext lines - text dict is as {'addr':, 'size':, 'text':} - :param ftexts1[]: text dict array in '○' line, - :param ftexts2[]: text dict array in '●' line - :return: ftext lines - """ - - if num_width==0: - num_width = len(str(len(ftexts1))) - if addr_width==0: - d = max([t['addr'] for t in ftexts1]) - addr_width = len(hex(d))-2 - if size_width==0: - d = max([t['size'] for t in ftexts1]) - size_width = len(hex(d))-2 - - fstr1 = "○{num:0"+ str(num_width) + "d}|{addr:0" + str(addr_width) + "X}|{size:0"+ 
str(size_width) + "X}○ {text}\n" - fstr2 = fstr1.replace('○', '●') - lines = [] - - length = 0 - if ftexts1 == None: - length = len(ftexts2) - fstr2 += '\n' - if ftexts2 == None: - length = len(ftexts1) - fstr1 += '\n' - if ftexts1 != None and ftexts2 != None : - length = min(len(ftexts1), len(ftexts2)) - fstr2 += '\n' - - for i in range(length): - if ftexts1 != None: - t1 = ftexts1[i] - lines.append(fstr1.format( - num=i,addr=t1['addr'],size=t1['size'],text=t1['text'])) - if ftexts2 != None: - t2 = ftexts2[i] - lines.append(fstr2.format( - num=i,addr=t2['addr'],size=t2['size'],text=t2['text'])) - - if outpath != "": - with codecs.open(outpath, 'w', 'utf-8') as fp: - fp.writelines(lines) - return lines - -def load_ftext(ftextobj: Union[str, List[str]], - only_text = False ) -> List[Dict[str, Union[int, str]]]: - """ - ftext lines -> ftexts1, ftexts2 - text dict is as {'addr':, 'size':, 'text':} - :param inobj: can be path, or lines[] - :return: ftexts1[]: text dict array in '○' line, - ftexts2[]: text dict array in '●' line - """ - - ftexts1, ftexts2 = [], [] - if type(ftextobj) == str: - with codecs.open(ftextobj, 'r', 'utf-8') as fp: - lines = fp.readlines() - else: lines = ftextobj - - if only_text == True: # This is used for merge_text - re_line1 = re.compile(r"^○(.+?)○[ ](.*)") - re_line2 = re.compile(r"^●(.+?)●[ ](.*)") - for line in lines: - line = line.strip("\n").strip('\r') - m = re_line1.match(line) - if m is not None: - ftexts1.append({'addr':0,'size':0,'text': m.group(2)}) - m = re_line2.match(line) - if m is not None: - ftexts2.append({'addr':0,'size':0,'text': m.group(2)}) - else: - re_line1 = re.compile(r"^○(\d*)\|(.+?)\|(.+?)○[ ](.*)") - re_line2 = re.compile(r"^●(\d*)\|(.+?)\|(.+?)●[ ](.*)") - for line in lines: - line = line.strip("\n").strip('\r') - m = re_line1.match(line) - if m is not None: - ftexts1.append({'addr':int(m.group(2),16), - 'size':int(m.group(3),16),'text': m.group(4)}) - m = re_line2.match(line) - if m is not None: - 
ftexts2.append({'addr':int(m.group(2),16), - 'size':int(m.group(3),16),'text': m.group(4)}) - return ftexts1, ftexts2 - -def file2lines(func, argidx=0, encoding="utf-8"): - def wrapper(*args, **kw): - if type(args[argidx]) == str: - args = list(args) - path = args[argidx] - if path!="": - with codecs.open(path, 'r', encoding) as fp: - args[argidx] = fp.readlines() - else: args[argidx] = None - return func(*args, **kw) - return wrapper - -def file2bytes(func, argidx=0): - def wrapper(*args, **kw): - if type(args[argidx]) == str: - with open(args[argidx], 'rb') as fp: - args[argidx] = fp.read() - return func(*args, **kw) - return wrapper - -# ftextcvt functions -def docx2ftext(wordpath, outpath="") -> List[str]: - - lines = [] - document = Document(wordpath) - for p in document.paragraphs: - # text is the whole text, p.run are every str in styles - line = p.text.rstrip('\n').rstrip('\r') - lines.append(line + '\n') - if outpath!="": - with open(outpath, "wb") as fp: - for line in lines: - fp.write(line.encode('utf-8')) - return lines - -@file2lines -def csv2ftext(csvobj: Union[str, List[str]], - outpath="") -> List[str]: - - ftexts1, ftexts2 = [], [] - for t in DictReader(StringIO("".join(csvobj))): - keyarr = t['key'].split('|') - addr = int(keyarr[1], 16) - size = int(keyarr[2], 16) - ftexts1.append({'addr': addr, - 'size': size, 'text': t['origin']}) - ftexts2.append({'addr': addr, - 'size': size, 'text': t['translation']}) - return dump_ftext(ftexts1, ftexts2, outpath) - -@file2lines -def json2ftext(jsonobj: Union[str, List[str]], - outpath="") -> List[str]: - """ - This is for paratranz.cn format, - :jsonobj: [{key: idx|addr|size, origin: text, translation: text}] - """ - - ftexts1, ftexts2 = [], [] - for t in json.loads("".join(jsonobj)): - keyarr = t['key'].split('|') - addr = int(keyarr[1], 16) - size = int(keyarr[2], 16) - ftexts1.append({'addr': addr, - 'size': size, 'text': t['origin']}) - ftexts2.append({'addr': addr, - 'size': size, 'text': 
t['translation']}) - return dump_ftext(ftexts1, ftexts2, outpath) - -@file2lines -def ftext2docx(ftextobj: Union[str, List[str]], - outpath="") -> Document: - """ - if this function compiled by nuitka, - it needs default.docx file in ./docx/templates - """ - lines = ftextobj - document = Document() - for i, line in enumerate(lines): - line = line.rstrip('\n').rstrip('\r') - p = document.add_paragraph() - p.paragraph_format.space_after = Pt(0) - run = p.add_run(line) - run.font.name = "SimSun" - run.font.size = Pt(10.5) - if outpath!="": - document.save(outpath) - return document - -@file2lines -def ftext2csv(ftextobj: Union[str, List[str]], - outpath="", *, num_width=5, - addr_width=6, size_width=3) -> List[str]: - - ftexts1, ftexts2 = load_ftext(ftextobj, False) - fstr = "{num:0"+ str(num_width) + "d}|{addr:0" + str(addr_width) + "X}|{size:0"+ str(size_width) + "X}" - csvstrio = StringIO() - csvwr = DictWriter(csvstrio, ['key', 'origin', 'translation']) - csvwr.writeheader() - for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): - csvwr.writerow({ - "key": fstr.format( - num=i, addr=t2['addr'], size=t2['size']), - "origin": t1['text'], "translation": t2['text'] - }) - - csvlines = csvstrio.getvalue().splitlines(True) - if outpath: - with codecs.open(outpath, 'w', 'utf8') as fp: - fp.writelines(csvlines) - return csvlines - -@file2lines -def ftext2json(ftextobj: Union[str, List[str]], - outpath="",*, num_width=5, - addr_width=6, size_width=3) -> List[str]: - - ftexts1, ftexts2 = load_ftext(ftextobj, False) - jsonarray: List[Dict[str: str]] = [] - fstr = "{num:0"+ str(num_width) + "d}|{addr:0" + str(addr_width) + "X}|{size:0"+ str(size_width) + "X}" - for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): - jsonarray.append({ - "key": fstr.format( - num=i, addr=t2['addr'], size=t2['size']), - "origin": t1['text'], "translation": t2['text'] - }) - - jsonstr = json.dumps(jsonarray, - ensure_ascii=False, indent=2) - jsonlines = jsonstr.splitlines(True) - if outpath!="": 
- with codecs.open(outpath, 'w', 'utf8') as fp: - fp.writelines(jsonlines) - return jsonlines - -@file2lines -def formatftext(ftextobj: Union[str, List[str]], - outpath="") -> List[str]: - - lines = ftextobj - for i, line in enumerate(lines): - _c = line[0] - if _c=='○' or _c=='●': - _idx = line.find(_c, 1) + 1 - if _idx==-1: - raise ValueError(f"lines[{i}] error not closed, {line}!") - elif line[_idx]!=' ': - print(f"detect line[{i}] without space") - line = line[0:_idx] + ' ' + line[_idx: ] - line = line.rstrip('\n').rstrip('\r') - lines[i] = line + '\n' - - if outpath!="": - with open(outpath, "wb") as fp: - for line in lines: - fp.write(line.encode('utf-8')) - return lines - -def debug(): - pass - -def main(cmdstr=None): - parser = argparse.ArgumentParser( - description="convert the ftext format to others\n" - "ftextcvt v0.2, developed by devseed") - parser.add_argument("inpath", type=str) - parser.add_argument("-o", "--outpath", - type=str, default="out.txt") - - if cmdstr is None: args = parser.parse_args() - else: args = parser.parse_args(cmdstr.split(' ')) - - outpath = args.outpath - outpath_ext=os.path.splitext(outpath)[1].lower() - inpath = args.inpath - inpath_ext = os.path.splitext(inpath)[1].lower() - - if inpath_ext== '.txt': # ftext to others - if outpath_ext == '.txt': - formatftext(inpath, outpath) - elif outpath_ext == '.docx': - ftext2docx(inpath, outpath) - elif outpath_ext == '.csv': - ftext2csv(inpath, outpath) - elif outpath_ext == ".json": - ftext2json(inpath, outpath) - else: - raise NotImplementedError( - f"unkonw format {inpath_ext}->{outpath_ext}") - elif outpath_ext == '.txt': # others to ftext - if inpath_ext == '.docx': - docx2ftext(inpath, outpath) - elif inpath_ext == '.csv': - csv2ftext(inpath, outpath) - elif inpath_ext == '.json': - json2ftext(inpath, outpath) - else: - raise NotImplementedError( - f"unkonw format {inpath_ext}->{outpath_ext}") - return - else: - raise NotImplementedError( - f"unkonw format 
{inpath_ext}->{outpath_ext}") - -if __name__ == '__main__': - # debug() - main() - pass - -""" -history: -v0.1, initial version with formatftext, docx2ftext, ftext2docx -v0.2, add support for csv and json, compatiable with paratranz.cn +# -*- coding: utf-8 -*- +g_description = f""" +A tool to change or adjust ftext + v0.3, developed by devseed +""" + +import os +import codecs +import argparse +import json +from io import StringIO +from csv import DictWriter, DictReader +from docx import Document # pip install python-docx +from docx.shared import Pt +from typing import Union, List, Dict + +try: + from libutil import writelines, savebytes, loadfiles, ftext_t, load_ftext, save_ftext +except ImportError: + exec("from libutil_v600 import writelines, savebytes, loadfiles, ftext_t, load_ftext, save_ftext") + +__version__ = 300 + +@loadfiles((0, 'utf-8')) +def ftext2pretty(linesobj: Union[str, List[str]], outpath=None) -> List[str]: + """ + make ftext2 format pretty, and try to fix some problem + """ + + lines = linesobj + if len(lines) > 0: lines[0] = lines[0].lstrip("\ufeff") # remove bom + for i, line in enumerate(lines): # try fix break + indicator = line[0] + line = line.rstrip('\n').rstrip('\r') + if indicator=="○" or indicator=="●": + text_offset = line.find(indicator, 1) + 1 + if text_offset < 0: raise ValueError(f"indicator {indicator} not closed, [lineno={i+1} line='{line}']") + elif line[text_offset]!=" ": + print(f"detect no ' ' [lineno={i} line='{line}']") + line = line[0: text_offset] + ' ' + line[text_offset:] + lines[i] = line + '\n' + return save_ftext(*load_ftext(lines), outpath) + +def ftext2csv(linesobj: Union[str, List[str]], outpath=None) -> List[str]: + """ + convert ftext2 files to csv format + tag(org ○, now ●),addr,size,text + + """ + + ftexts1, ftexts2 = load_ftext(linesobj) + assert(len(ftexts1) == len(ftexts2)) + sbufio = StringIO() + wr = DictWriter(sbufio, ["tag", "addr", "size", "text"]) + wr.writeheader() # will automaticaly detect comma, 
use excel to write csv can not encode as utf8 + for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): + wr.writerow({"tag": "org", "addr": hex(t1.addr), "size": hex(t1.size), "text": t1.text}) + wr.writerow({"tag": "now", "addr": hex(t2.addr), "size": hex(t2.size), "text": t2.text}) + lines = sbufio.getvalue().splitlines(True) + sbufio.close() + if outpath: savebytes(outpath, codecs.BOM_UTF8 + writelines(lines, "utf-8")) + return lines + +def ftext2json(ftextobj: Union[str, List[str]], outpath=None) -> List[str]: + """ + convert ftext2 to json format + { + "org": {"addr": 0, "size": 0, "text": ""} + "now" : {"addr": 0, "size": 0, "text": ""} + } + """ + + ftexts1, ftexts2 = load_ftext(ftextobj) + assert(len(ftexts1) == len(ftexts2)) + jarr: List[Dict[str: str]] = [] + for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): + jarr.append({ + "org": {"addr": hex(t1.addr), "size": hex(t1.size), "text": t1.text}, + "now" : {"addr": hex(t2.addr), "size": hex(t2.size), "text": t2.text} + }) + + jstr = json.dumps(jarr, ensure_ascii=False, indent=2) + if outpath: savebytes(outpath, jstr.encode("utf-8")) + return jstr.splitlines(True) + +@loadfiles([(0, "utf-8")]) +def ftext2docx(linesobj: Union[str, List[str]], outpath=None) -> Document: + """ + if this function compiled by nuitka, + it needs default.docx file in ./docx/templates + """ + + lines = linesobj + document = Document() + for i, line in enumerate(lines): + line = line.rstrip('\n').rstrip('\r') + p = document.add_paragraph() + p.paragraph_format.space_after = Pt(0) + run = p.add_run(line) + run.font.name = "SimSun" + run.font.size = Pt(10.5) + if outpath: document.save(outpath) + return document + +@loadfiles([(0, "utf-8")]) +def csv2ftext(linesobj: Union[str, List[str]], outpath=None) -> List[str]: + lines = linesobj + if len(lines) > 0: lines[0] = lines[0].lstrip("\ufeff") + ftexts1, ftexts2 = [], [] + for i, t in enumerate(DictReader(lines)): + ftext = None + try: + ftext = ftext_t(int(t['addr'], 0), int(t['size'], 
0), t['text']) + except ValueError and TypeError as e: print(f"{repr(e)} [i={i} '{t}']") + if ftext is None: continue + if t["tag"]=="org": ftexts1.append(ftext) + elif t["tag"]=="now": ftexts2.append(ftext) + else: raise ValueError(f"unknow tag {t['tag']} [i={i} '{t}']") + assert(len(ftexts1) == len(ftexts2)) + return save_ftext(ftexts1, ftexts2, outpath) + +@loadfiles(0) +def json2ftext(binobj: Union[str, List[str]], outpath=None) -> List[str]: + ftexts1, ftexts2 = [], [] + for i, t in enumerate(json.loads(binobj)): + if "org" in t: + t2 = t["org"] + ftext = ftext_t(int(t2['addr'], 0), int(t2['size'], 0), t2['text']) + ftexts1.append(ftext) + if "now" in t: + t2 = t["now"] + ftext = ftext_t(int(t2['addr'], 0), int(t2['size'], 0), t2['text']) + ftexts2.append(ftext) + assert(len(ftexts1) == len(ftexts2)) + return save_ftext(ftexts1, ftexts2, outpath) + +def docx2ftext(docxobj, outpath=None) -> List[str]: + lines = [] + document = Document(docxobj) + # text is the whole text, p.run are every str in styles + for p in document.paragraphs: + line = p.text.rstrip('\n').rstrip('\r') + lines.append(line + '\n') + if outpath: savebytes(outpath, writelines(lines)) + return lines + +def cli(cmdstr=None): + def cmd_convert(): + flag = False + if inpath_ext== '.txt': # ftext to others + if outpath_ext == '.txt': ftext2pretty(inpath, outpath) + elif outpath_ext == '.docx': ftext2docx(inpath, outpath) + elif outpath_ext == '.csv': ftext2csv(inpath, outpath) + elif outpath_ext == ".json": ftext2json(inpath, outpath) + else: flag = True + elif outpath_ext == '.txt': # others to ftext + if inpath_ext == '.docx': docx2ftext(inpath, outpath) + elif inpath_ext == '.csv': csv2ftext(inpath, outpath) + elif inpath_ext == '.json': json2ftext(inpath, outpath) + else: flag = True + return + else: flag = True + if not flag: return + raise NotImplementedError(f"convert not support {inpath_ext}->{outpath_ext}") + + parser = argparse.ArgumentParser(description=g_description) + 
parser.add_argument("inpath") + parser.add_argument("-o", "--outpath", default="out.txt") + + args = parser.parse_args(cmdstr.split(' ') if cmdstr else None) + inpath, outpath = args.inpath, args.outpath + outpath = outpath if len(outpath) > 0 else None + inpath_ext = os.path.splitext(inpath)[1].lower() + outpath_ext = os.path.splitext(outpath)[1].lower() + cmd_convert() + +if __name__ == '__main__': + cli() + +""" +history: +v0.1, initial version with formatftext, docx2ftext, ftext2docx +v0.2, add support for csv and json, compatiable with paratranz.cn +v0.3, remake according to libtext v0.6 """ \ No newline at end of file diff --git a/src/ftextpack.h b/src/ftextpack.h index 115ab56..8e9ac94 100644 --- a/src/ftextpack.h +++ b/src/ftextpack.h @@ -1,231 +1,231 @@ -/** - * A flexible format and low memory use implementation - * for general dynamic localization - * v0.1.1, developed by devseed - * - * use ftextpack_index_t.py to generate data.fp01 - * you can pack all the ftexts files in a folder to single data file -*/ - -#ifndef _FTEXTPACK_H -#define _FTEXTPACK_H -#include -#include - -#define FTEXTPACK_VERSION 110 - -typedef struct _ftextpack_textinfo_t { - union - { - uint32_t hash; // crc32 - uint32_t extra; // user defined information - }; - uint32_t offset; // offset in pack - uint32_t addr; // addr in script - uint32_t size; // text size -}ftextpack_textinfo_t; - -#ifdef FTEXTPACK_COMPACT -/** - * some trick to save index memory - * with memory overlap by union -*/ -typedef struct _ftextpack_info_t { - union { - ftextpack_textinfo_t org; - ftextpack_textinfo_t now; - }; -} ftextpack_info_t; -#else -typedef struct _ftextpack_info_t { - ftextpack_textinfo_t org; - ftextpack_textinfo_t now; -} ftextpack_info_t; -#endif - -/** - * use smaller memory for store index information -*/ -typedef struct _ftextpack_textmap_t -{ - union - { - uint32_t value; - uint32_t hash; - uint32_t addr; - }; - uint32_t offset; -}ftextpack_textmap_t; - - -typedef struct 
_ftextpack_map_t { - union - { - ftextpack_textmap_t org; - ftextpack_textmap_t now; - }; -}ftextpack_map_t; - -typedef struct _ftextpack_index_t { - char magic[4]; // FP01, LP01 fp for full info, lp for brief info - uint32_t count; // text info count - uint32_t offset; // text offset - uint32_t reserved; - union - { - ftextpack_info_t info[1]; - ftextpack_map_t map[1]; - }; -}ftextpack_index_t; - - -/** - * general crc32 method -*/ -uint32_t ftextpack_crc32(const void *buf, int n); - -/** - * load index or index map (brief index) from file - * @return whole index size -*/ -int ftextpack_loadindex(FILE *fp, void *outbuf, int bufsize); -int ftextpack_loadindexmap(FILE *fp, void *outbuf, int bufsize); - -/** - * directly load text from file to outbuf - * the text should be end with \0 - * @param offset start from ftell, and it recover fp position - * @return outtext size, error with 0 -*/ -int ftextpack_loadtext(FILE *fp, ftextpack_info_t *info, void *outbuf, int bufsize); - -/** - * search the index by hash, it must be sorted by org hash - * @return index of ftextpack_info_t, not find with -1 -*/ -int ftextpack_searchbyhash(ftextpack_index_t *index, uint32_t hash, int start); - -/** - * search the index by addr, it must be sorted by org addr - * @return index of ftextpack_info_t, not find with -1 -*/ -int ftextpack_searchbyaddr(ftextpack_index_t *index, uint32_t addr, int start); - -#ifdef FTEXTPACK_IMPLEMENT -uint32_t ftextpack_crc32(const void *buf, int n) -{ - uint32_t crc32 = ~0; - for(int i=0; i< n; i++) - { - crc32 ^= *(const uint8_t*)((uint8_t*)buf+i); - - for(int i = 0; i < 8; i++) - { - uint32_t t = ~((crc32&1) - 1); - crc32 = (crc32>>1) ^ (0xEDB88320 & t); - } - } - return ~crc32; -} - -int ftextpack_loadindex(FILE *fp, void *outbuf, int bufsize) -{ - if(!fp) return 0; - - ftextpack_index_t *index = (ftextpack_index_t*)outbuf; - uint8_t *cur = (uint8_t*)outbuf; - if(bufsize < sizeof(ftextpack_index_t)) return 0; - cur += sizeof(ftextpack_index_t) * 
fread(cur, sizeof(ftextpack_index_t), 1, fp); - index->magic[0] = 'f'; - - int n = index->count; - int leftsize = bufsize - sizeof(ftextpack_index_t); - int leftn = (n-1) * sizeof(ftextpack_info_t) < leftsize ? n-1 : leftsize/sizeof(ftextpack_info_t); - cur += sizeof(ftextpack_info_t) * fread(cur, sizeof(ftextpack_info_t), leftn, fp); - return (int)(cur - (uint8_t*)outbuf); -} - -int ftextpack_loadindexmap(FILE *fp, void *outbuf, int bufsize) -{ - if(!fp) return 0; - - ftextpack_index_t *index = (ftextpack_index_t*)outbuf; - ftextpack_map_t *map = index->map; - ftextpack_info_t tmpinfo; - - uint8_t *cur = (uint8_t*)outbuf; - if(bufsize < sizeof(ftextpack_index_t)) return 0; - cur += sizeof(ftextpack_index_t) * fread(cur, sizeof(ftextpack_index_t), 1, fp); - index->magic[0] = 'l'; - - int n = index->count; - int leftsize = bufsize - sizeof(ftextpack_index_t); - int leftn = (n-1) * sizeof(ftextpack_map_t) < leftsize ? n -1 : leftsize/sizeof(ftextpack_map_t); - - memcpy(&tmpinfo, map, sizeof(tmpinfo)); - map->org.addr = tmpinfo.org.addr; - map->now.offset = tmpinfo.now.offset; - cur = (uint8_t*)&index->map[1]; - for(int i=0; iorg.addr = tmpinfo.org.addr; - map->now.offset = tmpinfo.now.offset; - cur += sizeof(ftextpack_map_t); - } - return (int)(cur - (uint8_t*)outbuf); -} - -int ftextpack_searchbyaddr(ftextpack_index_t *index, uint32_t addr, int start) -{ - if(!index) return -1; - - int mid = -1; - int end = index->count; - ftextpack_map_t* map = index->map; - ftextpack_info_t* info = index->info; - //LOG("ftextpack_searchbyaddr: index=%p, addr=%x, start=%d, end=%d\n", index, addr, start, end) - - while (start<=end) - { - mid = (start + end) / 2; - uint32_t addr_mid = index->magic[0] == 'l' ? 
map[mid].org.addr: info[mid].org.addr; - // LOG("start=%d, end=%d, addr=%x, addr_mid=%x, index=%p, map[mid]=%p\n", - // start, end, addr, addr_mid, index, &map[mid]); - if(addr_mid > addr) end = mid - 1; - else if(addr_mid < addr) start = mid + 1; - else return mid; - } - return -1; -} - -int ftextpack_loadtext(FILE *fp, ftextpack_info_t *info, void *outbuf, int bufsize) -{ - if(!fp) return 0; - size_t offset = ftell(fp); - - int c; - int pos = 0; - - fseek(fp, info->now.offset, SEEK_CUR); - uint8_t* out = (uint8_t*)outbuf; - while ((c=fgetc(fp))>0) - { - out[pos++] = (uint8_t)c; - if(pos>=bufsize-1) break; - } - out[pos++] = 0; - fseek(fp, offset, SEEK_SET); - // LOG("%p, %x, %x, %d \n", info, offset, info->now.offset, pos); - return pos; -} - -#endif -#endif - -/** - * history: - * v0.1, initial version with data.fp01 - * v0.1.1, add ftextpack_loadindexmap for smaller memory use +/** + * A flexible format and low memory use implementation + * for general dynamic localization + * v0.1.1, developed by devseed + * + * use ftextpack_index_t.py to generate data.fp01 + * you can pack all the ftexts files in a folder to single data file +*/ + +#ifndef _FTEXTPACK_H +#define _FTEXTPACK_H +#include +#include + +#define FTEXTPACK_VERSION 110 + +typedef struct _ftextpack_textinfo_t { + union + { + uint32_t hash; // crc32 + uint32_t extra; // user defined information + }; + uint32_t offset; // offset in pack + uint32_t addr; // addr in script + uint32_t size; // text size +}ftextpack_textinfo_t; + +#ifdef FTEXTPACK_COMPACT +/** + * some trick to save index memory + * with memory overlap by union +*/ +typedef struct _ftextpack_info_t { + union { + ftextpack_textinfo_t org; + ftextpack_textinfo_t now; + }; +} ftextpack_info_t; +#else +typedef struct _ftextpack_info_t { + ftextpack_textinfo_t org; + ftextpack_textinfo_t now; +} ftextpack_info_t; +#endif + +/** + * use smaller memory for store index information +*/ +typedef struct _ftextpack_textmap_t +{ + union + { + uint32_t 
value; + uint32_t hash; + uint32_t addr; + }; + uint32_t offset; +}ftextpack_textmap_t; + + +typedef struct _ftextpack_map_t { + union + { + ftextpack_textmap_t org; + ftextpack_textmap_t now; + }; +}ftextpack_map_t; + +typedef struct _ftextpack_index_t { + char magic[4]; // FP01, LP01 fp for full info, lp for brief info + uint32_t count; // text info count + uint32_t offset; // text offset + uint32_t reserved; + union + { + ftextpack_info_t info[1]; + ftextpack_map_t map[1]; + }; +}ftextpack_index_t; + + +/** + * general crc32 method +*/ +uint32_t ftextpack_crc32(const void *buf, int n); + +/** + * load index or index map (brief index) from file + * @return whole index size +*/ +int ftextpack_loadindex(FILE *fp, void *outbuf, int bufsize); +int ftextpack_loadindexmap(FILE *fp, void *outbuf, int bufsize); + +/** + * directly load text from file to outbuf + * the text should be end with \0 + * @param offset start from ftell, and it recover fp position + * @return outtext size, error with 0 +*/ +int ftextpack_loadtext(FILE *fp, ftextpack_info_t *info, void *outbuf, int bufsize); + +/** + * search the index by hash, it must be sorted by org hash + * @return index of ftextpack_info_t, not find with -1 +*/ +int ftextpack_searchbyhash(ftextpack_index_t *index, uint32_t hash, int start); + +/** + * search the index by addr, it must be sorted by org addr + * @return index of ftextpack_info_t, not find with -1 +*/ +int ftextpack_searchbyaddr(ftextpack_index_t *index, uint32_t addr, int start); + +#ifdef FTEXTPACK_IMPLEMENT +uint32_t ftextpack_crc32(const void *buf, int n) +{ + uint32_t crc32 = ~0; + for(int i=0; i< n; i++) + { + crc32 ^= *(const uint8_t*)((uint8_t*)buf+i); + + for(int i = 0; i < 8; i++) + { + uint32_t t = ~((crc32&1) - 1); + crc32 = (crc32>>1) ^ (0xEDB88320 & t); + } + } + return ~crc32; +} + +int ftextpack_loadindex(FILE *fp, void *outbuf, int bufsize) +{ + if(!fp) return 0; + + ftextpack_index_t *index = (ftextpack_index_t*)outbuf; + uint8_t *cur = 
(uint8_t*)outbuf; + if(bufsize < sizeof(ftextpack_index_t)) return 0; + cur += sizeof(ftextpack_index_t) * fread(cur, sizeof(ftextpack_index_t), 1, fp); + index->magic[0] = 'f'; + + int n = index->count; + int leftsize = bufsize - sizeof(ftextpack_index_t); + int leftn = (n-1) * sizeof(ftextpack_info_t) < leftsize ? n-1 : leftsize/sizeof(ftextpack_info_t); + cur += sizeof(ftextpack_info_t) * fread(cur, sizeof(ftextpack_info_t), leftn, fp); + return (int)(cur - (uint8_t*)outbuf); +} + +int ftextpack_loadindexmap(FILE *fp, void *outbuf, int bufsize) +{ + if(!fp) return 0; + + ftextpack_index_t *index = (ftextpack_index_t*)outbuf; + ftextpack_map_t *map = index->map; + ftextpack_info_t tmpinfo; + + uint8_t *cur = (uint8_t*)outbuf; + if(bufsize < sizeof(ftextpack_index_t)) return 0; + cur += sizeof(ftextpack_index_t) * fread(cur, sizeof(ftextpack_index_t), 1, fp); + index->magic[0] = 'l'; + + int n = index->count; + int leftsize = bufsize - sizeof(ftextpack_index_t); + int leftn = (n-1) * sizeof(ftextpack_map_t) < leftsize ? n -1 : leftsize/sizeof(ftextpack_map_t); + + memcpy(&tmpinfo, map, sizeof(tmpinfo)); + map->org.addr = tmpinfo.org.addr; + map->now.offset = tmpinfo.now.offset; + cur = (uint8_t*)&index->map[1]; + for(int i=0; iorg.addr = tmpinfo.org.addr; + map->now.offset = tmpinfo.now.offset; + cur += sizeof(ftextpack_map_t); + } + return (int)(cur - (uint8_t*)outbuf); +} + +int ftextpack_searchbyaddr(ftextpack_index_t *index, uint32_t addr, int start) +{ + if(!index) return -1; + + int mid = -1; + int end = index->count; + ftextpack_map_t* map = index->map; + ftextpack_info_t* info = index->info; + //LOG("ftextpack_searchbyaddr: index=%p, addr=%x, start=%d, end=%d\n", index, addr, start, end) + + while (start<=end) + { + mid = (start + end) / 2; + uint32_t addr_mid = index->magic[0] == 'l' ? 
map[mid].org.addr: info[mid].org.addr; + // LOG("start=%d, end=%d, addr=%x, addr_mid=%x, index=%p, map[mid]=%p\n", + // start, end, addr, addr_mid, index, &map[mid]); + if(addr_mid > addr) end = mid - 1; + else if(addr_mid < addr) start = mid + 1; + else return mid; + } + return -1; +} + +int ftextpack_loadtext(FILE *fp, ftextpack_info_t *info, void *outbuf, int bufsize) +{ + if(!fp) return 0; + size_t offset = ftell(fp); + + int c; + int pos = 0; + + fseek(fp, info->now.offset, SEEK_CUR); + uint8_t* out = (uint8_t*)outbuf; + while ((c=fgetc(fp))>0) + { + out[pos++] = (uint8_t)c; + if(pos>=bufsize-1) break; + } + out[pos++] = 0; + fseek(fp, offset, SEEK_SET); + // LOG("%p, %x, %x, %d \n", info, offset, info->now.offset, pos); + return pos; +} + +#endif +#endif + +/** + * history: + * v0.1, initial version with data.fp01 + * v0.1.1, add ftextpack_loadindexmap for smaller memory use */ \ No newline at end of file diff --git a/src/ftextpack.py b/src/ftextpack.py index 6a2651a..9d70167 100644 --- a/src/ftextpack.py +++ b/src/ftextpack.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -g_description = """ +description = """ A flexble format with low memory implementation v0.2, developed by devseed @@ -18,10 +18,10 @@ from typing import Callable, List try: - from libutil import loadfiles, ftext_t, load_ftext, load_tbl + from libutil import savebytes, loadfiles, ftext_t, load_ftext, load_tbl from libtext import encode_extend except ImportError: - exec("from libutil_v600 import loadfiles, ftext_t, load_ftext, load_tbl") + exec("from libutil_v600 import savebytes, loadfiles, ftext_t, load_ftext, load_tbl") exec("from libtext_v600 import encode_extend") __version__ = 200 @@ -75,7 +75,7 @@ def _load_org(t: ftext_t, srcdata: bytes, crcmap) -> ftextpack_textinfo_t: textbytes = srcdata[t.addr: t.addr+t.size] tcrc = zlib.crc32(textbytes) if tcrc in crcmap and pack_nodup: - logging.info(f"dropdup crc=0x{tcrc:x} addr=0x{t.ddr:x} size=0x{t.size:x} text='{t.text}'") + logging.info(f"dropdup 
[crc=0x{tcrc:x} addr=0x{t.ddr:x} size=0x{t.size:x} text='{t.text}]'") return None if pack_org: start = bufio.tell(); bufio.write(textbytes); bufio.write(b'\x00') tmp = struct.pack("4I", tcrc, start, t.addr, t.size) @@ -111,11 +111,9 @@ def _load_pair(ftextobj, binobj) -> List[ftextpack_info_t]: info.org, info.now = org, now infos.append(info) - def save(fpack: Fpack, outobj, pack_compact=False): - _fp = None - if type(outobj) != str: fp=outobj - else: fp = open(outobj, 'wb+'); _fp = fp - + def save_fpack(fpack: Fpack, outobj, pack_compact=False) -> None: + fp = outobj if type(outobj)!=str else BytesIO() + start = fp.tell() index, infos, content =fpack.index, fpack.infos, fpack.content fp.write(index) fp.seek(-sizeof(ftextpack_info_t), 1) @@ -125,7 +123,9 @@ def save(fpack: Fpack, outobj, pack_compact=False): fp.write(struct.pack("<4I", *tmp)) else: fp.write(info) fp.write(content) - if _fp: _fp.close() + end = fp.tell() + if type(outobj) == str: savebytes(outobj, fp.getvalue()); fp.close() + logging.info(f"save 0x{end-start:x} bytes to {repr(outobj)}") # prepare enviroment bufio = BytesIO() @@ -161,9 +161,9 @@ def save(fpack: Fpack, outobj, pack_compact=False): size_textinfo = sizeof(ftextpack_textinfo_t) if not pack_compact: index.offset = size_index + (n-1)*size_info else: index.offset = size_index - size_info + n*size_textinfo - + fpack = Fpack(index, infos, bufio.getbuffer()[:bufio.tell()]) - if outobj: save(fpack, outobj, pack_compact) + if outobj: save_fpack(fpack, outobj, pack_compact) return fpack def cli(cmdstr=None): @@ -178,7 +178,7 @@ def cmd_pack(args): pack_sort=args.pack_sort, pack_org=args.pack_org, pack_nodup=args.pack_nodup, pack_compact=args.pack_compact) - parser = argparse.ArgumentParser(description=g_description) + parser = argparse.ArgumentParser(description=description) parser.add_argument("binpath", help="bin file or dir") parser.add_argument("ftextpath", help="ftext file or dir") parser.add_argument("-o", "--outpath", default="data.fp01") 
@@ -201,7 +201,6 @@ def cmd_pack(args): logging.basicConfig(level=logging.getLevelName(loglevel.upper()), format="%(levelname)s:%(funcName)s: %(message)s") cmd_pack(args) - if __name__ == '__main__': cli() diff --git a/src/libtext.py b/src/libtext.py index 0a64091..a96ee9a 100644 --- a/src/libtext.py +++ b/src/libtext.py @@ -1,19 +1,18 @@ # -*- coding: utf-8 -*- -g_description = """ +description = """ A binary text tool (remake) for text exporting, importing and checking v0.6, developed by devseed """ -import codecs import logging import argparse from io import StringIO, BytesIO from typing import Callable, Tuple, Union, List, Dict try: - from libutil import loadfiles, readlines, ftext_t, tbl_t, jtable_t, msg_t, save_ftext, load_ftext, load_tbl + from libutil import writelines, savebytes, loadfiles, ftext_t, tbl_t, jtable_t, msg_t, save_ftext, load_ftext, load_tbl except ImportError: - exec("from libutil_v600 import loadfiles, readlines, ftext_t, tbl_t, jtable_t, msg_t, save_ftext, load_ftext, load_tbl") + exec("from libutil_v600 import writelines, savebytes, loadfiles, ftext_t, tbl_t, jtable_t, msg_t, save_ftext, load_ftext, load_tbl") __version__ = 600 @@ -72,16 +71,18 @@ def encode_tbl(text: str, tbl: List[tbl_t], bytes_fallback: bytes=None) -> bytes encode_tbl.tbl = tbl encode_tbl.tblmap = dict((t.tchar, t.tcode) for t in tbl) - data = BytesIO() + bufio = BytesIO() tblmap = encode_tbl.tblmap for i, c in enumerate(text): - if c in tblmap: data.write(tblmap[c]) - elif bytes_fallback: data.write(bytes_fallback) + if c in tblmap: bufio.write(tblmap[c]) + elif bytes_fallback: bufio.write(bytes_fallback) else: - logging.error(f"failed with {c} at {i}, text='{text}'") + bufio.close() + logging.error(f"encode failed at {i} with {c} [text='{text}]'") return None - - return data.getvalue() + data = bufio.getvalue() + bufio.close() + return data def decode_tbl(data: bytes, tbl: List[tbl_t]) -> str: """ @@ -95,22 +96,24 @@ def decode_tbl(data: bytes, tbl: List[tbl_t]) -> 
str: decode_tbl.maxlen = max(len(t.tcode) for t in tbl) i = 0 - text = StringIO() + sbufio = StringIO() tblrmap = decode_tbl.tblrmap maxlen = decode_tbl.maxlen while i < len(data): flag = False for j in range(1, maxlen+1): if data[i: i+j] not in tblrmap: continue - text.write(tblrmap[data[i:i+j]]) + sbufio.write(tblrmap[data[i:i+j]]) flag = True break if not flag: - logging.error(f"[decode_tbl] failed with at {i}, data={data.hex(' ')}") + sbufio.close() + logging.error(f"decode failed at {i} [data={data.hex(' ')}]") return None i += j - - return text.getvalue() + text = sbufio.getvalue() + sbufio.close() + return text def encode_general(text: str, enc: Union[str, List[tbl_t]]='utf-8', enc_error: Union[str, bytes]='ignore'): """ @@ -123,7 +126,7 @@ def encode_general(text: str, enc: Union[str, List[tbl_t]]='utf-8', enc_error: U try: return text.encode(enc, enc_error) except UnicodeEncodeError as e: - logging.error(f"failed {e}, text='{text}'") + logging.error(f"encode failed {e} [text='{text}]'") return None else: return encode_tbl(text, enc, enc_error) @@ -287,6 +290,46 @@ def detect_text_utf8 (data, min_len=3) -> Tuple[List[int], List[int]]: return addrs, sizes +def check_ftextlines(lines: List[str]) -> List[msg_t]: + indicator_pre = "●" + msgs = [] + if len(lines) > 0: lines[0] = lines[0].lstrip("\ufeff") # remove bom + for i, line in enumerate(lines): + indicator = line[0] + if indicator == "#": continue + if indicator not in {"○", "●"}: continue + line = line.rstrip('\n').rstrip('\r') + + # check indicator + if indicator == indicator_pre: + msg = msg_t(-1, f"indicator {indicator} same as last [lineno={i+1} line='{line}']", logging.WARNING) + logging.warning(f"{msg.msg}") + msgs.append(msg) + if line.find(indicator, 1) < 0: + msg = msg_t(-1, f"indicator {indicator} not closed [lineno={i+1} line='{line}']", logging.ERROR) + logging.error(f"{msg.msg}") + msgs.append(msg) + indicator_pre = indicator + continue + + # check index and text + _, t1, *t2 = 
line.split(indicator) + t2 = "".join(t2) + if t2 and len(t2) and t2[0]!=" ": + msg = msg_t(-1, f"must have a space ' ' before text [lineno={i+1} line='{line}]'", logging.ERROR) + logging.error(f"{msg.msg}") + msgs.append(msg) + try: + _, t12, t13 = t1.split('|') + addr, size = int(t12, 16), int(t13, 16) + except ValueError as e: + msg = msg_t(-1, f"parse failed {repr(e)} [lineno={i+1} line='{line}]'", logging.ERROR) + logging.error(f"{msg.msg}") + msgs.append(msg) + indicator_pre = indicator + + return msgs + @loadfiles(0) def extract_ftexts(binobj: Union[str, bytes], outpath=None, encoding='utf-8', tblobj: Union[str, List[tbl_t]]=None, *, @@ -318,7 +361,7 @@ def _make_ftexts(addrs, sizes): if has_cjk and not hascjk(text): continue text = text.replace('\n', r'[\n]').replace('\r', r'[\r]') ftexts.append(ftext_t(addr, size, text)) - logging.info(f"extract i={i} addr=0x{addr:x} size=0x{size:x} text='{text}'") + logging.info(f"extracted [i={i} addr=0x{addr:x} size=0x{size:x} text='{text}]'") return ftexts data = memoryview(binobj) @@ -328,10 +371,10 @@ def _make_ftexts(addrs, sizes): addrs = list(map(lambda x: x + data_slice.start, addrs)) ftexts = _make_ftexts(addrs, sizes) if outpath: save_ftext(ftexts, ftexts, outpath) - logging.info(f"finished, extract {len(ftexts)} ftexts") + logging.info(f"finish extract {len(ftexts)} ftexts") return ftexts -@loadfiles([0, 1, "referobj"]) +@loadfiles([0, (1, 'utf-8'), "referobj"]) def insert_ftexts(binobj: Union[str, bytes], ftextsobj: Union[str, Tuple[List[ftext_t], List[ftext_t]]], outpath=None, encoding='utf-8', tblobj: Union[str, List[tbl_t]]=None, *, @@ -353,6 +396,7 @@ def insert_ftexts(binobj: Union[str, bytes], """ def addr_find(t:ftext_t, data: bytes, refdata: bytes, addr_cache: set=None) -> int: + if t.addr < 0 or t.size <= 0: return -1 addr = 0 n = len(refdata) if t.addr + t.size > n: @@ -377,7 +421,7 @@ def addr_find(t:ftext_t, data: bytes, refdata: bytes, addr_cache: set=None) -> i addr_cache |= {addr} return addr 
else: - logging.warning(f"not find, addr=0x{t.addr:x} text='{t.text}'") + logging.warning(f"ref addr not find [addr=0x{t.addr:x} text='{t.text}]'") return -1 def insert_adjust(encbytes: bytes, t: ftext_t, *, @@ -388,7 +432,7 @@ def insert_adjust(encbytes: bytes, t: ftext_t, *, encbytes += padding(t.size - len(encbytes), bytes_padding) else: if not insert_longer: - logging.warning(f"strip longer text, addr={t.addr:x} size=0x{len(encbytes):x}>0x{t.size:x}") + logging.warning(f"strip longer text 0x{len(encbytes):x}>0x{t.size:x} [addr={t.addr:x}]") encbytes = encbytes[:t.size] # adjust encbytes align @@ -404,18 +448,18 @@ def insert_adjust(encbytes: bytes, t: ftext_t, *, enc = tbl if tbl else encoding enc_error = bytes_fallback if tbl else ("ignore" if bytes_fallback else "strict") text_replace = text_replace if text_replace else dict() - _, ftexts = load_ftext(readlines(ftextsobj)) if type(ftextsobj)==bytes else ftextsobj + _, ftexts = ftextsobj if type(ftextsobj)==tuple else load_ftext(ftextsobj) ftexts.sort(key=lambda x: x.addr) logging.info(f"load {len(ftexts)} ftexts") shift = 0 last_addr = 0 srcdata = bytes(binobj) - dstio = BytesIO(srcdata) + dstio = BytesIO() for i, t in enumerate(ftexts): addr = addr_find(t, srcdata, refdata, refcache) if refdata else t.addr if addr < 0: - logging.warning(f"ftext addr not find, i={i} addr=0x{t.addr:x} text='{t.text}'") + logging.warning(f"ftext addr not find [i={i} addr=0x{t.addr:x} text='{t.text}]'") continue dstio.write(srcdata[last_addr: addr]) text = f_before(srcdata, t) if f_before else t.text @@ -424,7 +468,7 @@ def insert_adjust(encbytes: bytes, t: ftext_t, *, encbytes = insert_adjust(encbytes, t, insert_longer=insert_longer, insert_shorter=insert_shorter, insert_align=insert_align, bytes_padding=bytes_padding) if f_after: - dstdata = dstio.getbuffer()[:dstio.tell()] + dstdata = dstio.getbuffer() encbytes = f_after(srcdata, dstdata, encbytes, t) dstio.flush() del dstdata @@ -437,16 +481,17 @@ def insert_adjust(encbytes: 
bytes, t: ftext_t, *, if t.addr >= addr: t.addr_new = t.addr + shift if t.toaddr >= addr: t.toaddr_new = t.toaddr + shift _sizestr = f"0x{t.size:x}" + (f"->0x{len(encbytes):x}" if len(encbytes)!=t.size else "") - logging.info(f"insert addr=0x{addr:x} size={_sizestr} text='{text}'") + logging.info(f"inserted [addr=0x{addr:x} size={_sizestr} text='{text}]'") dstio.write(srcdata[last_addr:]) - if outpath: - with open(outpath, 'wb') as fp: fp.write(dstio.getbuffer()[:dstio.tell()] ) - logging.info(f"finished, datasize=0x{len(srcdata):x}->0x{dstio.tell():x}") - return dstio.getbuffer()[:dstio.tell()] - -@loadfiles([0, "referobj"]) -def check_ftexts(ftextsobj: Union[str, Tuple[List[ftext_t], List[ftext_t]]], outpath=None, + dstdata = dstio.getvalue() + dstio.close() + if outpath: savebytes(outpath, dstdata) + logging.info(f"finished with datasize 0x{len(srcdata):x}->0x{len(dstdata):x}") + return dstdata + +@loadfiles([(0, 'utf-8', 'ignore'), "referobj"]) +def check_ftexts(linesobj: Union[str, Tuple[List[ftext_t], List[ftext_t]]], outpath=None, encoding='utf-8', tblobj: Union[str, List[tbl_t]]=None, *, text_noeval=False, text_replace: Dict[bytes, bytes]=None, bytes_fallback: bytes = None, insert_longer=False, @@ -456,34 +501,35 @@ def check_ftexts(ftextsobj: Union[str, Tuple[List[ftext_t], List[ftext_t]]], out """ msgs: List[msg_t] = [] + lines = linesobj tbl = load_tbl(tblobj) if tblobj else None enc = tbl if tbl else encoding enc_error = bytes_fallback if tbl else ("ignore" if bytes_fallback else "strict") refdata = memoryview(referobj) if referobj else None text_replace = text_replace if text_replace else dict() - ftexts1, ftexts2 = load_ftext(readlines(ftextsobj)) if type(ftextsobj)==bytes else ftextsobj + msgs += check_ftextlines(lines) + ftexts1, ftexts2 = load_ftext(lines) if len(ftexts1) != len(ftexts2): - msg = msg_t(0, f"○● count not match, {len(ftexts1)}!={len(ftexts2)}", logging.WARNING) + msg = msg_t(-1, f"○● count not match {len(ftexts1)}!={len(ftexts2)}", 
logging.WARNING) logging.warning(msg.msg) msgs.append(msg) for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): - # check match - if t1.addr != t2.addr: - msg = msg_t(t1.addr, f"○●{i} addr not match, 0x{t1.addr:x}!=0x{t2.addr:x}", logging.WARNING) - logging.warning(f"addr=0x{msg.id:x} {msg.msg}") - msgs.append(msg) - if t1.size != t2.size: - msg = msg_t(t1.addr, f"○●{i} size not match, 0x{t1.size:x}!=0x{t2.size:x}", logging.WARNING) - logging.warning(f"addr=0x{msg.id:x} {msg.msg}") + # check org now match + err = "" + if t1.addr != t2.addr: err += f"addr not match 0x{t1.addr:x}!=0x{t2.addr:x}, " + if t1.size != t2.size: err += f"size not match 0x{t1.size:x}!=0x{t2.size:x} " + if len(err) > 0: + msg = msg_t(t1.addr, err + f"[○●no={i+1} addr=0x{t1.addr} text='{t1.text}']", logging.WARNING) + logging.warning(f"{msg.msg}") msgs.append(msg) # check src data if refdata: encbytes = encode_extend(t1.text, enc, enc_error, text_noeval) if encbytes != refdata[t1.addr: t1.addr + t1.size]: - msg = msg_t(t1.addr, f"○{i} text not match, {t1.text}", logging.WARNING) - logging.warning(f"addr=0x{msg.id:x} {msg.msg}") + msg = msg_t(t1.addr, f"text not match [○no{i+1} addr=0x{t1.addr} text='{t1.text}']", logging.WARNING) + logging.warning(f"{msg.msg}") msgs.append(msg) # check dst data @@ -497,19 +543,20 @@ def check_ftexts(ftextsobj: Union[str, Tuple[List[ftext_t], List[ftext_t]]], out if x is None: reject |= {c} if len(reject): reject_str = " ".join(list(reject)) - msg = msg_t(t1.addr, f"●{i} encode failed, reject=({reject_str}), text='{text}'", logging.ERROR) - logging.error(f"addr=0x{msg.id:x} {msg.msg}") + _s = f"encode failed, reject=({reject_str}) [●no{i+1} addr={t2.addr:x} text='{text}]'" + msg = msg_t(t1.addr, _s, logging.ERROR) + logging.error(f"{msg.msg}") msgs.append(msg) encbytes = encode_general(text, enc, enc_error) if not insert_longer and encbytes and len(encbytes) > t1.size: - msg = msg_t(t1.addr, f"●{i} size owerflow, 0x{len(encbytes):x}>0x{t2.size:x}", 
logging.WARNING) - logging.error(f"addr=0x{msg.id:x}, {msg.msg}") + err = f"size owerflow 0x{len(encbytes):x}>0x{t2.size:x} [●no{i+1} addr={t2.addr:x} text='{text}]" + msg = msg_t(t1.addr, err, logging.WARNING) + logging.warning(f"{msg.msg}") msgs.append(msg) - if outpath: - with codecs.open(outpath, "w", "utf-8") as fp: - fp.writelines(f"{logging.getLevelName(t.type)}:{t.id: x}: {t.msg}\n" for t in msgs) + lines = [f"{logging.getLevelName(t.type)}: {t.msg}\n" for t in msgs] + if outpath: savebytes(outpath, writelines(lines)) return msgs @@ -547,7 +594,7 @@ def cmd_check(args): text_replace=text_replace, text_noeval=args.text_noeval, bytes_fallback=bytes_fallback, insert_longer=args.insert_longer) - parser = argparse.ArgumentParser(description=g_description) + parser = argparse.ArgumentParser(description=description) subparsers = parser.add_subparsers(title="sub command") parser_e = subparsers.add_parser("extract", help="extract text in binfile to ftext") parser_i = subparsers.add_parser("insert", help="insert ftext to binfile") diff --git a/src/libutil.py b/src/libutil.py index 4e4f2da..121ade7 100644 --- a/src/libutil.py +++ b/src/libutil.py @@ -1,196 +1,228 @@ -""" -util functions and structures for galgame localization - v0.6, developed by devseed -""" - -import os -import gzip -import codecs -import zipfile -from io import BytesIO -from dataclasses import dataclass -from typing import Union, List, Tuple - -__version__ = 600 - -# util functions -def readlines(data: bytes, encoding='utf-8', encoding_error='ignore') -> List[str]: - i = 0 - start = 0 - lines = [] - while i < len(data): - if data[i] == ord('\r'): - if i+1 < len(data) and data[i+1] == '\n': i += 1 - lines.append(str(data[start: i+1], encoding, encoding_error)) - start = i+1 - elif data[i] == ord('\n'): - lines.append(str(data[start: i+1], encoding, encoding_error)) - start = i+1 - i += 1 - if start < len(data): lines.append(str(data[start:], encoding, encoding_error)) - return lines - -def 
writelines(lines: List[str], encoding='utf-8', encoding_error='ignore') -> bytes: - bufio = BytesIO() - for line in lines: - bufio.write(line.encode(encoding, encoding_error)) - return bufio.getbuffer() - -def loadfiles(indexs=None): - if indexs == None: indexs = [0] - if type(indexs) == int: indexs = [indexs] - - def load_gz(path) -> bytes: # path/x.gz - with gzip.GzipFile(path, 'rb') as fp: - return fp.read() - - def load_zip(path) -> bytes: # path1/x.zip>path2/y - path1, path2 = path.split(".zip>") - path2 = path2.replace('\\', '/') - with zipfile.ZipFile(path1 + ".zip", 'r') as fp1: - with fp1.open(path2, 'r') as fp2: - return fp2.read() - - def load_direct(path) -> bytes: - with open(path, 'rb') as fp: - return fp.read() - - def load_file(path: str) -> bytes: - if os.path.splitext(path)[1] == '.gz': data = load_gz(path) - elif ".zip>" in path: data = load_zip(path) - else: data = load_direct(path) - return data - - def wrapper1(func): # decorator(dec_args)(func)(fun_args) - def wrapper2(*args, **kw): - newargs = list(args) - for i, t in enumerate(indexs): - if type(t) == int and type(newargs[t]) == str: - newargs[t] = load_file(newargs[t]) - elif type(t) == str and t in kw and type(kw[t]) == str: - kw[t] = load_file(kw[t]) - return func(*newargs, **kw) - return wrapper2 - return wrapper1 - -# structures -@dataclass -class ftext_t: - addr: int = 0 - size: int = 0 - text: str = "" - -@dataclass -class tbl_t: - tcode : bytes = b"" - tchar : str = "" - -@dataclass -class jtable_t: # jump table - addr: int = 0 - addr_new: int = 0 - toaddr: int = 0 - toaddr_new: int = 0 - -@dataclass -class msg_t: - id: int = 0 - msg: str = "" - type: int = 0 - -# serilization functions -def save_ftext(ftexts1: List[ftext_t], ftexts2: List[ftext_t], outpath: str = None, *, - encoding="utf-8", width_index = (5, 6, 3)) -> List[str]: - """ - format text, such as ●num|addr|size● text - :param ftexts1[]: text dict array in '○' line, - :param ftexts2[]: text dict array in '●' line - 
:return: ftext lines - """ - - width_num, width_addr, width_size = width_index - if width_num==0: width_num = len(str(len(ftexts1))) - if width_addr==0: width_addr = len(hex(max(t.addr for t in ftexts1))) - 2 - if width_size==0: width_size = len(hex(max(t.size for t in ftexts1))) - 2 - - lines = [] - fstr1 = "○{num:0%dd}|{addr:0%dX}|{size:0%dX}○ {text}\n" \ - % (width_num, width_addr, width_size) - fstr2 = fstr1.replace('○', '●') - if not ftexts1: ftexts1 = [None for x in ftexts2] - if not ftexts2: ftexts2 = [None for x in ftexts1] - for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): - if t1: lines.append(fstr1.format(num=i, addr=t1.addr, size=t1.size, text=t1.text)) - if t2: lines.append(fstr2.format(num=i, addr=t2.addr, size=t2.size, text=t2.text)) - lines.append("\n") - - if outpath: - with codecs.open(outpath, 'w', encoding) as fp: - fp.writelines(lines) - - return lines - -@loadfiles(0) -def load_ftext(inobj: Union[str, List[str]], *, - encoding="utf-8") -> Tuple[List[ftext_t], List[ftext_t]]: - """ - format text, such as ●num|addr|size● text - :param inobj: can be path, or lines[] - :return: ftexts1[]: text dict array in '○' line, - ftexts2[]: text dict array in '●' line - """ - - ftexts1, ftexts2 = [], [] - lines = readlines(inobj, encoding, 'ignore') if type(inobj) != list else inobj - for line in lines: - indicator = line[0] - if indicator == "#": continue - if indicator not in {"○", "●"}: continue - line = line.rstrip('\n').rstrip('\r') - _, t1, *t2 = line.split(indicator) - t2 = "".join(t2) - ftext = ftext_t(text=t2[1:]) - try: - _, t12, t13 = t1.split('|') - ftext.addr, ftext.size = int(t12, 16), int(t13, 16) - except ValueError: pass - if indicator=='○': ftexts1.append(ftext) - else: ftexts2.append(ftext) - - return ftexts1, ftexts2 - -def save_tbl(tbl: List[tbl_t], outpath=None, *, encoding='utf-8') -> List[str]: - lines = [] - for t in tbl: - raw_str = "" - for d in t.tcode: raw_str += f"{d:02X}" - line = ("{:s}={:s}\n".format(raw_str, t.tchar)) - 
lines.append(line) - if outpath: - with codecs.open(outpath, "w", encoding) as fp: - fp.writelines(lines) - return lines - -@loadfiles(0) -def load_tbl(inobj: Union[str, List[str]], *, encoding='utf-8') -> List[tbl_t]: - """ - tbl file format "tcode=tchar", - :param inobj: can be path, or lines_text[] - :return: [(charcode, charstr)] - """ - - tbl: List[tbl_t] = [] - lines = readlines(inobj, encoding, 'ignore') if type(inobj) != list else inobj - for line in lines: - indicator = line[0] - if indicator == "#": continue - line = line.rstrip('\n').rstrip('\r') - if len(line) <= 0: continue - if line.find("==") == -1: t1, tchar = line.split('=') - else: t1 = line.split('=')[0]; tchar = '=' - tcode = bytearray() - for i in range(0, len(t1), 2): - tcode.append(int(t1[i: i+2], 16)) - tbl.append(tbl_t(bytes(tcode), tchar)) - +# -*- coding: utf-8 -*- +""" +util functions and structures for galgame localization + v0.6, developed by devseed +""" + +import os +import gzip +import zipfile +from io import BytesIO +from datetime import datetime +from dataclasses import dataclass +from typing import Union, List, Tuple + +__version__ = 600 + +# util functions +def readlines(data: bytes, encoding='utf-8', encoding_error='ignore') -> List[str]: + i = 0 + start = 0 + lines = [] + mem = memoryview(data) + while i < len(mem): + if mem[i] == ord('\r'): + if i+1 < len(mem) and mem[i+1] == '\n': i += 1 + lines.append(str(mem[start: i+1], encoding, encoding_error)) + start = i+1 + elif mem[i] == ord('\n'): + lines.append(str(mem[start: i+1], encoding, encoding_error)) + start = i+1 + i += 1 + if start < len(mem): lines.append(str(mem[start:], encoding, encoding_error)) + return lines + +def writelines(lines: List[str], encoding='utf-8', encoding_error='ignore') -> bytes: + bufio = BytesIO() + for line in lines: + bufio.write(line.encode(encoding, encoding_error)) + data = bufio.getvalue() + bufio.close() + return data + +def loadbytes(path) -> bytes: + def load_gz(path) -> bytes: # 
path/x.gz + with gzip.GzipFile(path, 'rb') as fp: + return fp.read() + + def load_zip(path) -> bytes: # path1/x.zip>path2/y + path1, path2 = path.split(".zip>") + path2 = path2.replace('\\', '/') + with zipfile.ZipFile(path1 + ".zip", 'r') as fp1: + with fp1.open(path2, 'r') as fp2: + return fp2.read() + + def load_direct(path) -> bytes: + with open(path, 'rb') as fp: + return fp.read() + + if os.path.splitext(path)[1] == '.gz': data = load_gz(path) + elif ".zip>" in path: data = load_zip(path) + else: data = load_direct(path) + return data + +def savebytes(path, data) -> int: + def save_gz(path, data) -> int: # path/x.gz + with gzip.GzipFile(path, 'wb') as fp: + return fp.write(data) + + def save_zip(path, data) -> int: # path1/x.zip>path2/y + path1, path2 = path.split(".zip>") + path2 = path2.replace('\\', '/') + with zipfile.ZipFile(path1 + ".zip", 'a') as fp1: + now = datetime.now() + info = zipfile.ZipInfo(filename=path2, date_time= \ + (now.year, now.month, now.day, now.hour, now.minute, now.second)) + fp1.writestr(info, data) + return len(data) + + def save_direct(path, data) -> int: + with open(path, 'wb') as fp: + return fp.write(data) + + if os.path.splitext(path)[1]==".gz": return save_gz(path, data) + elif ".zip>" in path: return save_zip(path, data) + else: return save_direct(path, data) + +def loadfiles(targets: Union[int, str, List]=None): + """ + :params targets: can be 0, 'k', [0], [(0, 'utf8', 'ignore'), 'k'], + """ + + if targets == None: targets = [0] + if type(targets) != list: targets = [targets] + + def wrapper1(func): # decorator(dec_args)(func)(fun_args) + def wrapper2(*_args, **kw): + args = list(_args) + for i, t in enumerate(targets): + w = None # for args or kw + t0 = t[0] if type(t)==tuple else t # for index + t1 = t[1:] if type(t)==tuple else None # for encoding, encoding_error + if type(t0)==int and type(args[t0])==str: w=args + elif type(t0)==str and t in kw and type(kw[t0]) == str: w=kw + if w is None: continue # no target arg + 
data = loadbytes(w[t0]) + w[t0] = readlines(data, *t1) if t1 else data + return func(*args, **kw) + return wrapper2 + return wrapper1 + +# structures +@dataclass +class ftext_t: + addr: int = 0 + size: int = 0 + text: str = "" + +@dataclass +class tbl_t: + tcode : bytes = b"" + tchar : str = "" + +@dataclass +class jtable_t: # jump table + addr: int = 0 + addr_new: int = 0 + toaddr: int = 0 + toaddr_new: int = 0 + +@dataclass +class msg_t: + id: int = 0 + msg: str = "" + type: int = 0 + +# serilization functions +def save_ftext(ftexts1: List[ftext_t], ftexts2: List[ftext_t], + outpath: str = None, *, encoding="utf-8", width_index = (5, 6, 3)) -> List[str]: + """ + format text, such as ●num|addr|size● text + :param ftexts1[]: text dict array in '○' line, + :param ftexts2[]: text dict array in '●' line + :return: ftext lines + """ + + width_num, width_addr, width_size = width_index + if width_num==0: width_num = len(str(len(ftexts1))) + if width_addr==0: width_addr = len(hex(max(t.addr for t in ftexts1))) - 2 + if width_size==0: width_size = len(hex(max(t.size for t in ftexts1))) - 2 + + lines = [] + fstr1 = "○{num:0%dd}|{addr:0%dX}|{size:0%dX}○ {text}\n" \ + % (width_num, width_addr, width_size) + fstr2 = fstr1.replace('○', '●') + if not ftexts1: ftexts1 = [None] * len(ftexts2) + if not ftexts2: ftexts2 = [None] * len(ftexts1) + for i, (t1, t2) in enumerate(zip(ftexts1, ftexts2)): + if t1: lines.append(fstr1.format(num=i, addr=t1.addr, size=t1.size, text=t1.text)) + if t2: lines.append(fstr2.format(num=i, addr=t2.addr, size=t2.size, text=t2.text)) + lines.append("\n") + + if outpath: savebytes(outpath, writelines(lines, encoding)) + + return lines + +@loadfiles(0) +def load_ftext(inobj: Union[str, List[str]], *, + encoding="utf-8") -> Tuple[List[ftext_t], List[ftext_t]]: + """ + format text, such as ●num|addr|size● text + :param inobj: can be path, or lines[] + :return: ftexts1[]: text dict array in '○' line, + ftexts2[]: text dict array in '●' line + """ + + 
ftexts1, ftexts2 = [], [] + lines = readlines(inobj, encoding, 'ignore') if type(inobj) != list else inobj + if len(lines) > 0: lines[0] = lines[0].lstrip("\ufeff") # remove bom + for line in lines: + indicator = line[0] + if indicator == "#": continue + if indicator != "○" and indicator != "●": continue + line = line.rstrip('\n').rstrip('\r') + _, t1, *t2 = line.split(indicator) + t2 = "".join(t2) + ftext = ftext_t(-1, 0, t2[1:]) + try: + _, t12, t13 = t1.split('|') + ftext.addr, ftext.size = int(t12, 16), int(t13, 16) + except ValueError: pass + if indicator=='○': ftexts1.append(ftext) + else: ftexts2.append(ftext) + + return ftexts1, ftexts2 + +def save_tbl(tbl: List[tbl_t], outpath=None, *, encoding='utf-8') -> List[str]: + lines = [] + for t in tbl: + raw_str = "" + for d in t.tcode: raw_str += f"{d:02X}" + line = ("{:s}={:s}\n".format(raw_str, t.tchar)) + lines.append(line) + if outpath: savebytes(outpath, writelines(lines, encoding)) + return lines + +@loadfiles(0) +def load_tbl(inobj: Union[str, List[str]], *, encoding='utf-8') -> List[tbl_t]: + """ + tbl file format "tcode=tchar", + :param inobj: can be path, or lines_text[] + :return: [(charcode, charstr)] + """ + + tbl: List[tbl_t] = [] + lines = readlines(inobj, encoding, 'ignore') if type(inobj) != list else inobj + for line in lines: + indicator = line[0] + if indicator == "#": continue + line = line.rstrip('\n').rstrip('\r') + if len(line) <= 0: continue + if line.find("==") == -1: t1, tchar = line.split('=') + else: t1 = line.split('=')[0]; tchar = '=' + tcode = bytearray() + for i in range(0, len(t1), 2): + tcode.append(int(t1[i: i+2], 16)) + tbl.append(tbl_t(bytes(tcode), tchar)) + return tbl \ No newline at end of file diff --git a/test/test_ftextcvt.py b/test/test_ftextcvt.py new file mode 100644 index 0000000..744de82 --- /dev/null +++ b/test/test_ftextcvt.py @@ -0,0 +1,39 @@ +import logging +import unittest +import tempfile + +from common import * +import libutil as util +import ftextcvt + 
+class TestCsv(unittest.TestCase): + def test_com001(self): + ftextlines = util.readlines(util.loadbytes(paths_ftext["COM001"])) + csvlines = ftextcvt.ftext2csv(ftextlines) + ftextlines2 = ftextcvt.csv2ftext(csvlines) + assert_lines(self, ftextlines, ftextlines2) + +class TestJson(unittest.TestCase): + def test_com001(self): + ftextlines = util.readlines(util.loadbytes(paths_ftext["COM001"])) + jsonlines = ftextcvt.ftext2json(ftextlines) + ftextlines2 = ftextcvt.json2ftext(b"".join([x.encode('utf-8') for x in jsonlines])) + assert_lines(self, ftextlines, ftextlines2) + +class TestDocx(unittest.TestCase): + def test_com001(self): + tmppath = tempfile.NamedTemporaryFile("wb+") + ftextlines = util.readlines(util.loadbytes(paths_ftext["COM001"])) + doc = ftextcvt.ftext2docx(ftextlines, tmppath) + ftextlines2 = ftextcvt.docx2ftext(tmppath) + assert_lines(self, ftextlines, ftextlines2) + +class TestPretty(unittest.TestCase): + def test_com001(self): + ftextlines = util.readlines(util.loadbytes(paths_ftext["COM001"])) + ftextlines2 = ftextcvt.ftext2pretty(ftextlines) + assert_lines(self, ftextlines, ftextlines2) + +if __name__ == '__main__': + logging.basicConfig(level=logging.WARNING, format="%(levelname)s:%(funcName)s: %(message)s") + unittest.main() \ No newline at end of file