diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 63405a5d..78d15b65 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -120,5 +120,5 @@ jobs: # This test will fail if the output encoding is cp1252 # Warning: the diff line below is PowerShell syntax, not bash! run: | - echo ćś | readalongs make-xml -l fra - - > cs.readalong + echo ćś | readalongs make-xml -l fra - - | findstr /v meta > cs.readalong if (diff (cat cs.readalong) (cat test/data/cs-ref.readalong)) { throw "Output did not match reference" } diff --git a/.gitignore b/.gitignore index 60c0f607..cb3111ba 100644 --- a/.gitignore +++ b/.gitignore @@ -208,3 +208,7 @@ $RECYCLE.BIN/ *.lnk # End of https://www.gitignore.io/api/linux,macos,python,windows,visualstudiocode + +#temporary file +.tmp +.conda diff --git a/readalongs/__init__.py b/readalongs/__init__.py index 20e3bcb6..f375d2ac 100644 --- a/readalongs/__init__.py +++ b/readalongs/__init__.py @@ -10,7 +10,7 @@ import sys -VERSION = "1.0" +VERSION = "1.1" if sys.version_info < (3, 8, 0): # pragma: no cover sys.exit( diff --git a/readalongs/_version.py b/readalongs/_version.py index 0ac53ac1..379aa8d6 100644 --- a/readalongs/_version.py +++ b/readalongs/_version.py @@ -1 +1 @@ -__version__ = "1.0.20240426" +__version__ = "1.1.20240619" diff --git a/readalongs/align.py b/readalongs/align.py index 5e2259aa..75f82b58 100644 --- a/readalongs/align.py +++ b/readalongs/align.py @@ -18,6 +18,8 @@ from pympi.Praat import TextGrid from webvtt import Caption, WebVTT +from readalongs import VERSION +from readalongs._version import __version__ from readalongs.audio_utils import ( extract_section, mute_section, @@ -175,7 +177,7 @@ def parse_and_make_xml( """Parse XML input and run tokenization and G2P. Args: - xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.0.dtd) + xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.1.dtd) config (dict): Optional; ReadAlong-Studio configuration to use save_temps (str): Optional; Save temporary files, by default None verbose_g2p_warnings (boolean): Optional; display all g2p errors and warnings @@ -568,7 +570,7 @@ def align_audio( """Align an XML input file to an audio file. Args: - xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.0.dtd) + xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.1.dtd) audio_path (str): Path to audio input. Must be in a format supported by ffmpeg unit (str): Optional; Element to create alignments for, by default 'w' bare (boolean): Optional; @@ -1156,7 +1158,8 @@ def convert_to_xhtml(tokenized_xml, title="Book"): # TODO: add this to template RAS_TEMPLATE = """ - + + {{#pages}} @@ -1177,7 +1180,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"): def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> str: - """Create input xml in ReadAlong XML format (see static/read-along-1.0.dtd) + """Create input xml in ReadAlong XML format (see static/read-along-1.1.dtd) Uses the line sequence to infer paragraph and sentence structure from plain text: Assumes a double blank line marks a page break, and a single blank line marks a paragraph break. @@ -1194,6 +1197,8 @@ def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> kwargs = { "main_lang": text_languages[0], "fallback_langs": ",".join(text_languages[1:]), + "studio_version": __version__, + "format_version": VERSION, } pages: List[dict] = [] paragraphs: List[dict] = [] @@ -1223,7 +1228,7 @@ def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> def create_input_ras(**kwargs): - """Create input xml in ReadAlong XML format (see static/read-along-1.0.dtd) + """Create input xml in ReadAlong XML format (see static/read-along-1.1.dtd) Uses readlines to infer paragraph and sentence structure from plain text. Assumes a double blank line marks a page break, and a single blank line marks a paragraph break. diff --git a/readalongs/static/read-along-1.1.dtd b/readalongs/static/read-along-1.1.dtd new file mode 100644 index 00000000..8202e169 --- /dev/null +++ b/readalongs/static/read-along-1.1.dtd @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/readalongs/text/make_package.py b/readalongs/text/make_package.py index 2b429772..96f2991b 100644 --- a/readalongs/text/make_package.py +++ b/readalongs/text/make_package.py @@ -37,7 +37,7 @@ - + {title} diff --git a/readalongs/text/util.py b/readalongs/text/util.py index b193e974..7af2febb 100644 --- a/readalongs/text/util.py +++ b/readalongs/text/util.py @@ -230,7 +230,7 @@ def copy_file_to_zip(zip_path, origin_path, destination_path): - + {title} diff --git a/readalongs/web_api.py b/readalongs/web_api.py index 9e30bc63..bb63f1d6 100644 --- a/readalongs/web_api.py +++ b/readalongs/web_api.py @@ -41,6 +41,8 @@ from pydantic import BaseModel, Field from starlette.background import BackgroundTask +from readalongs import VERSION +from readalongs._version import __version__ from readalongs.align import create_ras_from_text, save_label_files, save_subtitles from readalongs.log import LOGGER, capture_logs from readalongs.text.add_ids_to_xml import add_ids @@ -77,7 +79,7 @@ # Call get_langs() when the server loads to load the languages into memory LANGS = get_langs() # Get the DTD -DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.0.dtd") +DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.1.dtd") with open(DTDPATH) as dtdfh: DTD = etree.DTD(dtdfh) @@ -323,7 +325,8 @@ class ConvertRequest(BaseModel): dedent( """\ - + +
@@ -337,6 +340,7 @@ class ConvertRequest(BaseModel): """ + % (VERSION, __version__) ) ], ) diff --git a/test/data/cs-ref.readalong b/test/data/cs-ref.readalong index 71f3d659..b5d09e82 100644 --- a/test/data/cs-ref.readalong +++ b/test/data/cs-ref.readalong @@ -1,5 +1,5 @@ - +
diff --git a/test/data/ej-fra-anchors.readalong b/test/data/ej-fra-anchors.readalong index bef3896c..8f593076 100644 --- a/test/data/ej-fra-anchors.readalong +++ b/test/data/ej-fra-anchors.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/ej-fra-anchors2.readalong b/test/data/ej-fra-anchors2.readalong index b929b7e9..f2595346 100644 --- a/test/data/ej-fra-anchors2.readalong +++ b/test/data/ej-fra-anchors2.readalong @@ -1,5 +1,6 @@ - + + diff --git a/test/data/ej-fra-dna.readalong b/test/data/ej-fra-dna.readalong index bf885889..73bdbb93 100644 --- a/test/data/ej-fra-dna.readalong +++ b/test/data/ej-fra-dna.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/ej-fra-package.readalong b/test/data/ej-fra-package.readalong index ae1f9f70..1ecda7e9 100644 --- a/test/data/ej-fra-package.readalong +++ b/test/data/ej-fra-package.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/ej-fra-silence-bad.readalong b/test/data/ej-fra-silence-bad.readalong index f40b4a80..d8bbdd5c 100644 --- a/test/data/ej-fra-silence-bad.readalong +++ b/test/data/ej-fra-silence-bad.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/ej-fra-silence.readalong b/test/data/ej-fra-silence.readalong index a24dd15b..d9de6e72 100644 --- a/test/data/ej-fra-silence.readalong +++ b/test/data/ej-fra-silence.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/ej-fra-subword.readalong b/test/data/ej-fra-subword.readalong index 173811aa..070ceb13 100644 --- a/test/data/ej-fra-subword.readalong +++ b/test/data/ej-fra-subword.readalong @@ -1,5 +1,6 @@ - + + diff --git a/test/data/ej-fra-translated.readalong b/test/data/ej-fra-translated.readalong index 63ffb0ab..7f8bca50 100644 --- a/test/data/ej-fra-translated.readalong +++ b/test/data/ej-fra-translated.readalong @@ -1,5 +1,6 @@ - + + diff --git a/test/data/ej-fra.readalong b/test/data/ej-fra.readalong index 5c78c992..7531c352 100644 --- a/test/data/ej-fra.readalong +++ b/test/data/ej-fra.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/ej-fra.wav b/test/data/ej-fra.wav new file mode 100644 index 00000000..149df9a4 Binary files /dev/null and b/test/data/ej-fra.wav differ diff --git a/test/data/fra-prepared.readalong b/test/data/fra-prepared.readalong index a7b8bc6f..898f01a4 100644 --- a/test/data/fra-prepared.readalong +++ b/test/data/fra-prepared.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/fra-tokenized.readalong b/test/data/fra-tokenized.readalong index 2c5d597a..058b883e 100644 --- a/test/data/fra-tokenized.readalong +++ b/test/data/fra-tokenized.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/mixed-langs.readalong b/test/data/mixed-langs.readalong index 17c6554e..af345dea 100644 --- a/test/data/mixed-langs.readalong +++ b/test/data/mixed-langs.readalong @@ -1,5 +1,6 @@ - + +
diff --git a/test/data/patrickxtlan.readalong b/test/data/patrickxtlan.readalong index 75ebf312..4b8a8560 100644 --- a/test/data/patrickxtlan.readalong +++ b/test/data/patrickxtlan.readalong @@ -1,5 +1,6 @@ - + +

diff --git a/test/test_align_cli.py b/test/test_align_cli.py index c10cb640..00673998 100755 --- a/test/test_align_cli.py +++ b/test/test_align_cli.py @@ -14,6 +14,8 @@ from lxml.html import fromstring from sound_swallower_stub import SoundSwallowerStub +from readalongs import VERSION +from readalongs._version import __version__ from readalongs.cli import align, langs @@ -325,10 +327,13 @@ def test_bad_anchors(self): """Make sure invalid anchors yield appropriate errors""" xml_text = """ -

+

Bonjour.

- """ + """ % ( + VERSION, + __version__, + ) xml_file = join(self.tempdir, "bad-anchor.readalong") with open(xml_file, "w", encoding="utf8") as f: print(xml_text, file=f) @@ -626,4 +631,4 @@ def slurp_text(filename, encoding): if __name__ == "__main__": - main() + main() diff --git a/test/test_dtd.py b/test/test_dtd.py index fde55957..3e36e749 100644 --- a/test/test_dtd.py +++ b/test/test_dtd.py @@ -9,7 +9,7 @@ from lxml import etree DTDPATH = os.path.join( - dirname(__file__), "..", "readalongs", "static", "read-along-1.0.dtd" + dirname(__file__), "..", "readalongs", "static", "read-along-1.1.dtd" ) VALID_RAS = """ diff --git a/test/test_g2p_cli.py b/test/test_g2p_cli.py index 953c1a09..830ce549 100755 --- a/test/test_g2p_cli.py +++ b/test/test_g2p_cli.py @@ -9,6 +9,7 @@ from basic_test_case import BasicTestCase from lxml import etree from sound_swallower_stub import SoundSwallowerStub +from test_make_xml_cli import updateFormatVersion, updateStudioVersion from readalongs.align import align_audio from readalongs.cli import align, g2p, make_xml, tokenize @@ -76,9 +77,13 @@ def test_mixed_langs(self): ref_file, encoding="utf8" ) as ref_f: self.maxDiff = None + # update version info + ref_list = list(ref_f) + ref_list[1] = updateFormatVersion(ref_list[1]) + ref_list[2] = updateStudioVersion(ref_list[2]) self.assertListEqual( list(output_f), - list(ref_f), + ref_list, f"output {g2p_file} and reference {ref_file} differ.", ) diff --git a/test/test_make_xml_cli.py b/test/test_make_xml_cli.py index a36a23db..c7f8c03b 100755 --- a/test/test_make_xml_cli.py +++ b/test/test_make_xml_cli.py @@ -10,10 +10,20 @@ from basic_test_case import BasicTestCase +from readalongs import VERSION + +# from readalongs.log import LOGGER +from readalongs._version import __version__ from readalongs.align import create_input_ras, create_ras_from_text from readalongs.cli import align, make_xml -# from readalongs.log import LOGGER + +def updateFormatVersion(input): + return input.replace("{{format_version}}", VERSION) + + +def updateStudioVersion(input): + return input.replace("{{studio_version}}", __version__) class TestMakeXMLCli(BasicTestCase): @@ -90,9 +100,13 @@ def test_output_correct(self): ref_file, encoding="utf8" ) as ref_f: self.maxDiff = None + # update version info + ref_list = list(ref_f) + ref_list[1] = updateFormatVersion(ref_list[1]) + ref_list[2] = updateStudioVersion(ref_list[2]) self.assertListEqual( list(output_f), - list(ref_f), + ref_list, f"output {xml_file} and reference {ref_file} differ.", ) diff --git a/test/test_misc.py b/test/test_misc.py index 55cd37db..fbe9e03c 100755 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -12,6 +12,8 @@ from lxml import etree from test_dna_utils import segments_from_pairs +from readalongs import VERSION +from readalongs._version import __version__ from readalongs.align import split_silences from readalongs.log import LOGGER, capture_logs from readalongs.text.util import ( @@ -89,7 +91,8 @@ def test_split_silences(self): self.assertEqual(words, ref) def test_get_attrib_recursive(self): - raw_xml = """ + raw_xml = """ +

stuffnonsense

stuffnonsense

@@ -101,8 +104,12 @@ def test_get_attrib_recursive(self):

stuffnonsense!

- """ + """ % ( + VERSION, + __version__, + ) xml = parse_xml(raw_xml) + for i, s, lang in zip( itertools.count(), xml.xpath(".//s"), diff --git a/test/test_web_api.py b/test/test_web_api.py index dab9ba4f..def7a0f6 100755 --- a/test/test_web_api.py +++ b/test/test_web_api.py @@ -7,6 +7,8 @@ from basic_test_case import BasicTestCase +from readalongs import VERSION +from readalongs._version import __version__ from readalongs.log import LOGGER from readalongs.text.add_ids_to_xml import add_ids from readalongs.text.convert_xml import convert_xml @@ -31,7 +33,12 @@ def API_CLIENT(self): def slurp_data_file(self, filename: str) -> str: """Convenience function to slurp a whole file in self.data_dir""" with open(os.path.join(self.data_dir, filename), encoding="utf8") as f: - return f.read().strip() + return ( + f.read() + .strip() + .replace("{{format_version}}", VERSION) + .replace("{{studio_version}}", __version__) + ) def test_assemble_from_plain_text(self): # Test the assemble endpoint with plain text @@ -198,9 +205,9 @@ def test_debug(self): self.assertIsNone(content["g2ped"]) hej_verden_xml = dedent( - """\ - - + """ + +
@@ -215,6 +222,7 @@ def test_debug(self): """ + % (VERSION, __version__) ) def test_convert_to_TextGrid(self): @@ -431,9 +439,9 @@ def test_cleanup_even_if_error(self): # that exception in a sane way, with a 422 status code, while # also making sure the temporary directory gets deleted. overlap_xml = dedent( - """\ - - + """ + +
@@ -448,6 +456,7 @@ def test_cleanup_even_if_error(self): """ + % (VERSION, __version__) ) request = { "dur": 83.1,