Skip to content

Commit

Permalink
Remove RLI and PDI characters from Hebrew gloss Kindle files
Browse files Browse the repository at this point in the history
Kindle shows them as rectangles.
  • Loading branch information
xxyzz committed Mar 23, 2024
1 parent e575a1c commit c8852ca
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/proficiency/create_klld.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ def create_klld_db(
) in wiktionary_conn.execute(
"SELECT id, lemma_id, pos, short_def, full_def, example FROM senses"
):
if gloss_lang == "he":
short_def = remove_rtl_pdi(short_def)
full_def = remove_rtl_pdi(full_def)

klld_conn.execute(
"""
INSERT INTO senses (id, display_lemma_id, term_id, term_lemma_id, pos_type, source_id,
Expand Down Expand Up @@ -148,3 +152,8 @@ def create_klld_db(
klld_conn.commit()
klld_conn.close()
wiktionary_conn.close()


def remove_rtl_pdi(text: str) -> str:
    """Return *text* with the RLI and PDI bidi control characters removed.

    Strips every RIGHT-TO-LEFT ISOLATE (U+2067) and POP DIRECTIONAL
    ISOLATE (U+2069) code point; all other characters, including other
    bidirectional controls, are left untouched.

    Kindle shows these two characters as rectangles, so they are dropped
    from Hebrew gloss text before it is written out.

    See https://en.wikipedia.org/wiki/Bidirectional_text
    """
    # U+2067 = RLI (opens a right-to-left isolate), U+2069 = PDI (closes it).
    return text.replace("\u2067", "").replace("\u2069", "")

0 comments on commit c8852ca

Please sign in to comment.