From 67778600efe76475e57f360ec890da8861c1b8aa Mon Sep 17 00:00:00 2001 From: Nicholas Rempel Date: Mon, 31 May 2021 10:09:22 -0700 Subject: [PATCH] Don't index English words --- instant-distance-py/examples/translations/translate.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/instant-distance-py/examples/translations/translate.py b/instant-distance-py/examples/translations/translate.py index 85cbf78..085ecf0 100644 --- a/instant-distance-py/examples/translations/translate.py +++ b/instant-distance-py/examples/translations/translate.py @@ -68,6 +68,11 @@ async def download_build_index(): if lang == "en": word_map[value] = embedding else: + # Don't index words that exist in English + # to improve the quality of the results. + if value in word_map: + continue + # We track values here to build the instant-distance index # Every value is prepended with 2 character language code. # This allows us to determine language output later.