2026-02-01 09:31:38 +01:00

84 lines
3.3 KiB
Python

import os
import shutil
import unittest.mock
from nltk import download
from nltk.downloader import build_index
def test_downloader_using_existing_parent_download_dir(tmp_path):
    """Check that ``download`` succeeds when the target's parent folder exists.

    The target is a single new folder directly below ``tmp_path``, so only the
    leaf directory is missing when the download is requested.
    """
    target_dir = tmp_path / "another_dir"
    succeeded = download("mwa_ppdb", str(target_dir))
    assert succeeded is True
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
    """Check that ``download`` succeeds when the target's parent folder is missing.

    The target sits two new levels below ``tmp_path``, so neither the target
    directory nor its parent is created by this test before the download.
    """
    target_dir = tmp_path / "non-existing-parent-folder" / "another-non-existing-folder"
    succeeded = download("mwa_ppdb", str(target_dir))
    assert succeeded is True
def test_downloader_redownload(tmp_path):
    """Check the message printed by a first download and by a repeated one.

    The ``stopwords`` package is downloaded twice into the same directory. On
    each pass the second ``print`` call is compared with the message expected
    for that pass: an "Unzipping" line first, then "already up-to-date".
    """
    target_dir = str(tmp_path / "test_repeat_download")

    # One entry per download attempt, in the order the attempts are made.
    expected_messages = (
        "[nltk_data] Unzipping %s." % os.path.join("corpora", "stopwords.zip"),
        "[nltk_data] Package stopwords is already up-to-date!",
    )

    for message in expected_messages:
        # capsys doesn't capture functools.partial stdout, which
        # nltk.download.show uses, so print itself is mocked instead.
        with unittest.mock.patch("builtins.print") as mocked_print:
            succeeded = download("stopwords", target_dir)
            assert succeeded is True

            expected_call = unittest.mock.call(message)
            # Index 1 is the second print call made during this download.
            assert mocked_print.call_args_list[1].args == expected_call.args
def test_build_index(tmp_path):
    """Check that ``build_index`` reports both checksum attributes for a package.

    A ``packages/test_package`` directory is created under ``tmp_path`` holding
    a package XML descriptor and a zip archive of that directory. The index is
    then built from ``tmp_path`` and its first package element is inspected.
    """
    test_pkg_name = "test_package"
    packages_root = str(tmp_path.joinpath("packages"))
    package_dir = os.path.join(packages_root, test_pkg_name)
    os.makedirs(package_dir, exist_ok=True)

    descriptor_path = os.path.join(package_dir, f"{test_pkg_name}.xml")
    descriptor_xml = (
        f'<package id="{test_pkg_name}" name="A Test Package" webpage="http://www.somefake.url/"'
        ' unzip="1"/>'
    )
    with open(descriptor_path, "w") as descriptor_file:
        descriptor_file.write(descriptor_xml)

    # A real zip is needed because file checksums are being validated, so a
    # simple archive containing the XML descriptor is created instead of a mock.
    shutil.make_archive(
        base_name=os.path.join(package_dir, test_pkg_name),
        format="zip",
        root_dir=packages_root,
        base_dir=os.path.basename(package_dir),
    )

    index_root = os.path.dirname(packages_root)
    xml_index = build_index(root=index_root, base_url="https://someurl")

    package_element = xml_index[0][0]
    assert package_element.get("id") == "test_package"

    # Both checksum attributes must be present as non-trivial strings.
    for attribute in ("checksum", "sha256_checksum"):
        digest = package_element.get(attribute)
        assert isinstance(digest, str)
        assert len(digest) > 5