| 12345678910111213141516171819202122 |
- import pytest
- from typing import Set
- from dw_base.spark.udf.customs.clean_crawler_data import get_regex_match, clean_germany_company_name
@pytest.mark.parametrize(
    "company_name, expected",
    [
        ('Sécheron SA', {'é'}),
        ('Sécheron SA\\u0022ss', {'é', '\\u0022'}),
        ('GEHS GRÜN ENERGİE HEIZUNG UND SANİTÂR', {'Ü', 'Â'}),
    ],
)
def test_get_regex_match(company_name: str, expected: Set[str]):
    """Check that ``get_regex_match`` yields the expected set of matches.

    Each case pairs a raw company name with the exact set of substrings
    that ``get_regex_match`` is expected to return for it; the comparison
    is a plain equality check against that set.
    """
    assert get_regex_match(company_name) == expected
@pytest.mark.parametrize(
    "company_name, expected",
    [
        ('Beiersdorf Indústria Comércio', 'Beiersdorf Indústria Comércio'),
        ('GPS Prüftechnik Rhein/Main GmbH', 'GPS Prüftechnik Rhein/Main GmbH'),
    ],
)
def test_clean_germany_company_name(company_name: str, expected: str):
    """Check that ``clean_germany_company_name`` returns the expected string.

    In both listed cases the expected value is identical to the input, so
    these cases assert that the names pass through unchanged.
    """
    assert clean_germany_company_name(company_name) == expected
|