"""Parametrized tests for the helpers in ``dw_base.spark.udf.enterprise.ent_clean_text``.

Each test feeds sample inputs to one cleaning helper and compares the
result with the expected cleaned value. Most tables also include empty
string and ``None`` inputs, which are expected to yield ``None``.
"""
import pytest

# The helpers under test are pulled in via a star import, so every name
# called below (clean_url, reverse_str, ...) comes from this module.
from dw_base.spark.udf.enterprise.ent_clean_text import *


@pytest.mark.parametrize("country, url, expected", [
    ('China', 'https://www.ianshaw.biz/p/contact-management.php', 'ianshaw.biz/p/contact-management.php'),
])
def test_clean_url(country, url, expected):
    """clean_url: the sample expects the scheme and ``www.`` prefix to be dropped."""
    result = clean_url(country, url)
    assert result == expected


@pytest.mark.parametrize("url, expected", [
    ('https://charnleyfertilisers.co.uk/', 'charnleyfertilisers.co.uk'),
])
def test_clean_url_common(url, expected):
    """clean_url_common: the sample expects the scheme and trailing slash to be dropped."""
    result = clean_url_common(url)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('13.02.2024', '2024-02-13'),
    ('13/02/2024', '2024-02-13'),
    ('', None),
    (None, None),
])
def test_reverse_str(raw, expected):
    """reverse_str: day-first dates are expected back as ``YYYY-MM-DD``."""
    result = reverse_str(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('13a02b20 24', '13022024'),
])
def test_replace_english_and_space(raw, expected):
    """replace_english_and_space: letters and spaces are expected to be removed."""
    result = replace_english_and_space(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('981617611,981617611,981617611', '981617611'),
    ('', None),
    (None, None),
])
def test_array_remove_duplicates(raw, expected):
    """array_remove_duplicates: repeated comma-separated values collapse to one."""
    result = array_remove_duplicates(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('12.575.462 ALVARO PEREIRA DA SILVEIRA FILHO', 'ALVARO PEREIRA DA SILVEIRA FILHO'),
    ('HEBE DE ABREU VILELA CPF 027116806149', 'HEBE DE ABREU VILELA CPF'),
    ('HEBE DE ABREU VILELA CPF 027116', 'HEBE DE ABREU VILELA CPF 027116'),
    ('', None),
    (None, None),
])
def test_clean_brazil_company_name(raw, expected):
    """clean_brazil_company_name: check the sample names against their cleaned form."""
    result = clean_brazil_company_name(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('2124859522,2124859523', '2124859522,2124859523'),
    ('123456,2124859523', '2124859523'),
    ('123456789,12345678', None),
    ('', None),
    (None, None),
])
def test_phone_clean_turkey(raw, expected):
    """phone_clean_turkey: check the sample phone lists against their cleaned form."""
    result = phone_clean_turkey(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('91234567891', None),
    ('01234567891', '1234567891'),
    ('123456789012', None),
    ('21', None),
    ('', None),
    (None, None),
])
def test_fax_clean_turkey(raw, expected):
    """fax_clean_turkey: check the sample fax numbers against their cleaned form."""
    result = fax_clean_turkey(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('["10.71.01-Taze pastane ürünleri imalatı (yaş pasta, kuru pasta, poğaça, kek, börek, pay, turta, waffles vb.)"]', '107101'),
    ('["35.12.13-Elektrik enerjisinin iletimi (elektrik üretim kaynağından dağıtım sistemine aktaran iletim sistemlerinin işletilmesi)","42.22.02-Enerji santralleri inşaatı (hidroelektrik santrali, termik santral, nükleer enerji üretim santralleri vb.)","35.11.19-Elektrik enerjisi üretimi"]', '351213, 422202, 351119'),
    ('', None),
    (None, None),
])
def test_turkey_nicecode(raw, expected):
    """turkey_nicecode: the dotted codes leading each entry are expected as digit strings."""
    result = turkey_nicecode(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('HUGO.SANSIL@GMAIL.COM E HUGO@SISTEMAFIEG.ORG.BR', ['hugo.sansil@gmail.com', 'hugo@sistemafieg.org.br']),
    ('SANDRA_MMC@BOL.COM.BR NAIRMOTADIAS@HOTMAIL.COM', ['sandra_mmc@bol.com.br', 'nairmotadias@hotmail.com']),
    ('', None),
    (None, None),
])
def test_email_clean_brazil(raw, expected):
    """email_clean_brazil: the addresses are expected back as a lower-cased list."""
    result = email_clean_brazil(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('["medical practice","medical practices","hospital & health care"]', 'medical practice,medical practices,hospital & health care'),
    ('["construction"]', 'construction'),
    ('', None),
    (None, None),
])
def test_arr_str_to_str(raw, expected):
    """arr_str_to_str: a JSON-style string array is expected as a comma-joined string."""
    result = arr_str_to_str(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('+1-866-344-7857 ext. 311', '+1 866 344 7857 311'),
    ('(844)800-BULL', None),
    ('', None),
    (None, None),
])
def test_clean_tel_apollo(raw, expected):
    """clean_tel_apollo: check the sample phone numbers against their cleaned form."""
    result = clean_tel_apollo(raw)
    assert result == expected


@pytest.mark.parametrize("socialtype, url, expected", [
    ("youtube", "https://youtube.com/user/BrotherCanadaEn", 'user/brothercanadaen'),
    ("whatsapp", "919822025525", '919822025525'),
    ("twitter", "https://twitter.com/#", ''),
    ("linkedin", "https://www.linkedin.com/in/meb-jsc/#", 'in/meb-jsc'),
    ("instagram", "https://www.instagram.com/##############/", ''),
    ("facebook", "https://www.facebook.com/https://www.facebook.com/komlider38/", 'komlider38'),
    ("pinterest", "https://www.pinterest.com/lampstore/https://www.pinterest.com/lampstore/", 'lampstore'),
    (None, "", None),
    ("", None, None),
    (None, None, None),
])
def test_socialmedia_url(socialtype, url, expected):
    """socialmedia_url: check per-platform sample URLs against the expected handle/path."""
    result = socialmedia_url(socialtype, url)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('-- PACIFIC PRODUCTS LIMITED AUSTRALIAN PRODUCTS LIMITED', 'PACIFIC PRODUCTS LIMITED AUSTRALIAN PRODUCTS LIMITED'),
    ('03-MAY-2013 Fuente Union Import And Export Limited 福恩特聯合進出口有限公司', 'Fuente Union Import And Export Limited 福恩特聯合進出口有限公司'),
    ('', None),
    (None, None),
])
def test_hongkong_previous_name_clean(raw, expected):
    """hongkong_previous_name_clean: a leading ``--`` or date prefix is expected to be removed."""
    result = hongkong_previous_name_clean(raw)
    assert result == expected


@pytest.mark.parametrize("raw, expected", [
    ('["ownership-of-shares-25-to-50-percent","voting-rights-25-to-50-percent"]', '25-to-50'),
    ('["ownership-of-shares-more-than-25-percent-registered-overseas-entity"]', 'more-than-25'),
    ('', None),
    (None, None),
])
def test_uk_sharepercent(raw, expected):
    """uk_sharepercent: check the sample control descriptions against the expected range."""
    result = uk_sharepercent(raw)
    assert result == expected


if __name__ == '__main__':
    # Called without arguments, so pytest takes its options from the command line.
    pytest.main()