| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156 |
- import pytest
- from dw_base.spark.udf.enterprise.ent_clean_text import *
@pytest.mark.parametrize(
    "country, url, expected",
    [
        (
            'China',
            'https://www.ianshaw.biz/p/contact-management.php',
            'ianshaw.biz/p/contact-management.php',
        ),
    ],
)
def test_clean_url(country, url, expected):
    """Check ``clean_url(country, url)`` against the expected cleaned URL.

    The single case expects the ``https://www.`` prefix to be absent from
    the result while the path is kept.
    """
    assert clean_url(country, url) == expected
@pytest.mark.parametrize(
    "url, expected",
    [
        ('https://charnleyfertilisers.co.uk/', 'charnleyfertilisers.co.uk'),
    ],
)
def test_clean_url_common(url, expected):
    """Check ``clean_url_common(url)`` against the expected cleaned URL.

    The single case expects the ``https://`` scheme and the trailing slash
    to be absent from the result.
    """
    assert clean_url_common(url) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        ('13.02.2024', '2024-02-13'),
        ('13/02/2024', '2024-02-13'),
        ('', None),
        (None, None),
    ],
)
def test_reverse_str(text, expected):
    """Check ``reverse_str`` against each parametrized input.

    The cases expect ``13.02.2024`` and ``13/02/2024`` to both come back as
    ``2024-02-13``, and empty or ``None`` input to yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert reverse_str(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        ('13a02b20 24', '13022024'),
    ],
)
def test_replace_english_and_space(text, expected):
    """Check ``replace_english_and_space`` against each parametrized input.

    The single case expects the letters and the space in ``13a02b20 24`` to
    be absent from the result, leaving ``13022024``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert replace_english_and_space(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        ('981617611,981617611,981617611', '981617611'),
        ('', None),
        (None, None),
    ],
)
def test_array_remove_duplicates(text, expected):
    """Check ``array_remove_duplicates`` against each parametrized input.

    The cases expect a comma-separated string of three identical values to
    collapse to one value, and empty or ``None`` input to yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert array_remove_duplicates(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        (
            '12.575.462 ALVARO PEREIRA DA SILVEIRA FILHO',
            'ALVARO PEREIRA DA SILVEIRA FILHO',
        ),
        ('HEBE DE ABREU VILELA CPF 027116806149', 'HEBE DE ABREU VILELA CPF'),
        ('HEBE DE ABREU VILELA CPF 027116', 'HEBE DE ABREU VILELA CPF 027116'),
        ('', None),
        (None, None),
    ],
)
def test_clean_brazil_company_name(text, expected):
    """Check ``clean_brazil_company_name`` against each parametrized input.

    The cases expect a leading ``12.575.462`` and a trailing 12-digit number
    to be dropped from the name, a trailing 6-digit number to be kept, and
    empty or ``None`` input to yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert clean_brazil_company_name(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        ('2124859522,2124859523', '2124859522,2124859523'),
        ('123456,2124859523', '2124859523'),
        ('123456789,12345678', None),
        ('', None),
        (None, None),
    ],
)
def test_phone_clean_turkey(text, expected):
    """Check ``phone_clean_turkey`` against each parametrized input.

    The cases expect 10-digit entries of a comma-separated list to be kept,
    the 6-, 8- and 9-digit entries to be dropped, and ``None`` to be
    returned when nothing remains or the input is empty or ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert phone_clean_turkey(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        ('91234567891', None),
        ('01234567891', '1234567891'),
        ('123456789012', None),
        ('21', None),
        ('', None),
        (None, None),
    ],
)
def test_fax_clean_turkey(text, expected):
    """Check ``fax_clean_turkey`` against each parametrized input.

    The cases expect ``01234567891`` to come back without its leading ``0``
    and every other listed input (including empty and ``None``) to yield
    ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert fax_clean_turkey(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        (
            '["10.71.01-Taze pastane ürünleri imalatı (yaş pasta, kuru pasta, poğaça, kek, börek, pay, turta, waffles vb.)"]',
            '107101',
        ),
        (
            '["35.12.13-Elektrik enerjisinin iletimi (elektrik üretim kaynağından dağıtım sistemine aktaran iletim sistemlerinin işletilmesi)","42.22.02-Enerji santralleri inşaatı (hidroelektrik santrali, termik santral, nükleer enerji üretim santralleri vb.)","35.11.19-Elektrik enerjisi üretimi"]',
            '351213, 422202, 351119',
        ),
        ('', None),
        (None, None),
    ],
)
def test_turkey_nicecode(text, expected):
    """Check ``turkey_nicecode`` against each parametrized input.

    The cases feed a JSON-style array of ``NN.NN.NN-description`` entries
    and expect the dotted codes back as six-digit strings joined by ``, ``;
    empty or ``None`` input is expected to yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert turkey_nicecode(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        (
            'HUGO.SANSIL@GMAIL.COM E HUGO@SISTEMAFIEG.ORG.BR',
            ['hugo.sansil@gmail.com', 'hugo@sistemafieg.org.br'],
        ),
        (
            'SANDRA_MMC@BOL.COM.BR NAIRMOTADIAS@HOTMAIL.COM',
            ['sandra_mmc@bol.com.br', 'nairmotadias@hotmail.com'],
        ),
        ('', None),
        (None, None),
    ],
)
def test_email_clean_brazil(text, expected):
    """Check ``email_clean_brazil`` against each parametrized input.

    The cases expect the addresses found in the input to be returned as a
    list of lower-cased strings, and empty or ``None`` input to yield
    ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert email_clean_brazil(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        (
            '["medical practice","medical practices","hospital & health care"]',
            'medical practice,medical practices,hospital & health care',
        ),
        ('["construction"]', 'construction'),
        ('', None),
        (None, None),
    ],
)
def test_arr_str_to_str(text, expected):
    """Check ``arr_str_to_str`` against each parametrized input.

    The cases feed a JSON-style array of strings and expect the elements
    back as one comma-joined string; empty or ``None`` input is expected to
    yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert arr_str_to_str(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        ('+1-866-344-7857 ext. 311', '+1 866 344 7857 311'),
        ('(844)800-BULL', None),
        ('', None),
        (None, None),
    ],
)
def test_clean_tel_apollo(text, expected):
    """Check ``clean_tel_apollo`` against each parametrized input.

    The cases expect ``+1-866-344-7857 ext. 311`` to come back as
    ``+1 866 344 7857 311``, and ``(844)800-BULL``, empty and ``None``
    input to yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert clean_tel_apollo(text) == expected
@pytest.mark.parametrize(
    "socialtype, url, expected",
    [
        ("youtube", "https://youtube.com/user/BrotherCanadaEn", 'user/brothercanadaen'),
        ("whatsapp", "919822025525", '919822025525'),
        ("twitter", "https://twitter.com/#", ''),
        ("linkedin", "https://www.linkedin.com/in/meb-jsc/#", 'in/meb-jsc'),
        ("instagram", "https://www.instagram.com/##############/", ''),
        (
            "facebook",
            "https://www.facebook.com/https://www.facebook.com/komlider38/",
            'komlider38',
        ),
        (
            "pinterest",
            "https://www.pinterest.com/lampstore/https://www.pinterest.com/lampstore/",
            'lampstore',
        ),
        (None, "", None),
        ("", None, None),
        (None, None, None),
    ],
)
def test_socialmedia_url(socialtype, url, expected):
    """Check ``socialmedia_url(socialtype, url)`` against the expected handle.

    The cases cover one URL per listed platform (including URLs with a
    duplicated host prefix and ``#``-only paths) plus combinations of empty
    and ``None`` arguments, which are expected to yield ``None``.
    """
    assert socialmedia_url(socialtype, url) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        (
            '-- PACIFIC PRODUCTS LIMITED AUSTRALIAN PRODUCTS LIMITED',
            'PACIFIC PRODUCTS LIMITED AUSTRALIAN PRODUCTS LIMITED',
        ),
        (
            '03-MAY-2013 Fuente Union Import And Export Limited 福恩特聯合進出口有限公司',
            'Fuente Union Import And Export Limited 福恩特聯合進出口有限公司',
        ),
        ('', None),
        (None, None),
    ],
)
def test_hongkong_previous_name_clean(text, expected):
    """Check ``hongkong_previous_name_clean`` against each parametrized input.

    The cases expect a leading ``--`` marker or a leading ``03-MAY-2013``
    date to be absent from the result, and empty or ``None`` input to yield
    ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert hongkong_previous_name_clean(text) == expected
@pytest.mark.parametrize(
    "text, expected",
    [
        (
            '["ownership-of-shares-25-to-50-percent","voting-rights-25-to-50-percent"]',
            '25-to-50',
        ),
        (
            '["ownership-of-shares-more-than-25-percent-registered-overseas-entity"]',
            'more-than-25',
        ),
        ('', None),
        (None, None),
    ],
)
def test_uk_sharepercent(text, expected):
    """Check ``uk_sharepercent`` against each parametrized input.

    The cases feed a JSON-style array of control descriptors and expect the
    share range (``25-to-50`` / ``more-than-25``) back; empty or ``None``
    input is expected to yield ``None``.
    """
    # The input argument is named ``text`` so the builtin ``str`` is not
    # shadowed inside the test.
    assert uk_sharepercent(text) == expected
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; raising SystemExit with it
    # makes the process exit non-zero when tests fail instead of always
    # exiting 0.
    raise SystemExit(pytest.main())
|