|
|
@@ -1,1556 +0,0 @@
|
|
|
-import sys
|
|
|
-import re
|
|
|
-import os
|
|
|
-
|
|
|
-abspath = os.path.abspath(__file__)
|
|
|
-root_path = re.sub(r"tendata-warehouse.*", "tendata-warehouse", abspath)
|
|
|
-sys.path.append(root_path)
|
|
|
-
|
|
|
-from dw_base.spark.udf.customs.common_clean import clean_company_name
|
|
|
-
|
|
|
-kaz_enclosers = [('""', '""'), ('"', '"'), ('<<', '>>'), ('?', '?')]
|
|
|
-
|
|
|
-pakistan_suffix_list = [
|
|
|
- 'GROUPCOMPANYLIMITED',
|
|
|
- 'LIMITEDPARTNERSHIP',
|
|
|
- 'CORPORATIONLIMITED',
|
|
|
- 'SMCPRIVATE',
|
|
|
- 'OFCOMPANY',
|
|
|
- 'PRIVATELIMIT',
|
|
|
- 'PRIVATECO',
|
|
|
- 'LIABILITYCOMPANY',
|
|
|
- 'LIMITEDCOMPANY',
|
|
|
- 'COMPANYLIMITED',
|
|
|
- 'INCORPORAT',
|
|
|
- 'CORPORATION',
|
|
|
- 'GROUPCOLTD',
|
|
|
- 'COMPANYLTD',
|
|
|
- 'COLIMITED',
|
|
|
- 'GROUPLTD',
|
|
|
- 'SMCPVT',
|
|
|
- 'PVTLIMIT',
|
|
|
- 'PVTCOLTD',
|
|
|
- 'PVTLTD',
|
|
|
- 'FACTORY',
|
|
|
- 'CORPLTD',
|
|
|
- 'COMPANY',
|
|
|
- 'PTYLTD',
|
|
|
- 'AGENCY',
|
|
|
- 'OFFICE',
|
|
|
- 'CENTER',
|
|
|
- 'COLTD',
|
|
|
- 'COINC',
|
|
|
- 'C0LTD',
|
|
|
- 'LIMIT',
|
|
|
- 'CORP',
|
|
|
- 'LLC',
|
|
|
- 'LTD',
|
|
|
- 'COLT'
|
|
|
-]
|
|
|
# Suffixes tried, in order, by the second US truncation pass.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which would fuse two suffixes into one entry.
SECOND_AMERICA_SUFFIX_LIST = [
    ' UNLIMITED',
    ' LIMITED',
    ' CO LTD',
    ' COMPANY LTD',
    ' AND COMPANY',
    ' CORPORATION',
    ' CORP',
    ' COMPANY INC',
    ' COMPANY',
    ' LLC',
    ' CO INC',
    ' CO',
    ' MD',
    ' LTD',
    ' INC',
    ' LLP',
    ' PLC',
    ' EST',
]
|
|
|
-
|
|
|
# Suffixes tried, in order, by the third US truncation pass.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which would fuse two suffixes into one entry.
third_AMERICA_SUFFIX_LIST = [
    ' CORPORATION',
    ' COMPANY LTD',
    ' COMPANY INC',
    ' UNLIMITED',
    ' LIMITED',
    ' CO LTD',
    ' COMPANY',
    ' CO INC',
    ' CORP',
    ' LLC',
    ' LTD',
    ' INC',
    ' LLP',
    ' PLC',
    ' EST',
    ' CO',
    ' MD',
]
|
|
|
-
|
|
|
-first_chile_SUFFIX_LIST = [
|
|
|
- ' SPA',
|
|
|
- ' S A',
|
|
|
- ' SA',
|
|
|
- ' LTDA',
|
|
|
- ' LIMITADA',
|
|
|
- ' LLC',
|
|
|
- ' SOCIEDAD ANONIMA',
|
|
|
- ' CO LTD',
|
|
|
- ' LTD',
|
|
|
- ' LIMI',
|
|
|
- ' E I R'
|
|
|
-]
|
|
|
-
|
|
|
-first_bangladesh_suffix_list = [
|
|
|
- 'CHANGED FROM',
|
|
|
- 'CHANGED',
|
|
|
- 'CHANGE FROM',
|
|
|
- 'CHANGE',
|
|
|
- 'EXCHANGE'
|
|
|
-]
|
|
|
-ukraine_suffix_first = [
|
|
|
- ' М КИЇВ ВУЛ ',
|
|
|
- ' ВУЛ '
|
|
|
-]
|
|
|
-ukraine_suffix_second = [
|
|
|
- ' S R O ',
|
|
|
- ' Z O O '
|
|
|
-]
|
|
|
-
|
|
|
-second_bangladesh_suffix_list = [
|
|
|
- 'PVT CO LIMITED',
|
|
|
- 'PVT LIMITED',
|
|
|
- 'LIMITED',
|
|
|
- 'PVT LTD',
|
|
|
- 'LTD',
|
|
|
- 'PVT',
|
|
|
- 'CO LTD',
|
|
|
- 'CO',
|
|
|
- 'PLC'
|
|
|
-]
|
|
|
-
|
|
|
-FIRST_Rwanda_suffix_list = [
|
|
|
- 'COMPANY RWANDA LTD',
|
|
|
- 'COMPANY LTD',
|
|
|
- ' CO LTD',
|
|
|
- 'LTD',
|
|
|
- 'LIMITED'
|
|
|
-]
|
|
|
-FIRST_england_suffix_list = [
|
|
|
- ' COMPANY LIMITED',
|
|
|
- ' ENTERPRISES LTD',
|
|
|
- ' LIMITED',
|
|
|
- ' COMPANY',
|
|
|
- ' CO LTD',
|
|
|
- ' LTD',
|
|
|
- ' LLP'
|
|
|
-]
|
|
|
-FIRST_philippines_suffix_list = [
|
|
|
- ' CO INC',
|
|
|
- ' CO LTD',
|
|
|
- 'INC',
|
|
|
- 'CORPORATION',
|
|
|
- 'CORP',
|
|
|
- 'LLC',
|
|
|
- 'ENTERPRISES',
|
|
|
- 'INCORPORATED',
|
|
|
- ' CO',
|
|
|
- 'PTE LTD',
|
|
|
- 'PTY LTD',
|
|
|
- 'LTD',
|
|
|
- 'GMBH',
|
|
|
- 'S R L',
|
|
|
- 'SRL'
|
|
|
-]
|
|
|
-FIRST_colombia_suffix_list = [
|
|
|
- "LIMITADA",
|
|
|
- "S A S",
|
|
|
- "LITDA",
|
|
|
- "LTDA",
|
|
|
- "SAS",
|
|
|
- "S A",
|
|
|
- "LLC"
|
|
|
-]
|
|
|
-frist_america_suffix_list = [
|
|
|
- 'PRODUCT',
|
|
|
- 'UNION OF THE UNITED STATES',
|
|
|
- ' FOUNDATION',
|
|
|
- 'SA DE CV',
|
|
|
- ' UNLIMITED',
|
|
|
- ' LIMITED',
|
|
|
- 'CENTERS OF AMERICA',
|
|
|
- ' AMERICA CORP',
|
|
|
- ' USA CORP',
|
|
|
- ' CORP',
|
|
|
- ' CORPORATION',
|
|
|
- 'FOUNDATION',
|
|
|
- ' PLLC',
|
|
|
- ' LP',
|
|
|
- ' PA',
|
|
|
- ' CO',
|
|
|
- 'ENTERPRISE',
|
|
|
- 'COMPANY',
|
|
|
- ' AMERICA LLC',
|
|
|
- ' AMERICA INC',
|
|
|
- ' USA LLC',
|
|
|
- ' USA INC',
|
|
|
- ' FL LLC',
|
|
|
- ' FL INC',
|
|
|
- ' 2 LLC',
|
|
|
- ' 2 INC',
|
|
|
- ' 3 LLC',
|
|
|
- ' 3 INC',
|
|
|
- ' 2022 LLC',
|
|
|
- ' 2022 INC',
|
|
|
- ' 2021 LLC',
|
|
|
- ' 2021 INC',
|
|
|
- ' 2020 LLC',
|
|
|
- ' 2020 INC',
|
|
|
- ' CO LLC',
|
|
|
- ' CO INC',
|
|
|
- ' LLC',
|
|
|
- ' INC',
|
|
|
- ' CO LTD',
|
|
|
- ' LTD'
|
|
|
-]
|
|
|
-
|
|
|
-indonesia_suffix_list = [
|
|
|
- 'AGENC',
|
|
|
- 'COMPANY',
|
|
|
- 'DEVELOPMENT',
|
|
|
- 'ORGANIZATION',
|
|
|
- 'ASSOCIATION',
|
|
|
- 'SERVICE',
|
|
|
- 'GROUP',
|
|
|
- 'PTY LTD',
|
|
|
- 'PTY LIMIT',
|
|
|
- ' CO LTD',
|
|
|
- ' CO LIMIT',
|
|
|
- ' PTE LTD',
|
|
|
- 'INDONESIA CO',
|
|
|
- 'INDONESIA INCORP',
|
|
|
- 'INDONESIA LTD',
|
|
|
- 'PHILS CO',
|
|
|
- 'INDONESIA UNLIMIT',
|
|
|
- ' ASIA CO',
|
|
|
- ' ASIA UNLIMITED',
|
|
|
- 'INCORPORATED',
|
|
|
- 'ENTERPRISE',
|
|
|
- ' INDONESIA INC',
|
|
|
- ' ASIA INC',
|
|
|
- ' INDONESIA CO INC',
|
|
|
- ' CO',
|
|
|
- ' CORP',
|
|
|
- 'CORPORATION',
|
|
|
- ' INC',
|
|
|
- ' INDONESIA',
|
|
|
- ' TBK'
|
|
|
-]
|
|
|
-
|
|
|
-venezuela_suffix_list = [
|
|
|
- 'S A',
|
|
|
- 'C A',
|
|
|
- 'R L',
|
|
|
- 'R S',
|
|
|
- 'F P',
|
|
|
- 'S R L',
|
|
|
- 'LTD',
|
|
|
- 'INC',
|
|
|
- 'COMPANY C A',
|
|
|
- 'COMPAÑIA ANONIMA',
|
|
|
- 'CORPORATION C A',
|
|
|
- 'COOPERATIVA',
|
|
|
- 'INTERNATIONAL',
|
|
|
- 'CORPORACIÓN',
|
|
|
- 'REPRESENTACIONES',
|
|
|
- 'ASOCIACION CIVIL',
|
|
|
- 'FUNDACION'
|
|
|
-]
|
|
|
-
|
|
|
-kaz_heads = ["TOO",
|
|
|
- "ООО",
|
|
|
- "АО",
|
|
|
- "ФХ",
|
|
|
- "ИП OOO",
|
|
|
- "НПЦ ООО",
|
|
|
- "СП OOO",
|
|
|
- "ЧП"]
|
|
|
-
|
|
|
-moldova_suffix_list = [
|
|
|
- 'ASOCIATIA GOSPODARIILOR TARANESTI',
|
|
|
- 'COOPERATIVA DE ÎNTREPRINZATOR',
|
|
|
- 'COOPERATIVA DE PRODUCERE',
|
|
|
- 'COOPERATIVA DE',
|
|
|
- 'COOPERATIVA AGRICOLA DE INTREPRINZATOR',
|
|
|
- 'COOPERATIVA AGRICOLA',
|
|
|
- 'CENTRUL TEHNIC',
|
|
|
- 'COMPANIA',
|
|
|
- 'FIRMA COOPERATISTA TEHNICO-STIINTIFICA DE PRODUCTIE',
|
|
|
- 'FIRMA DE PRODUCTIE',
|
|
|
- 'FIRMA DE PRODUCŢIE ŞI COMERŢ',
|
|
|
- 'FIRMA',
|
|
|
- 'SOCIETATEA COMERCIALĂ',
|
|
|
- 'SOCIETATEA CU RASPUNDERE LIMITATA FIRMA',
|
|
|
- 'SOCIETATEA CU RĂSPUNDERE LIMITATĂ',
|
|
|
- 'SOCIETATEA CU RASPUNDERE LIMITATA',
|
|
|
- 'SOCIETATEA PE ACTIUNI',
|
|
|
- 'SOCIETATEA IN NUME COLECTIV AGENTIA',
|
|
|
- 'INTREPRINDEREA INDIVIDUALA',
|
|
|
- 'ÎNTREPRINZĂTOR INDIVIDUAL',
|
|
|
- 'ÎNTREPRINDEREA INDIVIDUALĂ',
|
|
|
- 'ÎNTREPRINDEREA MUNICIPALĂ',
|
|
|
- 'ÎNTREPRINDEREA CU CAPITAL STRĂIN',
|
|
|
- 'INSTITUŢIA MEDICO-SANITARĂ PUBLICĂ',
|
|
|
- 'REDACTIA GAZETEI',
|
|
|
- 'ORGANIZATIA DE ADMINISTRARE FIDUCIARA A INVESTITILOR',
|
|
|
- 'S R L',
|
|
|
- 'SOCIETATEA CU RESPONSABILITATE LIMITATA',
|
|
|
- 'SOCIETATE CU RĂSPUNDERE LIMITATĂ'
|
|
|
-]
|
|
|
-
|
|
|
-moldova_suffix_list2 = [
|
|
|
- 'S R L',
|
|
|
- 'SOCIETATEA CU RESPONSABILITATE LIMITATA',
|
|
|
- 'SOCIETATE CU RĂSPUNDERE LIMITATĂ'
|
|
|
-]
|
|
|
-
|
|
|
-singapore_suffix_list = [
|
|
|
- 'SINGAPORE PTE LTD',
|
|
|
- 'S PTE LTD',
|
|
|
- 'PTE LTD',
|
|
|
- 'ENTERPRISES',
|
|
|
- 'ENTERPRISE',
|
|
|
- 'ENT',
|
|
|
- 'AGENCIES',
|
|
|
- 'AGENCY',
|
|
|
- 'PRIVATE LIMITED',
|
|
|
- 'COMPANY',
|
|
|
- 'LLP',
|
|
|
- 'CO'
|
|
|
-]
|
|
|
-
|
|
|
-hongkong_suffix_list = [
|
|
|
- ' CO LIMITED',
|
|
|
- ' LIMITED',
|
|
|
- ' CO LTD',
|
|
|
- ' COMPANY',
|
|
|
- ' LTD'
|
|
|
-]
|
|
|
-
|
|
|
-china_suffix_list = [
|
|
|
- ' GROUP CORPORATION LIMITED',
|
|
|
- ' CORPORATION LIMITED',
|
|
|
- ' GROUP CORPORATION',
|
|
|
- ' GROUP CO LIMITED',
|
|
|
- ' LIMITED COMPANY',
|
|
|
- ' COMPANY LIMITED',
|
|
|
- ' GROUP CO LTD',
|
|
|
- ' CORPORATION',
|
|
|
- ' CO LIMITED',
|
|
|
- ' GROUP CORP',
|
|
|
- ' CORP LTD',
|
|
|
- ' LIMITED',
|
|
|
- ' COMPANY',
|
|
|
- ' FACTORY',
|
|
|
- ' CO LTD',
|
|
|
- ' CO INC',
|
|
|
- ' CORP',
|
|
|
- ' INC',
|
|
|
- ' CO'
|
|
|
-]
|
|
|
-
|
|
|
-vietnam_right_separator_list = [
|
|
|
- 'COMPANY LIMITED ',
|
|
|
- 'COMPANY LTD '
|
|
|
-]
|
|
|
-
|
|
|
-vietnam_left_separator_list = [
|
|
|
- ' CO LTD',
|
|
|
- ' PTE LTD',
|
|
|
- ' JOINT STOCK COMPANY',
|
|
|
- ' COMPANY'
|
|
|
-]
|
|
|
-
|
|
|
-vietnam_suffix_list = [
|
|
|
- ' CORP',
|
|
|
- ' LLC',
|
|
|
- ' CO JSC',
|
|
|
- ' JSC',
|
|
|
- ' LTD'
|
|
|
-]
|
|
|
-
|
|
|
-ind_head = [
|
|
|
- 'M S',
|
|
|
- 'MS'
|
|
|
-]
|
|
|
-
|
|
|
-india_suffix_list = [
|
|
|
- ' CO I PVT L',
|
|
|
- ' CO PVT L',
|
|
|
- ' CO PRIVATE L',
|
|
|
- ' CO I LTD',
|
|
|
- ' I LTD',
|
|
|
- ' I LIMITED',
|
|
|
- ' I PVT L',
|
|
|
- ' I PRIVATE L',
|
|
|
- ' COMPANY PRIVATE L',
|
|
|
- ' COMPANY PVT L',
|
|
|
- ' P LTD',
|
|
|
- ' PRIVATE L',
|
|
|
- ' PVT L',
|
|
|
- ' CO',
|
|
|
- ' INC',
|
|
|
- ' CO LIMITED',
|
|
|
- ' LTD',
|
|
|
- ' LIMITED',
|
|
|
- ' CO I',
|
|
|
- ' I'
|
|
|
-]
|
|
|
-
|
|
|
-mexico_suffix_list = [
|
|
|
- ' S P R DE R L DE C V',
|
|
|
- ' S DE R L DE C V',
|
|
|
- ' S DE RL DE CV',
|
|
|
- ' S A P I DE CV',
|
|
|
- ' S P R DE R L',
|
|
|
- ' S A DE C V',
|
|
|
- ' SA DE CV'
|
|
|
-]
|
|
|
-
|
|
|
-nigeria_suffix_list = [
|
|
|
- ' COMPANY LIMITED',
|
|
|
- ' COMPANY LTD',
|
|
|
- ' COMPANY',
|
|
|
- ' LIMITED',
|
|
|
- ' PTE LTD',
|
|
|
- ' CO LTD',
|
|
|
- ' LTD',
|
|
|
- ' LLC'
|
|
|
-]
|
|
|
-
|
|
|
-peru_suffix_list = [
|
|
|
- 'SOCIEDAD ANONIMA CERRADA',
|
|
|
- 'SOCIEDAD ANONIMA CER',
|
|
|
- 'E I R LTDA',
|
|
|
- 'S R LTDA',
|
|
|
- 'E I R L',
|
|
|
- 'S R L',
|
|
|
- 'S A C',
|
|
|
- 'SAC',
|
|
|
- 'S A'
|
|
|
-]
|
|
|
-lesotho_suffix_list = [
|
|
|
- ' LLC (EXTERNAL COMPANY) LTD',
|
|
|
- ' LLC (EXTERNAL COMPANY)',
|
|
|
- ' (PROPRIETARY) LIMITED',
|
|
|
- ' COMPANY (PTY) LTD',
|
|
|
- ' COMPANY LIMITED',
|
|
|
- ' COMPANY LTD',
|
|
|
- ' LIMITED',
|
|
|
- ' PTY LTD',
|
|
|
- ' CO LTD'
|
|
|
-]
|
|
|
-
|
|
|
-germany_suffix_list = [
|
|
|
- 'GMBH AND CO KGAA',
|
|
|
- 'GMBH AND CO OHG',
|
|
|
- 'GMBH AND CO KG',
|
|
|
- 'AG AND CO KGAA',
|
|
|
- 'AG AND CO OHG',
|
|
|
- 'LIMITED ŞTI',
|
|
|
- 'GMBH AND CO',
|
|
|
- 'S A DE C V',
|
|
|
- 'CO LIMITED',
|
|
|
- 'LIMITED',
|
|
|
- 'S R L',
|
|
|
- 'GMBH',
|
|
|
- 'GBR',
|
|
|
- 'SRL',
|
|
|
- 'INC',
|
|
|
- 'LLC',
|
|
|
- 'OHG',
|
|
|
- 'A S',
|
|
|
- 'E K',
|
|
|
- 'AG',
|
|
|
- 'SA',
|
|
|
- 'UG'
|
|
|
-]
|
|
|
-
|
|
|
-
|
|
|
def kaz_extract_text_from_enclosers(text, enclosers=None):
    """Return the text wrapped by the first matching pair of enclosers.

    Each ``(opener, closer)`` pair is tried in order. The opener is searched
    from the left and the closer from the right; the first pair that wraps
    at least one character wins. If no pair matches, ``text`` is returned
    unchanged.

    Args:
        text: String to inspect.
        enclosers: Optional iterable of ``(opener, closer)`` pairs. When
            omitted, the module-level ``kaz_enclosers`` is used.

    Returns:
        The enclosed substring, or ``text`` itself when nothing is enclosed.
    """
    if enclosers is None:
        enclosers = kaz_enclosers
    for open_str, close_str in enclosers:
        open_inx = text.find(open_str)
        if open_inx == -1:
            # A closer without its opener is not an enclosure.
            continue
        # Skip the whole opener, which may be longer than one character.
        content_start = open_inx + len(open_str)
        close_inx = text.rfind(close_str)
        if close_inx > content_start:
            return text[content_start:close_inx]
    return text
|
|
|
-
|
|
|
-
|
|
|
def remove_prefix(text, prefix):
    """Return ``text`` without a leading ``prefix``; unchanged if absent."""
    return text[len(prefix):] if text.startswith(prefix) else text
|
|
|
-
|
|
|
-
|
|
|
def truncate_at_suffix(text, suffix_list):
    """Cut ``text`` at the first occurrence of the first listed suffix found.

    Suffixes are tried in list order; the first one contained anywhere in
    ``text`` decides the cut, and everything before its first occurrence is
    returned. ``text`` is returned unchanged when no suffix is contained.
    """
    matched = next((item for item in suffix_list if item in text), None)
    if matched is None:
        return text
    head, _, _ = text.partition(matched)
    return head
|
|
|
-
|
|
|
-
|
|
|
def pakistan_company_abbr(company_name: str) -> str or None:
    """Build a compact abbreviation key for a Pakistani company name.

    The name is upper-cased, reduced to ``A-Z``/``0-9`` only, stripped of a
    leading ``MS`` (what "M/S" becomes after cleaning) and cut at the first
    legal-form suffix from ``pakistan_suffix_list``.

    Args:
        company_name: Raw company name; may be empty or ``None``.

    Returns:
        The truncated name when it is longer than 4 characters, otherwise
        the prefix-stripped name when that is longer than 4 characters,
        otherwise ``None``.
    """
    if not company_name:
        return None
    cleaned_name = re.sub(r'[^A-Z0-9]', '', company_name.upper())
    # The cleaned name contains upper-case characters only, so the prefix
    # must be upper-case as well or it can never match.
    removed_prefix_name = remove_prefix(cleaned_name, 'MS')
    truncated_name = truncate_at_suffix(removed_prefix_name, pakistan_suffix_list).strip()
    if len(truncated_name) > 4:
        return truncated_name
    if len(removed_prefix_name) > 4:
        return removed_prefix_name
    return None
|
|
|
-
|
|
|
-
|
|
|
def mirror_pakistan_company_abbr(company_name: str) -> str or None:
    """Build an abbreviation key for a Pakistani company name, keeping spaces.

    Same steps as ``pakistan_company_abbr`` except that spaces survive the
    character clean-up: upper-case, keep ``A-Z``/``0-9``/space, strip a
    leading ``MS`` and cut at the first suffix from ``pakistan_suffix_list``.

    Args:
        company_name: Raw company name; may be empty or ``None``.

    Returns:
        The truncated name when it is longer than 4 characters, otherwise
        the prefix-stripped name when that is longer than 4 characters,
        otherwise ``None``.
    """
    if not company_name:
        return None
    # Trim first so leading blanks cannot hide the prefix.
    cleaned_name = re.sub(r'[^A-Z0-9 ]', '', company_name.upper()).strip()
    # The cleaned name is upper-case, so the prefix must be upper-case too.
    removed_prefix_name = remove_prefix(cleaned_name, 'MS').strip()
    truncated_name = truncate_at_suffix(removed_prefix_name, pakistan_suffix_list).strip()
    if len(truncated_name) > 4:
        return truncated_name
    if len(removed_prefix_name) > 4:
        return removed_prefix_name
    return None
|
|
|
-
|
|
|
-
|
|
|
def split_last(text, suffix):
    """Return the part of ``text`` before the last occurrence of ``suffix``.

    Returns ``None`` for empty or ``None`` input, and ``text`` unchanged
    when ``suffix`` does not occur in it.
    """
    if not text:
        return None
    cut_at = text.rfind(suffix)
    return text if cut_at == -1 else text[:cut_at]
|
|
|
-
|
|
|
-
|
|
|
# Processing logic for mc_org in Namibia import data
def split_first_dtp(text):
    """Return the part of ``text`` before its first DTP marker.

    The markers ``" ---DTP"``, ``"---DTP"`` and ``"--DTP"`` are checked in
    that order; the first one present decides where the text is cut.
    Returns ``text`` unchanged when no marker is present, and ``None`` for
    empty or ``None`` input.
    """
    if not text:
        return None
    for marker in (" ---DTP", "---DTP", "--DTP"):
        if marker in text:
            return text.split(marker, 1)[0]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Suffixes that only count when they terminate the name; every other
# suffix is matched anywhere in the name.
_AMERICA_FIRST_END_ONLY = frozenset({
    ' FOUNDATION', ' UNLIMITED', ' AMERICA CORP', ' USA CORP', ' CORP',
    ' CORPORATION', 'FOUNDATION', ' PLLC', ' LP', ' PA', ' CO',
    'ENTERPRISE', 'COMPANY', ' LLC', ' INC',
})


def america_truncate_at_suffix_first(text, suffix_list):
    """First US truncation pass: cut ``text`` at the first applicable suffix.

    Suffixes are tried in list order. A suffix listed in
    ``_AMERICA_FIRST_END_ONLY`` applies only when ``text`` ends with it; any
    other suffix applies as soon as it occurs anywhere in ``text``. The text
    before the last occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _AMERICA_FIRST_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Suffixes that only count when they terminate the name; every other
# suffix is matched anywhere in the name.
_AMERICA_SECOND_END_ONLY = frozenset({
    ' UNLIMITED', ' LIMITED', ' AND COMPANY', ' CORPORATION', ' CORP',
    ' COMPANY', ' LLC', ' CO', ' MD', ' LTD', ' INC', ' PLC', ' LLP',
    ' EST',
})


def america_truncate_at_suffix_second(text, suffix_list):
    """Second US truncation pass: cut ``text`` at the first applicable suffix.

    Suffixes are tried in list order. A suffix listed in
    ``_AMERICA_SECOND_END_ONLY`` applies only when ``text`` ends with it;
    any other suffix applies as soon as it occurs anywhere in ``text``. The
    text before the last occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _AMERICA_SECOND_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Suffixes that only count when they terminate the name; every other
# suffix is matched anywhere in the name.
_AMERICA_THIRD_END_ONLY = frozenset({
    ' UNLIMITED', ' LIMITED', ' CORPORATION', ' CORP', ' COMPANY', ' LLC',
    ' CO', ' MD', ' LTD', ' INC', ' PLC', ' LLP', ' EST',
})


def america_truncate_at_suffix_third(text, suffix_list):
    """Third US truncation pass: cut ``text`` at the first applicable suffix.

    Suffixes are tried in list order. A suffix listed in
    ``_AMERICA_THIRD_END_ONLY`` applies only when ``text`` ends with it; any
    other suffix applies as soon as it occurs anywhere in ``text``. The text
    before the last occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _AMERICA_THIRD_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Markers that only count when they terminate the name; every other
# marker is matched anywhere in the name.
_BANGLADESH_FIRST_END_ONLY = frozenset({'CHANGED', 'CHANGE', 'EXCHANGE'})


def bangladesh_truncate_at_suffix_first(text, suffix_list):
    """First Bangladesh pass: cut ``text`` at the first applicable marker.

    Markers are tried in list order. ``CHANGED``, ``CHANGE`` and
    ``EXCHANGE`` apply only when ``text`` ends with them; any other marker
    applies as soon as it occurs anywhere in ``text``. The text before the
    last occurrence of the applicable marker is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered marker candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no marker applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _BANGLADESH_FIRST_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Legal-form suffixes that apply only when they terminate the name.
_BANGLADESH_SECOND_END_ONLY = frozenset({
    'PVT CO LIMITED', 'PVT LIMITED', 'LIMITED', 'PVT LTD', 'LTD',
    'CO LTD', 'CO', 'PLC',
})


def bangladesh_truncate_at_suffix_second(text, suffix_list):
    """Second Bangladesh pass: strip a legal-form suffix from ``text``.

    Suffixes are tried in list order. ``PVT`` applies wherever it occurs in
    ``text``; the suffixes in ``_BANGLADESH_SECOND_END_ONLY`` apply only
    when ``text`` ends with them; any other entry is ignored. The text
    before the last occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix == 'PVT' or (
            suffix in _BANGLADESH_SECOND_END_ONLY and text.endswith(suffix)
        ):
            return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Suffixes that only count when they terminate the name; every other
# suffix is matched anywhere in the name.
_INDONESIA_END_ONLY = frozenset({
    ' CO', ' CORP', 'CORPORATION', ' INC', ' INDONESIA', ' TBK',
})


def indonesia_truncate_at_suffix(text, suffix_list):
    """Cut an Indonesian company name at the first applicable suffix.

    Suffixes are tried in list order. A suffix listed in
    ``_INDONESIA_END_ONLY`` applies only when ``text`` ends with it; any
    other suffix applies as soon as it occurs anywhere in ``text``. The text
    before the last occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _INDONESIA_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Suffixes that only count when they terminate the name; every other
# suffix is matched anywhere in the name.
# NOTE(review): FIRST_Rwanda_suffix_list spells its entry ' CO LTD' (leading
# space), which is not in this set and is therefore matched anywhere in the
# name - confirm whether that is intended.
_RWANDA_END_ONLY = frozenset({'COMPANY RWANDA LTD', 'COMPANY LTD', 'CO LTD'})


def rwanda_truncate_at_suffix(text, suffix_list):
    """Cut a Rwandan company name at the first applicable suffix.

    Suffixes are tried in list order. A suffix listed in
    ``_RWANDA_END_ONLY`` applies only when ``text`` ends with it; any other
    suffix applies as soon as it occurs anywhere in ``text``. The text
    before the last occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _RWANDA_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
def philippines_truncate_at_suffix(text, suffix_list):
    """Strip the first listed suffix that ``text`` ends with.

    Suffixes are tried in list order; ``text`` is returned unchanged when
    it ends with none of them.
    """
    for candidate in suffix_list:
        if candidate in text and text.endswith(candidate):
            # Empty text has nothing to cut and yields None.
            return text[:text.rfind(candidate)] if text else None
    return text
|
|
|
-
|
|
|
-
|
|
|
def england_truncate_at_suffix(text, suffix_list):
    """Drop the first suffix in ``suffix_list`` that terminates ``text``.

    Candidates are checked in order; when none terminates ``text`` the
    input is handed back untouched.
    """
    for ending in suffix_list:
        if ending not in text or not text.endswith(ending):
            continue
        if not text:
            # Nothing to cut from an empty name.
            return None
        return text[:len(text) - len(ending)]
    return text
|
|
|
-
|
|
|
-
|
|
|
def colombia_truncate_at_suffix(text, suffix_list):
    """Remove a trailing legal-form suffix from a Colombian company name.

    The first entry of ``suffix_list`` that ``text`` ends with is removed;
    without such an entry ``text`` comes back unchanged.
    """
    trailing = (tail for tail in suffix_list if tail in text and text.endswith(tail))
    found = next(trailing, None)
    if found is None:
        return text
    if not text:
        # Nothing to cut from an empty name.
        return None
    return text[:text.rfind(found)]
|
|
|
-
|
|
|
-
|
|
|
# Suffixes that only count when they terminate the name; every other
# suffix is matched anywhere in the name.
_CHILE_END_ONLY = frozenset({
    ' SPA', ' S A', ' SA', ' LTDA', ' LIMITADA', ' LLC',
    ' SOCIEDAD ANONIMA', ' CO LTD', ' LTD', ' LIMI', ' E I R',
})


def chile_truncate_at_suffix(text, suffix_list):
    """Cut a Chilean company name at the first applicable suffix.

    Suffixes are tried in list order. A suffix listed in ``_CHILE_END_ONLY``
    applies only when ``text`` ends with it; any other suffix applies as
    soon as it occurs anywhere in ``text``. The text before the last
    occurrence of the applicable suffix is returned.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when no suffix applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _CHILE_END_ONLY and not text.endswith(suffix):
            continue
        return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Legal forms that are removed only when they terminate the name.
_VENEZUELA_TRAILING = frozenset({
    'S A', 'C A', 'R L', 'R S', 'F P', 'S R L', 'INC', 'COMPANY C A',
    'COMPAÑIA ANONIMA', 'CORPORATION C A',
})

# Organisation words that are removed only when they start the name.
_VENEZUELA_LEADING = frozenset({
    'COOPERATIVA', 'INTERNATIONAL', 'CORPORACIÓN', 'REPRESENTACIONES',
    'ASOCIACION CIVIL', 'FUNDACION',
})


def venezuela_truncate_at_suffix(text, suffix_list):
    """Strip a legal-form marker from a Venezuelan company name.

    Markers are tried in list order and the first applicable one decides
    the result:

    * a marker in ``_VENEZUELA_TRAILING`` applies when ``text`` ends with
      it; the text before its last occurrence is returned;
    * a marker in ``_VENEZUELA_LEADING`` applies when ``text`` starts with
      it; the text after it is returned;
    * any other marker applies wherever it occurs; the text before its last
      occurrence is returned.

    Args:
        text: Company name to clean.
        suffix_list: Ordered marker candidates.

    Returns:
        The cleaned name, or ``text`` unchanged when no marker applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _VENEZUELA_TRAILING:
            if text.endswith(suffix):
                return text[:text.rfind(suffix)]
        elif suffix in _VENEZUELA_LEADING:
            if text.startswith(suffix):
                return text[len(suffix):]
        else:
            return text[:text.rfind(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
# Legal forms that are written after the company name; every other entry
# of the Moldovan list is a form written in front of the name.
_MOLDOVA_TRAILING = frozenset({
    'S R L',
    'SOCIETATEA CU RESPONSABILITATE LIMITATA',
    'SOCIETATE CU RĂSPUNDERE LIMITATĂ',
})


def moldova_truncate_at_suffix(text, suffix_list):
    """Strip a legal-form marker from a Moldovan company name.

    Markers are tried in list order and the first applicable one decides
    the result:

    * a marker in ``_MOLDOVA_TRAILING`` applies when ``text`` ends with it;
      the text before its last occurrence is returned;
    * any other marker applies when ``text`` starts with it; the text after
      it is returned.

    Args:
        text: Company name to clean.
        suffix_list: Ordered marker candidates.

    Returns:
        The cleaned name, or ``text`` unchanged when no marker applies.
    """
    for suffix in suffix_list:
        if suffix not in text:
            continue
        if suffix in _MOLDOVA_TRAILING:
            if text.endswith(suffix):
                return text[:text.rfind(suffix)]
        elif text.startswith(suffix):
            return text[len(suffix):]
    return text
|
|
|
-
|
|
|
-
|
|
|
def moldova_truncate_at_suffix_second(text, suffix_list2):
    """Second Moldovan pass: strip a trailing legal form from ``text``.

    Entries are tried in list order; the first one that ``text`` ends with
    is removed. Every entry of ``suffix_list2`` is honoured, so the list is
    the single place where the trailing forms are maintained.

    Args:
        text: Company name to truncate.
        suffix_list2: Ordered trailing legal-form candidates.

    Returns:
        The truncated name, or ``text`` unchanged when it ends with none of
        the entries.
    """
    for suffix in suffix_list2:
        # Skip empty entries: they would match everything and cut nothing.
        if suffix and text.endswith(suffix):
            return text[:len(text) - len(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
def singapore_truncate_at_suffix(text, suffix_list):
    """Strip a trailing legal-form suffix from a Singaporean company name.

    Entries are tried in list order; the first one that ``text`` ends with
    is removed. Every entry of ``suffix_list`` is honoured, so the list is
    the single place where the suffixes are maintained.

    Args:
        text: Company name to truncate.
        suffix_list: Ordered suffix candidates.

    Returns:
        The truncated name, or ``text`` unchanged when it ends with none of
        the entries.
    """
    for suffix in suffix_list:
        # Skip empty entries: they would match everything and cut nothing.
        if suffix and text.endswith(suffix):
            return text[:len(text) - len(suffix)]
    return text
|
|
|
-
|
|
|
-
|
|
|
def hongkong_truncate_at_suffix(text, suffix_list):
    """Cut a trailing Hong Kong company suffix off ``text``.

    Entries of ``suffix_list`` are tried in order. Only the five suffixes
    listed below are recognised, and only when ``text`` ends with them; the
    result is then ``split_last(text, suffix)``. Otherwise ``text`` is
    returned unchanged.
    """
    trailing_forms = (
        ' CO LIMITED',
        ' LIMITED',
        ' CO LTD',
        ' COMPANY',
        ' LTD',
    )
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in trailing_forms and text.endswith(candidate):
            return split_last(text, candidate)
    return text
|
|
|
-
|
|
|
-
|
|
|
def china_truncate_at_suffix(text, suffix_list):
    """Cut a trailing Chinese company suffix off ``text``.

    Entries of ``suffix_list`` are tried in order. Only the suffixes listed
    below are recognised, and only when ``text`` ends with them; the result
    is then ``split_last(text, suffix)``. Otherwise ``text`` is returned
    unchanged.
    """
    trailing_forms = (
        ' GROUP CORPORATION LIMITED',
        ' CORPORATION LIMITED',
        ' GROUP CORPORATION',
        ' GROUP CO LIMITED',
        ' LIMITED COMPANY',
        ' COMPANY LIMITED',
        ' GROUP CO LTD',
        ' CORPORATION',
        ' CO LIMITED',
        ' GROUP CORP',
        ' CORP LTD',
        ' LIMITED',
        ' COMPANY',
        ' FACTORY',
        ' CO LTD',
        ' CO INC',
        ' CORP',
        ' INC',
        ' CO',
    )
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in trailing_forms and text.endswith(candidate):
            return split_last(text, candidate)
    return text
|
|
|
-
|
|
|
-
|
|
|
def vietnam_take_right_half(company_name: str):
    """Keep what follows the first matching separator.

    Walks ``vietnam_right_separator_list`` in order; for the first separator
    contained in ``company_name`` the text after its first occurrence is
    returned, stripped. With no match the stripped input is returned.
    """
    for sep in vietnam_right_separator_list:
        if sep not in company_name:
            continue
        _, _, tail = company_name.partition(sep)
        return tail.strip()
    return company_name.strip()
|
|
|
-
|
|
|
-
|
|
|
def vietnam_take_left_half(company_name: str):
    """Keep what precedes the last occurrence of the first matching separator.

    Walks ``vietnam_left_separator_list`` in order; for the first separator
    contained in ``company_name`` the text before its last occurrence is
    returned, stripped. With no match the stripped input is returned.
    """
    for sep in vietnam_left_separator_list:
        if sep not in company_name:
            continue
        head, _, _ = company_name.rpartition(sep)
        return head.strip()
    return company_name.strip()
|
|
|
-
|
|
|
-
|
|
|
def vietnam_truncate_at_suffix(company_name: str):
    """Drop the first suffix from ``vietnam_suffix_list`` that ends the name.

    Returns the text before that trailing suffix, stripped; when no listed
    suffix ends ``company_name`` the stripped input is returned.
    """
    for ending in vietnam_suffix_list:
        if ending not in company_name:
            continue
        if company_name.endswith(ending):
            return company_name.rpartition(ending)[0].strip()
    return company_name.strip()
|
|
|
-
|
|
|
-
|
|
|
def india_truncate_at_suffix(text, suffix_list):
    """Cut an Indian company suffix off ``text``.

    Entries of ``suffix_list`` are tried in order and skipped unless they
    occur somewhere in ``text``. Three kinds of entry exist:

    * ``' CO LIMITED'`` is cut only when ``' AND CO LIMITED'`` is absent
      from ``text``;
    * the short, ambiguous suffixes in ``end_anchored`` are cut only when
      ``text`` ends with them;
    * every other entry is cut wherever it occurs.

    A cut returns ``split_last(text, suffix)``; with no cut ``text`` is
    returned unchanged.
    """
    end_anchored = (' CO', ' INC', ' LTD', ' LIMITED', ' CO I', ' I')
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate == ' CO LIMITED':
            if ' AND CO LIMITED' not in text:
                return split_last(text, candidate)
        elif candidate in end_anchored:
            if text.endswith(candidate):
                return split_last(text, candidate)
        else:
            return split_last(text, candidate)
    return text
|
|
|
-
|
|
|
-
|
|
|
def mexico_truncate_at_suffix(cleaned_name):
    """Drop the first suffix from ``mexico_suffix_list`` that ends the name.

    Returns the text before that trailing suffix, stripped; when no listed
    suffix ends ``cleaned_name`` the stripped input is returned.
    """
    for ending in mexico_suffix_list:
        if ending not in cleaned_name:
            continue
        if cleaned_name.endswith(ending):
            return cleaned_name.rpartition(ending)[0].strip()
    return cleaned_name.strip()
|
|
|
-
|
|
|
-
|
|
|
def nigeria_truncate_at_suffix(cleaned_name):
    """Drop the first suffix from ``nigeria_suffix_list`` that ends the name.

    Returns the text before that trailing suffix, stripped; when no listed
    suffix ends ``cleaned_name`` the stripped input is returned.
    """
    ending = next((s for s in nigeria_suffix_list if cleaned_name.endswith(s)), None)
    if ending is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(ending)[0].strip()
|
|
|
-
|
|
|
-
|
|
|
def peru_truncate_at_suffix(cleaned_name, peru_suffix_list):
    """Drop the first suffix in ``peru_suffix_list`` that ends the name.

    Suffixes are tested in list order, so earlier entries win. Returns the
    text before the trailing suffix, stripped; when nothing matches the
    stripped input is returned.
    """
    ending = next((s for s in peru_suffix_list if cleaned_name.endswith(s)), None)
    if ending is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(ending)[0].strip()
|
|
|
-
|
|
|
-
|
|
|
def lesotho_truncate_at_suffix(cleaned_name, lesotho_suffix_list):
    """Drop the first suffix in ``lesotho_suffix_list`` that ends the name.

    Suffixes are tested in list order, so earlier entries win. Returns the
    text before the trailing suffix, stripped; when nothing matches the
    stripped input is returned.
    """
    for ending in lesotho_suffix_list:
        if not cleaned_name.endswith(ending):
            continue
        stem, _, _ = cleaned_name.rpartition(ending)
        return stem.strip()
    return cleaned_name.strip()
|
|
|
-
|
|
|
-
|
|
|
-def germany_truncate_at_suffix(cleaned_name, germany_suffix_list):
|
|
|
- for suffix in germany_suffix_list:
|
|
|
- if cleaned_name.endswith(suffix):
|
|
|
- return cleaned_name.rsplit(suffix, 1)[0].strip()
|
|
|
- return cleaned_name.strip()
|
|
|
-
|
|
|
-
|
|
|
def america_company_abbr(company_name: str) -> str or None:
    """Abbreviate a US company name (first rule set).

    Returns ``None`` for a falsy ``company_name``. Otherwise the name is
    cleaned and truncated with ``frist_america_suffix_list``; if the stripped
    truncation is shorter than 8 characters the cleaned name is returned,
    else the truncation itself (returned as-is, not stripped).
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = america_truncate_at_suffix_first(cleaned, frist_america_suffix_list)
    return cleaned if len(stem.strip()) < 8 else stem
|
|
|
-
|
|
|
-
|
|
|
def america_company_abbr_second(company_name: str) -> str or None:
    """Abbreviate a US company name (second rule set).

    Returns ``None`` for a falsy ``company_name``. Otherwise the name is
    cleaned and truncated with ``SECOND_AMERICA_SUFFIX_LIST``; if the
    stripped truncation is shorter than 5 characters the cleaned name is
    returned, else the stripped truncation.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = america_truncate_at_suffix_second(cleaned, SECOND_AMERICA_SUFFIX_LIST).strip()
    if len(stem) >= 5:
        return stem
    return cleaned
|
|
|
-
|
|
|
-
|
|
|
def america_company_abbr_third(company_name: str) -> str or None:
    """Abbreviate a US company name (third rule set).

    Returns ``None`` for a falsy ``company_name``. Otherwise the name is
    cleaned and truncated with ``third_AMERICA_SUFFIX_LIST``, and the length
    of the stripped truncation decides the result:

    * 9 or fewer characters -> ``None``
    * 10 or 11 characters   -> the cleaned (untruncated) name
    * 12 or more characters -> the stripped truncation
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = america_truncate_at_suffix_third(cleaned, third_AMERICA_SUFFIX_LIST).strip()
    stem_len = len(stem)
    if stem_len <= 9:
        return None
    if stem_len < 12:
        return cleaned
    return stem
|
|
|
-
|
|
|
-
|
|
|
def bangladesh_company_abbr_first(company_name: str) -> str or None:
    """Abbreviate a Bangladeshi company name with the first suffix list only.

    Returns ``None`` for a falsy ``company_name``; otherwise the cleaned name
    truncated with ``first_bangladesh_suffix_list``, stripped. No minimum
    length is enforced.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    return bangladesh_truncate_at_suffix_first(cleaned, first_bangladesh_suffix_list).strip()
|
|
|
-
|
|
|
-
|
|
|
def bangladesh_company_abbr_second(company_name: str) -> str or None:
    """Abbreviate a Bangladeshi company name with both suffix lists.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``first_bangladesh_suffix_list`` and the stripped
    result is truncated again with ``second_bangladesh_suffix_list``. If the
    second result (stripped) is shorter than 6 characters the first result
    is returned instead.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    first_pass = bangladesh_truncate_at_suffix_first(cleaned, first_bangladesh_suffix_list).strip()
    second_pass = bangladesh_truncate_at_suffix_second(first_pass, second_bangladesh_suffix_list).strip()
    return first_pass if len(second_pass) < 6 else second_pass
|
|
|
-
|
|
|
-
|
|
|
def chile_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Chilean company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``first_chile_SUFFIX_LIST``; the stripped truncation is
    returned when it has at least 8 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = chile_truncate_at_suffix(cleaned, first_chile_SUFFIX_LIST).strip()
    if len(stem) >= 8:
        return stem
    return cleaned
|
|
|
-
|
|
|
-
|
|
|
def rwanda_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Rwandan company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``FIRST_Rwanda_suffix_list``; the stripped truncation
    is returned when it has at least 6 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = rwanda_truncate_at_suffix(cleaned, FIRST_Rwanda_suffix_list).strip()
    return stem if len(stem) >= 6 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def philippines_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Philippine company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``FIRST_philippines_suffix_list``; the stripped
    truncation is returned when it has at least 6 characters, else the
    cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = philippines_truncate_at_suffix(cleaned, FIRST_philippines_suffix_list).strip()
    if len(stem) < 6:
        return cleaned
    return stem
|
|
|
-
|
|
|
-
|
|
|
def colombia_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Colombian company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``FIRST_colombia_suffix_list``; the stripped truncation
    is returned when it has at least 6 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = colombia_truncate_at_suffix(cleaned, FIRST_colombia_suffix_list).strip()
    return cleaned if len(stem) < 6 else stem
|
|
|
-
|
|
|
-
|
|
|
def indonesia_company_abbr(company_name: str) -> str or None:
    """Abbreviate an Indonesian company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``indonesia_suffix_list``; the stripped truncation is
    returned when it has at least 8 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = indonesia_truncate_at_suffix(cleaned, indonesia_suffix_list).strip()
    if len(stem) < 8:
        return cleaned
    return stem
|
|
|
-
|
|
|
-
|
|
|
def venezuela_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Venezuelan company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``venezuela_suffix_list``; the stripped truncation is
    returned when it has at least 6 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = venezuela_truncate_at_suffix(cleaned, venezuela_suffix_list).strip()
    return stem if len(stem) >= 6 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def uzbekistan_company_abbr(company_name):
    """Abbreviate an Uzbek company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the upper-cased
    name is passed through ``kaz_extract_text_from_enclosers`` and
    ``clean_company_name``, and the first entry of ``kaz_heads`` that
    prefixes the result is removed with ``remove_prefix``. If what remains
    is shorter than 8 characters (measured before stripping), the cleaned
    upper-cased original is returned instead of the stripped remainder.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    core = clean_company_name(kaz_extract_text_from_enclosers(upper_name))
    for head in kaz_heads:
        if core.startswith(head):
            # Only the first matching head is removed.
            core = remove_prefix(core, head)
            break
    if len(core) >= 8:
        return core.strip()
    return clean_company_name(upper_name)
|
|
|
-
|
|
|
-
|
|
|
def kazakhstan_company_abbr(company_name):
    """Abbreviate a Kazakh company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the upper-cased
    name is passed through ``kaz_extract_text_from_enclosers`` and
    ``clean_company_name``, and the first entry of ``kaz_heads`` that
    prefixes the result is removed with ``remove_prefix``. If what remains
    is shorter than 8 characters (measured before stripping), the cleaned
    upper-cased original is returned instead of the stripped remainder.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    extracted = kaz_extract_text_from_enclosers(upper_name)
    core = clean_company_name(extracted)
    for head in kaz_heads:
        if not core.startswith(head):
            continue
        # Only the first matching head is removed.
        core = remove_prefix(core, head)
        break
    if len(core) < 8:
        return clean_company_name(upper_name)
    return core.strip()
|
|
|
-
|
|
|
-
|
|
|
def moldova_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Moldovan company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``moldova_suffix_list`` and that result is truncated
    again with ``moldova_suffix_list2``. The stripped final result is
    returned when it has at least 6 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    first_pass = moldova_truncate_at_suffix(cleaned, moldova_suffix_list)
    stem = moldova_truncate_at_suffix_second(first_pass, moldova_suffix_list2).strip()
    if len(stem) < 6:
        return cleaned
    return stem
|
|
|
-
|
|
|
-
|
|
|
def singapore_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Singapore company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``singapore_suffix_list``; the stripped truncation is
    returned when it has at least 8 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = singapore_truncate_at_suffix(cleaned, singapore_suffix_list).strip()
    return stem if len(stem) >= 8 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def hongkong_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Hong Kong company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``hongkong_suffix_list``; the stripped truncation is
    returned when it has at least 6 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = hongkong_truncate_at_suffix(cleaned, hongkong_suffix_list).strip()
    if len(stem) < 6:
        return cleaned
    return stem
|
|
|
-
|
|
|
-
|
|
|
def china_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Chinese company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``china_suffix_list``; the stripped truncation is
    returned when it has at least 6 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = china_truncate_at_suffix(cleaned, china_suffix_list).strip()
    return cleaned if len(stem) < 6 else stem
|
|
|
-
|
|
|
-
|
|
|
def vietnam_company_abbr(company_name: str) -> str or None:
    """Abbreviate a Vietnamese company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    goes through three steps in order: ``vietnam_take_right_half``,
    ``vietnam_take_left_half``, ``vietnam_truncate_at_suffix``. The result is
    returned when it has at least 8 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = vietnam_truncate_at_suffix(
        vietnam_take_left_half(vietnam_take_right_half(cleaned))
    )
    return stem if len(stem) >= 8 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def india_company_abbr(company_name):
    """Abbreviate an Indian company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the upper-cased
    name is cleaned, the first entry of ``ind_head`` that prefixes it is
    removed with ``remove_prefix``, and the rest is truncated with
    ``india_suffix_list``. If the stripped truncation is shorter than 8
    characters, the cleaned upper-cased original is returned instead.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    core = clean_company_name(upper_name)
    for head in ind_head:
        if not core.startswith(head):
            continue
        # Only the first matching head is removed.
        core = remove_prefix(core, head)
        break
    stem = india_truncate_at_suffix(core, india_suffix_list).strip()
    if len(stem) >= 8:
        return stem
    return clean_company_name(upper_name)
|
|
|
-
|
|
|
-
|
|
|
def ukraine_truncate_at_suffix_first(text, suffix_list):
    """Cut ``text`` at the first listed suffix it contains.

    The first entry of ``suffix_list`` found anywhere in ``text`` is handed
    to ``split_last`` and that result is returned. If no entry occurs in
    ``text`` it is returned unchanged.
    """
    hit = next((s for s in suffix_list if s in text), None)
    if hit is None:
        return text
    return split_last(text, hit)
|
|
|
-
|
|
|
-
|
|
|
def ukraine_truncate_at_suffix_second(text, suffix_list):
    """Cut ``text`` at the first listed suffix it contains, keeping the suffix.

    The first entry of ``suffix_list`` found anywhere in ``text`` is handed
    to ``split_last`` and the suffix is appended back onto that result. If
    no entry occurs in ``text`` it is returned unchanged.
    """
    hit = next((s for s in suffix_list if s in text), None)
    if hit is None:
        return text
    return split_last(text, hit) + hit
|
|
|
-
|
|
|
-
|
|
|
def ukraine_company_abbr_first(company_name):
    """Abbreviate a Ukrainian company name with the first suffix list.

    Returns ``None`` for a falsy ``company_name``; otherwise the upper-cased
    name truncated with ``ukraine_suffix_first``, stripped. The name is not
    passed through ``clean_company_name`` here.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    return ukraine_truncate_at_suffix_first(upper_name, ukraine_suffix_first).strip()
|
|
|
-
|
|
|
-
|
|
|
def ukraine_company_abbr_second(company_name):
    """Abbreviate a Ukrainian company name with the second suffix list.

    Returns ``None`` for a falsy ``company_name``; otherwise the upper-cased
    name truncated with ``ukraine_suffix_second``, stripped. The name is not
    passed through ``clean_company_name`` here.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    return ukraine_truncate_at_suffix_second(upper_name, ukraine_suffix_second).strip()
|
|
|
-
|
|
|
-
|
|
|
def mexico_company_abbr(company_name):
    """Abbreviate a Mexican company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated by ``mexico_truncate_at_suffix``; the truncation is returned
    when it has at least 8 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = mexico_truncate_at_suffix(cleaned)
    return stem if len(stem) >= 8 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def nigeria_company_abbr(company_name):
    """Abbreviate a Nigerian company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated by ``nigeria_truncate_at_suffix``; the truncation is
    returned when it has at least 4 characters, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = nigeria_truncate_at_suffix(cleaned)
    if len(stem) < 4:
        return cleaned
    return stem
|
|
|
-
|
|
|
-
|
|
|
def philippines_company_abbr_second(company_name):
    """Abbreviate a Philippine company name (second variant).

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``FIRST_philippines_suffix_list``. The length test is
    made on the truncation before stripping: with 6 or more characters the
    stripped truncation is returned, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = philippines_truncate_at_suffix(cleaned, FIRST_philippines_suffix_list)
    # Length is deliberately measured before stripping, as in the original.
    if len(stem) < 6:
        return cleaned
    return stem.strip()
|
|
|
-
|
|
|
-
|
|
|
def england_company_abbr(company_name):
    """Abbreviate an English company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``FIRST_england_suffix_list``. The length test is made
    on the truncation before stripping: with 8 or more characters the
    stripped truncation is returned, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = england_truncate_at_suffix(cleaned, FIRST_england_suffix_list)
    # Length is deliberately measured before stripping, as in the original.
    return stem.strip() if len(stem) >= 8 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def peru_company_abbr(company_name):
    """Abbreviate a Peruvian company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``peru_suffix_list``; with 6 or more characters the
    stripped truncation is returned, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = peru_truncate_at_suffix(cleaned, peru_suffix_list)
    if len(stem) < 6:
        return cleaned
    return stem.strip()
|
|
|
-
|
|
|
-
|
|
|
def lesotho_company_abbr(company_name):
    """Abbreviate a Lesotho company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``lesotho_suffix_list``; with 6 or more characters the
    stripped truncation is returned, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = lesotho_truncate_at_suffix(cleaned, lesotho_suffix_list)
    return stem.strip() if len(stem) >= 6 else cleaned
|
|
|
-
|
|
|
-
|
|
|
def germany_company_abbr(company_name):
    """Abbreviate a German company name.

    Returns ``None`` for a falsy ``company_name``. Otherwise the cleaned name
    is truncated with ``germany_suffix_list``; with 8 or more characters the
    stripped truncation is returned, else the cleaned name.
    """
    if not company_name:
        return None
    cleaned = clean_company_name(company_name)
    stem = germany_truncate_at_suffix(cleaned, germany_suffix_list)
    if len(stem) < 8:
        return cleaned
    return stem.strip()
|
|
|
-
|
|
|
-
|
|
|
def company_abbr(country_name: str, company_name: str) -> str or None:
    """Route ``company_name`` to the abbreviation rule set named by ``country_name``.

    ``country_name`` is a lower-case rule key such as ``'china'`` or
    ``'america_second'``; several countries have more than one rule set,
    distinguished by a ``_second`` / ``_third`` style postfix.

    Args:
        country_name: Rule key selecting the handler.
        company_name: Raw company name passed to the handler untouched.

    Returns:
        Whatever the selected handler returns (the handlers visible in this
        file return ``None`` for a falsy name). For an unrecognised
        ``country_name`` the input ``company_name`` is returned unchanged.
    """
    if country_name == 'pakistan':
        return pakistan_company_abbr(company_name)
    if country_name == 'mirror_pakistan':
        return mirror_pakistan_company_abbr(company_name)
    if country_name == 'america':
        return america_company_abbr(company_name)
    if country_name == 'indonesia':
        return indonesia_company_abbr(company_name)
    if country_name == 'venezuela':
        return venezuela_company_abbr(company_name)
    if country_name == 'america_second':
        return america_company_abbr_second(company_name)
    if country_name == 'uzbekistan':
        return uzbekistan_company_abbr(company_name)
    if country_name == 'kazakhstan':
        return kazakhstan_company_abbr(company_name)
    if country_name == 'chile':
        return chile_company_abbr(company_name)
    if country_name == 'moldova':
        return moldova_company_abbr(company_name)
    # 'bangladesh_fist' is the historical, misspelled key and is kept so
    # existing callers keep working; the correctly spelled key used to fall
    # through silently and return the raw name.
    if country_name in ('bangladesh_fist', 'bangladesh_first'):
        return bangladesh_company_abbr_first(company_name)
    if country_name == 'bangladesh_second':
        return bangladesh_company_abbr_second(company_name)
    if country_name == 'rwanda':
        return rwanda_company_abbr(company_name)
    if country_name == 'singapore':
        return singapore_company_abbr(company_name)
    if country_name == 'hongkong':
        return hongkong_company_abbr(company_name)
    if country_name == 'philippines':
        return philippines_company_abbr(company_name)
    if country_name == 'china':
        return china_company_abbr(company_name)
    if country_name == 'vietnam':
        return vietnam_company_abbr(company_name)
    if country_name == 'india':
        return india_company_abbr(company_name)
    if country_name == 'ukraine_first':
        return ukraine_company_abbr_first(company_name)
    if country_name == 'ukraine_second':
        return ukraine_company_abbr_second(company_name)
    if country_name == 'america_third':
        return america_company_abbr_third(company_name)
    if country_name == 'mexico':
        return mexico_company_abbr(company_name)
    if country_name == 'colombia':
        return colombia_company_abbr(company_name)
    if country_name == 'nigeria':
        return nigeria_company_abbr(company_name)
    if country_name == 'philippines_second':
        return philippines_company_abbr_second(company_name)
    if country_name == 'peru':
        return peru_company_abbr(company_name)
    if country_name == 'lesotho':
        return lesotho_company_abbr(company_name)
    if country_name == 'germany':
        return germany_company_abbr(company_name)
    if country_name == 'england':
        return england_company_abbr(company_name)
    # Unknown rule key: hand the name back untouched.
    return company_name
|
|
|
-
|
|
|
-
|
|
|
if __name__ == '__main__':
    # Manual smoke check: print each sample next to its German abbreviation.
    german_samples = [
        'Wilhelm Manz GmbH & Co. KG',
        'Wilhelm Zuleeg GmbH',
        'Aba Air Group Llc',
        'CAMUSAT (MAURICE) LIMITED',
        'BMTS Technology Austria GmbH & Co',
        'Arhetipo Grup SRL',
        'Boegli-Gravures SA',
        'Kronos International Inc.',
        'YAHO AUTO EXCHANGE CO. LIMITED',
        'Radpar Otomotiv Sanayi ve Ticaret Limited Şti.',
        'SERVICIOS INTERSEC S.A. DE C.V.',
        'PLASTIC SOLUTIONS DI MARTOCCIA CRISTIANS.A.S.',
        'C-Solution Elektrotechnik GbR',
        'Baumer Hhs S.R.L.',
        'AJH Druck & Technik Helge Klemt e.K.',
        'ADM Hamburg AG',
        'Lauer Ventilation UG',
        'Bankhaus J. Faisst OHG',
        'Continental Teves AG & Co.OHG',
        'Dow Produktions und Vertriebs GmbH & Co. OHG',
        'Springer Nature AG & Co. KGaA',
        'Paragon GmbH & Co. KGaA'
    ]
    # Input left-aligned, result right-aligned, both in 50-character columns.
    row_layout = "{:<50} {:>50}"
    for sample in german_samples:
        print(row_layout.format(sample, company_abbr('germany', sample)))

    # Vietnamese samples, kept disabled for ad-hoc runs of vietnam_company_abbr.
    # vietnam_samples = [
    #     'COMPANY LIMITED NGOC PHAT TM',
    #     'COMPANY LTD PHAM',
    #     'TAIHING MOULDS CO LTD',
    #     'REPRESENTATIVE OFFICE OF HETTICH SINGAPORE SEA PTE LTD IN HO CHI MINH CITY',
    #     'SAI GON WASTE SOLUTION JOINT STOCK COMPANY',
    #     'ENTERTAINMENT FISHING ROD IMPORT EXPORT TRADING COMPANY LIMI',
    #     'TPP PLUS CORP',
    #     'VILOMIX VIETNAM LLC',
    #     'SMILETECH JSC',
    #     'DUC MINH CTI CO JSC',
    #     'SMILETECH JSC',
    #     'HUVICO LTD'
    # ]
    # for sample in vietnam_samples:
    #     print(vietnam_company_abbr(sample))
|