| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556 |
- import sys
- import re
- import os
- abspath = os.path.abspath(__file__)
- root_path = re.sub(r"tendata-warehouse.*", "tendata-warehouse", abspath)
- sys.path.append(root_path)
- from dw_base.spark.udf.customs.common_clean import clean_company_name
# Opening/closing delimiter pairs, tried in this order by
# kaz_extract_text_from_enclosers.
# NOTE(review): the ('?', '?') pair looks like a character that was lost to a
# bad encoding round-trip — confirm what delimiter was intended.
kaz_enclosers = [
    ('""', '""'),
    ('"', '"'),
    ("<<", ">>"),
    ("?", "?"),
]

# Space-free legal-form markers used when abbreviating Pakistani company
# names.  Order is significant: truncate_at_suffix cuts at the first entry
# that occurs in the name.
pakistan_suffix_list = [
    "GROUPCOMPANYLIMITED", "LIMITEDPARTNERSHIP", "CORPORATIONLIMITED",
    "SMCPRIVATE", "OFCOMPANY", "PRIVATELIMIT", "PRIVATECO",
    "LIABILITYCOMPANY", "LIMITEDCOMPANY", "COMPANYLIMITED",
    "INCORPORAT", "CORPORATION", "GROUPCOLTD", "COMPANYLTD",
    "COLIMITED", "GROUPLTD", "SMCPVT", "PVTLIMIT", "PVTCOLTD", "PVTLTD",
    "FACTORY", "CORPLTD", "COMPANY", "PTYLTD", "AGENCY", "OFFICE", "CENTER",
    "COLTD", "COINC", "C0LTD", "LIMIT", "CORP", "LLC", "LTD", "COLT",
]
# Suffixes for the second US truncation pass.  Order is significant: the
# truncation helper returns on the first entry that matches.
SECOND_AMERICA_SUFFIX_LIST = [
    ' UNLIMITED',
    ' LIMITED',
    ' CO LTD',
    ' COMPANY LTD',
    ' AND COMPANY',
    ' CORPORATION',
    ' CORP',
    ' COMPANY INC',
    ' COMPANY',
    ' LLC',
    ' CO INC',
    ' CO',
    ' MD',
    ' LTD',
    # The comma after ' INC' was missing, so Python's implicit string
    # concatenation fused it with the next entry into ' INC LLP' and neither
    # ' INC' nor ' LLP' was ever matched.
    ' INC',
    ' LLP',
    ' PLC',
    ' EST',
]
# Suffixes for the third US truncation pass.  Order is significant: the
# truncation helper returns on the first entry that matches.
third_AMERICA_SUFFIX_LIST = [
    ' CORPORATION',
    ' COMPANY LTD',
    ' COMPANY INC',
    ' UNLIMITED',
    ' LIMITED',
    ' CO LTD',
    ' COMPANY',
    ' CO INC',
    ' CORP',
    ' LLC',
    ' LTD',
    # The comma after ' INC' was missing, so Python's implicit string
    # concatenation fused it with the next entry into ' INC LLP' and neither
    # ' INC' nor ' LLP' was ever matched.
    ' INC',
    ' LLP',
    ' PLC',
    ' EST',
    ' CO',
    ' MD',
]
# Chilean legal-form suffixes (each with a leading space); original order kept.
first_chile_SUFFIX_LIST = [
    " SPA", " S A", " SA", " LTDA", " LIMITADA", " LLC",
    " SOCIEDAD ANONIMA", " CO LTD", " LTD", " LIMI", " E I R",
]

# Markers for the first Bangladesh pass; original order kept.
first_bangladesh_suffix_list = [
    "CHANGED FROM", "CHANGED", "CHANGE FROM", "CHANGE", "EXCHANGE",
]
# Ukrainian markers; every entry is padded with a space on both sides.
ukraine_suffix_first = [' М КИЇВ ВУЛ ', ' ВУЛ ']

# Spaced-out legal forms; every entry is padded with a space on both sides.
ukraine_suffix_second = [' S R O ', ' Z O O ']
# Suffixes for the second Bangladesh pass; original order kept.
second_bangladesh_suffix_list = [
    "PVT CO LIMITED", "PVT LIMITED", "LIMITED", "PVT LTD", "LTD", "PVT",
    "CO LTD", "CO", "PLC",
]

# Rwandan suffixes; note that only ' CO LTD' carries a leading space.
FIRST_Rwanda_suffix_list = [
    "COMPANY RWANDA LTD", "COMPANY LTD", " CO LTD", "LTD", "LIMITED",
]

# UK suffixes (each with a leading space); original order kept.
FIRST_england_suffix_list = [
    " COMPANY LIMITED", " ENTERPRISES LTD", " LIMITED", " COMPANY",
    " CO LTD", " LTD", " LLP",
]

# Philippine suffixes; only the ' CO...' entries carry a leading space.
FIRST_philippines_suffix_list = [
    " CO INC", " CO LTD", "INC", "CORPORATION", "CORP", "LLC",
    "ENTERPRISES", "INCORPORATED", " CO", "PTE LTD", "PTY LTD", "LTD",
    "GMBH", "S R L", "SRL",
]

# Colombian suffixes; original order kept.
FIRST_colombia_suffix_list = [
    "LIMITADA", "S A S", "LITDA", "LTDA", "SAS", "S A", "LLC",
]
# Suffixes for the first US truncation pass; original order kept.
# (The variable name's spelling is left untouched because other code may
# reference it.)
frist_america_suffix_list = [
    "PRODUCT",
    "UNION OF THE UNITED STATES",
    " FOUNDATION",
    "SA DE CV",
    " UNLIMITED", " LIMITED",
    "CENTERS OF AMERICA",
    " AMERICA CORP", " USA CORP", " CORP", " CORPORATION",
    "FOUNDATION",
    " PLLC", " LP", " PA", " CO",
    "ENTERPRISE", "COMPANY",
    " AMERICA LLC", " AMERICA INC",
    " USA LLC", " USA INC",
    " FL LLC", " FL INC",
    " 2 LLC", " 2 INC",
    " 3 LLC", " 3 INC",
    " 2022 LLC", " 2022 INC",
    " 2021 LLC", " 2021 INC",
    " 2020 LLC", " 2020 INC",
    " CO LLC", " CO INC",
    " LLC", " INC",
    " CO LTD", " LTD",
]

# Indonesian suffixes; original order kept.
indonesia_suffix_list = [
    "AGENC", "COMPANY", "DEVELOPMENT", "ORGANIZATION", "ASSOCIATION",
    "SERVICE", "GROUP",
    "PTY LTD", "PTY LIMIT", " CO LTD", " CO LIMIT", " PTE LTD",
    "INDONESIA CO", "INDONESIA INCORP", "INDONESIA LTD", "PHILS CO",
    "INDONESIA UNLIMIT",
    " ASIA CO", " ASIA UNLIMITED",
    "INCORPORATED", "ENTERPRISE",
    " INDONESIA INC", " ASIA INC", " INDONESIA CO INC",
    " CO", " CORP", "CORPORATION", " INC", " INDONESIA", " TBK",
]

# Venezuelan legal forms; original order kept.
venezuela_suffix_list = [
    "S A", "C A", "R L", "R S", "F P", "S R L", "LTD", "INC",
    "COMPANY C A", "COMPAÑIA ANONIMA", "CORPORATION C A",
    "COOPERATIVA", "INTERNATIONAL", "CORPORACIÓN", "REPRESENTACIONES",
    "ASOCIACION CIVIL", "FUNDACION",
]
# Leading legal-form abbreviations for Kazakh company names.
# NOTE(review): some entries appear to mix Latin and Cyrillic look-alike
# letters (e.g. the "O"s) — verify each literal against the real data.
kaz_heads = [
    "TOO", "ООО", "АО", "ФХ",
    "ИП OOO", "НПЦ ООО", "СП OOO", "ЧП",
]
# Moldovan legal-form phrases.  The last three entries are the ones
# moldova_truncate_at_suffix strips from the end of a name; the rest are
# stripped from the start.  Original order kept.
moldova_suffix_list = [
    "ASOCIATIA GOSPODARIILOR TARANESTI",
    "COOPERATIVA DE ÎNTREPRINZATOR",
    "COOPERATIVA DE PRODUCERE",
    "COOPERATIVA DE",
    "COOPERATIVA AGRICOLA DE INTREPRINZATOR",
    "COOPERATIVA AGRICOLA",
    "CENTRUL TEHNIC",
    "COMPANIA",
    "FIRMA COOPERATISTA TEHNICO-STIINTIFICA DE PRODUCTIE",
    "FIRMA DE PRODUCTIE",
    "FIRMA DE PRODUCŢIE ŞI COMERŢ",
    "FIRMA",
    "SOCIETATEA COMERCIALĂ",
    "SOCIETATEA CU RASPUNDERE LIMITATA FIRMA",
    "SOCIETATEA CU RĂSPUNDERE LIMITATĂ",
    "SOCIETATEA CU RASPUNDERE LIMITATA",
    "SOCIETATEA PE ACTIUNI",
    "SOCIETATEA IN NUME COLECTIV AGENTIA",
    "INTREPRINDEREA INDIVIDUALA",
    "ÎNTREPRINZĂTOR INDIVIDUAL",
    "ÎNTREPRINDEREA INDIVIDUALĂ",
    "ÎNTREPRINDEREA MUNICIPALĂ",
    "ÎNTREPRINDEREA CU CAPITAL STRĂIN",
    "INSTITUŢIA MEDICO-SANITARĂ PUBLICĂ",
    "REDACTIA GAZETEI",
    "ORGANIZATIA DE ADMINISTRARE FIDUCIARA A INVESTITILOR",
    "S R L",
    "SOCIETATEA CU RESPONSABILITATE LIMITATA",
    "SOCIETATE CU RĂSPUNDERE LIMITATĂ",
]

# The trailing-only subset of the list above.
moldova_suffix_list2 = [
    "S R L",
    "SOCIETATEA CU RESPONSABILITATE LIMITATA",
    "SOCIETATE CU RĂSPUNDERE LIMITATĂ",
]
# Singapore suffixes; original order kept.
singapore_suffix_list = [
    "SINGAPORE PTE LTD", "S PTE LTD", "PTE LTD",
    "ENTERPRISES", "ENTERPRISE", "ENT",
    "AGENCIES", "AGENCY",
    "PRIVATE LIMITED", "COMPANY", "LLP", "CO",
]

# Hong Kong suffixes (each with a leading space); original order kept.
hongkong_suffix_list = [
    " CO LIMITED", " LIMITED", " CO LTD", " COMPANY", " LTD",
]

# Chinese-company suffixes (each with a leading space); original order kept.
china_suffix_list = [
    " GROUP CORPORATION LIMITED",
    " CORPORATION LIMITED",
    " GROUP CORPORATION",
    " GROUP CO LIMITED",
    " LIMITED COMPANY",
    " COMPANY LIMITED",
    " GROUP CO LTD",
    " CORPORATION",
    " CO LIMITED",
    " GROUP CORP",
    " CORP LTD",
    " LIMITED", " COMPANY", " FACTORY",
    " CO LTD", " CO INC", " CORP", " INC", " CO",
]

# Vietnamese separators with a trailing space.
vietnam_right_separator_list = ["COMPANY LIMITED ", "COMPANY LTD "]

# Vietnamese separators with a leading space.
vietnam_left_separator_list = [
    " CO LTD", " PTE LTD", " JOINT STOCK COMPANY", " COMPANY",
]

# Vietnamese suffixes (each with a leading space).
vietnam_suffix_list = [" CORP", " LLC", " CO JSC", " JSC", " LTD"]

# Leading "M S" / "MS" markers for Indian names.
ind_head = ["M S", "MS"]
# Indian suffixes (each with a leading space); original order kept.
india_suffix_list = [
    " CO I PVT L", " CO PVT L", " CO PRIVATE L", " CO I LTD",
    " I LTD", " I LIMITED", " I PVT L", " I PRIVATE L",
    " COMPANY PRIVATE L", " COMPANY PVT L",
    " P LTD", " PRIVATE L", " PVT L",
    " CO", " INC", " CO LIMITED", " LTD", " LIMITED", " CO I", " I",
]

# Mexican legal forms (each with a leading space); original order kept.
mexico_suffix_list = [
    " S P R DE R L DE C V",
    " S DE R L DE C V",
    " S DE RL DE CV",
    " S A P I DE CV",
    " S P R DE R L",
    " S A DE C V",
    " SA DE CV",
]

# Nigerian suffixes (each with a leading space); original order kept.
nigeria_suffix_list = [
    " COMPANY LIMITED", " COMPANY LTD", " COMPANY", " LIMITED",
    " PTE LTD", " CO LTD", " LTD", " LLC",
]

# Peruvian legal forms; original order kept.
peru_suffix_list = [
    "SOCIEDAD ANONIMA CERRADA", "SOCIEDAD ANONIMA CER",
    "E I R LTDA", "S R LTDA", "E I R L", "S R L", "S A C", "SAC", "S A",
]

# Lesotho suffixes (each with a leading space); original order kept.
lesotho_suffix_list = [
    " LLC (EXTERNAL COMPANY) LTD",
    " LLC (EXTERNAL COMPANY)",
    " (PROPRIETARY) LIMITED",
    " COMPANY (PTY) LTD",
    " COMPANY LIMITED",
    " COMPANY LTD",
    " LIMITED", " PTY LTD", " CO LTD",
]

# German (and a few foreign) legal forms; original order kept.
germany_suffix_list = [
    "GMBH AND CO KGAA", "GMBH AND CO OHG", "GMBH AND CO KG",
    "AG AND CO KGAA", "AG AND CO OHG",
    "LIMITED ŞTI", "GMBH AND CO", "S A DE C V", "CO LIMITED", "LIMITED",
    "S R L", "GMBH", "GBR", "SRL", "INC", "LLC", "OHG",
    "A S", "E K", "AG", "SA", "UG",
]
def kaz_extract_text_from_enclosers(text, enclosers=None):
    """Return the text found between the first usable pair of enclosers.

    Each ``(open, close)`` pair is tried in order.  A pair is usable when the
    opener occurs in ``text`` and the last occurrence of the closer lies
    after it with at least one character in between.  The span between the
    first opener and the last closer is returned, delimiters excluded.

    Fixes over the previous version:
      * the slice now skips the whole opener (``len(open)``) instead of a
        single character, so two-character openers such as ``<<`` or ``""``
        no longer leak a stray delimiter into the result;
      * a missing opener (``find`` returning -1) is no longer mistaken for a
        match at position -1.

    :param text: raw string; falsy values are returned unchanged.
    :param enclosers: optional iterable of ``(open, close)`` pairs; defaults
        to the module-level ``kaz_enclosers``.
    :return: the enclosed text, or ``text`` itself when no pair applies.
    """
    if not text:
        return text
    pairs = kaz_enclosers if enclosers is None else enclosers
    for open_str, close_str in pairs:
        open_inx = text.find(open_str)
        if open_inx == -1:
            continue
        start = open_inx + len(open_str)
        close_inx = text.rfind(close_str)
        # Require a non-empty span between opener and closer.
        if close_inx > start:
            return text[start:close_inx]
    return text
def remove_prefix(text, prefix):
    """Return ``text`` without a leading ``prefix``; unchanged if absent."""
    return text[len(prefix):] if text.startswith(prefix) else text
def truncate_at_suffix(text, suffix_list):
    """Cut ``text`` at the first listed marker that occurs in it.

    Markers are tried in list order.  For the first one contained in
    ``text``, everything before its first occurrence is returned.  When no
    marker occurs, ``text`` is returned unchanged.
    """
    hit = next((marker for marker in suffix_list if marker in text), None)
    if hit is None:
        return text
    return text.partition(hit)[0]
def pakistan_company_abbr(company_name: str) -> str or None:
    """Build a space-free abbreviation key for a Pakistani company name.

    The name is upper-cased, everything except ``A-Z`` and ``0-9`` is
    dropped, a leading ``MS`` (what "M/S" becomes after cleaning) is removed
    and the result is cut at the first marker from ``pakistan_suffix_list``.

    Fix: the prefix used to be passed as lower-case ``'ms'``, which can never
    match the already upper-cased text, so the prefix was never removed.

    :param company_name: raw company name; falsy values yield ``None``.
    :return: the truncated name if longer than 4 characters, otherwise the
        prefix-stripped name if that is longer than 4 characters, otherwise
        ``None``.
    """
    if not company_name:
        return None
    cleaned_name = re.sub(r'[^A-Z0-9]', '', company_name.upper())
    removed_prefix_name = remove_prefix(cleaned_name, 'MS')
    truncated_name = truncate_at_suffix(removed_prefix_name, pakistan_suffix_list).strip()
    if len(truncated_name) > 4:
        return truncated_name
    if len(removed_prefix_name) > 4:
        return removed_prefix_name
    return None
def mirror_pakistan_company_abbr(company_name: str) -> str or None:
    """Build an abbreviation key for a Pakistani name, keeping spaces.

    Works like ``pakistan_company_abbr`` except that spaces survive the
    cleaning step and the prefix-stripped name is whitespace-trimmed before
    truncation.

    Fix: the prefix used to be passed as lower-case ``'ms'``, which can never
    match the already upper-cased text, so the prefix was never removed.

    :param company_name: raw company name; falsy values yield ``None``.
    :return: the truncated name if longer than 4 characters, otherwise the
        prefix-stripped name if that is longer than 4 characters, otherwise
        ``None``.
    """
    if not company_name:
        return None
    cleaned_name = re.sub(r'[^A-Z0-9 ]', '', company_name.upper())
    removed_prefix_name = remove_prefix(cleaned_name, 'MS').strip()
    truncated_name = truncate_at_suffix(removed_prefix_name, pakistan_suffix_list).strip()
    if len(truncated_name) > 4:
        return truncated_name
    if len(removed_prefix_name) > 4:
        return removed_prefix_name
    return None
def split_last(text, suffix):
    """Return ``text`` up to the last occurrence of ``suffix``.

    Falsy ``text`` (``None`` or ``''``) yields ``None``.  When ``suffix``
    does not occur, ``text`` is returned unchanged.
    """
    if not text:
        return None
    cut = text.rfind(suffix)
    return text if cut == -1 else text[:cut]
# Processing logic for the mc_org field of Namibia imports.
def split_first_dtp(text):
    """Return the part of ``text`` before its first DTP marker.

    The markers ``" ---DTP"``, ``"---DTP"`` and ``"--DTP"`` are tried in that
    order and the first one present decides where the cut is made.  Falsy
    ``text`` yields ``None``; text without any marker is returned unchanged.
    """
    if not text:
        return None
    for marker in (" ---DTP", "---DTP", "--DTP"):
        if marker in text:
            return text.split(marker, 1)[0]
    return text
# Suffixes that only count when the text ends with them.
_AMERICA_FIRST_TRAILING_ONLY = frozenset({
    ' FOUNDATION', ' UNLIMITED', ' AMERICA CORP', ' USA CORP', ' CORP',
    ' CORPORATION', 'FOUNDATION', ' PLLC', ' LP', ' PA', ' CO',
    'ENTERPRISE', 'COMPANY', ' LLC', ' INC',
})


def america_truncate_at_suffix_first(text, suffix_list):
    """Cut ``text`` at the first applicable suffix (first US pass).

    Suffixes are tried in list order.  A suffix from the trailing-only set
    applies only when ``text`` ends with it; any other suffix applies as
    soon as it occurs anywhere.  The cut is made by ``split_last``, i.e. at
    the last occurrence.  ``text`` is returned unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in _AMERICA_FIRST_TRAILING_ONLY or text.endswith(candidate):
            return split_last(text, candidate)
    return text
# Suffixes that only count when the text ends with them.
_AMERICA_SECOND_TRAILING_ONLY = frozenset({
    ' UNLIMITED', ' LIMITED', ' AND COMPANY', ' CORPORATION', ' CORP',
    ' COMPANY', ' LLC', ' CO', ' MD', ' LTD', ' INC', ' PLC', ' LLP', ' EST',
})


def america_truncate_at_suffix_second(text, suffix_list):
    """Cut ``text`` at the first applicable suffix (second US pass).

    Suffixes are tried in list order.  A suffix from the trailing-only set
    applies only when ``text`` ends with it; any other suffix applies as
    soon as it occurs anywhere.  The cut is made by ``split_last``.
    ``text`` is returned unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in _AMERICA_SECOND_TRAILING_ONLY or text.endswith(candidate):
            return split_last(text, candidate)
    return text
# Suffixes that only count when the text ends with them.
_AMERICA_THIRD_TRAILING_ONLY = frozenset({
    ' UNLIMITED', ' LIMITED', ' CORPORATION', ' CORP', ' COMPANY', ' LLC',
    ' CO', ' MD', ' LTD', ' INC', ' PLC', ' LLP', ' EST',
})


def america_truncate_at_suffix_third(text, suffix_list):
    """Cut ``text`` at the first applicable suffix (third US pass).

    Suffixes are tried in list order.  A suffix from the trailing-only set
    applies only when ``text`` ends with it; any other suffix applies as
    soon as it occurs anywhere.  The cut is made by ``split_last``.
    ``text`` is returned unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in _AMERICA_THIRD_TRAILING_ONLY or text.endswith(candidate):
            return split_last(text, candidate)
    return text
def bangladesh_truncate_at_suffix_first(text, suffix_list):
    """Cut ``text`` at the first applicable marker (first Bangladesh pass).

    ``'CHANGED'``, ``'CHANGE'`` and ``'EXCHANGE'`` apply only when ``text``
    ends with them; every other marker applies wherever it occurs.  The cut
    is made by ``split_last``.  ``text`` is returned unchanged when nothing
    applies.
    """
    trailing_only = ('CHANGED', 'CHANGE', 'EXCHANGE')
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in trailing_only or text.endswith(candidate):
            return split_last(text, candidate)
    return text
# Suffixes that apply only when the text ends with them.
_BANGLADESH_SECOND_TRAILING_ONLY = frozenset({
    'PVT CO LIMITED', 'PVT LIMITED', 'LIMITED', 'PVT LTD', 'LTD', 'CO', 'PLC',
})


def bangladesh_truncate_at_suffix_second(text, suffix_list):
    """Cut ``text`` at the first applicable suffix (second Bangladesh pass).

    Suffixes are tried in list order:
      * ``'PVT'`` applies wherever it occurs;
      * suffixes in the trailing-only set apply when ``text`` ends with them;
      * any other suffix (for example ``'CO LTD'``) is ignored.
    The cut is made by ``split_last``.  ``text`` is returned unchanged when
    nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        trailing_hit = (candidate in _BANGLADESH_SECOND_TRAILING_ONLY
                        and text.endswith(candidate))
        if candidate == 'PVT' or trailing_hit:
            return split_last(text, candidate)
    return text
# Suffixes that only count when the text ends with them.
_INDONESIA_TRAILING_ONLY = frozenset({
    ' CO', ' CORP', 'CORPORATION', ' INC', ' INDONESIA', ' TBK',
})


def indonesia_truncate_at_suffix(text, suffix_list):
    """Cut ``text`` at the first applicable Indonesian suffix.

    Suffixes are tried in list order.  A suffix from the trailing-only set
    applies only when ``text`` ends with it; any other suffix applies as
    soon as it occurs anywhere.  The cut is made by ``split_last``.
    ``text`` is returned unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in _INDONESIA_TRAILING_ONLY or text.endswith(candidate):
            return split_last(text, candidate)
    return text
def rwanda_truncate_at_suffix(text, suffix_list):
    """Cut ``text`` at the first applicable Rwandan suffix.

    ``'COMPANY RWANDA LTD'``, ``'COMPANY LTD'`` and ``'CO LTD'`` apply only
    when ``text`` ends with them; every other suffix applies wherever it
    occurs.  The cut is made by ``split_last``.  ``text`` is returned
    unchanged when nothing applies.

    NOTE(review): FIRST_Rwanda_suffix_list spells its entry ``' CO LTD'``
    with a leading space, which is not equal to ``'CO LTD'`` and is therefore
    matched anywhere — confirm that this is intended.
    """
    trailing_only = ('COMPANY RWANDA LTD', 'COMPANY LTD', 'CO LTD')
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in trailing_only or text.endswith(candidate):
            return split_last(text, candidate)
    return text
def philippines_truncate_at_suffix(text, suffix_list):
    """Strip the first listed suffix that ``text`` ends with.

    Suffixes are tried in list order and the cut is made by ``split_last``.
    ``text`` is returned unchanged when it ends with none of them.
    """
    for candidate in suffix_list:
        if candidate in text and text.endswith(candidate):
            return split_last(text, candidate)
    return text
def england_truncate_at_suffix(text, suffix_list):
    """Strip the first listed suffix that ``text`` ends with.

    Suffixes are tried in list order and the cut is made by ``split_last``.
    ``text`` is returned unchanged when it ends with none of them.
    """
    hit = next(
        (tail for tail in suffix_list if tail in text and text.endswith(tail)),
        None,
    )
    if hit is None:
        return text
    return split_last(text, hit)
def colombia_truncate_at_suffix(text, suffix_list):
    """Strip the first listed suffix that ``text`` ends with.

    Suffixes are tried in list order and the cut is made by ``split_last``.
    ``text`` is returned unchanged when it ends with none of them.
    """
    for tail in suffix_list:
        if tail not in text:
            continue
        if not text.endswith(tail):
            continue
        return split_last(text, tail)
    return text
# Suffixes that only count when the text ends with them.
_CHILE_TRAILING_ONLY = frozenset({
    ' SPA', ' S A', ' SA', ' LTDA', ' LIMITADA', ' LLC',
    ' SOCIEDAD ANONIMA', ' CO LTD', ' LTD', ' LIMI', ' E I R',
})


def chile_truncate_at_suffix(text, suffix_list):
    """Cut ``text`` at the first applicable Chilean suffix.

    Suffixes are tried in list order.  A suffix from the trailing-only set
    applies only when ``text`` ends with it; any other suffix applies as
    soon as it occurs anywhere.  The cut is made by ``split_last``.
    ``text`` is returned unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate not in _CHILE_TRAILING_ONLY or text.endswith(candidate):
            return split_last(text, candidate)
    return text
# Legal forms stripped only from the end of the text.
_VENEZUELA_TRAILING_ONLY = frozenset({
    'S A', 'C A', 'R L', 'R S', 'F P', 'S R L', 'INC',
    'COMPANY C A', 'COMPAÑIA ANONIMA', 'CORPORATION C A',
})

# Words stripped only from the start of the text.
_VENEZUELA_LEADING_ONLY = frozenset({
    'COOPERATIVA', 'INTERNATIONAL', 'CORPORACIÓN', 'REPRESENTACIONES',
    'ASOCIACION CIVIL', 'FUNDACION',
})


def venezuela_truncate_at_suffix(text, suffix_list):
    """Strip the first applicable Venezuelan legal form from ``text``.

    Entries are tried in list order:
      * leading-only entries apply when ``text`` starts with them, and the
        remainder after the entry is returned;
      * trailing-only entries apply when ``text`` ends with them;
      * any other entry applies wherever it occurs.
    The last two cases cut via ``split_last``.  ``text`` is returned
    unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in _VENEZUELA_LEADING_ONLY:
            if text.startswith(candidate):
                return text[len(candidate):]
        elif candidate not in _VENEZUELA_TRAILING_ONLY or text.endswith(candidate):
            return split_last(text, candidate)
    return text
# Legal-form phrases stripped only from the start of the text.
_MOLDOVA_LEADING = frozenset({
    'ASOCIATIA GOSPODARIILOR TARANESTI',
    'COOPERATIVA DE ÎNTREPRINZATOR',
    'COOPERATIVA DE PRODUCERE',
    'COOPERATIVA DE',
    'COOPERATIVA AGRICOLA DE INTREPRINZATOR',
    'COOPERATIVA AGRICOLA',
    'CENTRUL TEHNIC',
    'COMPANIA',
    'FIRMA COOPERATISTA TEHNICO-STIINTIFICA DE PRODUCTIE',
    'FIRMA DE PRODUCTIE',
    'FIRMA DE PRODUCŢIE ŞI COMERŢ',
    'FIRMA',
    'SOCIETATEA COMERCIALĂ',
    'SOCIETATEA CU RASPUNDERE LIMITATA FIRMA',
    'SOCIETATEA CU RĂSPUNDERE LIMITATĂ',
    'SOCIETATEA CU RASPUNDERE LIMITATA',
    'SOCIETATEA PE ACTIUNI',
    'SOCIETATEA IN NUME COLECTIV AGENTIA',
    'INTREPRINDEREA INDIVIDUALA',
    'ÎNTREPRINZĂTOR INDIVIDUAL',
    'ÎNTREPRINDEREA INDIVIDUALĂ',
    'ÎNTREPRINDEREA MUNICIPALĂ',
    'ÎNTREPRINDEREA CU CAPITAL STRĂIN',
    'INSTITUŢIA MEDICO-SANITARĂ PUBLICĂ',
    'REDACTIA GAZETEI',
    'ORGANIZATIA DE ADMINISTRARE FIDUCIARA A INVESTITILOR',
})

# Legal-form phrases stripped only from the end of the text.
_MOLDOVA_TRAILING = frozenset({
    'S R L',
    'SOCIETATEA CU RESPONSABILITATE LIMITATA',
    'SOCIETATE CU RĂSPUNDERE LIMITATĂ',
})


def moldova_truncate_at_suffix(text, suffix_list):
    """Strip the first applicable Moldovan legal form from ``text``.

    Entries are tried in list order:
      * a known leading phrase applies when ``text`` starts with it, and the
        remainder after the phrase is returned;
      * a known trailing phrase applies when ``text`` ends with it, and the
        cut is made by ``split_last``;
      * entries in neither set are ignored.
    ``text`` is returned unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in _MOLDOVA_LEADING:
            if text.startswith(candidate):
                return text[len(candidate):]
        elif candidate in _MOLDOVA_TRAILING and text.endswith(candidate):
            return split_last(text, candidate)
    return text
def moldova_truncate_at_suffix_second(text, suffix_list2):
    """Strip a trailing Moldovan legal form from ``text``.

    Only ``'S R L'``, ``'SOCIETATEA CU RESPONSABILITATE LIMITATA'`` and
    ``'SOCIETATE CU RĂSPUNDERE LIMITATĂ'`` are recognised, and only when
    ``text`` ends with them; other entries are ignored.  The cut is made by
    ``split_last``.  ``text`` is returned unchanged when nothing applies.
    """
    recognised = (
        'S R L',
        'SOCIETATEA CU RESPONSABILITATE LIMITATA',
        'SOCIETATE CU RĂSPUNDERE LIMITATĂ',
    )
    for candidate in suffix_list2:
        if candidate in text and candidate in recognised and text.endswith(candidate):
            return split_last(text, candidate)
    return text
# The only suffixes this helper acts on.
_SINGAPORE_TRAILING = frozenset({
    'SINGAPORE PTE LTD', 'S PTE LTD', 'PTE LTD', 'ENTERPRISES', 'ENTERPRISE',
    'ENT', 'AGENCIES', 'AGENCY', 'PRIVATE LIMITED', 'COMPANY', 'LLP', 'CO',
})


def singapore_truncate_at_suffix(text, suffix_list):
    """Strip the first recognised Singapore suffix that ``text`` ends with.

    Suffixes are tried in list order; entries outside the recognised set are
    ignored.  The cut is made by ``split_last``.  ``text`` is returned
    unchanged when nothing applies.
    """
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in _SINGAPORE_TRAILING and text.endswith(candidate):
            return split_last(text, candidate)
    return text
def hongkong_truncate_at_suffix(text, suffix_list):
    """Strip a recognised trailing suffix from a Hong Kong company name.

    Entries of ``suffix_list`` are tried in list order. An entry is acted on
    only when it is one of the recognised suffixes below (each starts with a
    space) *and* ``text`` ends with it.

    :param text: company name to shorten.
    :param suffix_list: candidate suffixes, in priority order.
    :return: ``split_last(text, suffix)`` for the first qualifying suffix
        (``split_last`` is defined elsewhere in this module), otherwise
        ``text`` unchanged.
    """
    recognised = (
        ' CO LIMITED',
        ' LIMITED',
        ' CO LTD',
        ' COMPANY',
        ' LTD',
    )
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in recognised and text.endswith(candidate):
            return split_last(text, candidate)
    return text
def china_truncate_at_suffix(text, suffix_list):
    """Strip a recognised trailing suffix from a Chinese company name.

    Entries of ``suffix_list`` are tried in list order. An entry is acted on
    only when it is one of the recognised suffixes below (each starts with a
    space) *and* ``text`` ends with it.

    :param text: company name to shorten.
    :param suffix_list: candidate suffixes, in priority order.
    :return: ``split_last(text, suffix)`` for the first qualifying suffix
        (``split_last`` is defined elsewhere in this module), otherwise
        ``text`` unchanged.
    """
    recognised = (
        ' GROUP CORPORATION LIMITED',
        ' CORPORATION LIMITED',
        ' GROUP CORPORATION',
        ' GROUP CO LIMITED',
        ' LIMITED COMPANY',
        ' COMPANY LIMITED',
        ' GROUP CO LTD',
        ' CORPORATION',
        ' CO LIMITED',
        ' GROUP CORP',
        ' CORP LTD',
        ' LIMITED',
        ' COMPANY',
        ' FACTORY',
        ' CO LTD',
        ' CO INC',
        ' CORP',
        ' INC',
        ' CO',
    )
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate in recognised and text.endswith(candidate):
            return split_last(text, candidate)
    return text
def vietnam_take_right_half(company_name: str):
    """Keep the part of the name after the first matching separator.

    Separators come from the module-level ``vietnam_right_separator_list``
    and are tried in list order; the name is cut at the first occurrence of
    the first separator it contains.

    :param company_name: name to cut.
    :return: the stripped text to the right of the separator, or the
        stripped name when no separator is present.
    """
    found = next(
        (sep for sep in vietnam_right_separator_list if sep in company_name),
        None,
    )
    if found is None:
        return company_name.strip()
    return company_name.partition(found)[2].strip()
def vietnam_take_left_half(company_name: str):
    """Keep the part of the name before the last occurrence of a separator.

    Separators come from the module-level ``vietnam_left_separator_list``
    and are tried in list order; the name is cut at the last occurrence of
    the first separator it contains.

    :param company_name: name to cut.
    :return: the stripped text to the left of the separator, or the
        stripped name when no separator is present.
    """
    found = next(
        (sep for sep in vietnam_left_separator_list if sep in company_name),
        None,
    )
    if found is None:
        return company_name.strip()
    return company_name.rpartition(found)[0].strip()
def vietnam_truncate_at_suffix(company_name: str):
    """Remove the first suffix from ``vietnam_suffix_list`` that ends the name.

    :param company_name: name to shorten.
    :return: the stripped name without that trailing suffix, or the
        stripped name when no listed suffix ends it.
    """
    tail = next(
        (
            sfx for sfx in vietnam_suffix_list
            # The containment check mirrors the original guard order.
            if sfx in company_name and company_name.endswith(sfx)
        ),
        None,
    )
    if tail is None:
        return company_name.strip()
    return company_name.rpartition(tail)[0].strip()
def india_truncate_at_suffix(text, suffix_list):
    """Strip the first applicable suffix from an Indian company name.

    Entries of ``suffix_list`` are tried in list order. For an entry that
    occurs in ``text`` one of three rules decides whether it applies:

    * ``' CO LIMITED'`` applies unless ``text`` contains ``' AND CO LIMITED'``;
    * the short suffixes in ``trailing_only`` apply only when ``text`` ends
      with them;
    * every other suffix applies wherever it occurs.

    :param text: company name to shorten.
    :param suffix_list: candidate suffixes, in priority order.
    :return: ``split_last(text, suffix)`` for the first applicable suffix
        (``split_last`` is defined elsewhere in this module), otherwise
        ``text`` unchanged.
    """
    trailing_only = (' CO', ' INC', ' LTD', ' LIMITED', ' CO I', ' I')
    for candidate in suffix_list:
        if candidate not in text:
            continue
        if candidate == ' CO LIMITED':
            applies = ' AND CO LIMITED' not in text
        elif candidate in trailing_only:
            applies = text.endswith(candidate)
        else:
            applies = True
        if applies:
            return split_last(text, candidate)
    return text
def mexico_truncate_at_suffix(cleaned_name):
    """Remove the first suffix from ``mexico_suffix_list`` that ends the name.

    :param cleaned_name: name to shorten.
    :return: the stripped name without that trailing suffix, or the
        stripped name when no listed suffix ends it.
    """
    tail = next(
        (
            sfx for sfx in mexico_suffix_list
            # The containment check mirrors the original guard order.
            if sfx in cleaned_name and cleaned_name.endswith(sfx)
        ),
        None,
    )
    if tail is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(tail)[0].strip()
def nigeria_truncate_at_suffix(cleaned_name):
    """Remove the first suffix from ``nigeria_suffix_list`` that ends the name.

    :param cleaned_name: name to shorten.
    :return: the stripped name without that trailing suffix, or the
        stripped name when no listed suffix ends it.
    """
    tail = next(
        (sfx for sfx in nigeria_suffix_list if cleaned_name.endswith(sfx)),
        None,
    )
    if tail is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(tail)[0].strip()
def peru_truncate_at_suffix(cleaned_name, peru_suffix_list):
    """Remove the first listed suffix that ends ``cleaned_name``.

    :param cleaned_name: name to shorten.
    :param peru_suffix_list: candidate suffixes, tried in list order.
    :return: the stripped name without that trailing suffix, or the
        stripped name when no listed suffix ends it.
    """
    tail = next(
        (sfx for sfx in peru_suffix_list if cleaned_name.endswith(sfx)),
        None,
    )
    if tail is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(tail)[0].strip()
def lesotho_truncate_at_suffix(cleaned_name, lesotho_suffix_list):
    """Remove the first listed suffix that ends ``cleaned_name``.

    :param cleaned_name: name to shorten.
    :param lesotho_suffix_list: candidate suffixes, tried in list order.
    :return: the stripped name without that trailing suffix, or the
        stripped name when no listed suffix ends it.
    """
    tail = next(
        (sfx for sfx in lesotho_suffix_list if cleaned_name.endswith(sfx)),
        None,
    )
    if tail is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(tail)[0].strip()
def germany_truncate_at_suffix(cleaned_name, germany_suffix_list):
    """Remove the first listed suffix that ends ``cleaned_name``.

    :param cleaned_name: name to shorten.
    :param germany_suffix_list: candidate suffixes, tried in list order.
    :return: the stripped name without that trailing suffix, or the
        stripped name when no listed suffix ends it.
    """
    tail = next(
        (sfx for sfx in germany_suffix_list if cleaned_name.endswith(sfx)),
        None,
    )
    if tail is None:
        return cleaned_name.strip()
    return cleaned_name.rpartition(tail)[0].strip()
def america_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a US company name (first suffix list).

    The name is normalised with ``clean_company_name`` and shortened with
    ``america_truncate_at_suffix_first`` using ``frist_america_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned name when the stripped
        shortened form has fewer than 8 characters; otherwise the shortened
        form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = america_truncate_at_suffix_first(cleaned_name, frist_america_suffix_list)
    if len(shortened.strip()) < 8:
        return cleaned_name
    # NOTE(review): returned without .strip(), unlike the sibling
    # *_company_abbr functions; kept as-is so existing output is unchanged.
    return shortened
def america_company_abbr_second(company_name: str) -> "str | None":
    """Abbreviate a US company name (second suffix list).

    The name is normalised with ``clean_company_name`` and shortened with
    ``america_truncate_at_suffix_second`` using ``SECOND_AMERICA_SUFFIX_LIST``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned name when the stripped
        shortened form has fewer than 5 characters; otherwise the stripped
        shortened form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = america_truncate_at_suffix_second(cleaned_name, SECOND_AMERICA_SUFFIX_LIST).strip()
    return cleaned_name if len(shortened) < 5 else shortened
def america_company_abbr_third(company_name: str) -> "str | None":
    """Abbreviate a US company name (third suffix list), rejecting short results.

    The name is normalised with ``clean_company_name`` and shortened with
    ``america_truncate_at_suffix_third`` using ``third_AMERICA_SUFFIX_LIST``.
    The length of the stripped shortened form selects the result:

    * 9 characters or fewer: ``None``;
    * 10 or 11 characters: the cleaned (un-shortened) name;
    * 12 characters or more: the stripped shortened form.

    :param company_name: raw company name; may be empty or ``None``.
    :return: see above; ``None`` as well for a falsy input.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = america_truncate_at_suffix_third(cleaned_name, third_AMERICA_SUFFIX_LIST).strip()
    size = len(shortened)
    if size <= 9:
        return None
    if size < 12:
        return cleaned_name
    return shortened
def bangladesh_company_abbr_first(company_name: str) -> "str | None":
    """Abbreviate a Bangladeshi company name with the first suffix pass only.

    The name is normalised with ``clean_company_name`` and shortened with
    ``bangladesh_truncate_at_suffix_first`` using
    ``first_bangladesh_suffix_list``. No minimum-length fallback is applied.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input, otherwise the stripped shortened form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = bangladesh_truncate_at_suffix_first(cleaned_name, first_bangladesh_suffix_list)
    return shortened.strip()
def bangladesh_company_abbr_second(company_name: str) -> "str | None":
    """Abbreviate a Bangladeshi company name with both suffix passes.

    After ``clean_company_name``, the first pass
    (``bangladesh_truncate_at_suffix_first`` with
    ``first_bangladesh_suffix_list``) runs, then the second pass
    (``bangladesh_truncate_at_suffix_second`` with
    ``second_bangladesh_suffix_list``) runs on the stripped first-pass result.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped first-pass result when
        the stripped second-pass result has fewer than 6 characters;
        otherwise the stripped second-pass result.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    first_pass = bangladesh_truncate_at_suffix_first(cleaned_name, first_bangladesh_suffix_list).strip()
    second_pass = bangladesh_truncate_at_suffix_second(first_pass, second_bangladesh_suffix_list).strip()
    return first_pass if len(second_pass) < 6 else second_pass
def chile_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Chilean company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``chile_truncate_at_suffix`` using ``first_chile_SUFFIX_LIST``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned name when the stripped
        shortened form has fewer than 8 characters; otherwise the stripped
        shortened form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = chile_truncate_at_suffix(cleaned_name, first_chile_SUFFIX_LIST).strip()
    return cleaned_name if len(shortened) < 8 else shortened
def rwanda_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Rwandan company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``rwanda_truncate_at_suffix`` using ``FIRST_Rwanda_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned name when the stripped
        shortened form has fewer than 6 characters; otherwise the stripped
        shortened form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = rwanda_truncate_at_suffix(cleaned_name, FIRST_Rwanda_suffix_list).strip()
    return cleaned_name if len(shortened) < 6 else shortened
def philippines_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Philippine company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``philippines_truncate_at_suffix`` using ``FIRST_philippines_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned name when the stripped
        shortened form has fewer than 6 characters; otherwise the stripped
        shortened form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = philippines_truncate_at_suffix(cleaned_name, FIRST_philippines_suffix_list).strip()
    return cleaned_name if len(shortened) < 6 else shortened
def colombia_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Colombian company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``colombia_truncate_at_suffix`` using ``FIRST_colombia_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned name when the stripped
        shortened form has fewer than 6 characters; otherwise the stripped
        shortened form.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = colombia_truncate_at_suffix(cleaned_name, FIRST_colombia_suffix_list).strip()
    return cleaned_name if len(shortened) < 6 else shortened
def indonesia_company_abbr(company_name: str) -> "str | None":
    """Abbreviate an Indonesian company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``indonesia_truncate_at_suffix`` using ``indonesia_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 8 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = indonesia_truncate_at_suffix(cleaned_name, indonesia_suffix_list).strip()
    return shortened if len(shortened) >= 8 else cleaned_name
def venezuela_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Venezuelan company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``venezuela_truncate_at_suffix`` using ``venezuela_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 6 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = venezuela_truncate_at_suffix(cleaned_name, venezuela_suffix_list).strip()
    return shortened if len(shortened) >= 6 else cleaned_name
def uzbekistan_company_abbr(company_name):
    """Abbreviate an Uzbek company name.

    The upper-cased name is passed through
    ``kaz_extract_text_from_enclosers`` and ``clean_company_name``; then the
    first entry of ``kaz_heads`` that the result starts with, if any, is
    removed via ``remove_prefix``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned upper-cased original
        when the processed name has fewer than 8 characters; otherwise the
        stripped processed name.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    core = clean_company_name(kaz_extract_text_from_enclosers(upper_name))
    leading = next((head for head in kaz_heads if core.startswith(head)), None)
    if leading is not None:
        core = remove_prefix(core, leading)
    if len(core) >= 8:
        return core.strip()
    return clean_company_name(upper_name)
def kazakhstan_company_abbr(company_name):
    """Abbreviate a Kazakh company name.

    The upper-cased name is passed through
    ``kaz_extract_text_from_enclosers`` and ``clean_company_name``; then the
    first entry of ``kaz_heads`` that the result starts with, if any, is
    removed via ``remove_prefix``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned upper-cased original
        when the processed name has fewer than 8 characters; otherwise the
        stripped processed name.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    core = clean_company_name(kaz_extract_text_from_enclosers(upper_name))
    leading = next((head for head in kaz_heads if core.startswith(head)), None)
    if leading is not None:
        core = remove_prefix(core, leading)
    if len(core) >= 8:
        return core.strip()
    return clean_company_name(upper_name)
def moldova_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Moldovan company name using two suffix passes.

    After ``clean_company_name``, ``moldova_truncate_at_suffix`` runs with
    ``moldova_suffix_list`` and its result is fed to
    ``moldova_truncate_at_suffix_second`` with ``moldova_suffix_list2``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped two-pass result when it
        has at least 6 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    first_pass = moldova_truncate_at_suffix(cleaned_name, moldova_suffix_list)
    shortened = moldova_truncate_at_suffix_second(first_pass, moldova_suffix_list2).strip()
    return shortened if len(shortened) >= 6 else cleaned_name
def singapore_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Singapore company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``singapore_truncate_at_suffix`` using ``singapore_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 8 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = singapore_truncate_at_suffix(cleaned_name, singapore_suffix_list).strip()
    return shortened if len(shortened) >= 8 else cleaned_name
def hongkong_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Hong Kong company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``hongkong_truncate_at_suffix`` using ``hongkong_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 6 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = hongkong_truncate_at_suffix(cleaned_name, hongkong_suffix_list).strip()
    return shortened if len(shortened) >= 6 else cleaned_name
def china_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Chinese company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``china_truncate_at_suffix`` using ``china_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 6 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = china_truncate_at_suffix(cleaned_name, china_suffix_list).strip()
    return shortened if len(shortened) >= 6 else cleaned_name
def vietnam_company_abbr(company_name: str) -> "str | None":
    """Abbreviate a Vietnamese company name.

    After ``clean_company_name`` the name goes through three steps in order:
    ``vietnam_take_right_half``, ``vietnam_take_left_half`` and
    ``vietnam_truncate_at_suffix``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the result of the three steps when
        it has at least 8 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    right_half = vietnam_take_right_half(cleaned_name)
    left_half = vietnam_take_left_half(right_half)
    shortened = vietnam_truncate_at_suffix(left_half)
    return shortened if len(shortened) >= 8 else cleaned_name
def india_company_abbr(company_name):
    """Abbreviate an Indian company name.

    The upper-cased name is normalised with ``clean_company_name``; the first
    entry of ``ind_head`` that it starts with, if any, is removed via
    ``remove_prefix``; then ``india_truncate_at_suffix`` runs with
    ``india_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the cleaned upper-cased original
        when the stripped shortened form has fewer than 8 characters;
        otherwise the stripped shortened form.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    core = clean_company_name(upper_name)
    leading = next((head for head in ind_head if core.startswith(head)), None)
    if leading is not None:
        core = remove_prefix(core, leading)
    shortened = india_truncate_at_suffix(core, india_suffix_list).strip()
    if len(shortened) >= 8:
        return shortened
    return clean_company_name(upper_name)
def ukraine_truncate_at_suffix_first(text, suffix_list):
    """Cut ``text`` at the first listed suffix it contains.

    :param text: company name to shorten.
    :param suffix_list: candidate suffixes, tried in list order; a suffix
        matches anywhere in ``text``, not only at the end.
    :return: ``split_last(text, suffix)`` for the first matching suffix
        (``split_last`` is defined elsewhere in this module), otherwise
        ``text`` unchanged.
    """
    hit = next((sfx for sfx in suffix_list if sfx in text), None)
    if hit is None:
        return text
    return split_last(text, hit)
def ukraine_truncate_at_suffix_second(text, suffix_list):
    """Cut ``text`` at the first listed suffix it contains, keeping the suffix.

    :param text: company name to shorten.
    :param suffix_list: candidate suffixes, tried in list order; a suffix
        matches anywhere in ``text``, not only at the end.
    :return: ``split_last(text, suffix)`` with the suffix appended again for
        the first matching suffix (``split_last`` is defined elsewhere in
        this module), otherwise ``text`` unchanged.
    """
    hit = next((sfx for sfx in suffix_list if sfx in text), None)
    if hit is None:
        return text
    return split_last(text, hit) + hit
def ukraine_company_abbr_first(company_name):
    """Abbreviate a Ukrainian company name with the first suffix list.

    The name is upper-cased (``clean_company_name`` is not applied here) and
    cut with ``ukraine_truncate_at_suffix_first`` using
    ``ukraine_suffix_first``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input, otherwise the stripped result.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    return ukraine_truncate_at_suffix_first(upper_name, ukraine_suffix_first).strip()
def ukraine_company_abbr_second(company_name):
    """Abbreviate a Ukrainian company name with the second suffix list.

    The name is upper-cased (``clean_company_name`` is not applied here) and
    cut with ``ukraine_truncate_at_suffix_second`` using
    ``ukraine_suffix_second``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input, otherwise the stripped result.
    """
    if not company_name:
        return None
    upper_name = company_name.upper()
    return ukraine_truncate_at_suffix_second(upper_name, ukraine_suffix_second).strip()
def mexico_company_abbr(company_name):
    """Abbreviate a Mexican company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``mexico_truncate_at_suffix``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the shortened form when it has at
        least 8 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = mexico_truncate_at_suffix(cleaned_name)
    return shortened if len(shortened) >= 8 else cleaned_name
def nigeria_company_abbr(company_name):
    """Abbreviate a Nigerian company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``nigeria_truncate_at_suffix``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the shortened form when it has at
        least 4 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = nigeria_truncate_at_suffix(cleaned_name)
    return shortened if len(shortened) >= 4 else cleaned_name
def philippines_company_abbr_second(company_name):
    """Abbreviate a Philippine company name (second variant).

    The name is normalised with ``clean_company_name`` and shortened with
    ``philippines_truncate_at_suffix`` using ``FIRST_philippines_suffix_list``.
    The length test is made on the shortened form *before* stripping.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when the
        unstripped form has at least 6 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = philippines_truncate_at_suffix(cleaned_name, FIRST_philippines_suffix_list)
    if len(shortened) < 6:
        return cleaned_name
    return shortened.strip()
def england_company_abbr(company_name):
    """Abbreviate an English company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``england_truncate_at_suffix`` using ``FIRST_england_suffix_list``.
    The length test is made on the shortened form *before* stripping.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when the
        unstripped form has at least 8 characters; otherwise the cleaned name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = england_truncate_at_suffix(cleaned_name, FIRST_england_suffix_list)
    if len(shortened) < 8:
        return cleaned_name
    return shortened.strip()
def peru_company_abbr(company_name):
    """Abbreviate a Peruvian company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``peru_truncate_at_suffix`` using the module-level ``peru_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 6 characters before stripping; otherwise the cleaned
        name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = peru_truncate_at_suffix(cleaned_name, peru_suffix_list)
    if len(shortened) < 6:
        return cleaned_name
    return shortened.strip()
def lesotho_company_abbr(company_name):
    """Abbreviate a Lesotho company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``lesotho_truncate_at_suffix`` using the module-level
    ``lesotho_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 6 characters before stripping; otherwise the cleaned
        name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = lesotho_truncate_at_suffix(cleaned_name, lesotho_suffix_list)
    if len(shortened) < 6:
        return cleaned_name
    return shortened.strip()
def germany_company_abbr(company_name):
    """Abbreviate a German company name.

    The name is normalised with ``clean_company_name`` and shortened with
    ``germany_truncate_at_suffix`` using the module-level
    ``germany_suffix_list``.

    :param company_name: raw company name; may be empty or ``None``.
    :return: ``None`` for a falsy input; the stripped shortened form when it
        has at least 8 characters before stripping; otherwise the cleaned
        name.
    """
    if not company_name:
        return None
    cleaned_name = clean_company_name(company_name)
    shortened = germany_truncate_at_suffix(cleaned_name, germany_suffix_list)
    if len(shortened) < 8:
        return cleaned_name
    return shortened.strip()
# Country key -> abbreviation routine used by company_abbr().
_COMPANY_ABBR_HANDLERS = {
    'pakistan': pakistan_company_abbr,
    'mirror_pakistan': mirror_pakistan_company_abbr,
    'america': america_company_abbr,
    'america_second': america_company_abbr_second,
    'america_third': america_company_abbr_third,
    'indonesia': indonesia_company_abbr,
    'venezuela': venezuela_company_abbr,
    'uzbekistan': uzbekistan_company_abbr,
    'kazakhstan': kazakhstan_company_abbr,
    'chile': chile_company_abbr,
    'moldova': moldova_company_abbr,
    # 'bangladesh_fist' is the historical (misspelled) key; it stays so that
    # existing callers keep working, next to the correctly spelled alias.
    'bangladesh_fist': bangladesh_company_abbr_first,
    'bangladesh_first': bangladesh_company_abbr_first,
    'bangladesh_second': bangladesh_company_abbr_second,
    'rwanda': rwanda_company_abbr,
    'singapore': singapore_company_abbr,
    'hongkong': hongkong_company_abbr,
    'philippines': philippines_company_abbr,
    'philippines_second': philippines_company_abbr_second,
    'china': china_company_abbr,
    'vietnam': vietnam_company_abbr,
    'india': india_company_abbr,
    'ukraine_first': ukraine_company_abbr_first,
    'ukraine_second': ukraine_company_abbr_second,
    'mexico': mexico_company_abbr,
    'colombia': colombia_company_abbr,
    'nigeria': nigeria_company_abbr,
    'peru': peru_company_abbr,
    'lesotho': lesotho_company_abbr,
    'germany': germany_company_abbr,
    'england': england_company_abbr,
}


def company_abbr(country_name: str, company_name: str) -> "str | None":
    """Abbreviate ``company_name`` using the routine registered for a country.

    :param country_name: lookup key such as ``'germany'`` or
        ``'america_second'``; matching is exact and case-sensitive.
    :param company_name: raw company name, forwarded unchanged to the
        selected routine.
    :return: the selected routine's result, or ``company_name`` itself when
        ``country_name`` is not a registered key.
    """
    # Non-string keys can never match; checking first also avoids a
    # TypeError from hashing an unhashable value.
    handler = (
        _COMPANY_ABBR_HANDLERS.get(country_name)
        if isinstance(country_name, str)
        else None
    )
    if handler is None:
        return company_name
    return handler(company_name)
if __name__ == '__main__':
    # Manual smoke run: print each German sample next to the abbreviation
    # produced by company_abbr('germany', ...).
    german_samples = (
        'Wilhelm Manz GmbH & Co. KG',
        'Wilhelm Zuleeg GmbH',
        'Aba Air Group Llc',
        'CAMUSAT (MAURICE) LIMITED',
        'BMTS Technology Austria GmbH & Co',
        'Arhetipo Grup SRL',
        'Boegli-Gravures SA',
        'Kronos International Inc.',
        'YAHO AUTO EXCHANGE CO. LIMITED',
        'Radpar Otomotiv Sanayi ve Ticaret Limited Şti.',
        'SERVICIOS INTERSEC S.A. DE C.V.',
        'PLASTIC SOLUTIONS DI MARTOCCIA CRISTIANS.A.S.',
        'C-Solution Elektrotechnik GbR',
        'Baumer Hhs S.R.L.',
        'AJH Druck & Technik Helge Klemt e.K.',
        'ADM Hamburg AG',
        'Lauer Ventilation UG',
        'Bankhaus J. Faisst OHG',
        'Continental Teves AG & Co.OHG',
        'Dow Produktions und Vertriebs GmbH & Co. OHG',
        'Springer Nature AG & Co. KGaA',
        'Paragon GmbH & Co. KGaA',
    )
    row_layout = "{:<50} {:>50}"
    for sample in german_samples:
        abbreviation = company_abbr('germany', sample)
        print(row_layout.format(sample, abbreviation))

    # Disabled Vietnamese samples, kept for manual checks:
    # test_cases = [
    #     'COMPANY LIMITED NGOC PHAT TM',
    #     'COMPANY LTD PHAM',
    #     'TAIHING MOULDS CO LTD',
    #     'REPRESENTATIVE OFFICE OF HETTICH SINGAPORE SEA PTE LTD IN HO CHI MINH CITY',
    #     'SAI GON WASTE SOLUTION JOINT STOCK COMPANY',
    #     'ENTERTAINMENT FISHING ROD IMPORT EXPORT TRADING COMPANY LIMI',
    #     'TPP PLUS CORP',
    #     'VILOMIX VIETNAM LLC',
    #     'SMILETECH JSC',
    #     'DUC MINH CTI CO JSC',
    #     'SMILETECH JSC',
    #     'HUVICO LTD'
    # ]
    # for test in test_cases:
    #     print(vietnam_company_abbr(test))
|