CKIP
參考來源: https://github.com/ldkrsi/ckip_python
#python3
import re

from CKIP_python import CKIP_client
# Post-process the raw CKIP response; tagged tokens sometimes come back glued together.
# Parenthesised part-of-speech tag appended to a segmented term, e.g. "(Nab)".
# The original class "[0-9,A-Z,a-z,_]" also matched literal commas by accident.
_POS_TAG_RE = re.compile(r'\([0-9A-Za-z_]+\)')


def raw2ckip(inp):
    """Segment ``inp`` with the CKIP client and split the output into terms and tags.

    Non-breaking spaces (U+00A0) and ideographic spaces (U+3000) are removed,
    the text is split on newlines, and every non-empty line is sent to
    ``CKIP_client.ckip_client``.  Only the first element of each response is
    used; it is split on single spaces into tokens shaped like ``term(POS)``.
    NOTE(review): this assumes the client returns a sequence whose first
    element is a space-separated string -- confirm against CKIP_python.

    Args:
        inp: Text to segment; may contain several newline-separated lines.

    Returns:
        A tuple ``(all_term, all_pos)`` of two parallel lists: the terms and
        their part-of-speech tags with the surrounding parentheses removed.
        Both lists are empty when no line produced any tagged token.

    Tokens in which no tag is found are reported on stdout and skipped.
    """
    cleaned = inp.replace('\xa0', '').replace('\u3000', '')
    all_term = []
    all_pos = []
    for sentence in cleaned.split('\n'):
        if not sentence:
            continue
        result = CKIP_client.ckip_client(sentence)
        if not result:
            # None or an empty response: nothing to parse for this line.
            continue
        for tp in result[0].split(' '):
            tags = _POS_TAG_RE.findall(tp)
            if len(tags) == 1:
                pos = tags[0]
                all_term.append(tp.replace(pos, ''))
                all_pos.append(pos[1:-1])
            elif tags:
                # Several term(POS) pairs came back glued into one token:
                # peel them off left to right, one tag at a time.
                remainder = tp
                for tag in tags:
                    term, _, remainder = remainder.partition(tag)
                    all_term.append(term)
                    all_pos.append(tag[1:-1])
            else:
                print('not found pos :' + tp)
    return all_term, all_pos
Last updated