CKIP

參考來源: https://github.com/ldkrsi/ckip_python

#python3
import re

from CKIP_python import CKIP_client

#處理回傳結果,有時會黏在一起!?
# One parenthesised POS tag, e.g. "(Na)".
# NOTE(review): the commas inside the character class are literal, so a comma
# is also accepted between the parentheses - confirm whether that is intended.
_POS_TAG_RE = re.compile(r'\([0-9,A-Z,a-z,_]+\)')


def _split_tagged_token(token):
    """Split one space-delimited token into tagged terms.

    Returns:
        A ``(pairs, leftover)`` tuple. ``pairs`` is a list of ``(term, pos)``
        tuples in order of appearance, with the parentheses stripped from
        ``pos``. ``leftover`` is the text that carries no tag: the whole
        token when it holds no tag at all, or whatever follows the last tag
        when several tags are glued together; otherwise ``''``.
    """
    tags = list(_POS_TAG_RE.finditer(token))
    if not tags:
        return [], token

    if len(tags) == 1:
        # Normal case: everything around the single tag is the term.
        tag = tags[0]
        term = token[:tag.start()] + token[tag.end():]
        return [(term, tag.group()[1:-1])], ''

    # Several "term(POS)" units were returned glued together without a
    # separating space: each term is the text between the previous tag and
    # the current one.
    pairs = []
    cursor = 0
    for tag in tags:
        pairs.append((token[cursor:tag.start()], tag.group()[1:-1]))
        cursor = tag.end()
    return pairs, token[cursor:]


def raw2ckip(inp):
    """Segment and POS-tag ``inp`` line by line through the CKIP client.

    Non-breaking spaces (``\\xa0``) and ideographic spaces (``\\u3000``) are
    removed, the text is split on ``'\\n'``, and every non-empty line is sent
    to ``CKIP_client.ckip_client``. The first element of each reply is split
    on single spaces and every piece is parsed as ``term(POS)``.
    # assumes the reply is a sequence whose first element is the tagged,
    # space-separated text - TODO confirm against CKIP_client.

    Args:
        inp: Raw text to process; may contain several lines.

    Returns:
        A tuple ``(all_term, all_pos)`` of two parallel lists: the terms and
        their POS tags (without parentheses).

    Side effects:
        Prints ``not found pos :<text>`` for every piece of the reply that
        carries no POS tag.
    """
    cleaned = inp.replace('\xa0', '').replace('\u3000', '')  # drop odd spaces

    all_term = []
    all_pos = []
    for sentence in cleaned.split('\n'):
        if not sentence:
            continue

        result = CKIP_client.ckip_client(sentence)
        if not result:
            # No reply (None) or an empty one: nothing to parse for this line.
            continue

        for token in result[0].split(' '):
            pairs, leftover = _split_tagged_token(token)
            for term, pos in pairs:
                all_term.append(term)
                all_pos.append(pos)
            if leftover or not pairs:
                # Report untagged text instead of dropping it silently.
                print('not found pos :' + leftover)
    return all_term, all_pos

Last updated