pandas处理文本关键词命中

使用pandas中的apply提取关键词

从description字段中查看命中了哪些关键词并形成新的一列

即:根据第二列(description)的文本内容匹配关键词,生成第三列(命中关键词)

pandas处理文本关键词命中

代码如下

import re
import pandas as pd
# Import the data directory path (DataPath) from the project's setting module
from setting import DataPath
from os import path


def mingzhong(das, keywords=None):
    """Return the keywords that occur in a row's ``description`` field.

    Intended for ``DataFrame.apply(mingzhong, axis=1)``: ``das`` is one row
    (any mapping-like object indexable by ``"description"``).

    Args:
        das: Row whose ``"description"`` entry holds the text to scan.
        keywords: Optional iterable of keywords to look for. When omitted,
            the built-in placeholder list is used; replace it with the real
            keywords.

    Returns:
        A list of the keywords found in the description, in the same order
        as ``keywords``. Empty when nothing matches or when the description
        is not a string.
    """
    if keywords is None:
        keywords = ['关键词1', '关键词2', '关键词3']

    text = das["description"]
    # Blank Excel cells are read as NaN (a float), not a string; treat any
    # non-string value as "no keyword hit" instead of raising TypeError.
    if not isinstance(text, str):
        return []

    # A plain substring test matches the keyword literally (no regex
    # metacharacter surprises) and also finds keywords that appear after a
    # line break, which '.*' + keyword + '.*' with re.match could not.
    return [keyword for keyword in keywords if keyword in text]
    
 df1 = pd.read_excel(path.join(DataPath, '欲处理的文件.xlsx'),sheet_name="sheet页的名字",usecols='L,AL')
df1["命中关键词"]=df1.apply(mingzhong,axis=1)

writer = pd.ExcelWriter(path.join(DataPath,'命中.xlsx'))
df1.to_excel(writer, sheet_name='sheet1', index=False)
writer.save()
writer.close()

相关推荐