ES 需要先安装插件 ingest-attachment(在每个节点上执行 `bin/elasticsearch-plugin install ingest-attachment`,安装后需重启节点)。索引的映射(mappings)如下:
{
  "properties": {
    "attachment": {
      "properties": {
        "content": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "content_length": {
          "type": "long"
        },
        "content_type": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "date": {
          "type": "date"
        },
        "language": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "title": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        }
      }
    },
    "id": {
      "type": "keyword"
    },
    "dataId": {
      "type": "keyword"
    },
    "modelId": {
      "type": "keyword"
    },
    "fileName": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_smart"
    }
  }
}

在Java调用前需要在ES开通一个通道用于文件解析
# List all ingest pipelines currently defined in Elasticsearch
GET _ingest/pipeline

# Create the "attachment" pipeline.
# - attachment: "field" names the document field that carries the Base64-encoded
#   file; the extracted text and metadata are written to the processor's default
#   target field, "attachment".
# - remove: drops the raw Base64 payload once extraction is done so it is not
#   stored in the index. "ignore_missing" mirrors the attachment processor, so a
#   document without "content" is indexed as-is instead of failing the pipeline.
PUT _ingest/pipeline/attachment
{
  "description": "Extract attachment information",
  "processors": [
    {
      "attachment": {
        "field": "content",
        "ignore_missing": true
      }
    },
    {
      "remove": {
        "field": "content",
        "ignore_missing": true
      }
    }
  ]
}

接着在Java内调用:
// 文件需要编码填入字符串的content内也就是上面配置的字段
Base64.encode(FileUtil.file(localPath))
IndexRequest indexRequest = new IndexRequest(EsIndexConstant.FILE_ES_INDEX);
indexRequest.id(fIleEsDb.getId());
indexRequest.source(JSONUtil.toJSONString(fIleEsDb), XContentType.JSON);
indexRequest.setPipeline("attachment");
restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);