Java 调用ES解析文件内容

本文介绍如何在 Java 中借助 Elasticsearch 的 ingest-attachment 插件解析文件内容:先为索引配置字段映射,再创建用于解析附件的 ingest pipeline,最后在 Java 中将 Base64 编码后的文件写入索引。

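ES需要先安装插件 ingest-attachment(执行 `bin/elasticsearch-plugin install ingest-attachment`;8.4 及以上版本已内置,无需单独安装)。下面映射中使用的 ik_max_word / ik_smart 分词器还需要安装 IK 分词插件(analysis-ik)。索引的字段映射如下: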

{
  "properties": {
    "attachment": {
      "properties": {
        "content": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "content_length": { "type": "long" },
        "content_type": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "date": { "type": "date" },
        "language": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "title": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        }
      }
    },
    "id": { "type": "keyword" },
    "dataId": { "type": "keyword" },
    "modelId": { "type": "keyword" },
    "fileName": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_smart"
    }
  }
}

在 Java 调用之前,需要先在 ES 中创建一个 ingest pipeline(摄取管道)用于文件解析:

# List all ingest pipelines currently defined in the cluster
GET _ingest/pipeline

# Create the "attachment" pipeline.
# 1. The attachment processor parses the Base64-encoded file held in the
#    "content" field of the incoming document.
# 2. The remove processor then drops "content" so the raw Base64 payload is
#    not stored in the index. It uses the same "ignore_missing" setting as
#    the attachment processor, so a document without "content" passes
#    through the pipeline instead of failing on the remove step.
PUT _ingest/pipeline/attachment
{
  "description": "Extract attachment information",
  "processors": [
    {
      "attachment": {
        "field": "content",
        "ignore_missing": true
      }
    },
    {
      "remove": {
        "field": "content",
        "ignore_missing": true
      }
    }
  ]
}

接着在Java内调用:

        // 文件需要编码填入字符串的content内也就是上面配置的字段
        Base64.encode(FileUtil.file(localPath))        


        IndexRequest indexRequest = new IndexRequest(EsIndexConstant.FILE_ES_INDEX);
        indexRequest.id(fIleEsDb.getId());
        indexRequest.source(JSONUtil.toJSONString(fIleEsDb), XContentType.JSON);
        indexRequest.setPipeline("attachment");
        restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);

评论