【太阳集团所有网址16877】ElasticSearch Java Api

mongo里面的数据是这样的:
doc1 = {
"freq": 1,
...
}
doc2 = {
"freq": 3,
...
}

1.restful 风格 client

restClient 走的是 HTTP 协议,使用 9200 端口。
restClient 的使用与 ElasticSearch 版本无关,这是一个很大的优势。

 /** Shared low-level REST client; it talks HTTP to the node configured in the static initializer. */
 public static RestClient restClient;
    static {
        // The provider is wired into the HTTP client as-is; no credentials are registered on it here.
        final CredentialsProvider provider = new BasicCredentialsProvider();
        final HttpHost node = new HttpHost("192.168.1.148", 9200, "http");

        // Callback that installs the credentials provider on the underlying async HTTP client.
        final RestClientBuilder.HttpClientConfigCallback installCredentials =
                new RestClientBuilder.HttpClientConfigCallback() {
                    @Override
                    public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder asyncClientBuilder) {
                        return asyncClientBuilder.setDefaultCredentialsProvider(provider);
                    }
                };

        final RestClientBuilder clientBuilder = RestClient.builder(node);
        clientBuilder.setHttpClientConfigCallback(installCredentials);
        restClient = clientBuilder.build();
    }

要求是统计出 freq=1 的文档个数、freq=2 的文档个数……这是典型的 MapReduce 任务,正好试试 mongo 的 mapreduce。感觉还行,做一些简单的聚合操作还凑合,回头再看看有没有更复杂一些的应用。

2. java api client

Java API client 通信走的是 TCP 协议,使用 9300 端口

    // Port handed to every InetSocketTransportAddress created in the static initializer below.
    private static int port = 9300;
    // Passed as the "cluster.name" setting in the static initializer below.
    // NOTE(review): the value looks like an IP address rather than a cluster name — confirm.
    private static String cluster = "192.168.1.1";
    private static String index = "idx-comment";   // recommendation data
    // NOTE(review): "commen" may be a typo for "comment" — verify against the real mapping type.
    private static String type = "commen";

       // Builds the shared TransportClient from the comma-separated host list in testhost.
       static {
        // Connection style for Elasticsearch 2.0.0
        String[] hosts = testhost.split(",");
        // Sniffing is disabled here: only the hosts listed explicitly are registered.
        Settings settings = Settings.settingsBuilder()
                .put("cluster.name", cluster)
                .put("client.transport.sniff", false)
                .build();
        client = TransportClient.builder().settings(settings).build();
        for (String host : hosts) {
            String trimmedHost = host.trim();
            if (trimmedHost.isEmpty()) {
                // InetAddress.getByName("") resolves to loopback, so an empty entry
                // (e.g. "a,,b") would silently register localhost; skip it instead.
                continue;
            }
            // Resolve each host on its own so that one unknown name does not prevent
            // the remaining hosts from being registered.
            try {
                client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(trimmedHost), port));
            } catch (UnknownHostException e) {
                System.err.println("Unable to resolve Elasticsearch host: " + trimmedHost);
                e.printStackTrace();
            }
        }
      // Connection style for Elasticsearch 5.0.0
  /*      Settings esSettings = Settings.builder()
                .put("cluster.name", cluster) // set the name of the ES instance
                .put("client.transport.sniff", true) // automatically sniff the state of the whole cluster and add the other ES nodes' IPs to the local client list
                .build();
        try {
            client = new PreBuiltTransportClient(esSettings)
                    .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(testhost), 9300));
            System.out.println("ElasticsearchClient 连接成功");
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }*/
       }
   #!/usr/bin/env python

import pymongo

from bson.code import Code

def calc_freq_distribution(collection_handler):
    """Count documents per distinct ``freq`` value and dump the counts to a CSV file.

    Runs a MongoDB map-reduce over ``collection_handler``: the map step emits
    ``{count: 1}`` keyed by each document's ``freq`` field, and the reduce step
    sums the emitted counts. The result is stored in the collection named
    ``<collection name>_freqdist`` and also written to
    ``<collection name>_freqdist.csv`` in the current directory, one
    ``freq,count`` line per distinct value.

    :param collection_handler: pymongo collection whose documents carry a
        ``freq`` field; must expose ``name`` and ``map_reduce``.
    """
    out_collection_name = collection_handler.name + '_freqdist'

    # Named map_fn/reduce_fn so the builtins map() and reduce() are not shadowed.
    map_fn = Code("function () {"
                  "  emit(this.freq, {count:1});"
                  "}")

    # The reduce output has the same shape as the emitted values ({count: n}),
    # so it stays correct if MongoDB re-reduces partial results.
    reduce_fn = Code("function (key, values) {"
                     "  var total = 0;"
                     "  for (var i = 0; i < values.length; i++) {"
                     "    total += values[i].count;"
                     "  }"
                     "  return {count:total};"
                     "}")

    result = collection_handler.map_reduce(map_fn, reduce_fn, out=out_collection_name)

    fname = out_collection_name + '.csv'
    with open(fname, 'w') as f:
        for doc in result.find():
            # _id is the freq value; value.count is the number of documents with it.
            f.write(','.join([str(doc['_id']), str(doc['value']['count'])]) + '\n')

if __name__ == '__main__':
    # NOTE(review): pymongo.Connection is the legacy client class; newer PyMongo
    # releases replace it with MongoClient — confirm the installed version.
    conn = pymongo.Connection(['192.168.1.1'], 27018)

    # calc_freq_distribution returns nothing, so each print shows None; the
    # useful output is the *_freqdist collection and the CSV file it writes.
    input_collection = conn.cname.things
    print(calc_freq_distribution(input_collection))

    merge_ham = conn.antispam.mergeham
    print(calc_freq_distribution(merge_ham))

3.拿到所有数据

// Fetches every document matching the term query by paging through a scroll cursor.
QueryBuilder qb = termQuery("multi", "test");
// Keep-alive (in milliseconds) renewed on every scroll request.
TimeValue keepAlive = new TimeValue(60000);

SearchResponse scrollResp = client.prepareSearch(test)
        .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC)
        .setScroll(keepAlive)
        .setQuery(qb)
        .setSize(100).get(); //max of 100 hits will be returned for each scroll
try {
    // Scroll until no hits are returned; an empty page marks the end of the scroll.
    // Checking before the first pass avoids a needless scroll request when the
    // initial page is already empty.
    while (scrollResp.getHits().getHits().length != 0) {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
            //Handle the hit...
        }

        scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(keepAlive).execute().actionGet();
    }
} finally {
    // Release the server-side scroll context right away instead of letting it
    // linger until the keep-alive expires.
    client.prepareClearScroll().addScrollId(scrollResp.getScrollId()).get();
}

本文由太阳集团所有网址16877发布于太阳集团城网址送彩金,转载请注明出处:【太阳集团所有网址16877】ElasticSearch Java Api

您可能还会对下面的文章感兴趣: