Skip to content

itmare/search_api_service

 
 

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

19 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

  1. 서버구조: 스프링 부트 (체크아웃 후 메이븐 라이브러리 import 이후 바로 자체 서버가 기동됨)

  1. 엘라스틱서치 http://gofile.me/32vUe/GykSKPQqd 다운로드 (패스워드 : 2580)
    설치내역
    자동완성 플러그인
    은전한님 플러그인
    스펠체커용 ICU 플러그인
    상점정보 10만건

  2. 세팅
    한글형태소분석기
    스펠체커
    자동완성분석기


-PUT store
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "korean": {
            "type": "custom",
            "tokenizer": "seunjeon_default_tokenizer"
          },
          "nfd_normalized": {
            "tokenizer": "standard",
            "char_filter": [
              "nfd_normalizer"
            ]
          },
          "my-index-edge-jaso": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["my-jaso-filter", "edge_filter"]
          },
          "my-index-full-jaso": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["edge_reverse_filter", "my-jaso-filter", "edge_filter"]
          },
          "my-search-jaso": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["my-jaso-filter"]
          }
        },
        "tokenizer": {
          "seunjeon_default_tokenizer": {
            "type": "seunjeon_tokenizer",
            "index_eojeol": false,
            "user_words": ["낄끼+빠빠,-100", "c\\+\\+", "어그로", "버카충", "abc마트"]
          }
        },
        "char_filter": {
          "nfd_normalizer": {
            "type": "icu_normalizer",
            "name": "nfc",
            "mode": "decompose"
          }
        },
        "filter": {
          "my-jaso-filter": {
            "type": "qj-analyzer-filter",
            "tokenizer": "keyword",
            "mode": "simple_jaso",
            "jaso_typo": true
          },
          "edge_filter": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 10
          },
          "edge_reverse_filter": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 10,
            "side": "back"
          }
        }
      }
    }
  }
}
  3. 매핑

-PUT http://localhost:9200/store/_mappings/info

{
  "properties": {
    "address": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "category": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "category1": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "category2": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "description": {
      "type": "text",
      "analyzer": "korean",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "region": {
      "type": "keyword"
    },
    "idx": {
      "type": "long"
    },
    "lat": {
      "type": "float"
    },
    "link": {
      "type": "keyword"
    },
    "lng": {
      "type": "float"
    },
    "location": {
      "type": "geo_point"
    },
    "rate": {
      "type": "long"
    },
    "roadaddress": {
      "type": "text",
      "analyzer": "korean",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "telephone": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    },
    "title": {
      "type": "text",
      "analyzer": "korean",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        },
        "spells": {
          "type": "text",
          "fielddata": true,
          "analyzer": "nfd_normalized"
        },
        "auto": {
          "type": "text",
          "fielddata": true,
          "analyzer": "my-index-edge-jaso",
          "search_analyzer": "my-search-jaso"
        }
      }
    }
  }
}
  4. 로그스태쉬를 이용한 색인
input {
  # Read the rows to index from MySQL through the JDBC input plugin.
  jdbc {
    jdbc_driver_library => "/home/ec2-user/logstash-6.1.3/mysql-connector-java-5.1.18.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://123.142.190.80:23306/foodblog"
    jdbc_user => "crawl_user"
    # Do not commit the database password: Logstash substitutes ${VAR}
    # from the environment, so export JDBC_PASSWORD before starting it.
    jdbc_password => "${JDBC_PASSWORD}"
    # Only rows that have a latitude, capped at 100000 rows.
    statement => "SELECT * FROM naveraddress WHERE not isnull(lat) LIMIT 100000"
    # Fetch the result set in pages of 5000 rows.
    jdbc_paging_enabled => "true"
    jdbc_page_size => "5000"
    # Disabled cron-style schedule; without it the query runs once.
    # schedule => "* * * * *"
  }
}

#    WHERE id > :sql_last_value
#    use_column_value => true
#    tracking_column => id

filter {
  # Break the delimited source columns into arrays first so the field
  # references below can index into them.
  mutate {
    split => {
      "category" => ">"
      "address" => " "
    }
  }

  # Derive the top-level category, the region (first address token) and the
  # "lat,lng" string used for the location field; drop Logstash metadata.
  mutate {
    add_field => {
      "category1" => "%{[category][0]}"
      "region" => "%{[address][0]}"
      "location" => ["%{lat},%{lng}"]
    }
    remove_field => ["@version", "@timestamp"]
  }

  # Add category2 only when a second level exists. An unresolved
  # %{...} reference is left as literal text, so without this guard a
  # category with no ">" would be indexed as "%{[category][1]}".
  if [category][1] {
    mutate {
      add_field => { "category2" => "%{[category][1]}" }
    }
  }
}

output {
  # Print every event to the console for debugging.
  stdout {
    codec => rubydebug
  }

  # Index every event into the local Elasticsearch "store" index as type "info".
  elasticsearch {
    hosts => ["127.0.0.1:9200"]
    index => "store"
    document_type => "info"
    # Do not let Logstash install its own index template.
    manage_template => false
  }
}

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages

  • JavaScript 44.4%
  • HTML 30.4%
  • CSS 21.9%
  • Java 1.9%
  • PHP 1.0%
  • Shell 0.2%
  • Batchfile 0.2%