🔤 자동완성 & ngram 검색 전략

이 문서는 검색어 자동완성 기능을 구현할 때 사용되는 Elasticsearch 전략을 설명합니다.
prefix, edge_ngram, completion 세 가지 접근 방식과 각각의 장단점, 설정 방법을 다룹니다.


1. 📌 자동완성 구현 방식 비교

| 방식 | 장점 | 단점 |
| --- | --- | --- |
| prefix | 설정 간단, 기본 기능 | 검색 정확도 낮음, 노이즈 발생 |
| edge_ngram | 검색어 조합 다양, 부분일치 가능 | 인덱스 용량 증가, 튜닝 필요 |
| completion | 속도 빠름, 추천어에 적합 | 기능 제한, 스코어 커스터마이징 어려움 |

2. ✏️ prefix 쿼리 예시

{
  "query": {
    "prefix": {
      "title": "카카"
    }
  }
}
  • 사용자가 “카카” 입력 시 “카카오”, “카카시”, “카카푸” 등 반환
  • 정확도보다 간단한 시작 단어 기반 매칭에 적합

3. 🧱 edge_ngram 기반 분석기 설정 예시

매핑 설정

{
  "settings": {
    "analysis": {
      "tokenizer": {
        "autocomplete_tokenizer": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20,
          "token_chars": ["letter", "digit"]
        }
      },
      "analyzer": {
        "autocomplete": {
          "tokenizer": "autocomplete_tokenizer",
          "filter": ["lowercase"]
        },
        "autocomplete_search": {
          "tokenizer": "standard",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      }
    }
  }
}
  • autocomplete_tokenizer: edge_ngram 기반으로 부분 토큰 생성
  • search_analyzer: 표준 분석기로 검색어 그대로 사용

4. 🔍 검색 쿼리 예시 (edge_ngram)

{
  "query": {
    "match": {
      "title": {
        "query": "카카"
      }
    }
  }
}
  • ์‚ฌ์šฉ์ž๊ฐ€ ์นด์นด๋ผ๊ณ  ๊ฒ€์ƒ‰ํ•˜๋ฉด ์นด์นด์˜ค, ์นด์นด์‹œ, ์นด์นดXX ๋ชจ๋‘ ๊ฒ€์ƒ‰ ๊ฐ€๋Šฅ
  • ํ† ํฐํ™” ์˜ˆ์‹œ (์นด์นด์˜ค): ์นด, ์นด์นด, ์นด์นด์˜ค

5. ⚡ completion 필드 예시

매핑 설정

{
  "mappings": {
    "properties": {
      "suggest": {
        "type": "completion"
      }
    }
  }
}

검색 쿼리

{
  "suggest": {
    "title-suggest": {
      "prefix": "카카",
      "completion": {
        "field": "suggest"
      }
    }
  }
}
  • autocomplete์— ํŠนํ™”๋œ ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ๋กœ ๋น ๋ฅธ ์ถ”์ฒœ
  • ๋‹จ์ : ์ผ๋ฐ˜์ ์ธ match ์ฟผ๋ฆฌ์™€ ํ†ตํ•ฉ ์–ด๋ ค์›€

✅ 요약 정리

| 전략 | 설명 |
| --- | --- |
| prefix | 간단하지만 정확도 낮고 유연성 부족 |
| edge_ngram | 강력한 자동완성 구현 가능, 인덱스 용량 증가 주의 |
| completion | 빠른 추천 시스템에 적합, 기능 제한 있음 |