새로운 기능
- Elasticsearch 2.1.1 지원
mecab-ko-lucene-analyzer에는 변화가 없으므로, 업그레이드하지 않으셔도 됩니다.
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.1.1
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.2</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.0.2"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.1.0
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.1</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.0.1"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.0.0
#!/bin/bash
# Demo script: (re)creates an index that uses the seunjeon tokenizer and then
# runs a few sample texts through the _analyze endpoint.
#
# The line breaks are restored so that each command is an actual statement
# (on a single line everything after the shebang is just a comment), and every
# URL is double-quoted so that '?' and '&' are never treated as glob or
# control characters by the shell.

ES='http://localhost:9200'
ESIDX='seunjeon-idx'

# Drop any previous copy of the index so the settings below take effect.
curl -XDELETE "$ES/$ESIDX?pretty"

# Create the index with two custom analyzers:
#   korean      - seunjeon_tokenizer configured with user_words
#   korean_noun - seunjeon_tokenizer with index_eojeol=false and index_poses=["N"]
curl -XPUT "$ES/$ESIDX/?pretty" -d '{
  "settings" : {
    "index":{
      "analysis":{
        "analyzer":{
          "korean":{
            "type":"custom",
            "tokenizer":"seunjeon_tokenizer"
          },
          "korean_noun": {
            "type":"custom",
            "tokenizer":"noun_tokenizer"
          }
        },
        "tokenizer": {
          "seunjeon_tokenizer": {
            "type": "seunjeon_tokenizer",
            "user_words": ["낄끼빠빠,-100", "버카충"]
          },
          "noun_tokenizer": {
            "type": "seunjeon_tokenizer",
            "index_eojeol": false,
            "index_poses": ["N"]
          }
        }
      }
    }
  }
}'

# Pause briefly after index creation before issuing the analyze requests.
sleep 1

echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean&pretty" -d '낄끼빠빠'
echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean&pretty" -d '삼성전자'
echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean&pretty" -d '슬픈'
echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean_noun&pretty" -d '꽃이피다'
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.0</version> </dependency> </dependencies>
import org.bitbucket.eunjeon.seunjeon.Analyzer

// Morphological analysis
Analyzer.parse("아버지가방에들어가신다.").foreach(println)

// Eojeol (word-phrase) analysis
Analyzer.parseEojeol("아버지가방에들어가신다.").foreach(println)
// or
Analyzer.parseEojeol(Analyzer.parse("아버지가방에들어가신다.")).foreach(println)

/**
  * Adding user dictionary entries.
  * Entry format: surface,cost
  *   surface: the word
  *   cost: occurrence cost of the word; the smaller it is, the more likely
  *         the word is to appear.
  */
Analyzer.setUserDict(Seq("덕후", "버카충,-100", "낄끼빠빠").toIterator)
Analyzer.parse("덕후냄새가 난다.").foreach(println)

// Base form of inflected words
Analyzer.parse("슬픈").flatMap(_.deInflect()).foreach(println)

// Compound noun decomposition
Analyzer.parse("삼성전자").flatMap(_.deCompound()).foreach(println)