새로운 기능
- Lucene/Solr 6.3.0 지원
- Elasticsearch 5.1.1 지원 - issue #6
- 로딩 실패시 UnsatisfiedLinkError throw 하도록 수정 - issue #5
소스 패치를 보내주신 Jaepil Jeong 님과 이슈를 등록해주신 devimapreduce 님께 감사드립니다.
import org.bitbucket.eunjeon.seunjeon.Analyzer

// Morphological analysis
Analyzer.parse("아버지가방에들어가신다.").foreach(println)

// Eojeol (word-phrase) analysis
Analyzer.parseEojeol("아버지가방에들어가신다.").foreach(println)
// or
Analyzer.parseEojeol(Analyzer.parse("아버지가방에들어가신다.")).foreach(println)

/**
  * Adding user dictionary entries.
  * Entry format: surface,cost
  *   surface: the word. '+' can be used to compose a compound noun.
  *            To register a literal '+' character, write it as '\+', e.g. 'C\+\+'.
  *   cost: occurrence cost of the word. The smaller the cost, the more likely the word appears.
  */
Analyzer.setUserDict(Seq("덕후", "버카충,-100", "낄끼+빠빠,-100", """C\+\+""").toIterator)
Analyzer.parse("덕후냄새가 난다.").foreach(println)

// Base form of inflected words
Analyzer.parse("빨라짐").flatMap(_.deInflect()).foreach(println)

// Compound noun decomposition
val ggilggi = Analyzer.parse("낄끼빠빠")
ggilggi.foreach(println)  // 낄끼빠빠
ggilggi.flatMap(_.deCompound()).foreach(println)  // 낄끼+빠빠

Analyzer.parse("C++").flatMap(_.deInflect()).foreach(println)  // C++
#!/usr/bin/env bash
# Recreates a test index that uses the seunjeon tokenizer and sends a few
# sample texts to the index's _analyze endpoint.

ES='http://localhost:9200'
ESIDX='seunjeon-idx'

# Delete the index, then pause briefly before the next request.
curl -XDELETE "${ES}/${ESIDX}?pretty"
sleep 1

# Create the index with a custom "korean" analyzer backed by a
# seunjeon_tokenizer configured with user-defined words.
curl -XPUT "${ES}/${ESIDX}/?pretty" -d '{ "settings" : { "index":{ "analysis":{ "analyzer":{ "korean":{ "type":"custom", "tokenizer":"seunjeon_default_tokenizer" } }, "tokenizer": { "seunjeon_default_tokenizer": { "type": "seunjeon_tokenizer", "index_eojeol": false, "user_words": ["낄끼+빠빠,-100", "c\\+\\+", "어그로", "버카충", "abc마트"] } } } } } }'
sleep 1

# Each echo prints a label line before the corresponding analyze request
# (presumably the expected tokens for that input).
echo "# 삼성/N 전자/N"
curl -XGET "${ES}/${ESIDX}/_analyze?analyzer=korean&pretty" -d '삼성전자'

echo "# 빠르/V 지/V"
curl -XGET "${ES}/${ESIDX}/_analyze?analyzer=korean&pretty" -d '빨라짐'

echo "# 슬프/V"
curl -XGET "${ES}/${ESIDX}/_analyze?analyzer=korean&pretty" -d '슬픈'

echo "# 새롭/V 사전/N 생성/N"
curl -XGET "${ES}/${ESIDX}/_analyze?analyzer=korean&pretty" -d '새로운사전생성'

echo "# 낄끼/N 빠빠/N c++/N"
curl -XGET "${ES}/${ESIDX}/_analyze?analyzer=korean&pretty" -d '낄끼빠빠 c++'
./bin/elasticsearch-plugin install https://oss.sonatype.org/service/local/repositories/releases/content/org/bitbucket/eunjeon/elasticsearch-analysis-seunjeon/5.0.0.0/elasticsearch-analysis-seunjeon-5.0.0.0.zip
version | scala(java) | note |
---|---|---|
1.2.0 | 2.11(1.7), 2.12(1.8) | 추가기능 없음 |
1.1.1 | 2.10(1.7), 2.11(1.7) |
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.2.0</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.2.0"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.4.0.1
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.1.1</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.1.1"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.4.0.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.3.5.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.3.3.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.3.2.1
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.2.0.1
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.1.0</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.1.0"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.3.2.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.3.0.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.3.1.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.2.0.0
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.1.3
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.4</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.0.4"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.1.2
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.3</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.0.3"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.1.1
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.2</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.0.2"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.1.0
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.1</version> </dependency> </dependencies>
libraryDependencies += "org.bitbucket.eunjeon" %% "seunjeon" % "1.0.1"
./bin/plugin install org.bitbucket.eunjeon/elasticsearch-analysis-seunjeon/2.1.0.0
#!/bin/bash
# Recreates a test index with two seunjeon-based analyzers ("korean" and
# "korean_noun") and sends sample texts to the index's _analyze endpoint.

ES='http://localhost:9200'
ESIDX='seunjeon-idx'

# URLs are quoted so the shell does not treat '?' as a glob character or
# '&' as a background operator.
curl -XDELETE "$ES/$ESIDX?pretty"

# "korean" uses a seunjeon_tokenizer with user-defined words;
# "korean_noun" uses one with index_eojeol disabled and index_poses ["N"].
curl -XPUT "$ES/$ESIDX/?pretty" -d '{ "settings" : { "index":{ "analysis":{ "analyzer":{ "korean":{ "type":"custom", "tokenizer":"seunjeon_tokenizer" }, "korean_noun": { "type":"custom", "tokenizer":"noun_tokenizer" } }, "tokenizer": { "seunjeon_tokenizer": { "type": "seunjeon_tokenizer", "user_words": ["낄끼빠빠,-100", "버카충"] }, "noun_tokenizer": { "type": "seunjeon_tokenizer", "index_eojeol": false, "index_poses": ["N"] } } } } } }'
sleep 1

echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean&pretty" -d '낄끼빠빠'

echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean&pretty" -d '삼성전자'

echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean&pretty" -d '슬픈'

echo "========================================================================"
curl -XGET "$ES/$ESIDX/_analyze?analyzer=korean_noun&pretty" -d '꽃이피다'
<dependencies> <dependency> <groupId>org.bitbucket.eunjeon</groupId> <artifactId>seunjeon_2.11</artifactId> <version>1.0.0</version> </dependency> </dependencies>
import org.bitbucket.eunjeon.seunjeon.Analyzer

// Morphological analysis
Analyzer.parse("아버지가방에들어가신다.").foreach(println)

// Eojeol (word-phrase) analysis
Analyzer.parseEojeol("아버지가방에들어가신다.").foreach(println)
// or
Analyzer.parseEojeol(Analyzer.parse("아버지가방에들어가신다.")).foreach(println)

/**
  * Adding user dictionary entries.
  * Entry format: surface,cost
  *   surface: the word.
  *   cost: occurrence cost of the word. The smaller the cost, the more likely the word appears.
  */
Analyzer.setUserDict(Seq("덕후", "버카충,-100", "낄끼빠빠").toIterator)
Analyzer.parse("덕후냄새가 난다.").foreach(println)

// Base form of inflected words
Analyzer.parse("슬픈").flatMap(_.deInflect()).foreach(println)

// Compound noun decomposition
Analyzer.parse("삼성전자").flatMap(_.deCompound()).foreach(println)