-
Notifications
You must be signed in to change notification settings - Fork 516
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add KWS examples for Java API (#930)
- Loading branch information
1 parent
bcaa6df
commit 5860e45
Showing
11 changed files
with
295 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// Copyright 2024 Xiaomi Corporation | ||
|
||
// This file shows how to use a keyword spotter model to spot keywords from | ||
// a file. | ||
|
||
import com.k2fsa.sherpa.onnx.*; | ||
|
||
public class KyewordSpotterFromFile { | ||
public static void main(String[] args) { | ||
// please download test files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models | ||
String encoder = | ||
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"; | ||
String decoder = | ||
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"; | ||
String joiner = | ||
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"; | ||
String tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"; | ||
|
||
String keywordsFile = | ||
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"; | ||
|
||
String waveFilename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; | ||
|
||
OnlineTransducerModelConfig transducer = | ||
OnlineTransducerModelConfig.builder() | ||
.setEncoder(encoder) | ||
.setDecoder(decoder) | ||
.setJoiner(joiner) | ||
.build(); | ||
|
||
OnlineModelConfig modelConfig = | ||
OnlineModelConfig.builder() | ||
.setTransducer(transducer) | ||
.setTokens(tokens) | ||
.setNumThreads(1) | ||
.setDebug(true) | ||
.build(); | ||
|
||
KeywordSpotterConfig config = | ||
KeywordSpotterConfig.builder() | ||
.setOnlineModelConfig(modelConfig) | ||
.setKeywordsFile(keywordsFile) | ||
.build(); | ||
|
||
KeywordSpotter kws = new KeywordSpotter(config); | ||
OnlineStream stream = kws.createStream(); | ||
|
||
WaveReader reader = new WaveReader(waveFilename); | ||
|
||
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
|
||
float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())]; | ||
stream.acceptWaveform(tailPaddings, reader.getSampleRate()); | ||
while (kws.isReady(stream)) { | ||
kws.decode(stream); | ||
|
||
String keyword = kws.getResult(stream).getKeyword(); | ||
if (!keyword.isEmpty()) { | ||
System.out.printf("Detected keyword: %s\n", keyword); | ||
} | ||
} | ||
|
||
kws.release(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI shared library, Java API jar), downloads the
# keyword-spotting test model if it is not present, and then runs
# ./KeywordSpotterFromFile.java.
#
# Paths are relative, so run this script from the directory that contains it.

# -e: stop at the first failing command; -x: echo every command.
set -ex

# Build the JNI library only when neither the .dylib nor the .so is already there.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the test model; tokens.txt is used as the "already unpacked" marker.
if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page as the archive.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

# Quote the library path so a working directory containing spaces does not split the argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./KeywordSpotterFromFile.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/KeywordSpotter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright 2024 Xiaomi Corporation | ||
|
||
package com.k2fsa.sherpa.onnx; | ||
|
||
public class KeywordSpotter { | ||
static { | ||
System.loadLibrary("sherpa-onnx-jni"); | ||
} | ||
|
||
private long ptr = 0; | ||
|
||
public KeywordSpotter(KeywordSpotterConfig config) { | ||
ptr = newFromFile(config); | ||
} | ||
|
||
public OnlineStream createStream(String keywords) { | ||
long p = createStream(ptr, keywords); | ||
return new OnlineStream(p); | ||
} | ||
|
||
public OnlineStream createStream() { | ||
long p = createStream(ptr, ""); | ||
return new OnlineStream(p); | ||
} | ||
|
||
public void decode(OnlineStream s) { | ||
decode(ptr, s.getPtr()); | ||
} | ||
|
||
public boolean isReady(OnlineStream s) { | ||
return isReady(ptr, s.getPtr()); | ||
} | ||
|
||
public KeywordSpotterResult getResult(OnlineStream s) { | ||
Object[] arr = getResult(ptr, s.getPtr()); | ||
String keyword = (String) arr[0]; | ||
String[] tokens = (String[]) arr[1]; | ||
float[] timestamps = (float[]) arr[2]; | ||
return new KeywordSpotterResult(keyword, tokens, timestamps); | ||
} | ||
|
||
protected void finalize() throws Throwable { | ||
release(); | ||
} | ||
|
||
// You'd better call it manually if it is not used anymore | ||
public void release() { | ||
if (this.ptr == 0) { | ||
return; | ||
} | ||
delete(this.ptr); | ||
this.ptr = 0; | ||
} | ||
|
||
private native long newFromFile(KeywordSpotterConfig config); | ||
|
||
private native void delete(long ptr); | ||
|
||
private native long createStream(long ptr, String keywords); | ||
|
||
private native void decode(long ptr, long streamPtr); | ||
|
||
private native boolean isReady(long ptr, long streamPtr); | ||
|
||
private native Object[] getResult(long ptr, long streamPtr); | ||
} |
77 changes: 77 additions & 0 deletions
77
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/KeywordSpotterConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Copyright 2024 Xiaomi Corporation | ||
|
||
package com.k2fsa.sherpa.onnx; | ||
|
||
public class KeywordSpotterConfig { | ||
private final FeatureConfig featConfig; | ||
private final OnlineModelConfig modelConfig; | ||
|
||
private final int maxActivePaths; | ||
private final String keywordsFile; | ||
private final float keywordsScore; | ||
private final float keywordsThreshold; | ||
private final int numTrailingBlanks; | ||
|
||
private KeywordSpotterConfig(Builder builder) { | ||
this.featConfig = builder.featConfig; | ||
this.modelConfig = builder.modelConfig; | ||
this.maxActivePaths = builder.maxActivePaths; | ||
this.keywordsFile = builder.keywordsFile; | ||
this.keywordsScore = builder.keywordsScore; | ||
this.keywordsThreshold = builder.keywordsThreshold; | ||
this.numTrailingBlanks = builder.numTrailingBlanks; | ||
} | ||
|
||
public static Builder builder() { | ||
return new Builder(); | ||
} | ||
|
||
public static class Builder { | ||
private FeatureConfig featConfig = FeatureConfig.builder().build(); | ||
private OnlineModelConfig modelConfig = OnlineModelConfig.builder().build(); | ||
private int maxActivePaths = 4; | ||
private String keywordsFile = "keywords.txt"; | ||
private float keywordsScore = 1.5f; | ||
private float keywordsThreshold = 0.25f; | ||
private int numTrailingBlanks = 2; | ||
|
||
public KeywordSpotterConfig build() { | ||
return new KeywordSpotterConfig(this); | ||
} | ||
|
||
public Builder setFeatureConfig(FeatureConfig featConfig) { | ||
this.featConfig = featConfig; | ||
return this; | ||
} | ||
|
||
public Builder setOnlineModelConfig(OnlineModelConfig modelConfig) { | ||
this.modelConfig = modelConfig; | ||
return this; | ||
} | ||
|
||
public Builder setMaxActivePaths(int maxActivePaths) { | ||
this.maxActivePaths = maxActivePaths; | ||
return this; | ||
} | ||
|
||
public Builder setKeywordsFile(String keywordsFile) { | ||
this.keywordsFile = keywordsFile; | ||
return this; | ||
} | ||
|
||
public Builder setKeywordsScore(float keywordsScore) { | ||
this.keywordsScore = keywordsScore; | ||
return this; | ||
} | ||
|
||
public Builder setKeywordsThreshold(float keywordsThreshold) { | ||
this.keywordsThreshold = keywordsThreshold; | ||
return this; | ||
} | ||
|
||
public Builder setNumTrailingBlanks(int numTrailingBlanks) { | ||
this.numTrailingBlanks = numTrailingBlanks; | ||
return this; | ||
} | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/KeywordSpotterResult.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Copyright 2024 Xiaomi Corporation | ||
|
||
package com.k2fsa.sherpa.onnx; | ||
|
||
/**
 * Immutable result of a keyword-spotting query: the keyword plus its tokens and timestamps.
 *
 * <p>Arrays are copied on the way in and on the way out, so neither the arrays passed to the
 * constructor nor the arrays returned by the getters share storage with this object. A {@code
 * null} argument is stored, and returned, as {@code null}.
 */
public class KeywordSpotterResult {
  private final String keyword;
  private final String[] tokens;
  private final float[] timestamps;

  /**
   * Creates a result.
   *
   * @param keyword the keyword; stored as given
   * @param tokens tokens of the keyword; copied, may be {@code null}
   * @param timestamps timestamps of the keyword; copied, may be {@code null}
   */
  public KeywordSpotterResult(String keyword, String[] tokens, float[] timestamps) {
    this.keyword = keyword;
    this.tokens = tokens == null ? null : tokens.clone();
    this.timestamps = timestamps == null ? null : timestamps.clone();
  }

  /** Returns the keyword exactly as passed to the constructor. */
  public String getKeyword() {
    return keyword;
  }

  /** Returns a copy of the tokens, or {@code null} if none were given. */
  public String[] getTokens() {
    return tokens == null ? null : tokens.clone();
  }

  /** Returns a copy of the timestamps, or {@code null} if none were given. */
  public float[] getTimestamps() {
    return timestamps == null ? null : timestamps.clone();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters