mirror of
https://github.com/kennycason/kumo
synced 2025-03-26 08:48:49 -04:00
parent
65e8696429
commit
4eda4a64d1
26
CLI.md
26
CLI.md
@ -22,7 +22,7 @@ Output file for the generated word cloud.
|
||||
|
||||
### --min-word-length, -mwl
|
||||
|
||||
The minimum word length required to be allowed in the word cloud. Default is 4.
|
||||
The minimum word length required to be allowed in the word cloud. Default is 2.
|
||||
|
||||
### --stop-words, -sw
|
||||
|
||||
@ -50,15 +50,15 @@ Height of the word cloud. Default is 480px.
|
||||
|
||||
The collision algorithm to use when placing text into the word cloud.
|
||||
|
||||
### --padding, -p
|
||||
|
||||
The minimum padding allowed between two words in the word cloud. This works with pixel-perfect collision detection as well. Default is 2px.
|
||||
|
||||
| Value | Description |
|
||||
|-------|-------------|
|
||||
| pixel_perfect | (default) When placing text into the word cloud check pixel-by-pixel to determine if text overlaps. |
|
||||
| rectangle | Perform simple rectangular collision detection when placing text. This results in faster generation of word clouds but they may not be aesthetically pleasing. |
|
||||
|
||||
### --padding, -p
|
||||
|
||||
The minimum padding allowed between two words in the word cloud. This works with pixel-perfect collision detection as well. Default is 2px.
|
||||
|
||||
### --background, -bg
|
||||
|
||||
One or more input sources. Input sources may be local files or URLs of an image used to define the shape of the word cloud. By default the word cloud is drawn onto a rectangle. The word cloud will place text only in places where the background image has non-transparent pixels.
|
||||
@ -71,7 +71,7 @@ Background color. Default is Black.
|
||||
|
||||
### --color, -c
|
||||
|
||||
A comma separated list of colors to use in the word cloud. Values must be provided in one of the below formats
|
||||
A comma separated list of colors to use for the word cloud text. Values must be provided in one of the below formats
|
||||
|
||||
| Format | Description/Example |
|
||||
|--------|-------------|
|
||||
@ -111,6 +111,10 @@ One or more fonts. If more than one font is listed they must be comma separated.
|
||||
|
||||
The name of the font to use. The system must have the font loaded already. Default is "Comic Sans MS".
|
||||
|
||||
### --encoding, -e
|
||||
|
||||
Character encoding. Default is UTF-8.
|
||||
|
||||
### --word-start, -ws
|
||||
|
||||
Determine where to start drawing text to the word cloud.
|
||||
@ -133,7 +137,7 @@ One or more normalizers to apply to words in the word cloud.
|
||||
| bubble | Replace alphabet characters with bubble representations, e.g. a -> ⓐ |
|
||||
| character-stripping | By default this normalizer will remove common punctuation characters. It is programmatically configurable and will eventually be supported in the CLI as well. |
|
||||
|
||||
### --tokenizer, -t
|
||||
### --tokenizer, -tok
|
||||
|
||||
Determine how to tokenize the input text. It is still TBD on the future of tokenization existing in the Kumo core package.
|
||||
|
||||
@ -141,4 +145,10 @@ Determine how to tokenize the input text. It is still TBD on the future of token
|
||||
|-------|-------------|
|
||||
| white-space | (default) Performs a simple white space tokenization of the text. |
|
||||
| english | Use an English language aware tokenizer to tokenize. (org.languagetool.language-en) |
|
||||
| chinese | Use a Chinese language aware tokenizer to tokenize. (org.languagetool.language-zh) |
|
||||
| chinese | Use a Chinese language aware tokenizer to tokenize. (org.languagetool.language-zh) |
|
||||
|
||||
|
||||
## TODO
|
||||
|
||||
- Add support for word angles.
|
||||
- Add support for word placing strategies.
|
@ -133,7 +133,7 @@ public class WordCloud {
|
||||
LOGGER.info("Saving WordCloud to " + outputFileName);
|
||||
ImageIO.write(bufferedImage, extension, new File(outputFileName));
|
||||
|
||||
} catch (IOException e) {
|
||||
} catch (final IOException e) {
|
||||
LOGGER.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
@ -159,7 +159,7 @@ public class WordCloud {
|
||||
ImageIO.write(bufferedImage, format, outputStream);
|
||||
LOGGER.debug("Done writing WordCloud image data to output stream");
|
||||
|
||||
} catch (IOException e) {
|
||||
} catch (final IOException e) {
|
||||
LOGGER.error(e.getMessage(), e);
|
||||
throw new KumoException("Could not write wordcloud to outputstream due to an IOException", e);
|
||||
}
|
||||
@ -282,31 +282,31 @@ public class WordCloud {
|
||||
return Lambda.max(wordFrequencies, on(WordFrequency.class).getFrequency());
|
||||
}
|
||||
|
||||
public void setBackgroundColor(Color backgroundColor) {
|
||||
public void setBackgroundColor(final Color backgroundColor) {
|
||||
this.backgroundColor = backgroundColor;
|
||||
}
|
||||
|
||||
public void setPadding(int padding) {
|
||||
public void setPadding(final int padding) {
|
||||
this.padding = padding;
|
||||
}
|
||||
|
||||
public void setColorPalette(ColorPalette colorPalette) {
|
||||
public void setColorPalette(final ColorPalette colorPalette) {
|
||||
this.colorPalette = colorPalette;
|
||||
}
|
||||
|
||||
public void setBackground(Background background) {
|
||||
public void setBackground(final Background background) {
|
||||
this.background = background;
|
||||
}
|
||||
|
||||
public void setFontScalar(FontScalar fontScalar) {
|
||||
public void setFontScalar(final FontScalar fontScalar) {
|
||||
this.fontScalar = fontScalar;
|
||||
}
|
||||
|
||||
public void setKumoFont(KumoFont kumoFont) {
|
||||
public void setKumoFont(final KumoFont kumoFont) {
|
||||
this.kumoFont = kumoFont;
|
||||
}
|
||||
|
||||
public void setAngleGenerator(AngleGenerator angleGenerator) {
|
||||
public void setAngleGenerator(final AngleGenerator angleGenerator) {
|
||||
this.angleGenerator = angleGenerator;
|
||||
}
|
||||
|
||||
@ -318,11 +318,11 @@ public class WordCloud {
|
||||
return skipped;
|
||||
}
|
||||
|
||||
public void setWordStartScheme(WordStartStrategy startscheme) {
|
||||
public void setWordStartScheme(final WordStartStrategy startscheme) {
|
||||
this.wordStartStrategy = startscheme;
|
||||
}
|
||||
|
||||
public void setWordPlacer(RectangleWordPlacer wordPlacer) {
|
||||
public void setWordPlacer(final RectangleWordPlacer wordPlacer) {
|
||||
this.wordPlacer = wordPlacer;
|
||||
}
|
||||
|
||||
|
@ -5,12 +5,16 @@ import com.beust.jcommander.IStringConverter;
|
||||
import com.beust.jcommander.Parameter;
|
||||
import com.beust.jcommander.ParameterException;
|
||||
import com.kennycason.kumo.CollisionMode;
|
||||
import com.kennycason.kumo.font.FontWeight;
|
||||
|
||||
import java.awt.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.apache.commons.lang3.StringUtils.isBlank;
|
||||
|
||||
/**
|
||||
* Created by kenny on 6/11/16.
|
||||
*
|
||||
@ -22,14 +26,14 @@ public class CliParameters {
|
||||
@Parameter(names = { "--type", "-t" }, description = "The type of word cloud to generate.", converter = TypeConverter.class)
|
||||
private Type type = Type.STANDARD;
|
||||
|
||||
@Parameter(names = { "--input", "-i" }, description = "One ore more input sources. Input sources may be local files or Urls. If more than one input source is provided they must be comma separated. For standard word clouds only the first input source will be analyzed. Multiple input sources are only relevant for polar or layered word clouds.")
|
||||
@Parameter(names = { "--input", "-i" }, required = true, description = "One ore more input sources. Input sources may be local files or Urls. If more than one input source is provided they must be comma separated. For standard word clouds only the first input source will be analyzed. Multiple input sources are only relevant for polar or layered word clouds.")
|
||||
private List<String> inputSources = new ArrayList<>();
|
||||
|
||||
@Parameter(names = { "--output", "-o" }, description = "Output file for the generated word cloud.")
|
||||
private List<String> outputSources = new ArrayList<>();
|
||||
@Parameter(names = { "--output", "-o" }, required = true, description = "Output file for the generated word cloud.")
|
||||
private String outputSource;
|
||||
|
||||
@Parameter(names = { "--min-word-length", "-mwl" }, description = "The minimum word length required to be allowed in the word cloud.")
|
||||
private int minWordLength = 4;
|
||||
private int minWordLength = 2;
|
||||
|
||||
@Parameter(names = { "--word-count", "-wc" }, description = "Number of words from data set to draw to word cloud. After the words are sorted by frequency, the words are attempted to be placed in descending order.")
|
||||
private int wordCount = 1000;
|
||||
@ -56,13 +60,14 @@ public class CliParameters {
|
||||
private List<String> backgrounds = new ArrayList<>();
|
||||
|
||||
@Parameter(names = { "--background-color", "-bgc" }, description = "Background color. Default is Black.", converter = ColorConverter.class)
|
||||
private Color color = Color.BLACK;
|
||||
private Color backgroundColor = Color.BLACK;
|
||||
|
||||
@Parameter(names = { "--color", "-c" }, description = "A comma separated list of colors to use in the word cloud. Values most be provided in one of the below formats. Refer to CLI.md for usage examples.", converter = ColorsConverter.class)
|
||||
private List<Color> colors = new ArrayList<>();
|
||||
@Parameter(names = { "--color", "-c" }, description = "A comma separated list of colors to use for the word cloud text. Values most be provided in one of the below formats. Refer to CLI.md for usage examples.")
|
||||
// perform actual parsing in the getter, the commas in our color format cause issues with jCommander
|
||||
private String colorRaw;
|
||||
|
||||
@Parameter(names = { "--font-scalar", "-fs" }, description = "Method to scale font. Default is Linear.", converter = FontScaleConverter.class)
|
||||
private FontScale fontScale = FontScale.LINEAR;
|
||||
private FontScalarType fontScalarType = FontScalarType.LINEAR;
|
||||
|
||||
@Parameter(names = { "--font-size-min", "-fmin" }, description = "Minimum font size, default is 10px.")
|
||||
private int fontSizeMin = 10;
|
||||
@ -71,18 +76,21 @@ public class CliParameters {
|
||||
private int fontSizeMax = 40;
|
||||
|
||||
@Parameter(names = { "--font-weight", "-fw" }, description = "One or more fonts. If more than one font is listed they must be comma separated. Default is Bold")
|
||||
private List<FontWeight> fontWeight = Arrays.asList(FontWeight.BOLD);
|
||||
private List<FontWeight> fontWeights = Arrays.asList(FontWeight.BOLD);
|
||||
|
||||
@Parameter(names = { "--font-type", "-ft" }, description = "The name of the font to use. The system must have the font loaded already. Default is \"Comic Sans MS\".")
|
||||
private String fontType = "Comic Sans MS";
|
||||
|
||||
@Parameter(names = { "--encoding", "-e" }, description = "Character Encoding. Default is UTF-8")
|
||||
private String characterEncoding = "UTF-8";
|
||||
|
||||
@Parameter(names = { "--word-start", "-ws" }, description = "Determine where to start drawing text to the word cloud.")
|
||||
private WordStart wordStart = WordStart.CENTER;
|
||||
|
||||
@Parameter(names = { "--normalizer", "-n" }, description = "One or more normalizers to apply to words in the word cloud.", converter = NormalizerConverter.class)
|
||||
private List<NormalizerType> normalizers = new ArrayList<>();
|
||||
|
||||
@Parameter(names = { "--tokenizer", "-t" }, description = "Determine where to start drawing text to the word cloud.", converter = TokenizerConverter.class)
|
||||
@Parameter(names = { "--tokenizer", "-tok" }, description = "Determine where to start drawing text to the word cloud.", converter = TokenizerConverter.class)
|
||||
private TokenizerType tokenizer = TokenizerType.WHITE_SPACE;
|
||||
|
||||
public List<String> getBackgrounds() {
|
||||
@ -93,16 +101,19 @@ public class CliParameters {
|
||||
return collisionMode;
|
||||
}
|
||||
|
||||
public Color getColor() {
|
||||
return color;
|
||||
public Color getBackgroundColor() {
|
||||
return backgroundColor;
|
||||
}
|
||||
|
||||
public List<Color> getColors() {
|
||||
return colors;
|
||||
if (isBlank(colorRaw)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return new ColorsConverter().convert(colorRaw);
|
||||
}
|
||||
|
||||
public FontScale getFontScale() {
|
||||
return fontScale;
|
||||
public FontScalarType getFontScalarType() {
|
||||
return fontScalarType;
|
||||
}
|
||||
|
||||
public int getFontSizeMax() {
|
||||
@ -117,8 +128,12 @@ public class CliParameters {
|
||||
return fontType;
|
||||
}
|
||||
|
||||
public List<FontWeight> getFontWeight() {
|
||||
return fontWeight;
|
||||
public List<FontWeight> getFontWeights() {
|
||||
return fontWeights;
|
||||
}
|
||||
|
||||
public String getCharacterEncoding() {
|
||||
return characterEncoding;
|
||||
}
|
||||
|
||||
public int getHeight() {
|
||||
@ -137,8 +152,8 @@ public class CliParameters {
|
||||
return normalizers;
|
||||
}
|
||||
|
||||
public List<String> getOutputSources() {
|
||||
return outputSources;
|
||||
public String getOutputSource() {
|
||||
return outputSource;
|
||||
}
|
||||
|
||||
public int getPadding() {
|
||||
@ -179,16 +194,11 @@ public class CliParameters {
|
||||
POLAR,
|
||||
LAYERED
|
||||
}
|
||||
public enum FontScale {
|
||||
public enum FontScalarType {
|
||||
LINEAR,
|
||||
SQRT,
|
||||
LOG
|
||||
}
|
||||
public enum FontWeight {
|
||||
PLAIN,
|
||||
BOLD,
|
||||
ITALIC
|
||||
}
|
||||
public enum WordStart {
|
||||
CENTER,
|
||||
RANDOM
|
||||
@ -215,7 +225,7 @@ public class CliParameters {
|
||||
if (input.contains(",")) {
|
||||
return parseRGBValues(input);
|
||||
}
|
||||
return new Color(Integer.parseInt(input));
|
||||
return new Color(parseNumber(input));
|
||||
|
||||
} catch (final RuntimeException e) {
|
||||
throw new ParameterException("Failed to parse Color from input: [" + input + "]");
|
||||
@ -228,9 +238,16 @@ public class CliParameters {
|
||||
throw new ParameterException("Expected to find 3 numbers (RGB), instead found " + rgb.length + ", when parsing: [" + input + "]");
|
||||
}
|
||||
return new Color(
|
||||
Integer.parseInt(rgb[0]),
|
||||
Integer.parseInt(rgb[1]),
|
||||
Integer.parseInt(rgb[2]));
|
||||
parseNumber(rgb[0]),
|
||||
parseNumber(rgb[1]),
|
||||
parseNumber(rgb[2]));
|
||||
}
|
||||
|
||||
private static int parseNumber(final String number) {
|
||||
if (number.startsWith("0x")) {
|
||||
return Integer.parseInt(number.substring(2), 16);
|
||||
}
|
||||
return Integer.parseInt(number);
|
||||
}
|
||||
}
|
||||
public static class ColorsConverter implements IStringConverter<List<Color>> {
|
||||
@ -257,10 +274,10 @@ public class CliParameters {
|
||||
return new EnumConverter<>(CollisionMode.class).convert(input);
|
||||
}
|
||||
}
|
||||
public static class FontScaleConverter implements IStringConverter<FontScale> {
|
||||
public static class FontScaleConverter implements IStringConverter<FontScalarType> {
|
||||
@Override
|
||||
public FontScale convert(final String input) {
|
||||
return new EnumConverter<>(FontScale.class).convert(input);
|
||||
public FontScalarType convert(final String input) {
|
||||
return new EnumConverter<>(FontScalarType.class).convert(input);
|
||||
}
|
||||
}
|
||||
public static class FontWeightConverter implements IStringConverter<FontWeight> {
|
||||
|
@ -1,12 +1,39 @@
|
||||
package com.kennycason.kumo.cli;
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
import com.kennycason.kumo.WordCloud;
|
||||
import com.kennycason.kumo.WordFrequency;
|
||||
import com.kennycason.kumo.bg.Background;
|
||||
import com.kennycason.kumo.bg.PixelBoundryBackground;
|
||||
import com.kennycason.kumo.cli.CliParameters.FontScalarType;
|
||||
import com.kennycason.kumo.cli.CliParameters.NormalizerType;
|
||||
import com.kennycason.kumo.cli.CliParameters.WordStart;
|
||||
import com.kennycason.kumo.font.FontWeight;
|
||||
import com.kennycason.kumo.font.KumoFont;
|
||||
import com.kennycason.kumo.font.scale.FontScalar;
|
||||
import com.kennycason.kumo.font.scale.LinearFontScalar;
|
||||
import com.kennycason.kumo.font.scale.LogFontScalar;
|
||||
import com.kennycason.kumo.font.scale.SqrtFontScalar;
|
||||
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
|
||||
import com.kennycason.kumo.nlp.normalize.*;
|
||||
import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer;
|
||||
import com.kennycason.kumo.nlp.tokenizer.EnglishWordTokenizer;
|
||||
import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer;
|
||||
import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
|
||||
import com.kennycason.kumo.palette.ColorPalette;
|
||||
import com.kennycason.kumo.wordstart.CenterWordStart;
|
||||
import com.kennycason.kumo.wordstart.RandomWordStart;
|
||||
import com.kennycason.kumo.wordstart.WordStartStrategy;
|
||||
|
||||
import java.awt.*;
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by kenny on 6/12/16.
|
||||
*/
|
||||
public class KumoCli {
|
||||
|
||||
private final CliParameters cliParameters = new CliParameters();
|
||||
|
||||
public static void main(final String[] args) {
|
||||
@ -15,6 +42,121 @@ public class KumoCli {
|
||||
|
||||
public void runWithArguments(final String[] args) {
|
||||
new JCommander(cliParameters).parse(args);
|
||||
switch (cliParameters.getType()) {
|
||||
case STANDARD:
|
||||
buildStandardWordCloud();
|
||||
break;
|
||||
case POLAR:
|
||||
case LAYERED:
|
||||
throw new UnsupportedOperationException("Unsupported type: " + cliParameters.getType());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void buildStandardWordCloud() {
|
||||
final WordCloud wordCloud = new WordCloud(
|
||||
new Dimension(cliParameters.getWidth(), cliParameters.getHeight()),
|
||||
cliParameters.getCollisionMode()
|
||||
);
|
||||
if (!cliParameters.getBackgrounds().isEmpty()) {
|
||||
wordCloud.setBackground(buildBackground(cliParameters.getBackgrounds().get(0)));
|
||||
}
|
||||
wordCloud.setBackgroundColor(cliParameters.getBackgroundColor());
|
||||
if (!cliParameters.getColors().isEmpty()) {
|
||||
wordCloud.setColorPalette(new ColorPalette(cliParameters.getColors()));
|
||||
}
|
||||
wordCloud.setFontScalar(buildFontScalar(cliParameters.getFontScalarType()));
|
||||
wordCloud.setPadding(cliParameters.getPadding());
|
||||
wordCloud.setWordStartScheme(buildWordStart(cliParameters.getWordStart()));
|
||||
wordCloud.setKumoFont(buildKumoFont(cliParameters.getFontWeights().get(0)));
|
||||
wordCloud.build(loadFrequencies(cliParameters.getInputSources().get(0)));
|
||||
wordCloud.writeToFile(cliParameters.getOutputSource());
|
||||
}
|
||||
|
||||
private List<WordFrequency> loadFrequencies(final String input) {
|
||||
try {
|
||||
final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
|
||||
frequencyAnalyzer.setWordFrequenciesToReturn(cliParameters.getWordCount());
|
||||
frequencyAnalyzer.setMinWordLength(cliParameters.getMinWordLength());
|
||||
frequencyAnalyzer.setStopWords(cliParameters.getStopWords());
|
||||
frequencyAnalyzer.setCharacterEncoding(cliParameters.getCharacterEncoding());
|
||||
for (final NormalizerType normalizer : cliParameters.getNormalizers()) {
|
||||
frequencyAnalyzer.setNormalizer(buildNormalizer(normalizer));
|
||||
}
|
||||
frequencyAnalyzer.setWordTokenizer(buildTokenizer());
|
||||
|
||||
return frequencyAnalyzer.load(toInputStream(input));
|
||||
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private WordTokenizer buildTokenizer() {
|
||||
switch (cliParameters.getTokenizer()) {
|
||||
case WHITE_SPACE: return new WhiteSpaceWordTokenizer();
|
||||
case ENGLISH: return new EnglishWordTokenizer();
|
||||
case CHINESE: return new ChineseWordTokenizer();
|
||||
}
|
||||
throw new IllegalStateException("Unknown tokenizer: " + cliParameters.getTokenizer());
|
||||
}
|
||||
|
||||
private Normalizer buildNormalizer(final NormalizerType normalizer) {
|
||||
switch (normalizer) {
|
||||
case LOWERCASE: return new LowerCaseNormalizer();
|
||||
case UPPERCASE: return new UpperCaseNormalizer();
|
||||
case BUBBLE: return new BubbleTextNormalizer();
|
||||
case CHARACTER_STRIPPING: return new CharacterStrippingNormalizer();
|
||||
case UPSIDE_DOWN: return new UpsideDownNormalizer();
|
||||
case TRIM: return new TrimToEmptyNormalizer();
|
||||
}
|
||||
throw new IllegalStateException("Unknown normalizer: " + normalizer);
|
||||
}
|
||||
|
||||
private KumoFont buildKumoFont(final FontWeight fontWeight) {
|
||||
return new KumoFont(cliParameters.getFontType(), fontWeight);
|
||||
}
|
||||
|
||||
private static WordStartStrategy buildWordStart(final WordStart wordStart) {
|
||||
switch (wordStart) {
|
||||
case CENTER: return new CenterWordStart();
|
||||
case RANDOM: return new RandomWordStart();
|
||||
}
|
||||
throw new IllegalStateException("Unknown word start: " + wordStart);
|
||||
}
|
||||
|
||||
|
||||
private FontScalar buildFontScalar(final FontScalarType fontScalarType) {
|
||||
switch (fontScalarType) {
|
||||
case LINEAR: return new LinearFontScalar(cliParameters.getFontSizeMin(), cliParameters.getFontSizeMax());
|
||||
case SQRT: return new SqrtFontScalar(cliParameters.getFontSizeMin(), cliParameters.getFontSizeMax());
|
||||
case LOG: return new LogFontScalar(cliParameters.getFontSizeMin(), cliParameters.getFontSizeMax());
|
||||
}
|
||||
throw new IllegalStateException("Unknown font scalar type: " + fontScalarType);
|
||||
}
|
||||
|
||||
private static Background buildBackground(final String background) {
|
||||
try {
|
||||
return new PixelBoundryBackground(toInputStream(background));
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private static InputStream toInputStream(final String path) {
|
||||
final File file = new File(path);
|
||||
if (file.exists() && !file.isDirectory()) {
|
||||
try {
|
||||
return new FileInputStream(file);
|
||||
|
||||
} catch (final FileNotFoundException e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
try {
|
||||
return new URL(path).openStream();
|
||||
} catch (final IOException ignored) {
|
||||
}
|
||||
throw new RuntimeException("Input path [" + path + "] not a file or url.");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -18,7 +18,11 @@ public class ChineseWordTokenizer implements WordTokenizer {
|
||||
final List<String> rawTokens = tokenizer.tokenize(sentence);
|
||||
final List<String> tokens = new ArrayList<>();
|
||||
for (final String rawToken : rawTokens) { // parse parts-of-speech tags away (政府/n, 依照/p, 法律/n, 行/ng, 使/v, 执法/vn)
|
||||
tokens.add(rawToken.substring(0, rawToken.indexOf('/')));
|
||||
if (rawToken.contains("/")) {
|
||||
tokens.add(rawToken.substring(0, rawToken.indexOf('/')));
|
||||
} else {
|
||||
tokens.add(rawToken);
|
||||
}
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
123
src/test/java/com/kennycason/kumo/cli/KumoCliITest.java
Normal file
123
src/test/java/com/kennycason/kumo/cli/KumoCliITest.java
Normal file
@ -0,0 +1,123 @@
|
||||
package com.kennycason.kumo.cli;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Created by kenny on 6/12/16.
|
||||
*/
|
||||
public class KumoCliITest {
|
||||
|
||||
@Test
|
||||
public void simple() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo.png"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void stopwords() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_stopwords.png",
|
||||
"--stop-words", "nintendo,the"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void wordCount() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_wordcount_10.png",
|
||||
"--word-count", "10"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void widthAndHeight() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_width_and_height.png",
|
||||
"--width", "100",
|
||||
"--height", "100"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void randomWordStart() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_random_wordstart.png",
|
||||
"--word-start", "random"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void font() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_font.png",
|
||||
"--font-scalar", "sqrt",
|
||||
"--font-type", "Impact",
|
||||
"--font-weight", "plain",
|
||||
"--font-size-min", "4",
|
||||
"--font-size-max", "100"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void normalizer() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_normalizers.png",
|
||||
"--normalizer", "uppercase,bubble"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void backgroundImage() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_whale_background.png",
|
||||
"--width", "990",
|
||||
"--height", "618",
|
||||
"--background", "https://raw.githubusercontent.com/kennycason/kumo/master/src/test/resources/backgrounds/whale.png"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void colorRgb() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_rgb_color.png",
|
||||
"--color", "(255,0,0),(0,255,0),(0,0,255)"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void colorRgbHex() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_color_rgb_hex.png",
|
||||
"--color", "(0xff,0,0),(0,0xff,0),(0,0,0xff)"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void colorHex() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://en.wikipedia.org/wiki/Nintendo",
|
||||
"--output", "/tmp/nintendo_color_hex.png",
|
||||
"--color", "(0xffffff),(0xcccccc),(0x999999),(0x666666),(0x333333)"
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void chinese() {
|
||||
KumoCli.main(new String[] {
|
||||
"--input", "https://zh.wikipedia.org/wiki/%E4%BB%BB%E5%A4%A9%E5%A0%82",
|
||||
"--output", "/tmp/nintendo_chinese.png",
|
||||
"--tokenizer", "chinese"
|
||||
});
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user