实操演示:Java中Cosine相似度计算类的运用示例
最编程
2024-07-27 21:20:43
...
实例1: indexOf
import org.simmetrics.metrics.CosineSimilarity; //导入依赖的package包/类
/**
 * Looks up {@code obj} starting at {@code startIndex}, falling back to a fuzzy
 * match when no exact match is found.
 *
 * <p>The exact search is delegated to {@code super.indexOf(obj, startIndex)}.
 * If that yields {@code -1}, every non-null element from {@code startIndex}
 * onward is compared with {@code obj}: both {@code toString()} values are
 * converted to a set of characters (through
 * {@code StringCaseFormatUtil.toArrayListChar}; presumably one entry per
 * character, confirm against that helper) and the two sets are scored with
 * {@link CosineSimilarity}. The first element whose score reaches the
 * threshold wins.
 *
 * @param obj        value to search for; {@code null} always yields {@code -1}
 * @param distance   minimum score an element must reach; despite the name it
 *                   is used as a lower bound ({@code score >= distance}), so
 *                   larger values are stricter
 * @param startIndex index at which both the exact and the fuzzy search begin
 * @return index of the exact match, otherwise of the first fuzzy match,
 *         otherwise {@code -1}
 */
public int indexOf(Object obj, float distance, int startIndex) {
    if (obj == null) {
        return -1;
    }

    // An exact hit always takes precedence over a fuzzy one.
    final int exactIndex = super.indexOf(obj, startIndex);
    if (exactIndex != -1) {
        return exactIndex;
    }

    // No exact hit: compare the distinct characters of each candidate with
    // the distinct characters of the search value.
    final java.util.Set<Character> wantedChars = new java.util.HashSet<Character>(
            StringCaseFormatUtil.toArrayListChar(obj.toString()));
    final CosineSimilarity<Character> cosine = new CosineSimilarity<Character>();

    for (int pos = startIndex; pos < this.size(); pos++) {
        final Object candidate = this.get(pos);
        if (candidate != null) {
            final java.util.Set<Character> candidateChars = new java.util.HashSet<Character>(
                    StringCaseFormatUtil.toArrayListChar(candidate.toString()));
            final float score = cosine.compare(wantedChars, candidateChars);
            if (score >= distance) {
                return pos;
            }
        }
    }

    // Here exactIndex is necessarily -1: nothing matched.
    return exactIndex;
}
实例2: similarityStringsCosine
import org.simmetrics.metrics.CosineSimilarity; //导入依赖的package包/类
/**
 * Scores two strings with a cosine-similarity string metric.
 *
 * <p>The metric is built with a lower-casing simplifier and a whitespace
 * tokenizer, then applied to {@code stringA} and {@code stringB}.
 *
 * @param stringA first string; must not be {@code null} (it is dereferenced
 *                immediately). An empty value short-circuits to {@code -1}
 * @param stringB second string, passed to the metric unchanged
 * @return {@code -1} when {@code stringA} is empty, otherwise the value
 *         returned by the metric's {@code compare}
 */
public static float similarityStringsCosine(String stringA, String stringB) {
    // Sentinel result: an empty first string is never handed to the metric.
    if (stringA.isEmpty()) {
        return -1;
    }

    final StringMetric cosineMetric = with(new CosineSimilarity<String>())
            .simplify(Simplifiers.toLowerCase())
            .tokenize(Tokenizers.whitespace())
            .build();

    final float score = cosineMetric.compare(stringA, stringB);
    return score;
}
实例3: processTypoCustomProperties
import org.simmetrics.metrics.CosineSimilarity; //导入依赖的package包/类
/**
 * Builds the "Typo properties" section for custom portal properties whose
 * keys closely resemble a default key.
 *
 * <p>Each custom key (visited in sorted order) is compared against every key
 * returned by {@code getProperyKeys(liferayVersion)} using a cosine-similarity
 * string metric. When the best score is above {@code 0.9}, the section gets a
 * commented-out line with the best-matching default key followed by the
 * original custom line, and the custom key is removed from
 * {@code customPortalProperties}. The section header is written once, before
 * the first such entry.
 *
 * @param liferayVersion         version handed to {@code getProperyKeys} to
 *                               obtain the default keys
 * @param customPortalProperties custom properties to inspect; keys reported
 *                               here are removed from it
 * @return {@code StringPool.BLANK} when the properties are empty, otherwise
 *         the generated section text (an empty string if nothing scored
 *         above the threshold)
 * @throws IOException declared by this method; presumably raised by
 *                     {@code getProperyKeys} or {@code fixLineBreak}; confirm
 *                     against those helpers
 */
protected String processTypoCustomProperties(String liferayVersion,
        Properties customPortalProperties) throws IOException {

    if (customPortalProperties.isEmpty()) {
        return StringPool.BLANK;
    }

    // Iterate over a sorted snapshot so that removing entries from the
    // Properties object below cannot disturb the iteration.
    SortedSet<String> sortedCustomKeys = new TreeSet<String>(
            customPortalProperties.stringPropertyNames());

    StringBuilder report = new StringBuilder();
    boolean headerWritten = false;

    StringMetric similarity = StringMetricBuilder
            .with(new CosineSimilarity<String>())
            .simplify(new Case.Lower(Locale.ENGLISH))
            .simplify(new WordCharacters())
            .tokenize(new Whitespace())
            .tokenize(new QGram(2))
            .build();

    Set<String> knownKeys = getProperyKeys(liferayVersion);

    for (String customKey : sortedCustomKeys) {
        // Find the default key with the strictly highest score.
        float bestScore = 0;
        String bestKey = null;
        for (String knownKey : knownKeys) {
            float score = similarity.compare(knownKey, customKey);
            if (score > bestScore) {
                bestScore = score;
                bestKey = knownKey;
            }
        }

        // Only near-identical keys are reported as likely typos.
        if (bestScore <= 0.9) {
            continue;
        }

        if (!headerWritten) {
            report.append("##\n## Typo properties\n##")
                    .append("\n\n")
                    .append(" #\n")
                    .append(" # The properties listed below looks like that has a typo in its declaration\n")
                    .append(" # which means that they don't have any influence in how Liferay is configured.\n")
                    .append(" # The system suggested the correct property name in the comments.\n")
                    .append(" #");
            headerWritten = true;
        }

        String value = fixLineBreak(customPortalProperties.getProperty(customKey));

        // Suggested key as a comment, then the original line as written.
        report.append("\n")
                .append(" #").append(bestKey).append("=").append(value)
                .append("\n")
                .append(" ").append(customKey).append("=").append(value);

        customPortalProperties.remove(customKey);
    }

    if (headerWritten) {
        report.append("\n").append("\n");
    }

    return report.toString();
}