欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

实操演示:Java中Cosine相似度计算类的运用示例

最编程 2024-07-27 21:20:43
...

实例1: indexOf

import org.simmetrics.metrics.CosineSimilarity; //导入依赖的package包/类
/**
 * Finds the index of the first element, at or after {@code startIndex}, that
 * either matches {@code obj} exactly or is similar enough to it.
 *
 * <p>An exact lookup via {@code super.indexOf(obj, startIndex)} is tried first.
 * If that fails, each non-null element from {@code startIndex} onward is
 * compared to {@code obj} by running {@link CosineSimilarity} over the sets of
 * characters taken from their {@code toString()} forms.
 *
 * @param obj        the value to look for; {@code null} yields {@code -1}
 * @param distance   minimum score an element must reach to count as a match;
 *                   despite the name, the comparison is {@code score >= distance}
 * @param startIndex index at which the search begins
 * @return the index of the first exact or fuzzy match, or {@code -1} if none
 */
public int indexOf(Object obj, float distance, int startIndex) {
	if (obj == null) {
		return -1;
	}

	final int exactIndex = super.indexOf(obj, startIndex);
	if (exactIndex != -1) {
		// An exact match always wins over a fuzzy one.
		return exactIndex;
	}

	// No exact hit: fall back to a fuzzy scan. Cosine similarity is the
	// metric used for the time being.
	final java.util.Set<Character> wantedChars = new java.util.HashSet<Character>(
			StringCaseFormatUtil.toArrayListChar(obj.toString()));
	final CosineSimilarity<Character> cosine = new CosineSimilarity<Character>();

	for (int index = startIndex; index < this.size(); index++) {
		final Object candidate = this.get(index);
		if (candidate != null) {
			final java.util.Set<Character> candidateChars = new java.util.HashSet<Character>(
					StringCaseFormatUtil.toArrayListChar(candidate.toString()));
			final float score = cosine.compare(wantedChars, candidateChars);
			if (score >= distance) {
				return index;
			}
		}
	}
	return -1;
}
 

实例2: similarityStringsCosine

import org.simmetrics.metrics.CosineSimilarity; //导入依赖的package包/类
/**
 * Computes the cosine similarity between two strings, comparing them as
 * lower-cased, whitespace-separated tokens.
 *
 * @param stringA the first string; an empty string short-circuits to {@code -1}
 * @param stringB the second string
 * @return {@code -1} when {@code stringA} is empty, otherwise the value
 *         returned by the configured {@code StringMetric}
 */
public static float similarityStringsCosine(String stringA, String stringB) {
	// Sentinel value: an empty first string is reported as -1 rather than scored.
	if (stringA.isEmpty()) {
		return -1;
	}

	StringMetric cosineMetric = with(new CosineSimilarity<String>())
			.simplify(Simplifiers.toLowerCase())
			.tokenize(Tokenizers.whitespace())
			.build();

	return cosineMetric.compare(stringA, stringB);
}
 

实例3: processTypoCustomProperties

import org.simmetrics.metrics.CosineSimilarity; //导入依赖的package包/类
/**
 * Builds the "Typo properties" section for custom property keys that closely
 * resemble, according to a cosine string metric, one of the keys returned by
 * {@code getProperyKeys(liferayVersion)}.
 *
 * <p>For each custom key (visited in sorted order) the most similar default
 * key is determined. When the best score exceeds {@code 0.9}, the suggested
 * default key is written as a commented-out line, followed by the custom
 * entry as declared, and the custom key is removed from
 * {@code customPortalProperties}.
 *
 * @param liferayVersion         version passed to {@code getProperyKeys}
 * @param customPortalProperties custom properties to inspect; entries that are
 *                               reported here are removed from it
 * @return the generated section text; {@code StringPool.BLANK} when
 *         {@code customPortalProperties} is empty, and an empty string when
 *         no key exceeds the threshold
 * @throws IOException declared by this method; presumably propagated from
 *                     {@code getProperyKeys} (not visible here)
 */
protected String processTypoCustomProperties(String liferayVersion,
		Properties customPortalProperties) throws IOException {
	if (customPortalProperties.isEmpty()) {
		return StringPool.BLANK;
	}

	// Iterate over a sorted copy so entries can be removed from the
	// properties object while looping.
	SortedSet<String> sortedCustomKeys = new TreeSet<String>(
			customPortalProperties.stringPropertyNames());
	StringBuilder output = new StringBuilder();
	boolean headerWritten = false;

	StringMetric similarity = StringMetricBuilder
			.with(new CosineSimilarity<String>())
			.simplify(new Case.Lower(Locale.ENGLISH))
			.simplify(new WordCharacters())
			.tokenize(new Whitespace())
			.tokenize(new QGram(2))
			.build();
	Set<String> knownKeys = getProperyKeys(liferayVersion);

	for (String customKey : sortedCustomKeys) {
		// Track the default key with the highest similarity to this custom key.
		float bestScore = 0;
		String bestKey = null;
		for (String knownKey : knownKeys) {
			float score = similarity.compare(knownKey, customKey);
			if (score > bestScore) {
				bestScore = score;
				bestKey = knownKey;
			}
		}

		// Only near-matches are treated as probable typos.
		if (bestScore <= 0.9) {
			continue;
		}

		if (!headerWritten) {
			// The section banner is emitted once, before the first reported key.
			output.append("##\n## Typo properties\n##")
					.append("\n\n")
					.append("    #\n")
					.append("    # The properties listed below looks like that has a typo in its declaration\n")
					.append("    # which means that they don't have any influence in how Liferay is configured.\n")
					.append("    # The system suggested the correct property name in the comments.\n")
					.append("    #");
			headerWritten = true;
		}

		String value = fixLineBreak(customPortalProperties.getProperty(customKey));

		// Suggested key as a comment, then the custom entry as declared.
		output.append("\n")
				.append("    #").append(bestKey).append("=").append(value)
				.append("\n")
				.append("    ").append(customKey).append("=").append(value);
		customPortalProperties.remove(customKey);
	}

	if (headerWritten) {
		output.append("\n").append("\n");
	}

	return output.toString();
}