Java程序:统计单词词频

Python010

Java程序:统计单词词频(第1张图)

不多说,先看代码

import java.io.*;
import java.util.*;

/**
 * Word-frequency counter.
 *
 * <p>Reads {@code article.txt} from the working directory, counts every word
 * made of ASCII letters (case-insensitively) and writes one
 * {@code word=count} line per distinct word to {@code result.txt}, echoing
 * the same lines on standard output. Lines are ordered by descending count;
 * words with the same count are listed alphabetically.
 */
public class wordsRate {

    /** File the text is read from, relative to the working directory. */
    private static final String INPUT_FILE = "article.txt";

    /** File the ranking is written to, relative to the working directory. */
    private static final String OUTPUT_FILE = "result.txt";

    /**
     * Counts the words of {@code article.txt} and writes the ranking to
     * {@code result.txt} and to standard output.
     *
     * @param args ignored
     * @throws Exception if the input file cannot be read or the output file
     *                   cannot be written
     */
    public static void main(String[] args) throws Exception {
        Map<String, Integer> counts = countWords(readText(INPUT_FILE));
        writeRanking(rankByCount(counts), OUTPUT_FILE);
    }

    /** Returns the whole content of {@code path}, each line terminated by a newline. */
    private static String readText(String path) throws IOException {
        // Start from an empty builder: seeding the accumulator with null would
        // put the literal text "null" in front of the first word.
        StringBuilder text = new StringBuilder();
        BufferedReader in = new BufferedReader(new FileReader(path));
        // try/finally (not try-with-resources) keeps the language level the
        // rest of this file is written for.
        try {
            String line;
            while ((line = in.readLine()) != null) {
                // readLine() strips the terminator; put one back so the last
                // word of a line does not fuse with the first word of the next.
                text.append(line).append('\n');
            }
        } finally {
            in.close();
        }
        return text.toString();
    }

    /** Returns a word-to-occurrences map of {@code text}, sorted alphabetically by word. */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        // Locale.ROOT keeps the lowercasing of ASCII letters independent of the
        // default locale (a Turkish locale maps "I" to a non-ASCII letter).
        String[] words = text.toLowerCase(Locale.ROOT).split("[^a-z]+");
        for (String word : words) {
            // split() yields one empty token when the text is empty or starts
            // with a separator; that is not a word.
            if (word.length() == 0) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /** Returns the entries of {@code counts} ordered by descending count. */
    private static List<Map.Entry<String, Integer>> rankByCount(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> ranking =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        // Collections.sort is stable, so entries with equal counts keep the
        // alphabetical order they have in the sorted map.
        Collections.sort(ranking, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                return b.getValue().compareTo(a.getValue());
            }
        });
        return ranking;
    }

    /** Prints every entry as {@code word=count} and writes the same lines to {@code path}. */
    private static void writeRanking(List<Map.Entry<String, Integer>> ranking, String path)
            throws IOException {
        BufferedWriter out = new BufferedWriter(new FileWriter(path));
        try {
            for (Map.Entry<String, Integer> entry : ranking) {
                String line = entry.getKey() + "=" + entry.getValue();
                System.out.println(line);
                out.write(line);
                out.newLine();
            }
        } finally {
            // close() flushes the buffer, so no per-line flush is needed.
            out.close();
        }
    }
}

代码是本人自己写的,也经过了自己的验证,肯定没问题,希望采纳。

功能实现了,我是将其key和value值放在了数组之中,然后进行排序,将其输出到了txt文件里

排序方式不一样,实现的方式也不一样,所谓仁者见仁智者见智。

// Fragment: counts the words of the text accumulated in `sb` and prints them,
// most frequent first. `sb` and `singleSet` are declared outside this fragment;
// presumably `singleSet` is a Set<String> of distinct words -- confirm against
// the surrounding code.
String result = sb.toString();

// Every character that is not an ASCII letter or digit is a separator.
String[] Str = result.split("[^A-Za-z0-9]");

for (String string : Str) {
    if ("".equals(string)) {
        // Adjacent separators produce empty tokens; they are not words, and an
        // empty entry must not stay in the set either.
        singleSet.remove("");
    } else {
        singleSet.add(string);
    }
}

// Tally all tokens in one pass instead of re-scanning the array per word.
Map<String, Integer> tally = new HashMap<String, Integer>();
for (String token : Str) {
    Integer seen = tally.get(token);
    tally.put(token, seen == null ? 1 : seen + 1);
}

// Store the count of every word in the set; a word that does not occur in
// this text gets 0.
Map<String, Integer> map = new HashMap<String, Integer>();
for (String childString : singleSet) {
    Integer count = tally.get(childString);
    map.put(childString, count == null ? 0 : count);
}

ArrayList<Map.Entry<String, Integer>> l =
        new ArrayList<Map.Entry<String, Integer>>(map.entrySet());

Collections.sort(l, new Comparator<Map.Entry<String, Integer>>() {
    @Override
    public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2) {
        // Descending by count; swap e1 and e2 for ascending order.
        // compareTo avoids the overflow a subtraction-based comparison can hit.
        return e2.getValue().compareTo(e1.getValue());
    }
});

for (Map.Entry<String, Integer> e : l) {
    System.out.println(e.getKey() + " " + e.getValue());
}

代码仅供参考!希望对你有用