<无详细内容>
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileReader;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Set;
- import java.util.TreeSet;
/**
 * Counts how often each word occurs in a text file.
 *
 * <p>Typical use: assign {@link #content} (for example from
 * {@link #txtToString(File)}), call {@link #splitWord()}, then
 * {@link #countWordFreq()}, and finally read {@link #words} and
 * {@link #wordFreqs}, which are parallel arrays.
 *
 * <p>Words are compared case-sensitively, so "Hello" and "hello" are counted
 * as two different words.
 */
public class TestTxt {

    /** The input text file. */
    File file;
    /** The article text; {@link #splitWord()} replaces its punctuation with spaces. */
    String content;
    /** Every token of the article in order of appearance, duplicates included. */
    String[] rawWords;
    /** The distinct words, sorted in natural {@code String} order. */
    String[] words;
    /** {@code wordFreqs[i]} is the number of occurrences of {@code words[i]}. */
    int[] wordFreqs;

    /**
     * Reads a whole text file into a string, terminating every line with
     * {@code '\n'} (including the last one).
     *
     * <p>The file is decoded with the platform default charset, because that is
     * what {@code FileReader(File)} uses.
     *
     * @param file the file to read
     * @return the text read; if reading fails, whatever was read before the
     *         failure (possibly the empty string)
     */
    public static String txtToString(File file) {
        StringBuilder result = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                result.append(line).append('\n');
            }
        } catch (Exception e) {
            // Deliberately best-effort: report the problem and return what was read so far.
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Splits {@link #content} into words and stores them in {@link #rawWords}.
     *
     * <p>The characters {@code ' , . ( ) -} are replaced by spaces (the modified
     * text is written back to {@link #content}), then the text is split on runs
     * of whitespace. Blank or {@code null} content yields an empty array.
     */
    public void splitWord() {
        if (content == null) {
            rawWords = new String[0];
            return;
        }
        final char space = ' ';
        content = content.replace('\'', space).replace(',', space).replace('.', space);
        content = content.replace('(', space).replace(')', space).replace('-', space);
        // Trim first: String.split would otherwise emit an empty first token
        // whenever the text starts with whitespace or replaced punctuation.
        String trimmed = content.trim();
        rawWords = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
    }

    /**
     * Counts the occurrences of every distinct entry of {@link #rawWords},
     * filling {@link #words} (sorted) and {@link #wordFreqs}, and prints the
     * sorted list of distinct words to standard output.
     *
     * <p>{@link #splitWord()} must have been called first so that
     * {@link #rawWords} is populated.
     */
    public void countWordFreq() {
        List<String> sorted = new ArrayList<>(rawWords.length);
        for (String token : rawWords) {
            sorted.add(token);
        }
        // A null comparator means natural String ordering; after sorting, equal
        // words are adjacent and can be counted in a single pass.
        sorted.sort(null);

        List<String> wordsList = new ArrayList<>();
        List<Integer> freqList = new ArrayList<>();
        for (String token : sorted) {
            int last = wordsList.size() - 1;
            if (last >= 0 && wordsList.get(last).equals(token)) {
                freqList.set(last, freqList.get(last) + 1);
            } else {
                wordsList.add(token);
                freqList.add(1);
            }
        }
        System.out.println(wordsList);

        words = wordsList.toArray(new String[0]);
        wordFreqs = new int[freqList.size()];
        for (int i = 0; i < wordFreqs.length; i++) {
            wordFreqs[i] = freqList.get(i);
        }
    }

    /**
     * Prints one {@code word:count} line per distinct word of a text file.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             defaulting to {@code D:/test1.txt} when absent
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "D:/test1.txt";
        TestTxt t = new TestTxt();
        t.file = new File(path);
        t.content = txtToString(t.file);
        t.splitWord();
        t.countWordFreq();
        for (int i = 0; i < t.wordFreqs.length; i++) {
            System.out.println(t.words[i] + ":" + t.wordFreqs[i]);
        }
    }
}
复制代码 |