唉,压缩包居然不可以上传,一个个传上来
- package MeiNvTuPian;
- public class DataProcessor {
-
- private static DataProcessor inst = new DataProcessor();
- public PrintFile printFile;
- public static DataProcessor inst(){return inst;}
- /**
- *
- * @param data
- * @param blogId
- */
- public void getBolgMsg(String data) {
- String str = data.substring(data.indexOf("title="原创博客"")+"title="原创博客"".length());
- str = str.substring(0, str.lastIndexOf("pages sm-hide")+"pages sm-hide".length());
- String[] blogs = str.split("title="原创博客"");
- for(String s : blogs){
- BlogModel bm = new BlogModel();
- BlogHtml bh = new BlogHtml();
- try {
- s = s.substring(s.indexOf("href="")+"href="".length());
- bm.blogUrl = s.substring(0, s.indexOf("">"));
-
- s = s.substring(s.indexOf("">")+"">".length());
- bm.title = s.substring(0, s.indexOf("</a>")).replaceAll(" ", "");
-
- s = s.substring(s.indexOf("time">")+"time">".length());
- bm.time = s.substring(0,s.indexOf("发布")).replaceAll(" ", "");
-
- this.getMoreData(bm);
- } catch (Exception e) {
- e.printStackTrace();
- }
- bh.p1 = "<p><a href=""+bm.blogUrl+"">("+bm.time+")"+bm.title+"</a></p>";
- if(bm.music == null || bm.music.equals("")){
- bh.p2 = "<p>本次动弹听歌,戳<a href="">(不用点了,这期没拿到)</a></p>";
- }else{
- bh.p2 = "<p>本次动弹听歌,戳<a href=""+bm.music+"">(这里)</a></p>";
- }
- bh.p3 = "<p>小树医生心理生理医务室↓↓↓</p>";
- if(bm.mmPhoto == null || bm.mmPhoto.equals("")){
- bh.p4 = "<p>本期弃疗</p>";
- }else{
- bh.p4 = "<p><img alt="福利" src=""+bm.mmPhoto+"" /></p>";
- }
- printFile.onData(bh);
- }
- }
-
- private void getMoreData(BlogModel bm) {
- String data = DataFetcher.inst().getData(bm.blogUrl);
- if(data.equals("")){
- System.out.println("动弹不存在!");
- return;
- }
- if(data.indexOf("手机党少年们想听歌,请使劲儿戳")>0){
- data = data.substring(data.indexOf("请使劲儿戳(<a href="")+"请使劲儿戳(<a href="".length());
- bm.music = data.substring(0,data.indexOf("""));
- }
- if(data.indexOf("小树医生心理生理医务室")>0){
- data = data.substring(data.indexOf("小树医生心理生理医务室")+"小树医生心理生理医务室".length());
- data = data.substring(data.indexOf("src="")+"src="".length());
- bm.mmPhoto = data.substring(0,data.indexOf("""));
- }
- }
- }
复制代码
- package MeiNvTuPian;
- import java.io.BufferedReader;
- import java.io.InputStreamReader;
- import java.net.URL;
- import java.net.URLConnection;
/**
 * Downloads the content behind a URL as a single string, sending
 * browser-like request headers.
 */
public class DataFetcher {
    /** Upper bound for establishing the connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 15000;
    /** Upper bound for each blocking read, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30000;

    private static DataFetcher inst = new DataFetcher();

    /** Returns the shared instance. */
    public static DataFetcher inst() {
        return inst;
    }

    /**
     * Issues a request that imitates a browser and returns the response body.
     *
     * <p>Lines are concatenated without their line terminators. Failures are
     * printed and never thrown: whatever was read before the failure is
     * returned, which is the empty string when nothing could be read.
     *
     * @param urlStr address to fetch
     * @return the body with line breaks removed; never {@code null}
     */
    public String getData(String urlStr) {
        // StringBuilder avoids re-copying the whole page for every appended line.
        StringBuilder result = new StringBuilder();
        BufferedReader in = null;
        try {
            URL url = new URL(urlStr);
            System.out.println(urlStr);
            URLConnection connection = url.openConnection();
            // Without timeouts a stalled server would block the run forever.
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            connection.setRequestProperty("accept", "*/*");
            connection.setRequestProperty("connection", "Keep-Alive");
            connection.setRequestProperty("user-agent",
                    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11");
            connection.connect();
            // Decode explicitly instead of relying on the platform default charset.
            // NOTE(review): assumes the fetched pages are served as UTF-8 - confirm.
            in = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), "UTF-8"));
            String line;
            while ((line = in.readLine()) != null) {
                result.append(line);
            }
        } catch (java.io.IOException e1) {
            e1.printStackTrace();
        } catch (Exception e) {
            System.out.println("get" + e);
            e.printStackTrace();
        } finally {
            try {
                if (in != null) {
                    in.close();
                }
            } catch (Exception e2) {
                e2.printStackTrace();
            }
        }
        return result.toString();
    }

}
复制代码
- package MeiNvTuPian;
- public class Main {
- public static void main(String[] args) throws InterruptedException {
- int pageNum = 16;
- PrintFile p = new PrintFile();
- DataProcessor.inst().printFile = p;
- //p.init("blogMsg");
- p.init1("blogHtml");
- getBolgMsg(pageNum);
- Thread.sleep(5000);
- p.finish();
- }
- //处理数据
- private static void getBolgMsg(int pageNum) throws InterruptedException {
- for (int i = 1; i <= pageNum; i++)
- {
- System.out.println("page" + i);
- String data = DataFetcher.inst().getData("http://my.oschina.net/xxiaobian/blog?sort=time&p="+i);
- if(data.equals("")){
- break;
- }
- DataProcessor.inst().getBolgMsg(data);
- Thread.sleep(3000);//每获取一页数据线程睡眠3秒
- }
- }
- }
复制代码
- package MeiNvTuPian;
- import java.io.File;
- import java.io.FileWriter;
- import java.io.IOException;
- public class PrintFile {
- private FileWriter writer = null;
- //写入数据
- /* public void onData(BlogModel bm){
- try {
- writer.write(bm.dump()+ "n");
- writer.flush();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }*/
- public void onData(BlogHtml bh){
- try {
- writer.write(bh.dump()+ "n");
- writer.flush();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- //数据行头部写入
- public void init(String filename)
- {
- try {
- File f = new File("F:/"+filename);
- if (!f.exists())
- {
- f.mkdir();
- }
- writer = new FileWriter("F:/"+filename+"/"+filename+".txt");
- String title = "动弹url 标题 时间 歌 图片urln";
- writer.write(title);
- writer.flush();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- //数据行头部写入
- public void init1(String filename)
- {
- try {
- File f = new File("F:/"+filename);
- if (!f.exists())
- {
- f.mkdir();
- }
- writer = new FileWriter("F:/"+filename+"/"+filename+".txt");
- String title = "p1 p2 p3 p4n";
- writer.write(title);
- writer.flush();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- //结束
- public void finish()
- {
- System.out.println("done.");
- try {
- writer.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
复制代码
- package MeiNvTuPian;
/**
 * One output row: four HTML paragraph strings.
 */
public class BlogHtml {
    String p1;
    String p2;
    String p3;
    String p4;

    /**
     * Joins the four paragraphs with a single space.
     * Unset paragraphs appear as the text {@code null}.
     *
     * @return {@code p1 p2 p3 p4}
     */
    public String dump() {
        String separator = " ";
        return p1 + separator
                + p2 + separator
                + p3 + separator
                + p4;
    }
}
复制代码
- package MeiNvTuPian;
/**
 * Plain data holder for one blog entry.
 *
 * @author Administrator
 */
public class BlogModel {
    String blogUrl;
    String title;
    String time;
    String music;
    String mmPhoto;

    /**
     * Joins all fields with a single space, in declaration order.
     * Unset fields appear as the text {@code null}.
     *
     * @return {@code blogUrl title time music mmPhoto}
     */
    public String dump() {
        String separator = " ";
        return blogUrl + separator
                + title + separator
                + time + separator
                + music + separator
                + mmPhoto;
    }
}
复制代码