import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;

- public class DownloadFile {
- public static void main(String[] args) throws Exception {
- String baseUrl = "<a href="http://hanyu.iciba.com/hanzi/1.shtml";" target="_blank">http://hanyu.iciba.com/hanzi/1.shtml";</a>
- String bihuaRegex = "class="guanggao"[^<]*<[^<]*<param\s*name="movie"\s*value="([^"]*)";
- String aSoundRegex = "class="js12">ā.*?name="FlashVars"\s*value="f=([^"]*)";
- String eSoundRegex = "class="js12">ē.*?name="FlashVars"\s*value="f=([^"]*)";
- WebClient client = new WebClient();
- client.getOptions().setCssEnabled(false);
- client.getOptions().setJavaScriptEnabled(false);
- client.getOptions().setThrowExceptionOnFailingStatusCode(false);
- client.getOptions().setThrowExceptionOnScriptError(false);
- Page page = client.getPage(baseUrl);
- String source = page.getWebResponse().getContentAsString();
- Matcher mBihuan = Regex(source, bihuaRegex);
- Matcher mA = Regex(source, aSoundRegex);
- Matcher mE = Regex(source, eSoundRegex);
- while(mBihuan.find()) {
- String url = "<a href="http://hanyu.iciba.com/" + mBihuan.group" target="_blank">http://hanyu.iciba.com/" + mBihuan.group</a>(1);
- page = client.getPage(url);
- saveFile(page, "d:/testDownload/bihua.swf");
- }
- while(mA.find()) {
- String url = mA.group(1);
- page = client.getPage(url);
- saveFile(page, "d:/testDownload/a.mp3");
- }
- while(mE.find()) {
- String url = mE.group(1);
- page = client.getPage(url);
- saveFile(page, "d:/testDownload/e.mp3");
- }
- }
-
- public static Matcher Regex(String source, String regex) {
- Pattern p = Pattern.compile(regex, Pattern.DOTALL);
- return p.matcher(source);
- }
-
- public static void saveFile(Page page, String file) throws Exception {
- InputStream is = page.getWebResponse().getContentAsStream();
- FileOutputStream output = new FileOutputStream(file);
- IOUtils.copy(is, output);
- output.close();
- }
- }
// "复制代码" ("Copy code") — web-page button label captured along with the listing; not part of the program.