之前在本地网络中建了一个CentOS软件源，挺好用的，可是发现有些软件还是装不上，因为系统安装盘中的包并不全，有些软件的依赖在updates源中，updates源在网上，怎么把其中的包拿到呢？一种方法，当然也是最最正常的方法，就是做个计划任务，定时从CentOS官方镜像站中同步。
可是我们也就是在本地用用，不需要同步，于是写了个Java程序，找了个速度比较快的镜像（东北大学的，写程序的时候北理工的镜像站打不开），直接很暴力地把上面的updates目录下的包（http://mirror.neu.edu.cn/centos/6.4/updates/x86_64/Packages/）全都抓下来了，然后createrepo一下就可以用了，抓软件包的代码如下，其中HttpFactory是自定义的一个封装Apache HttpClient接口的工具类：[code]package cn.edu.ruc.extract;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.HtmlPage;
public class Main
{
/**
 * Downloads every file listed on a mirror's directory-index page into a local directory.
 *
 * <p>The index page is fetched through {@code HttpFactory}, parsed with HTMLParser, and every
 * table row that has at least one cell is treated as one listing entry whose first cell holds
 * the file name. The first such row (across all tables) is skipped; presumably it is the
 * listing's header row. Entries whose name is not a plain file name are skipped so that a
 * remote page cannot make the program write outside the download directory.
 *
 * <p>Any failure aborts the run; the stack trace is printed and no exception escapes.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
    try
    {
        // Remote directory to crawl.
        String baseUrl = "http://mirror.neu.edu.cn/centos/6.4/updates/x86_64/Packages/";
        String html = HttpFactory.getInstance().getPageContent(baseUrl);
        // Local directory that receives the downloaded packages.
        String rootDir = "/home/Hadoop/updates/";
        File targetDir = new File(rootDir);
        if (!targetDir.isDirectory() && !targetDir.mkdirs())
        {
            throw new IllegalStateException("Cannot create download directory: " + rootDir);
        }

        Parser parser = Parser.createParser(html, "utf-8");
        HtmlPage page = new HtmlPage(parser);
        parser.visitAllNodesWith(page);
        NodeFilter filter = new TagNameFilter("TABLE");
        NodeList nodes = page.getBody().extractAllNodesThatMatch(filter, true);

        boolean headerSkipped = false;
        int downloaded = 0;
        for (int i = 0; i < nodes.size(); ++i)
        {
            TableTag tableTag = (TableTag) nodes.elementAt(i);
            for (TableRow row : tableTag.getRows())
            {
                TableColumn[] columns = row.getColumns();
                if (columns == null || columns.length == 0)
                {
                    continue;
                }
                if (!headerSkipped)
                {
                    // The first row that has cells is not downloaded (same as before).
                    headerSkipped = true;
                    continue;
                }

                String fileName = columns[0].toPlainTextString().trim();
                if (!isPlainFileName(fileName))
                {
                    // Empty cells, sub-directories ("repodata/") and names such as "../x"
                    // must never be turned into a local path.
                    System.err.println("Skipping entry that is not a plain file name: " + fileName);
                    continue;
                }

                String url = baseUrl + fileName;
                System.out.println(url);
                download(url, new File(targetDir, fileName));
                downloaded++;
            }
        }
        // Count real downloads; the old "num-1" printed -1 when the page had no rows.
        System.out.println(downloaded + " packages downloaded.");
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
}

/**
 * Tells whether {@code name} can be used as a single file name inside the download directory.
 *
 * @param name candidate name taken from the index page
 * @return {@code false} for empty names, {@code "."}, {@code ".."} and anything containing a
 *         path separator; {@code true} otherwise
 */
private static boolean isPlainFileName(String name)
{
    if (name.isEmpty() || name.equals(".") || name.equals(".."))
    {
        return false;
    }
    return name.indexOf('/') < 0 && name.indexOf('\\') < 0;
}

/**
 * Copies the response body of {@code url} into {@code target}.
 *
 * <p>The request is issued before the local file is created, so a failed request does not
 * leave an empty file behind, and both streams are closed even when the copy fails.
 *
 * @param url    address of the file to fetch
 * @param target local file to (over)write
 * @throws Exception if the request or the copy fails; declared broadly because the checked
 *                   exceptions of {@code HttpFactory} are not visible from this file
 */
private static void download(String url, File target) throws Exception
{
    try (InputStream in = HttpFactory.getInstance().getResponseEntity(url).getContent();
         FileOutputStream out = new FileOutputStream(target))
    {
        byte[] buffer = new byte[1024 * 1024];
        int read;
        while ((read = in.read(buffer)) != -1)
        {
            out.write(buffer, 0, read);
        }
        out.flush();
    }
}
}[/code]换别的镜像站可能需要变变html解析的代码。
作者：bhq2010