一、dom4j 解析 XML
需要兩個 jar 包：dom4j 的 jar 和 jaxen（beta 版）的 jar
view plainprint?
/** * java解析xml文件各個節點信息* * @author Jeelon * @param string * 解析的文件名*/ private static void getXmlInfo(String string) { SAXReader reader = new SAXReader()InputStream in = ThreadcurrentThread()getContextClassLoader()getResourceAsStream(string)try { Document doc = readerread(in)Element driverNameEls = (Element) docselectObject(/config/dbinfo/drivername)Element urlEls = (Element) docselectObject(/config/dbinfo/url)Element userNameEls = (Element) docselectObject(/config/dbinfo/username)Element passwordEls = (Element) docselectObject(/config/dbinfo/password)
String driverName = driverNameElsgetStringValue()String url = urlElsgetStringValue()String userName = userNameElsgetStringValue()String password = passwordElsgetStringValue()
Systemoutprintln(====================================)Systemoutprintln(驅動名 + driverName)Systemoutprintln(URL地址 + url)Systemoutprintln(用戶名 + userName)Systemoutprintln(密碼 + password)Systemoutprintln(====================================)} catch (DocumentException e) { eprintStackTrace()}
}
二、Java 解析 HTML，需要的 jar 包：jsoup.jar
view plainprint?
/** * 提取HTML文件的文本內容* * @author Jeelon * @param html * 提取的html文件名* @return 返回提取內容String */ private static String getDocument(File html) { String text = try { // 設置編碼集orgjsoupnodesDocument doc = Jsoupparse(html UTF)// 提取標題信息Elements title = docselect(title)for (orgjsoupnodesElement link title) { text += linktext() + } // 提取table中的文本信息Elements links = docselect(table)for (orgjsoupnodesElement link links) { text += linktext() + } // 提取div中的文本信息Elements divs = docselect(div[class=post])for (orgjsoupnodesElement link divs) { text += linktext() + } } catch (IOException e) { eprintStackTrace()}
return text}
view plainprint?
Element element = nullFile f = new File(axml)DocumentBuilder db = null // documentBuilder為抽象不能直接實例化(將XML文件轉換為DOM文件)
DocumentBuilderFactory dbf = nulltry {
dbf = DocumentBuilderFactorynewInstance() // 返回documentBuilderFactory對象db = dbfnewDocumentBuilder()// 返回db對象用documentBuilderFatory對象獲得返回documentBuildr對象
Document dt = dbparse(f) // 得到一個DOM並返回給document對象element = dtgetDocumentElement()// 得到一個elment根元素
Systemoutprintln(根元素 + elementgetNodeName()) // 獲得根節點
NodeList childNodes = elementgetChildNodes() // 獲得根元素下的子節點
for (int i = i < childNodesgetLength() i++) {// 遍歷這些子節點Node node = em(i) // em(i)// 獲得每個對應位置i的結點if (Accountequals(nodegetNodeName())) { // 如果節點的名稱為Account則輸出Account元素屬性type Systemoutprintln(\r\n找到一篇賬號 所屬區域 + nodegetAttributes()getNamedItem(type)getNodeValue() + )NodeList nodeDetail = nodegetChildNodes() // 獲得<Accounts>下的節點for (int j = j < nodeDetailgetLength() j++) { // 遍歷<Accounts>下的節點Node detail = em(j) // 獲得<Accounts>元素每一個節點if (codeequals(detailgetNodeName())) // 輸出code Systemoutprintln(卡號 + detailgetTextContent())else if (passequals(detailgetNodeName())) // 輸出pass Systemoutprintln(密碼 + detailgetTextContent())else if (nameequals(detailgetNodeName())) // 輸出name Systemoutprintln(姓名 + detailgetTextContent())else if (moneyequals(detailgetNodeName())) // 輸出money Systemoutprintln(余額 + detailgetTextContent())
}
} } catch (Exception e) { Systemoutprintln(e)}
From:http://tw.wingwit.com/Article/program/Java/hx/201311/26292.html