抓取各大網站的數據插入數據庫
獲取百度的歌曲名
package webTools;
import java
import java
import java
import java
import
import
import java
import java
import java
import java
import java
import dbTools
public class IOTOWeb {
public String getHtmlContent(String htmlURL) {
URL url = null;
String rowContent =
StringBuffer htmlContent = new StringBuffer();
try {
url = new URL(htmlURL);
BufferedReader in = new BufferedReader(new InputStreamReader(url
while ((rowContent = in
htmlContent
}
in
} catch (MalformedURLException e) {
// TODO Auto
e
} catch (UnsupportedEncodingException e) {
// TODO Auto
e
} catch (IOException e) {
// TODO Auto
e
}
return htmlContent
}
public List getLink(String htmlContent) {
ArrayList listLink = new ArrayList();
String regex =
Pattern pattern = pile(regex
Matcher matcher = pattern
while (matcher
listLink
}
return listLink;
}
public List<String> getHref(String htmlContent) {
String regex;
List listtHref = new ArrayList();
regex =
Pattern pa = pile(regex
Matcher ma = pa
while (ma
listtHref
}
return listtHref;
}
public List<String> getPerson(String htmlContent) {
String regex;
List list = new ArrayList();
regex =
Pattern pa = pile(regex
Matcher ma = pa
while (ma
list
}
return list;
}
public List<String> getSongName(String htmlContent) {
String regex;
List listPerson = new ArrayList();
regex =
Pattern pa = pile(regex
Matcher ma = pa
while (ma
listPerson
}
return listPerson;
}
public String getMainContent(String htmlContent) {
String regex =
StringBuffer mainContent = new StringBuffer();
Pattern pattern = pile(regex
Matcher matcher = pattern
while (matcher
mainContent
}
return mainContent
}
public String outTag(final String s) {
return s
}
DBTools dbTools = new DBTools();
public void getFromBaiduMap
HashMap htmlContentMap = new HashMap();
String htmlContent = getHtmlContent(htmlURL);
String mainContent = getMainContent(htmlContent);
List listLink = getLink(mainContent);
for (int j =
String tdTag = listLink
List songNameList = getSongName(tdTag);
String songName = outTag(songNameList
List personList = getPerson(tdTag);
String songPerson =
if (personList
for (int n =
// System
songPerson = outTag(personList
}
} else {
songPerson =
}
// System
List hrefList = getHref(songNameList
String songHref = hrefList
System
String sql =
ArrayList list_values = new ArrayList();
list_values
list_values
list_values
dbTools
}
}
}
DBTools數據庫鏈接類
package dbTools;
import java
import java
public class DBTools {
private PreparedStatement preparedStatement;
private ResultSet resultSet;
private Connection connection;
public DBTools() {
try {
Class
} catch (ClassNotFoundException e) {
// TODO Auto
e
}
try {
connection = DriverManager
} catch (SQLException e) {
// TODO Auto
e
}
}
public ArrayList query(String sql
ArrayList listRows = new ArrayList();
preparedStatement = connection
for (int i =
preparedStatement
}
resultSet = preparedStatement
while (resultSet
String[] rowinfo = new String[resultSet
for (int i =
rowinfo[i] = resultSet
}
listRows
}
return listRows;
}
public void update(String sql
preparedStatement = connection
for (int i =
preparedStatement
}
preparedStatement
preparedStatement
}
}
Servlet調用
代碼
package controller;
import java
import java
import java
import javax
import javax
import javax
import javax
import webTools
public class TestURL extends HttpServlet {
/**
* Constructor of the object
*/
public TestURL() {
super();
}
/**
* Destruction of the servlet
*/
public void destroy() {
super
// Put your code here
}
/**
* The doGet method of the servlet
*
* This method is called when a form has its tag value method equals to get
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws ServletException
* if an error occurred
* @throws IOException
* if an error occurred
*/
public void doGet(HttpServletRequest request
throws ServletException
try {
IOTOWeb iotoWeb = new IOTOWeb();
iotoWeb
} catch (Throwable e) {
// TODO Auto
e
}
}
/**
* The doPost method of the servlet
*
* This method is called when a form has its tag value method equals to
* post
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws ServletException
* if an error occurred
* @throws IOException
* if an error occurred
*/
public void doPost(HttpServletRequest request
throws ServletException
response
PrintWriter out = response
out
out
out
out
out
out
out
out
out
out
out
}
/**
* Initialization of the servlet
*
* @throws ServletException
* if an error occurs
*/
public void init() throws ServletException {
// Put your code here
}
}
獲取金書網的圖書名
代碼
package webTools;
import java
import java
import
import java
import java
import java
import java
import dbTools
public class GetBook {
public String getHtmlContent(String htmlURL) throws Throwable {
URL url = null;
String rowContent =
StringBuffer htmlContent = new StringBuffer();
url = new URL(htmlURL);
BufferedReader in = new BufferedReader(new InputStreamReader(url
while ((rowContent = in
htmlContent
}
in
return htmlContent
}
public String getBookName(String htmlContent) {
String bookName =
String regex =
Pattern pattern = pile(regex
Matcher matcher = pattern
if (matcher
bookName = matcher
}
return bookName;
}
public String outTag(final String s) {
return s
}
DBTools dbtools = new DBTools();
public void getFromJINSHU(String htmlURL) throws Throwable {
String htmlContent = getHtmlContent(htmlURL);
String bookName = outTag(getBookName(htmlContent));
if (bookName != null && !
System
String sql =
ArrayList list_values = new ArrayList();
list_values
dbtools
}
}
}
調用Servlet
代碼
package controller;
import java
import java
import javax
import javax
import javax
import javax
import webTools
public class TestBook extends HttpServlet {
/**
* Constructor of the object
*/
public TestBook() {
super();
}
/**
* Destruction of the servlet
*/
public void destroy() {
super
// Put your code here
}
/**
* The doGet method of the servlet
*
* This method is called when a form has its tag value method equals to get
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws ServletException
* if an error occurred
* @throws IOException
* if an error occurred
*/
int i =
public void doGet(HttpServletRequest request
throws ServletException
GetBook bookinfo = new GetBook();
for (; i <
String bookURL =
+ l
try {
bookinfo
} catch (Throwable e) {
i++;
doPost(request
}
}
}
/**
* The doPost method of the servlet
*
* This method is called when a form has its tag value method equals to
* post
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws ServletException
* if an error occurred
* @throws IOException
* if an error occurred
*/
public void doPost(HttpServletRequest request
throws ServletException
GetBook bookinfo = new GetBook();
for (; i <
String bookURL =
+ l
try {
bookinfo
} catch (Throwable e) {
i++;
doGet(request
}
}
}
/**
* Initialization of the servlet
*
* @throws ServletException
* if an error occurs
*/
public void init() throws ServletException {
// Put your code here
}
}
每種功能的實現方法有很多
From:http://tw.wingwit.com/Article/program/Java/hx/201311/25707.html