熱點推薦:
您现在的位置: 電腦知識網 >> 編程 >> Java編程 >> Java核心技術 >> 正文

Java獲取網絡文件並插入數據庫

2013-11-23 18:46:36  來源: Java核心技術 

  抓取各大網站的數據並插入數據庫，這樣就不用為沒有數據而煩惱了。

  獲取百度的歌曲名、歌手和鏈接：

  package webTools;

  import javaioBufferedReader;

  import javaioIOException;

  import javaioInputStreamReader;

  import javaioUnsupportedEncodingException;

  import MalformedURLException;

  import URL;

  import javautilArrayList;

  import javautilHashMap;

  import javautilList;

  import javautilregexMatcher;

  import javautilregexPattern;

  import dbToolsDBTools;

  /**
   * Scrapes song entries from an HTML page with regular expressions and inserts them into the
   * {@code song} table through {@code DBTools}.
   *
   * NOTE(review): this listing has lost characters. Statements such as
   * {@code String rowContent = ;}, {@code inreadLine()} and {@code for (int j = ; ...)} show
   * missing string literals, member-access dots and numeric literals, so the class does not
   * compile as shown. The stripped literal values cannot be recovered from this text; restore
   * them from the original project before use.
   */
  public class IOTOWeb {

  /**
   * Reads the resource at {@code htmlURL} line by line and returns the lines concatenated.
   * Only the line text is appended, so line terminators are not kept. Exceptions are printed
   * and whatever was read so far (possibly nothing) is returned.
   *
   * @param htmlURL address handed to the {@code URL} constructor
   * @return the accumulated page text
   */
  public String getHtmlContent(String htmlURL) {

  URL url = null;

  String rowContent = ;

  StringBuffer htmlContent = new StringBuffer();

  try {

  url = new URL(htmlURL);

  // NOTE(review): the second InputStreamReader argument reads `gb` here; presumably a
  // GB-family charset name whose quotes and digits were lost. Confirm the exact name.
  BufferedReader in = new BufferedReader(new InputStreamReader(url

  openStream() gb));

  while ((rowContent = inreadLine()) != null) {

  htmlContentappend(rowContent);

  }

  // NOTE(review): the reader is closed only on this success path; an exception raised while
  // reading skips the close call.
  inclose();

  } catch (MalformedURLException e) {

  // The URL could not be parsed: print the trace and fall through to the return.

  eprintStackTrace();

  } catch (UnsupportedEncodingException e) {

  // The charset name was rejected: print the trace and fall through to the return.

  eprintStackTrace();

  } catch (IOException e) {

  // Opening or reading the stream failed: print the trace and fall through to the return.

  eprintStackTrace();

  }

  return htmlContenttoString();

  }

  /**
   * Returns every full match of the table-cell/anchor pattern found in {@code htmlContent}.
   *
   * NOTE(review): the pattern text is damaged in this listing (quotes and other characters are
   * missing); verify it against the original before relying on it.
   *
   * @param htmlContent text to scan
   * @return raw {@code ArrayList} of matched substrings; empty when nothing matches
   */
  public List getLink(String htmlContent) {

  ArrayList listLink = new ArrayList();

  String regex = <td[^>]*>[\\(]*<a[^>]*href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))[^>]*>(*?)[\\)]*[\\s]*</td>;

  Pattern pattern = pile(regex PatternDOTALL);

  Matcher matcher = patternmatcher(htmlContent);

  while (matcherfind()) {

  listLinkadd(matchergroup());

  }

  return listLink;

  }

  /**
   * Collects each match of the {@code href=} pattern in {@code htmlContent} after passing it
   * through a {@code replaceFirst} and a {@code replace} call.
   *
   * NOTE(review): the arguments of both replace calls are lost in this listing; presumably
   * they strip the {@code href=} prefix and the surrounding quotes. Confirm.
   *
   * @param htmlContent text to scan
   * @return list of the post-processed matches; empty when nothing matches
   */
  public List<String> getHref(String htmlContent) {

  String regex;

  List listtHref = new ArrayList();

  regex = href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))\;

  Pattern pa = pile(regex PatternDOTALL);

  Matcher ma = pamatcher(htmlContent);

  while (mafind()) {

  listtHrefadd(magroup()replaceFirst(href=\ )replace(\

  ));

  }

  return listtHref;

  }

  /**
   * Collects each match of the performer pattern in {@code htmlContent}, post-processed with
   * the same {@code replaceFirst}/{@code replace} chain used by {@code getHref}.
   *
   * NOTE(review): the pattern below starts with {@code ]*href=}, so its beginning appears to be
   * truncated, and the replace arguments are lost as well. Verify against the original.
   *
   * @param htmlContent text to scan
   * @return list of the post-processed matches; empty when nothing matches
   */
  public List<String> getPerson(String htmlContent) {

  String regex;

  List list = new ArrayList();

  regex = ]*href=(\"([^\"]*)\"|\([^\]*)\|([^\\s>]*))[^>]*>(*?)\\>\\(<a[^>]*href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))[^>]*>(*?)\\);

  Pattern pa = pile(regex PatternDOTALL);

  Matcher ma = pamatcher(htmlContent);

  while (mafind()) {

  listadd(magroup()replaceFirst(href=\ )replace(\ ));

  }

  return list;

  }

  /**
   * Returns every full match of the anchor pattern found in {@code htmlContent}. The local
   * list is named {@code listPerson} but holds the anchor matches used as song names.
   *
   * @param htmlContent text to scan
   * @return list of matched substrings; empty when nothing matches
   */
  public List<String> getSongName(String htmlContent) {

  String regex;

  List listPerson = new ArrayList();

  regex = <a[^>]*href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))[^>]*>(*?)</a>\\s;

  Pattern pa = pile(regex PatternDOTALL);

  Matcher ma = pamatcher(htmlContent);

  while (mafind()) {

  listPersonadd(magroup());

  }

  return listPerson;

  }

  /**
   * Concatenates every match of the table pattern found in {@code htmlContent}.
   *
   * NOTE(review): the attribute values inside the pattern (width, cellpadding, cellspacing)
   * are lost in this listing; verify the pattern against the original.
   *
   * @param htmlContent full page text
   * @return all matches joined without a separator; empty string when nothing matches
   */
  public String getMainContent(String htmlContent) {

  String regex = <table width=\%\ align=\center\ cellpadding=\\ cellspacing=\\ class=\list\>(*?)</table>;

  StringBuffer mainContent = new StringBuffer();

  Pattern pattern = pile(regex PatternDOTALL);

  Matcher matcher = patternmatcher(htmlContent);

  while (matcherfind()) {

  mainContentappend(matchergroup());

  }

  return mainContenttoString();

  }

  /**
   * Runs {@code replaceAll} over {@code s} with a tag-shaped pattern.
   *
   * NOTE(review): presumably this removes markup tags, but both the pattern and the
   * replacement text are damaged in this listing. Confirm.
   *
   * @param s text to process
   * @return the result of the {@code replaceAll} call
   */
  public String outTag(final String s) {

  return sreplaceAll(<*?> );

  }

  // Database helper created once per IOTOWeb instance (package-private field); used by
  // getFromBaiduMap to run the insert.
  DBTools dbTools = new DBTools();

  /**
   * Fetches {@code htmlURL}, narrows the page with {@code getMainContent}, and for every cell
   * returned by {@code getLink} derives a song name, a performer and a link, then inserts the
   * three values with {@code dbTools.update}.
   *
   * @param htmlURL page to scrape
   * @throws Throwable anything raised by the extraction steps or by {@code dbTools.update}
   */
  public void getFromBaiduMap(String htmlURL) throws Throwable {

  // NOTE(review): htmlContentMap is created but never used in this method.
  HashMap htmlContentMap = new HashMap();

  String htmlContent = getHtmlContent(htmlURL);

  String mainContent = getMainContent(htmlContent);

  List listLink = getLink(mainContent);

  for (int j = ; j < listLinksize(); j++) {

  String tdTag = listLinkget(j)toString();

  List songNameList = getSongName(tdTag);

  // NOTE(review): the list index is lost here; presumably the first element. If the list is
  // empty this lookup would fail. Confirm how that case should be handled.
  String songName = outTag(songNameListget()toString());

  List personList = getPerson(tdTag);

  String songPerson = ;

  if (personListsize() != ) {

  for (int n = ; n < personListsize(); n++) {

  // Systemoutprintln(personListget(n)toString());

  // Each pass overwrites songPerson, so only the last matched entry is kept.
  songPerson = outTag(personListget(n)toString());

  }

  } else {

  songPerson = ;

  }

  // Systemoutprint(songNameListget()toString());

  List hrefList = getHref(songNameListget()toString());

  String songHref = hrefListget()toString();

  Systemoutprintln();

  // Parameterised insert; the three values are bound in the order they are added below.
  String sql = insert into song(songNamesongPersonsongHref) values(???);

  ArrayList list_values = new ArrayList();

  list_valuesadd(songName);

  list_valuesadd(songPerson);

  list_valuesadd(songHref);

  dbToolsupdate(sql list_values);

  }

  }

  }

  DBTools數據庫鏈接類

  package dbTools;

  import java.sql.*;

  import java.util.ArrayList;

  /**
   * Minimal JDBC helper: opens one MySQL connection when constructed and runs parameterised
   * statements against it.
   *
   * <p>Parameters are bound positionally: element {@code i} of the value list is bound to JDBC
   * parameter {@code i + 1}.
   *
   * <p>NOTE(review): the port in the JDBC URL is assumed to be 3306 (the MySQL default);
   * confirm it, and the credentials, against the real deployment.
   */
  public class DBTools {

    /** Shared connection; stays {@code null} when loading the driver or connecting fails. */
    private Connection connection;

    /**
     * Loads the MySQL driver and tries to connect. Failures are printed rather than thrown, so
     * construction never fails; {@link #query} and {@link #update} report a missing connection
     * when they are called.
     */
    public DBTools() {
      try {
        Class.forName("com.mysql.jdbc.Driver");
      } catch (ClassNotFoundException e) {
        e.printStackTrace();
      }
      try {
        connection = DriverManager.getConnection(
            "jdbc:mysql://localhost:3306/TestURL", "root", "zhuyi");
      } catch (SQLException e) {
        e.printStackTrace();
      }
    }

    /**
     * Runs a parameterised SELECT and returns every row as a {@code String[]} holding the
     * column values in result-set order.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values values for the placeholders, in order; {@code null} means none
     * @return list of {@code String[]} rows; empty when the query yields no rows
     * @throws IllegalStateException if no database connection could be opened
     * @throws Throwable any {@code SQLException} raised by the driver
     */
    public ArrayList query(String sql, ArrayList list_values) throws Throwable {
      ArrayList<String[]> listRows = new ArrayList<String[]>();
      PreparedStatement statement = requireConnection().prepareStatement(sql);
      try {
        bindParameters(statement, list_values);
        ResultSet rows = statement.executeQuery();
        try {
          // The column count is fixed for the whole result set, so read it once.
          int columnCount = rows.getMetaData().getColumnCount();
          while (rows.next()) {
            String[] rowinfo = new String[columnCount];
            for (int i = 0; i < columnCount; i++) {
              rowinfo[i] = rows.getString(i + 1); // JDBC columns are 1-based
            }
            listRows.add(rowinfo);
          }
        } finally {
          rows.close();
        }
      } finally {
        // Always release the statement, including when binding or execution fails.
        statement.close();
      }
      return listRows;
    }

    /**
     * Runs a parameterised INSERT, UPDATE or DELETE.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values values for the placeholders, in order; {@code null} means none
     * @throws IllegalStateException if no database connection could be opened
     * @throws Throwable any {@code SQLException} raised by the driver
     */
    public void update(String sql, ArrayList list_values) throws Throwable {
      PreparedStatement statement = requireConnection().prepareStatement(sql);
      try {
        bindParameters(statement, list_values);
        statement.executeUpdate();
      } finally {
        // Always release the statement, including when binding or execution fails.
        statement.close();
      }
    }

    /** Returns the open connection, or fails with a clear message instead of a bare NPE. */
    private Connection requireConnection() {
      if (connection == null) {
        throw new IllegalStateException(
            "No database connection: driver loading or DriverManager.getConnection failed");
      }
      return connection;
    }

    /** Binds each list element to the matching 1-based JDBC parameter position. */
    private static void bindParameters(PreparedStatement statement, ArrayList values)
        throws SQLException {
      if (values == null) {
        return;
      }
      for (int i = 0; i < values.size(); i++) {
        statement.setObject(i + 1, values.get(i));
      }
    }
  }

  Servlet調用

  代碼

  package controller;

  import javaioIOException;

  import javaioPrintWriter;

  import javautilList;

  import javaxservletServletException;

  import javaxservlethttpHttpServlet;

  import javaxservlethttpHttpServletRequest;

  import javaxservlethttpHttpServletResponse;

  import webToolsIOTOWeb;

  /**
   * Servlet whose GET handler starts the {@code IOTOWeb} scrape and whose POST handler writes a
   * small HTML page.
   *
   * NOTE(review): this listing has lost characters (member-access dots, string-literal quotes,
   * argument commas, parts of literals), e.g. {@code superdestroy()} and
   * {@code responsesetContentType(text/html)}, so it does not compile as shown.
   */
  public class TestURL extends HttpServlet {

  /**
   * Default constructor; only calls the superclass constructor.
   */

  public TestURL() {

  super();

  }

  /**
   * Servlet teardown hook; only delegates to the superclass.
   */

  public void destroy() {

  superdestroy(); // Just puts destroy string in log

  // Put your code here

  }

  /**
   * Handles GET by creating an {@code IOTOWeb} and calling {@code getFromBaiduMap}. Any
   * {@code Throwable} is printed; nothing is written to {@code response} here.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature; not thrown by the visible body
   * @throws IOException
   *             declared by the signature; not thrown by the visible body
   */

  public void doGet(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException {

  try {

  IOTOWeb iotoWeb = new IOTOWeb();

  // NOTE(review): the page address is lost in this listing; only `?id=?top` remains. The
  // real URL must be restored from the original project.
  iotoWebgetFromBaiduMap(?id=?top);

  } catch (Throwable e) {

  // Scrape or insert failed: print the trace and finish the request normally.

  eprintStackTrace();

  }

  }

  /**
   * Handles POST by writing a fixed HTML page that includes the result of
   * {@code getClass()} for this servlet.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature
   * @throws IOException
   *             if obtaining or writing to the response writer fails
   */

  public void doPost(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException {

  responsesetContentType(text/html);

  PrintWriter out = responsegetWriter();

  out

  println(<!DOCTYPE HTML PUBLIC \//WC//DTD HTML Transitional//EN\>);

  outprintln(<HTML>);

  outprintln(  <HEAD><TITLE>A Servlet</TITLE></HEAD>);

  outprintln(  <BODY>);

  outprint(    This is );

  outprint(thisgetClass());

  outprintln( using the POST method);

  outprintln(  </BODY>);

  outprintln(</HTML>);

  outflush();

  outclose();

  }

  /**
   * Servlet initialization hook; the body is empty.
   *
   * @throws ServletException
   *             declared by the signature; not thrown by the visible body
   */

  public void init() throws ServletException {

  // Put your code here

  }

  }

  獲取金書網的圖書名

  代碼

  package webTools;

  import javaioBufferedReader;

  import javaioInputStreamReader;

  import URL;

  import javautilArrayList;

  import javautilList;

  import javautilregexMatcher;

  import javautilregexPattern;

  import dbToolsDBTools;

  /**
   * Downloads a book page, extracts the book name with a regular expression and inserts it into
   * the {@code bookinfo} table through {@code DBTools}.
   *
   * NOTE(review): this listing has lost characters (member-access dots, string-literal quotes,
   * numeric literals, argument commas), e.g. {@code String bookName = ;} and
   * {@code inreadLine()}, so it does not compile as shown. The stripped literal values cannot be
   * recovered from this text.
   */
  public class GetBook {

  /**
   * Reads the resource at {@code htmlURL} line by line and returns the lines concatenated.
   * Only the line text is appended, so line terminators are not kept.
   *
   * @param htmlURL address handed to the {@code URL} constructor
   * @return the accumulated page text
   * @throws Throwable any failure while parsing the URL, opening the stream or reading it
   */
  public String getHtmlContent(String htmlURL) throws Throwable {

  URL url = null;

  String rowContent = ;

  StringBuffer htmlContent = new StringBuffer();

  url = new URL(htmlURL);

  // NOTE(review): the second InputStreamReader argument reads `gb` here; presumably a
  // GB-family charset name whose quotes and digits were lost. Confirm the exact name.
  BufferedReader in = new BufferedReader(new InputStreamReader(url

  openStream() gb));

  while ((rowContent = inreadLine()) != null) {

  htmlContentappend(rowContent);

  }

  // NOTE(review): reached only when reading succeeds; an exception above propagates without
  // closing the reader.
  inclose();

  return htmlContenttoString();

  }

  /**
   * Returns the first match of the span pattern in {@code htmlContent}, or the initial value of
   * {@code bookName} when nothing matches.
   *
   * NOTE(review): the initial value and part of the class name inside the pattern are lost in
   * this listing. Verify both against the original.
   *
   * @param htmlContent page text to scan
   * @return the matched span markup, or the initial value when there is no match
   */
  public String getBookName(String htmlContent) {

  String bookName = ;

  String regex = <span class=\style\>[^>]*</span>;

  Pattern pattern = pile(regex PatternDOTALL);

  Matcher matcher = patternmatcher(htmlContent);

  if (matcherfind()) {

  bookName = matchergroup();

  }

  return bookName;

  }

  /**
   * Runs {@code replaceAll} over {@code s} with a tag-shaped pattern.
   *
   * NOTE(review): presumably this removes markup tags, but both the pattern and the
   * replacement text are damaged in this listing. Confirm.
   *
   * @param s text to process
   * @return the result of the {@code replaceAll} call
   */
  public String outTag(final String s) {

  return sreplaceAll(<*?> );

  }

  // Database helper created once per GetBook instance (package-private field); used by
  // getFromJINSHU to run the insert.
  DBTools dbtools = new DBTools();

  /**
   * Fetches {@code htmlURL}, extracts the book name, and when the name passes the check below
   * prints it and inserts it into {@code bookinfo}.
   *
   * @param htmlURL book page to scrape
   * @throws Throwable anything raised while fetching the page or by {@code dbtools.update}
   */
  public void getFromJINSHU(String htmlURL) throws Throwable {

  String htmlContent = getHtmlContent(htmlURL);

  String bookName = outTag(getBookName(htmlContent));

  // NOTE(review): `!equals(bookName)` presumably was a comparison against an empty string
  // literal whose quotes were lost. Confirm.
  if (bookName != null && !equals(bookName)) {

  Systemoutprintln(bookName);

  String sql = insert into bookinfo(bookName) values(?);

  ArrayList list_values = new ArrayList();

  list_valuesadd(bookName);

  dbtoolsupdate(sql list_values);

  }

  }

  }

  調用Servlet

  代碼

  package controller;

  import javaioIOException;

  import javaioPrintWriter;

  import javaxservletServletException;

  import javaxservlethttpHttpServlet;

  import javaxservlethttpHttpServletRequest;

  import javaxservlethttpHttpServletResponse;

  import webToolsGetBook;

  /**
   * Servlet that walks a range of numeric book ids, builds a page address for each and passes
   * it to {@code GetBook.getFromJINSHU}.
   *
   * NOTE(review): this listing has lost characters (member-access dots, string-literal quotes,
   * numeric literals, argument commas), e.g. {@code int i = ;} and {@code for (; i < ; i++)},
   * so it does not compile as shown. The start value, the upper bound and the address text
   * cannot be recovered from this text.
   */
  public class TestBook extends HttpServlet {

  /**
   * Default constructor; only calls the superclass constructor.
   */

  public TestBook() {

  super();

  }

  /**
   * Servlet teardown hook; only delegates to the superclass.
   */

  public void destroy() {

  superdestroy(); // Just puts destroy string in log

  // Put your code here

  }

  /**
   * Current book id. Both {@code doGet} and {@code doPost} read and advance it.
   *
   * NOTE(review): this is an instance field and is never reset in the visible code, so its
   * value persists between calls on the same object. Confirm that is intended.
   */

  int i = ;

  /**
   * Handles GET by scraping one book page per value of {@code i} until the loop bound is
   * reached.
   *
   * NOTE(review): when a page fails, the catch block increments {@code i} (in addition to the
   * loop's own increment) and calls {@code doPost}, which runs the same loop and calls back
   * into {@code doGet} on its own failures, so every failure adds a nested call.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature; may come from the nested {@code doPost} call
   * @throws IOException
   *             declared by the signature; may come from the nested {@code doPost} call
   */
  public void doGet(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException {

  GetBook bookinfo = new GetBook();

  for (; i < ; i++) {

  String bookURL = /booksinfo// + i

  + l;

  try {

  bookinfogetFromJINSHU(bookURL);

  } catch (Throwable e) {

  i++;

  doPost(request response);

  }

  }

  }

  /**
   * Handles POST with the same loop as {@code doGet}; on a failed page it increments
   * {@code i} and calls {@code doGet}.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature; may come from the nested {@code doGet} call
   * @throws IOException
   *             declared by the signature; may come from the nested {@code doGet} call
   */

  public void doPost(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException {

  GetBook bookinfo = new GetBook();

  for (; i < ; i++) {

  String bookURL = /booksinfo// + i

  + l;

  try {

  bookinfogetFromJINSHU(bookURL);

  } catch (Throwable e) {

  i++;

  doGet(request response);

  }

  }

  }

  /**
   * Servlet initialization hook; the body is empty.
   *
   * @throws ServletException
   *             declared by the signature; not thrown by the visible body
   */

  public void init() throws ServletException {

  // Put your code here

  }

  }

  每種功能的實現方法有很多，希望各位可以交流不同的思想和方法。


From:http://tw.wingwit.com/Article/program/Java/hx/201311/25707.html
  • 上一篇文章:

  • 下一篇文章:
  • 推薦文章
    Copyright © 2005-2013 電腦知識網 Computer Knowledge   All rights reserved.