package com
import java
import java
import java
import java
import java
import java
import java
import java
import java
import java
import java
import java
import java
import java
import
import
import java
import lparser
import lparser
import lparser
import lparser
import lparser
import lparser
import lparser
import lparser
import toptrack
import javax
import javax
import javax
import javax
import javax
import javax
import javax
import javax
import javax
import javax
import javax
import javax
/**
* mht文件解析類
* @author dl
*/
public class Html
private URL strWeb = null; /**網頁地址*/
private String strText = null; /**網頁文本內容*/
private String strFileName = null; /**本地文件名*/
private String strEncoding = null; /**網頁編碼*/
//mht格式附加信息
private String from =
private String to;
private String subject =
private String cc;
private String bcc;
private String smtp =
public static void main(String[] args) {
String strUrl =
String strEncoding =
String strText = JQuery
if (strText == null)
return;
Html
pile();
//Html
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
public Html
// TODO Auto
try {
strWeb = new URL(strUrl);
} catch (MalformedURLException e) {
// TODO Auto
e
return;
}
this
this
this
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
public boolean compile() {
if (strWeb == null || strText == null || strFileName == null || strEncoding == null)
return false;
HashMap urlMap = new HashMap();
NodeList nodes = new NodeList();
try {
Parser parser = createParser(strText);
parser
nodes = parser
} catch (ParserException e) {
// TODO Auto
e
}
extractAllScriptNodes(nodes);
ArrayList urlScriptList = extractAllScriptNodes(nodes
ArrayList urlImageList = extractAllImageNodes(nodes
for (Iterator iter = urlMap
Map
String key = (String)entry
String val = (String)entry
strText = JHtmlClear
}
try {
createMhtArchive(strText
} catch (Exception e) {
// TODO Auto
e
return false;
}
return true;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private Parser createParser(String inputHTML) {
// TODO Auto
Lexer mLexer = new Lexer(new Page(inputHTML));
return new Parser(mLexer
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private void extractAllScriptNodes(NodeList nodes) {
NodeList filtered = nodes
if (filtered != null && filtered
Tag tag = (Tag) filtered
String href = tag
if (href != null && href
try {
strWeb = new URL(href);
} catch (MalformedURLException e) {
// TODO Auto
e
}
}
}
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private ArrayList extractAllScriptNodes(NodeList nodes
ArrayList urlList = new ArrayList();
NodeList filtered = nodes
for (int i =
Tag tag = (Tag) filtered
String src = tag
// Handle external css file
if (src != null && src
String innerURL = src;
String absoluteURL = makeAbsoluteURL(strWeb
if (absoluteURL != null && !ntainsKey(absoluteURL)) {
urlMap
ArrayList urlInfo = new ArrayList();
urlInfo
urlInfo
urlList
}
tag
}
}
filtered = nodes
for (int i =
Tag tag = (Tag) filtered
String type = (tag
String rel = (tag
String href = tag
boolean isCssFile = false;
if (rel != null) {
isCssFile = rel
} else if (type != null) {
isCssFile |= type
}
// Handle external css file
if (isCssFile && href != null && href
String innerURL = href;
String absoluteURL = makeAbsoluteURL(strWeb
if (absoluteURL != null && !ntainsKey(absoluteURL)) {
urlMap
ArrayList urlInfo = new ArrayList();
urlInfo
urlInfo
urlList
}
tag
}
}
return urlList;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private ArrayList extractAllImageNodes(NodeList nodes
ArrayList urlList = new ArrayList();
NodeList filtered = nodes
for (int i =
Tag tag = (Tag) filtered
String src = tag
// Handle external css file
if (src != null && src
String innerURL = src;
String absoluteURL = makeAbsoluteURL(strWeb
if (absoluteURL != null && !ntainsKey(absoluteURL)) {
urlMap
ArrayList urlInfo = new ArrayList();
urlInfo
urlInfo
urlList
}
tag
}
}
return urlList;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
public static String makeAbsoluteURL(URL strWeb
// TODO Auto
//去除後綴
int pos = innerURL
if (pos !=
innerURL = innerURL
}
if (innerURL != null
&& innerURL
System
return innerURL;
}
URL linkUri = null;
try {
linkUri = new URL(strWeb
} catch (MalformedURLException e) {
//TODO Auto
e
return null;
}
String absURL = linkUri
absURL = JHtmlClear
absURL = JHtmlClear
System
return absURL;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private void createMhtArchive(String content
//Instantiate a Multipart object
MimeMultipart mp = new MimeMultipart(
Properties props = new Properties();
props
Session session = Session
MimeMessage msg = new MimeMessage(session);
// set mailer
msg
// set from
if (from != null) {
msg
}
// set subject
if (subject != null) {
msg
}
// to
if (to != null) {
InternetAddress[] toAddresses = getInetAddresses(to);
msg
}
// cc
if (cc != null) {
InternetAddress[] ccAddresses = getInetAddresses(cc);
msg
}
// bcc
if (bcc != null) {
InternetAddress[] bccAddresses = getInetAddresses(bcc);
msg
}
//設置網頁正文
MimeBodyPart bp = new MimeBodyPart();
bp
bp
bp
mp
int urlCount = urlScriptList
for (int i =
bp = new MimeBodyPart();
ArrayList urlInfo = (ArrayList) urlScriptList
// String url = urlInfo
String absoluteURL = urlInfo
bp
javax
DataSource source = new AttachmentDataSource(absoluteURL
bp
mp
}
urlCount = urlImageList
for (int i =
bp = new MimeBodyPart();
ArrayList urlInfo = (ArrayList) urlImageList
// String url = urlInfo
String absoluteURL = urlInfo
bp
javax
DataSource source = new AttachmentDataSource(absoluteURL
bp
mp
}
msg
// write the mime multi part message to a file
msg
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
public static void mht
try {
//TODO readEmlFile
InputStream fis = new FileInputStream(strMht);
Session mailSession = Session
MimeMessage msg = new MimeMessage(mailSession
Object content = msg
if (content instanceof Multipart) {
MimeMultipart mp = (MimeMultipart)content;
MimeBodyPart bp
String strEncodng = getEncoding(bp
String strText = getHtmlText(bp
if (strText == null)
return;
File parent = null;
if (mp
parent = new File(new File(strHtml)
parent
if (!parent
return;
}
for (int i =
MimeBodyPart bp = (MimeBodyPart)mp
String strUrl = getResourcesUrl(bp);
if (strUrl == null)
continue;
DataHandler dataHandler = bp
MimePartDataSource source = (MimePartDataSource)dataHandler
File resources = new File(parent
if (saveResourcesFile(resources
strText = JHtmlClear
}
saveHtml(strText
}
} catch (Exception e) {
// TODO Auto
e
}
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
public static String getName(String strName
char separator =
System
System
if( strName
return format(strName
return
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private static String getEncoding(MimeBodyPart bp) {
if (bp != null) {
try {
Enumeration list = bp
while (list
javax
if (head
String strType = head
int pos = strType
if (pos !=
String strEncoding = strType
if (strEncoding
strEncoding =
}
return strEncoding;
}
}
}
} catch (MessagingException e) {
// TODO Auto
e
}
}
return null;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private static String getResourcesUrl(MimeBodyPart bp) {
if (bp != null) {
try {
Enumeration list = bp
while (list
javax
if (head
return head
}
}
} catch (MessagingException e) {
// TODO Auto
e
}
}
return null;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private static String format(String strName) {
if (strName == null)
return null;
strName = strName
String strText =
for (int i =
String ch = String
if (strText
strName = strName
}
}
return strName;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private static boolean saveResourcesFile(File resources
if (resources == null || inputStream == null) {
return false;
}
BufferedInputStream in = null;
FileOutputStream fio = null;
BufferedOutputStream osw = null;
try {
in = new BufferedInputStream(inputStream);
fio = new FileOutputStream(resources);
osw = new BufferedOutputStream(new DataOutputStream(fio));
int b;
byte[] a = new byte[
boolean isEmpty = true;
while ((b = in
isEmpty = false;
osw
osw
}
osw
fio
in
inputStream
if (isEmpty)
resources
return true;
} catch (Exception e) {
// TODO Auto
e
System
return false;
} finally{
try {
if (osw != null)
osw
if (fio != null)
fio
if (in != null)
in
if (inputStream != null)
inputStream
} catch (Exception e) {
e
System
return false;
}
}
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
public static String getTitle(String mhtFilename) {
try {
//TODO readEmlFile
InputStream fis = new FileInputStream(mhtFilename);
Session mailSession = Session
MimeMessage msg = new MimeMessage(mailSession
Object content = msg
if (content instanceof Multipart) {
MimeMultipart mp = (MimeMultipart)content;
MimeBodyPart bp
String strEncodng = getEncoding(bp
String strText = getHtmlText(bp
if (strText == null)
return null;
strText = strText
int pos
int pos
if (pos
return strText
}
}
return null;
} catch (Exception e) {
// TODO Auto
e
return null;
}
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private static String getHtmlText(MimeBodyPart bp
InputStream textStream = null;
BufferedInputStream buff = null;
BufferedReader br = null;
Reader r = null;
try {
textStream = bp
buff = new BufferedInputStream(textStream);
r = new InputStreamReader(buff
br = new BufferedReader(r);
StringBuffer strHtml = new StringBuffer(
String strLine = null;
while ((strLine = br
strHtml
}
br
r
textStream
return strHtml
} catch (Exception e) {
// TODO Auto
e
} finally{
try{
if (br != null)
br
if (buff != null)
buff
if (textStream != null)
textStream
}catch(Exception e){
System
}
}
return null;
}
/**
*<br>方法說明
*<br>輸入參數
*<br>返回類型
*/
private static void saveHtml(String strText
try {
FileWriter fw = new FileWriter(strHtml);
fw
fw
} catch (IOException e) {
// TODO Auto
e
System
}
}
private InternetAddress[] getInetAddresses(String emails) throws Exception {
ArrayList list = new ArrayList();
StringTokenizer tok = new StringTokenizer(emails
while (tok
list
}
int count = list
InternetAddress[] addresses = new InternetAddress[count];
for (int i =
addresses[i] = new InternetAddress(list
}
return addresses;
}
class AttachmentDataSource implements DataSource {
private MimetypesFileTypeMap map = new MimetypesFileTypeMap();
private String strUrl;
private String strType;
private byte[] dataSize = null;
/**
* This is some content type maps
*/
private Map normalMap = new HashMap();
{
// Initiate normal mime type map
// Images
normalMap
normalMap
}
public AttachmentDataSource(String strUrl
this
this
strUrl = strUrl
strUrl = strUrl
dataSize = JQuery
}
/**
* Returns the content type
*/
public String getContentType() {
return getMimeType(getName());
}
public String getName() {
char separator = File
if( strUrl
return strUrl
return strUrl;
}
private String getMimeType(String fileName) {
String type = (String)normalMap
if (type == null) {
try {
type = map
} catch (Exception e) {
// TODO: handle exception
}
System
// Fix the null exception
if (type == null) {
type =
}
}
return type;
}
public InputStream getInputStream() throws IOException {
// TODO Auto
if (dataSize == null)
dataSize = new byte[
return new ByteArrayInputStream(dataSize);
}
public OutputStream getOutputStream() throws IOException {
// TODO Auto
return new java
}
}
}
From:http://tw.wingwit.com/Article/program/Java/hx/201311/26795.html