由胡正開發的星際譯王是Linux平台上很強大的一個開源的翻譯軟件(也有Windows版本的)支持多種詞庫
import java
import java
import java
import java
/**
* {@docRoot}
* Java版詞典測試版
* 詞典采用星際譯王的詞典
*
* @author menglongbor
* @updateDate
* @version v
*
* 相關參考鏈接
*
* _zhu_xiang/item/
*
*
*
*
*
*/
public class testdict
{
final static intMAX_WORD=
final static intMAX_KEYS=
final static intSIZEINT=
final static StringKEY[]= {//
public static InputStreamisidx= null;// 讀取idx文件時所要的流
public static InputStreamisdict= null;// 讀取dict文件時所要的流
public static longSTREAM_LOCAL=
public static StringidxfileString=
public static StringdictfileString=
/**
* 從idx文件中獲取當前目標單詞
* @param word_buf 保存的是c/c++字符串數組轉換為JAVA字符串
* @param data_poffset 用來保存單詞的data偏移位置信息
* @param data_plength 用來保存單詞的data長度信息
* @param len
* @return
*/
public static boolean get_word(String[] word_buf
int[] data_plength
{
// int len =
boolean flag = true;
len[
int index =
byte wd[] = new byte[MAX_WORD];
int value =
try
{
// 讀取單詞
// 讀到單詞結束符\
while (true)
{
index = isidx
STREAM_LOCAL++;// 每讀取一次
if (index ==
{
// isidx
flag = false;
break;
}
if ((index !=
{
wd[len[
len[
} else
{
break;
}
}
// 轉換為JAVA字符串
// 此處不用再需要像c/c++那樣去掉了最後那個結束符了
byte wd
for (int i =
{
wd
}
word_buf[
// System
// wd = null;// 釋放內存
// wd
// 讀取偏移量值
for (int i =
{
// 將
int shift = (
index = isidx
STREAM_LOCAL++;// 每讀取一次
if (index ==
{
// isidx
flag = false;
return flag;
}
value += (index &
}
data_poffset[
// 讀取區塊大小值
value =
for (int i =
{
// 將
int shift = (
index = isidx
STREAM_LOCAL++;// 每讀取一次
if (index ==
{
// isidx
flag = false;
return flag;
}
value += (index &
}
data_plength[
}
catch (Exception e)
{
System
}
// System
// 得到單詞字符長度
return flag;
}
/**
* 通過偏移位置offset和長度length 來從dict文件中獲取data內容UTF
* @param offset 要讀取的內容的起始偏移
* @param length 要讀取的內容的數據塊大小
* @return 字節數組的data int
*/
public static byte[] get_data(int[] offset
{
long oft = offset[
long len = length[
long skip;
byte data_buf[] = new byte[length[
System
+ length[
try
{
isdict
long valuedata = isdict
if (valuedata < oft + len)
{
System
}
// skip=isdict
skip = skipBytesFromStream(isdict
if (skip != oft)
{
System
}
if (isdict
{
System
}
// // Unicode
// StringBuffer sb = new StringBuffer()
//
// int size =isdict
//
// for (int j =
// {
//
// int l = data_buf[j++];
//
// int h = data_buf[j++];
//
// char c = (char) ((l &
//
// sb
//
// }
//
// // return sb
}
catch (Exception e)
{
data_buf = null;
System
e
}
if (data_buf == null)
{
return null;
}
return data_buf;
}
/**
* utf
* 假如 newContent 為UTF
* URLEncoder
* @param in 要進行解碼的UTF
* @param offset
* @param length
* @return
*/
public static String UTF
{
StringBuffer buff = new StringBuffer()
int max = offset + length;
for (int i = offset; i < max; i++)
{
char c =
if ((in[i] &
{
c = (char) in[i];
} else if ((in[i] &
{
c |= ((in[i] &
i++;
c |= ((in[i] &
} else if ((in[i] &
{
c |= ((in[i] &
i++;
c |= ((in[i] &
i++;
c |= ((in[i] &
} else if ((in[i] &
{
c |= ((in[i] &
i++;
c |= ((in[i] &
i++;
c |= ((in[i] &
i++;
c |= ((in[i] &
} else
{
c =
}
buff
}
return buff
}
public static byte[] UTF
{
ByteArrayOutputStream bos = new ByteArrayOutputStream()
try
{
int strlen = str
for (int i =
{
char t = str
int c =
c |= (t &
if (c >=
{
bos
} else if (c >
{
bos
bos
} else if (c >
{
bos
// correction
// (mb)
bos
bos
} else if (c >
{
bos
bos
bos
bos
}
}
bos
}
catch (Exception e)
{
}
return bos
}
/**
* 將UTF
*
* @param utf_data
* byte[]
* @param len
* int
* @return String
*/
public static String UTF
{
StringBuffer unis = new StringBuffer()
char unic =
int ptr =
int cntBits =
for (
{
cntBits = getCntBits(utf_data[ptr])
if (cntBits ==
{
++ptr;
continue;
} else if (cntBits ==
{
unic = UTFC
++ptr;
} else
{
unic = UTFC
ptr += cntBits;
}
unis
}
return unis
}
/**
* 將指定的UTF
* @param utf byte[]
* @param sptr int
* @param cntBits int
* @return char
*/
public static char UTFC
{
/*
* Unicode <
* U
*
*
*
*
*/
int uniC =
byte firstByte = utf[sptr];
int ptr =
// resolve single byte UTF
if (cntBits ==
return (char) firstByte;
// resolve the first byte
firstByte &= (
// resolve multiple bytes UTF
for (int i = sptr + cntBits
{
byte utfb = utf[i];
uniC |= (utfb &
ptr +=
}
uniC |= firstByte 《 ptr;
return (char) uniC;
}
/**
* 根據給定字節計算UTF
* @param b
* @return
*/
private static int getCntBits(byte b)
{
int cnt =
if (b ==
return
for (int i =
{
if (((b 》 i) &
++cnt;
else
break;
}
return (cnt >
}
/**
* 顯示data內容
* @param data_buf UTF
* @param data_length UTF
*/
public static void display_data(byte[] data_buf
{
// 將UTF
// String tempString = UTF
String tempString = UTF
// String tempString = new String(data_buf)
data_buf = null;
System
}
/**
* 從idx文件中搜索由word指定的單詞
* @param word
* @param data_poffset
* @param data_plength
* @return 是否搜索成功
*/
public static boolean search_word(String word
int[] data_plength)
{
String wd[] = new String[
boolean temp = false;
int len[] = new int[
// 從idx文件中獲取當前目標單詞
// for (get_word(wd
// data_poffset
// {
while (get_word(wd
{
// System
// if (wd[pareToIgnoreCase(word) ==
// 比較字符串s
if (strsEqualsIgnoreCase(wd[
{
System
temp = true;
break;
}
}
return temp;
}
/**
* 從標准輸入獲取待查詢的單詞
* @param max_len
* @param count
* @return
*/
public static String get_input(int max_len
{
byte input_buf[] = new byte[max_len];
count[
String tempString[] = new String[
try
{
count[
byte temp_buf[] = new byte[count[
for (int i =
{
temp_buf[i] = input_buf[i];
}
tempString[
}
catch (Exception e)
{
System
}
System
return tempString[
}
/**
* 從標准輸入獲取待查詢的單詞
* @param input_buf
* @param count
* @return
*/
public static byte[] get_input(byte[] input_buf
{
try
{
count[
}
catch (Exception e)
{
input_buf = null;
System
}
return input_buf;
}
/**
* 緩存KEYS在idx中的偏移信息
* @param idx_cache 保存每個單字母單詞對應的起始位置
* @return
*/
public static void cache_idx(long[] idx_cache)
{
int i;
long[] p = idx_cache;
int unused
int unused
try
{
// 將文件內部的位置指針重新指向一個流(數據流/文件)的開頭返回FILE指針當前位置
// 然後重新遍歷整個文件搜尋下一個字母開頭的單詞
isidx
STREAM_LOCAL =
for (i =
{
// System
if (search_word(KEY[i]
{
p[i] = STREAM_LOCAL; // 返回當前文件位置
// String tempString = Long
// System
System
+
} else
p[i] =
}
// isidx
}
catch (Exception e)
{
// TODO: handle exception
}
}
/**
* 定位由word指定的單詞在idx文件中的大概偏移位置
* @param word
* @param idx_cache
* @return
*/
public static long locate_idx(String word
{
int i =
int pre =
String tempString = word
while (i < MAX_KEYS && KEY[i]
{
pre = i;
++i;
}
if (tempString
{
pre =
}
System
return idx_cache[pre];
}
/**
* 主要查詢函數
*/
public static void consult()
{
byte data[] = null;// 釋義數據
long idx[] = new long[MAX_KEYS];//
int offset[] = new int[
int length[] = new int[
System
try
{
System
// 讀取字典索引文件
isidx = new BufferedInputStream(new FileInputStream(
idxfileString))
isidx
if (!isidx
{
System
}
}
catch (Exception e)
{
System
e
}
cache_idx(idx)
try
{
isdict = new BufferedInputStream(new FileInputStream(
dictfileString))
isdict
if (!isdict
{
System
}
}
catch (Exception e)
{
System
e
}
while (true)
{
System
int count[] = new int[
String word = get_input(MAX_WORD
long skips
if (count[
{
try
{
// 從文件開頭跳到單詞大致索引所在位置
// isidx
isidx
skips
// skips
skips
System
}
catch (Exception e)
{
System
e
}
if (search_word(word
{
data = get_data(offset
display_data(data
data = null;
} else
System
System
} else
break;
}
}
/**
* 不區分大小寫比較兩個字符串
*
* @param s
* @param s
* @return
*/
public static int strsEqualsIgnoreCase(String s
{
int n
for (int i
{
char c
char c
if (c
{
// 源字符串全部都轉為大寫字符串
c
c
if (c
{
// 源字符串全部都轉為小寫字符串
c
c
if (c
{
return c
}
}
}
}
return n
}
/**
* 重寫了Inpustream 中的skip(long n) 方法
* 參考
* @param inputStream
* @param n
* @return
*/
private static long skipBytesFromStream(InputStream inputStream
{
long remaining = n; // SKIP_BUFFER_SIZE is used to determine the size of
// skipBuffer
int SKIP_BUFFER_SIZE =
// skip(long)
byte[] skipBuffer = null;
int nr =
if (skipBuffer == null)
{
skipBuffer = new byte[SKIP_BUFFER_SIZE];
}
byte[] localSkipBuffer = skipBuffer;
if (n <=
{
return
}
while (remaining >
{
try
{
nr = inputStream
SKIP_BUFFER_SIZE
}
catch (IOException e)
{
e
}
if (nr <
{
break;
}
remaining
}
return n
}
/**
* 主函數
* @param args
*/
public static void main(String args[])
{
consult()
try
{
isidx
isdict
}
catch (Exception e)
{
System
e
}
}
}
如果要在windows平台下編譯
//UTF
char* U
{
int len = MultiByteToWideChar(CP_UTF
wchar_t* wstr = new wchar_t[len+
memset(wstr
MultiByteToWideChar(CP_UTF
len = WideCharToMultiByte(CP_ACP
char* str = new char[len+
memset(str
WideCharToMultiByte(CP_ACP
if(wstr) delete[] wstr;
return str;
}
//GB
char* G
{
int len = MultiByteToWideChar(CP_ACP
wchar_t* wstr = new wchar_t[len+
memset(wstr
MultiByteToWideChar(CP_ACP
len = WideCharToMultiByte(CP_UTF
char* str = new char[len+
memset(str
WideCharToMultiByte(CP_UTF
if(wstr) delete[] wstr;
return str;
}
/*
* 顯示data內容
*/
void display_data(char *data_buf
{
fwrite(data_buf
char *data=(char *)malloc(data_length)
memcpy(data
char *p=U
printf(
free(data)
delete p;
}
以星際譯王所支持的牛津英漢詞典oxford
結果顯示能夠正確得到單詞的釋義
From:http://tw.wingwit.com/Article/program/Java/hx/201311/25526.html