熱點推薦:
您现在的位置: 電腦知識網 >> 編程 >> .NET編程 >> 正文

漢字拼音的一個完整解決方法!

2013-11-13 10:29:59  來源: .NET編程 

  Imports SystemIO
Imports SystemTextRegularExpressions

Namespace BusinnessPinYin
Public Class PYService
Private gDataSet As New dsPinYin

<summary>
漢字表
</summary>
Public ReadOnly Property PinYinTable() As dsPinYinPinYinDataTable
Get
Return gDataSetPinYin
End Get
End Property

Private gTxtFile As String = AppDomainCurrentDomainSetupInformationApplicationBase & pinyintxt
Private gxmlFile As String = AppDomainCurrentDomainSetupInformationApplicationBase & pinyinxml

Private gRegex As New Regex((?<Word>^[\ue\ufa]+)(?<PingYin>*))

<summary>
加載漢字庫文件名為pinyinxml在程序當前目錄下
</summary>
Public Sub Load()
If Not IOFileExists(gxmlFile) Then
Throw New Exception(StringFormat(文件{}不存在 gxmlFile))
End If

DataSetInitialize()
gDataSetReadXml(gxmlFile)
End Sub

<summary>
從漢字文件中更新文件名為pinyintxt在程序當前目錄下
</summary>
<remarks></remarks>
Public Sub Update()
If Not IOFileExists(gTxtFile) Then
Throw New Exception(StringFormat(文件{}不存在 gTxtFile))
End If

UpdateFromTxt(gTxtFile)
End Sub

<summary>
保存漢字庫文件為pingyinxml在程序當前目錄下
</summary>
<remarks></remarks>
Public Sub Save()
gDataSetWriteXml(gxmlFile)
End Sub

  Private Sub DataSetInitialize()
在更新或讀入時清除
MegDataSetClear()
MegDataSetAcceptChanges()
End Sub

Private Sub UpdateFromTxt(ByVal file As String)
DataSetInitialize()

Dim mLine As String
Dim mBuilder As New SystemTextStringBuilder
Dim mReader As New IOStreamReader(file SystemTextEncodingDefault)
Do
mLine = mReaderReadLine
Add(mLine)
Loop Until StringIsNullOrEmpty(mLine)

mReaderClose()
mReaderDispose()

MegDataSetPinYinAcceptChanges()
End Sub

Private Sub Add(ByVal line As String)
If line Is Nothing Then Exit Sub

With gRegexMatch(line)

If Success Then

只取單字不取詞組
If Groups(Word)ValueLength = Then
Add(Groups(Word)Value Groups(PingYin)Value)
End If

End If
End With
End Sub

Private Sub Add(ByVal word As String ByVal py As String)
多音的拼音間用單個空枨符隔開

py = pyTrimReplace( )

Dim mCode As String = ChineseCode(word)
Dim mRow As dsPinYinPinYinRow = MegDataSetPinYinFindBy代碼(mCode)
If mRow Is Nothing Then
MegDataSetPinYinAddPinYinRow(word mCode py)
Else

  Dim pyArray() As String = pySplit( c)
For Each s As String In pyArray
If Not mRow拼音Contains(s) Then
mRow拼音 = StringConcat(mRow拼音 & s)
End If
Next
End If

End Sub

<summary>
將字符串轉為拼音
</summary>
<param name=line>字符串</param>
<param name=isgetfirst>如是多音字取第一個拼音</param>
Public Function ToPinyin(ByVal line As String ByVal isgetfirst As Boolean) As String
Dim mBuilder As New TextStringBuilder

  For Each s As Char In lineToCharArray
If IsTrue(s) Then
mBuilderAppend(GetPinyin(s isgetfirst))
Else
mBuilderAppend(s)
End If
Next

Return mBuilderToString
End Function

Private Function GetPinyin(ByVal word As String ByVal isgetfirst As Boolean) As String
Dim mResult As String = word

Dim mArray As String() = PinYinArray(ChineseCode(word)) 取拼音組
If Not mArray Is Nothing Then
If mArrayLength = Or isgetfirst Then
mResult = mArray() 單音的
Else

mResult = StringFormat(({}) StringJoin( mArray)) 多音的用括號括住拼音間用逗號隔開
End If
End If

Return mResult
End Function

取拼音組
Private Function PinYinArray(ByVal code As String) As String()
Dim mRow As dsPinYinPinYinRow = MegDataSetPinYinFindBy代碼(code)
If mRow Is Nothing Then Return Nothing

  Return mRow拼音Split( c)
End Function

<summary>
按拼音查字
</summary>
<param name=pinyin>拼音</param>
Public Function WordArray(ByVal pinyin As String) As String()
Dim mRows As dsPinYinPinYinRow() = CType(MegDataSetPinYinSelect(StringFormat(拼音 LIKE %{}% pinyin)) dsPinYinPinYinRow())
Dim mResult() As String
For i As Integer = To mRowsLength
If ArrayIndexOf(mRows(i)拼音Split( c) pinyin) <> Then
MeAppend(mResult mRows(i)漢字)
End If
Next
Return mResult
End Function

<summary>
按拼音查字
</summary>
<param name=pinyin>拼音</param>
Public Function Words(ByVal pinyin As String) As String
Return StringConcat(WordArray(pinyin))
End Function


<summary>
漢字代碼
</summary>
<param name=word>單個漢字</param>
Public Shared Function ChineseCode(ByVal word As String) As String
If Not IsTrue(word) Then Return Nothing

Dim bytes() As Byte = SystemTextEncodingDefaultGetBytes(word)

Return StringConcat(Hex(bytes()) Hex(bytes()))
End Function

<summary>
是否是單個漢字
</summary>
<param name=word>字符</param>
Public Shared Function IsTrue(ByVal word As String) As Boolean
If word Is Nothing Then Return False

Return SystemTextRegularExpressionsRegexIsMatch(word ^[\ue\ufa]$)
End Function

Private Sub Append(ByRef collection As String() ByVal value As String)
ReDim Preserve collection(collectionLength)
collection(collectionLength ) = value
End Sub
End Class
End Namespace


From:http://tw.wingwit.com/Article/program/net/201311/13826.html
    推薦文章
    Copyright © 2005-2013 電腦知識網 Computer Knowledge   All rights reserved.