熱點推薦:
您现在的位置: 電腦知識網 >> 編程 >> SQL Server >> 正文

SQL2005CLR函數擴展 - 關於山寨索引

2022-06-13   來源: SQL Server 
本文只是一個山寨試驗品思路僅供參考

原理介紹:
索引建立
目錄結構劃分方案也只是很簡易的實現了一下通過unicode把任意連續的兩個字符(中文或英文)分為個字節來做四層目錄把索引的內容對應的主關鍵字(主要為了使用sql索引和唯一性)作為文件名兩個字符在索引內容中的位置作為文件後綴來存儲文件本身為字節不保存任何信息

比如一條數據 pk山寨索引
山寨索引 四個字的unicode為
[]:
[]:
[]:
[]:
[]:
[]:
[]:
[]:
那麼對應的文件結構為
/////pk
/////pk
/////pk

索引使用
比如搜索寨索引
則搜索 ///// 目錄下的所有文件然後根據 pk 的文件後綴名去看 /////pk文件是否存在依次類推最後返回一個結果集

實用性
具體的實用性還有待驗證這只是實現了精確的like搜索而不能做常見搜索引擎的分詞效果另外海量數據重建索引的性能也是面臨很嚴峻的問題比如cpu負載和磁盤io負載關於windows一個目錄下可以保持多少個文件而不會對文件搜索造成大的性能損失也有待評估不過這個可以考慮根據主鍵的文件名hash來增加文件目錄深度降低單一目錄下的文件數量

演示效果
實現了針對test標的name和caption兩個字段作索引搜索

設置和獲取索引文件根目錄
select dboxfn_SetMyIndexFileRoot(d:/MyIndex)
select dboxfn_GetMyIndexFileRoot()
建立測試環境
 go
create table test( id uniqueidentifier name nvarchar ( ) caption nvarchar ( ))
insert into test select top newid () 我的索引 測試 from sysobjects
insert into test select top newid () 我的測試 索引 from sysobjects
insert into test select top newid () 測試索引 測試索引 from sysobjects
insert into test select top newid () 我的索引 索引 from sysobjects
create index i_testid on test( id)
建立索引文件
declare @t int
select @t=
dbo xfn_SetKeyForMyIndex( id testIndex name + + caption)   
from test
查詢數據
select  a*   from   test a dbo xfn_GetKeyFromMyIndex( 測試 索引 我的 testIndex )  b
    where a id= b pk
/*
CEADFAAEBDFEED   我的索引 測試
DDBCDFFBBAF   我的索引 測試
CACFCBACEAEE   我的索引 測試
CBFFBFFBAECEADE   我的測試 索引
BBFCCDBBFCBBEBDEDDF   我的測試 索引
BDEACDFFAA   我的測試 索引
*/
drop table test

clr代碼如下:編譯為MyFullIndexdll

  復制代碼 代碼如下:

  using System;
using SystemDataSqlTypes;
using MicrosoftSqlServerServer;
using SystemCollections;
using SystemCollectionsGeneric;
public partial class UserDefinedFunctions
{
    /// <summary>
    /// 設置索引目錄
    /// </summary>
    /// <param name=value></param>
    /// <returns></returns>
    [MicrosoftSqlServerServerSqlFunction ]
    public static SqlBoolean SetRoot(SqlString value)
    {
        if (valueIsNull) return false ;
        if (SystemIODirectory Exists(valueValue))
        {
            root = valueValue;
            return true ;
        }
        else
        {
            return false ;
        }
    }
    /// <summary>
    /// 獲取索引目錄
    /// </summary>
    /// <returns></returns>
    [MicrosoftSqlServerServerSqlFunction ]
    public static SqlString GetRoot()
    {
        return new SqlString (root);
    }
    /// <summary>
    /// 建立索引
    /// </summary>
    /// <param name=key> 主鍵 </param>
    /// <param name=indexName> 索引名稱 </param>
    /// <param name=content> 索引內容 </param>
    /// <returns></returns>
    [MicrosoftSqlServerServerSqlFunction ]
    public static SqlInt SetIndex(SqlString keySqlString indexNameSqlString content)
    {
        if (keyIsNull || contentIsNull||indexNameIsNull) return ;
        return _setIndex(keyValueindexNameValue contentValue);
    }

    /// <summary>
    /// 查詢索引
    /// </summary>
    /// <param name=word> 關鍵字(空格區分) </param>
    /// <param name=indexName> 索引名稱 </param>
    /// <returns></returns>
    [SqlFunction (TableDefinition = pk nvarchar() Name = GetIndex FillRowMethodName = FillRow )]
    public static IEnumerable GetIndex(SqlString wordSqlString indexName)
    {

        SystemCollectionsGenericList <string > ret = new List <string >();
        if (wordIsNull || indexNameIsNull) return ret;
        return _getIndex(wordValue indexNameValue);
    }

    public static void FillRow(Object obj out SqlString pk)
    {
        string key = objToString();
        pk = key;
    }
    static string root = @d:/index ;

    /// <summary>
    /// 獲取有空格分隔的索引信息
    /// </summary>
    /// <param name=word></param>
    /// <param name=indexName></param>
    /// <returns></returns>
    static SystemCollectionsGenericList <string > _getIndex(string word string indexName)
    {
        string [] arrWord = wordSplit(new char [] { } StringSplitOptions RemoveEmptyEntries);

        SystemCollectionsGenericList <string > key_ = _getIndex(arrWord[] indexName);

        if (arrWordLength == ) return key_;
        SystemCollectionsGenericList <string > [] key_list=new List <string >[arrWordLength];
        for (int i = ; i < arrWordLength; i++)
        {
            SystemCollectionsGenericList <string > key_i = _getIndex(arrWord[i+]indexName);
            key_list[i] = key_i;
        }

        for (int i=key_Count;i>=;i)
        {
            foreach (SystemCollectionsGenericList <string > key_i in key_list)
            {
                if (key_iContains(key_[i]) == false )
                {
                    key_RemoveAt(i);
                    continue ;
                }
            }
        }
        return key_;
    }
    /// <summary>
    /// 獲取單個詞的索引信息
    /// </summary>
    /// <param name=word></param>
    /// <param name=indexName></param>
    /// <returns></returns>
    static SystemCollectionsGenericList <string > _getIndex(string word string indexName)
    {
        SystemCollectionsGenericList <string > ret = new List <string >();
        byte [] bWord = SystemTextEncoding UnicodeGetBytes(word);
        if (bWordLength < ) return ret;

        string path = string Format(@{}/{}/{}/{}/{}/{}/ rootindexName bWord[] bWord[] bWord[] bWord[]);
        if (SystemIODirectory Exists(path) == false )
        {
            return ret;
        }
        string [] arrFiles = SystemIODirectory GetFiles(path);

        foreach (string file in arrFiles)
        {
            string key = SystemIOPath GetFileNameWithoutExtension(file);
            string index = SystemIOPath GetExtension(file)TrimStart(new char [] { });
            int cIndex = int Parse(index);
            bool bHas = true ;
            for (int i = ; i < bWordLength ; i = i + )
            {
                string nextFile = string Format(@{}/{}/{}/{}/{}/{}/{}{}
                    root indexName bWord[i + ] bWord[i + ] bWord[i + ] bWord[i + ] key ++cIndex);

                if (SystemIOFile Exists(nextFile) == false )
                {
                    bHas = false ;
                    break ;
                }
            }
            if (bHas == true &&retContains(key)==false )
                retAdd(key);

        }
        return ret;
    }

    /// <summary>
    /// 建立索引文件
    /// </summary>
    /// <param name=key></param>
    /// <param name=indexName></param>
    /// <param name=content></param>
    /// <returns></returns>
    static int _setIndex(string keystring indexName string content)
    {
        byte [] bContent = SystemTextEncoding UnicodeGetBytes(content);
        if (bContentLength <= ) return ;
        for (int i = ; i < bContentLength ; i = i + )
        {
            string path = string Format(@{}/{}/{}/{}/{}/{}/ rootindexName bContent[i + ] bContent[i + ] bContent[i + ] bContent[i + ]);
            if (SystemIODirectory Exists(path) == false )
            {
                SystemIODirectory CreateDirectory(path);
            }
            string file = string Format(@{}/{}{} path key i / );

            if (SystemIOFile Exists(file) == false )
            {
                SystemIOFile Create(file)Close();
            }
        }
        return contentLength;
    }
};



部署的sql腳本如下
drop function dboxfn_SetMyIndexFileRoot
drop function dboxfn_GetMyIndexFileRoot
drop function dboxfn_GetKeyFromMyIndex
drop function dboxfn_SetKeyForMyIndex
drop assembly MyFullIndex
go
CREATE ASSEMBLY MyFullIndex FROM d:/SQLCLR/MyFullIndexdll WITH PERMISSION_SET = UnSAFE;

go
索引搜索
CREATE FUNCTION dbo xfn_GetKeyFromMyIndex ( @word nvarchar ( max ) @indexName  nvarchar ( ))   
RETURNS table ( pk nvarchar ( ))
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions GetIndex
go
索引建立
CREATE FUNCTION dbo xfn_SetKeyForMyIndex ( @pk nvarchar ( ) @indexName  nvarchar ( ) @word nvarchar ( max ))   
RETURNS int
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions SetIndex
go
獲取索引文件根目錄
CREATE FUNCTION dbo xfn_GetMyIndexFileRoot ()   
RETURNS nvarchar ( max )
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions GetRoot
go
設置索引文件根目錄(默認目錄為 d:/myindex )
CREATE FUNCTION dbo xfn_SetMyIndexFileRoot ( @FileRoot nvarchar ( max ))   
RETURNS bit
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions SetRoot
go

From:http://tw.wingwit.com/Article/program/SQLServer/201404/30570.html
    推薦文章
    Copyright © 2005-2022 電腦知識網 Computer Knowledge   All rights reserved.