diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..d23cc2425
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,192 @@
+---
+Language: Cpp
+# BasedOnStyle: Microsoft
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: None
+AlignConsecutiveMacros: None
+AlignConsecutiveAssignments: None
+AlignConsecutiveBitFields: None
+AlignConsecutiveDeclarations: None
+AlignEscapedNewlines: Right
+AlignOperands: Align
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortEnumsOnASingleLine: false
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: MultiLine
+AttributeMacros:
+ - __capability
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterCaseLabel: false
+ AfterClass: true
+ AfterControlStatement: Always
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: true
+ AfterStruct: true
+ AfterUnion: false
+ AfterExternBlock: true
+ BeforeCatch: true
+ BeforeElse: true
+ BeforeLambdaBody: false
+ BeforeWhile: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeConceptDeclarations: true
+BreakBeforeBraces: Custom
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+QualifierAlignment: Leave
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat: false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+PackConstructorInitializers: BinPack
+BasedOnStyle: ''
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+AllowAllConstructorInitializersOnNextLine: true
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+IfMacros:
+ - KJ_IF_MAYBE
+IncludeBlocks: Preserve
+IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ SortPriority: 0
+ CaseSensitive: false
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+ Priority: 3
+ SortPriority: 0
+ CaseSensitive: false
+ - Regex: '.*'
+ Priority: 1
+ SortPriority: 0
+ CaseSensitive: false
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseLabels: false
+IndentCaseBlocks: false
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentExternBlock: AfterExternBlock
+IndentRequires: false
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+InsertTrailingCommas: None
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+LambdaBodyIndentation: Signature
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 1000
+PenaltyIndentedWhitespace: 0
+PointerAlignment: Right
+PPIndentWidth: -1
+ReferenceAlignment: Pointer
+ReflowComments: true
+RemoveBracesLLVM: false
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SortIncludes: CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+ AfterControlStatements: true
+ AfterForeachMacros: true
+ AfterFunctionDefinitionName: false
+ AfterFunctionDeclarationName: false
+ AfterIfMacros: true
+ AfterOverloadedOperator: false
+ BeforeNonEmptyParentheses: false
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: Never
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInLineCommentPrefix:
+ Minimum: 1
+ Maximum: -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+BitFieldColonSpacing: Both
+Standard: Latest
+StatementAttributeLikeMacros:
+ - Q_EMIT
+StatementMacros:
+ - Q_UNUSED
+ - QT_REQUIRE_VERSION
+TabWidth: 4
+UseCRLF: false
+UseTab: Never
+WhitespaceSensitiveMacros:
+ - STRINGIZE
+ - PP_STRINGIZE
+ - BOOST_PP_STRINGIZE
+ - NS_SWIFT_NAME
+ - CF_SWIFT_NAME
+...
+
diff --git a/.gitignore b/.gitignore
index 750f6dec5..4642f4e8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,9 @@
*.obj
*.elf
+# Log files
+*.tlog
+
# Linker output
*.ilk
*.map
@@ -60,4 +63,402 @@ dkms.conf
/build
/ipch
/packages
-/out/build/x64-Debug
\ No newline at end of file
+/out/build/x64-Debug
+/sgKey.snk
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Ww][Ii][Nn]32/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# ASP.NET Scaffolding
+ScaffoldingReadMe.txt
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.tlog
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Coverlet is a free, cross platform Code Coverage Tool
+coverage*.json
+coverage*.xml
+coverage*.info
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+*.vbp
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+*.dsw
+*.dsp
+
+# Visual Studio 6 technical files
+*.ncb
+*.aps
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# Visual Studio History (VSHistory) files
+.vshistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+# Fody - auto-generated XML schema
+FodyWeavers.xsd
+
+# VS Code files for those working on multiple tools
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
+
+# Windows Installer files from build outputs
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# JetBrains Rider
+*.sln.iml
diff --git a/.gitmodules b/.gitmodules
index a6fa563cf..c37db6a11 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -2,3 +2,12 @@
path = ThirdParty/zstd
url = https://github.com/facebook/zstd
branch = release
+[submodule "ThirdParty/spdk"]
+ path = ThirdParty/spdk
+ url = https://github.com/spdk/spdk
+[submodule "ThirdParty/isal-l_crypto"]
+ path = ThirdParty/isal-l_crypto
+ url = https://github.com/intel/isa-l_crypto
+[submodule "ThirdParty/RocksDB"]
+ path = ThirdParty/RocksDB
+ url = https://github.com/PtilopsisL/rocksdb
diff --git a/AnnService.users.props b/AnnService.users.props
index e65231357..15f2fb5bd 100644
--- a/AnnService.users.props
+++ b/AnnService.users.props
@@ -10,9 +10,10 @@
$(SolutionDir)\$(Platform)\$(Configuration)\
- $(SolutionDir)\$(Platform)\$(Configuration)\
+ $(SolutionDir)\$(Platform)\$(Configuration)\
+ 3.9
diff --git a/AnnService/Aggregator.vcxproj b/AnnService/Aggregator.vcxproj
index 4946c13f9..a969443ce 100644
--- a/AnnService/Aggregator.vcxproj
+++ b/AnnService/Aggregator.vcxproj
@@ -42,15 +42,16 @@
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -132,11 +133,13 @@
true
_MBCS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
stdcpp17
+ Guard
true
true
CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies)
+ true
@@ -158,26 +161,26 @@
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/AnnService/BalancedDataPartition.vcxproj b/AnnService/BalancedDataPartition.vcxproj
index 6ce001f60..f4a712655 100644
--- a/AnnService/BalancedDataPartition.vcxproj
+++ b/AnnService/BalancedDataPartition.vcxproj
@@ -42,13 +42,13 @@
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
@@ -95,6 +95,7 @@
true
/Zc:twoPhase- %(AdditionalOptions)
stdcpp17
+ Guard
true
@@ -148,5 +149,12 @@
+
+
+
+ This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
+
+
+
\ No newline at end of file
diff --git a/AnnService/CMakeLists.txt b/AnnService/CMakeLists.txt
index 5aaa1cbca..851996f7e 100644
--- a/AnnService/CMakeLists.txt
+++ b/AnnService/CMakeLists.txt
@@ -10,6 +10,39 @@ include_directories(${Zstd}/lib)
file(GLOB_RECURSE HDR_FILES ${AnnService}/inc/Core/*.h ${AnnService}/inc/Helper/*.h)
file(GLOB_RECURSE SRC_FILES ${AnnService}/src/Core/*.cpp ${AnnService}/src/Helper/*.cpp)
+set(SPDK_LIBRARIES "")
+if (SPDK)
+ set(Spdk ${PROJECT_SOURCE_DIR}/ThirdParty/spdk/build)
+ set(Dpdk ${PROJECT_SOURCE_DIR}/ThirdParty/spdk/dpdk/build)
+ set(IsalLCrypto ${PROJECT_SOURCE_DIR}/ThirdParty/isal-l_crypto/.libs/libisal_crypto.a)
+ set(SpdkLibPrefix ${Spdk}/lib/libspdk_)
+ set(DpdkLibPrefix ${Dpdk}/lib/librte_)
+ if ((NOT EXISTS ${SpdkLibPrefix}bdev_nvme.a) OR (NOT EXISTS ${DpdkLibPrefix}pci.a) OR (NOT EXISTS ${IsalLCrypto}))
+ set (SPDK_LIBRARIES "")
+ message (FATAL_ERROR "Cound not find SPDK, DPDK and IsalLCrypto!")
+ list(REMOVE_ITEM HDR_FILES
+ ${AnnService}/inc/Core/SPANN/ExtraSPDKController.h
+ )
+
+ list(REMOVE_ITEM SRC_FILES
+ ${AnnService}/src/Core/SPANN/ExtraSPDKController.cpp
+ )
+ else()
+ include_directories(${Spdk}/include)
+ add_definitions(-DSPDK)
+ set(SPDK_LIBRARIES -Wl,--whole-archive ${SpdkLibPrefix}bdev_nvme.a ${SpdkLibPrefix}bdev.a ${SpdkLibPrefix}nvme.a ${SpdkLibPrefix}vfio_user.a ${SpdkLibPrefix}sock.a ${SpdkLibPrefix}dma.a ${SpdkLibPrefix}notify.a ${SpdkLibPrefix}accel.a ${SpdkLibPrefix}event_bdev.a ${SpdkLibPrefix}event_accel.a ${SpdkLibPrefix}vmd.a ${SpdkLibPrefix}event_vmd.a ${SpdkLibPrefix}event_sock.a ${SpdkLibPrefix}event_iobuf.a ${SpdkLibPrefix}event.a ${SpdkLibPrefix}env_dpdk.a ${SpdkLibPrefix}log.a ${SpdkLibPrefix}thread.a ${SpdkLibPrefix}rpc.a ${SpdkLibPrefix}init.a ${SpdkLibPrefix}jsonrpc.a ${SpdkLibPrefix}json.a ${SpdkLibPrefix}trace.a ${SpdkLibPrefix}util.a ${DpdkLibPrefix}mempool.a ${DpdkLibPrefix}mempool_ring.a ${DpdkLibPrefix}eal.a ${DpdkLibPrefix}ring.a ${DpdkLibPrefix}telemetry.a ${DpdkLibPrefix}bus_pci.a ${DpdkLibPrefix}kvargs.a ${DpdkLibPrefix}pci.a -Wl,--no-whole-archive dl rt isal ${IsalLCrypto} uuid)
+ message (STATUS "Found SPDK:${SPDK_LIBRARIES}")
+ endif()
+else()
+ list(REMOVE_ITEM HDR_FILES
+ ${AnnService}/inc/Core/SPANN/ExtraSPDKController.h
+ )
+
+ list(REMOVE_ITEM SRC_FILES
+ ${AnnService}/src/Core/SPANN/ExtraSPDKController.cpp
+ )
+endif()
+
list(REMOVE_ITEM HDR_FILES
${AnnService}/inc/Core/Common/DistanceUtils.h
${AnnService}/inc/Core/Common/SIMDUtils.h
@@ -38,9 +71,10 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
endif()
add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES})
-target_link_libraries (SPTAGLib DistanceUtils libzstd_shared)
+target_link_libraries (SPTAGLib DistanceUtils ${RocksDB_LIBRARIES} ${uring_LIBRARIES} libzstd_shared ${NUMA_LIBRARY} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES})
-target_link_libraries (SPTAGLibStatic DistanceUtils libzstd_static)
+target_link_libraries (SPTAGLibStatic DistanceUtils ${RocksDB_LIBRARIES} ${uring_LIBRARIES} libzstd_static ${NUMA_LIBRARY_STATIC} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
+
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
target_compile_options(SPTAGLibStatic PRIVATE -fPIC)
endif()
@@ -87,23 +121,31 @@ endif()
file(GLOB_RECURSE SSD_SERVING_HDR_FILES ${AnnService}/inc/SSDServing/*.h)
file(GLOB_RECURSE SSD_SERVING_FILES ${AnnService}/src/SSDServing/*.cpp)
-#if(NOT WIN32)
-# list(REMOVE_ITEM SSD_SERVING_HDR_FILES
-# ${VECTORSEARCH_INC_DIR}/AsyncFileReader.h
-# )
-#elseif(WIN32)
-# list(REMOVE_ITEM SSD_SERVING_HDR_FILES
-# ${VECTORSEARCH_INC_DIR}/AsyncFileReaderLinux.h
-# )
-#endif()
+
+file(GLOB_RECURSE SPFRESH_HDR_FILES ${AnnService}/inc/SPFresh/*.h)
+file(GLOB_RECURSE SPFRESH_FILES ${AnnService}/src/SPFresh/*.cpp)
+
+file(GLOB_RECURSE KEYVALUETEST_HDR_FILES ${AnnService}/inc/KeyValueTest/*.h)
+file(GLOB_RECURSE KEYVALUETEST_FILES ${AnnService}/src/KeyValueTest/*.cpp)
+
+file(GLOB_RECURSE USEFULTOOL_FILES ${AnnService}/src/UsefulTool/*.cpp)
add_executable(ssdserving ${SSD_SERVING_HDR_FILES} ${SSD_SERVING_FILES})
-target_link_libraries(ssdserving SPTAGLibStatic ${Boost_LIBRARIES})
+add_executable(spfresh ${SPFRESH_HDR_FILES} ${SPFRESH_FILES})
+add_executable(keyvaluetest ${KEYVALUETEST_HDR_FILES} ${KEYVALUETEST_FILES})
+add_executable(usefultool ${USEFULTOOL_FILES})
+target_link_libraries(ssdserving SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES} ${uring_LIBRARIES} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
+target_link_libraries(spfresh SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES} ${uring_LIBRARIES} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
+target_link_libraries(keyvaluetest SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES} ${uring_LIBRARIES} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
+target_link_libraries(usefultool SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES} ${uring_LIBRARIES} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
target_compile_definitions(ssdserving PRIVATE _exe)
+target_compile_definitions(spfresh PRIVATE _exe)
+target_compile_definitions(keyvaluetest PRIVATE _exe)
+target_compile_definitions(usefultool PRIVATE _exe)
# for Test
add_library(ssdservingLib ${SSD_SERVING_HDR_FILES} ${SSD_SERVING_FILES})
-target_link_libraries(ssdservingLib SPTAGLibStatic ${Boost_LIBRARIES})
+target_link_libraries(ssdservingLib SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES} ${uring_LIBRARIES} ${TBB_LIBRARIES} ${SPDK_LIBRARIES})
find_package(MPI)
if (MPI_FOUND)
diff --git a/AnnService/Client.vcxproj b/AnnService/Client.vcxproj
index 9381af598..fdf22990f 100644
--- a/AnnService/Client.vcxproj
+++ b/AnnService/Client.vcxproj
@@ -42,15 +42,16 @@
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -105,10 +106,13 @@
CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies)
+ true
_MBCS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
stdcpp17
+ Level3
+ Guard
@@ -125,26 +129,26 @@
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/AnnService/CoreLibrary.vcxproj b/AnnService/CoreLibrary.vcxproj
index 1c73e1873..721d74d4b 100644
--- a/AnnService/CoreLibrary.vcxproj
+++ b/AnnService/CoreLibrary.vcxproj
@@ -1,14 +1,6 @@
-
- Debug
- Win32
-
-
- Release
- Win32
-
Debug
x64
@@ -27,43 +19,25 @@
-
- Application
- true
- v142
- MultiByte
-
-
- Application
- false
- v142
- true
- MultiByte
-
StaticLibrary
true
- v142
+ v143
MultiByte
StaticLibrary
false
- v142
+ v143
true
MultiByte
+ Spectre
-
-
-
-
-
-
@@ -78,15 +52,6 @@
$(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\
$(OutLibDir)
-
-
- Level3
- Disabled
- true
- true
- /Zc:twoPhase- %(AdditionalOptions)
-
-
Level3
@@ -97,25 +62,10 @@
_MBCS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
Guard
ProgramDatabase
- /Zc:twoPhase- /Zc:__cplusplus %(AdditionalOptions)
+ /Zc:twoPhase- /Zc:__cplusplus /bigobj %(AdditionalOptions)
stdcpp17
-
-
- Level3
- MaxSpeed
- true
- true
- true
- true
- /Zc:twoPhase- %(AdditionalOptions)
-
-
- true
- true
-
-
Level3
@@ -126,8 +76,13 @@
true
true
_MBCS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
- /Zc:twoPhase- /Zc:__cplusplus %(AdditionalOptions)
+ /Zc:twoPhase- /Zc:__cplusplus /bigobj %(AdditionalOptions)
stdcpp17
+ Speed
+ AdvancedVectorExtensions
+ AnySuitable
+ true
+ Fast
true
@@ -135,15 +90,18 @@
+
+
+
@@ -159,14 +117,21 @@
+
+
-
+
+
+
+
+
+
@@ -177,6 +142,7 @@
+
@@ -198,9 +164,14 @@
+
+
+
+
+
@@ -221,13 +192,5 @@
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/CoreLibrary.vcxproj.filters b/AnnService/CoreLibrary.vcxproj.filters
index 7c749f00c..3f7b3fa11 100644
--- a/AnnService/CoreLibrary.vcxproj.filters
+++ b/AnnService/CoreLibrary.vcxproj.filters
@@ -85,6 +85,12 @@
Header Files\Core
+
+ Header Files\Core
+
+
+ Header Files\Core
+
Header Files\Core
@@ -196,7 +202,7 @@
Header Files\Core\SPANN
-
+
Header Files\Core\SPANN
@@ -220,6 +226,48 @@
Header Files\Core\Common
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Helper
+
+
+ Header Files\Core\Common
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Helper
+
+
+ Header Files\Core\Common
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Core\SPANN
+
+
+ Header Files\Core\Common
+
+
+ Header Files\Core\Common
+
@@ -237,6 +285,12 @@
Source Files\Core
+
+ Source Files\Core
+
+
+ Source Files\Core
+
Source Files\Helper
@@ -282,14 +336,23 @@
Source Files\Core\Common
+
+ Source Files\Core\Common
+
+
+ Source Files\Core\Common
+
Source Files\Core\SPANN
Source Files\Helper
-
- Source Files\Core\Common
+
+ Source Files\Core\SPANN
+
+
+ Source Files\Core\SPANN
diff --git a/AnnService/GPUCoreLibrary.vcxproj b/AnnService/GPUCoreLibrary.vcxproj
index 2296ce147..e2f78a12a 100644
--- a/AnnService/GPUCoreLibrary.vcxproj
+++ b/AnnService/GPUCoreLibrary.vcxproj
@@ -15,6 +15,7 @@
GPUCoreLibrary
10.0
GPUCoreLibrary
+ $(CUDA_PATH)
@@ -22,14 +23,15 @@
StaticLibrary
true
MultiByte
- v142
+ v143
+ x64
StaticLibrary
false
true
MultiByte
- v142
+ v143
@@ -50,6 +52,7 @@
false
+ $(CudaToolkitIncludeDir);$(PublicIncludeDirectories)
@@ -60,6 +63,7 @@
true
4819;%(DisableSpecificWarnings)
MultiThreadedDebug
+ Default
true
@@ -70,10 +74,13 @@
64
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;
WIN32;%(Defines)
- MTd
- /openmp /std:c++14 /Zc:__cplusplus /FS
+ InheritFromHost
+ /openmp /std:c++17 /Zc:__cplusplus /FS
true
InheritFromHost
+ %(Include)
+ Static
+ 64
@@ -86,6 +93,8 @@
true
4819;%(DisableSpecificWarnings)
MultiThreaded
+ Default
+ ProgramDatabase
true
@@ -97,7 +106,7 @@
64
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80
- /openmp /std:c++14 /Zc:__cplusplus /FS
+ /openmp /std:c++17 /Zc:__cplusplus /FS
true
O2
MT
@@ -106,11 +115,13 @@
+
+
@@ -130,6 +141,8 @@
+
+
@@ -186,7 +199,7 @@
-
+
@@ -196,27 +209,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/GPUCoreLibrary.vcxproj.filters b/AnnService/GPUCoreLibrary.vcxproj.filters
index 79e050332..c9500e6d2 100644
--- a/AnnService/GPUCoreLibrary.vcxproj.filters
+++ b/AnnService/GPUCoreLibrary.vcxproj.filters
@@ -163,9 +163,6 @@
Header Files\Core\Common
-
- Header Files\Core\Common\cuda
-
Header Files\Core\Common\cuda
@@ -226,6 +223,12 @@
Header Files\Core\Common
+
+ Header Files\Core\Common\cuda
+
+
+ Header Files\Core\Common\cuda
+
@@ -270,6 +273,12 @@
Source Files\Helper
+
+ Source Files\Core\SPANN
+
+
+ Source Files\Core\Common
+
@@ -290,9 +299,6 @@
Source Files\Core\Common
-
- Source Files\Core\SPANN
-
Source Files\Core\Common
@@ -302,5 +308,8 @@
Source Files\Helper
+
+ Source Files\Core\Common
+
\ No newline at end of file
diff --git a/AnnService/GPUIndexBuilder.vcxproj b/AnnService/GPUIndexBuilder.vcxproj
index 1d9c0ddc7..a9ac5367e 100644
--- a/AnnService/GPUIndexBuilder.vcxproj
+++ b/AnnService/GPUIndexBuilder.vcxproj
@@ -22,14 +22,14 @@
Application
true
MultiByte
- v142
+ v143
Application
false
true
MultiByte
- v142
+ v143
@@ -71,7 +71,7 @@
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80
WIN32;%(Defines)
MTd
- /openmp /std:c++14 /Zc:__cplusplus /FS
+ /openmp /std:c++17 /Zc:__cplusplus /FS
true
InheritFromHost
@@ -96,13 +96,14 @@
64
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80
- /openmp /std:c++14 /Zc:__cplusplus /FS
+ /openmp /std:c++17 /Zc:__cplusplus /FS
true
O2
MT
+
@@ -112,27 +113,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/GPUSSDServing.vcxproj b/AnnService/GPUSSDServing.vcxproj
index 97d63367a..7b1e1bada 100644
--- a/AnnService/GPUSSDServing.vcxproj
+++ b/AnnService/GPUSSDServing.vcxproj
@@ -22,14 +22,14 @@
Application
true
MultiByte
- v142
+ v143
Application
false
true
MultiByte
- v142
+ v143
@@ -71,9 +71,11 @@
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80
WIN32;%(Defines)
MTd
- /openmp /std:c++14 /Zc:__cplusplus /FS /D "_exe"
+ /openmp /std:c++17 /Zc:__cplusplus /FS /D "_exe"
true
InheritFromHost
+ Static
+ 64
@@ -97,7 +99,7 @@
64
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80
- /openmp /std:c++14 /Zc:__cplusplus /FS /D "_exe"
+ /openmp /std:c++17 /Zc:__cplusplus /FS /D "_exe"
true
O2
MT
@@ -119,27 +121,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/GPUSSDServing.vcxproj.filters b/AnnService/GPUSSDServing.vcxproj.filters
index a8eb95b2f..41835709b 100644
--- a/AnnService/GPUSSDServing.vcxproj.filters
+++ b/AnnService/GPUSSDServing.vcxproj.filters
@@ -26,8 +26,6 @@
-
- Source Files
-
+
\ No newline at end of file
diff --git a/AnnService/IndexBuilder.vcxproj b/AnnService/IndexBuilder.vcxproj
index 0900590cb..deb392b01 100644
--- a/AnnService/IndexBuilder.vcxproj
+++ b/AnnService/IndexBuilder.vcxproj
@@ -42,15 +42,16 @@
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -95,10 +96,12 @@
true
/Zc:twoPhase- %(AdditionalOptions)
stdcpp17
+ Guard
true
true
+ true
@@ -147,27 +150,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/IndexSearcher.vcxproj b/AnnService/IndexSearcher.vcxproj
index 6d1378379..77b43c579 100644
--- a/AnnService/IndexSearcher.vcxproj
+++ b/AnnService/IndexSearcher.vcxproj
@@ -43,15 +43,16 @@
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -96,10 +97,12 @@
true
/Zc:twoPhase- %(AdditionalOptions)
stdcpp17
+ Guard
true
true
+ true
@@ -148,27 +151,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/Quantizer.vcxproj b/AnnService/Quantizer.vcxproj
index 942e55e1d..a725f173f 100644
--- a/AnnService/Quantizer.vcxproj
+++ b/AnnService/Quantizer.vcxproj
@@ -36,43 +36,25 @@
-
- Application
- true
- v142
- MultiByte
-
-
- Application
- false
- v142
- true
- MultiByte
-
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
+ Spectre
-
-
-
-
-
-
@@ -134,6 +116,7 @@
true
Guard
ProgramDatabase
+ stdcpp17
Console
@@ -152,6 +135,7 @@
true
/Zc:twoPhase- %(AdditionalOptions)
Guard
+ stdcpp17
Console
@@ -160,30 +144,9 @@
true
%(AdditionalLibraryDirectories)
CoreLibrary.lib;%(AdditionalDependencies)
+ true
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/SSDServing.vcxproj b/AnnService/SSDServing.vcxproj
index f66765978..da8261b0d 100644
--- a/AnnService/SSDServing.vcxproj
+++ b/AnnService/SSDServing.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -53,15 +53,16 @@
StaticLibrary
true
- v142
+ v143
MultiByte
StaticLibrary
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -161,6 +162,12 @@
true
/Zc:twoPhase- %(AdditionalOptions)
stdcpp17
+ $(IntDir)$(ProjectName).pdb
+ Speed
+ AdvancedVectorExtensions
+ AnySuitable
+ true
+ Fast
Console
@@ -172,27 +179,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
- This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
-
-
+
\ No newline at end of file
diff --git a/AnnService/Server.vcxproj b/AnnService/Server.vcxproj
index 3b38afe40..fefafe8b8 100644
--- a/AnnService/Server.vcxproj
+++ b/AnnService/Server.vcxproj
@@ -42,15 +42,16 @@
Application
true
- v142
+ v143
MultiByte
Application
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -105,10 +106,13 @@
CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies)
+ true
_MBCS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
stdcpp17
+ Level3
+ Guard
@@ -133,26 +137,26 @@
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/AnnService/SocketLib.vcxproj b/AnnService/SocketLib.vcxproj
index d29ac8ede..4a28ae733 100644
--- a/AnnService/SocketLib.vcxproj
+++ b/AnnService/SocketLib.vcxproj
@@ -41,15 +41,16 @@
StaticLibrary
true
- v142
+ v143
MultiByte
StaticLibrary
false
- v142
+ v143
true
MultiByte
+ Spectre
@@ -88,6 +89,7 @@
_MBCS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
stdcpp17
+ Level3
@@ -115,12 +117,12 @@
-
+
This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
-
+
\ No newline at end of file
diff --git a/AnnService/inc/Core/BKT/Index.h b/AnnService/inc/Core/BKT/Index.h
index f3131343c..dd9734bed 100644
--- a/AnnService/inc/Core/BKT/Index.h
+++ b/AnnService/inc/Core/BKT/Index.h
@@ -41,8 +41,32 @@ namespace SPTAG
public:
RebuildJob(COMMON::Dataset* p_data, COMMON::BKTree* p_tree, COMMON::RelativeNeighborhoodGraph* p_graph,
DistCalcMethod p_distMethod) : m_data(p_data), m_tree(p_tree), m_graph(p_graph), m_distMethod(p_distMethod) {}
+
+ void exec(void* p_workSpace, IAbortOperation* p_abort) {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot support job.exec(workspace, abort)!\n");
+ }
+
void exec(IAbortOperation* p_abort) {
- m_tree->Rebuild(*m_data, m_distMethod, p_abort);
+ COMMON::BKTree newTrees(*m_tree);
+ newTrees.BuildTrees(*m_data, m_distMethod, 1, nullptr, nullptr, false, p_abort);
+
+ std::unique_lock lock(*(m_tree->m_lock));
+ const std::unordered_map* idmap = &(m_tree->GetSampleMap());
+ for (auto iter = idmap->begin(); iter != idmap->end(); iter++) {
+ if (iter->first < 0)
+ {
+ (*m_graph)[-1 - iter->first][m_graph->m_iNeighborhoodSize - 1] = -1;
+ }
+ }
+ m_tree->SwapTree(newTrees);
+
+ const std::unordered_map* newidmap = &(m_tree->GetSampleMap());
+ for (auto iter = newidmap->begin(); iter != newidmap->end(); iter++) {
+ if (iter->first < 0)
+ {
+ (*m_graph)[-1 - iter->first][m_graph->m_iNeighborhoodSize - 1] = -2 - iter->second;
+ }
+ }
}
private:
COMMON::Dataset* m_data;
@@ -72,7 +96,6 @@ namespace SPTAG
std::shared_timed_mutex m_dataDeleteLock;
COMMON::Labelset m_deletedID;
- std::unique_ptr> m_workSpacePool;
Helper::ThreadPool m_threadPool;
int m_iNumberOfThreads;
@@ -85,6 +108,7 @@ namespace SPTAG
int m_iNumberOfInitialDynamicPivots;
int m_iNumberOfOtherDynamicPivots;
int m_iHashTableExp;
+ std::unique_ptr> m_workSpaceFactory;
public:
Index()
@@ -98,6 +122,7 @@ namespace SPTAG
m_pSamples.SetName("Vector");
m_fComputeDistance = std::function(COMMON::DistanceCalcSelector(m_iDistCalcMethod));
m_iBaseSquare = (m_iDistCalcMethod == DistCalcMethod::Cosine) ? COMMON::Utils::GetBase() * COMMON::Utils::GetBase() : 1;
+ m_workSpaceFactory = std::make_unique>();
}
~Index() {}
@@ -122,6 +147,9 @@ namespace SPTAG
return 1.0f - xy / (sqrt(xx) * sqrt(yy));
}
inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); }
+ inline float GetDistance(const void* target, const SizeType idx) const {
+ return ComputeDistance(target, m_pSamples.At(idx));
+ }
inline const void* GetSample(const SizeType idx) const { return (void*)m_pSamples[idx]; }
inline bool ContainSample(const SizeType idx) const { return idx >= 0 && idx < m_deletedID.R() && !m_deletedID.Contains(idx); }
inline bool NeedRefine() const { return m_deletedID.Count() > (size_t)(GetNumSamples() * m_fDeletePercentageForRefine); }
@@ -154,9 +182,18 @@ namespace SPTAG
ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, bool p_normalized = false, bool p_shareOwnership = false);
ErrorCode SearchIndex(QueryResult &p_query, bool p_searchDeleted = false) const;
+
+ std::shared_ptr GetIterator(const void* p_target, bool p_searchDeleted = false, std::function p_filterFunc = nullptr, int p_maxCheck = 0) const;
+ ErrorCode SearchIndexIterativeNext(QueryResult& p_query, COMMON::WorkSpace* workSpace, int p_batch, int& resultCount, bool p_isFirst, bool p_searchDeleted) const;
+ ErrorCode SearchIndexIterativeEnd(std::unique_ptr workSpace) const;
+ bool SearchIndexIterativeFromNeareast(QueryResult& p_query, COMMON::WorkSpace* p_space, bool p_isFirst, bool p_searchDeleted = false) const;
+ std::unique_ptr RentWorkSpace(int batch, std::function p_filterFunc = nullptr, int p_maxCheck = 0) const;
+ ErrorCode SearchIndexWithFilter(QueryResult& p_query, std::function filterFunc, int maxCheck = 0, bool p_searchDeleted = false) const;
ErrorCode RefineSearchIndex(QueryResult &p_query, bool p_searchDeleted = false) const;
ErrorCode SearchTree(QueryResult &p_query) const;
ErrorCode AddIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, std::shared_ptr p_metadataSet, bool p_withMetaIndex = false, bool p_normalized = false);
+ ErrorCode AddIndexIdx(SizeType begin, SizeType end);
+ ErrorCode AddIndexId(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, int& beginHead, int& endHead);
ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum);
ErrorCode DeleteIndex(const SizeType& p_id);
@@ -164,11 +201,42 @@ namespace SPTAG
std::string GetParameter(const char* p_param, const char* p_section = nullptr) const;
ErrorCode UpdateIndex();
- ErrorCode RefineIndex(const std::vector>& p_indexStreams, IAbortOperation* p_abort);
+ ErrorCode RefineIndex(const std::vector> &p_indexStreams,
+ IAbortOperation *p_abort, std::vector *p_mapping);
ErrorCode RefineIndex(std::shared_ptr& p_newIndex);
+ ErrorCode SetWorkSpaceFactory(std::unique_ptr> up_workSpaceFactory)
+ {
+ SPTAG::COMMON::IWorkSpaceFactory* raw_generic_ptr = up_workSpaceFactory.release();
+ if (!raw_generic_ptr) return ErrorCode::Fail;
+
+
+ SPTAG::COMMON::IWorkSpaceFactory* raw_specialized_ptr = dynamic_cast*>(raw_generic_ptr);
+ if (!raw_specialized_ptr)
+ {
+ delete raw_generic_ptr;
+ return ErrorCode::Fail;
+ }
+ else
+ {
+ m_workSpaceFactory = std::unique_ptr>(raw_specialized_ptr);
+ return ErrorCode::Success;
+ }
+ }
+
+ virtual ErrorCode Check() override;
+ virtual std::string GetPriorityID(int queryID) const override { return m_pTrees.GetPriorityID(m_pSamples, queryID, m_fComputeDistance); }
private:
- void SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, bool p_searchDeleted, bool p_searchDuplicated) const;
+
+ int SearchIndexIterative(COMMON::QueryResultSet& p_query, COMMON::WorkSpace& p_space, bool p_isFirst, int batch, bool p_searchDeleted, bool p_searchDuplicated) const;
+
+ template &, SizeType, float), bool (*checkFilter)(const std::shared_ptr &, SizeType, std::function)>
+ int SearchIterative(COMMON::QueryResultSet& p_query,
+ COMMON::WorkSpace& p_space, bool p_isFirst, int batch) const;
+
+ void SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, bool p_searchDeleted, bool p_searchDuplicated, std::function filterFunc = nullptr) const;
+ template &, SizeType, float), bool(*checkFilter)(const std::shared_ptr&, SizeType, std::function)>
+ void Search(COMMON::QueryResultSet& p_query, COMMON::WorkSpace& p_space, std::function filterFunc) const;
};
} // namespace BKT
} // namespace SPTAG
diff --git a/AnnService/inc/Core/Common.h b/AnnService/inc/Core/Common.h
index 1bad007dd..dce9e4d51 100644
--- a/AnnService/inc/Core/Common.h
+++ b/AnnService/inc/Core/Common.h
@@ -1,9 +1,17 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef _SPTAG_CORE_COMMONDEFS_H_
#define _SPTAG_CORE_COMMONDEFS_H_
+#ifdef DEBUG
+#define IF_DEBUG(statement) statement
+#define IF_NDEBUG(statement)
+#else
+#define IF_DEBUG(statement)
+#define IF_NDEBUG(statement) statement
+#endif
+
#include
#include
#include
@@ -12,8 +20,8 @@
#include
#include
#include
+#include
#include "inc/Helper/Logging.h"
-#include "inc/Helper/DiskIO.h"
#ifndef _MSC_VER
#include
@@ -40,6 +48,13 @@ inline bool fileexists(const char* path) {
struct stat info;
return stat(path, &info) == 0 && (info.st_mode & S_IFDIR) == 0;
}
+/*
+inline int64_t filesize(const char* path) {
+ struct stat info;
+ if (stat(path, &info) == 0) return info.st_blocks * info.st_blksize;
+ return 0;
+}
+*/
template
inline T min(T a, T b) {
@@ -80,9 +95,45 @@ inline bool fileexists(const TCHAR* path) {
auto dwAttr = GetFileAttributes(path);
return (dwAttr != INVALID_FILE_ATTRIBUTES) && (dwAttr & FILE_ATTRIBUTE_DIRECTORY) == 0;
}
+/*
+inline int64_t filesize(const char* path) {
+ WIN32_FILE_ATTRIBUTE_DATA fad;
+ if (!GetFileAttributesEx(path, GetFileExInfoStandard, &fad)) return -1;
+ LARGE_INTEGER size;
+ size.HighPart = fad.nFileSizeHigh;
+ size.LowPart = fad.nFileSizeLow;
+ return size.QuadPart;
+}
+*/
+
#define mkdir(a) CreateDirectory(a, NULL)
+
+#ifndef max
+#define max(a,b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef min
+#define min(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+FORCEINLINE
+char
+InterlockedCompareExchange(
+ _Inout_ _Interlocked_operand_ char volatile* Destination,
+ _In_ char Exchange,
+ _In_ char Comperand
+)
+{
+ return (char)_InterlockedCompareExchange8(Destination, Exchange, Comperand);
+}
+
#endif
+inline int64_t filesize(const char* path) {
+ std::filesystem::path p{path};
+ return std::filesystem::file_size(p);
+}
+
namespace SPTAG
{
#if (__cplusplus < 201703L)
@@ -90,115 +141,121 @@ namespace SPTAG
#define ALIGN_FREE(ptr) _mm_free(ptr)
#define PAGE_ALLOC(size) _mm_malloc(size, 512)
#define PAGE_FREE(ptr) _mm_free(ptr)
+#define BLOCK_ALLOC(size, val) _mm_malloc(size, val)
+#define BLOCK_FREE(ptr, val) _mm_free(ptr)
#else
#define ALIGN_ALLOC(size) ::operator new(size, (std::align_val_t)32)
#define ALIGN_FREE(ptr) ::operator delete(ptr, (std::align_val_t)32)
#define PAGE_ALLOC(size) ::operator new(size, (std::align_val_t)512)
#define PAGE_FREE(ptr) ::operator delete(ptr, (std::align_val_t)512)
+#define BLOCK_ALLOC(size, val) ::operator new(size, (std::align_val_t)val)
+#define BLOCK_FREE(ptr, val) ::operator delete(ptr, (std::align_val_t)val)
#endif
-typedef std::int32_t SizeType;
-typedef std::int32_t DimensionType;
+#define ALIGN_ROUND(size) ((size) + 31) / 32 * 32
+#define ROUND_UP(size, val) (size + val - 1) / val * val
-const SizeType MaxSize = (std::numeric_limits::max)();
-const float MinDist = (std::numeric_limits::min)();
-const float MaxDist = (std::numeric_limits::max)() / 10;
-const float Epsilon = 0.000001f;
-const std::uint16_t PageSize = 4096;
-const int PageSizeEx = 12;
+ typedef std::int32_t SizeType;
+ typedef std::int32_t DimensionType;
-extern std::mt19937 rg;
-
-extern std::shared_ptr(*f_createIO)();
+ const SizeType MaxSize = (std::numeric_limits::max)();
+ const float MinDist = (std::numeric_limits::min)();
+ const float MaxDist = (std::numeric_limits::max)() / 10;
+ const float Epsilon = 0.000001f;
+ const std::uint16_t PageSize = 4096;
+ const int PageSizeEx = 12;
+ const std::chrono::microseconds MaxTimeout = (std::chrono::microseconds::max)();
#define IOBINARY(ptr, func, bytes, ...) if (ptr->func(bytes, __VA_ARGS__) != bytes) return ErrorCode::DiskIOFail
#define IOSTRING(ptr, func, ...) if (ptr->func(__VA_ARGS__) == 0) return ErrorCode::DiskIOFail
-extern std::shared_ptr g_pLogger;
+ extern Helper::LoggerHolder& GetLoggerHolder();
+ extern std::shared_ptr GetLogger();
+ extern void SetLogger(std::shared_ptr);
-#define LOG(l, ...) g_pLogger->Logging("SPTAG", l, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__)
+#define SPTAGLIB_LOG(l, ...) GetLogger()->Logging("SPTAG", l, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__)
-class MyException : public std::exception
-{
-private:
- std::string Exp;
-public:
- MyException(std::string e) { Exp = e; }
+ class MyException : public std::exception
+ {
+ private:
+ std::string Exp;
+ public:
+ MyException(std::string e) { Exp = e; }
#ifdef _MSC_VER
- const char* what() const { return Exp.c_str(); }
+ const char* what() const { return Exp.c_str(); }
#else
- const char* what() const noexcept { return Exp.c_str(); }
+ const char* what() const noexcept { return Exp.c_str(); }
#endif
-};
+ };
-enum class ErrorCode : std::uint16_t
-{
+ enum class ErrorCode : std::uint16_t
+ {
#define DefineErrorCode(Name, Value) Name = Value,
#include "DefinitionList.h"
#undef DefineErrorCode
- Undefined
-};
-static_assert(static_cast(ErrorCode::Undefined) != 0, "Empty ErrorCode!");
+ Undefined
+ };
+ static_assert(static_cast(ErrorCode::Undefined) != 0, "Empty ErrorCode!");
-enum class DistCalcMethod : std::uint8_t
-{
+ enum class DistCalcMethod : std::uint8_t
+ {
#define DefineDistCalcMethod(Name) Name,
#include "DefinitionList.h"
#undef DefineDistCalcMethod
- Undefined
-};
-static_assert(static_cast(DistCalcMethod::Undefined) != 0, "Empty DistCalcMethod!");
+ Undefined
+ };
+ static_assert(static_cast(DistCalcMethod::Undefined) != 0, "Empty DistCalcMethod!");
-enum class VectorValueType : std::uint8_t
-{
+ enum class VectorValueType : std::uint8_t
+ {
#define DefineVectorValueType(Name, Type) Name,
#include "DefinitionList.h"
#undef DefineVectorValueType
- Undefined
-};
-static_assert(static_cast(VectorValueType::Undefined) != 0, "Empty VectorValueType!");
+ Undefined
+ };
+ static_assert(static_cast(VectorValueType::Undefined) != 0, "Empty VectorValueType!");
-enum class IndexAlgoType : std::uint8_t
-{
+ enum class IndexAlgoType : std::uint8_t
+ {
#define DefineIndexAlgo(Name) Name,
#include "DefinitionList.h"
#undef DefineIndexAlgo
- Undefined
-};
-static_assert(static_cast(IndexAlgoType::Undefined) != 0, "Empty IndexAlgoType!");
+ Undefined
+ };
+ static_assert(static_cast(IndexAlgoType::Undefined) != 0, "Empty IndexAlgoType!");
-enum class VectorFileType : std::uint8_t
-{
+ enum class VectorFileType : std::uint8_t
+ {
#define DefineVectorFileType(Name) Name,
#include "DefinitionList.h"
#undef DefineVectorFileType
- Undefined
-};
-static_assert(static_cast(VectorFileType::Undefined) != 0, "Empty VectorFileType!");
+ Undefined
+ };
+ static_assert(static_cast(VectorFileType::Undefined) != 0, "Empty VectorFileType!");
-enum class TruthFileType : std::uint8_t
-{
+ enum class TruthFileType : std::uint8_t
+ {
#define DefineTruthFileType(Name) Name,
#include "DefinitionList.h"
#undef DefineTruthFileType
- Undefined
-};
-static_assert(static_cast(TruthFileType::Undefined) != 0, "Empty TruthFileType!");
+ Undefined
+ };
+ static_assert(static_cast(TruthFileType::Undefined) != 0, "Empty TruthFileType!");
-template
-constexpr VectorValueType GetEnumValueType()
-{
- return VectorValueType::Undefined;
-}
+ template
+ constexpr VectorValueType GetEnumValueType()
+ {
+ return VectorValueType::Undefined;
+ }
#define DefineVectorValueType(Name, Type) \
@@ -212,10 +269,10 @@ constexpr VectorValueType GetEnumValueType() \
#undef DefineVectorValueType
-inline std::size_t GetValueTypeSize(VectorValueType p_valueType)
-{
- switch (p_valueType)
+ inline std::size_t GetValueTypeSize(VectorValueType p_valueType)
{
+ switch (p_valueType)
+ {
#define DefineVectorValueType(Name, Type) \
case VectorValueType::Name: \
return sizeof(Type); \
@@ -223,22 +280,52 @@ inline std::size_t GetValueTypeSize(VectorValueType p_valueType)
#include "DefinitionList.h"
#undef DefineVectorValueType
- default:
- break;
- }
+ default:
+ break;
+ }
- return 0;
-}
+ return 0;
+ }
-enum class QuantizerType : std::uint8_t
-{
+ enum class QuantizerType : std::uint8_t
+ {
#define DefineQuantizerType(Name, Type) Name,
#include "DefinitionList.h"
#undef DefineQuantizerType
- Undefined
-};
-static_assert(static_cast(QuantizerType::Undefined) != 0, "Empty QuantizerType!");
+ Undefined
+ };
+ static_assert(static_cast(QuantizerType::Undefined) != 0, "Empty QuantizerType!");
+
+ enum class NumaStrategy : std::uint8_t
+ {
+#define DefineNumaStrategy(Name) Name,
+#include "DefinitionList.h"
+#undef DefineNumaStrategy
+
+ Undefined
+ };
+ static_assert(static_cast(NumaStrategy::Undefined) != 0, "Empty NumaStrategy!");
+
+ enum class OrderStrategy : std::uint8_t
+ {
+#define DefineOrderStrategy(Name) Name,
+#include "DefinitionList.h"
+#undef DefineOrderStrategy
+
+ Undefined
+ };
+ static_assert(static_cast(OrderStrategy::Undefined) != 0, "Empty OrderStrategy!");
+
+ enum class Storage : std::uint8_t
+ {
+#define DefineStorage(Name) Name,
+#include "DefinitionList.h"
+#undef DefineStorage
+
+ Undefined
+ };
+ static_assert(static_cast(Storage::Undefined) != 0, "Empty Storage!");
} // namespace SPTAG
diff --git a/AnnService/inc/Core/Common/BKTree.h b/AnnService/inc/Core/Common/BKTree.h
index f920dd273..00019d97c 100644
--- a/AnnService/inc/Core/Common/BKTree.h
+++ b/AnnService/inc/Core/Common/BKTree.h
@@ -7,8 +7,8 @@
#include
#include
#include
+#include
#include
-
#include "inc/Core/VectorIndex.h"
#include "CommonUtils.h"
@@ -37,7 +37,7 @@ namespace SPTAG
int _DK;
DimensionType _D;
DimensionType _RD;
- int _T;
+ int _TH;
DistCalcMethod _M;
T* centers;
T* newTCenters;
@@ -52,26 +52,25 @@ namespace SPTAG
std::function fComputeDistance;
const std::shared_ptr& m_pQuantizer;
- KmeansArgs(int k, DimensionType dim, SizeType datasize, int threadnum, DistCalcMethod distMethod, const std::shared_ptr& quantizer = nullptr) : _K(k), _DK(k), _D(dim), _RD(dim), _T(threadnum), _M(distMethod), m_pQuantizer(quantizer){
+ KmeansArgs(int k, DimensionType dim, SizeType datasize, int threadnum, DistCalcMethod distMethod, const std::shared_ptr& quantizer = nullptr) : _K(k), _DK(k), _D(dim), _RD(dim), _TH(threadnum), _M(distMethod), m_pQuantizer(quantizer) {
if (m_pQuantizer) {
_RD = m_pQuantizer->ReconstructDim();
fComputeDistance = m_pQuantizer->DistanceCalcSelector(distMethod);
}
- else
- {
+ else {
fComputeDistance = COMMON::DistanceCalcSelector(distMethod);
}
centers = (T*)ALIGN_ALLOC(sizeof(T) * _K * _D);
newTCenters = (T*)ALIGN_ALLOC(sizeof(T) * _K * _D);
counts = new SizeType[_K];
- newCenters = new float[_T * _K * _RD];
- newCounts = new SizeType[_T * _K];
+ newCenters = new float[_TH * _K * _RD];
+ newCounts = new SizeType[_TH * _K];
label = new int[datasize];
- clusterIdx = new SizeType[_T * _K];
- clusterDist = new float[_T * _K];
+ clusterIdx = new SizeType[_TH * _K];
+ clusterDist = new float[_TH * _K];
weightedCounts = new float[_K];
- newWeightedCounts = new float[_T * _K];
+ newWeightedCounts = new float[_TH * _K];
}
~KmeansArgs() {
@@ -88,16 +87,16 @@ namespace SPTAG
}
inline void ClearCounts() {
- memset(newCounts, 0, sizeof(SizeType) * _T * _K);
- memset(newWeightedCounts, 0, sizeof(float) * _T * _K);
+ memset(newCounts, 0, sizeof(SizeType) * _TH * _K);
+ memset(newWeightedCounts, 0, sizeof(float) * _TH * _K);
}
inline void ClearCenters() {
- memset(newCenters, 0, sizeof(float) * _T * _K * _RD);
+ memset(newCenters, 0, sizeof(float) * _TH * _K * _RD);
}
inline void ClearDists(float dist) {
- for (int i = 0; i < _T * _K; i++) {
+ for (int i = 0; i < _TH * _K; i++) {
clusterIdx[i] = -1;
clusterDist[i] = dist;
}
@@ -109,7 +108,7 @@ namespace SPTAG
for (int k = 1; k < _K; k++) pos[k] = pos[k - 1] + newCounts[k - 1];
for (int k = 0; k < _K; k++) {
- if (newCounts[k] == 0) continue;
+ if (counts[k] == 0) continue;
SizeType i = pos[k];
while (newCounts[k] > 0) {
SizeType swapid = pos[label[i]] + newCounts[label[i]] - 1;
@@ -157,7 +156,7 @@ namespace SPTAG
}
if (maxcluster != -1 && (args.clusterIdx[maxcluster] < 0 || args.clusterIdx[maxcluster] >= data.R()))
- LOG(Helper::LogLevel::LL_Debug, "maxcluster:%d(%d) Error dist:%f\n", maxcluster, args.newCounts[maxcluster], args.clusterDist[maxcluster]);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Debug, "maxcluster:%d(%d) Error dist:%f\n", maxcluster, args.newCounts[maxcluster], args.clusterDist[maxcluster]);
float diff = 0;
std::vector reconstructVector(args._RD, 0);
@@ -179,19 +178,20 @@ namespace SPTAG
for (DimensionType j = 0; j < args._RD; j++) {
currCenters[j] /= args.counts[k];
}
+
if (args._M == DistCalcMethod::Cosine) {
COMMON::Utils::Normalize(currCenters, args._RD, COMMON::Utils::GetBase());
}
-
+
if (args.m_pQuantizer) {
for (DimensionType j = 0; j < args._RD; j++) reconstructVector[j] = (R)(currCenters[j]);
args.m_pQuantizer->QuantizeVector(reconstructVector.data(), (uint8_t*)TCenter);
}
else {
- for (DimensionType j = 0; j < args._RD; j++) TCenter[j] = (T)(currCenters[j]);
+ for (DimensionType j = 0; j < args._D; j++) TCenter[j] = (T)(currCenters[j]);
}
}
- diff += args.fComputeDistance(args.centers + k*args._D, TCenter, args._D);
+ diff += DistanceUtils::ComputeDistance(TCenter, args.centers + k * args._D, args._D, DistCalcMethod::L2);
}
return diff;
}
@@ -222,62 +222,84 @@ namespace SPTAG
const SizeType first, const SizeType last, KmeansArgs& args,
const bool updateCenters, float lambda) {
float currDist = 0;
- SizeType subsize = (last - first - 1) / args._T + 1;
+ SizeType subsize = (last - first - 1) / args._TH + 1;
-#pragma omp parallel for num_threads(args._T) shared(data, indices) reduction(+:currDist)
- for (int tid = 0; tid < args._T; tid++)
+ std::vector mythreads;
+ mythreads.reserve(args._TH);
+ for (int tid = 0; tid < args._TH; tid++)
{
- SizeType istart = first + tid * subsize;
- SizeType iend = min(first + (tid + 1) * subsize, last);
- SizeType *inewCounts = args.newCounts + tid * args._K;
- float *inewCenters = args.newCenters + tid * args._K * args._RD;
- SizeType * iclusterIdx = args.clusterIdx + tid * args._K;
- float * iclusterDist = args.clusterDist + tid * args._K;
- float * iweightedCounts = args.newWeightedCounts + tid * args._K;
- float idist = 0;
- R* reconstructVector = nullptr;
- if (args.m_pQuantizer) reconstructVector = (R*)ALIGN_ALLOC(args.m_pQuantizer->ReconstructSize());
-
- for (SizeType i = istart; i < iend; i++) {
- int clusterid = 0;
- float smallestDist = MaxDist;
- for (int k = 0; k < args._DK; k++) {
- float dist = args.fComputeDistance(data[indices[i]], args.centers + k*args._D, args._D) + lambda*args.counts[k];
- if (dist > -MaxDist && dist < smallestDist) {
- clusterid = k; smallestDist = dist;
- }
- }
- args.label[i] = clusterid;
- inewCounts[clusterid]++;
- iweightedCounts[clusterid] += smallestDist;
- idist += smallestDist;
- if (updateCenters) {
- if (args.m_pQuantizer) {
- args.m_pQuantizer->ReconstructVector((const uint8_t*)data[indices[i]], reconstructVector);
- }
- else {
- reconstructVector = (R*)data[indices[i]];
+ mythreads.emplace_back([tid, first, last, updateCenters, lambda, subsize, &data, &indices, &args, &currDist]() {
+ SizeType istart = first + tid * subsize;
+ SizeType iend = min(first + (tid + 1) * subsize, last);
+ SizeType *inewCounts = args.newCounts + tid * args._K;
+ float *inewCenters = args.newCenters + tid * args._K * args._RD;
+ SizeType *iclusterIdx = args.clusterIdx + tid * args._K;
+ float *iclusterDist = args.clusterDist + tid * args._K;
+ float *iweightedCounts = args.newWeightedCounts + tid * args._K;
+ float idist = 0;
+ R *reconstructVector = nullptr;
+ if (args.m_pQuantizer)
+ reconstructVector = (R *)ALIGN_ALLOC(args.m_pQuantizer->ReconstructSize());
+
+ for (SizeType i = istart; i < iend; i++)
+ {
+ int clusterid = 0;
+ float smallestDist = MaxDist;
+ for (int k = 0; k < args._DK; k++)
+ {
+ float dist = args.fComputeDistance(data[indices[i]], args.centers + k * args._D, args._D) +
+ lambda * args.counts[k];
+ if (dist > -MaxDist && dist < smallestDist)
+ {
+ clusterid = k;
+ smallestDist = dist;
+ }
}
- float* center = inewCenters + clusterid*args._RD;
- for (DimensionType j = 0; j < args._RD; j++) center[j] += reconstructVector[j];
-
- if (smallestDist > iclusterDist[clusterid]) {
- iclusterDist[clusterid] = smallestDist;
- iclusterIdx[clusterid] = indices[i];
+ args.label[i] = clusterid;
+ inewCounts[clusterid]++;
+ iweightedCounts[clusterid] += smallestDist;
+ idist += smallestDist;
+ if (updateCenters)
+ {
+ if (args.m_pQuantizer)
+ {
+ args.m_pQuantizer->ReconstructVector((const uint8_t *)data[indices[i]],
+ reconstructVector);
+ }
+ else
+ {
+ reconstructVector = (R *)data[indices[i]];
+ }
+ float *center = inewCenters + clusterid * args._RD;
+ for (DimensionType j = 0; j < args._RD; j++)
+ center[j] += reconstructVector[j];
+
+ if (smallestDist > iclusterDist[clusterid])
+ {
+ iclusterDist[clusterid] = smallestDist;
+ iclusterIdx[clusterid] = indices[i];
+ }
}
- }
- else {
- if (smallestDist <= iclusterDist[clusterid]) {
- iclusterDist[clusterid] = smallestDist;
- iclusterIdx[clusterid] = indices[i];
+ else
+ {
+ if (smallestDist <= iclusterDist[clusterid])
+ {
+ iclusterDist[clusterid] = smallestDist;
+ iclusterIdx[clusterid] = indices[i];
+ }
}
}
- }
- if (args.m_pQuantizer) ALIGN_FREE(reconstructVector);
- currDist += idist;
+ if (args.m_pQuantizer)
+ ALIGN_FREE(reconstructVector);
+ COMMON::Utils::atomic_float_add(&currDist, idist);
+ });
}
-
- for (int i = 1; i < args._T; i++) {
+ for (auto &t : mythreads)
+ {
+ t.join();
+ }
+ mythreads.clear();
+ for (int i = 1; i < args._TH; i++) {
for (int k = 0; k < args._DK; k++) {
args.newCounts[k] += args.newCounts[i * args._K + k];
args.newWeightedCounts[k] += args.newWeightedCounts[i * args._K + k];
@@ -285,7 +307,7 @@ namespace SPTAG
}
if (updateCenters) {
- for (int i = 1; i < args._T; i++) {
+ for (int i = 1; i < args._TH; i++) {
float* currCenter = args.newCenters + i*args._K*args._RD;
for (size_t j = 0; j < ((size_t)args._DK) * args._RD; j++) args.newCenters[j] += currCenter[j];
@@ -298,7 +320,7 @@ namespace SPTAG
}
}
else {
- for (int i = 1; i < args._T; i++) {
+ for (int i = 1; i < args._TH; i++) {
for (int k = 0; k < args._DK; k++) {
if (args.clusterIdx[i*args._K + k] != -1 && args.clusterDist[i*args._K + k] <= args.clusterDist[k]) {
args.clusterDist[k] = args.clusterDist[i*args._K + k];
@@ -346,6 +368,7 @@ namespace SPTAG
float adjustedLambda = InitCenters(data, indices, first, last, args, samples, 3);
if (abort && abort->ShouldAbort()) return 0;
+ std::mt19937 rg;
SizeType batchEnd = min(first + samples, last);
float currDiff, currDist, minClusterDist = MaxDist;
int noImprovement = 0;
@@ -374,12 +397,12 @@ namespace SPTAG
for (int k = 0; k < args._DK; k++) {
log += std::to_string(args.counts[k]) + " ";
}
- LOG(Helper::LogLevel::LL_Info, "iter %d dist:%f lambda:(%f,%f) counts:%s\n", iter, currDist, originalLambda, adjustedLambda, log.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "iter %d dist:%f lambda:(%f,%f) counts:%s\n", iter, currDist, originalLambda, adjustedLambda, log.c_str());
}
*/
currDiff = RefineCenters(data, args);
- //if (debug) LOG(Helper::LogLevel::LL_Info, "iter %d dist:%f diff:%f\n", iter, currDist, currDiff);
+ //if (debug) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "iter %d dist:%f diff:%f\n", iter, currDist, currDiff);
if (abort && abort->ShouldAbort()) return 0;
if (currDiff < 1e-3 || noImprovement >= 5) break;
@@ -406,7 +429,7 @@ namespace SPTAG
if (args.counts[i] > 0) availableClusters++;
}
CountStd = sqrt(CountStd / args._DK) / CountAvg;
- if (debug) LOG(Helper::LogLevel::LL_Info, "Lambda:min(%g,%g) Max:%d Min:%d Avg:%f Std/Avg:%f Dist:%f NonZero/Total:%d/%d\n", originalLambda, adjustedLambda, maxCount, minCount, CountAvg, CountStd, currDist, availableClusters, args._DK);
+ if (debug) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Lambda:min(%g,%g) Max:%d Min:%d Avg:%f Std/Avg:%f Dist:%f NonZero/Total:%d/%d\n", originalLambda, adjustedLambda, maxCount, minCount, CountAvg, CountStd, currDist, availableClusters, args._DK);
return CountStd;
}
@@ -418,7 +441,7 @@ namespace SPTAG
float bestLambdaFactor = 100.0f, bestCountStd = (std::numeric_limits::max)();
for (float lambdaFactor = 0.001f; lambdaFactor <= 1000.0f + 1e-3; lambdaFactor *= 10) {
- float CountStd;
+ float CountStd = 0.0;
if (args.m_pQuantizer)
{
switch (args.m_pQuantizer->GetReconstructType())
@@ -458,7 +481,7 @@ break;
}
}
*/
- LOG(Helper::LogLevel::LL_Info, "Best Lambda Factor:%f\n", bestLambdaFactor);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Best Lambda Factor:%f\n", bestLambdaFactor);
return bestLambdaFactor;
}
@@ -509,7 +532,8 @@ break;
m_iSamples(other.m_iSamples),
m_fBalanceFactor(other.m_fBalanceFactor),
m_lock(new std::shared_timed_mutex),
- m_pQuantizer(other.m_pQuantizer) {}
+ m_pQuantizer(other.m_pQuantizer),
+ m_bfs(0) {}
~BKTree() {}
inline const BKTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; }
@@ -524,6 +548,13 @@ break;
inline const std::unordered_map& GetSampleMap() const { return m_pSampleCenterMap; }
+ inline void SwapTree(BKTree& newTrees)
+ {
+ m_pTreeRoots.swap(newTrees.m_pTreeRoots);
+ m_pTreeStart.swap(newTrees.m_pTreeStart);
+ m_pSampleCenterMap.swap(newTrees.m_pSampleCenterMap);
+ }
+
template
void Rebuild(const Dataset& data, DistCalcMethod distMethod, IAbortOperation* abort)
{
@@ -560,6 +591,7 @@ break;
if (m_fBalanceFactor < 0) m_fBalanceFactor = DynamicFactorSelect(data, localindices, 0, (SizeType)localindices.size(), args, m_iSamples);
+ std::mt19937 rg;
m_pSampleCenterMap.clear();
for (char i = 0; i < m_iTreeNumber; i++)
{
@@ -567,12 +599,15 @@ break;
m_pTreeStart.push_back((SizeType)m_pTreeRoots.size());
m_pTreeRoots.emplace_back((SizeType)localindices.size());
- LOG(Helper::LogLevel::LL_Info, "Start to build BKTree %d\n", i + 1);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start to build BKTree %d\n", i + 1);
ss.push(BKTStackItem(m_pTreeStart[i], 0, (SizeType)localindices.size(), true));
while (!ss.empty()) {
- if (abort && abort->ShouldAbort()) return;
-
+ if (abort && abort->ShouldAbort())
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "Abort!!!\n");
+ return;
+ }
BKTStackItem item = ss.top(); ss.pop();
m_pTreeRoots[item.index].childStart = (SizeType)m_pTreeRoots.size();
if (item.last - item.first <= m_iBKTLeafSize) {
@@ -615,7 +650,7 @@ break;
m_pTreeRoots[item.index].childEnd = (SizeType)m_pTreeRoots.size();
}
m_pTreeRoots.emplace_back(-1);
- LOG(Helper::LogLevel::LL_Info, "%d BKTree built, %zu %zu\n", i + 1, m_pTreeRoots.size() - m_pTreeStart[i], localindices.size());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d BKTree built, %zu %zu\n", i + 1, m_pTreeRoots.size() - m_pTreeStart[i], localindices.size());
}
}
@@ -633,13 +668,13 @@ break;
SizeType treeNodeSize = (SizeType)m_pTreeRoots.size();
IOBINARY(p_out, WriteBinary, sizeof(treeNodeSize), (char*)&treeNodeSize);
IOBINARY(p_out, WriteBinary, sizeof(BKTNode) * treeNodeSize, (char*)m_pTreeRoots.data());
- LOG(Helper::LogLevel::LL_Info, "Save BKT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save BKT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
ErrorCode SaveTrees(std::string sTreeFileName) const
{
- LOG(Helper::LogLevel::LL_Info, "Save BKT to %s\n", sTreeFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save BKT to %s\n", sTreeFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sTreeFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
return SaveTrees(ptr);
@@ -658,7 +693,7 @@ break;
m_pTreeRoots.resize(treeNodeSize);
memcpy(m_pTreeRoots.data(), pBKTMemFile, sizeof(BKTNode) * treeNodeSize);
if (m_pTreeRoots.size() > 0 && m_pTreeRoots.back().centerid != -1) m_pTreeRoots.emplace_back(-1);
- LOG(Helper::LogLevel::LL_Info, "Load BKT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load BKT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
@@ -674,13 +709,13 @@ break;
IOBINARY(p_input, ReadBinary, sizeof(BKTNode) * treeNodeSize, (char*)m_pTreeRoots.data());
if (m_pTreeRoots.size() > 0 && m_pTreeRoots.back().centerid != -1) m_pTreeRoots.emplace_back(-1);
- LOG(Helper::LogLevel::LL_Info, "Load BKT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load BKT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
ErrorCode LoadTrees(std::string sTreeFileName)
{
- LOG(Helper::LogLevel::LL_Info, "Load BKT From %s\n", sTreeFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load BKT From %s\n", sTreeFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sTreeFileName.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile;
return LoadTrees(ptr);
@@ -698,8 +733,12 @@ break;
int MaxBFSNodes = 100;
p_space.m_currBSPTQueue.Resize(MaxBFSNodes); p_space.m_nextBSPTQueue.Resize(MaxBFSNodes);
Heap* p_curr = &p_space.m_currBSPTQueue, * p_next = &p_space.m_nextBSPTQueue;
-
p_curr->Top().distance = 1e9;
+
+ for (SizeType begin = node.childStart; begin < node.childEnd; begin++) {
+ _mm_prefetch((const char*)(data[m_pTreeRoots[begin].centerid]), _MM_HINT_T0);
+ }
+
for (SizeType begin = node.childStart; begin < node.childEnd; begin++) {
SizeType index = m_pTreeRoots[begin].centerid;
float dist = fComputeDistance(p_query.GetQuantizedTarget(), data[index], data.C());
@@ -711,7 +750,7 @@ break;
}
}
- for (int level = 1; level < 2; level++) {
+ for (int level = 1; level <= m_bfs; level++) {
p_next->Top().distance = 1e9;
while (!p_curr->empty()) {
NodeDistPair tmp = p_curr->pop();
@@ -720,6 +759,9 @@ break;
p_space.m_SPTQueue.insert(tmp);
}
else {
+ for (SizeType begin = tnode.childStart; begin < tnode.childEnd; begin++) {
+ _mm_prefetch((const char*)(data[m_pTreeRoots[begin].centerid]), _MM_HINT_T0);
+ }
if (!p_space.CheckAndSet(tnode.centerid)) {
p_space.m_NGQueue.insert(NodeDistPair(tnode.centerid, tmp.distance));
}
@@ -743,6 +785,9 @@ break;
}
}
else {
+ for (SizeType begin = node.childStart; begin < node.childEnd; begin++) {
+ _mm_prefetch((const char*)(data[m_pTreeRoots[begin].centerid]), _MM_HINT_T0);
+ }
for (SizeType begin = node.childStart; begin < node.childEnd; begin++) {
SizeType index = m_pTreeRoots[begin].centerid;
p_space.m_SPTQueue.insert(NodeDistPair(begin, fComputeDistance(p_query.GetQuantizedTarget(), data[index], data.C())));
@@ -767,6 +812,9 @@ break;
if (p_space.m_iNumberOfCheckedLeaves >= p_limits) break;
}
else {
+ for (SizeType begin = tnode.childStart; begin < tnode.childEnd; begin++) {
+ _mm_prefetch((const char*)(data[m_pTreeRoots[begin].centerid]), _MM_HINT_T0);
+ }
if (!p_space.CheckAndSet(tnode.centerid)) {
p_space.m_NGQueue.insert(NodeDistPair(tnode.centerid, bcell.distance));
}
@@ -778,6 +826,32 @@ break;
}
}
+ template
+ std::string GetPriorityID(const Dataset& data, int p_queryID, std::function fComputeDistance) const {
+ std::string ret = "";
+ for (char i = 0; i < m_iTreeNumber; i++) {
+ const BKTNode* node = &(m_pTreeRoots[m_pTreeStart[i]]);
+ while (node->childStart > 0) {
+ float minDist = MaxDist;
+ SizeType minIdx = -1;
+ for (SizeType begin = node->childStart; begin < node->childEnd; begin++) {
+ _mm_prefetch((const char*)(data[m_pTreeRoots[begin].centerid]), _MM_HINT_T0);
+ }
+ for (SizeType begin = node->childStart; begin < node->childEnd; begin++) {
+ SizeType index = m_pTreeRoots[begin].centerid;
+ float dist = fComputeDistance(data[p_queryID], data[index], data.C());
+ if (dist < minDist) {
+ minDist = dist;
+ minIdx = begin;
+ }
+ }
+ ret += static_cast(minIdx - node->childStart);
+ node = &(m_pTreeRoots[minIdx]);
+ }
+ }
+ return ret;
+ }
+
private:
std::vector m_pTreeStart;
std::vector m_pTreeRoots;
diff --git a/AnnService/inc/Core/Common/Checksum.h b/AnnService/inc/Core/Common/Checksum.h
new file mode 100644
index 000000000..499bdb56f
--- /dev/null
+++ b/AnnService/inc/Core/Common/Checksum.h
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifndef _SPTAG_COMMON_CHECKSUM_H_
+#define _SPTAG_COMMON_CHECKSUM_H_
+
+#include
+#include "inc/Core/Common.h"
+
+namespace SPTAG
+{
+typedef uint8_t ChecksumType;
+
+namespace COMMON
+{
+class Checksum
+{
+ public:
+ Checksum() : m_type(0), m_seed(0), m_skip(false)
+ {
+ }
+
+ void Initialize(bool p_skip, uint8_t p_type, int p_seed = 0)
+ {
+ m_type = p_type;
+ m_seed = p_seed;
+ m_skip = p_skip;
+ }
+
+ ChecksumType CalcChecksum(const char *p_data, int p_length)
+ {
+ uint8_t cs = m_seed;
+ for (int i = 0; i < p_length; i++)
+ cs ^= p_data[i];
+ return cs;
+ }
+
+ ChecksumType AppendChecksum(ChecksumType p_checksum, const char* p_data, int p_length)
+ {
+ for (int i = 0; i < p_length; i++)
+ p_checksum ^= p_data[i];
+ return p_checksum;
+ }
+
+ bool ValidateChecksum(const char *p_data, int p_length, ChecksumType p_checksum)
+ {
+ if (m_skip) return true;
+ return (CalcChecksum(p_data, p_length) == p_checksum);
+ }
+
+ private:
+ uint8_t m_type;
+ int m_seed;
+ bool m_skip;
+};
+} // namespace COMMON
+} // namespace SPTAG
+
+#endif // _SPTAG_COMMON_CHECKSUM_H_
\ No newline at end of file
diff --git a/AnnService/inc/Core/Common/CommonUtils.h b/AnnService/inc/Core/Common/CommonUtils.h
index 755994ced..905de2cd7 100644
--- a/AnnService/inc/Core/Common/CommonUtils.h
+++ b/AnnService/inc/Core/Common/CommonUtils.h
@@ -11,8 +11,8 @@
#include
#include
+#include
#include
-#include
#include
#include
#include
diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h
index 1b2cdbd9c..a5abaf743 100644
--- a/AnnService/inc/Core/Common/Dataset.h
+++ b/AnnService/inc/Core/Common/Dataset.h
@@ -4,11 +4,14 @@
#ifndef _SPTAG_COMMON_DATASET_H_
#define _SPTAG_COMMON_DATASET_H_
+#include "inc/Core/VectorIndex.h"
+
namespace SPTAG
{
namespace COMMON
{
// structure to save Data and Graph
+ /*
template
class Dataset
{
@@ -39,6 +42,12 @@ namespace SPTAG
}
void Initialize(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, T* data_ = nullptr, bool shareOwnership_ = true)
{
+ if (data != nullptr) {
+ if (ownData) ALIGN_FREE(data);
+ for (T* ptr : incBlocks) ALIGN_FREE(ptr);
+ incBlocks.clear();
+ }
+
rows = rows_;
cols = cols_;
data = data_;
@@ -73,11 +82,20 @@ namespace SPTAG
inline const T* At(SizeType index) const
{
- if (index >= rows) {
- SizeType incIndex = index - rows;
- return incBlocks[incIndex >> rowsInBlockEx] + ((size_t)(incIndex & rowsInBlock)) * cols;
+ if (index < R() && index >= 0)
+ {
+ if (index >= rows) {
+ SizeType incIndex = index - rows;
+ return incBlocks[incIndex >> rowsInBlockEx] + ((size_t)(incIndex & rowsInBlock)) * cols;
+ }
+ return data + ((size_t)index) * cols;
+ }
+ else
+ {
+ std::ostringstream oss;
+ oss << "Index out of range in Dataset. Index: " << index << " Size: " << R();
+ throw std::out_of_range(oss.str());
}
- return data + ((size_t)index) * cols;
}
T* operator[](SizeType index)
@@ -90,7 +108,7 @@ namespace SPTAG
return At(index);
}
- ErrorCode AddBatch(const T* pData, SizeType num)
+ ErrorCode AddBatch(SizeType num, const T* pData = nullptr)
{
if (R() > maxRows - num) return ErrorCode::MemoryOverFlow;
@@ -98,38 +116,20 @@ namespace SPTAG
while (written < num) {
SizeType curBlockIdx = ((incRows + written) >> rowsInBlockEx);
if (curBlockIdx >= (SizeType)incBlocks.size()) {
- T* newBlock = (T*)ALIGN_ALLOC(((size_t)rowsInBlock + 1) * cols * sizeof(T));
+ T* newBlock = (T*)ALIGN_ALLOC(sizeof(T) * (rowsInBlock + 1) * cols);
if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
+ std::memset(newBlock, -1, sizeof(T) * (rowsInBlock + 1) * cols);
incBlocks.push_back(newBlock);
}
SizeType curBlockPos = ((incRows + written) & rowsInBlock);
SizeType toWrite = min(rowsInBlock + 1 - curBlockPos, num - written);
- std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T));
+ if (pData != nullptr) std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T));
written += toWrite;
}
incRows += written;
return ErrorCode::Success;
}
- ErrorCode AddBatch(SizeType num)
- {
- if (R() > maxRows - num) return ErrorCode::MemoryOverFlow;
-
- SizeType written = 0;
- while (written < num) {
- SizeType curBlockIdx = (incRows + written) >> rowsInBlockEx;
- if (curBlockIdx >= (SizeType)incBlocks.size()) {
- T* newBlock = (T*)ALIGN_ALLOC(sizeof(T) * (rowsInBlock + 1) * cols);
- if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
- std::memset(newBlock, -1, sizeof(T) * (rowsInBlock + 1) * cols);
- incBlocks.push_back(newBlock);
- }
- written += min(rowsInBlock + 1 - ((incRows + written) & rowsInBlock), num - written);
- }
- incRows += written;
- return ErrorCode::Success;
- }
-
ErrorCode Save(std::shared_ptr p_out) const
{
SizeType CR = R();
@@ -143,13 +143,13 @@ namespace SPTAG
SizeType remain = (incRows & rowsInBlock);
if (remain > 0) IOBINARY(p_out, WriteBinary, sizeof(T) * cols * remain, (char*)incBlocks[blocks]);
- LOG(Helper::LogLevel::LL_Info, "Save %s (%d,%d) Finish!\n", name.c_str(), CR, cols);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s (%d,%d) Finish!\n", name.c_str(), CR, cols);
return ErrorCode::Success;
}
ErrorCode Save(std::string sDataPointsFileName) const
{
- LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", name.c_str(), sDataPointsFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", name.c_str(), sDataPointsFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
return Save(ptr);
@@ -162,13 +162,13 @@ namespace SPTAG
Initialize(rows, cols, blockSize, capacity);
IOBINARY(pInput, ReadBinary, sizeof(T) * cols * rows, (char*)data);
- LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), rows, cols);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), rows, cols);
return ErrorCode::Success;
}
ErrorCode Load(std::string sDataPointsFileName, SizeType blockSize, SizeType capacity)
{
- LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", name.c_str(), sDataPointsFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", name.c_str(), sDataPointsFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile;
return Load(ptr, blockSize, capacity);
@@ -186,7 +186,7 @@ namespace SPTAG
pDataPointsMemFile += sizeof(DimensionType);
Initialize(R, C, blockSize, capacity, (T*)pDataPointsMemFile);
- LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), R, C);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), R, C);
return ErrorCode::Success;
}
@@ -209,19 +209,293 @@ namespace SPTAG
for (SizeType i = 0; i < R; i++) {
IOBINARY(output, WriteBinary, sizeof(T) * cols, (char*)At(indices[i]));
}
- LOG(Helper::LogLevel::LL_Info, "Save Refine %s (%d,%d) Finish!\n", name.c_str(), R, cols);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Refine %s (%d,%d) Finish!\n", name.c_str(), R, cols);
return ErrorCode::Success;
}
ErrorCode Refine(const std::vector& indices, std::string sDataPointsFileName) const
{
- LOG(Helper::LogLevel::LL_Info, "Save Refine %s To %s\n", name.c_str(), sDataPointsFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Refine %s To %s\n", name.c_str(), sDataPointsFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
return Refine(indices, ptr);
}
};
+ */
+ template
+ class Dataset
+ {
+ private:
+ std::string name = "Data";
+ SizeType rows = 0;
+ DimensionType cols = 1;
+ char* data = nullptr;
+ bool ownData = false;
+ SizeType incRows = 0;
+ SizeType maxRows = MaxSize;
+ SizeType rowsInBlock = 1024 * 1024;
+ SizeType rowsInBlockEx = 20;
+ std::shared_ptr> incBlocks;
+
+ DimensionType colStart = 0;
+ DimensionType mycols = 0;
+
+ public:
+ Dataset() {}
+
+ Dataset(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, const void* data_ = nullptr, bool shareOwnership_ = true, std::shared_ptr> incBlocks_ = nullptr, int colStart_ = 0, int rowEnd_ = -1)
+ {
+ Initialize(rows_, cols_, rowsInBlock_, capacity_, data_, shareOwnership_, incBlocks_, colStart_, rowEnd_);
+ }
+ ~Dataset()
+ {
+ if (ownData) ALIGN_FREE(data);
+ if (incBlocks != nullptr) {
+ for (char* ptr : *incBlocks) ALIGN_FREE(ptr);
+ incBlocks->clear();
+ }
+ }
+
+ void Initialize(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, const void* data_ = nullptr, bool shareOwnership_ = true, std::shared_ptr> incBlocks_ = nullptr, int colStart_ = 0, int rowEnd_ = -1)
+ {
+ if (data != nullptr) {
+ if (ownData) ALIGN_FREE(data);
+ for (char* ptr : *incBlocks) ALIGN_FREE(ptr);
+ incBlocks->clear();
+ }
+
+ rows = rows_;
+ if (rowEnd_ >= colStart_) cols = rowEnd_;
+ else cols = cols_ * sizeof(T);
+ data = (char*)data_;
+ if (data_ == nullptr || !shareOwnership_)
+ {
+ ownData = true;
+ data = (char*)ALIGN_ALLOC(((size_t)rows) * cols);
+ if (data_ != nullptr) memcpy(data, data_, ((size_t)rows) * cols);
+ else std::memset(data, -1, ((size_t)rows) * cols);
+ }
+ maxRows = capacity_;
+ rowsInBlockEx = static_cast(ceil(log2(rowsInBlock_)));
+ rowsInBlock = (1 << rowsInBlockEx) - 1;
+ incBlocks = incBlocks_;
+ if (incBlocks == nullptr) incBlocks.reset(new std::vector());
+ incBlocks->reserve((static_cast(capacity_) + rowsInBlock) >> rowsInBlockEx);
+
+ colStart = colStart_;
+ mycols = cols_;
+ }
+
+ bool IsReady() const { return data != nullptr; }
+
+ void SetName(const std::string& name_) { name = name_; }
+ const std::string& Name() const { return name; }
+
+ void SetR(SizeType R_)
+ {
+ if (R_ >= rows)
+ incRows = R_ - rows;
+ else
+ {
+ rows = R_;
+ incRows = 0;
+ }
+ }
+
+ inline SizeType R() const { return rows + incRows; }
+ inline const DimensionType& C() const { return mycols; }
+ inline std::uint64_t BufferSize() const { return sizeof(SizeType) + sizeof(DimensionType) + sizeof(T) * R() * C(); }
+
+#define GETITEM(index) \
+ if (index >= rows) { \
+ SizeType incIndex = index - rows; \
+ return (T*)((*incBlocks)[incIndex >> rowsInBlockEx] + ((size_t)(incIndex & rowsInBlock)) * cols + colStart); \
+ } \
+ return (T*)(data + ((size_t)index) * cols + colStart); \
+
+ inline const T* At(SizeType index) const
+ {
+ GETITEM(index)
+ }
+
+ inline T* At(SizeType index)
+ {
+ GETITEM(index)
+ }
+
+ inline T* operator[](SizeType index)
+ {
+ GETITEM(index)
+ }
+
+ inline const T* operator[](SizeType index) const
+ {
+ GETITEM(index)
+ }
+
+#undef GETITEM
+
+ ErrorCode AddBatch(SizeType num, const T* pData = nullptr)
+ {
+ if (colStart != 0) return ErrorCode::Success;
+ if (R() > maxRows - num) return ErrorCode::MemoryOverFlow;
+
+ SizeType written = 0;
+ while (written < num) {
+ SizeType curBlockIdx = ((incRows + written) >> rowsInBlockEx);
+ if (curBlockIdx >= (SizeType)(incBlocks->size())) {
+ char* newBlock = (char*)ALIGN_ALLOC(((size_t)rowsInBlock + 1) * cols);
+ if (newBlock == nullptr) return ErrorCode::MemoryOverFlow;
+ std::memset(newBlock, -1, ((size_t)rowsInBlock + 1) * cols);
+ incBlocks->push_back(newBlock);
+ }
+ SizeType curBlockPos = ((incRows + written) & rowsInBlock);
+ SizeType toWrite = min(rowsInBlock + 1 - curBlockPos, num - written);
+ if (pData) {
+ for (int i = 0; i < toWrite; i++) {
+ std::memcpy((*incBlocks)[curBlockIdx] + ((size_t)curBlockPos + i) * cols + colStart, pData + ((size_t)written + i) * mycols, mycols * sizeof(T));
+ }
+ }
+ written += toWrite;
+ }
+ incRows += written;
+ return ErrorCode::Success;
+ }
+
+ ErrorCode Save(std::shared_ptr p_out) const
+ {
+ SizeType CR = R();
+ IOBINARY(p_out, WriteBinary, sizeof(SizeType), (char*)&CR);
+ IOBINARY(p_out, WriteBinary, sizeof(DimensionType), (char*)&mycols);
+ for (SizeType i = 0; i < CR; i++) {
+ IOBINARY(p_out, WriteBinary, sizeof(T) * mycols, (char*)At(i));
+ }
+
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s (%d,%d) Finish!\n", name.c_str(), CR, mycols);
+ return ErrorCode::Success;
+ }
+
+ ErrorCode Save(std::string sDataPointsFileName) const
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", name.c_str(), sDataPointsFileName.c_str());
+ auto ptr = f_createIO();
+ if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
+ return Save(ptr);
+ }
+
+ ErrorCode Load(std::shared_ptr pInput, SizeType blockSize, SizeType capacity)
+ {
+ SizeType r;
+ DimensionType c;
+ IOBINARY(pInput, ReadBinary, sizeof(SizeType), (char*)&(r));
+ IOBINARY(pInput, ReadBinary, sizeof(DimensionType), (char*)(&c));
+
+ if (data == nullptr) Initialize(r, c, blockSize, capacity);
+ else if (r > rows + incRows) {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "Read more data (%d, %d) than before (%d, %d)!\n", r, c, rows + incRows, mycols);
+ if(AddBatch(r - rows - incRows) != ErrorCode::Success) return ErrorCode::MemoryOverFlow;
+ }
+
+ for (SizeType i = 0; i < r; i++) {
+ IOBINARY(pInput, ReadBinary, sizeof(T) * mycols, (char*)At(i));
+ }
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), r, c);
+ return ErrorCode::Success;
+ }
+
+ ErrorCode Load(std::string sDataPointsFileName, SizeType blockSize, SizeType capacity)
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", name.c_str(), sDataPointsFileName.c_str());
+ auto ptr = f_createIO();
+ if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile;
+ return Load(ptr, blockSize, capacity);
+ }
+
+ // Functions for loading models from memory mapped files
+ ErrorCode Load(char* pDataPointsMemFile, SizeType blockSize, SizeType capacity)
+ {
+ SizeType R;
+ DimensionType C;
+ R = *((SizeType*)pDataPointsMemFile);
+ pDataPointsMemFile += sizeof(SizeType);
+
+ C = *((DimensionType*)pDataPointsMemFile);
+ pDataPointsMemFile += sizeof(DimensionType);
+
+ Initialize(R, C, blockSize, capacity, (char*)pDataPointsMemFile);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), R, C);
+ return ErrorCode::Success;
+ }
+
+ ErrorCode Refine(const std::vector& indices, COMMON::Dataset& dataset) const
+ {
+ SizeType newrows = (SizeType)(indices.size());
+ if (dataset.data == nullptr) dataset.Initialize(newrows, mycols, rowsInBlock + 1, static_cast(incBlocks->capacity() * (rowsInBlock + 1)));
+
+ for (SizeType i = 0; i < newrows; i++) {
+ std::memcpy((void*)dataset.At(i), (void*)At(indices[i]), sizeof(T) * mycols);
+ }
+ return ErrorCode::Success;
+ }
+
+ virtual ErrorCode Refine(const std::vector& indices, std::shared_ptr output) const
+ {
+ SizeType newrows = (SizeType)(indices.size());
+ IOBINARY(output, WriteBinary, sizeof(SizeType), (char*)&newrows);
+ IOBINARY(output, WriteBinary, sizeof(DimensionType), (char*)&mycols);
+
+ for (SizeType i = 0; i < newrows; i++) {
+ IOBINARY(output, WriteBinary, sizeof(T) * mycols, (char*)At(indices[i]));
+ }
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Refine %s (%d,%d) Finish!\n", name.c_str(), newrows, C());
+ return ErrorCode::Success;
+ }
+
+ virtual ErrorCode Refine(const std::vector& indices, std::string sDataPointsFileName) const
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Refine %s To %s\n", name.c_str(), sDataPointsFileName.c_str());
+ auto ptr = f_createIO();
+ if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
+ return Refine(indices, ptr);
+ }
+ };
+
+ template
+ ErrorCode LoadOptDatasets(std::shared_ptr pVectorsInput, std::shared_ptr pGraphInput,
+ Dataset& pVectors, Dataset& pGraph, DimensionType pNeighborhoodSize,
+ SizeType blockSize, SizeType capacity) {
+ SizeType VR, GR;
+ DimensionType VC, GC;
+ IOBINARY(pVectorsInput, ReadBinary, sizeof(SizeType), (char*)&VR);
+ IOBINARY(pVectorsInput, ReadBinary, sizeof(DimensionType), (char*)&VC);
+ DimensionType totalC = ALIGN_ROUND(sizeof(T) * VC + sizeof(SizeType) * pNeighborhoodSize);
+
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "OPT TotalC: %d\n", totalC);
+ char* data = (char*)ALIGN_ALLOC(((size_t)totalC) * VR);
+ std::shared_ptr> incBlocks(new std::vector());
+
+ pVectors.Initialize(VR, VC, blockSize, capacity, data, true, incBlocks, 0, totalC);
+ pVectors.SetName("Opt" + pVectors.Name());
+ for (SizeType i = 0; i < VR; i++) {
+ IOBINARY(pVectorsInput, ReadBinary, sizeof(T) * VC, (char*)(pVectors.At(i)));
+ }
+
+ IOBINARY(pGraphInput, ReadBinary, sizeof(SizeType), (char*)&GR);
+ IOBINARY(pGraphInput, ReadBinary, sizeof(DimensionType), (char*)&GC);
+ if (GR != VR || GC != pNeighborhoodSize) return ErrorCode::DiskIOFail;
+
+ pGraph.Initialize(GR, GC, blockSize, capacity, data, false, incBlocks, sizeof(T) * VC, totalC);
+ pGraph.SetName("Opt" + pGraph.Name());
+ for (SizeType i = 0; i < VR; i++) {
+ IOBINARY(pGraphInput, ReadBinary, sizeof(SizeType) * GC, (char*)(pGraph.At(i)));
+ }
+
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", pVectors.Name().c_str(), pVectors.R(), pVectors.C());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", pGraph.Name().c_str(), pGraph.R(), pGraph.C());
+
+ return ErrorCode::Success;
+ }
}
}
-#endif // _SPTAG_COMMON_DATASET_H_
\ No newline at end of file
+#endif // _SPTAG_COMMON_DATASET_H_
diff --git a/AnnService/inc/Core/Common/DistanceUtils.h b/AnnService/inc/Core/Common/DistanceUtils.h
index e46c0c84f..cfb02155d 100644
--- a/AnnService/inc/Core/Common/DistanceUtils.h
+++ b/AnnService/inc/Core/Common/DistanceUtils.h
@@ -4,7 +4,6 @@
#ifndef _SPTAG_COMMON_DISTANCEUTILS_H_
#define _SPTAG_COMMON_DISTANCEUTILS_H_
-#include
#include
#include
@@ -103,17 +102,21 @@ namespace SPTAG
return func(p1, p2, length);
}
+ template
static inline float ConvertCosineSimilarityToDistance(float cs)
{
// Cosine similarity is in [-1, 1], the higher the value, the closer are the two vectors.
// However, the tree is built and searched based on "distance" between two vectors, that's >=0. The smaller the value, the closer are the two vectors.
// So we do a linear conversion from a cosine similarity to a distance value.
- return 1 - cs; //[1, 3]
+ int base = Utils::GetBase();
+ return (1 - cs) * (base * base);
}
+ template
static inline float ConvertDistanceBackToCosineSimilarity(float d)
{
- return 1 - d;
+ int base = Utils::GetBase();
+ return 1 - d / (base * base);
}
};
template
diff --git a/AnnService/inc/Core/Common/FineGrainedLock.h b/AnnService/inc/Core/Common/FineGrainedLock.h
index 98659dab2..6491cf0f3 100644
--- a/AnnService/inc/Core/Common/FineGrainedLock.h
+++ b/AnnService/inc/Core/Common/FineGrainedLock.h
@@ -29,14 +29,41 @@ namespace SPTAG
unsigned index = hash_func((unsigned)idx);
return m_locks[index];
}
+
+ static inline unsigned hash_func(unsigned idx)
+ {
+ return ((unsigned)(idx * 99991) + _rotl(idx, 2) + 101) & PoolSize;
+ }
+
private:
static const int PoolSize = 32767;
std::unique_ptr m_locks;
+ };
- inline unsigned hash_func(unsigned idx) const
+ class FineGrainedRWLock {
+ public:
+ FineGrainedRWLock() {
+ m_locks.reset(new std::shared_timed_mutex[PoolSize + 1]);
+ }
+ ~FineGrainedRWLock() {}
+
+ std::shared_timed_mutex& operator[](SizeType idx) {
+ unsigned index = hash_func((unsigned)idx);
+ return m_locks[index];
+ }
+
+ const std::shared_timed_mutex& operator[](SizeType idx) const {
+ unsigned index = hash_func((unsigned)idx);
+ return m_locks[index];
+ }
+
+ static inline unsigned hash_func(unsigned idx)
{
return ((unsigned)(idx * 99991) + _rotl(idx, 2) + 101) & PoolSize;
}
+ private:
+ static const int PoolSize = 32767;
+ std::unique_ptr m_locks;
};
}
}
diff --git a/AnnService/inc/Core/Common/Heap.h b/AnnService/inc/Core/Common/Heap.h
index a5d544b1d..826ef376e 100644
--- a/AnnService/inc/Core/Common/Heap.h
+++ b/AnnService/inc/Core/Common/Heap.h
@@ -13,7 +13,7 @@ namespace SPTAG
template
class Heap {
public:
- Heap() : heap(nullptr), length(0), count(0) {}
+ Heap() : heap(nullptr), length(0), count(0),lastlevel(0) {}
Heap(int size) { Resize(size); }
@@ -24,10 +24,15 @@ namespace SPTAG
count = 0;
lastlevel = int(pow(2.0, floor(log2((float)size))));
}
- ~Heap() {}
+ ~Heap() { heap.reset(); }
inline int size() { return count; }
inline bool empty() { return count == 0; }
- inline void clear() { count = 0; }
+ inline void clear(int size)
+ {
+ if (size > length) Resize(size);
+ count = 0;
+ }
+
inline T& Top() { if (count == 0) return heap[0]; else return heap[1]; }
// Insert a new element in the heap.
diff --git a/AnnService/inc/Core/Common/IQuantizer.h b/AnnService/inc/Core/Common/IQuantizer.h
index cbf3a91be..26d23889a 100644
--- a/AnnService/inc/Core/Common/IQuantizer.h
+++ b/AnnService/inc/Core/Common/IQuantizer.h
@@ -5,6 +5,7 @@
#define _SPTAG_COMMON_QUANTIZER_H_
#include "inc/Core/Common.h"
+#include "inc/Helper/DiskIO.h"
#include
#include "inc/Core/CommonDataStructure.h"
#include "DistanceUtils.h"
@@ -23,7 +24,7 @@ namespace SPTAG
template
std::function DistanceCalcSelector(SPTAG::DistCalcMethod p_method) const;
- virtual void QuantizeVector(const void* vec, std::uint8_t* vecout) const = 0;
+ virtual void QuantizeVector(const void* vec, std::uint8_t* vecout, bool ADC = true) const = 0;
virtual SizeType QuantizeSize() const = 0;
@@ -58,6 +59,9 @@ namespace SPTAG
virtual int GetBase() const = 0;
virtual float* GetL2DistanceTables() = 0;
+
+ template
+ T* GetCodebooks();
};
}
}
diff --git a/AnnService/inc/Core/Common/InstructionUtils.h b/AnnService/inc/Core/Common/InstructionUtils.h
index e311061c6..4cd3703c1 100644
--- a/AnnService/inc/Core/Common/InstructionUtils.h
+++ b/AnnService/inc/Core/Common/InstructionUtils.h
@@ -6,8 +6,13 @@
#include
#include
+#ifndef GPU
+
#ifndef _MSC_VER
#include
+#include
+#include
+
void cpuid(int info[4], int InfoType);
#else
@@ -15,6 +20,8 @@ void cpuid(int info[4], int InfoType);
#define cpuid(info, x) __cpuidex(info, x, 0)
#endif
+#endif
+
namespace SPTAG {
namespace COMMON {
diff --git a/AnnService/inc/Core/Common/KDTree.h b/AnnService/inc/Core/Common/KDTree.h
index 9a0fdef6f..90049f3c0 100644
--- a/AnnService/inc/Core/Common/KDTree.h
+++ b/AnnService/inc/Core/Common/KDTree.h
@@ -30,11 +30,11 @@ namespace SPTAG
class KDTree
{
public:
- KDTree() : m_iTreeNumber(2), m_numTopDimensionKDTSplit(5), m_iSamples(1000), m_lock(new std::shared_timed_mutex), m_pQuantizer(nullptr) {}
+ KDTree() : m_iTreeNumber(2), m_numTopDimensionKDTSplit(5), m_iSamples(1000), m_lock(new std::shared_timed_mutex), m_bOldVersion(false), m_pQuantizer(nullptr) {}
KDTree(const KDTree& other) : m_iTreeNumber(other.m_iTreeNumber),
m_numTopDimensionKDTSplit(other.m_numTopDimensionKDTSplit),
- m_iSamples(other.m_iSamples), m_lock(new std::shared_timed_mutex), m_pQuantizer(other.m_pQuantizer) {}
+ m_iSamples(other.m_iSamples), m_lock(new std::shared_timed_mutex), m_bOldVersion(other.m_bOldVersion), m_pQuantizer(other.m_pQuantizer) {}
~KDTree() {}
inline const KDTNode& operator[](SizeType index) const { return m_pTreeRoots[index]; }
@@ -96,22 +96,48 @@ break;
m_pTreeRoots.resize(m_iTreeNumber * localindices.size());
m_pTreeStart.resize(m_iTreeNumber, 0);
-#pragma omp parallel for num_threads(numOfThreads)
- for (int i = 0; i < m_iTreeNumber; i++)
+ std::vector mythreads;
+ mythreads.reserve(numOfThreads);
+ std::atomic_size_t sent(0);
+ for (int tid = 0; tid < numOfThreads; tid++)
{
- if (abort && abort->ShouldAbort()) continue;
-
- Sleep(i * 100); std::srand(clock());
-
- std::vector pindices(localindices.begin(), localindices.end());
- std::shuffle(pindices.begin(), pindices.end(), rg);
-
- m_pTreeStart[i] = i * (SizeType)pindices.size();
- LOG(Helper::LogLevel::LL_Info, "Start to build KDTree %d\n", i + 1);
- SizeType iTreeSize = m_pTreeStart[i];
- DivideTree(data, pindices, 0, (SizeType)pindices.size() - 1, m_pTreeStart[i], iTreeSize, abort);
- LOG(Helper::LogLevel::LL_Info, "%d KDTree built, %d %zu\n", i + 1, iTreeSize - m_pTreeStart[i], pindices.size());
+ mythreads.emplace_back([&, tid]() {
+ size_t i = 0;
+ std::mt19937 rg;
+ while (true)
+ {
+ i = sent.fetch_add(1);
+ if (i < m_iTreeNumber)
+ {
+ if (abort && abort->ShouldAbort())
+ continue;
+
+ Sleep(i * 100);
+ std::srand(clock());
+
+ std::vector pindices(localindices.begin(), localindices.end());
+ std::shuffle(pindices.begin(), pindices.end(), rg);
+
+ m_pTreeStart[i] = i * (SizeType)pindices.size();
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start to build KDTree %d\n", i + 1);
+ SizeType iTreeSize = m_pTreeStart[i];
+ DivideTree(data, pindices, 0, (SizeType)pindices.size() - 1, m_pTreeStart[i],
+ iTreeSize, abort);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d KDTree built, %d %zu\n", i + 1,
+ iTreeSize - m_pTreeStart[i], pindices.size());
+ }
+ else
+ {
+ return;
+ }
+ }
+ });
+ }
+ for (auto &t : mythreads)
+ {
+ t.join();
}
+ mythreads.clear();
}
inline std::uint64_t BufferSize() const
@@ -128,13 +154,13 @@ break;
SizeType treeNodeSize = (SizeType)m_pTreeRoots.size();
IOBINARY(p_out, WriteBinary, sizeof(treeNodeSize), (char*)&treeNodeSize);
IOBINARY(p_out, WriteBinary, sizeof(KDTNode) * treeNodeSize, (char*)m_pTreeRoots.data());
- LOG(Helper::LogLevel::LL_Info, "Save KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
ErrorCode SaveTrees(std::string sTreeFileName) const
{
- LOG(Helper::LogLevel::LL_Info, "Save KDT to %s\n", sTreeFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save KDT to %s\n", sTreeFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sTreeFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
return SaveTrees(ptr);
@@ -152,7 +178,7 @@ break;
pKDTMemFile += sizeof(SizeType);
m_pTreeRoots.resize(treeNodeSize);
memcpy(m_pTreeRoots.data(), pKDTMemFile, sizeof(KDTNode) * treeNodeSize);
- LOG(Helper::LogLevel::LL_Info, "Load KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
@@ -186,7 +212,7 @@ break;
}
treeNodeSize += iNodeSize;
}
- LOG(Helper::LogLevel::LL_Info, "Load KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
IOBINARY(p_input, ReadBinary, sizeof(m_iTreeNumber), (char*)&m_iTreeNumber);
@@ -198,65 +224,40 @@ break;
m_pTreeRoots.resize(treeNodeSize);
IOBINARY(p_input, ReadBinary, sizeof(KDTNode) * treeNodeSize, (char*)m_pTreeRoots.data());
- LOG(Helper::LogLevel::LL_Info, "Load KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load KDT (%d,%d) Finish!\n", m_iTreeNumber, treeNodeSize);
return ErrorCode::Success;
}
ErrorCode LoadTrees(std::string sTreeFileName)
{
- LOG(Helper::LogLevel::LL_Info, "Load KDT From %s\n", sTreeFileName.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load KDT From %s\n", sTreeFileName.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(sTreeFileName.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile;
return LoadTrees(ptr);
}
- template
+ template
void InitSearchTrees(const Dataset& p_data, std::function fComputeDistance, COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const
{
for (int i = 0; i < m_iTreeNumber; i++) {
- KDTSearch(p_data, fComputeDistance, p_query, p_space, m_pTreeStart[i], 0);
+ KDTSearch(p_data, fComputeDistance, p_query, p_space, m_pTreeStart[i], 0);
}
}
- template
+ template
void SearchTrees(const Dataset& p_data, std::function fComputeDistance, COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const int p_limits) const
{
while (!p_space.m_SPTQueue.empty() && p_space.m_iNumberOfCheckedLeaves < p_limits)
{
auto& tcell = p_space.m_SPTQueue.pop();
- KDTSearch(p_data, fComputeDistance, p_query, p_space, tcell.node, tcell.distance);
+ KDTSearch(p_data, fComputeDistance, p_query, p_space, tcell.node, tcell.distance);
}
}
private:
- template
- void KDTSearch(const Dataset& p_data, std::function fComputeDistance, COMMON::QueryResultSet& p_query,
- COMMON::WorkSpace& p_space, const SizeType node, const float distBound) const
- {
- if (m_pQuantizer)
- {
- switch (m_pQuantizer->GetReconstructType())
- {
-#define DefineVectorValueType(Name, Type) \
-case VectorValueType::Name: \
-return KDTSearchCore(p_data, fComputeDistance, p_query, p_space, node, distBound);
-
-#include "inc/Core/DefinitionList.h"
-#undef DefineVectorValueType
-
- default: break;
- }
- }
- else
- {
- return KDTSearchCore(p_data, fComputeDistance, p_query, p_space, node, distBound);
- }
-
- }
-
template
- void KDTSearchCore(const Dataset& p_data, std::function fComputeDistance, COMMON::QueryResultSet &p_query,
+ void KDTSearch(const Dataset& p_data, std::function fComputeDistance, COMMON::QueryResultSet &p_query,
COMMON::WorkSpace& p_space, const SizeType node, const float distBound) const {
if (node < 0)
{
@@ -292,7 +293,7 @@ return KDTSearchCore(p_data, fComputeDistance, p_query, p_space, node,
}
p_space.m_SPTQueue.insert(NodeDistPair(otherChild, distanceBound));
- KDTSearchCore(p_data, fComputeDistance, p_query, p_space, bestChild, distBound);
+ KDTSearch(p_data, fComputeDistance, p_query, p_space, bestChild, distBound);
}
diff --git a/AnnService/inc/Core/Common/Labelset.h b/AnnService/inc/Core/Common/Labelset.h
index d310d0bf2..c34f563de 100644
--- a/AnnService/inc/Core/Common/Labelset.h
+++ b/AnnService/inc/Core/Common/Labelset.h
@@ -13,19 +13,28 @@ namespace SPTAG
{
class Labelset
{
+ public:
+ enum class InvalidIDBehavior
+ {
+ Passthrough,
+ AlwaysContains,
+ AlwaysNotContains
+ };
private:
std::atomic m_inserted;
Dataset m_data;
+ InvalidIDBehavior m_invalidIDBehaviorSetting;
public:
- Labelset()
+ Labelset()
{
m_inserted = 0;
m_data.SetName("DeleteID");
}
- void Initialize(SizeType size, SizeType blockSize, SizeType capacity)
+ void Initialize(SizeType size, SizeType blockSize, SizeType capacity, InvalidIDBehavior invalidIDBehaviorSetting = InvalidIDBehavior::Passthrough)
{
+ m_invalidIDBehaviorSetting = invalidIDBehaviorSetting;
m_data.Initialize(size, 1, blockSize, capacity);
}
@@ -33,17 +42,64 @@ namespace SPTAG
inline bool Contains(const SizeType& key) const
{
+ if (key >= R())
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "LabelSet::Contains: key %lld out of range %lld\n", (long long)key, (long long)R());
+ switch (m_invalidIDBehaviorSetting)
+ {
+ case InvalidIDBehavior::AlwaysContains:
+ return true;
+ case InvalidIDBehavior::AlwaysNotContains:
+ return false;
+ default: {}
+ }
+ }
+
return *m_data[key] == 1;
}
inline bool Insert(const SizeType& key)
{
+ if (key >= R())
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "LabelSet::Insert: key %lld out of range %lld\n", (long long)key, (long long)R());
+ switch (m_invalidIDBehaviorSetting)
+ {
+ case InvalidIDBehavior::AlwaysContains:
+ return true;
+ case InvalidIDBehavior::AlwaysNotContains:
+ return false;
+ default: {}
+ }
+ }
+
char oldvalue = InterlockedExchange8((char*)m_data[key], 1);
if (oldvalue == 1) return false;
m_inserted++;
return true;
}
+ inline bool Reset(const SizeType& key)
+ {
+ if (key >= R())
+ {
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "LabelSet::Insert: key %lld out of range %lld\n",
+ (long long)key, (long long)R());
+ switch (m_invalidIDBehaviorSetting)
+ {
+ case InvalidIDBehavior::AlwaysContains:
+ return true;
+ case InvalidIDBehavior::AlwaysNotContains:
+ return false;
+ default: {}
+ }
+ }
+ char oldvalue = InterlockedExchange8((char *)m_data[key], -1);
+ if (oldvalue == -1) return false;
+ m_inserted--;
+ return true;
+ }
+
inline ErrorCode Save(std::shared_ptr output)
{
SizeType deleted = m_inserted.load();
@@ -53,30 +109,32 @@ namespace SPTAG
inline ErrorCode Save(std::string filename)
{
- LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", m_data.Name().c_str(), filename.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", m_data.Name().c_str(), filename.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
return Save(ptr);
}
- inline ErrorCode Load(std::shared_ptr input, SizeType blockSize, SizeType capacity)
+ inline ErrorCode Load(std::shared_ptr input, SizeType blockSize, SizeType capacity, InvalidIDBehavior invalidIDBehaviorSetting = InvalidIDBehavior::Passthrough)
{
+ m_invalidIDBehaviorSetting = invalidIDBehaviorSetting;
SizeType deleted;
IOBINARY(input, ReadBinary, sizeof(SizeType), (char*)&deleted);
m_inserted = deleted;
return m_data.Load(input, blockSize, capacity);
}
- inline ErrorCode Load(std::string filename, SizeType blockSize, SizeType capacity)
+ inline ErrorCode Load(std::string filename, SizeType blockSize, SizeType capacity, InvalidIDBehavior invalidIDBehaviorSetting = InvalidIDBehavior::Passthrough)
{
- LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", m_data.Name().c_str(), filename.c_str());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", m_data.Name().c_str(), filename.c_str());
auto ptr = f_createIO();
if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile;
- return Load(ptr, blockSize, capacity);
+ return Load(ptr, blockSize, capacity, invalidIDBehaviorSetting);
}
- inline ErrorCode Load(char* pmemoryFile, SizeType blockSize, SizeType capacity)
+ inline ErrorCode Load(char* pmemoryFile, SizeType blockSize, SizeType capacity, InvalidIDBehavior invalidIDBehaviorSetting = InvalidIDBehavior::Passthrough)
{
+ m_invalidIDBehaviorSetting = invalidIDBehaviorSetting;
m_inserted = *((SizeType*)pmemoryFile);
return m_data.Load(pmemoryFile + sizeof(SizeType), blockSize, capacity);
}
@@ -104,4 +162,4 @@ namespace SPTAG
}
}
-#endif // _SPTAG_COMMON_LABELSET_H_
+#endif // _SPTAG_COMMON_LABELSET_H_
\ No newline at end of file
diff --git a/AnnService/inc/Core/Common/NeighborhoodGraph.h b/AnnService/inc/Core/Common/NeighborhoodGraph.h
index 991045072..92ff358b2 100644
--- a/AnnService/inc/Core/Common/NeighborhoodGraph.h
+++ b/AnnService/inc/Core/Common/NeighborhoodGraph.h
@@ -34,7 +34,8 @@ namespace SPTAG
class NeighborhoodGraph
{
public:
- NeighborhoodGraph() : m_iTPTNumber(32),
+ NeighborhoodGraph() : m_iGraphSize(0),
+ m_iTPTNumber(32),
m_iTPTLeafSize(2000),
m_iSamples(1000),
m_numTopDimensionTPTSplit(5),
@@ -52,7 +53,7 @@ namespace SPTAG
m_iGPULeafSize(500),
m_iheadNumGPUs(1),
m_iTPTBalanceFactor(2),
- m_rebuild(0)
+ m_rebuild(0), m_iThreadNum(1)
{}
~NeighborhoodGraph() {}
@@ -65,36 +66,62 @@ namespace SPTAG
{
DimensionType* correct = new DimensionType[samples];
-#pragma omp parallel for schedule(dynamic)
- for (SizeType i = 0; i < samples; i++)
+ std::vector mythreads;
+ mythreads.reserve(m_iThreadNum);
+ std::atomic_size_t sent(0);
+ for (int tid = 0; tid < m_iThreadNum; tid++)
{
- SizeType x = COMMON::Utils::rand(m_iGraphSize);
- //int x = i;
- COMMON::QueryResultSet query(nullptr, m_iCEF);
- for (SizeType y = 0; y < m_iGraphSize; y++)
- {
- if ((idmap != nullptr && idmap->find(y) != idmap->end())) continue;
- float dist = index->ComputeDistance(index->GetSample(x), index->GetSample(y));
- query.AddPoint(y, dist);
- }
- query.SortResult();
- SizeType* exact_rng = new SizeType[m_iNeighborhoodSize];
- RebuildNeighbors(index, x, exact_rng, query.GetResults(), m_iCEF);
-
- correct[i] = 0;
- for (DimensionType j = 0; j < m_iNeighborhoodSize; j++) {
- if (exact_rng[j] == -1) {
- correct[i] += m_iNeighborhoodSize - j;
- break;
- }
- for (DimensionType k = 0; k < m_iNeighborhoodSize; k++)
- if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) {
- correct[i]++;
- break;
+ mythreads.emplace_back([&, tid](){
+ size_t i = 0;
+ while (true)
+ {
+ i = sent.fetch_add(1);
+ if (i < samples)
+ {
+ SizeType x = COMMON::Utils::rand(m_iGraphSize);
+ // int x = i;
+ COMMON::QueryResultSet query(nullptr, m_iCEF);
+ for (SizeType y = 0; y < m_iGraphSize; y++)
+ {
+ if ((idmap != nullptr && idmap->find(y) != idmap->end()))
+ continue;
+ float dist = index->ComputeDistance(index->GetSample(x), index->GetSample(y));
+ query.AddPoint(y, dist);
+ }
+ query.SortResult();
+ SizeType *exact_rng = new SizeType[m_iNeighborhoodSize];
+ RebuildNeighbors(index, x, exact_rng, query.GetResults(), m_iCEF);
+
+ correct[i] = 0;
+ for (DimensionType j = 0; j < m_iNeighborhoodSize; j++)
+ {
+ if (exact_rng[j] == -1)
+ {
+ correct[i] += m_iNeighborhoodSize - j;
+ break;
+ }
+ for (DimensionType k = 0; k < m_iNeighborhoodSize; k++)
+ if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j])
+ {
+ correct[i]++;
+ break;
+ }
+ }
+ delete[] exact_rng;
}
- }
- delete[] exact_rng;
+ else
+ {
+ return;
+ }
+ }
+ });
+ }
+ for (auto &t : mythreads)
+ {
+ t.join();
}
+ mythreads.clear();
+
float acc = 0;
for (SizeType i = 0; i < samples; i++) acc += float(correct[i]);
acc = acc / samples / m_iNeighborhoodSize;
@@ -109,13 +136,8 @@ namespace SPTAG
SizeType initSize;
SPTAG::Helper::Convert::ConvertStringTo(index->GetParameter("NumberOfInitialDynamicPivots").c_str(), initSize);
- if (index->m_pQuantizer) {
- buildGraph(index, m_iGraphSize, m_iNeighborhoodSize, m_iTPTNumber, (int*)m_pNeighborhoodGraph[0], m_iGPURefineSteps, m_iGPURefineDepth, m_iGPUGraphType, m_iGPULeafSize, initSize, m_iheadNumGPUs, m_iTPTBalanceFactor);
- }
- else {
- // Build the entire RNG graph, both builds the KNN and refines it to RNG
- buildGraph(index, m_iGraphSize, m_iNeighborhoodSize, m_iTPTNumber, (int*)m_pNeighborhoodGraph[0], m_iGPURefineSteps, m_iGPURefineDepth, m_iGPUGraphType, m_iGPULeafSize, initSize, m_iheadNumGPUs, m_iTPTBalanceFactor);
- }
+ // Build the entire RNG graph, both builds the KNN and refines it to RNG
+ buildGraph(index, m_iGraphSize, m_iNeighborhoodSize, m_iTPTNumber, (int*)m_pNeighborhoodGraph[0], m_iGPURefineSteps, m_iGPURefineDepth, m_iGPUGraphType, m_iGPULeafSize, initSize, m_iheadNumGPUs, m_iTPTBalanceFactor);
if (idmap != nullptr) {
std::unordered_map::const_iterator iter;
@@ -149,7 +171,6 @@ break;
}
else
{
- printf("No quantizer!\n");
PartitionByTptreeCore(index, indices, first, last, leaves);
}
}
@@ -317,51 +338,99 @@ break;
(NeighborhoodDists)[i][j] = MaxDist;
auto t1 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "Parallel TpTree Partition begin\n");
-#pragma omp parallel for schedule(dynamic)
- for (int i = 0; i < m_iTPTNumber; i++)
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Parallel TpTree Partition begin\n");
{
- Sleep(i * 100); std::srand(clock());
- for (SizeType j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j;
- std::shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end(), rg);
- PartitionByTptree(index, TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]);
- LOG(Helper::LogLevel::LL_Info, "Finish Getting Leaves for Tree %d\n", i);
+ std::vector mythreads;
+ mythreads.reserve(m_iThreadNum);
+ std::atomic_size_t sent(0);
+ for (int tid = 0; tid < m_iThreadNum; tid++)
+ {
+ mythreads.emplace_back([&, tid]() {
+ size_t i = 0;
+ std::mt19937 rg;
+ while (true)
+ {
+ i = sent.fetch_add(1);
+ if (i < m_iTPTNumber)
+ {
+ Sleep(i * 100);
+ std::srand(clock());
+ for (SizeType j = 0; j < m_iGraphSize; j++)
+ TptreeDataIndices[i][j] = j;
+ std::shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end(), rg);
+ PartitionByTptree(index, TptreeDataIndices[i], 0, m_iGraphSize - 1,
+ TptreeLeafNodes[i]);
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Finish Getting Leaves for Tree %d\n", i);
+ }
+ else
+ {
+ return;
+ }
+ }
+ });
+ }
+ for (auto &t : mythreads)
+ {
+ t.join();
+ }
+ mythreads.clear();
}
- LOG(Helper::LogLevel::LL_Info, "Parallel TpTree Partition done\n");
- auto t2 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "Build TPTree time (s): %lld\n", std::chrono::duration_cast(t2 - t1).count());
- for(int i=0; i<10; i++) {
- for(int j=0; j<20; j++) {
- std::cout << static_cast(((uint8_t*)index->GetSample(i))[j]) << ", ";
- }
- std::cout << std::endl;
- }
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Parallel TpTree Partition done\n");
+ auto t2 = std::chrono::high_resolution_clock::now();
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Build TPTree time (s): %lld\n", std::chrono::duration_cast(t2 - t1).count());
for (int i = 0; i < m_iTPTNumber; i++)
{
-#pragma omp parallel for schedule(dynamic)
- for (SizeType j = 0; j < (SizeType)TptreeLeafNodes[i].size(); j++)
+ std::vector mythreads;
+ mythreads.reserve(m_iThreadNum);
+ std::atomic_size_t sent(0);
+ for (int tid = 0; tid < m_iThreadNum; tid++)
{
- SizeType start_index = TptreeLeafNodes[i][j].first;
- SizeType end_index = TptreeLeafNodes[i][j].second;
- if ((j * 5) % TptreeLeafNodes[i].size() == 0) LOG(Helper::LogLevel::LL_Info, "Processing Tree %d %d%%\n", i, static_cast(j * 1.0 / TptreeLeafNodes[i].size() * 100));
- for (SizeType x = start_index; x < end_index; x++)
- {
- for (SizeType y = x + 1; y <= end_index; y++)
+ mythreads.emplace_back([&, tid]() {
+ size_t j = 0;
+ while (true)
{
- SizeType p1 = TptreeDataIndices[i][x];
- SizeType p2 = TptreeDataIndices[i][y];
- float dist = index->ComputeDistance(index->GetSample(p1), index->GetSample(p2));
- if (idmap != nullptr) {
- p1 = (idmap->find(p1) == idmap->end()) ? p1 : idmap->at(p1);
- p2 = (idmap->find(p2) == idmap->end()) ? p2 : idmap->at(p2);
+ j = sent.fetch_add(1);
+ if (j < TptreeLeafNodes[i].size())
+ {
+ SizeType start_index = TptreeLeafNodes[i][j].first;
+ SizeType end_index = TptreeLeafNodes[i][j].second;
+ if ((j * 5) % TptreeLeafNodes[i].size() == 0)
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Processing Tree %d %d%%\n", i,
+ static_cast(j * 1.0 / TptreeLeafNodes[i].size() * 100));
+ for (SizeType x = start_index; x < end_index; x++)
+ {
+ for (SizeType y = x + 1; y <= end_index; y++)
+ {
+ SizeType p1 = TptreeDataIndices[i][x];
+ SizeType p2 = TptreeDataIndices[i][y];
+ float dist =
+ index->ComputeDistance(index->GetSample(p1), index->GetSample(p2));
+ if (idmap != nullptr)
+ {
+ p1 = (idmap->find(p1) == idmap->end()) ? p1 : idmap->at(p1);
+ p2 = (idmap->find(p2) == idmap->end()) ? p2 : idmap->at(p2);
+ }
+ COMMON::Utils::AddNeighbor(p2, dist, (m_pNeighborhoodGraph)[p1],
+ (NeighborhoodDists)[p1], m_iNeighborhoodSize);
+ COMMON::Utils::AddNeighbor(p1, dist, (m_pNeighborhoodGraph)[p2],
+ (NeighborhoodDists)[p2], m_iNeighborhoodSize);
+ }
+ }
+ }
+ else
+ {
+ return;
}
- COMMON::Utils::AddNeighbor(p2, dist, (m_pNeighborhoodGraph)[p1], (NeighborhoodDists)[p1], m_iNeighborhoodSize);
- COMMON::Utils::AddNeighbor(p1, dist, (m_pNeighborhoodGraph)[p2], (NeighborhoodDists)[p2], m_iNeighborhoodSize);
}
- }
+ });
}
+ for (auto &t : mythreads)
+ {
+ t.join();
+ }
+ mythreads.clear();
TptreeDataIndices[i].clear();
TptreeLeafNodes[i].clear();
}
@@ -369,14 +438,14 @@ break;
TptreeLeafNodes.clear();
auto t3 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "Process TPTree time (s): %lld\n", std::chrono::duration_cast(t3 - t2).count());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Process TPTree time (s): %lld\n", std::chrono::duration_cast(t3 - t2).count());
}
#endif
template
void BuildGraph(VectorIndex* index, const std::unordered_map* idmap = nullptr)
{
- LOG(Helper::LogLevel::LL_Info, "build RNG graph!\n");
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "build RNG graph!\n");
m_iGraphSize = index->GetNumSamples();
m_iNeighborhoodSize = (DimensionType)(ceil(m_iNeighborhoodSize * m_fNeighborhoodScale) * (m_rebuild + 1));
@@ -384,25 +453,25 @@ break;
if (m_iGraphSize < 1000) {
RefineGraph(index, idmap);
- LOG(Helper::LogLevel::LL_Info, "Build RNG Graph end!\n");
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Build RNG Graph end!\n");
return;
}
auto t1 = std::chrono::high_resolution_clock::now();
BuildInitKNNGraph(index, idmap);
auto t2 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "BuildInitKNNGraph time (s): %lld\n", std::chrono::duration_cast(t2 - t1).count());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "BuildInitKNNGraph time (s): %lld\n", std::chrono::duration_cast(t2 - t1).count());
RefineGraph(index, idmap);
auto t3 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "BuildGraph time (s): %lld\n", std::chrono::duration_cast(t3 - t1).count());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "BuildGraph time (s): %lld\n", std::chrono::duration_cast(t3 - t1).count());
if (m_rebuild) {
m_iNeighborhoodSize = m_iNeighborhoodSize / 2;
RebuildGraph(index, idmap);
auto t4 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "ReBuildGraph time (s): %lld\n", std::chrono::duration_cast(t4 - t3).count());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReBuildGraph time (s): %lld\n", std::chrono::duration_cast(t4 - t3).count());
}
if (idmap != nullptr) {
@@ -417,57 +486,111 @@ break;
template
void RebuildGraph(VectorIndex* index, const std::unordered_map* idmap = nullptr)
{
- std::vector indegree(m_iGraphSize);
-
-#pragma omp parallel for schedule(dynamic)
- for (SizeType i = 0; i < m_iGraphSize; i++) indegree[i] = 0;
+ std::vector indegree(m_iGraphSize, 0);
auto t0 = std::chrono::high_resolution_clock::now();
-#pragma omp parallel for schedule(dynamic)
- for (SizeType i = 0; i < m_iGraphSize; i++)
{
- SizeType* outnodes = m_pNeighborhoodGraph[i];
- for (SizeType j = 0; j < m_iNeighborhoodSize; j++)
+ std::vector mythreads;
+ mythreads.reserve(m_iThreadNum);
+ std::atomic_size_t sent(0);
+ for (int tid = 0; tid < m_iThreadNum; tid++)
{
- int node = outnodes[j];
- if (node >= 0) {
- indegree[node]++;
- }
+ mythreads.emplace_back([&, tid]() {
+ size_t i = 0;
+ while (true)
+ {
+ i = sent.fetch_add(1);
+ if (i < m_iGraphSize)
+ {
+ SizeType *outnodes = m_pNeighborhoodGraph[i];
+ for (SizeType j = 0; j < m_iNeighborhoodSize; j++)
+ {
+ int node = outnodes[j];
+ if (node >= 0)
+ {
+ indegree[node]++;
+ }
+ }
+ }
+ else
+ {
+ return;
+ }
+ }
+ });
+ }
+ for (auto &t : mythreads)
+ {
+ t.join();
}
+ mythreads.clear();
}
auto t1 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "Calculate Indegree time (s): %lld\n", std::chrono::duration_cast(t1 - t0).count());
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Calculate Indegree time (s): %lld\n", std::chrono::duration_cast(t1 - t0).count());
int rebuild_threshold = m_iNeighborhoodSize / 2;
int rebuildstart = m_iNeighborhoodSize / 2;
-#pragma omp parallel for schedule(dynamic)
- for (SizeType i = 0; i < m_iGraphSize; i++)
+
+ std::vector mythreads;
+ mythreads.reserve(m_iThreadNum);
+ std::atomic_size_t sent(0);
+ for (int tid = 0; tid < m_iThreadNum; tid++)
{
- SizeType* outnodes = m_pNeighborhoodGraph[i];
- std::vector reserve(2 * m_iNeighborhoodSize, false);
- int total = 0;
- for (SizeType j = rebuildstart; j < m_iNeighborhoodSize * 2; j++)
- if ( outnodes[j] >= 0 && indegree[outnodes[j]] < rebuild_threshold) {
- reserve[j] = true;
- total++;
- }
+ mythreads.emplace_back([&, tid]() {
+ size_t i = 0;
+ while (true)
+ {
+ i = sent.fetch_add(1);
+ if (i < m_iGraphSize)
+ {
+ SizeType *outnodes = m_pNeighborhoodGraph[i];
+ std::vector reserve(2 * m_iNeighborhoodSize, false);
+ int total = 0;
+ for (SizeType j = rebuildstart; j < m_iNeighborhoodSize * 2; j++)
+ if (outnodes[j] >= 0 && indegree[outnodes[j]] < rebuild_threshold)
+ {
+ reserve[j] = true;
+ total++;
+ }
+
+ for (SizeType j = rebuildstart;
+ j < m_iNeighborhoodSize * 2 && total < m_iNeighborhoodSize - rebuildstart; j++)
+ {
+ if (!reserve[j])
+ {
+ reserve[j] = true;
+ total++;
+ }
+ }
+ for (SizeType j = rebuildstart, z = rebuildstart; j < m_iNeighborhoodSize; j++)
+ {
+ while (!reserve[z])
+ z++;
+ if (outnodes[j] >= 0)
+ indegree[outnodes[j]] = indegree[outnodes[j]] - 1;
+ if (outnodes[z] >= 0)
+ indegree[outnodes[z]] = indegree[outnodes[z]] + 1;
+ outnodes[j] = outnodes[z];
+ z++;
+ }
+ if ((i * 5) % m_iGraphSize == 0)
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Rebuild %d%%\n",
+ static_cast(i * 1.0 / m_iGraphSize * 100));
- for (SizeType j = rebuildstart; j < m_iNeighborhoodSize * 2 && total < m_iNeighborhoodSize - rebuildstart; j++) {
- if (!reserve[j]) {
- reserve[j] = true;
- total++;
+ }
+ else
+ {
+ return;
+ }
}
- }
- for (SizeType j = rebuildstart, z = rebuildstart; j < m_iNeighborhoodSize; j++) {
- while (!reserve[z]) z++;
- if(outnodes[j] >= 0) indegree[outnodes[j]] = indegree[outnodes[j]] - 1;
- if(outnodes[z] >= 0) indegree[outnodes[z]] = indegree[outnodes[z]] + 1;
- outnodes[j] = outnodes[z];
- z++;
- }
- if ((i * 5) % m_iGraphSize == 0) LOG(Helper::LogLevel::LL_Info, "Rebuild %d%%\n", static_cast(i * 1.0 / m_iGraphSize * 100));
+ });
+ }
+ for (auto &t : mythreads)
+ {
+ t.join();
}
+ mythreads.clear();
auto t2 = std::chrono::high_resolution_clock::now();
- LOG(Helper::LogLevel::LL_Info, "Rebuild RNG time (s): %lld Graph Acc: %f\n", std::chrono::duration_cast(t2 - t1).count(), GraphAccuracyEstimation(index, 100, idmap));
+ SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Rebuild RNG time (s): %lld Graph Acc: %f\n", std::chrono::duration_cast(t2 - t1).count(), GraphAccuracyEstimation(index, 100, idmap));
}
template
@@ -476,31 +599,79 @@ break;
for (int iter = 0; iter < m_iRefineIter - 1; iter++)
{
auto t1 = std::chrono::high_resolution_clock::now();
-#pragma omp parallel for schedule(dynamic)
- for (SizeType i = 0; i < m_iGraphSize; i++)
+ std::vector