From 4daf40fff1c678cd87f2bef819250438ba2b1719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Thu, 6 Mar 2025 14:13:21 +0800 Subject: [PATCH 1/4] [fix](build) fix mac ut for clucene --- .github/workflows/clucene-ut.yml | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/.github/workflows/clucene-ut.yml b/.github/workflows/clucene-ut.yml index 4c28612fd9b..a0a5e2ec90a 100644 --- a/.github/workflows/clucene-ut.yml +++ b/.github/workflows/clucene-ut.yml @@ -78,7 +78,7 @@ jobs: run_clucene_ut_macos: name: CLucene UT (MacOS) - runs-on: macos-12 + runs-on: macos-15 steps: - name: "Checkout ${{ github.event.pull_request.number }} ${{ github.event.pull_request.head.sha }}" uses: actions/checkout@v4 @@ -108,9 +108,36 @@ jobs: 'maven' \ 'node' \ 'llvm@16' + + - name: "Ensure Correct Xcode" + run: sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer + + - name: "Set SDKROOT" + run: echo "SDKROOT=$(xcrun --show-sdk-path)" >> $GITHUB_ENV + + - name: "Set Compiler" + run: | + echo "CC=$(xcrun --find clang)" >> $GITHUB_ENV + echo "CXX=$(xcrun --find clang++)" >> $GITHUB_ENV + + - name: "Set Compilation Flags" + run: | + echo "CFLAGS=-isysroot $(xcrun --show-sdk-path)" >> $GITHUB_ENV + echo "CXXFLAGS=-isysroot $(xcrun --show-sdk-path)" >> $GITHUB_ENV + + - name: "Use Homebrew LLVM" + run: | + echo "CC=$(brew --prefix llvm@16)/bin/clang" >> $GITHUB_ENV + echo "CXX=$(brew --prefix llvm@16)/bin/clang++" >> $GITHUB_ENV + - name: "Run" run: | set -x + export SDKROOT=$(xcrun --show-sdk-path) + export CC=$(xcrun --find clang) + export CXX=$(xcrun --find clang++) + export CFLAGS="-isysroot $(xcrun --show-sdk-path)" + export CXXFLAGS="-isysroot $(xcrun --show-sdk-path)" mkdir build && cd build cmake ../ make cl_test From 57c5208f9c41c066190a90092182d2552d3ddbbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Thu, 6 Mar 2025 14:57:54 +0800 Subject: [PATCH 2/4] [fix](build) fix mac ut for clucene --- src/test/analysis/TestStandard95.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/analysis/TestStandard95.cpp b/src/test/analysis/TestStandard95.cpp index 80f3ba88240..ef1f9717200 100644 --- a/src/test/analysis/TestStandard95.cpp +++ b/src/test/analysis/TestStandard95.cpp @@ -470,7 +470,7 @@ std::vector datas = { "{\"qid\": \"qid_8325162146787472205\", \"category\": \"娱乐-博彩\", \"title\": \"第一次发图。。。(图2)请高手进来批一下。。第一次发图。拍摄不好 \", \"desc\": \"第一次发图。不好虽见笑```谢谢进来帮批示的朋友。。:)))\\r\", \"answer\": \"6 12场感觉挺悬的...祝好\"}" }; -std::vector tokens = { +std::vector standard95_tokens = { "qid|qid_1815059893214501395|category|烦|恼|恋|爱|title|请|问|深|入|骨|髓|地|喜|欢|一|个|人|怎|么|办|我|不|能|确|定|对|方|是|不|是|喜|欢|我|我|却|想|desc|我|不|能|确|定|对|方|是|不|是|喜|欢|我|我|却|想|分|分|秒|秒|跟|他|在|一|起|有|谁|能|告|诉|我|如|何|能|想|他|少|一|点|answer|一|定|要|告|诉|他|你|很|喜|欢|他|很|爱|他|虽|然|不|知|道|你|和|他|现|在|的|关|系|是|什|么|但|如|果|真|的|觉|得|很|喜|欢|就|向|他|表|白|啊|起|码|你|努|力|过|了|女|生|主|动|多|少|占|一|点|优|势|的|呵|呵|只|愿|曾|经|拥|有|到|以|后|就|算|感|情|没|现|在|这|么|强|烈|了|也|不|会|觉|得|遗|憾|啊|与|其|每|天|那|么|痛|苦|的|想|他|恋|他|还|不|如|直|接|告|诉|他|不|要|怕|回|破|坏|你|们|现|有|的|感|情|因|为|如|果|不|告|诉|他|你|可|能|回|后|悔|一|辈|子|", "qid|qid_2063849676113062517|category|游|戏|完|美|游|戏|诛|仙|title|我|登|陆|诛|仙|2|时|总|说|我|账|号|密|码|错|误|但|是|我|打|的|是|正|确|的|就|算|不|对|我|desc|answer|被|盗|号|了|我|的|号|在|22|号|那|天|被|盗|了|跟|你|一|样|情|况|link|密|码|与|账|号|错|误|我|密|保|都|有|了|呐|邮|箱|换|密|码|也|不|行|还|被|删|了|号|伤|心|兼|郁|闷|呵|呵|盗|号|了|建|议|跟|完|美|申|请|把|号|要|回|来|或|者|玩|新|的|号|", "qid|qid_6625582808814915192|category|游|戏|网|络|游|戏|title|斩|魔|仙|者|称|号|怎|么|得|来|的|desc|斩|魔|仙|者|称|号|怎|么|得|来|的|answer|楼|主|您|好|以|下|为|转|载|r|r|圣|诞|前|热|身|来|生|肖|传|说|做|斩|魔|仙|者|r|r|一|年|一|度|的|圣|诞|节|快|要|来|临|了|大|街|小|巷|商|户|们|都|在|忙|着|准|备|12|月|25|日|圣|诞|的|来|临|而|这|时|候|一|些|妖|魔|也|正|蠢|蠢|欲|动|准|备|作|乱|作|为|生|肖|世|界|肩|负|维|护|世|界|和|平|拯|救|全|人|类|的|生|肖|使|者|怎|么|能|不|有|所|行|动|为|了|生|肖|世|界|的|安|定|而|做|防|范|准|备|r|r|要|让|妖|魔|鬼|怪|能|对|你|有|所|心|悸|除|了|自|己|本|身|武|艺|要|高|强|最|好|能|在|妖|魔|界|打|出|知|名|度|这|样|当|你|的|亲|朋|好|友|被|妖|魔|袭|击|时|只|要|爆|出|你|的|名|号|这|些|妖|魔|上|就|会|落|荒|而|逃|岂|不|好|哉|那|么|斩|魔|仙|者|这|个|响|亮|的|称|号|应|该|足|够|能|震|慑|住|妖|魔|让|他|们|铭|记|在|心|了|吧|r|r|斩|魔|仙|者|的|称|号|r|r|而|且|这|个|斩|魔|仙|者|的|称|号|并|不|是|人|人|都|能|得|到|的|只|有|成|功|挑|战|70|级|副|本|中|的|隐|藏|boss|羽|翼|仙|的|人|才|能|获|得|此|称|号|并|且|前|提|条|件|是|在|12|月|18|日|12|月|25|日|之|间|第|一|队|成|功|挑|战|羽|翼|仙|的|人|才|能|获|此|称|号|因|此|此|称|号|在|全|服|范|围|内|是|绝|对|不|可|能|超|过|5|个|的|r|r|要|挑|战|羽|翼|仙|可|不|是|一|件|容|易|的|事|首|先|要|在|70|级|副|本|中|打|败|4|个|强|大|的|boss|在|打|完|副|本|的|第|4|个|boss|有|一|定|几|率|获|得|道|具|羽|翼|真|元|有|了|羽|翼|真|元|后|就|可|以|与|羽|翼|仙|进|行|一|场|战|斗|羽|翼|仙|就|站|在|第|4|个|boss|的|旁|边|只|是|没|有|道|具|是|不|能|进|入|战|斗|的|r|r|羽|翼|仙|r|r|在|12|月|18|日|12|月|25|日|活|动|期|间|成|功|挑|战|羽|翼|仙|后|的|第|一|支|队|伍|就|可|以|获|得|兑|换|斩|魔|仙|者|的|道|具|烈|火|珍|珠|旗|当|然|如|果|你|在|这|场|激|烈|的|战|斗|中|不|幸|捐|躯|那|么|当|然|是|不|会|得|到|这|个|道|具|的|得|到|了|这|把|烈|火|珍|珠|旗|的|玩|家|就|可|以|到|npc|燃|烧|使|处|兑|换|称|号|了|r|r|这|样|兼|具|高|强|能|力|和|超|强|人|品|才|能|获|得|的|称|号|怎|么|能|不|人|望|而|生|畏|怎|么|能|不|让|那|些|妖|魔|胆|怯|想|要|获|得|的|玩|家|就|快|快|行|动|莫|要|让|人|先|抢|了|这|全|服|唯|一|的|斩|魔|仙|者|称|号|r|r|如|果|满|意|请|采|纳|r|谢|谢|", @@ -671,7 +671,7 @@ static void testCompLucene95(CuTest *tc) { std::vector new_tokens; testCutLines(datas, new_tokens); - CLUCENE_ASSERT((tokens == new_tokens)); + CLUCENE_ASSERT((standard95_tokens == new_tokens)); } CuSuite *teststandard95(void) { From 668c93c7d37daeda2aff90d2590508751132c8a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Thu, 6 Mar 2025 15:19:04 +0800 Subject: [PATCH 3/4] [fix](build) fix mac ut for clucene --- src/core/CLucene/index/SDocumentWriter.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h index 0795a7aa372..664667c8f3a 100644 --- a/src/core/CLucene/index/SDocumentWriter.h +++ b/src/core/CLucene/index/SDocumentWriter.h @@ -733,10 +733,17 @@ class SDocumentsWriter : public IDocumentsWriter { std::string segmentFileName(const std::string &extension) { - return segment + "." + extension; + std::string result; + result.reserve(segment.size() + extension.size() + 1); + result = segment; + result += "."; + result += extension; + return result; } std::string segmentFileName(const char *extension) { - return segmentFileName(string(extension)); + if (!extension) return segment; + std::string ext(extension); + return segmentFileName(ext); } int32_t getMaxBufferedDocs() override { return maxBufferedDocs; From 6dec20ae84c5e59a4dddefba9ced3991133fc171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Thu, 6 Mar 2025 21:36:40 +0800 Subject: [PATCH 4/4] reserve --- src/contribs-lib/CLucene/analysis/jieba/FullSegment.hpp | 1 + src/contribs-lib/CLucene/analysis/jieba/MixSegment.hpp | 1 + src/contribs-lib/CLucene/analysis/jieba/StringUtil.hpp | 1 + src/core/CLucene/index/SDocumentWriter.cpp | 1 + src/core/CLucene/index/SegmentInfos.cpp | 4 ++-- 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/contribs-lib/CLucene/analysis/jieba/FullSegment.hpp b/src/contribs-lib/CLucene/analysis/jieba/FullSegment.hpp index df3c4c1ccc1..d37577f7847 100644 --- a/src/contribs-lib/CLucene/analysis/jieba/FullSegment.hpp +++ b/src/contribs-lib/CLucene/analysis/jieba/FullSegment.hpp @@ -161,6 +161,7 @@ class FullSegment: public SegmentBase { void LoadStopWordDict(const string& filePath) { ifstream ifs(filePath.c_str()); + stopWordList_.reserve(1000); if (ifs.is_open()) { string line; while (getline(ifs, line)) { diff --git a/src/contribs-lib/CLucene/analysis/jieba/MixSegment.hpp b/src/contribs-lib/CLucene/analysis/jieba/MixSegment.hpp index dcb99d231c6..80e06d86ac3 100644 --- a/src/contribs-lib/CLucene/analysis/jieba/MixSegment.hpp +++ b/src/contribs-lib/CLucene/analysis/jieba/MixSegment.hpp @@ -124,6 +124,7 @@ class MixSegment: public SegmentTagged { void LoadStopWordDict(const string& filePath) { ifstream ifs(filePath.c_str()); + stopWordList_.reserve(1000); if (ifs.is_open()) { string line; while (getline(ifs, line)) { diff --git a/src/contribs-lib/CLucene/analysis/jieba/StringUtil.hpp b/src/contribs-lib/CLucene/analysis/jieba/StringUtil.hpp index 6079ba45b24..88795e741ff 100644 --- a/src/contribs-lib/CLucene/analysis/jieba/StringUtil.hpp +++ b/src/contribs-lib/CLucene/analysis/jieba/StringUtil.hpp @@ -115,6 +115,7 @@ namespace limonp { inline void Split(const string& src, vector& res, const string& pattern, size_t maxsplit = string::npos) { res.clear(); + res.reserve(src.size()); size_t Start = 0; size_t end = 0; string sub; diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp index e1336650f6f..f29f32dabce 100644 --- a/src/core/CLucene/index/SDocumentWriter.cpp +++ b/src/core/CLucene/index/SDocumentWriter.cpp @@ -1341,6 +1341,7 @@ int32_t SDocumentsWriter::flush(bool _closeDocStore) { } newFiles.clear(); + newFiles.reserve(10); docStoreOffset = numDocsInStore; diff --git a/src/core/CLucene/index/SegmentInfos.cpp b/src/core/CLucene/index/SegmentInfos.cpp index 60a36954746..e2d83d5272f 100644 --- a/src/core/CLucene/index/SegmentInfos.cpp +++ b/src/core/CLucene/index/SegmentInfos.cpp @@ -200,7 +200,7 @@ string SegmentInfo::segString(Directory* dir) { // Already cached: return _files; } - + _files.reserve(10); bool useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { @@ -912,7 +912,7 @@ string SegmentInfo::segString(Directory* dir) { // contents (NOTE: NFS clients often have such stale // caching): vector files; - + files.reserve(10); int64_t genA = -1; if (directory != NULL){