From 2722518f0d0e41a81bef29cd739f37f640aa57b7 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 19 Dec 2025 12:15:12 -0800 Subject: [PATCH] [-Wunsafe-buffer-usage] Add check for custom printf/scanf functions This commit adds the support for functions annotated with `__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These functions will be treated the same way as printf/scanf functions in the standard C library by `-Wunsafe-buffer-usage` rdar://143233737 --- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 115 +++++++++++++++--- ...arn-unsafe-buffer-usage-libc-functions.cpp | 32 ++++- 3 files changed, 129 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index fae5f8b8aa8e3..f9bba5d54e9c7 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -40,6 +40,7 @@ WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) WARNING_GADGET(UniquePtrArrayAccess) WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall) +WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall) WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 7ef20726d0ab9..7c21ec86af544 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -825,9 +825,11 @@ struct LibcFunNamePrefixSuffixParser { // // `UnsafeArg` is the output argument that will be set only if this function // returns true. -static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, - const unsigned FmtArgIdx, ASTContext &Ctx, - bool isKprintf = false) { +// `FmtArgIdx` is insignificant if its value is negative, meaning that format +// arguments start at `FmtIdx` + 1. +static bool hasUnsafeFormatOrSArg(ASTContext &Ctx, const CallExpr *Call, + const Expr *&UnsafeArg, const unsigned FmtIdx, + int FmtArgIdx = -1, bool isKprintf = false) { class StringFormatStringHandler : public analyze_format_string::FormatStringHandler { const CallExpr *Call; @@ -850,8 +852,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, unsigned PArgIdx = -1; if (Precision.hasDataArgument()) - PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx; - if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) { + PArgIdx = Precision.getArgIndex() + FmtArgIdx; + if (0 <= PArgIdx && PArgIdx < Call->getNumArgs()) { const Expr *PArg = Call->getArg(PArgIdx); // Strip the cast if `PArg` is a cast-to-int expression: @@ -886,9 +888,9 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, analyze_printf::PrintfConversionSpecifier::sArg) return true; // continue parsing - unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + unsigned ArgIdx = FS.getArgIndex() + FmtArgIdx; - if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs())) + if (!(0 <= ArgIdx && ArgIdx < Call->getNumArgs())) // If the `ArgIdx` is invalid, give up. return true; // continue parsing @@ -921,12 +923,15 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, bool isUnsafeArgSet() { return UnsafeArgSet; } }; - const Expr *Fmt = Call->getArg(FmtArgIdx); + const Expr *Fmt = Call->getArg(FmtIdx); + unsigned FmtArgStartingIdx = + FmtArgIdx < 0 ? FmtIdx + 1 : static_cast(FmtArgIdx); if (auto *SL = dyn_cast(Fmt->IgnoreParenImpCasts())) { if (SL->getCharByteWidth() == 1) { StringRef FmtStr = SL->getString(); - StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx); + StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg, + Ctx); return analyze_format_string::ParsePrintfString( Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), @@ -935,7 +940,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, } if (auto FmtStr = SL->tryEvaluateString(Ctx)) { - StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx); + StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg, + Ctx); return analyze_format_string::ParsePrintfString( Handler, FmtStr->data(), FmtStr->data() + FmtStr->size(), Ctx.getLangOpts(), Ctx.getTargetInfo(), isKprintf) && @@ -946,7 +952,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, // In this case, this call is considered unsafe if at least one argument // (including the format argument) is unsafe pointer. return llvm::any_of( - llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + llvm::make_range(Call->arg_begin() + FmtArgStartingIdx, Call->arg_end()), [&UnsafeArg, &Ctx](const Expr *Arg) -> bool { if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) { UnsafeArg = Arg; @@ -1161,7 +1167,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx, // It is a fprintf: const Expr *UnsafeArg; - if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) { + if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 1)) { Result.addNode(Tag, DynTypedNode::create(*UnsafeArg)); return true; } @@ -1175,7 +1181,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx, if (auto *II = FD->getIdentifier()) isKprintf = II->getName() == "kprintf"; - if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) { + if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 0, -1, isKprintf)) { Result.addNode(Tag, DynTypedNode::create(*UnsafeArg)); return true; } @@ -1190,7 +1196,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx, // second is an integer, it is a snprintf: const Expr *UnsafeArg; - if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) { + if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 2)) { Result.addNode(Tag, DynTypedNode::create(*UnsafeArg)); return true; } @@ -2068,6 +2074,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { constexpr static const char *const UnsafeVaListTag = "UnsafeLibcFunctionCall_va_list"; +public: enum UnsafeKind { OTHERS = 0, // no specific information, the callee function is unsafe SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. @@ -2080,7 +2087,6 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { // considered unsafe as it is not compile-time check } WarnedFunKind = OTHERS; -public: UnsafeLibcFunctionCallGadget(const MatchResult &Result) : WarningGadget(Kind::UnsafeLibcFunctionCall), Call(Result.getNodeAs(Tag)) { @@ -2171,6 +2177,85 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { SmallVector getUnsafePtrs() const override { return {}; } }; +class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget { + const CallExpr *const Call; + const Expr *UnsafeArg = nullptr; + constexpr static const char *const Tag = "UnsafeFormatAttributedFunctionCall"; + constexpr static const char *const UnsafeStringTag = + "UnsafeFormatAttributedFunctionCall_string"; + +public: + UnsafeFormatAttributedFunctionCallGadget(const MatchResult &Result) + : WarningGadget(Kind::UnsafeLibcFunctionCall), + Call(Result.getNodeAs(Tag)), + UnsafeArg(Result.getNodeAs(UnsafeStringTag)) {} + + static bool matches(const Stmt *S, ASTContext &Ctx, + const UnsafeBufferUsageHandler *Handler, + MatchResult &Result) { + if (ignoreUnsafeLibcCall(Ctx, *S, Handler)) + return false; + auto *CE = dyn_cast(S); + if (!CE || !CE->getDirectCallee()) + return false; + const auto *FD = dyn_cast(CE->getDirectCallee()); + if (!FD) + return false; + + const FormatAttr *Attr = nullptr; + bool IsPrintf = false; + bool AnyAttr = llvm::any_of( + FD->specific_attrs(), + [&Attr, &IsPrintf](const FormatAttr *FA) -> bool { + if (const auto *II = FA->getType()) { + if (II->getName() == "printf" || II->getName() == "scanf") { + Attr = FA; + IsPrintf = II->getName() == "printf"; + return true; + } + } + return false; + }); + const Expr *UnsafeArg; + + if (AnyAttr && !IsPrintf && + (CE->getNumArgs() >= static_cast(Attr->getFirstArg()))) { + // for scanf-like functions, any format argument is considered unsafe: + Result.addNode(Tag, DynTypedNode::create(*CE)); + return true; + } + if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg( + Ctx, CE, UnsafeArg, + // FormatAttribute indexes are 1-based: + Attr->getFormatIdx() - 1, Attr->getFirstArg() - 1)) { + Result.addNode(Tag, DynTypedNode::create(*CE)); + Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg)); + return true; + } + return false; + } + + const Stmt *getBaseStmt() const { return Call; } + + SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + if (UnsafeArg) + Handler.handleUnsafeLibcCall( + Call, UnsafeLibcFunctionCallGadget::UnsafeKind::STRING, Ctx, + UnsafeArg); + else + Handler.handleUnsafeLibcCall( + Call, UnsafeLibcFunctionCallGadget::UnsafeKind::OTHERS, Ctx); + } + + DeclUseList getClaimedVarUseSites() const override { return {}; } + + SmallVector getUnsafePtrs() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `findStmtsInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index 4f1af79609223..8df65ebc2eaf0 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\ // RUN: -verify %s -// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\ // RUN: -verify %s -x objective-c++ -// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\ // RUN: -verify %s -// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\ // RUN: -verify %s -DTEST_STD_NS typedef struct {} FILE; @@ -255,3 +255,27 @@ void dontCrashForInvalidFormatString() { snprintf((char*)0, 0, "%"); snprintf((char*)0, 0, "\0"); } + + +// Also warn about unsafe printf/scanf-like functions: +void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 2))); +void myprintf_2(const char *F, int irrelevant, const char *Str) __attribute__((__format__ (__printf__, 1, 3))); +void myscanf(const char *F, ...) __attribute__((__format__ (__scanf__, 1, 2))); + +void test_myprintf(char * Str, std::string StdStr) { + myprintf("hello", Str); + myprintf("hello %s", StdStr.c_str()); + myprintf("hello %s", Str); // expected-warning{{function 'myprintf' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} + + myprintf_2("hello", 0, Str); + myprintf_2("hello %s", 0, StdStr.c_str()); + myprintf_2("hello %s", 0, Str); // expected-warning{{function 'myprintf_2' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} + myscanf("hello %s"); + myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}} + + int X; + + myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}} +}