I'm using clang/llvm to programmatically compile and link bits of C source. I'm finding that the llvm Linker doesn't seem to report the fact that unresolved externals exist in a module as an error.
I have the following code (forgive the length, but this really is the minimum required):
/**
 * Compiles a small C snippet held in memory with clang, links the resulting
 * module with the LLVM Linker and runs its main() through an ExecutionEngine.
 *
 * The snippet deliberately calls a function that is declared but never
 * defined, in order to demonstrate that nothing reports the unresolved
 * external until the code is actually executed.
 *
 * @return 0 on success, otherwise a negative code:
 *         -1 CodeGenerator could not be created,
 *         -2 parse error or no module was generated,
 *         -3 link error,
 *         -4 ExecutionEngine could not be created,
 *         -5 main() was not found in the linked module,
 *         -6 TargetInfo could not be created for the host triple.
 */
int CompileAndLink()
{
    llvm::InitializeNativeTarget();

    std::string code = "int UnresolvedFunction();\n"
                       "int main() { return UnresolvedFunction(); }";

    // --- Diagnostics, file/source management -------------------------------
    clang::DiagnosticOptions diagnosticOptions;
    clang::TextDiagnosticPrinter tdp( llvm::outs(), diagnosticOptions );
    llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs( new clang::DiagnosticIDs );
    clang::Diagnostic diag( diagIDs, &tdp, false );
    clang::FileSystemOptions fsOptions;
    clang::FileManager fm( fsOptions );
    clang::SourceManager sm( diag, fm );
    clang::HeaderSearch hs( fm );

    // --- Target -------------------------------------------------------------
    clang::TargetOptions targetOptions;
    targetOptions.Triple = llvm::sys::getHostTriple();
    // The caller owns the returned TargetInfo; hold it in an OwningPtr so it is
    // released on every return path. It is declared before the Preprocessor and
    // ASTContext that reference it, so it is destroyed after them.
    llvm::OwningPtr<clang::TargetInfo> ti( clang::TargetInfo::CreateTargetInfo( diag, targetOptions ) );
    if( !ti ) {
        printf( "could not create TargetInfo\n" );
        return -6;
    }

    // --- Preprocessor -------------------------------------------------------
    clang::HeaderSearchOptions headerSearchOptions;
    clang::LangOptions langOptions;
    clang::ApplyHeaderSearchOptions( hs, headerSearchOptions, langOptions, ti->getTriple() );
    clang::PreprocessorOptions ppo;
    clang::Preprocessor pp( diag, langOptions, *ti, sm, hs );
    clang::FrontendOptions frontendOptions;
    clang::InitializePreprocessor( pp, ppo, headerSearchOptions, frontendOptions );
    pp.getBuiltinInfo().InitializeBuiltins( pp.getIdentifierTable(), langOptions );

    // --- Source buffer and AST context --------------------------------------
    llvm::MemoryBuffer* sourceBuffer = llvm::MemoryBuffer::getMemBufferCopy( code );
    sm.createMainFileIDForMemBuffer( sourceBuffer );
    clang::Builtin::Context bic( *ti );
    clang::ASTContext astc( langOptions, sm, *ti,
                            pp.getIdentifierTable(),
                            pp.getSelectorTable(),
                            bic,
                            0 );

    // --- Code generation ----------------------------------------------------
    llvm::LLVMContext lc;
    clang::CodeGenOptions codeGenOptions;
    llvm::OwningPtr<clang::CodeGenerator> cg;
    cg.reset( clang::CreateLLVMCodeGen( diag, "clang_test", codeGenOptions, lc ) );
    if( !cg ) {
        printf( "could not create CodeGenerator\n" );
        return -1;
    }
    clang::ParseAST( pp, cg.get(), astc );
    if( tdp.getNumErrors() ) {
        printf( "error parsing AST\n" );
        return -2;
    }
    // ReleaseModule() hands the module to us, and LinkInModule() below does not
    // take ownership of its source module, so keep it in an OwningPtr.
    llvm::OwningPtr<llvm::Module> new_module( cg->ReleaseModule() );
    if( !new_module ) {
        printf( "error generating code\n" );
        return -2;
    }

    // --- Link ---------------------------------------------------------------
    llvm::Linker linker( "clang_test", "clang_test", lc, llvm::Linker::Verbose );
    std::string error;
    if( linker.LinkInModule( new_module.get(), &error ) || !error.empty() ) {
        printf( "link error\n" );
        return -3;
    }
    llvm::Module* composite_module = linker.getModule();
    if( composite_module == NULL ) {
        printf( "link error\n" );
        return -3;
    }

    // --- Execute ------------------------------------------------------------
    llvm::OwningPtr<llvm::ExecutionEngine> engine(
        llvm::ExecutionEngine::create( composite_module,
                                       false,
                                       &error ) );
    if( engine.get() != NULL ) {
        // The engine now owns the composite module; make the linker give up its
        // claim so the module is not deleted twice.
        linker.releaseModule();
    }
    if( !error.empty() || engine.get() == NULL ) {
        printf( "error creating ExecutionEngine\n" );
        return -4;
    }
    llvm::Function* f = composite_module->getFunction( "main" );
    if( f == NULL ) {
        printf( "couldn't find main function\n" );
        return -5;
    }
    // This will abort with the message:
    // LLVM ERROR: Program used external function 'UnresolvedFunction' which could not be resolved!
    std::vector<llvm::GenericValue> params;
    llvm::GenericValue result = engine->runFunction( f, params );
    // getZExtValue() yields a 64-bit unsigned value; cast explicitly so the
    // argument always matches %llu regardless of how uint64_t is typedef'd.
    printf( "function main returned %llu\n",
            static_cast<unsigned long long>( result.IntVal.getZExtValue() ) );
    return 0;
}
No errors happen anywhere until we call runFunction near the end, which gives the error "LLVM ERROR: Program used external function 'UnresolvedFunction' which could not be resolved!" before aborting.
I kinda expected LinkInModule or getModule to fail with some error, but this isn't the case. My question is: is there some way to determine that a module has unresolved externals, so as not to crash and burn when trying to execute the code? I've been spelunking through the llvm source for quite a while, and so far can't find what I'm looking for.
I'm using llvm/clang 2.9 on Mac OS X (x86_64), if that matters.
Edit: I've found a private function called GetAllUndefinedSymbols in the LLVM sources (llvm-2.9/lib/Linker/LinkArchives.cpp), which appears to do what I want. I guess I was hoping there was an actual API for this; is there something I missed?
IIRC, nobody has ever asked for such an API, so none exists. I'm not entirely sure what you would do with such an API anyway: any non-trivial program will reference symbols not defined in any .bc file, like malloc.
If you really want to check, something like the following should work:
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
if (I->isDeclaration())
UndefGlobals.insert(&*I);
for (Module::global_iterator I = M->global_begin(),
E = M->global_end();
I != E; ++I)
if (I->isDeclaration())
UndefGlobals.insert(&*I);
The LLVM linker is used to link Modules. Given that it's perfectly valid for a Module to contain external declarations, no error should be reported. So it's up to you to decide whether you need to raise an error in such a situation (some externals, e.g. C library functions, might in some situations be automagically resolved by the JIT).
So, servn's code is what you have to do here.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With