Double free error on exiting certain applications

8 posts / 0 new
Last post
Offline
Last seen: 1 year 10 months ago
Joined: 09/17/2015
Posts: 53
Double free error on exiting certain applications

We get the following error:

Press Enter to quit: 
*** Error in `*/add-service': double free or corruption (fasttop): 0x0000000000f7b710 ***

Debugging this a bit shows that the problem seems to be due to a library cleanup routine (in this case, from our libdds-factory.so) trying to delete a previously deleted RTI Semaphore. Running gdb with the following breakpoint shows the same address being passed twice (it is stored in the rdi register):

(gdb) b RTIOsapiSemaphore_delete
Breakpoint 7 at 0xa9b0cc (3 locations)
(gdb) commands
Type commands for breakpoint(s) 7, one per line.
End with a line saying just "end".
>print $rdi
>bt
>continue
>end
(gdb) r
Starting program: /*/add-service 

The last two RTIOsapiSemaphore_delete calls try to delete the same object:

Breakpoint 7, 0x0000000000a9b0cc in RTIOsapiSemaphore_delete ()
$24 = 16234256
#0  0x0000000000a9b0cc in RTIOsapiSemaphore_delete ()
#1  0x000000000057d5a6 in rti::core::Semaphore::~Semaphore() ()
#2  0x00007ffff366fbc9 in __run_exit_handlers (status=0, listp=0x7ffff39db5a8 <__exit_funcs>, run_list_atexit=run_list_atexit@entry=true) at exit.c:82
#3  0x00007ffff366fc15 in __GI_exit (status=<optimized out>) at exit.c:104
#4  0x00007ffff3659b4c in __libc_start_main (main=0x534660 <main()>, argc=1, argv=0x7fffffffe4c8, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7fffffffe4b8) at libc-start.c:321
#5  0x0000000000534a40 in _start ()

Breakpoint 7, 0x0000000000a9b0cc in RTIOsapiSemaphore_delete ()
$25 = 16234256
#0  0x0000000000a9b0cc in RTIOsapiSemaphore_delete ()
#1  0x000000000057d5a6 in rti::core::Semaphore::~Semaphore() ()
#2  0x00007ffff366ff4f in __cxa_finalize (d=0x7ffff5cb5900) at cxa_finalize.c:56
#3  0x00007ffff526b893 in __do_global_dtors_aux () from *
#4  0x00007fffffffe3a0 in ?? ()
#5  0x00007ffff7deb00a in _dl_fini () at dl-fini.c:252
Backtrace stopped: frame did not save the PC
*** Error in *': double free or corruption (fasttop): 0x0000000000f7b710 ***

Program received signal SIGABRT, Aborted.
0x00007ffff366d107 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
56	../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.

(0xf7b710 == 16234256.)

So the main RTI exit handler deletes the same stuff that a library exit handler also deletes.

The CMake file we are using for linking:

##############################################################################
# Try to find RTI Connext DDS
# Once done this will define:
#
#  Connext_FOUND         - system has Connext.
#  Connext_INCLUDE_DIRS  - the Connext include directory.
#  Connext_LIBRARIES     - Link these to use Connext.
#  Connext_ROOT          - Root directory of Connext installation.
#  Connext_IDLGEN_BINARY - Binary for the IDL compiler.
#
# You should set the environment variable $NDDSHOME to your Connext installation
# directory.  Otherwise only a few standard locations are checked.  This script
# also includes the MacroConnext.cmake script, which is useful for generating
# code from your idl.
#
##############################################################################

##############################################################################

# Start out optimistic; the discovery steps further down reset this to FALSE
# as soon as a required piece cannot be located.
set(Connext_FOUND TRUE)

# Connext architecture prefix: "x64" when pointers are 8 bytes wide,
# "i86" in every other case.
set(Connext_ARCH_PREFIX "i86")
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(Connext_ARCH_PREFIX "x64")
endif()

# Determine the C++ compiler version.  CMake 2.8.12 and newer provide
# CMAKE_CXX_COMPILER_VERSION; older releases (e.g. 2.8.2 on Debian 6) do not,
# so fall back to parsing the banner that `<compiler> -v` writes to stderr.
if("${CMAKE_CXX_COMPILER_VERSION}" STREQUAL "")
  execute_process(
    COMMAND "${CMAKE_CXX_COMPILER}" "-v"
    ERROR_VARIABLE compiler_output)
  # Only rewrite the variable when the banner really carries a GCC version.
  # Without this guard an empty banner aborts string(REGEX REPLACE) for lack
  # of an input argument, and a non-GCC banner would be copied verbatim into
  # the version variable because REGEX REPLACE leaves unmatched input as-is.
  if(compiler_output MATCHES "gcc version [.0-9]+")
    # Quoted so that the banner is always passed as exactly one argument.
    string(REGEX REPLACE "^.*gcc version ([.0-9]*).*$" "\\1"
      CMAKE_CXX_COMPILER_VERSION "${compiler_output}")
  else()
    message(WARNING
      "Could not parse a GCC version from the output of "
      "'${CMAKE_CXX_COMPILER} -v'; the compiler version stays empty.")
  endif()
endif()
message(STATUS "Compiler version: ${CMAKE_CXX_COMPILER_VERSION}")

# Locate the installation through a header that lives in <root>/include/ndds.
# Hinted locations: the NDDSHOME CMake variable, the NDDSHOME environment
# variable, and the default 5.2.0 install directory under /opt.
find_path(Connext_INCLUDE_DIR
  NAMES ndds_version.h
  PATHS
    ${NDDSHOME}/include/ndds
    $ENV{NDDSHOME}/include/ndds
    /opt/rti_connext_dds-5.2.0/include/ndds
  )
if("${Connext_INCLUDE_DIR}" STREQUAL "Connext_INCLUDE_DIR-NOTFOUND")
  set(Connext_FOUND FALSE)
else()
  # Walk two levels up: <root>/include/ndds -> <root>/include -> <root>.
  get_filename_component(Connext_ROOT "${Connext_INCLUDE_DIR}" PATH)
  get_filename_component(Connext_ROOT "${Connext_ROOT}" PATH)
endif()

# Collect every entry under <root>/lib whose name starts with the architecture
# prefix selected for this build.
file(GLOB Connext_ARCH_DIR_CANDIDATES
  "${Connext_ROOT}/lib/${Connext_ARCH_PREFIX}*/"
  )

# Filter out directories built for a newer GCC than the one in use, then order
# the survivors in descending order.  NOTE: the ordering is a plain string
# sort of the full paths, which only approximates "newest version first".
set(Connext_ARCH_DIRS "")
foreach(dir ${Connext_ARCH_DIR_CANDIDATES})
  # Look at the directory name only, so a "gcc" elsewhere in the installation
  # path cannot be mistaken for the architecture's compiler tag.
  string(REGEX REPLACE "/+$" "" _connext_arch_dir "${dir}")
  get_filename_component(_connext_arch_name "${_connext_arch_dir}" NAME)
  if(_connext_arch_name MATCHES "gcc([0-9]+(\\.[0-9]+)*)")
    # Both operands are quoted so an empty version cannot break the if().
    if(NOT "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "${CMAKE_MATCH_1}")
      list(APPEND Connext_ARCH_DIRS "${dir}")
    endif()
  else()
    # No GCC version in the name: nothing to compare against, so the entry is
    # kept; the later search for the core library decides whether it is usable.
    list(APPEND Connext_ARCH_DIRS "${dir}")
  endif()
endforeach()

# Sorting is pointless for fewer than two entries, and older CMake releases
# reject list(SORT)/list(REVERSE) on a list variable that does not exist.
list(LENGTH Connext_ARCH_DIRS _connext_arch_dir_count)
if(_connext_arch_dir_count GREATER 1)
  list(SORT Connext_ARCH_DIRS)
  list(REVERSE Connext_ARCH_DIRS)
endif()

message(STATUS "Discovered suitable Connext architecture candidates: ${Connext_ARCH_DIRS}")

# Pick the architecture directory: the first hinted location that contains the
# static core library.  An explicitly supplied Connext_ARCHITECTURE is listed
# ahead of the automatically discovered candidates.
find_path(Connext_LIBRARY_PATH
  NAMES libnddscorez.a
  PATHS
    ${Connext_ROOT}/lib/${Connext_ARCHITECTURE}
    ${Connext_ARCH_DIRS}
  )

if("${Connext_LIBRARY_PATH}" STREQUAL "Connext_LIBRARY_PATH-NOTFOUND")
  set(Connext_FOUND FALSE)
else()
  # The architecture name is the last component of the library directory.
  get_filename_component(Connext_ARCHITECTURE "${Connext_LIBRARY_PATH}" NAME)
endif()

if(DEFINED Connext_ARCHITECTURE)
  message(STATUS "Using RTI Connext architecture '${Connext_ARCHITECTURE}'.")
  message(STATUS "Set -DConnext_ARCHITECTURE=... to use a different architecture.")
endif()

# Preprocessor definitions for Connext DDS, kept as one space-separated string
# and applied to the current directory scope.
set(Connext_DEFINITIONS "-DRTI -DRTI_UNIX -DRTI_LINUX -DRTI_SHARED_MEMORY")
add_definitions(${Connext_DEFINITIONS})

# Locate the directory that holds the ndds_cpp.h header.
find_path(Connext_INCLUDE_DIRS_NDDS
  NAMES ndds_cpp.h
  PATHS ${Connext_ROOT}/include/ndds
  )

# Consumers need <root>/include, the hpp trees below it, and the ndds header
# directory together with its own hpp subdirectory.
set(Connext_INCLUDE_DIRS ${Connext_ROOT}/include)
list(APPEND Connext_INCLUDE_DIRS
  ${Connext_ROOT}/include/hpp
  ${Connext_ROOT}/include/hpp/rtiboost
  )
list(APPEND Connext_INCLUDE_DIRS
  ${Connext_INCLUDE_DIRS_NDDS}
  ${Connext_INCLUDE_DIRS_NDDS}/hpp
  )

# Choose between the static Connext archives (lib*z.a) and the shared
# libraries.
#
# WARNING: linking the static archives into both a shared library and the
# executable that uses it has been observed to make Connext cleanup routines
# run twice at exit, ending in "double free or corruption".  Configure with
# -DConnext_USE_STATIC_LIBS=OFF to link the shared libraries instead.  The
# default stays ON so that existing builds keep their current behaviour.
option(Connext_USE_STATIC_LIBS
  "Link the static Connext DDS archives (lib*z.a) instead of the shared libraries"
  ON)

if(Connext_USE_STATIC_LIBS)
  set(_connext_link_mode "static")
  set(nddscore_libname libnddscorez.a)
  set(nddsc_libname libnddscz.a)
  set(nddscpp_libname libnddscppz.a)
  set(nddscpp2_libname libnddscpp2z.a)
else()
  # Plain names let find_library() apply the platform's shared-library prefix
  # and suffix.  NOTE(review): assumes the shared libraries sit in the same
  # architecture directory as the static ones - confirm for your installation.
  set(_connext_link_mode "shared")
  set(nddscore_libname nddscore)
  set(nddsc_libname nddsc)
  set(nddscpp_libname nddscpp)
  set(nddscpp2_libname nddscpp2)
endif()

# find_library() caches its result and does not search again once a library
# was found, so forget the cached paths whenever the link mode changes.  A
# cache without the marker predates this option and therefore holds static
# paths.
set(_connext_prev_link_mode "static")
if(DEFINED Connext_LINK_MODE_CACHED)
  set(_connext_prev_link_mode "${Connext_LINK_MODE_CACHED}")
endif()
if(NOT "${_connext_prev_link_mode}" STREQUAL "${_connext_link_mode}")
  foreach(_connext_lib nddscore nddsc nddscpp nddscpp2)
    unset(${_connext_lib}_lib CACHE)
  endforeach()
endif()
set(Connext_LINK_MODE_CACHED "${_connext_link_mode}"
  CACHE INTERNAL "Link mode used for the cached Connext library paths")

# Locate each library inside the selected architecture directory.  A missing
# library means the package is unusable, so record that instead of silently
# putting a *-NOTFOUND entry into Connext_LIBRARIES.
foreach(_connext_lib nddscore nddsc nddscpp nddscpp2)
  find_library(${_connext_lib}_lib
    NAMES ${${_connext_lib}_libname}
    PATHS ${Connext_LIBRARY_PATH}
    )
  if(NOT ${_connext_lib}_lib)
    set(Connext_FOUND FALSE)
  endif()
endforeach()

# Dependents are listed before the libraries they depend on, as static
# linking requires.
set(Connext_LIBRARIES
  ${nddscpp2_lib}
  ${nddscpp_lib}
  ${nddsc_lib}
  ${nddscore_lib}
  ${CMAKE_DL_LIBS}
  ${external_libs}
  )

# System libraries, one list element per flag rather than a single
# space-joined string.
list(APPEND Connext_LIBRARIES -ldl -lm -lpthread -lrt)

# Path of the rtiddsgen IDL compiler inside the installation.
set(Connext_IDLGEN_BINARY ${Connext_ROOT}/bin/rtiddsgen)

# Report the outcome, honouring the QUIET and REQUIRED keywords that
# find_package() passes in as Connext_FIND_QUIETLY / Connext_FIND_REQUIRED.
if(Connext_FOUND)
  if(NOT Connext_FIND_QUIETLY)
    message(STATUS "Found Connext DDS libraries: ${Connext_LIBRARIES}")
  endif()
elseif(Connext_FIND_REQUIRED)
  message(FATAL_ERROR "Could not find Connext DDS")
elseif(NOT Connext_FIND_QUIETLY)
  message(STATUS "Could not find Connext DDS")
endif()

# Hide the cache entries created by the find_path()/find_library() calls of
# this module.  The result variables (Connext_INCLUDE_DIRS, Connext_LIBRARIES,
# ...) are ordinary variables, not cache entries, so marking them as advanced
# would have no useful effect.
mark_as_advanced(
  Connext_INCLUDE_DIR
  Connext_INCLUDE_DIRS_NDDS
  Connext_LIBRARY_PATH
  nddscore_lib
  nddsc_lib
  nddscpp_lib
  nddscpp2_lib
  )


Offline
Last seen: 1 week 3 days ago
Joined: 04/02/2013
Posts: 110

Hi,

We haven't seen this problem before. Can you share with us an application that reproduces the crash so we can debug it?

Thank you.

Offline
Last seen: 1 year 10 months ago
Joined: 09/17/2015
Posts: 53

The problem arises from overlinking. If a shared library links to RTI DDS and the main application links to RTI DDS as well, this problem shows up because the clean-up routines run twice. While I still think it is a bug, you can work around it by making sure you only link to DDS once.

A similar problem occurs if you are using RTI DDS with dlopen and dlclose. 

I guess some checks are missing that would make sure the library cleanup routines behave correctly even if they are run multiple times. 

Offline
Last seen: 1 year 10 months ago
Joined: 09/17/2015
Posts: 53

A quick solution would be to remove the static libraries from the RTI installation, so no one ever links statically to RTI. Then this problem is resolved.

Offline
Last seen: 1 week 3 days ago
Joined: 04/02/2013
Posts: 110

Thank you for the details. We'll look into it.

Offline
Last seen: 1 week 3 days ago
Joined: 04/02/2013
Posts: 110

Hi,

I've been trying to reproduce the double destruction you reported but haven't been successful.

I've compiled a shared library and an application.

The shared library links with Connext DDS (I've tried linking statically and dynamically).

The application links with Connext DDS (again both modes) and with the shared library (I've also tried dlopen).

In all cases, the ~Semaphore destructor gets called on different objects and I never see the double free.

Are you doing something different?

I appreciate your help debugging this problem.

 

jwillemsen's picture
Offline
Last seen: 7 months 1 week ago
Joined: 09/24/2013
Posts: 53

We do see something similar with our CIAO DDS4CCM solution. It happens mostly in cases where DDS is just initialized and shutdown without sending any real data.

Offline
Last seen: 1 year 10 months ago
Joined: 09/17/2015
Posts: 53

I think the final solution is to link to the shared libraries and not the static ones. Then the error was gone for good.