Recently, I hardened my Keycloak deployment to use a dedicated Infinispan cluster as a remote-store
for an extra layer of persistence for Keycloak's various caches. The change itself went reasonably well, although after making this change, we started seeing a lot of login errors due to the expired_code
error message:
WARN [org.keycloak.events] (default task-2007) type=LOGIN_ERROR, realmId=my-realm, clientId=null, userId=null, ipAddress=192.168.50.38, error=expired_code, restart_after_timeout=true
This error message is typically repeated dozens of times all within a short period of time and from the same IP address. The cause of this appears to be the end-user's browser infinitely redirecting on login until the browser itself stops the loop.
I have seen various GitHub issues (https://github.com/helm/charts/issues/8355) that also document this behavior, and the consensus seems to be that this is caused by the Keycloak cluster not able to correctly discover its members via JGroups.
This explanation makes sense when you consider that some of the Keycloak caches are distributed across the Keycloak nodes in the default configuration within standalone-ha.xml
. However, I have modified these caches to be local caches with a remote-store
pointing to my new Infinispan cluster, and I believe I have made some incorrect assumptions about how this works, causing this error to start happening.
Here is how my Keycloak caches are configured:
<subsystem xmlns="urn:jboss:domain:infinispan:7.0">
<cache-container name="keycloak" module="org.keycloak.keycloak-model-infinispan">
<transport lock-timeout="60000"/>
<local-cache name="realms">
<object-memory size="10000"/>
</local-cache>
<local-cache name="users">
<object-memory size="10000"/>
</local-cache>
<local-cache name="authorization">
<object-memory size="10000"/>
</local-cache>
<local-cache name="keys">
<object-memory size="1000"/>
<expiration max-idle="3600000"/>
</local-cache>
<local-cache name="sessions">
<remote-store cache="sessions" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<local-cache name="authenticationSessions">
<remote-store cache="authenticationSessions" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<local-cache name="offlineSessions">
<remote-store cache="offlineSessions" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<local-cache name="clientSessions">
<remote-store cache="clientSessions" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<local-cache name="offlineClientSessions">
<remote-store cache="offlineClientSessions" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<local-cache name="loginFailures">
<remote-store cache="loginFailures" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<local-cache name="actionTokens">
<remote-store cache="actionTokens" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</local-cache>
<replicated-cache name="work">
<remote-store cache="work" remote-servers="remote-cache" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</replicated-cache>
</cache-container>
<cache-container name="server" aliases="singleton cluster" default-cache="default" module="org.wildfly.clustering.server">
<transport lock-timeout="60000"/>
<replicated-cache name="default">
<transaction mode="BATCH"/>
</replicated-cache>
</cache-container>
<cache-container name="web" default-cache="dist" module="org.wildfly.clustering.web.infinispan">
<transport lock-timeout="60000"/>
<distributed-cache name="dist">
<locking isolation="REPEATABLE_READ"/>
<transaction mode="BATCH"/>
<file-store/>
</distributed-cache>
</cache-container>
<cache-container name="ejb" aliases="sfsb" default-cache="dist" module="org.wildfly.clustering.ejb.infinispan">
<transport lock-timeout="60000"/>
<distributed-cache name="dist">
<locking isolation="REPEATABLE_READ"/>
<transaction mode="BATCH"/>
<file-store/>
</distributed-cache>
</cache-container>
<cache-container name="hibernate" module="org.infinispan.hibernate-cache">
<transport lock-timeout="60000"/>
<local-cache name="local-query">
<object-memory size="10000"/>
<expiration max-idle="100000"/>
</local-cache>
<invalidation-cache name="entity">
<transaction mode="NON_XA"/>
<object-memory size="10000"/>
<expiration max-idle="100000"/>
</invalidation-cache>
<replicated-cache name="timestamps"/>
</cache-container>
</subsystem>
Note that most of this cache configuration is unchanged when compared to the default standalone-ha.xml
configuration file. The changes I have made here are changing the following caches to be local
and pointing them to my remote Infinispan cluster:
sessions
authenticationSessions
offlineSessions
clientSessions
offlineClientSessions
loginFailures
actionTokens
work
Here is the configuration for my remote-cache
server:
<socket-binding-group name="standard-sockets" default-interface="public" port-offset="${jboss.socket.binding.port-offset:0}">
<!-- Default socket bindings from standalone-ha.xml are not listed here for brevity -->
<outbound-socket-binding name="remote-cache">
<remote-destination host="${env.INFINISPAN_HOST}" port="${remote.cache.port:11222}"/>
</outbound-socket-binding>
</socket-binding-group>
Here is how my caches are configured on the Infinispan side:
<subsystem xmlns="urn:infinispan:server:core:9.4" default-cache-container="clustered">
<cache-container name="clustered" default-cache="default">
<transport lock-timeout="60000"/>
<global-state/>
<replicated-cache-configuration name="replicated-keycloak" mode="SYNC">
<locking acquire-timeout="3000" />
</replicated-cache-configuration>
<replicated-cache name="work" configuration="replicated-keycloak"/>
<replicated-cache name="sessions" configuration="replicated-keycloak"/>
<replicated-cache name="authenticationSessions" configuration="replicated-keycloak"/>
<replicated-cache name="clientSessions" configuration="replicated-keycloak"/>
<replicated-cache name="offlineSessions" configuration="replicated-keycloak"/>
<replicated-cache name="offlineClientSessions" configuration="replicated-keycloak"/>
<replicated-cache name="actionTokens" configuration="replicated-keycloak"/>
<replicated-cache name="loginFailures" configuration="replicated-keycloak"/>
</cache-container>
</subsystem>
I believe I have made some incorrect assumptions about how local caches with remote stores work, and I was hoping someone would be able to clear this up for me. My intention was to make the Infinispan cluster the source of truth for all of Keycloak's caches. By making every cache local, I assumed that data would be replicated to each Keycloak node through the Infinispan cluster, such that a write to the local authenticationSessions
cache on keycloak-0
would be synchronously persisted to keycloak-1
through the Infinispan cluster.
What I believe is happening is that the write to a local cache on Keycloak is not synchronous with respect to persisting that value to the remote Infinispan cluster. In other words, when a write is performed to the authenticationSessions
cache, it does not block while waiting for this value to be written to the Infinispan cluster, so an immediate read for this data on another Keycloak node results in a cache miss, locally and in the Infinispan cluster.
I'm looking for some help with identifying why my current configuration is causing this issue, and some clarification on the behavior of a remote-store
- is there a way to get cache writes to a local cache backed by a remote-store
to be synchronous? If not, is there a better way to do what I'm trying to accomplish here?
Some other potentially relevant details:
KUBE_PING
for JGroups discovery.work
cache is being propagated across all Keycloak nodes.Thanks in advance!
The HotRod protocol used for communication between Infinispan servers and Keycloak servers has a feature that Infinispan servers will automatically send new topology to the Keycloak servers about the change in the Infinispan cluster, so the remote store on Keycloak side will know to which Infinispan servers it can connect.
The main reason to make a configuration of the type you are trying to do instead of one similar to the one proposed by Keycloak (standalone-ha.xml), is when you have a requirement to independently scale the infinispan cluster of the application or using infinispan as a persistent store.
Cross-Datacenter Replication Mode is Technology Preview and is not fully supported. Cross-Datacenter Replication mode is for when you want to run Keycloak in a cluster across multiple data centers, most typically using data center sites that are in different geographic regions.
Infinispan’s main application is distributed cache, but it is also used as KC storage in NoSQL databases. The platform supports two launch methods: deploying as a stand-alone server / server cluster and using it as an embedded library to extend the functionality of the main application. KC uses the built-in Infinispan cache by default.
I will try to clarify some points to take in mind when you configure Keycloak in cluster.
Talking about subject of "infinite redirects", I have experienced a similar problem in development environments years ago. While the keycloak team has corrected several bugs related to infinite loops (e.g. KEYCLOAK-5856, KEYCLOAK-5022, KEYCLOAK-4717, KEYCLOAK-4552, KEYCLOAK-3878) sometimes it is happening due to configuration issues.
One thing to check if the site is HTTPS is to be accessing a Keycloak server by HTTPS as well.
I remember suffered a similar problem to the infinite loop when the Keycloak was placed behind an HTTPS reverse proxy and the needed headers were not propagated to the Keycloak (headers X-FOWARDED...). It was solved setting up the environment well. It can happen a similar problem when the nodes discovery in the cluster does not work correctly (JGroups).
About the error message "expired_code", I would verify that the clocks of each node are synchronized since it can lead to this kind of expired token / code error.
Now understanding better your configuration, it does not seem inappropriate to use the "local-cache" mode with a remote-store, pointing to the infinispan cluster.
Although, usually, the shared store (such as a remote-cache) is usually used with an invalidation-cache where it is avoided to replicate the complete data by the cluster (see comment that can be applied here https://developer.jboss.org/message/986847#986847), there may not be big differences with a distributed or invalidation cache.
I believe that a distributed-cache with a remote-store would apply better (or an invalidation-cache to avoid replicating heavy data to the owners) however I could not ensure how a "local-cache" works with a remote storage (shared) since I have never tried this kind of configuration. I would first choose to test a distributed-cache or an invalidation-cache given by how it works with the evicted / invalidated data. Normally, local caches do not synchronize with other remote nodes in the cluster. If this kind of implementation keeps a local map in memory, it is likely that even if the data in the remote-storage is modified, these changes may be not reflected in some situations. I can give you a Jmeter test file that you can use so that you can try to perform your own tests with both configurations.
Returning to the topic of your configuration, you have to take into account in addition to that the replicated cache have certain limitations and are usually a little slower than the distributed ones that only replicate the data to the defined owners (the replicated ones write in all the nodes). There is also a variant called scattered-cache that performs better but for example lacks Transaction support (you can see here a comparative chart https://infinispan.org/docs/stable/user_guide/user_guide.html#which_cache_mode_should_i_use). Replication usually only performs well in small clusters (under 8 or 10 servers), due to the number of replication messages that need to send. Distributed cache allows Infinispan to scale linearly by defining a number of replicas by entry.
The main reason to make a configuration of the type you are trying to do instead of one similar to the one proposed by Keycloak (standalone-ha.xml), is when you have a requirement to independently scale the infinispan cluster of the application or using infinispan as a persistent store.
I will explain how Keycloak manages its cache and how it divides it into two or three groups basically so you can better understand the configuration you need.
Usually, to configure Keycloak in a cluster, simply raise and configure the Keycloak in HA mode just as you would do with a traditional instance of Wildfly. If one observes the differences between the standalone.xml and the standalone-ha.xml that comes in the keycloak installation, one notices that basically support is added to "Jgroups", "modcluster", and the caches are distributed (which were previously local) between the nodes in Wildfly / Keycloak (HA).
In detail:
I think that your cache configuration should be defined as "distributed-cache" for caches like "sessions", "authenticationSessions", "offlineSessions", "clientSessions", "offlineClientSessions", "loginFailures" and "actionTokens" (instead of "local"). However, because you use a remote shared store, you should test it to see how it works as I said before.
Also, cache named "work" should be "replicated-cache" and the others ("keys", "authorization", "realms" and "users") should be defined as "local-cache".
In your infinispan cluster you can define it as "distributed-cache" (or "replicated-cache").
Remember that:
In a replicated cache all nodes in a cluster hold all keys i.e. if a key exists on one node, it will also exist on all other nodes. In a distributed cache, a number of copies are maintained to provide redundancy and fault tolerance, however this is typically far fewer than the number of nodes in the cluster. A distributed cache provides a far greater degree of scalability than a replicated cache. A distributed cache is also able to transparently locate keys across a cluster, and provides an L1 cache for fast local read access of state that is stored remotely. You can read more in the relevant User Guide chapter.
Infinispan doc. ref: cache mode
As the Keycloak (6.0) documentation says:
Keycloak has two types of caches. One type of cache sits in front of the database to decrease load on the DB and to decrease overall response times by keeping data in memory. Realm, client, role, and user metadata is kept in this type of cache. This cache is a local cache. Local caches do not use replication even if you are in the cluster with more Keycloak servers. Instead, they only keep copies locally and if the entry is updated an invalidation message is sent to the rest of the cluster and the entry is evicted. There is separate replicated cache work, which task is to send the invalidation messages to the whole cluster about what entries should be evicted from local caches. This greatly reduces network traffic, makes things efficient, and avoids transmitting sensitive metadata over the wire.
The second type of cache handles managing user sessions, offline tokens, and keeping track of login failures so that the server can detect password phishing and other attacks. The data held in these caches is temporary, in memory only, but is possibly replicated across the cluster.
Doc. Reference: cache configuration
If you want to read another good document, you can take a look to "cross-dc" section (cross-dc mode) especially section "3.4.6 Infinispan cache" (infinispan cache)
I tried with Keycloak 6.0.1 and Infinispan 9.4.11.Final, here is my test configuration (based on standalone-ha.xml file).
Keycloak infinispan subsystem:
<subsystem xmlns="urn:jboss:domain:infinispan:8.0">
<cache-container name="keycloak" module="org.keycloak.keycloak-model-infinispan">
<transport lock-timeout="60000"/>
<local-cache name="realms">
<object-memory size="10000"/>
</local-cache>
<local-cache name="users">
<object-memory size="10000"/>
</local-cache>
<distributed-cache name="sessions" owners="1" remote-timeout="30000">
<remote-store cache="sessions" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</distributed-cache>
<distributed-cache name="authenticationSessions" owners="1" remote-timeout="30000">
<remote-store cache="authenticationSessions" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</distributed-cache>
<distributed-cache name="offlineSessions" owners="1" remote-timeout="30000">
<remote-store cache="offlineSessions" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</distributed-cache>
<distributed-cache name="clientSessions" owners="1" remote-timeout="30000">
<remote-store cache="clientSessions" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</distributed-cache>
<distributed-cache name="offlineClientSessions" owners="1" remote-timeout="30000">
<remote-store cache="offlineClientSessions" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</distributed-cache>
<distributed-cache name="loginFailures" owners="1" remote-timeout="30000">
<remote-store cache="loginFailures" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
</distributed-cache>
<replicated-cache name="work"/>
<local-cache name="authorization">
<object-memory size="10000"/>
</local-cache>
<local-cache name="keys">
<object-memory size="1000"/>
<expiration max-idle="3600000"/>
</local-cache>
<distributed-cache name="actionTokens" owners="1" remote-timeout="30000">
<remote-store cache="actionTokens" remote-servers="remote-cache" socket-timeout="60000" fetch-state="false" passivation="false" preload="false" purge="false" shared="true">
<property name="rawValues">
true
</property>
<property name="marshaller">
org.keycloak.cluster.infinispan.KeycloakHotRodMarshallerFactory
</property>
</remote-store>
<object-memory size="-1"/>
<expiration max-idle="-1" interval="300000"/>
</distributed-cache>
</cache-container>
Keycloak socket bindings:
<socket-binding-group name="standard-sockets" default-interface="public" port-offset="${jboss.socket.binding.port-offset:0}">
<socket-binding name="management-http" interface="management" port="${jboss.management.http.port:9990}"/>
<socket-binding name="management-https" interface="management" port="${jboss.management.https.port:9993}"/>
<socket-binding name="ajp" port="${jboss.ajp.port:8009}"/>
<socket-binding name="http" port="${jboss.http.port:8080}"/>
<socket-binding name="https" port="${jboss.https.port:8443}"/>
<socket-binding name="jgroups-mping" interface="private" multicast-address="${jboss.default.multicast.address:230.0.0.4}" multicast-port="45700"/>
<socket-binding name="jgroups-tcp" interface="private" port="7600"/>
<socket-binding name="jgroups-udp" interface="private" port="55200" multicast-address="${jboss.default.multicast.address:230.0.0.4}" multicast-port="45688"/>
<socket-binding name="modcluster" multicast-address="${jboss.modcluster.multicast.address:224.0.1.105}" multicast-port="23364"/>
<socket-binding name="txn-recovery-environment" port="4712"/>
<socket-binding name="txn-status-manager" port="4713"/>
<outbound-socket-binding name="remote-cache">
<remote-destination host="my-server-domain.com" port="11222"/>
</outbound-socket-binding>
<outbound-socket-binding name="mail-smtp">
<remote-destination host="localhost" port="25"/>
</outbound-socket-binding>
</socket-binding-group>
Infinispan cluster configuration:
<subsystem xmlns="urn:infinispan:server:core:9.4" default-cache-container="clustered">
<cache-container name="clustered" default-cache="default" statistics="true">
<transport lock-timeout="60000"/>
<global-state/>
<distributed-cache-configuration name="transactional">
<transaction mode="NON_XA" locking="PESSIMISTIC"/>
</distributed-cache-configuration>
<distributed-cache-configuration name="async" mode="ASYNC"/>
<replicated-cache-configuration name="replicated"/>
<distributed-cache-configuration name="persistent-file-store">
<persistence>
<file-store shared="false" fetch-state="true"/>
</persistence>
</distributed-cache-configuration>
<distributed-cache-configuration name="indexed">
<indexing index="LOCAL" auto-config="true"/>
</distributed-cache-configuration>
<distributed-cache-configuration name="memory-bounded">
<memory>
<binary size="10000000" eviction="MEMORY"/>
</memory>
</distributed-cache-configuration>
<distributed-cache-configuration name="persistent-file-store-passivation">
<memory>
<object size="10000"/>
</memory>
<persistence passivation="true">
<file-store shared="false" fetch-state="true">
<write-behind modification-queue-size="1024" thread-pool-size="1"/>
</file-store>
</persistence>
</distributed-cache-configuration>
<distributed-cache-configuration name="persistent-file-store-write-behind">
<persistence>
<file-store shared="false" fetch-state="true">
<write-behind modification-queue-size="1024" thread-pool-size="1"/>
</file-store>
</persistence>
</distributed-cache-configuration>
<distributed-cache-configuration name="persistent-rocksdb-store">
<persistence>
<rocksdb-store shared="false" fetch-state="true"/>
</persistence>
</distributed-cache-configuration>
<distributed-cache-configuration name="persistent-jdbc-string-keyed">
<persistence>
<string-keyed-jdbc-store datasource="java:jboss/datasources/ExampleDS" fetch-state="true" preload="false" purge="false" shared="false">
<string-keyed-table prefix="ISPN">
<id-column name="id" type="VARCHAR"/>
<data-column name="datum" type="BINARY"/>
<timestamp-column name="version" type="BIGINT"/>
</string-keyed-table>
<write-behind modification-queue-size="1024" thread-pool-size="1"/>
</string-keyed-jdbc-store>
</persistence>
</distributed-cache-configuration>
<distributed-cache name="default"/>
<replicated-cache name="repl" configuration="replicated"/>
<replicated-cache name="work" configuration="replicated"/>
<replicated-cache name="sessions" configuration="replicated"/>
<replicated-cache name="authenticationSessions" configuration="replicated"/>
<replicated-cache name="clientSessions" configuration="replicated"/>
<replicated-cache name="offlineSessions" configuration="replicated"/>
<replicated-cache name="offlineClientSessions" configuration="replicated"/>
<replicated-cache name="actionTokens" configuration="replicated"/>
<replicated-cache name="loginFailures" configuration="replicated"/>
</cache-container>
</subsystem>
P.S. Change attribute "owners" from 1 to your favorite value.
I hope to be helpful.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With