/* mmanon.c
 * anonymize IP addresses inside the syslog message part
 *
 * Copyright 2013 Adiscon GmbH.
 *
 * This file is part of rsyslog.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *       -or-
 *       see COPYING.ASL20 in the source distribution
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "config.h"
#include "rsyslog.h"
#include <stdio.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <signal.h>
#include <errno.h>
#include <unistd.h>
#include <stdint.h>
#include "conf.h"
#include "syslogd-types.h"
#include "srUtils.h"
#include "template.h"
#include "module-template.h"
#include "errmsg.h"
#include "parserif.h"
#include "hashtable.h"
#include <pthread.h>


MODULE_TYPE_OUTPUT;
MODULE_TYPE_NOKEEP;
MODULE_CNFNAME("mmanon")


DEF_OMOD_STATIC_DATA;

/* config variables */

/*
 * Anonymization strategy for an address family.
 *  - ZERO:      the anonymized bits are cleared.
 *  - RANDOMINT: the anonymized bits are replaced by random data; also used
 *               as the base mode for the "random-consistent" variants.
 *  - SIMPLE:    selected by the "simple"/"rewrite" values of ipv4.mode; it is
 *               not handled by the numeric rewrite helper code_ipv4_int().
 */
enum mode { ZERO, RANDOMINT, SIMPLE };

/*
 * Fallback applied when the max-retry limit is hit in
 * random-consistent-unique mode. Selected via the "*.maxretryhandling"
 * parameters using the values "zero" and "accept-duplicates".
 */
enum maxRetryOption { MAX_RETRY_ZERO, MAX_RETRY_ACCEPT_DUPLICATES };

/*
 * Binary tree node used in consistent IPv4 randomization. Each level mirrors
 * one bit of the original address; the final node stores two replacement
 * strings so we do not need to allocate a separate leaf for the last bit.
 *
 * Which union member is active is not stored in the node itself; it follows
 * from the depth (delTree() treats nodes at layer 31 as terminal nodes).
 */
union node {
    /* inner node: children for the two possible values of the current bit */
    struct {
        union node *more;
        union node *less;
    } pointer;
    /* terminal node: two replacement addresses, each sized for a dotted-quad
     * string ("255.255.255.255" plus NUL = 16 bytes) */
    struct {
        char ip_high[16];
        char ip_low[16];
    } ips;
};

/* Split IPv6 into two 64-bit halves so masks and randomization can be applied
 * without using compiler-specific 128-bit integers.
 * NOTE(review): `unsigned long long` is only guaranteed to be *at least* 64 bits
 * wide; presumably `high` holds the most significant half - confirm at the users. */
struct ipv6_int {
    unsigned long long high;
    unsigned long long low;
};
/* define operation modes we have */
#define SIMPLE_MODE 0  // just overwrite
#define REWRITE_MODE 1  // rewrite IP address, canonicalized
/*
 * Per-action configuration and shared state. One group of settings exists for
 * each address family (plain IPv4, IPv6, and IPv6 with an embedded IPv4 tail);
 * defaults come from setInstParamDefaults(), overrides from newActInst().
 */
typedef struct _instanceData {
    /*
     * Concurrency & Locking
     * - ipv4Mutex protects the IPv4 consistency trie shared across workers and
     *   prevents concurrent insertions from corrupting the shared prefixes.
     * - ipv6Mutex protects the IPv6 and embedded-IPv4 consistency hash tables
     *   so worker lookups do not race with first-time allocations of entries.
     */
    pthread_mutex_t ipv4Mutex;
    pthread_mutex_t ipv6Mutex;
    struct {
        sbool enable; /* "ipv4.enable", default on */
        int8_t bits; /* "ipv4.bits", default 16; signed, so later code also checks for < 0 */
        union node *Root; /* root of the consistency trie, released via delTree() */
        int randConsis; /* set for "random-consistent" and "random-consistent-unique" */
        int randConsisUnique; /* set only for "random-consistent-unique" */
        sbool limitMaxRetries; /* "ipv4.limituniquemaxretries" */
        unsigned int maxRetryCount; /* "ipv4.uniqueretrycount", default 1000 */
        enum maxRetryOption maxRetryFallback; /* "ipv4.maxretryhandling" */
        struct hashtable *randConsisUniqueGeneratedIPs; /* NULL until created; destroyed in freeInstance */
        enum mode mode; /* "ipv4.mode" / legacy "mode" */
        uchar replaceChar; /* "ipv4.replacechar" / legacy "replacementchar", default 'x' */
    } ipv4;

    struct {
        sbool enable; /* "ipv6.enable", default on */
        uint8_t bits; /* "ipv6.bits", default 96 */
        enum mode anonmode; /* "ipv6.anonmode" */
        int randConsis; /* set for "random-consistent" and "random-consistent-unique" */
        int randConsisUnique; /* set only for "random-consistent-unique" */
        sbool limitMaxRetries; /* "ipv6.limituniquemaxretries" */
        unsigned int maxRetryCount; /* "ipv6.uniqueretrycount", default 1000 */
        enum maxRetryOption maxRetryFallback; /* "ipv6.maxretryhandling" */
        struct hashtable *randConsisUniqueGeneratedIPs; /* NULL until created; destroyed in freeInstance */
        struct hashtable *randConsisIPs; /* NULL until created; destroyed in freeInstance */
    } ipv6;

    struct {
        sbool enable; /* "embeddedipv4.enable", default on */
        uint8_t bits; /* "embeddedipv4.bits", default 96 */
        enum mode anonmode; /* "embeddedipv4.anonmode" */
        int randConsis; /* set for "random-consistent" and "random-consistent-unique" */
        int randConsisUnique; /* set only for "random-consistent-unique" */
        sbool limitMaxRetries; /* "embeddedipv4.limituniquemaxretries" */
        unsigned int maxRetryCount; /* "embeddedipv4.uniqueretrycount", default 1000 */
        enum maxRetryOption maxRetryFallback; /* "embeddedipv4.maxretryhandling" */
        struct hashtable *randConsisUniqueGeneratedIPs; /* NULL until created; destroyed in freeInstance */
        struct hashtable *randConsisIPs; /* NULL until created; destroyed in freeInstance */
    } embeddedIPv4;
} instanceData;

/* Per-worker state: a back pointer to the shared action instance plus a
 * private rand_r() seed so random number generation needs no locking. */
typedef struct wrkrInstanceData {
    instanceData *pData; /* NOTE(review): presumably set by the module-template macros - not assigned in this file's visible code */
    unsigned randstatus; /* rand_r() state, seeded from time(NULL) in createWrkrInstance */
} wrkrInstanceData_t;

/* Module-global configuration; this module keeps no settings of its own and
 * only records the owning rsyslog configuration object. */
struct modConfData_s {
    rsconf_t *pConf;  // our overall config object
};
static modConfData_t *loadModConf = NULL;  // modConf ptr to use for the current load process
static modConfData_t *runModConf = NULL;  // modConf ptr to use for the current exec process

/* Forward declarations of the hashtable callbacks for 32-bit keys; their
 * definitions are not part of this section of the file. */
static unsigned hash_from_u32(void *k);
static int key_equals_u32(void *key1, void *key2);


/* tables for interfacing with the v6 config system */
/* action (instance) parameters
 * Every name listed here must have a matching branch in newActInst(); a name
 * without one triggers the "non-handled param" program error there.
 * "mode" and "replacementchar" are handled as aliases of "ipv4.mode" and
 * "ipv4.replacechar" respectively.
 */
static struct cnfparamdescr actpdescr[] = {{"ipv4.enable", eCmdHdlrBinary, 0},
                                           {"ipv4.mode", eCmdHdlrGetWord, 0},
                                           {"mode", eCmdHdlrGetWord, 0},
                                           {"ipv4.bits", eCmdHdlrPositiveInt, 0},
                                           {"ipv4.replacechar", eCmdHdlrGetChar, 0},
                                           {"replacementchar", eCmdHdlrGetChar, 0},
                                           {"ipv4.limituniquemaxretries", eCmdHdlrBinary, 0},
                                           {"ipv4.uniqueretrycount", eCmdHdlrPositiveInt, 0},
                                           {"ipv4.maxretryhandling", eCmdHdlrGetWord, 0},
                                           {"ipv6.enable", eCmdHdlrBinary, 0},
                                           {"ipv6.anonmode", eCmdHdlrGetWord, 0},
                                           {"ipv6.bits", eCmdHdlrPositiveInt, 0},
                                           {"ipv6.limituniquemaxretries", eCmdHdlrBinary, 0},
                                           {"ipv6.uniqueretrycount", eCmdHdlrPositiveInt, 0},
                                           {"ipv6.maxretryhandling", eCmdHdlrGetWord, 0},
                                           {"embeddedipv4.enable", eCmdHdlrBinary, 0},
                                           {"embeddedipv4.anonmode", eCmdHdlrGetWord, 0},
                                           {"embeddedipv4.bits", eCmdHdlrPositiveInt, 0},
                                           {"embeddedipv4.limituniquemaxretries", eCmdHdlrBinary, 0},
                                           {"embeddedipv4.uniqueretrycount", eCmdHdlrPositiveInt, 0},
                                           {"embeddedipv4.maxretryhandling", eCmdHdlrGetWord, 0}};
/* parameter block handed to nvlstGetParams(); the count is derived from the table size */
static struct cnfparamblk actpblk = {CNFPARAMBLK_VERSION, sizeof(actpdescr) / sizeof(struct cnfparamdescr), actpdescr};

/* Configuration lifecycle hooks. The function frames (and the pModConf/pConf
 * variables used below) come from the module-template macros, which are not
 * visible in this file. */

/* Start of config load: remember the module config being loaded and link it
 * to the overall configuration object. */
BEGINbeginCnfLoad
    CODESTARTbeginCnfLoad;
    loadModConf = pModConf;
    pModConf->pConf = pConf;
ENDbeginCnfLoad

/* End of config load: nothing to do for this module. */
BEGINendCnfLoad
    CODESTARTendCnfLoad;
ENDendCnfLoad

/* Config check: no module-level settings to validate. */
BEGINcheckCnf
    CODESTARTcheckCnf;
ENDcheckCnf

/* Config activation: the given module config becomes the running one. */
BEGINactivateCnf
    CODESTARTactivateCnf;
    runModConf = pModConf;
ENDactivateCnf

/* Config teardown: the module config owns no resources of its own. */
BEGINfreeCnf
    CODESTARTfreeCnf;
ENDfreeCnf


/* Create an action instance: initialize the two mutexes guarding the shared
 * consistency structures (default attributes). The return values of
 * pthread_mutex_init() are not checked here. The matching destroy calls are
 * in freeInstance. */
BEGINcreateInstance
    CODESTARTcreateInstance;
    pthread_mutex_init(&pData->ipv4Mutex, NULL);
    pthread_mutex_init(&pData->ipv6Mutex, NULL);
ENDcreateInstance

/* Create a worker instance: seed the worker-private rand_r() state from the
 * wall clock. Workers created within the same second start with the same
 * seed. */
BEGINcreateWrkrInstance
    CODESTARTcreateWrkrInstance;
    pWrkrData->randstatus = time(NULL);
ENDcreateWrkrInstance


/* Feature query hook: no module-specific code is added here, so the result is
 * whatever the module-template macros provide by default. */
BEGINisCompatibleWithFeature
    CODESTARTisCompatibleWithFeature;
ENDisCompatibleWithFeature


/**
 * \brief Free the IPv4 consistency trie rooted at \p node.
 *
 * Performs a post-order walk: both subtrees are released before the node
 * itself. Nodes at layer 31 are terminal (they hold replacement strings rather
 * than child pointers), so their contents are never followed.
 *
 * \param node subtree root; NULL is accepted and ignored.
 * \param layer depth of \p node, with the trie root at layer 0.
 */
static void delTree(union node *node, const int layer) {
    if (node == NULL) {
        return;
    }
    /* only inner nodes carry child pointers */
    if (layer != 31) {
        delTree(node->pointer.more, layer + 1);
        delTree(node->pointer.less, layer + 1);
    }
    free(node);
}


/* Release everything owned by an action instance: the IPv4 trie, all
 * consistency hash tables that were actually created, and finally the mutexes
 * initialized in createInstance. */
BEGINfreeInstance
    CODESTARTfreeInstance;
    delTree(pData->ipv4.Root, 0);
    /* NOTE(review): the second hashtable_destroy() argument is presumably the
     * "free values" flag - the *GeneratedIPs tables pass 0, the randConsisIPs
     * tables pass 1; confirm against hashtable.h. */
    if (pData->ipv4.randConsisUniqueGeneratedIPs != NULL) {
        hashtable_destroy(pData->ipv4.randConsisUniqueGeneratedIPs, 0);
    }
    if (pData->ipv6.randConsisIPs != NULL) {
        hashtable_destroy(pData->ipv6.randConsisIPs, 1);
    }
    if (pData->ipv6.randConsisUniqueGeneratedIPs != NULL) {
        hashtable_destroy(pData->ipv6.randConsisUniqueGeneratedIPs, 0);
    }
    if (pData->embeddedIPv4.randConsisIPs != NULL) {
        hashtable_destroy(pData->embeddedIPv4.randConsisIPs, 1);
    }
    if (pData->embeddedIPv4.randConsisUniqueGeneratedIPs != NULL) {
        hashtable_destroy(pData->embeddedIPv4.randConsisUniqueGeneratedIPs, 0);
    }
    pthread_mutex_destroy(&pData->ipv4Mutex);
    pthread_mutex_destroy(&pData->ipv6Mutex);
ENDfreeInstance


/* Worker teardown: the worker data holds no separately allocated resources,
 * so no module-specific cleanup is performed. */
BEGINfreeWrkrInstance
    CODESTARTfreeWrkrInstance;
ENDfreeWrkrInstance


/**
 * \brief Populate a fresh action instance with default settings.
 *
 * Called before the user configuration is applied. The assignments are grouped
 * by setting so that the per-family defaults can be compared at a glance.
 *
 * \param pData instance data to initialize.
 */
static inline void setInstParamDefaults(instanceData *pData) {
    /* all three address families are anonymized unless switched off */
    pData->ipv4.enable = 1;
    pData->ipv6.enable = 1;
    pData->embeddedIPv4.enable = 1;

    /* number of bits to anonymize */
    pData->ipv4.bits = 16;
    pData->ipv6.bits = 96;
    pData->embeddedIPv4.bits = 96;

    /* default strategy: zero the anonymized bits */
    pData->ipv4.mode = ZERO;
    pData->ipv6.anonmode = ZERO;
    pData->embeddedIPv4.anonmode = ZERO;

    /* consistent / unique randomization is opt-in */
    pData->ipv4.randConsis = 0;
    pData->ipv6.randConsis = 0;
    pData->embeddedIPv4.randConsis = 0;
    pData->ipv4.randConsisUnique = 0;
    pData->ipv6.randConsisUnique = 0;
    pData->embeddedIPv4.randConsisUnique = 0;

    /* retry handling for random-consistent-unique mode */
    pData->ipv4.limitMaxRetries = 0;
    pData->ipv6.limitMaxRetries = 0;
    pData->embeddedIPv4.limitMaxRetries = 0;
    pData->ipv4.maxRetryCount = 1000;
    pData->ipv6.maxRetryCount = 1000;
    pData->embeddedIPv4.maxRetryCount = 1000;
    pData->ipv4.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
    pData->ipv6.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
    pData->embeddedIPv4.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;

    /* shared lookup structures start out empty */
    pData->ipv4.Root = NULL;
    pData->ipv4.randConsisUniqueGeneratedIPs = NULL;
    pData->ipv6.randConsisUniqueGeneratedIPs = NULL;
    pData->ipv6.randConsisIPs = NULL;
    pData->embeddedIPv4.randConsisUniqueGeneratedIPs = NULL;
    pData->embeddedIPv4.randConsisIPs = NULL;

    /* IPv4-only: replacement character */
    pData->ipv4.replaceChar = 'x';
}

/* Create and configure a new action instance from the action() parameters.
 *
 * Flow: fetch the parameter values, create the instance, apply defaults, then
 * walk all parameters that were actually set. Unknown enumeration words are
 * reported and leave the current (default) value in place. Out-of-range
 * "*.bits" values are clamped to the family maximum with a warning. After the
 * loop, ipv4.bits is rounded to a whole-octet value when simple mode is
 * selected.
 *
 * The "*.bits" range checks are done on the full-width configured value. They
 * must not be done after narrowing to the 8-bit field type: that would let
 * e.g. 300 slip through as 44, or 200 as a negative ipv4.bits value.
 */
BEGINnewActInst
    struct cnfparamvals *pvals;
    int i;
    CODESTARTnewActInst;
    DBGPRINTF("newActInst (mmanon)\n");
    if ((pvals = nvlstGetParams(lst, &actpblk, NULL)) == NULL) {
        ABORT_FINALIZE(RS_RET_MISSING_CNFPARAMS);
    }

    CODE_STD_STRING_REQUESTnewActInst(1);
    CHKiRet(OMSRsetEntry(*ppOMSR, 0, NULL, OMSR_TPL_AS_MSG));
    CHKiRet(createInstance(&pData));
    setInstParamDefaults(pData);

    for (i = 0; i < actpblk.nParams; ++i) {
        if (!pvals[i].bUsed) continue;
        if (!strcmp(actpblk.descr[i].name, "ipv4.mode") || !strcmp(actpblk.descr[i].name, "mode")) {
            if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"zero", sizeof("zero") - 1)) {
                pData->ipv4.mode = ZERO;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random", sizeof("random") - 1)) {
                pData->ipv4.mode = RANDOMINT;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"simple", sizeof("simple") - 1) ||
                       !es_strbufcmp(pvals[i].val.d.estr, (uchar *)"rewrite", sizeof("rewrite") - 1)) {
                pData->ipv4.mode = SIMPLE;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random-consistent",
                                     sizeof("random-consistent") - 1)) {
                pData->ipv4.mode = RANDOMINT;
                pData->ipv4.randConsis = 1;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random-consistent-unique",
                                     sizeof("random-consistent-unique") - 1)) {
                pData->ipv4.mode = RANDOMINT;
                pData->ipv4.randConsis = 1;
                pData->ipv4.randConsisUnique = 1;
            } else {
                parser_errmsg(
                    "mmanon: configuration error, unknown option for ipv4.mode, "
                    "will use \"zero\"\n");
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv4.bits")) {
            /* compare before narrowing to int8_t, see function header */
            if (pvals[i].val.d.n >= 0 && pvals[i].val.d.n <= 32) {
                pData->ipv4.bits = (int8_t)pvals[i].val.d.n;
            } else {
                pData->ipv4.bits = 32;
                parser_warnmsg(
                    "warning: invalid number of ipv4.bits (%d), corrected "
                    "to 32",
                    (int)pvals[i].val.d.n);
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv4.enable")) {
            pData->ipv4.enable = (int)pvals[i].val.d.n;
        } else if (!strcmp(actpblk.descr[i].name, "ipv4.replacechar") ||
                   !strcmp(actpblk.descr[i].name, "replacementchar")) {
            uchar *tmp = (uchar *)es_str2cstr(pvals[i].val.d.estr, NULL);
            /* on allocation failure keep the default replacement character */
            if (tmp != NULL) {
                pData->ipv4.replaceChar = tmp[0];
            }
            free(tmp);
        } else if (!strcmp(actpblk.descr[i].name, "ipv4.limituniquemaxretries")) {
            pData->ipv4.limitMaxRetries = (int)pvals[i].val.d.n;
        } else if (!strcmp(actpblk.descr[i].name, "ipv4.uniqueretrycount")) {
            if (pvals[i].val.d.n >= 0) {
                pData->ipv4.maxRetryCount = (unsigned int)pvals[i].val.d.n;
            } else {
                pData->ipv4.maxRetryCount = 1000;
                parser_warnmsg(
                    "warning: invalid number of ipv4.uniqueRetryCount (%d), "
                    "corrected to 1000",
                    (int)pvals[i].val.d.n);
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv4.maxretryhandling")) {
            if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"zero", sizeof("zero") - 1)) {
                pData->ipv4.maxRetryFallback = MAX_RETRY_ZERO;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"accept-duplicates",
                                     sizeof("accept-duplicates") - 1)) {
                pData->ipv4.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
            } else {
                pData->ipv4.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
                parser_errmsg(
                    "mmanon: configuration error, unknown option for "
                    "ipv4.maxRetryHandling, will use \"accept-duplicates\"\n");
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv6.enable")) {
            pData->ipv6.enable = (int)pvals[i].val.d.n;
        } else if (!strcmp(actpblk.descr[i].name, "ipv6.bits")) {
            /* compare before narrowing to uint8_t, see function header */
            if (pvals[i].val.d.n >= 0 && pvals[i].val.d.n <= 128) {
                pData->ipv6.bits = (uint8_t)pvals[i].val.d.n;
            } else {
                pData->ipv6.bits = 128;
                parser_warnmsg(
                    "warning: invalid number of ipv6.bits (%d), corrected "
                    "to 128",
                    (int)pvals[i].val.d.n);
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv6.anonmode")) {
            if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"zero", sizeof("zero") - 1)) {
                pData->ipv6.anonmode = ZERO;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random", sizeof("random") - 1)) {
                pData->ipv6.anonmode = RANDOMINT;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random-consistent",
                                     sizeof("random-consistent") - 1)) {
                pData->ipv6.anonmode = RANDOMINT;
                pData->ipv6.randConsis = 1;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random-consistent-unique",
                                     sizeof("random-consistent-unique") - 1)) {
                pData->ipv6.anonmode = RANDOMINT;
                pData->ipv6.randConsis = 1;
                pData->ipv6.randConsisUnique = 1;
            } else {
                parser_errmsg(
                    "mmanon: configuration error, unknown option for "
                    "ipv6.anonMode, will use \"zero\"\n");
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv6.limituniquemaxretries")) {
            pData->ipv6.limitMaxRetries = (int)pvals[i].val.d.n;
        } else if (!strcmp(actpblk.descr[i].name, "ipv6.uniqueretrycount")) {
            if (pvals[i].val.d.n >= 0) {
                pData->ipv6.maxRetryCount = (unsigned int)pvals[i].val.d.n;
            } else {
                pData->ipv6.maxRetryCount = 1000;
                parser_warnmsg(
                    "warning: invalid number of ipv6.uniqueRetryCount (%d), "
                    "corrected to 1000",
                    (int)pvals[i].val.d.n);
            }
        } else if (!strcmp(actpblk.descr[i].name, "ipv6.maxretryhandling")) {
            if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"zero", sizeof("zero") - 1)) {
                pData->ipv6.maxRetryFallback = MAX_RETRY_ZERO;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"accept-duplicates",
                                     sizeof("accept-duplicates") - 1)) {
                pData->ipv6.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
            } else {
                pData->ipv6.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
                parser_errmsg(
                    "mmanon: configuration error, unknown option for "
                    "ipv6.maxRetryHandling, will use \"accept-duplicates\"\n");
            }
        } else if (!strcmp(actpblk.descr[i].name, "embeddedipv4.enable")) {
            pData->embeddedIPv4.enable = (int)pvals[i].val.d.n;
        } else if (!strcmp(actpblk.descr[i].name, "embeddedipv4.bits")) {
            /* compare before narrowing to uint8_t, see function header */
            if (pvals[i].val.d.n >= 0 && pvals[i].val.d.n <= 128) {
                pData->embeddedIPv4.bits = (uint8_t)pvals[i].val.d.n;
            } else {
                pData->embeddedIPv4.bits = 128;
                parser_warnmsg(
                    "warning: invalid number of embeddedIpv4.bits (%d), "
                    "corrected to 128",
                    (int)pvals[i].val.d.n);
            }
        } else if (!strcmp(actpblk.descr[i].name, "embeddedipv4.anonmode")) {
            if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"zero", sizeof("zero") - 1)) {
                pData->embeddedIPv4.anonmode = ZERO;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random", sizeof("random") - 1)) {
                pData->embeddedIPv4.anonmode = RANDOMINT;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random-consistent",
                                     sizeof("random-consistent") - 1)) {
                pData->embeddedIPv4.anonmode = RANDOMINT;
                pData->embeddedIPv4.randConsis = 1;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"random-consistent-unique",
                                     sizeof("random-consistent-unique") - 1)) {
                pData->embeddedIPv4.anonmode = RANDOMINT;
                pData->embeddedIPv4.randConsis = 1;
                pData->embeddedIPv4.randConsisUnique = 1;
            } else {
                parser_errmsg(
                    "mmanon: configuration error, unknown option for embeddedIpv4.anonMode, "
                    "will use \"zero\"\n");
            }
        } else if (!strcmp(actpblk.descr[i].name, "embeddedipv4.limituniquemaxretries")) {
            pData->embeddedIPv4.limitMaxRetries = (int)pvals[i].val.d.n;
        } else if (!strcmp(actpblk.descr[i].name, "embeddedipv4.uniqueretrycount")) {
            if (pvals[i].val.d.n >= 0) {
                pData->embeddedIPv4.maxRetryCount = (unsigned int)pvals[i].val.d.n;
            } else {
                pData->embeddedIPv4.maxRetryCount = 1000;
                parser_warnmsg(
                    "warning: invalid number of embeddedIpv4.uniqueRetryCount (%d), "
                    "corrected to 1000",
                    (int)pvals[i].val.d.n);
            }
        } else if (!strcmp(actpblk.descr[i].name, "embeddedipv4.maxretryhandling")) {
            if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"zero", sizeof("zero") - 1)) {
                pData->embeddedIPv4.maxRetryFallback = MAX_RETRY_ZERO;
            } else if (!es_strbufcmp(pvals[i].val.d.estr, (uchar *)"accept-duplicates",
                                     sizeof("accept-duplicates") - 1)) {
                pData->embeddedIPv4.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
            } else {
                pData->embeddedIPv4.maxRetryFallback = MAX_RETRY_ACCEPT_DUPLICATES;
                parser_errmsg(
                    "mmanon: configuration error, unknown option for "
                    "embeddedIpv4.maxRetryHandling, will use \"accept-duplicates\"\n");
            }
        } else {
            parser_errmsg(
                "mmanon: program error, non-handled "
                "param '%s'\n",
                actpblk.descr[i].name);
        }
    }

    /* Simple mode only works on whole octets: round ipv4.bits up to the next
     * multiple of 8 (a value of exactly 8, 16, 24 or 32 is kept as is). */
    int bHadBitsErr = 0;
    if (pData->ipv4.mode == SIMPLE) {
        if (pData->ipv4.bits < 8 && pData->ipv4.bits > -1) {
            pData->ipv4.bits = 8;
            bHadBitsErr = 1;
        } else if (pData->ipv4.bits < 16 && pData->ipv4.bits > 8) {
            pData->ipv4.bits = 16;
            bHadBitsErr = 1;
        } else if (pData->ipv4.bits < 24 && pData->ipv4.bits > 16) {
            pData->ipv4.bits = 24;
            bHadBitsErr = 1;
        } else if ((pData->ipv4.bits != 32 && pData->ipv4.bits > 24) || pData->ipv4.bits < 0) {
            pData->ipv4.bits = 32;
            bHadBitsErr = 1;
        }
        if (bHadBitsErr) {
            LogError(0, RS_RET_INVLD_ANON_BITS,
                     "mmanon: invalid number of ipv4 bits "
                     "in simple mode, corrected to %d",
                     pData->ipv4.bits);
        }
    }

    CODE_STD_FINALIZERnewActInst;
    cnfparamvalsDestruct(pvals, &actpblk);
ENDnewActInst


/* Debug dump hook: this module prints no instance details. */
BEGINdbgPrintInstInfo
    CODESTARTdbgPrintInstInfo;
ENDdbgPrintInstInfo


/* Resume hook: no module-specific action is taken here. */
BEGINtryResume
    CODESTARTtryResume;
ENDtryResume


/**
 * \brief Map one hexadecimal digit to its value.
 *
 * Accepts '0'-'9' as well as upper- and lower-case 'a'-'f'.
 *
 * \param c candidate hex digit.
 * \return value in [0, 15], or -1 when \p c is not a hex digit.
 */
static int getHexVal(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (c - 'a');
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    return -1;
}


/**
 * \brief Parse one decimal IPv4 octet at the start of a buffer.
 *
 * Consumes the complete leading run of decimal digits, however long it is, and
 * reports the run length through \p nprocessed. The run counts as an octet
 * only if it is one to three digits long and its value does not exceed 255.
 *
 * \param buf input buffer start.
 * \param buflen number of bytes available.
 * \param nprocessed receives the length of the leading digit run.
 * \return 1 if a valid octet was parsed, 0 otherwise.
 */
static int isPosByte(const uchar *const __restrict__ buf, const size_t buflen, size_t *const __restrict__ nprocessed) {
    size_t ndigits = 0;
    int octet = 0;

    while (ndigits < buflen && buf[ndigits] >= '0' && buf[ndigits] <= '9') {
        /* Only the first digits contribute to the value; longer runs are still
         * skipped completely but get rejected by the length check below, so
         * the accumulator can never overflow. */
        if (ndigits < 4) {
            octet = octet * 10 + buf[ndigits] - '0';
        }
        ndigits++;
    }
    *nprocessed = ndigits;

    /* valid: at least one and at most three digits, value within 0..255 */
    return (ndigits > 0 && ndigits < 4 && (octet >= 0 && octet <= 255)) ? 1 : 0;
}

/**
 * \brief Check whether a buffer starts with an IPv4 address.
 *
 * Expects four decimal octets separated by single dots. Every buffer access is
 * bounded by \p buflen; in particular, input that ends directly after a dot
 * (e.g. "10.0.") is rejected without looking at the byte behind the buffer.
 *
 * \param buf input buffer start.
 * \param buflen number of bytes available.
 * \param nprocessed receives the length of the address; only written when an
 *        address was found.
 * \return 1 if a valid IPv4 address is found, 0 otherwise.
 */
static int syntax_ipv4(const uchar *const __restrict__ buf,
                       const size_t buflen,
                       size_t *const __restrict__ nprocessed) {
    size_t pos = 0;

    for (int octet = 0; octet < 4; octet++) {
        size_t nproc = 0;

        if (octet > 0) {
            /* octets after the first must be introduced by a dot */
            if (pos >= buflen || buf[pos] != '.') {
                return 0;
            }
            pos++;
        }
        /* isPosByte() itself rejects an empty remainder and a non-digit first
         * character, so no separate (unbounded) look-ahead is needed here. */
        if (isPosByte(buf + pos, buflen - pos, &nproc) == 0) {
            return 0;
        }
        pos += nproc;
    }

    *nprocessed = pos;
    return 1;
}


/**
 * \brief Scan one component of an IPv6 candidate.
 *
 * Reads hex digits from the start of the buffer and stops at the first other
 * character or after the fifth digit. A ':' (or, if \p handleDot is set, a
 * '.') is consumed only when it is the very first character; a separator that
 * follows digits is left in place for the next call.
 *
 * \param buf input buffer start.
 * \param buflen number of bytes available.
 * \param nprocessed running byte counter; incremented for every byte consumed.
 * \param handleDot whether a leading '.' should be treated as a separator.
 * \return number of hex digits read (0..5), -1 for a leading ':', or -2 for a
 *         leading '.' with \p handleDot set.
 */
static int isValidHexNum(const uchar *const __restrict__ buf,
                         const size_t buflen,
                         size_t *const __restrict__ nprocessed,
                         int handleDot) {
    int digits = 0;

    for (size_t pos = 0; pos < buflen; pos++) {
        const uchar ch = buf[pos];
        const int isHex = (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');

        if (isHex) {
            digits++;
            (*nprocessed)++;
            if (digits == 5) {
                /* too long for an address group; the caller decides what to do */
                return digits;
            }
            continue;
        }

        /* separators only count when no digit has been read yet */
        if (digits == 0) {
            if (ch == ':') {
                (*nprocessed)++;
                return -1;
            }
            if (ch == '.' && handleDot) {
                (*nprocessed)++;
                return -2;
            }
        }
        return digits;
    }
    return digits;
}


/**
 * \brief Check whether a buffer starts with an IPv6 address.
 *
 * Walks hexadecimal segments from left to right, handling abbreviation rules
 * and detecting trailing port numbers so callers can trim them back out.
 *
 * The scan position is kept in \p *nprocessed itself: the function starts at
 * whatever offset the caller stored there and does not reset it, so callers
 * must initialize it (presumably to 0 - TODO confirm at the call sites).
 *
 * \param buf input buffer start.
 * \param buflen number of bytes available.
 * \param nprocessed in/out scan offset; meaningful to the caller only when the
 *        function returns 1.
 * \return 1 if a valid IPv6 address is found, 0 otherwise.
 */
static int syntax_ipv6(const uchar *const __restrict__ buf,
                       const size_t buflen,
                       size_t *const __restrict__ nprocessed) {
    int lastSep = 0; /* previous token was a ':' */
    sbool hadAbbrev = 0; /* a "::" has been seen somewhere */
    sbool lastAbbrev = 0; /* the most recent token completed a "::" */
    int ipParts = 0; /* number of hex groups accepted so far */
    int numLen;
    int isIP = 0;

    /*
     * The parser walks the address left to right, accounting for IPv6
     * abbreviation (::) rules. Ports are treated as a five-hex-digit tail and
     * trimmed back out when detected so the caller keeps the original port.
     */

    while (*nprocessed < buflen) {
        /* handleDot is 0 here, so the only negative result is -1 (':') */
        numLen = isValidHexNum(buf + *nprocessed, buflen - *nprocessed, nprocessed, 0);
        if (numLen > 0 && numLen < 5) {  // found a valid num
            /* a ninth group, or an eighth one in an abbreviated address, is too many */
            if ((ipParts == 7 && hadAbbrev) || ipParts > 7) {
                isIP = 0;
                goto done;
            }
            /* a single leading ':' (not "::") before the first group is invalid */
            if (ipParts == 0 && lastSep && !hadAbbrev) {
                isIP = 0;
                goto done;
            }
            lastSep = 0;
            lastAbbrev = 0;
            ipParts++;
        } else if (numLen < 0) {  //':'
            if (lastSep) {
                /* two separators in a row form "::", which may occur only once */
                if (hadAbbrev) {
                    isIP = 0;
                    goto done;
                } else {
                    hadAbbrev = 1;
                    lastAbbrev = 1;
                }
            }
            lastSep = 1;
        } else if (numLen == 5) {  // maybe truncated with port
            if (hadAbbrev && ipParts >= 2) {
                isIP = 1;
                /* we need to go back 6 chars:
                 * 5 digits plus leading ":" which designates port!
                 */
                *nprocessed -= 6;
            } else {
                isIP = 0;
                // nprocessed need not be corrected - it's only used if isIP == 1
            }
            goto done;
        } else {  // no valid num
            /* numLen == 0: the next byte is neither a hex digit nor ':' and
             * nothing was consumed, so the candidate ends here */
            if (lastSep) {
                /* ending on a separator is only fine if that separator closed a "::" */
                if (lastAbbrev && ipParts < 8) {
                    isIP = 1;
                    goto done;
                }
                isIP = 0;
                goto done;
            }
            /* full form needs exactly 8 groups, abbreviated form fewer than 8 */
            if ((ipParts == 8 && !hadAbbrev) || (ipParts < 8 && hadAbbrev)) {
                isIP = 1;
                goto done;
            } else {
                isIP = 0;
                goto done;
            }
        }
        /* a complete unabbreviated address: stop right after the eighth group */
        if (ipParts == 8 && !hadAbbrev) {
            isIP = 1;
            goto done;
        }
    }

    /* buffer exhausted: accept a complete full-form address that does not end
     * on a separator, or any abbreviated form with fewer than 8 groups */
    if ((!lastSep && (ipParts == 8 && !hadAbbrev)) || (ipParts < 8 && hadAbbrev)) {
        isIP = 1;
    }

done:
    return isIP;
}


/**
 * \brief Convert dotted IPv4 text to an integer.
 *
 * Digits are accumulated into four octet slots, with '.' advancing to the next
 * slot; any other character is ignored. Parsing stops at a fourth dot so that
 * malformed input can never index past the last octet slot. Octet values are
 * not range-checked; callers are expected to pass text that was already
 * validated as an IPv4 address.
 *
 * \param str NUL-terminated IPv4 address string.
 * \return numeric IPv4 value with the first octet in the most significant
 *         byte.
 */
static unsigned ipv42num(const char *str) {
    unsigned num[4] = {0, 0, 0, 0};
    size_t octet = 0;

    for (const char *p = str; *p != '\0'; p++) {
        if (*p >= '0' && *p <= '9') {
            num[octet] = num[octet] * 10 + (unsigned)(*p - '0');
        } else if (*p == '.') {
            if (octet == 3) {
                /* more than three dots: not an IPv4 address any more, ignore the tail */
                break;
            }
            octet++;
        }
    }

    return num[0] * 256 * 256 * 256 + num[1] * 256 * 256 + num[2] * 256 + num[3];
}


/**
 * \brief Apply IPv4 anonymization to a numeric address.
 *
 * In both supported modes the low-order \p bits bits of the address are
 * cleared first. ZERO returns that value; RANDOMINT additionally fills the
 * cleared bits with a random value drawn from the worker's rand_r() state.
 * Any other mode (SIMPLE is expected to be filtered out by the caller) is
 * reported as an internal error and yields 0.
 *
 * \param ip original IPv4 value.
 * \param pWrkrData worker context providing the RNG state.
 * \param bits number of low-order bits to anonymize.
 * \param mode anonymization mode to apply.
 * \return rewritten IPv4 value.
 */
static unsigned code_ipv4_int(unsigned ip, wrkrInstanceData_t *pWrkrData, int bits, enum mode mode) {
    if (mode == ZERO || mode == RANDOMINT) {
        /* work in a wider type: shifting a 32-bit value by 32 is not defined */
        const unsigned long long wide = ip;
        const unsigned kept = (unsigned)((wide >> bits) << bits);

        if (mode == ZERO) {
            return kept;
        }

        /* scale a random fraction in [0, 1] to the range of the cleared bits, i.e. (2^bits)-1 */
        const unsigned fill =
            (unsigned)((rand_r(&(pWrkrData->randstatus)) / (double)RAND_MAX) * ((1ull << bits) - 1));
        return kept + fill;
    }

    /* SIMPLE or an unknown value: must not get here */
    LogError(0, RS_RET_INTERNAL_ERROR, "mmanon: unexpected code path reached in code_int function");
    return 0;
}


/**
 * \brief Render a numeric IPv4 value as dotted-decimal text.
 *
 * The most significant byte of \p num becomes the first octet.
 *
 * \param num numeric IPv4 value.
 * \param str destination buffer; must provide room for at least 16 bytes.
 * \return number of characters written (excluding the NUL terminator).
 */
static int num2ipv4(unsigned num, char *str) {
    const int first = (int)((num >> 24) & 0xFFu);
    const int second = (int)((num >> 16) & 0xFFu);
    const int third = (int)((num >> 8) & 0xFFu);
    const int fourth = (int)(num & 0xFFu);

    return snprintf(str, 16, "%d.%d.%d.%d", first, second, third, fourth);
}


/**
 * \brief Extract a detected address from the message into a C string.
 *
 * Copies exactly \p end bytes beginning at \p start and appends a NUL, so
 * \p address must have room for \p end + 1 bytes.
 *
 * \param start pointer to the first byte of the substring.
 * \param end number of bytes to copy (a length, not an end pointer).
 * \param address destination buffer; NUL-terminated on return.
 */
static void getip(uchar *start, size_t end, char *address) {
    const uchar *src = start;
    char *dst = address;

    for (size_t remaining = end; remaining > 0; remaining--) {
        *dst++ = *src++;
    }
    *dst = '\0';
}

/**
 * \brief Emit a warning that the unique-address retry limit was hit.
 *
 * The message names the address family, the configured retry count and the
 * fallback that is applied ("zero" for MAX_RETRY_ZERO, otherwise
 * "accept-duplicates").
 *
 * \param addressType label for the address family (e.g., ipv4, ipv6).
 * \param retryCount configured maximum retries.
 * \param handling fallback behavior selected for max-retry handling.
 */
static void log_max_retry_warning(const char *addressType, unsigned int retryCount, enum maxRetryOption handling) {
    const char *handlingLabel = "accept-duplicates";

    if (handling == MAX_RETRY_ZERO) {
        handlingLabel = "zero";
    }

    LogMsg(0, RS_RET_OK, LOG_WARNING,
           "mmanon: unique retry limit %u reached for %s random-consistent-unique; handling=%s", retryCount,
           addressType, handlingLabel);
}

/**
 * \brief Ensure consistent IPv4 anonymization for repeat callers.
 *
 * Walks or builds the IPv4 trie under ipv4Mutex and stores the mapped
 * replacement string on the terminal node, so the same original address is
 * always rewritten to the same text. When ipv4.randConsisUnique is set, newly
 * generated values are additionally tracked in a hash table and regenerated
 * while they collide with an earlier one, optionally bounded by the
 * configured retry limit.
 *
 * On every error path the jump to finalize_it happens before \p address is
 * written, so the input buffer remains unchanged. Note that if allocating the
 * unique-table key fails, the replacement has already been stored in the trie
 * slot and will be returned by later calls for the same address.
 *
 * \param address IPv4 text buffer to anonymize in place.
 * \param pWrkrData worker context with shared trie state.
 * \return RS_RET_OK on success, an error otherwise.
 */
static rsRetVal findip(char *address, wrkrInstanceData_t *pWrkrData) {
    DEFiRet;
    int i;
    unsigned num;
    unsigned origNum;
    union node *current;
    union node *Last;
    int MoreLess;
    char *CurrentCharPtr;
    uint32_t *uniqueKey = NULL;  // owned here until the hash table accepts it
    sbool locked = 0;            // tells finalize_it whether the mutex must be released
    const int bits = pWrkrData->pData->ipv4.bits;
    const enum mode anonmode = pWrkrData->pData->ipv4.mode;

    /*
     * Walk/construct the prefix trie for the incoming address. The last bit is
     * used to pick one of two string slots on the terminal node, letting us
     * reuse the same allocation for both possible children.
     */
    if (pthread_mutex_lock(&pWrkrData->pData->ipv4Mutex) != 0) {
        ABORT_FINALIZE(RS_RET_ERR);
    }
    locked = 1;
    current = pWrkrData->pData->ipv4.Root;
    origNum = ipv42num(address);
    num = origNum;
    // 31 levels: bits 31..1 select the path, bit 0 is handled after the loop
    for (i = 0; i < 31; i++) {
        // the root node is created lazily on first use
        if (pWrkrData->pData->ipv4.Root == NULL) {
            CHKmalloc(current = (union node *)calloc(1, sizeof(union node)));
            pWrkrData->pData->ipv4.Root = current;
        }
        Last = current;
        // descend along the current bit, most significant bit first
        if ((num >> (31 - i)) & 1) {
            current = current->pointer.more;
            MoreLess = 1;
        } else {
            current = current->pointer.less;
            MoreLess = 0;
        }
        // missing child: allocate a zeroed node and hook it into the parent
        if (current == NULL) {
            CHKmalloc(current = (union node *)calloc(1, sizeof(union node)));
            if (MoreLess == 1) {
                Last->pointer.more = current;
            } else {
                Last->pointer.less = current;
            }
        }
    }
    // the lowest address bit selects one of the two string slots of the terminal node
    if (num & 1) {
        CurrentCharPtr = current->ips.ip_high;
    } else {
        CurrentCharPtr = current->ips.ip_low;
    }
    if (CurrentCharPtr[0] != '\0') {
        // address seen before: reuse the stored replacement
        strcpy(address, CurrentCharPtr);
    } else {
        // first occurrence: generate a replacement and remember it in the slot
        if (pWrkrData->pData->ipv4.randConsisUnique && pWrkrData->pData->ipv4.randConsisUniqueGeneratedIPs == NULL) {
            // table of already handed-out values, created on first use
            CHKmalloc(pWrkrData->pData->ipv4.randConsisUniqueGeneratedIPs =
                          create_hashtable(512, hash_from_u32, key_equals_u32, NULL));
        }
        unsigned int attempts = 0;
        const sbool limitRetries = pWrkrData->pData->ipv4.limitMaxRetries;
        const unsigned int maxRetries = pWrkrData->pData->ipv4.maxRetryCount;
        const enum maxRetryOption handling = pWrkrData->pData->ipv4.maxRetryFallback;
        sbool maxRetryReached = 0;
        sbool duplicateFound = 0;

        if (pWrkrData->pData->ipv4.randConsisUnique) {
            // regenerate until the value is unused or the retry budget is exhausted;
            // without a retry limit this loop only ends once an unused value is drawn
            do {
                num = code_ipv4_int(origNum, pWrkrData, bits, anonmode);
                duplicateFound = (hashtable_search(pWrkrData->pData->ipv4.randConsisUniqueGeneratedIPs, &num) != NULL);
                if (duplicateFound) {
                    if (limitRetries && attempts >= maxRetries) {
                        maxRetryReached = 1;
                        break;
                    }
                    attempts++;  // Retries count excludes the initial attempt, so increment after check.
                }
            } while (duplicateFound);
        } else {
            num = code_ipv4_int(origNum, pWrkrData, bits, anonmode);
        }

        if (maxRetryReached) {
            log_max_retry_warning("ipv4", maxRetries, handling);
            if (handling == MAX_RETRY_ZERO) {
                // fall back to the deterministic zeroed form of the original address
                num = code_ipv4_int(origNum, pWrkrData, bits, ZERO);
                // duplicateFound determines whether the zeroed IP should be added to the table of unique generated IPs.
                duplicateFound = (hashtable_search(pWrkrData->pData->ipv4.randConsisUniqueGeneratedIPs, &num) != NULL);
            } else {
                // Accept-duplicates keeps the last randomized IP; no extra work needed.
            }
        }

        // store the replacement text in the trie slot before anything below can fail
        num2ipv4(num, CurrentCharPtr);

        if (pWrkrData->pData->ipv4.randConsisUnique && !duplicateFound) {
            // record the value so later addresses do not receive the same replacement
            CHKmalloc(uniqueKey = (uint32_t *)malloc(sizeof(uint32_t)));
            *uniqueKey = num;
            if (!hashtable_insert(pWrkrData->pData->ipv4.randConsisUniqueGeneratedIPs, uniqueKey, (void *)1)) {
                DBGPRINTF("hashtable error: insert to ipv4 unique table failed");
                ABORT_FINALIZE(RS_RET_ERR);
            }
            // the key pointer is cleared after a successful insert so finalize_it does not free it
            uniqueKey = NULL;
        }

        strcpy(address, CurrentCharPtr);
    }
finalize_it:
    if (locked) {
        pthread_mutex_unlock(&pWrkrData->pData->ipv4Mutex);
    }
    free(uniqueKey);  // non-NULL only when an error occurred before the insert succeeded
    RETiRet;
}


/**
 * \brief Rewrite a textual IPv4 address for the non-simple modes.
 *
 * With ipv4.randConsis set, the shared trie lookup in findip() performs the
 * rewrite (its return value is not evaluated here). Otherwise the address is
 * converted to a number, anonymized with the configured bit count and mode,
 * and printed back into the same buffer.
 *
 * \param address IPv4 text buffer to rewrite in place.
 * \param pWrkrData worker context providing configuration and shared state.
 */
static void process_IPv4(char *address, wrkrInstanceData_t *pWrkrData) {
    if (!pWrkrData->pData->ipv4.randConsis) {
        const unsigned original = ipv42num(address);
        const unsigned rewritten =
            code_ipv4_int(original, pWrkrData, pWrkrData->pData->ipv4.bits, pWrkrData->pData->ipv4.mode);

        num2ipv4(rewritten, address);
        return;
    }

    findip(address, pWrkrData);
}


/**
 * \brief Apply simple anonymization by replacing digits with a fixed character.
 *
 * Walks the textual address backwards from its last character and overwrites
 * the digits of the trailing (ipv4.bits / 8) octets with ipv4.replaceChar.
 * Separators are left untouched. The walk is confined to the \p iplen bytes
 * of the address: it never inspects or modifies bytes in front of \p msg,
 * even when all four octets are replaced.
 *
 * \param pWrkrData worker context providing replacement configuration.
 * \param msg pointer to the first character of the detected address.
 * \param hasChanged flag set to 1 when at least one character was modified.
 * \param iplen length of the detected address segment.
 */
static void simpleAnon(wrkrInstanceData_t *const pWrkrData, uchar *const msg, int *const hasChanged, int iplen) {
    const int maxidx = iplen - 1;
    int j = -1;  // distance from the last character; -1 so the first step lands on it

    for (int i = (pWrkrData->pData->ipv4.bits / 8); i > 0; i--) {
        j++;  // first pass: move onto the last char; later passes: step over the '.'
        /*
         * The j <= maxidx bound stops the scan at the first character of the
         * address. Without it, replacing the leading octet would go on to
         * msg[-1] and could overwrite digits that precede the address (or
         * touch memory before the buffer when the address starts at offset 0).
         */
        while (j <= maxidx && '0' <= msg[maxidx - j] && msg[maxidx - j] <= '9') {
            if (msg[maxidx - j] != pWrkrData->pData->ipv4.replaceChar) {
                msg[maxidx - j] = pWrkrData->pData->ipv4.replaceChar;
                *hasChanged = 1;
            }
            j++;
        }
    }
}


/**
 * \brief Find and anonymize an IPv4 address at the current scan position.
 *
 * If the text at \p *idx is an IPv4 address it is rewritten according to the
 * configured mode and \p *idx is advanced past it; otherwise nothing is
 * touched. Simple mode edits the digits in place. For the other modes the
 * replacement may differ in length, in which case a new buffer of exactly the
 * new message length is allocated, the old one is freed and \p *msg /
 * \p *pLenMsg are updated.
 *
 * If that allocation fails, the message is left as it was (address not
 * rewritten), \p *idx is still advanced past the address, and \p *hasChanged
 * is not set; the function has no error return to report this.
 *
 * \param pWrkrData worker context providing anonymization configuration.
 * \param msg pointer to the message buffer pointer; may be replaced.
 * \param pLenMsg pointer to the buffer length, updated when resized.
 * \param idx current scan index, advanced past any detected address.
 * \param hasChanged flag updated when the buffer is modified.
 */
static void anonipv4(wrkrInstanceData_t *pWrkrData, uchar **msg, int *pLenMsg, int *idx, int *hasChanged) {
    char address[16];
    char caddress[16];
    int offset = *idx;
    uchar *msgcpy = *msg;
    size_t iplen;
    size_t caddresslen;
    int oldLen = *pLenMsg;

    if (!syntax_ipv4((*msg) + offset, *pLenMsg - offset, &iplen)) {
        return;
    }

    if (pWrkrData->pData->ipv4.mode == SIMPLE) {
        simpleAnon(pWrkrData, *msg + *idx, hasChanged, iplen);
        *idx += iplen;
        return;
    }

    assert(iplen < sizeof(address));
    getip(*msg + offset, iplen, address);
    offset += iplen;  // offset now marks the first byte after the original address
    strcpy(caddress, address);
    process_IPv4(caddress, pWrkrData);
    caddresslen = strlen(caddress);

    if (caddresslen == iplen) {
        /*
         * Same length: overwrite in place. The tail already sits at the right
         * position, so it must not be copied (copying it onto itself with
         * memcpy() would be an overlapping copy).
         */
        memcpy(*msg + *idx, caddress, caddresslen);
        *idx = *idx + caddresslen;
        *hasChanged = 1;
        return;
    }

    /*
     * Length changed: build the message in a fresh buffer as
     * untouched prefix + rewritten address + untouched suffix.
     */
    const int newLen = oldLen + ((int)caddresslen - (int)iplen);
    uchar *const newMsg = (uchar *)malloc(newLen);
    if (newMsg == NULL) {
        /* out of memory: keep the caller's buffer and length valid, skip the address */
        *idx = offset;
        return;
    }

    memcpy(newMsg, msgcpy, *idx);
    memcpy(newMsg + *idx, caddress, caddresslen);
    *idx = *idx + caddresslen;
    if (*idx < newLen) {
        memcpy(newMsg + *idx, msgcpy + offset, oldLen - offset);
    }

    free(msgcpy);
    *msg = newMsg;
    *pLenMsg = newLen;
    *hasChanged = 1;
}


/* Draw one rand_r() sample from the worker's RNG state and scale it onto [0, maxVal]. */
static unsigned ipv6_scaled_rand(wrkrInstanceData_t *pWrkrData, int maxVal) {
    return (unsigned)((rand_r(&(pWrkrData->randstatus)) / (double)RAND_MAX) * maxVal);
}

/*
 * Assemble nbits (less than 64) random bits: whole bytes first, then the
 * remaining nbits % 8 bits. The final draw is made even when no bits remain;
 * it then contributes nothing to the result.
 */
static unsigned long long ipv6_rand_partial(wrkrInstanceData_t *pWrkrData, int nbits) {
    unsigned long long acc = 0;
    const int rest = nbits % 8;

    for (int whole = nbits / 8; whole > 0; whole--) {
        const unsigned byteVal = ipv6_scaled_rand(pWrkrData, 0xff);
        acc <<= 8;
        acc |= byteVal;
    }

    const unsigned tail = ipv6_scaled_rand(pWrkrData, (1 << rest) - 1);
    acc <<= rest;
    acc |= tail;
    return acc;
}

/**
 * \brief Apply IPv6 anonymization to a split integer representation.
 *
 * First clears the lowest \p bits bits of the 128-bit value (spanning the low
 * half and, for more than 64 bits, part or all of the high half). In
 * RANDOMINT mode the cleared range is then refilled from the worker's RNG
 * state; in ZERO mode it stays cleared. Any other mode is reported as an
 * internal error after the clearing step has already been applied.
 *
 * \param ip address split into high and low halves, modified in place.
 * \param pWrkrData worker context; only its RNG state is accessed here.
 * \param bits number of low-order bits to anonymize (0..128).
 * \param anonmode anonymization mode to apply.
 */
static void code_ipv6_int(struct ipv6_int *ip, wrkrInstanceData_t *pWrkrData, int bits, enum mode anonmode) {
    /*
     * Step 1: clear the affected bits. Shifts by the full width of a half are
     * avoided by assigning zero directly.
     */
    if (bits >= 64) {
        ip->low = 0;
        if (bits == 128) {
            ip->high = 0;
        } else if (bits > 64) {
            ip->high = (ip->high >> (bits - 64)) << (bits - 64);
        }
    } else {
        ip->low = (ip->low >> bits) << bits;
    }

    /* Step 2: optionally refill the cleared range. */
    if (anonmode == ZERO) {
        return;
    }
    if (anonmode != RANDOMINT) {
        /* SIMPLE is handled elsewhere; anything else is unknown */
        LogError(0, RS_RET_INTERNAL_ERROR, "mmanon: unexpected code path reached in code_int function");
        return;
    }

    if (bits == 128) {
        /* both halves fully random, drawn alternately byte by byte */
        for (int i = 0; i < 8; i++) {
            const unsigned highByte = ipv6_scaled_rand(pWrkrData, 0xff);
            ip->high <<= 8;
            ip->high |= highByte;

            const unsigned lowByte = ipv6_scaled_rand(pWrkrData, 0xff);
            ip->low <<= 8;
            ip->low |= lowByte;
        }
        return;
    }

    if (bits >= 64) {
        /* the whole low half is random ... */
        for (int i = 0; i < 8; i++) {
            const unsigned lowByte = ipv6_scaled_rand(pWrkrData, 0xff);
            ip->low <<= 8;
            ip->low |= lowByte;
        }
        /* ... plus the lowest (bits - 64) bits of the high half */
        if (bits > 64) {
            ip->high |= ipv6_rand_partial(pWrkrData, bits - 64);
        }
        return;
    }

    /* fewer than 64 bits: only part of the low half is refilled */
    ip->low |= ipv6_rand_partial(pWrkrData, bits);
}


// Kept separate from IPv6 recognition, which may become more complex over
// time. This function can stay as it is because it is only ever called with
// input that has already been recognized as a valid IPv6 address.
/**
 * \brief Parse a textual IPv6 address into split integers.
 *
 * Groups are read left to right until the end of the text or until a "::"
 * abbreviation is found. If there is an abbreviation, the groups that follow
 * it are read right to left and stored from the last group backwards; every
 * group in between stays zero. The eight 16-bit groups are then shifted into
 * \p ip (first four into the high half, last four into the low half).
 *
 * Any character for which getHexVal() returns -1 is treated as a group
 * separator.
 *
 * \param address textual IPv6 address (need not be NUL-terminated at iplen).
 * \param iplen length of the address substring.
 * \param ip output structure for the parsed value.
 */
static void ipv62num(char *const address, const size_t iplen, struct ipv6_int *const ip) {
    int num[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    int cyc = 0;   // index of the group currently being filled
    int dots = 0;  // number of consecutive separators seen; 2 means "::"
    int val;
    unsigned i;

    /* forward pass: stops right after a "::" or at the end of the text */
    for (i = 0; i < iplen && dots < 2; i++) {
        val = getHexVal(address[i]);
        if (val == -1) {
            dots++;
            if (dots < 2) {
                cyc++;
            }
        } else {
            num[cyc] = num[cyc] * 16 + val;
            dots = 0;
        }
    }
    if (dots == 2) {
        /*
         * i is the index of the first character after "::". Anything left
         * (even a single hex digit, as in "::1") belongs to the trailing
         * groups and must be parsed; comparing against iplen - 1 here would
         * silently drop a one-character tail.
         */
        if (i < iplen) {
            int shift = 0;
            cyc = 7;
            /* backward pass: i >= 2 here, so the unsigned counter cannot wrap */
            for (unsigned j = iplen - 1; j >= i; j--) {
                val = getHexVal(address[j]);
                if (val == -1) {
                    cyc--;
                    shift = 0;
                } else {
                    val <<= shift;
                    shift += 4;
                    num[cyc] += val;
                }
            }
        } else {
            /* address ends in "::": all remaining groups are zero */
            while (cyc < 8) {
                num[cyc] = 0;
                cyc++;
            }
        }
    }

    for (i = 0; i < 4; i++) {
        ip->high <<= 16;
        ip->high |= num[i];
    }
    while (i < 8) {
        ip->low <<= 16;
        ip->low |= num[i];
        i++;
    }
}


/**
 * \brief Convert split IPv6 integers to text.
 *
 * Prints all eight 16-bit groups in lowercase hex without leading zeros and
 * without "::" compression. At most 40 bytes (39 characters plus NUL) are
 * written, so \p address must provide at least that much room.
 *
 * The value behind \p ip is only read; it is left unmodified so callers can
 * keep using it (for example as a hash key) after formatting.
 *
 * \param ip address halves to stringify.
 * \param address buffer to receive the textual representation.
 */
static void num2ipv6(struct ipv6_int *ip, char *address) {
    int num[8];

    /* group i of each half sits 16 * (3 - i) bits above the least significant bit */
    for (int i = 0; i < 4; i++) {
        const int shift = 16 * (3 - i);

        num[i] = (int)((ip->high >> shift) & 0xffff);
        num[i + 4] = (int)((ip->low >> shift) & 0xffff);
    }

    snprintf(address, 40, "%x:%x:%x:%x:%x:%x:%x:%x", num[0], num[1], num[2], num[3], num[4], num[5], num[6], num[7]);
}


/**
 * \brief Compare two ipv6_int keys for hash table equality.
 *
 * \param key1 first key.
 * \param key2 second key.
 * \return 1 if keys match, 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2) {
    struct ipv6_int *const k1 = (struct ipv6_int *)key1;
    struct ipv6_int *const k2 = (struct ipv6_int *)key2;

    return ((k1->high == k2->high) && (k1->low == k2->low));
}


/**
 * \brief Hash function for ipv6_int entries.
 *
 * Mixes the high and low halves to produce a size_t hash value.
 *
 * \param k key to hash.
 * \return hash code.
 */
static unsigned hash_from_key_fn(void *k) {
    struct ipv6_int *const key = (struct ipv6_int *)k;
    unsigned hashVal;

    hashVal = (key->high & 0xFFC00000) | (key->low & 0x3FFFFF);
    return hashVal;
}

/**
 * \brief Hash callback for hash tables keyed by a 32-bit IPv4 value.
 *
 * The key is used as its own hash. The stored values are randomized
 * addresses, so no additional mixing is applied.
 *
 * \param k key to hash (points to a uint32_t).
 * \return the key value itself.
 */
static unsigned hash_from_u32(void *k) {
    const uint32_t *const keyPtr = (const uint32_t *)k;

    return (unsigned)(*keyPtr);
}

/**
 * \brief Equality callback for hash tables keyed by a 32-bit IPv4 value.
 *
 * \param key1 first key (points to a uint32_t).
 * \param key2 second key (points to a uint32_t).
 * \return 1 if both keys hold the same value, 0 otherwise.
 */
static int key_equals_u32(void *key1, void *key2) {
    const uint32_t lhs = *(const uint32_t *)key1;
    const uint32_t rhs = *(const uint32_t *)key2;

    return lhs == rhs;
}

/**
 * \brief Convert split integers to an embedded-IPv4 textual address.
 *
 * The upper 96 bits are printed as six lowercase hex groups, the lowest 32
 * bits as dotted decimal. At most 46 bytes (45 characters plus NUL) are
 * written, so \p address must provide at least that much room.
 *
 * The value behind \p ip is only read; it is left unmodified so callers can
 * keep using it (for example as a hash key) after formatting.
 *
 * \param ip address halves to stringify.
 * \param address buffer to receive the textual representation.
 */
static void num2embedded(struct ipv6_int *ip, char *address) {
    int num[8];

    /* group i of each half sits 16 * (3 - i) bits above the least significant bit */
    for (int i = 0; i < 4; i++) {
        const int shift = 16 * (3 - i);

        num[i] = (int)((ip->high >> shift) & 0xffff);
        num[i + 4] = (int)((ip->low >> shift) & 0xffff);
    }

    /* the last two groups are split into bytes to form the IPv4 tail */
    snprintf(address, 46, "%x:%x:%x:%x:%x:%x:%d.%d.%d.%d", num[0], num[1], num[2], num[3], num[4], num[5],
             (num[6] & 0xff00) >> 8, num[6] & 0xff, (num[7] & 0xff00) >> 8, num[7] & 0xff);
}

/**
 * \brief Helper to generate a randomized IPv6/embedded address candidate.
 *
 * Resets \p num to \p original, anonymizes it with the given bit count and
 * mode, and writes the matching text to \p address. On return \p num holds
 * the anonymized numeric value, so the caller can use it for hash table
 * lookups and inserts.
 *
 * This helper is intentionally scoped to the findIPv6() retry loop, where
 * all required preconditions and locking have already been validated. Do not
 * use it elsewhere.
 *
 * \param num address value updated in place with the anonymized result.
 * \param original source address to randomize from.
 * \param address buffer to receive the textual representation.
 * \param pWrkrData worker context with RNG state.
 * \param useEmbedded non-zero to format as embedded IPv4.
 * \param bits number of bits to anonymize.
 * \param anonmode anonymization mode to apply.
 */
static void generate_ipv6_candidate(struct ipv6_int *num,
                                    const struct ipv6_int original,
                                    char *address,
                                    wrkrInstanceData_t *const pWrkrData,
                                    int useEmbedded,
                                    int bits,
                                    enum mode anonmode) {
    struct ipv6_int scratch;

    *num = original;
    code_ipv6_int(num, pWrkrData, bits, anonmode);

    /*
     * Format from a scratch copy: the text formatters take a non-const
     * pointer and may shift the value out while printing it. *num must keep
     * the candidate value, because the caller searches and fills the
     * unique-address table with it afterwards.
     */
    scratch = *num;
    if (useEmbedded) {
        num2embedded(&scratch, address);
    } else {
        num2ipv6(&scratch, address);
    }
}


/**
 * \brief Ensure consistent IPv6/embedded anonymization for repeat callers.
 *
 * Looks up or inserts an anonymized value in the appropriate hash table under
 * ipv6Mutex, updating the provided text buffer with the stored replacement.
 * The same mutex is used for both the IPv6 and the embedded-IPv4 tables. In
 * unique mode, generated values are additionally tracked in a second table
 * and regenerated while they collide, optionally bounded by the configured
 * retry limit.
 *
 * On errors that occur before a candidate is generated (lock failure, table
 * or key allocation failure) \p address is left unchanged. Errors after that
 * point leave the freshly generated text in \p address, but the mapping may
 * not have been stored.
 *
 * \param num split integer representation of the address; used as lookup key
 *            and overwritten when a new candidate is generated.
 * \param address buffer containing the textual address to rewrite.
 * \param pWrkrData worker context with hash tables and RNG state.
 * \param useEmbedded non-zero to use the embedded-IPv4 state.
 * \return RS_RET_OK on success, an error otherwise.
 */
static rsRetVal findIPv6(struct ipv6_int *num, char *address, wrkrInstanceData_t *const pWrkrData, int useEmbedded) {
    struct ipv6_int *hashKey = NULL;  // owned here until randConsisIPs accepts it
    DEFiRet;
    /* select the IPv6 or embedded-IPv4 flavour of every setting once, up front */
    struct hashtable *randConsisIPs =
        useEmbedded ? pWrkrData->pData->embeddedIPv4.randConsisIPs : pWrkrData->pData->ipv6.randConsisIPs;
    struct hashtable *randConsisUniqueGeneratedIPs = useEmbedded
                                                         ? pWrkrData->pData->embeddedIPv4.randConsisUniqueGeneratedIPs
                                                         : pWrkrData->pData->ipv6.randConsisUniqueGeneratedIPs;
    const int uniqueMode =
        useEmbedded ? pWrkrData->pData->embeddedIPv4.randConsisUnique : pWrkrData->pData->ipv6.randConsisUnique;
    struct ipv6_int original = *num;  // snapshot: *num is overwritten by candidate generation
    struct ipv6_int *uniqueKey = NULL;  // owned here until the unique table accepts it
    sbool locked = 0;                   // tells finalize_it whether the mutex must be released
    const sbool limitRetries =
        useEmbedded ? pWrkrData->pData->embeddedIPv4.limitMaxRetries : pWrkrData->pData->ipv6.limitMaxRetries;
    const unsigned int maxRetries =
        useEmbedded ? pWrkrData->pData->embeddedIPv4.maxRetryCount : pWrkrData->pData->ipv6.maxRetryCount;
    const enum maxRetryOption handling =
        useEmbedded ? pWrkrData->pData->embeddedIPv4.maxRetryFallback : pWrkrData->pData->ipv6.maxRetryFallback;
    unsigned int attempts = 0;
    sbool maxRetryReached = 0;
    sbool duplicateFound = 0;
    const char *addressType = useEmbedded ? "embeddedipv4" : "ipv6";
    const int bits = useEmbedded ? pWrkrData->pData->embeddedIPv4.bits : pWrkrData->pData->ipv6.bits;
    const enum mode anonmode = useEmbedded ? pWrkrData->pData->embeddedIPv4.anonmode : pWrkrData->pData->ipv6.anonmode;

    /*
     * Consistent randomization keeps a per-action hash table of original->
     * anonymized addresses. The mutex guards the first allocation of the hash
     * and concurrent inserts so workers share one stable mapping.
     *
     * NOTE(review): the table pointers above are read before the mutex is
     * taken; confirm that no other worker can create the tables between that
     * read and the NULL checks below.
     */

    if (pthread_mutex_lock(&pWrkrData->pData->ipv6Mutex) != 0) {
        ABORT_FINALIZE(RS_RET_ERR);
    }
    locked = 1;

    /* create the original->replacement table on first use and publish it */
    if (randConsisIPs == NULL) {
        CHKmalloc(randConsisIPs = create_hashtable(512, hash_from_key_fn, keys_equal_fn, NULL));
        if (useEmbedded) {
            pWrkrData->pData->embeddedIPv4.randConsisIPs = randConsisIPs;
        } else {
            pWrkrData->pData->ipv6.randConsisIPs = randConsisIPs;
        }
    }

    /* same for the table of already handed-out replacements (unique mode only) */
    if (uniqueMode && randConsisUniqueGeneratedIPs == NULL) {
        CHKmalloc(randConsisUniqueGeneratedIPs = create_hashtable(512, hash_from_key_fn, keys_equal_fn, NULL));
        if (useEmbedded) {
            pWrkrData->pData->embeddedIPv4.randConsisUniqueGeneratedIPs = randConsisUniqueGeneratedIPs;
        } else {
            pWrkrData->pData->ipv6.randConsisUniqueGeneratedIPs = randConsisUniqueGeneratedIPs;
        }
    }

    char *val = (char *)(hashtable_search(randConsisIPs, num));

    if (val != NULL) {
        /* address seen before: reuse the stored replacement text */
        strcpy(address, val);
    } else {
        /* first occurrence: the table key is a heap copy of the original value */
        CHKmalloc(hashKey = (struct ipv6_int *)malloc(sizeof(struct ipv6_int)));
        *hashKey = original;

        if (uniqueMode) {
            /* regenerate until the unique-table search reports no entry or the
             * retry budget is exhausted; without a retry limit the loop ends
             * only when a search misses */
            do {
                generate_ipv6_candidate(num, original, address, pWrkrData, useEmbedded, bits, anonmode);
                duplicateFound = (hashtable_search(randConsisUniqueGeneratedIPs, num) != NULL);
                if (duplicateFound) {
                    if (limitRetries && attempts >= maxRetries) {
                        maxRetryReached = 1;
                        break;
                    }
                    attempts++;  // Retries count excludes the initial attempt, so increment after check.
                }
            } while (duplicateFound);
        } else {
            generate_ipv6_candidate(num, original, address, pWrkrData, useEmbedded, bits, anonmode);
        }

        if (maxRetryReached) {
            log_max_retry_warning(addressType, maxRetries, handling);
            if (handling == MAX_RETRY_ZERO) {
                /* fall back to the deterministic zeroed form of the original address */
                generate_ipv6_candidate(num, original, address, pWrkrData, useEmbedded, bits, ZERO);
                // duplicateFound determines whether the zeroed IP should be added to the table of unique generated IPs.
                duplicateFound = (hashtable_search(randConsisUniqueGeneratedIPs, num) != NULL);
            } else {
                // Accept-duplicates keeps the last randomized IP; no extra work needed.
            }
        }
        /* the table stores its own heap copy of the replacement text */
        char *hashString;
        CHKmalloc(hashString = strdup(address));

        if (!hashtable_insert(randConsisIPs, hashKey, hashString)) {
            DBGPRINTF("hashtable error: insert to %s-table failed", useEmbedded ? "embedded ipv4" : "ipv6");
            free(hashString);
            ABORT_FINALIZE(RS_RET_ERR);
        }
        /* the key pointer is cleared after a successful insert so finalize_it does not free it */
        hashKey = NULL;

        if (uniqueMode && !duplicateFound) {
            /* record the generated value so later addresses do not receive it again */
            CHKmalloc(uniqueKey = (struct ipv6_int *)malloc(sizeof(struct ipv6_int)));
            *uniqueKey = *num;
            if (!hashtable_insert(randConsisUniqueGeneratedIPs, uniqueKey, (void *)1)) {
                DBGPRINTF("hashtable error: insert to %s unique table failed", useEmbedded ? "embedded ipv4" : "ipv6");
                ABORT_FINALIZE(RS_RET_ERR);
            }
            uniqueKey = NULL;
        }
    }
finalize_it:
    if (locked) {
        pthread_mutex_unlock(&pWrkrData->pData->ipv6Mutex);
    }
    /* both are non-NULL only when an error occurred before the respective insert succeeded */
    free(hashKey);
    free(uniqueKey);
    RETiRet;
}


/**
 * \brief Rewrite a textual IPv6 address according to the IPv6 configuration.
 *
 * The text is parsed into its numeric form first. With ipv6.randConsis set,
 * the shared lookup in findIPv6() performs the rewrite (its return value is
 * not evaluated here). Otherwise the value is anonymized with the configured
 * bit count and mode and printed back into the same buffer.
 *
 * \param address IPv6 text buffer to rewrite in place.
 * \param pWrkrData worker context providing configuration and shared state.
 * \param iplen length of the address substring within the buffer.
 */
static void process_IPv6(char *address, wrkrInstanceData_t *pWrkrData, const size_t iplen) {
    struct ipv6_int num = {0, 0};

    ipv62num(address, iplen, &num);

    if (!pWrkrData->pData->ipv6.randConsis) {
        code_ipv6_int(&num, pWrkrData, pWrkrData->pData->ipv6.bits, pWrkrData->pData->ipv6.anonmode);
        num2ipv6(&num, address);
        return;
    }

    findIPv6(&num, address, pWrkrData, 0);
}


/**
 * \brief Find and anonymize an IPv6 address at the current scan position.
 *
 * If the text at \p *idx is an IPv6 address it is rewritten according to the
 * IPv6 configuration and \p *idx is advanced past it; otherwise nothing is
 * touched. When the replacement differs in length, a new buffer of exactly
 * the new message length is allocated, the old one is freed and \p *msg /
 * \p *pLenMsg are updated.
 *
 * If that allocation fails, the message is left as it was (address not
 * rewritten), \p *idx is still advanced past the address, and \p *hasChanged
 * is not set; the function has no error return to report this.
 *
 * \param pWrkrData worker context providing anonymization configuration.
 * \param msg pointer to the message buffer pointer; may be replaced.
 * \param pLenMsg pointer to the buffer length, updated when resized.
 * \param idx current scan index, advanced past any detected address.
 * \param hasChanged flag updated when the buffer is modified.
 */
static void anonipv6(wrkrInstanceData_t *pWrkrData, uchar **msg, int *pLenMsg, int *idx, int *hasChanged) {
    size_t iplen = 0;
    int offset = *idx;
    char address[40];
    uchar *msgcpy = *msg;
    size_t caddresslen;
    size_t oldLen = *pLenMsg;

    if (!syntax_ipv6(*msg + offset, *pLenMsg - offset, &iplen)) {
        return;
    }

    assert(iplen < sizeof(address));  // has to be < instead of <= since address includes space for a '\0'
    getip(*msg + offset, iplen, address);
    offset += iplen;  // offset now marks the first byte after the original address
    process_IPv6(address, pWrkrData, iplen);
    caddresslen = strlen(address);

    if (caddresslen == iplen) {
        /*
         * Same length: overwrite in place. The tail already sits at the right
         * position, so it must not be copied (copying it onto itself with
         * memcpy() would be an overlapping copy).
         */
        memcpy(*msg + *idx, address, caddresslen);
        *idx = *idx + caddresslen;
        *hasChanged = 1;
        return;
    }

    /*
     * Length changed: build the message in a fresh buffer as
     * untouched prefix + rewritten address + untouched suffix.
     */
    const int newLen = *pLenMsg + ((int)caddresslen - (int)iplen);
    uchar *const newMsg = (uchar *)malloc(newLen);
    if (newMsg == NULL) {
        /* out of memory: keep the caller's buffer and length valid, skip the address */
        *idx = offset;
        return;
    }

    memcpy(newMsg, msgcpy, *idx);
    memcpy(newMsg + *idx, address, caddresslen);
    *idx = *idx + caddresslen;
    if (*idx < newLen) {
        memcpy(newMsg + *idx, msgcpy + offset, oldLen - offset);
    }

    free(msgcpy);
    *msg = newMsg;
    *pLenMsg = newLen;
    *hasChanged = 1;
}


/**
 * \brief Locate where the IPv4 tail of an embedded IPv4-in-IPv6 literal starts.
 *
 * Scans backwards from \p dotPos for the nearest ':' and returns the index
 * right after it. Index 0 itself is never examined. This is not a
 * general-purpose search helper: the caller must pass the substring that
 * begins at the first hex group of the IPv6 address, and a dot position that
 * belongs to the IPv4 tail, so that a ':' exists before that position.
 *
 * \param buf start of the address substring.
 * \param dotPos index of a '.' inside the IPv4 tail.
 * \return index of the first character of the IPv4 tail; 0 if no ':' was
 *         found (only reachable when assertions are compiled out).
 */
static size_t findV4Start(const uchar *const __restrict__ buf, size_t dotPos) {
    for (size_t pos = dotPos; pos > 0; pos--) {
        if (buf[pos] == ':') {
            return pos + 1;
        }
    }

    assert(!"embedded IPv4 must have a ':' before its first '.'");
    /* With assertions disabled, report start-of-substring. IPv4 parsing from
     * there will then fail and the caller treats the text as non-IPv4. */
    return 0;
}


/**
 * \brief Check whether a buffer starts with an embedded IPv4 address.
 *
 * Parses hexadecimal IPv6 groups followed by a dotted-decimal IPv4 tail,
 * honoring the "::" abbreviation (at most once). Works like syntax_ipv6 but
 * watches for the first '.', which marks the IPv4 tail; the tail is then
 * validated with syntax_ipv6's IPv4 counterpart, limited to the bytes that
 * are actually left in the buffer.
 *
 * Parsing starts at offset \p *nprocessed, so the caller is expected to
 * initialize it (presumably to 0 - confirm against the caller).
 *
 * \param buf input buffer start.
 * \param buflen number of bytes available.
 * \param nprocessed bytes consumed while parsing; on success the total length
 *                   of the embedded address.
 * \param v4Start receives the offset of the IPv4 portion within the buffer;
 *                only meaningful when 1 is returned.
 * \return 1 if a valid embedded address is found, 0 otherwise.
 */
static int syntax_embedded(const uchar *const __restrict__ buf,
                           const size_t buflen,
                           size_t *const __restrict__ nprocessed,
                           size_t *v4Start) {
    int lastSep = 0;       // previous token was a ':'
    sbool hadAbbrev = 0;   // a "::" has been seen
    int ipParts = 0;       // number of hex groups seen so far

    while (*nprocessed < buflen) {
        const int numLen = isValidHexNum(buf + *nprocessed, buflen - *nprocessed, nprocessed, 1);

        if (numLen > 0) {  // found a valid num
            /* limit is 6 (not 8) groups before the tail; the first IPv4 octet
             * is also parsed as a valid hex value and counted here */
            if ((ipParts == 6 && hadAbbrev) || ipParts > 6) {
                return 0;
            }
            /* a single leading ':' is not allowed */
            if (ipParts == 0 && lastSep && !hadAbbrev) {
                return 0;
            }
            lastSep = 0;
            ipParts++;
        } else if (numLen == -1) {  //':'
            if (lastSep) {
                if (hadAbbrev) {
                    return 0;  // second "::" in one address
                }
                hadAbbrev = 1;
            }
            lastSep = 1;
        } else if (numLen == -2) {  //'.'
            size_t ipv4Len;

            /* the dot must follow a number, and without "::" all six hex
             * groups plus the first octet must have been seen */
            if (lastSep || (ipParts == 0 && hadAbbrev) || (ipParts <= 6 && !hadAbbrev)) {
                return 0;
            }
            *v4Start = findV4Start(buf, (*nprocessed) - 1);
            /* only buflen - v4Start bytes remain from the tail onwards; passing
             * the full buflen would let the IPv4 check read past the buffer */
            if (!syntax_ipv4(buf + (*v4Start), buflen - (*v4Start), &ipv4Len)) {
                return 0;
            }
            /* total length = hex prefix up to the tail + length of the tail */
            *nprocessed = (*v4Start) + ipv4Len;
            return 1;
        } else {  // no valid num
            return 0;
        }
    }

    /* buffer exhausted before an IPv4 tail was found */
    return 0;
}


/**
 * \brief Parse embedded IPv4 notation into split integers.
 *
 * The dotted tail starting at \p v4Start supplies the last two 16-bit groups.
 * The hex prefix in front of it is read left to right until the tail or a
 * "::" abbreviation is reached. Groups after an abbreviation are read right
 * to left and stored from group 5 backwards; every group in between stays
 * zero. The eight groups are then shifted into \p ip (first four into the
 * high half, last four into the low half).
 *
 * \param address textual embedded IPv4 address; address[v4Start - 1] is the
 *                ':' that separates the hex prefix from the IPv4 tail.
 * \param v4Start offset where the dotted tail begins.
 * \param ip output structure for the parsed value.
 */
static void embedded2num(char *address, size_t v4Start, struct ipv6_int *ip) {
    int num[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    int cyc = 0;   // index of the group currently being filled
    int dots = 0;  // number of consecutive separators seen; 2 means "::"
    int val;
    unsigned i;

    /* the IPv4 tail provides groups 6 and 7 */
    unsigned v4Val = ipv42num(address + v4Start);
    num[7] = v4Val & 0xffff;
    num[6] = (v4Val & 0xffff0000) >> 16;

    /* forward pass over the hex prefix: stops right after a "::" or at the tail */
    for (i = 0; i < v4Start && dots < 2; i++) {
        val = getHexVal(address[i]);
        if (val == -1) {
            dots++;
            if (dots < 2) {
                cyc++;
            }
        } else {
            num[cyc] = num[cyc] * 16 + val;
            dots = 0;
        }
    }
    if (dots == 2) {
        if (i < v4Start) {
            int shift = 0;
            cyc = 5;
            /*
             * Backward pass over the groups between "::" and the tail. It
             * starts at v4Start - 2 to skip the ':' that directly precedes
             * the IPv4 tail; starting on that separator would decrement cyc
             * before any digit is read and store every group one slot too
             * high (e.g. "::ffff:1.2.3.4" would fill group 4 instead of 5).
             * i >= 2 here, so the unsigned counter cannot wrap.
             */
            for (unsigned j = v4Start - 2; j >= i; j--) {
                val = getHexVal(address[j]);
                if (val == -1) {
                    cyc--;
                    shift = 0;
                } else {
                    val <<= shift;
                    shift += 4;
                    num[cyc] += val;
                }
            }
        } else {
            /* "::" directly precedes the tail: all remaining hex groups are zero */
            while (cyc < 6) {
                num[cyc] = 0;
                cyc++;
            }
        }
    }

    for (i = 0; i < 4; i++) {
        ip->high <<= 16;
        ip->high |= num[i];
    }
    while (i < 8) {
        ip->low <<= 16;
        ip->low |= num[i];
        i++;
    }
}


/**
 * \brief Anonymize one embedded IPv4 address in place.
 *
 * The textual address is first converted to its numeric form. Depending on
 * the embeddedIPv4.randConsis setting the value is then either handed to
 * findIPv6(), which also receives the text buffer, or transformed with
 * code_ipv6_int() using the configured bit count and anonymization mode and
 * written back to the buffer via num2embedded().
 *
 * \param address buffer holding the embedded IPv4 text; overwritten with the result.
 * \param pWrkrData worker context providing configuration and shared state.
 * \param v4Start offset of the dotted IPv4 part inside \p address.
 */
static void process_embedded(char *address, wrkrInstanceData_t *pWrkrData, size_t v4Start) {
    struct ipv6_int parsed = {0, 0};

    embedded2num(address, v4Start, &parsed);

    if (!pWrkrData->pData->embeddedIPv4.randConsis) {
        /* direct rewrite: transform the number, then render it back as text */
        code_ipv6_int(&parsed, pWrkrData, pWrkrData->pData->embeddedIPv4.bits,
                      pWrkrData->pData->embeddedIPv4.anonmode);
        num2embedded(&parsed, address);
        return;
    }

    /* randConsis is set: findIPv6() produces the replacement text itself */
    findIPv6(&parsed, address, pWrkrData, 1);
}


/**
 * \brief Find and anonymize an embedded IPv4 address at the current index.
 *
 * Checks whether an embedded IPv4 address starts at \p *idx. If so, the
 * address is rewritten; when the replacement text has a different length a
 * new buffer is allocated, the old one is freed and \p *msg / \p *pLenMsg
 * are updated. If nothing is detected, or the new buffer cannot be
 * allocated, all arguments are left untouched.
 *
 * \param pWrkrData worker context providing anonymization configuration.
 * \param msg pointer to the message buffer pointer; may be replaced by a
 *        newly malloc()ed buffer (the previous buffer is freed).
 * \param pLenMsg pointer to the buffer length, updated when resized.
 * \param idx current scan index, advanced past any rewritten address.
 * \param hasChanged set to 1 when the buffer content was rewritten.
 */
static void anonEmbedded(wrkrInstanceData_t *pWrkrData, uchar **msg, int *pLenMsg, int *idx, int *hasChanged) {
    size_t iplen = 0;
    size_t v4Start;
    char address[46];
    const int start = *idx;

    if (!syntax_embedded(*msg + start, *pLenMsg - start, &iplen, &v4Start)) {
        return;
    }

    assert(iplen < sizeof(address));
    if (iplen >= sizeof(address)) {
        /* keeps the copy below bounded when assert() is compiled out */
        return;
    }

    uchar *const oldMsg = *msg;
    const size_t oldLen = *pLenMsg;
    const size_t tailStart = (size_t)start + iplen; /* first byte after the original address */
    const size_t tailLen = oldLen - tailStart;

    getip(oldMsg + start, iplen, address);
    process_embedded(address, pWrkrData, v4Start);
    const size_t newIpLen = strlen(address);

    if (newIpLen == iplen) {
        /* Same length: overwrite in place, the tail is already where it belongs. */
        memcpy(oldMsg + start, address, newIpLen);
    } else {
        /* Length changed: assemble prefix + new address + tail in a fresh buffer. */
        const int newLen = *pLenMsg + ((int)newIpLen - (int)iplen);
        uchar *const resized = (uchar *)malloc((size_t)newLen + 1);
        if (resized == NULL) {
            /* out of memory: keep the original message and state unchanged */
            return;
        }
        memcpy(resized, oldMsg, start);
        memcpy(resized + start, address, newIpLen);
        if (tailLen > 0) {
            memcpy(resized + start + newIpLen, oldMsg + tailStart, tailLen);
        }
        resized[newLen] = '\0'; /* keep the buffer NUL-terminated */
        free(oldMsg);
        *msg = resized;
        *pLenMsg = newLen;
    }

    *idx = start + (int)newIpLen;
    *hasChanged = 1;
}

BEGINdoAction_NoStrings
    smsg_t **ppMsg = (smsg_t **)pMsgData;
    smsg_t *pMsg = ppMsg[0];
    uchar *msg;
    int lenMsg;
    int i;
    int hasChanged = 0;
    CODESTARTdoAction;
    lenMsg = getMSGLen(pMsg);
    msg = (uchar *)strdup((char *)getMSG(pMsg));

    for (i = 0; i <= lenMsg - 2; i++) {
        if (pWrkrData->pData->embeddedIPv4.enable) {
            anonEmbedded(pWrkrData, &msg, &lenMsg, &i, &hasChanged);
        }
        if (pWrkrData->pData->ipv4.enable) {
            anonipv4(pWrkrData, &msg, &lenMsg, &i, &hasChanged);
        }
        if (pWrkrData->pData->ipv6.enable) {
            anonipv6(pWrkrData, &msg, &lenMsg, &i, &hasChanged);
        }
    }
    if (hasChanged) {
        MsgReplaceMSG(pMsg, msg, lenMsg);
    }
    free(msg);
ENDdoAction


/* NOTE(review): judging by its name, this macro supplies the parseSelectorAct
 * entry point for a module without legacy-style configuration support --
 * confirm against the macro definition in module-template.h.
 */
NO_LEGACY_CONF_parseSelectorAct


    /* Module exit hook: no module-specific cleanup code is placed between the
     * start and end macros. */
    BEGINmodExit CODESTARTmodExit;
ENDmodExit


/* Entry point query hook, assembled entirely from the framework's macro sets
 * (standard output module, OMOD8, CONF2 output module and CONF2 queries).
 * NOTE(review): presumably each macro maps entry point names to this module's
 * handler functions -- confirm against module-template.h.
 */
BEGINqueryEtryPt
    CODESTARTqueryEtryPt;
    CODEqueryEtryPt_STD_OMOD_QUERIES;
    CODEqueryEtryPt_STD_OMOD8_QUERIES;
    CODEqueryEtryPt_STD_CONF2_OMOD_QUERIES;
    CODEqueryEtryPt_STD_CONF2_QUERIES;
ENDqueryEtryPt


/* Module initialization hook: reports the module interface version this
 * module was built for and emits a debug line naming the rsyslog version it
 * was compiled with.
 */
BEGINmodInit()
    CODESTARTmodInit;
    *ipIFVersProvided = CURR_MOD_IF_VERSION;  // we only support the current interface specification
    /* Two macro invocations share the next line: the framework's
     * QueryRegCFSLineHdlr init step, followed by the debug message. */
    CODEmodInit_QueryRegCFSLineHdlr DBGPRINTF("mmanon: module compiled with rsyslog version %s.\n", VERSION);
ENDmodInit
