Source code for src.CemantixSolver

import logging
import time
import re
import requests
import pickle
from requests.structures import CaseInsensitiveDict
from gensim.models import KeyedVectors
from src.configLoader import setup_logging
import os
import time
from dotenv import load_dotenv
import csv
import numpy as np
import concurrent.futures


[docs]
class CemantixSolver:
    """
    Automatic solver for the Cemantix word game.

    The solver asks the Cemantix API for the similarity score of a few start
    words, then uses a Word2Vec model to keep only the vocabulary words whose
    cosine similarity to every start word matches the score returned by the
    API (within ``similarity_delta``). The remaining candidates are submitted
    one by one until the API reports a score of 1.0.
    """

    def __init__(self, config):
        """
        Initialize the solver with the provided configuration.

        Loads the ``.env`` file, configures logging, copies the configuration
        values onto the instance, builds the HTTP headers sent with every
        request and loads the Word2Vec model together with its unit-norm vectors.

        :param dict config: Dictionary containing configuration keys (see configLoader).
        :raises KeyError: If a required configuration key is missing.
        """
        load_dotenv()
        setup_logging(config["log_level"], config["log_file"])
        self.logger = logging.getLogger(__name__)

        # Notifications are optional: only mention the missing configuration.
        if not (os.getenv("NTFY_URL") and os.getenv("NTFY_SUBJECT")):
            self.logger.info("No NTFY config found")

        self.start_words = config["start_words"]
        self.api_delay = config["api_delay"]
        self.model_path = config["model_path"]
        self.schema = config["schema"]
        self.url = config["url"]
        self.user_agent = config["user_agent"]
        self.content_type = config["content_type"]
        self.max_retries = config["max_retries"]
        self.similarity_delta = config["similarity_delta"]
        self.max_workers = config["max_workers"]

        self.headers = CaseInsensitiveDict({
            "Content-Type": self.content_type,
            "Host": self.url,
            "Origin": f"{self.schema}://{self.url}",
            # The HTTP header is spelled "Referer"; "referrer" is not a
            # header name that servers recognise.
            "Referer": f"{self.schema}://{self.url}/",
            "User-Agent": self.user_agent,
        })
        self.similar_cache = {}
        # Number of API requests issued by the current/last solve() call.
        # Initialised here so helpers never hit a missing attribute.
        self.request_count = 0

        self.logger.info("Loading model '%s'", self.model_path)
        self.model = KeyedVectors.load_word2vec_format(self.model_path, binary=True, unicode_errors="ignore")

        self.model.fill_norms(force=True)
        self.normalized_vectors = self.model.get_normed_vectors()
        self.logger.info("Model loaded with normalized vectors.")

    def __get_puzzle_number(self):
        """
        Fetch the current day's puzzle number from the Cemantix website.

        The number is read from the ``data-puzzle-number`` attribute of the
        home page.

        :returns: The puzzle number as an integer if found, otherwise None
            (network error, non-200 response or attribute not present).
        :rtype: int or None
        """
        self.logger.info("Getting puzzle number")
        try:
            resp = requests.get(f"{self.schema}://{self.url}", headers=self.headers, timeout=30)
        except requests.RequestException as exc:
            # Honour the documented contract instead of letting the error escape.
            self.logger.error("Could not fetch the puzzle page: %s", exc)
            return None

        if resp.status_code == 200:
            match = re.search(r'data-puzzle-number="(\d+)"', resp.text)
            if match:
                puzzle_number = int(match.group(1))
                self.logger.info("Puzzle number: %d", puzzle_number)
                return puzzle_number
        self.logger.error("No puzzle number found")
        return None

    def __get_score(self, word, day):
        """
        Send a word to the API and retrieve its similarity score.

        Network errors, malformed responses, HTTP 429 and HTTP 5xx responses
        are retried up to ``max_retries`` times, sleeping ``api_delay`` seconds
        between attempts. Any other non-200 status, or a response carrying an
        ``e`` (error) field, gives up immediately.

        :param str word: The word to test.
        :param int day: The puzzle number for which to retrieve the score.

        :returns: The similarity score reported by the API (the ``s`` field),
            or None if the request failed or the word was rejected.
        :rtype: float or None
        """
        url = f"{self.schema}://{self.url}/score?n={day}"
        payload = f"word={word}".encode("utf-8")

        for attempt in range(self.max_retries):
            try:
                resp = requests.post(url, headers=self.headers, data=payload, timeout=30)
                status = resp.status_code
                if status == 200:
                    resp_json = resp.json()
                    if 'e' in resp_json:
                        self.logger.warning("Word '%s' error: %s", word, resp_json['e'])
                        return None
                    return resp_json['s']
                # Check the status before parsing: error pages are often not JSON.
                if status != 429 and status < 500:
                    self.logger.warning("Unexpected HTTP %d while fetching score for '%s'", status, word)
                    return None
                self.logger.warning(
                    "HTTP %d while fetching score for '%s' (retry %d/%d)",
                    status, word, attempt + 1, self.max_retries,
                )
            except (requests.RequestException, ValueError, KeyError, TypeError) as e:
                self.logger.warning("Error fetching score for '%s': %s (retry %d/%d)", word, e, attempt+1, self.max_retries)
            time.sleep(self.api_delay)
        return None

    def __log_and_notify(self, word, score, exec_time):
        """
        Log the final result and send it as a notification.

        :param str word: The found word (the solution).
        :param float score: Score reported by the API for ``word``.
        :param float exec_time: Execution time in seconds.
        :returns: None
        """
        msg = f"Word found: {word} (score : {score}), Requests: {self.request_count}, Execution time: {exec_time:.2f} sec"
        self.logger.info("Résultat final → %s", msg)
        self.__ntfy(msg)

    def __ntfy(self, msg):
        """
        Send a notification containing msg using the NTFY HTTP API.

        Reads ``NTFY_URL``, ``NTFY_SUBJECT`` and the optional ``NTFY_TOKEN``
        from the environment. The message is POSTed to
        ``{NTFY_URL}/{NTFY_SUBJECT}``; delivery is best effort and failures
        are only logged.

        :param str msg: Message to send as notification
        :returns: None
        """
        token = os.getenv("NTFY_TOKEN")
        subject = os.getenv("NTFY_SUBJECT")
        ntfy_url = os.getenv("NTFY_URL")
        if not (subject and ntfy_url):
            self.logger.error("No NTFY config found, set it up inside of .env file")
            return

        # Plain HTTP request instead of a shell command: no quoting/injection
        # issues with the message and the token never shows up in a process list.
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        try:
            resp = requests.post(
                f"{ntfy_url}/{subject}",
                data=msg.encode("utf-8"),
                headers=headers,
                timeout=30,
            )
            resp.raise_for_status()
        except requests.RequestException as exc:
            self.logger.warning("Failed to send NTFY notification: %s", exc)

    def __fetch_scores_parallel(self, words, day, max_workers=5):
        """
        Fetches the scores for a list of words in parallel.

        Each completed request increments ``self.request_count``; words for
        which no score could be obtained are left out of the result.

        :param list words: List of words to score.
        :param int day: Puzzle number.
        :param int max_workers: Maximum number of concurrent threads.
        :returns: Dictionary {word: score} of valid words and their scores.
        :rtype: dict
        """
        scores = {}

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            pending = {executor.submit(self.__get_score, w, day): w for w in words}
            for future in concurrent.futures.as_completed(pending):
                word = pending[future]
                try:
                    score = future.result()
                except Exception as exc:
                    self.logger.warning('Error fetching score for word %r: %s', word, exc)
                    continue
                # Counted in the collecting thread, so no locking is needed.
                self.request_count += 1
                if score is not None:
                    scores[word] = score

        return scores

    def __find_candidates(self, startings_score):
        """
        Return the vocabulary words compatible with the start-word scores.

        A word is kept when, for every start word known to the model, the
        absolute difference between their cosine similarity and the score
        reported by the API is at most ``similarity_delta``. Start words that
        are missing from the model vocabulary are ignored with a warning.

        :param dict startings_score: Mapping {start word: API score}.
        :returns: List of matching words in vocabulary order, or None when no
            start word is present in the model.
        :rtype: list or None
        """
        vectors = []
        scores = []
        for word, score in startings_score.items():
            try:
                vectors.append(self.model.get_vector(word))
            except KeyError:
                # Dropping a constraint only widens the candidate set, so the
                # solution cannot be lost by skipping this word.
                self.logger.warning("Start word '%s' not in model vocabulary, ignoring it", word)
                continue
            scores.append(score)

        if not vectors:
            return None

        start_vectors = np.array(vectors)
        start_vectors = start_vectors / np.linalg.norm(start_vectors, axis=1, keepdims=True)
        start_scores = np.array(scores)

        # Rows: start words, columns: vocabulary words.
        cosine_similarities = np.dot(start_vectors, self.normalized_vectors.T)
        diffs = np.abs(cosine_similarities - start_scores[:, np.newaxis])
        matches = np.all(diffs <= self.similarity_delta, axis=0)

        all_words = self.model.index_to_key
        return [all_words[i] for i in np.flatnonzero(matches)]

    def solve(self, day=None, ntfy=False):
        """
        Solve a Cemantix puzzle.

        The start words are scored through the API, the model vocabulary is
        filtered with NumPy down to the words whose similarity with every
        start word matches those scores (within ``similarity_delta``), and the
        remaining candidates are submitted until one scores 1.0.

        :param int day: (Optional) Puzzle number to solve. If None, the current day's puzzle will be used.
        :param bool ntfy: (Optional) Send a notification using NTFY .env configuration
        :returns: ``(word, 1.0)`` when the solution is found, ``(None, 0.0)``
            when every candidate was tried without success, or None when the
            puzzle number could not be determined or no start word could be
            used (none scored, or none present in the model).
        :rtype: tuple or None
        """
        self.logger.info("Local solver started")
        start_time = time.time()
        self.request_count = 0

        if day is None:
            day = self.__get_puzzle_number()
            if day is None:
                return None

        # 1. Fetch the score of every start word.
        startings_score = self.__fetch_scores_parallel(self.start_words, day, self.max_workers)

        # A start word may already be the solution.
        word_found = None
        for w, score in startings_score.items():
            if score == 1.0:
                self.logger.info("Solution found as a starting word: %s → %.4f", w, 1.0)
                word_found = w
                break

        self.logger.info(f"Starting scores: {startings_score}")

        if word_found:
            exec_time = time.time() - start_time
            self.logger.info(f"Execution time : {exec_time}")
            if ntfy:
                self.__log_and_notify(word_found, 1.0, exec_time)
            return word_found, 1.0

        if not startings_score:
            # Without any reference score there is nothing to filter on.
            self.logger.error("No start word could be scored, cannot search for candidates")
            return None

        # 2. Keep only the words whose similarity profile matches the scores.
        matching_words = self.__find_candidates(startings_score)
        if matching_words is None:
            self.logger.error("None of the scored start words is in the model vocabulary")
            return None

        self.logger.info(f"{len(matching_words)} matching candidates found after vectorized filtering.")

        # 3. Submit the candidates until the API confirms one of them.
        best_score = 0.0
        for candidate in matching_words:
            self.request_count += 1
            score = self.__get_score(candidate, day)
            if score == 1.0:
                self.logger.info("Solution found: %s → %.4f", candidate, 1.0)
                word_found = candidate
                best_score = 1.0
                break
            # Throttle after every request, including words the API rejected.
            time.sleep(self.api_delay)

        exec_time = time.time() - start_time
        self.logger.info(f"Execution time : {exec_time}")
        if word_found and ntfy:
            self.__log_and_notify(word_found, best_score, exec_time)

        return word_found, best_score