import logging
import time
import re
import requests
import pickle
from requests.structures import CaseInsensitiveDict
from gensim.models import KeyedVectors
from src.configLoader import setup_logging
import os
from dotenv import load_dotenv
import csv
import numpy as np
import concurrent.futures
class CemantixSolver:
"""
Automatic solver for the Cemantix word game.
This solver uses a Word2Vec model and a vectorized similarity-matching strategy
to guess the hidden word by querying the Cemantix API for similarity scores.
"""
def __init__(self, config):
"""
Initialize the solver with the provided configuration.
:param dict config: Dictionary containing configuration keys (see configLoader).
"""
load_dotenv()
setup_logging(config["log_level"], config["log_file"])
self.logger = logging.getLogger(__name__)
if not(os.getenv("NTFY_URL") and os.getenv("NTFY_SUBJECT")):
self.logger.info("No NTFY config found")
self.start_words = config["start_words"]
self.api_delay = config["api_delay"]
self.model_path = config["model_path"]
self.schema = config["schema"]
self.url = config["url"]
self.user_agent = config["user_agent"]
self.content_type = config["content_type"]
self.max_retries = config["max_retries"]
self.similarity_delta = config["similarity_delta"]
self.max_workers = config["max_workers"]
self.headers = CaseInsensitiveDict({
"Content-Type": self.content_type,
"Host": self.url,
"Origin": f"{self.schema}://{self.url}",
"referrer": f"{self.schema}://{self.url}/",
"User-Agent": self.user_agent
})
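# Request headers mirror a regular browser call to the Cemantix site
# (Host, Origin, referrer, User-Agent).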
self.similar_cache = {}
self.logger.info("Loading model '%s'", self.model_path)
self.model = KeyedVectors.load_word2vec_format(self.model_path, binary=True, unicode_errors="ignore")
self.model.fill_norms(force=True)
self.normalized_vectors = self.model.get_normed_vectors()
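# With unit-norm vectors, the cosine similarities computed in solve() reduce to plain dot products.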
self.logger.info("Model loaded with normalized vectors.")
def __get_puzzle_number(self):
"""
Fetch the current day's puzzle number from the Cemantix website.
:returns: The puzzle number as an integer if found, otherwise None.
:rtype: int or None
"""
self.logger.info("Getting puzzle number")
resp = requests.get(f"{self.schema}://{self.url}", headers=self.headers, timeout=30)
if resp.status_code == 200:
match = re.search(r'data-puzzle-number="(\d+)"', resp.text)
if match:
puzzle_number = int(match.group(1))
self.logger.info("Puzzle number: %d", puzzle_number)
return puzzle_number
self.logger.error("No puzzle number found")
return None
def __get_score(self, word, day):
"""
Send a word to the API and retrieve its similarity score.
:param str word: The word to test.
:param int day: The puzzle number for which to retrieve the score.
:returns: The similarity score (1.0 when the hidden word is found), or None if the request failed or the word is invalid.
:rtype: float or None
"""
for attempt in range(self.max_retries):
try:
url = f"{self.schema}://{self.url}/score?n={day}"
resp = requests.post(url, headers=self.headers, data=f"word={word}".encode("utf-8"), timeout=30)
if resp.status_code != 200:
    return None
resp_json = resp.json()
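# The score endpoint replies with JSON: 'e' holds an error message when the word is
# rejected, 's' the similarity score otherwise.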
if 'e' in resp_json:
self.logger.warning("Word '%s' error: %s", word, resp_json['e'])
return None
return resp_json['s']
except Exception as e:
self.logger.warning("Error fetching score for '%s': %s (retry %d/%d)", word, e, attempt+1, self.max_retries)
time.sleep(self.api_delay)
return None
def __log_and_notify(self, word, score, exec_time):
"""
Send a notification when the solution is found.
:param str word: The found word (the solution).
:param float score: The similarity score of the found word.
:param float exec_time: Execution time in seconds.
"""
msg = f"Word found: {word} (score : {score}), Requests: {self.request_count}, Execution time: {exec_time:.2f} sec"
self.logger.info("Résultat final → %s", msg)
self.__ntfy(msg)
return
def __ntfy(self, msg):
"""
Send a notification containing msg using the NTFY API.
:param str msg: Message to send as notification
:returns: None
"""
token = os.getenv("NTFY_TOKEN")
subject = os.getenv("NTFY_SUBJECT")
ntfy_url = os.getenv("NTFY_URL")
if subject and ntfy_url:
    # Publish via the ntfy HTTP API: POST the message to <url>/<topic> with an optional Bearer token.
    ntfy_headers = {"Authorization": f"Bearer {token}"} if token else {}
    try:
        requests.post(f"{ntfy_url}/{subject}", data=msg.encode("utf-8"), headers=ntfy_headers, timeout=30)
    except requests.RequestException as exc:
        self.logger.warning("Failed to send NTFY notification: %s", exc)
else:
    self.logger.error("No NTFY config found, set it up inside the .env file")
def __fetch_scores_parallel(self, words, day, max_workers=5):
"""
Fetches the scores for a list of words in parallel.
:param list words: List of words to score.
:param int day: Puzzle number.
:param int max_workers: Maximum number of concurrent threads.
:returns: Dictionary {word: score} of valid words and their scores.
:rtype: dict
"""
startings_score = {}
def fetch_score(word):
score = self.__get_score(word, day)
return word, score
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_word = {executor.submit(fetch_score, w): w for w in words}
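# Gather results as requests complete; words the API rejects come back with score None and are skipped.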
for future in concurrent.futures.as_completed(future_to_word):
w = future_to_word[future]
try:
word, score = future.result()
self.request_count += 1
if score is not None:
startings_score[word] = score
except Exception as exc:
self.logger.warning('Error fetching score for word %r: %s', w, exc)
return startings_score
def solve(self, day=None, ntfy=False):
"""
Optimized Cemantix solver using NumPy vectorization to find words whose similarity
with the start words matches the expected values within a small tolerance (similarity_delta).
:param int day: (Optional) Puzzle number to solve. If None, the current day's puzzle will be used.
:param bool ntfy: (Optional) Send a notification using the NTFY configuration from .env.
:returns: A tuple (best_word, best_score) or None if no solution was found.
:rtype: tuple or None
"""
self.logger.info("Local solver started")
start_time = time.time()
self.request_count = 0
word_found = None
best_score = 0.0
if day is None:
day = self.__get_puzzle_number()
if day is None:
return None
# Fetching the starting words' scores
startings_score = self.__fetch_scores_parallel(self.start_words, day, self.max_workers)
# Check if we have already found a solution
for w, score in startings_score.items():
if score == 1.0:
self.logger.info("Solution found as a starting word: %s → %.4f", w, 1.0)
word_found = w
break
self.logger.info(f"Starting scores: {startings_score}")
if word_found:
exec_time = time.time() - start_time
self.logger.info(f"Execution time : {exec_time}")
if ntfy:
self.__log_and_notify(word_found, 1.0, exec_time)
return word_found, 1.0
# 2. Computing cosine similarity between all words and starting words
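# Core assumption of the local search: the game's scores behave like cosine similarities
# in the same (or a very similar) embedding space, so the hidden word should reproduce
# the API scores of the start words, up to similarity_delta, in the local model too.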
# 2.1 Getting starting words vectors
try:
start_vectors = np.array([self.model.get_vector(w) for w in startings_score])
except KeyError as e:
self.logger.error(f"Start word not in model vocabulary: {e}")
return None
start_scores = np.array(list(startings_score.values()))
# 2.2 Getting model vectors
all_vectors = self.normalized_vectors
all_words = self.model.index_to_key
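# index_to_key lists the vocabulary in the same order as the rows of the vector matrix.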
# 2.3 Normalize the starting word vectors (fetched above)
start_vectors = start_vectors / np.linalg.norm(start_vectors, axis=1, keepdims=True)
# 2.4 Computing cosine similarity
cosine_similarities = np.dot(start_vectors, all_vectors.T)
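# Shape (n_start_words, vocab_size): entry [i, j] is the cosine similarity between
# start word i and vocabulary word j, since both sides are unit-normalized.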
# Using cosine similarity to find candidates
diffs = np.abs(cosine_similarities - start_scores[:, np.newaxis])
matches = np.all(diffs <= self.similarity_delta, axis=0)
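# A vocabulary word is kept only if its similarity to every start word lies within
# similarity_delta of the score the API reported for that start word.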
matching_words = [all_words[i] for i, ok in enumerate(matches) if ok]
self.logger.info(f"{len(matching_words)} matching candidates found after vectorized filtering.")
# 3. Check if words found are correct
for candidate in matching_words:
self.request_count += 1
score = self.__get_score(candidate, day)
if score is None:
continue
if score == 1.0:
self.logger.info("Solution found: %s → %.4f", candidate, 1.0)
word_found = candidate
best_score = 1.0
break
time.sleep(self.api_delay)
exec_time = time.time() - start_time
self.logger.info(f"Execution time : {exec_time}")
if word_found and ntfy:
self.__log_and_notify(word_found, best_score, exec_time)
return word_found, best_score
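
# Minimal usage sketch. The config keys mirror those read in __init__; every value
# below is an illustrative placeholder (paths, words and the target URL are
# assumptions, not the project's real defaults).
if __name__ == "__main__":
    example_config = {
        "log_level": "INFO",
        "log_file": "cemantix_solver.log",
        "start_words": ["maison", "musique", "politique"],
        "api_delay": 0.5,
        "model_path": "models/frwiki_word2vec.bin",
        "schema": "https",
        "url": "cemantix.example.org",
        "user_agent": "Mozilla/5.0",
        "content_type": "application/x-www-form-urlencoded",
        "max_retries": 3,
        "similarity_delta": 0.001,
        "max_workers": 5,
    }
    solver = CemantixSolver(example_config)
    result = solver.solve(ntfy=False)
    if result and result[0]:
        print(f"Found '{result[0]}' with score {result[1]}")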