# Source code for morphkit.get_word_blocks

# morphkit/get_word_blocks.py
# SPDX-License-Identifier: CC-BY-4.0
# Copyright (c) 2025 Tony Jurg
__version__ = "0.0.1"

# import required packages
from typing import Callable, Dict, Any, List, Tuple
import beta_code
import urllib.parse
import requests
import time

def get_word_blocks(
    word_beta: str,
    api_endpoint: str,        # host (IP or name) and port of the Morpheus API endpoint
    language: str = "greek",  # Language: 'greek' (default) or 'latin'
    output: str = "full",     # Output format: "full" (default) or "compact"
    debug: bool = False,
) -> str:
    r"""Retrieve the raw word blocks data for a given beta-code word from a Morpheus endpoint.

    The word is percent-encoded, appended to ``http://<api_endpoint>/<language>/``
    together with the option string selected by ``output``, and fetched with a
    single HTTP GET request. The response body is returned unparsed.

    Args:
    -----
        :word_beta (str): The input word in beta-code format to look up.
            A backslash must be escaped when the word is written as an ordinary
            (non-raw) Python string literal: the beta-code ``a)nh/r\`` is
            written ``'a)nh/r\\'``.
        :api_endpoint (str): Host and port of the Morpheus API endpoint in the
            form ``host:port`` (e.g., '192.168.0.5:1315').
        :language (str): Optional argument. Defaults to `greek`. Sets the language
            of the word to analyse. It can be set to `greek` or `latin`.
        :output (str): Optional argument. Defaults to `full`. Output format of the
            Analytic block. Either `full` for the internal database format, or
            `compact` for a brief output.
        :debug (bool): Optional argument. Defaults to `False`. If set to `True`,
            prints the constructed URL, the status code, the elapsed time, the
            request and response headers, and the first 100 characters of the
            response.

    Returns:
    --------
        :str: The plain text response body returned by the endpoint for the
            requested beta-code form.

    Raises:
    -------
        :ValueError: The api_endpoint parameter is malformed (missing ':',
            empty host, or non-numeric port; format should be 'host(IP or name):port').
        :ValueError: The language parameter is invalid (only 'greek' and 'latin' are allowed).
        :ValueError: The output parameter is invalid (only 'full' and 'compact' are allowed).

    Notes:
    ------
        A non-2xx HTTP status does NOT raise: the ``requests`` HTTP error is
        printed and the response body is returned anyway. Exceptions raised by
        ``requests.get`` itself are not caught here. The request is sent
        without a timeout.

    Example:
    --------
    .. code-block:: python

        api_endpoint = "10.10.0.10:1315"
        blocks = morphkit.get_word_blocks('sune/rxomai', api_endpoint)
    """
    # --- Validate all arguments before doing any work -----------------------

    # A very basic check: `api_endpoint` must look like '<host>:<digits>'.
    host, separator, port_str = api_endpoint.partition(":")
    if not separator:
        raise ValueError(
            f"[get_word_blocks] Invalid api_endpoint '{api_endpoint}'. Missing ':' separator. "
            "Format should be 'host(IP or name):port'"
        )
    if not host:
        # Without this check ':1315' would be accepted and yield the URL 'http://:1315/...'.
        raise ValueError(
            f"[get_word_blocks] Invalid api_endpoint '{api_endpoint}': host part is empty. "
            "Format should be 'host(IP or name):port'"
        )
    if not port_str.isdigit():
        raise ValueError(
            f"[get_word_blocks] Invalid api_endpoint '{api_endpoint}': port '{port_str}' is not numeric. "
            "Format should be 'host(IP or name):port'"
        )

    # Mapping from the value of argument 'language' to the actual API path.
    lang_paths = {
        'greek': '/greek',
        'latin': '/latin',
    }
    if language not in lang_paths:
        raise ValueError(
            f"[get_word_blocks] Unknown language format {language!r}. "
            "Choose from {'greek', 'latin'}."
        )
    api_path = lang_paths[language]

    # Mapping from the value of argument 'output' to the actual API arguments.
    # NOTE(review): the 'full' value chains its two options with a second '?'
    # instead of '&'. It is kept unchanged because the endpoint's query parsing
    # is not visible from this file -- confirm against the Morpheus API.
    output_args = {
        'compact': '?opts=n',
        'full': '?opts=d?opts=n',
    }
    if output not in output_args:
        raise ValueError(
            f"[get_word_blocks] Unknown output format {output!r}. "
            "Choose from {'full', 'compact'}."
        )
    api_args = output_args[output]

    # --- 1. Encode the beta-code word for safe URL inclusion ----------------
    # safe='' also escapes '/', which is a regular beta-code character
    # (e.g. 'sune/rxomai') and must not be read as a path separator.
    encoded = urllib.parse.quote(word_beta, safe='')
    url = f"http://{api_endpoint}{api_path}/{encoded}{api_args}"
    if debug:
        print(f"[get_word_blocks] Sending GET request: {url}")

    # --- 2. Perform the HTTP GET request ------------------------------------
    # The timer wraps only the request so the reported time is the response time.
    start = time.perf_counter()
    resp = requests.get(url)
    elapsed = time.perf_counter() - start

    if debug:
        # Status and timing
        print(f"[get_word_blocks] Received status code: {resp.status_code}")
        print(f"[get_word_blocks] Response time: {elapsed:.3f}s")
        # Request headers
        print(f"[get_word_blocks] Request headers: {resp.request.headers}")
        # Response headers
        print(f"[get_word_blocks] Response headers: {resp.headers}")

    # --- 3. Check for HTTP errors -------------------------------------------
    # Best effort: a non-2xx status is reported but the body is still returned.
    try:
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print(f"[get_word_blocks] HTTP error: {e} (status code: {resp.status_code})")

    text = resp.text
    if debug:
        # Show the first 100 characters (or the whole thing if smaller).
        snippet = text[:100] + ("..." if len(text) > 100 else "")
        print(f"[get_word_blocks] Response snippet (max 100 bytes):\n{snippet}")

    return text