federator/php/language.php

300 lines
10 KiB
PHP

<?php
/**
* SPDX-FileCopyrightText: 2024 Sascha Nitsch (grumpydeveloper) https://contentnation.net/@grumpydevelop
* SPDX-License-Identifier: GPL-3.0-or-later
* @author Author: Sascha Nitsch (grumpydeveloper)
*/
namespace Federator;
/**
 * Language abstraction class
 *
 * Picks the active language (explicit argument, "_lang" request parameter,
 * Accept-Language header, finally 'en'), lazily loads translation groups
 * from "<document root>/../lang/<language>/<group>.inc" and offers a simple
 * word-frequency based language guesser.
 *
 * @author Sascha Nitsch
 */
class Language
{
    /**
     * list of valid languages
     *
     * 'xy' is a pseudo language: printlang() returns "group:key" for it
     * instead of looking up a translation.
     *
     * @var array<string, bool> $validLanguages
     */
    private $validLanguages = [
        'de' => true,
        'en' => true,
        'xy' => true,
    ];

    /**
     * language to use
     *
     * @var string $uselang
     */
    private $uselang;

    /**
     * actual language data, indexed by group name and then by key
     *
     * @var array<string, array<string, string>> $lang
     */
    private $lang = [];

    /**
     * constructor that tries to autodetect language
     *
     * Order of precedence: the $uselang argument, the "_lang" request
     * parameter, the Accept-Language header, and finally 'en'. Candidates
     * that are not in the list of valid languages are skipped.
     *
     * @param ?string $uselang
     *          use this language instead of autodetection, set to null if no preference
     */
    public function __construct($uselang = null)
    {
        $this->lang = [];
        $chosen = $this->isValidLanguage($uselang) ? $uselang : null;
        if ($chosen === null && isset($_REQUEST['_lang']) && $this->isValidLanguage($_REQUEST['_lang'])) {
            $chosen = $_REQUEST['_lang'];
        }
        if ($chosen === null && isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
            $chosen = $this->languageFromAcceptHeader((string)$_SERVER['HTTP_ACCEPT_LANGUAGE']);
        }
        $this->uselang = ($chosen === null) ? 'en' : $chosen;
    }

    /**
     * check whether a value is one of the valid language codes
     *
     * @param mixed $code candidate language code
     * @return bool true if $code is a string listed in $validLanguages
     */
    private function isValidLanguage($code)
    {
        return is_string($code) && isset($this->validLanguages[$code]);
    }

    /**
     * find the first valid language in an Accept-Language header
     *
     * Entries are tried in the order they are listed; quality values are
     * ignored. Only the two-letter primary subtag is used, so "de",
     * "de-DE" and "de;q=0.8" all yield "de".
     *
     * @param string $header raw Accept-Language header value
     * @return ?string matching language code or null if none is valid
     */
    private function languageFromAcceptHeader($header)
    {
        foreach (explode(',', $header) as $range) {
            $matches = [];
            if (preg_match('/^\s*([A-Za-z]{2})(?:[-_;\s]|$)/', $range, $matches) === 1) {
                $candidate = strtolower($matches[1]);
                if ($this->isValidLanguage($candidate)) {
                    return $candidate;
                }
            }
        }
        return null;
    }

    /**
     * get the document root, falling back to the current directory
     *
     * @return string document root, or '.' if it is unset or empty
     */
    private function documentRoot()
    {
        $root = isset($_SERVER['DOCUMENT_ROOT']) ? (string)$_SERVER['DOCUMENT_ROOT'] : '';
        return ($root === '') ? '.' : $root;
    }

    /**
     * load the translations of a group into the cache if not done yet
     *
     * The include file is expected to fill the local variable $l with a
     * key => translation array. A plain require is used on purpose:
     * require_once would skip the file if another instance had already
     * included it and leave this instance with an empty group.
     *
     * @param string $group group name
     * @return bool true if the group is available in the cache
     */
    private function loadGroup($group)
    {
        if (isset($this->lang[$group])) {
            return true;
        }
        $file = $this->documentRoot() . '/../lang/' . $this->uselang . "/$group.inc";
        if (! is_file($file)) {
            return false;
        }
        $l = [];
        require $file;
        $this->lang[$group] = is_array($l) ? $l : [];
        return true;
    }

    /**
     * print translation of given group and id, optionally using variables
     *
     * The placeholders $0 to $8 in the translation are replaced by the
     * matching entries of $values; placeholders without a value are removed.
     * A missing translation is appended to logs/missingtrans.txt (best
     * effort) and a visible "&gt;&gt;group:key&lt;&lt;" marker is returned.
     *
     * @param string $group
     *          group name
     * @param string $key
     *          string name
     * @param array<mixed> $values
     *          optional values to replace
     * @return string translated string
     */
    public function printlang($group, $key, array $values = array())
    {
        if ($this->uselang === 'xy') {
            return "$group:$key";
        }
        $this->loadGroup($group);
        if (isset($this->lang[$group]) && array_key_exists($key, $this->lang[$group])) {
            $replacements = [];
            for ($i = 0; $i < 9; $i++) {
                $replacements['$' . $i] = isset($values[$i]) ? (string)$values[$i] : '';
            }
            // strtr replaces in a single pass, so a value that itself
            // contains "$1" is not substituted a second time
            return strtr((string)$this->lang[$group][$key], $replacements);
        }
        // logging is best effort only, a failure must not disturb the page output
        $fh = @fopen($this->documentRoot() . '/../logs/missingtrans.txt', 'a');
        if ($fh !== false) {
            fwrite($fh, $this->uselang . ":$group:$key\n");
            fclose($fh);
        }
        return "&gt;&gt;$group:$key&lt;&lt;";
    }

    /**
     * get keys (valid ids) of a named group
     *
     * @param string $group
     *          group name to fetch keys
     * @return list<string> list of keys, empty if the group file does not exist
     */
    public function getKeys(string $group)
    {
        $this->loadGroup($group);
        // @phan-suppress-next-line PhanPartialTypeMismatchReturn
        return array_keys(isset($this->lang[$group]) ? $this->lang[$group] : []);
    }

    /**
     * return current used language
     *
     * @return string current language
     */
    public function getLang()
    {
        return $this->uselang;
    }

    /**
     * guess language of text
     *
     * Counts how often the most common words of each supported language
     * occur in the text (case-sensitive, whole words only). A language wins
     * if it is the only one with the highest count and the runner-up reaches
     * less than 50% of that count; otherwise $default is returned.
     *
     * @param string $text input text
     * @param string $default default language
     * @param bool $debug debug flag, prints the cleaned text and the counters
     * @return string detected language
     */
    public static function guessLanguage($text, $default, $debug = false)
    {
        $supported_languages = [
            'en',
            'de',
        ];
        $wordList = [];
        // German word list
        // from http://wortschatz.uni-leipzig.de/Papers/top100de.txt
        // ('in' is left out of both lists, presumably because it is common in both languages)
        $wordList['de'] = [
            'die', 'der', 'und', /*'in',*/ 'zu', 'den', 'das', 'nicht', 'von', 'sie',
            'ist', 'des', 'sich', 'mit', 'dem', 'dass', 'er', 'es', 'ein', 'ich',
            'auf', 'so', 'eine', 'auch', 'als', 'an', 'nach', 'wie', 'im', 'für',
            'man', 'aber', 'aus', 'durch', 'wenn', 'nur', 'war', 'noch', 'werden',
            'bei', 'hat', 'wir', 'was', 'wird', 'sein', 'einen', 'welche', 'sind',
            'oder', 'zur', 'um', 'haben', 'einer', 'mir', 'über', 'ihm', 'diese',
            'einem', 'ihr', 'uns', 'da', 'zum', 'kann', 'doch', 'vor', 'dieser',
            'mich', 'ihn', 'du', 'hatte', 'seine', 'mehr', 'am', 'denn', 'nun',
            'unter', 'sehr', 'selbst', 'schon', 'hier', 'bis', 'habe', 'ihre',
            'dann', 'ihnen', 'seiner', 'alle', 'wieder', 'meine', 'Zeit', 'gegen',
            'vom', 'ganz', 'einzelnen', 'wo', 'muss', 'ohne', 'eines', 'können',
            'sei', 'geschrieben', 'instanzen', 'deutsch', 'aktualisierung', 'registrierung',
        ];
        // English word list
        // from http://en.wikipedia.org/wiki/Most_common_words_in_English
        $wordList['en'] = [
            'the', 'be', 'to', 'of', 'and', 'a', /*'in',*/
            'that', 'have', 'I', 'it', 'for', 'not', 'on', 'with', 'he',
            'as', 'you', 'do', 'at', 'this', 'but', 'his', 'by', 'from', 'they',
            'we', 'say', 'her', 'she', 'or', 'an', 'will', 'my', 'one', 'all',
            'would', 'there', 'their', 'what', 'so', 'up', 'out', 'if', 'about',
        ];
        // French word list
        // from https://1000mostcommonwords.com/1000-most-common-french-words/
        /*$wordList['fr'] = array ('comme', 'que', 'tait', 'pour', 'sur', 'sont', 'avec',
        'tre', 'un', 'ce', 'par', 'mais', 'que', 'est',
        'il', 'eu', 'la', 'et', 'dans');*/
        // Spanish word list
        // from https://spanishforyourjob.com/commonwords/
        /*$wordList['es'] = array ('que', 'no', 'a', 'la', 'el', 'es', 'y',
        'en', 'lo', 'un', 'por', 'qu', 'si', 'una',
        'los', 'con', 'para', 'est', 'eso', 'las');*/

        // clean out the input string: everything except letters, ':', '/'
        // and '.' becomes a separator. The match is Unicode aware so that
        // list entries with umlauts ('für', 'über', 'können') can be found.
        $stripped = strip_tags((string)$text);
        $txt = preg_replace('/[^\p{L}:\/.]+/u', ' ', $stripped);
        if ($txt === null) {
            // input is not valid UTF-8, fall back to the ASCII-only cleanup
            $txt = (string)preg_replace('/[^A-Za-z:\/.]+/', ' ', $stripped);
        }
        if ($debug) {
            echo "text: '$txt'\n";
        }

        // count every token once; this also covers the first and last word
        // of the text and directly repeated words
        $tokens = preg_split('/ +/', $txt, -1, PREG_SPLIT_NO_EMPTY);
        $frequency = array_count_values($tokens === false ? [] : $tokens);

        // count the occurrences of the most frequent words
        $counter = array_fill_keys($supported_languages, 0);
        foreach ($supported_languages as $language) {
            foreach ($wordList[$language] as $word) {
                $count = isset($frequency[$word]) ? $frequency[$word] : 0;
                if ($debug && $count > 0) {
                    echo $language . " " . $word . " => $count\n";
                }
                $counter[$language] += $count;
            }
        }
        if ($debug) {
            print_r($counter);
        }

        // get max counter value
        $max = max($counter);
        $maxs = array_keys($counter, $max, true);
        // if there are two winners or no hit at all - fall back to default!
        if ($max > 0 && count($maxs) === 1) {
            $winner = $maxs[0];
            // get runner-up (second place)
            $second = 0;
            foreach ($counter as $language => $count) {
                if ($language !== $winner && $count > $second) {
                    $second = $count;
                }
            }
            // apply arbitrary threshold of 50%
            if (($second / $max) < 0.5) {
                return $winner;
            }
        }
        return $default;
    }
}
/**
 * function called from smarty templates to print language translation
 *
 * The translation is looked up through the template variable "language".
 * If the 'lang' param is given, or the template has no usable "language"
 * variable, a fresh \Federator\Language instance is used instead.
 *
 * @param array<string, mixed> $params
 *          smarty params, used are 'group', 'key' and optionally 'var'
 *          (replacement values; a single non-array value is treated as $0)
 *          and 'lang' (language to use for this call)
 * @param \Smarty\Template $template
 *          template instance
 * @return string translated text
 */
function smarty_function_printlang($params, $template) : string
{
    $lang = $template->getTemplateVars("language");
<<<'PHAN'
@phan-var \Federator\Language $lang
PHAN;
    $forcelang = $params['lang'] ?? null;
    if ($forcelang !== null || ! is_object($lang)) {
        // passing null lets the constructor autodetect the language
        $lang = new \Federator\Language($forcelang);
    }
    $group = (string)($params['group'] ?? '');
    $key = (string)($params['key'] ?? '');
    $values = $params['var'] ?? [];
    if (! is_array($values)) {
        // printlang() only accepts arrays, wrap a single value
        $values = [$values];
    }
    return $lang->printlang($group, $key, $values);
}
/**
 * function called from smarty templates to set language translation as JS string
 *
 * Emits "window.translations.<group>.<key> = '<translation>';". The
 * translation is escaped for use inside a single-quoted JavaScript string
 * literal, so quotes, backslashes, line breaks or "</script>" in a
 * translation cannot break the generated script.
 *
 * @param array<string, mixed> $params
 *          smarty params, used are 'group', 'key' and optionally 'var'
 *          (replacement values; a single non-array value is treated as $0)
 *          and 'lang' (language to use for this call)
 * @param \Smarty\Template $template
 *          template instance
 * @return string translated text as JS line
 */
function smarty_function_printjslang($params, $template) : string
{
    $lang = $template->getTemplateVars("language");
    $forcelang = $params['lang'] ?? null;
    if ($forcelang !== null || ! is_object($lang)) {
        // passing null lets the constructor autodetect the language
        $lang = new \Federator\Language($forcelang);
    }
    $group = (string)($params['group'] ?? '');
    $key = (string)($params['key'] ?? '');
    $values = $params['var'] ?? [];
    if (! is_array($values)) {
        // printlang() only accepts arrays, wrap a single value
        $values = [$values];
    }
    // single pass replacement, the backslash added for a quote is not escaped again
    $escaped = strtr((string)$lang->printlang($group, $key, $values), [
        "\\" => "\\\\",
        "'" => "\\'",
        "\n" => "\\n",
        "\r" => "\\r",
        "</" => "<\\/",
    ]);
    return 'window.translations.' . $group . '.' . $key . ' = \'' . $escaped . '\';';
}