aboutsummaryrefslogtreecommitdiffstats
path: root/mayor-orig/www/include/share/str/hyphen.php
blob: 0973e4d9d7b108216dda505373523f71399ab291 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
<?php

class huHyphen
{
    // member declaration
    public $Patterns = array();
    private $patternFilePath = '../print/share/huhyphn.tex';

    // method declaration
    function __construct() {

	$patterns = 0;
	$mbConv = array(		
	    mb_convert_encoding('ś', 'ISO-8859-2', 'UTF-8') => mb_convert_encoding('ű', 'ISO-8859-2', 'UTF-8'),
	    mb_convert_encoding('Ž', 'ISO-8859-2', 'UTF-8') => mb_convert_encoding('ő', 'ISO-8859-2', 'UTF-8'));

	if (file_exists($this->patternFilePath)) {
	    $fp = fopen($this->patternFilePath, 'r');
	    while ($aLine = fgets($fp, 128)) {
		// deprecated // if (!ereg('[\\%{}]', $aLine)) {
		if (!preg_match('#[\\%{}]#', $aLine)) {
		    // Cork --> ISO-8859-2 kódolás
		    $aLine = strtr($aLine, $mbConv);
		    $letter = false;
		    $key = $value = '';
		    $aLine = chop($aLine);
		    for ($i = 0; $i < strlen($aLine); $i++) {
			// deprecated // if (ereg('[0-9]',$aLine[$i])) {
			if (preg_match('#[0-9]#',$aLine[$i])) {
			    $value .= $aLine[$i];
			    $letter = false;
			} else {
			    if ($letter) $value .= '0';
			    $key .= $aLine[$i];
			    $letter = true;
			} 
		    }
		    if ($letter) $value .= '0';
		    if (!is_array($Patterns[strlen($key)])) $Patterns[strlen($key)] = array();
		    $Patterns[strlen($key)][$key] = $value;
		    $patterns++;
		} else {
		    //if (ereg('\\message\{(.*)\}', $aLine, $reg)) echo $reg[1]."\n";
		}
	    }
	} else { $GLOBALS['alert'][] = 'message:file_not_found:'.($this->patternFilePath); }
	for ($i = 0; $i < count($Patterns); $i++) {
	    if (!is_array($Patterns[$i])) $Patterns[$i] = array();
	}
	$this->Patterns = $Patterns;
    }
    function getPattern($key, $value) {
	$pattern = '';
	if (strlen($value) > strlen($key)) { $pattern = $value[0]; $value = substr($value, 1); }
	for ($i = 0; $i < strlen($key); $i++) {
	    $pattern .= $key[$i];
	    if (isset($value[$i])) $pattern .= $value[$i];
	}
	$pattern = str_replace('0','',$pattern);
	return $pattern;
    }
    public function hyphen($word) {
	$word = mb_convert_encoding($word, 'ISO-8859-2', 'UTF-8');
	if (strlen($word) > 1) {

	    $key = '.'.$word.'.';
	    $value = str_repeat('0', strlen($key));
	    $key = strtr($key, mb_convert_encoding('A-ZÁÄÉÍÓÖŐÚÜŰ', 'ISO-8859-2', 'UTF-8'), mb_convert_encoding('a-záäéíóöőúüű', 'ISO-8859-2', 'UTF-8'));
    	    $pattern = '';
	    for ($i = 1; $i <= strlen($key); $i++) {
		for ($j = 0; $j <= strlen($key)-$i; $j++) {
		    if (is_array($this->Patterns[$i]) && ($pattern = $this->Patterns[$i][ substr($key, $j, $i) ])) {
        		for ($k = 0; $k < strlen($pattern); $k++) {
			    if ($value[$j + $i - strlen($pattern) + $k] < $pattern[$k])
                		$value[$j + $i - strlen($pattern) + $k] = $pattern[$k];
        		}
		    }
		}
	    }
	    $hyphenated = '';
	    for ($i = 0; $i <= strlen($word)-2; $i++) {
		$hyphenated .= $word[$i];
		// deprecated // if (ereg('[13579]', $value[$i+1])) $hyphenated .= '-';
		if (preg_match('#[13579]#', $value[$i+1])) $hyphenated .= '-';
	    }
	    $hyphenated .= substr($word, -1);
	} else {
	    $hyphenated = $word;
	}
	return mb_convert_encoding($hyphenated, 'UTF-8', 'ISO-8859-2');
    }
}

$huHyphen = new huHyphen();
//while gets
//  break if $_=="\n"
//  test.hyphen($_.strip)
//end

//$test->hyphen(trim('állomásfőnök'));
//$test->hyphen(trim('álamigazgatás'));
//$test->hyphen(trim('informatika alapismeretek, programozás'));

?>