// Characters that make up the digits of a phone number. Lookup is by
// character code, e.g. numbers['7'].
private constant numbers = (<
    // ASCII digits.
    '0', '1', '2', '3', '4',
    '5', '6', '7', '8', '9',

    // Plus sign, handled by the parser in the same way as a digit.
    '+',

    // FULLWIDTH DIGIT ZERO .. FULLWIDTH DIGIT NINE.
    '\xFF10', '\xFF11', '\xFF12', '\xFF13', '\xFF14',
    '\xFF15', '\xFF16', '\xFF17', '\xFF18', '\xFF19',
>);
 
// Separator characters that may appear between the digits of a phone
// number without ending it. Lookup is by character code.
private constant whitespace = (<
    // Plain and no-break space, ASCII punctuation and control characters.
    ' ', '\240', '-', '.', '(', ')', '\t', '\n', '\r',

    // Japanese "no" particles.
    '\x30CE',   // KATAKANA LETTER NO
    '\x306E',   // HIRAGANA LETTER NO

    // Fullwidth punctuation.
    '\xFF0D',   // FULLWIDTH HYPHEN-MINUS
    '\xFF0E',   // FULLWIDTH FULL STOP
    '\xFF08',   // FULLWIDTH LEFT PARENTHESIS
    '\xFF09',   // FULLWIDTH RIGHT PARENTHESIS

    // Invisible and ideographic spacing.
    '\x200B',   // ZERO WIDTH SPACE
    '\x2060',   // WORD JOINER
    '\x3000',   // IDEOGRAPHIC SPACE
    '\xFEFF',   // ZERO WIDTH NO-BREAK SPACE
>);
 
//! Parse phone numbers from plain text. Separates text into text and
//! numbers. Whitespace around numbers is put in the text parts.
//!
//! A number consists of at least 5 and at most 30 digits (ASCII '0'-'9'
//! or fullwidth UFF10-UFF19). A '+' is accepted wherever a digit is and
//! is kept in the parsed number, but it does not count towards the
//! number of digits. Between its digits a number can also contain
//! these characters:
//!
//! ' ', '\240', '-', '.', '(', ')', '\t', '\n', '\r',
//! U30CE (KATAKANA LETTER NO), U306E (HIRAGANA LETTER NO),
//! UFF0D (FULLWIDTH HYPHEN-MINUS), UFF0E (FULLWIDTH FULL STOP), 
//! UFF08 (FULLWIDTH LEFT PARENTHESIS), UFF09 (FULLWIDTH RIGHT PARENTHESIS),
//! U200B (ZERO WIDTH SPACE), U2060 (WORD JOINER), U3000 (IDEOGRAPHIC SPACE), 
//! UFEFF (ZERO WIDTH NO-BREAK SPACE)
//!
//! Todo: * Remove leading zeroes inside parentheses
//!
//! @param in
//!	The text to scan.
//!
//! @returns
//!	Returns an array of strings and string tuples. String entries
//!	are plain text, and string tuples (arrays of strings) are
//!	phone numbers. The first element in the tuple is the phone
//!	number without whitespace and with fullwidth digits converted
//!	to ASCII digits, the second is the matched part of the input
//!	with the original whitespace kept verbatim. Digit runs that
//!	are too short or too long to be phone numbers are returned as
//!	part of the surrounding text. The array always contains at
//!	least one element.
 
 
// thread about japanese phone numbers: http://www.appelsiini.net/keitai-l/archives/2001-01/0186.html / ee
 
array(string|array(string)) parse_phonenumbers(string in)
{
    int in_phonenumber = 0;     // Non-zero while scanning a candidate number.
    int startpos = 0;           // Start of the not yet emitted part of in.
    array(string|array(string)) res = ({});
    int last_num_pos;           // Position of the latest digit or '+'.
    int num_digits;             // Digits seen in the current candidate.
    string number;              // Normalized candidate, without separators.

    void add_string(string s)
    {
        // Add to last element if last element is a string, otherwise add
        // a new element to the res array.

        if(sizeof(res) && stringp(res[-1]))
            res[-1] += s;
        else
            res += ({ s });
    };

    void end_number()
    {
        // Emit the candidate in[startpos..last_num_pos], either as a
        // phone number tuple or, if it has the wrong number of digits,
        // as plain text. Separators after the last digit are left for
        // the following text part.

        string verbatim = in[startpos..last_num_pos];
        if(num_digits >= 5 && num_digits < 31)
            res += ({ ({ number, verbatim }) });
        else
            add_string(verbatim);
        in_phonenumber = 0;
        number = 0;
        startpos = last_num_pos+1;
    };

    foreach(in; int pos; int char)
    {
        if(numbers[char])
        {
            last_num_pos = pos;
            if(!in_phonenumber)
            {
                // A new candidate starts here; everything before it is
                // text. Going through add_string() keeps it merged with
                // a preceding rejected candidate.
                add_string( in[startpos..pos-1] );
                num_digits = 0;
                in_phonenumber = 1;
                number = "";
                startpos = pos;
            }
            // '+' is part of the number but is not a digit.
            if(char != '+')
                num_digits++;
            // Fold fullwidth digits to their ASCII counterparts.
            if(char >= '\xFF10')
                char -= '\xFF10' - '0';
            number += sprintf("%c", char);
        }
        else if(in_phonenumber && !whitespace[char])
            end_number();
    }

    // Input ended in the middle of a candidate.
    if(in_phonenumber)
        end_number();

    // Remaining text. An empty rest is only added when nothing else
    // has been, so that the result is never an empty array.
    string rest = in[startpos..];
    if(sizeof(rest) || !sizeof(res))
        add_string(rest);

    return res;
}
 
// Manual smoke test: runs parse_phonenumbers() over a fixed list of
// sample texts and writes each result to stderr in %O format, one
// result per input and in list order. Nothing is asserted; the
// output is meant to be inspected by hand.
void test()
{
    array(string) samples = ({
        // Western formats.
        "04800, +4722852600, 61874955",
        "\251""2005 Google - S\366ker igenom 1 305 093 600 bilder",
        "foo bar gazonk",
        "NO-0175 OSLO 1234324523 a 12",
        " Telefon: 2340 7320 Foo 0708-343002 a",
        " Telefon: +46 708343002",
        "Telefon: 2340\240""7320\240\240Telefaks: 2340\240""7301",

        // Wide-character text.
        "\23006\30252\30332\30351\30373\30337\30313\23006",
        "\20441\113373\105161\72552\51767\30000\30306\30271\30310\30332\30374\30270\23005",

        // Half-width digits with various separators.
        "half-width numbers",
        "hyphen: 03-4444-4444",
        "full-width hyphen: 03\177415""4444\177415""4444",
        "hiragana no: 03\30156""4444\30156""4444",
        "hiragana no and hyphen: 03\30156""4444-4444",
        "hiragana no and full-width hyphen: 03\30156""4444\177415""4444",

        // Full-width digits with various separators.
        "full-width numbers",
        "hyphen: \177420\177423-\177424\177424\177424\177424-\177424\177424\177424\177424",
        "full-width hyphen: \177420\177423\177415\177424\177424\177424\177424\177415\177424\177424\177424\177424",
        "hiragana no: \177420\177423\30156\177424\177424\177424\177424\30156\177424\177424\177424\177424",
        "hiragana no and hyphen: \177420\177423\30156\177424\177424\177424\177424-\177424\177424\177424\177424",
        "hiragana no and full-width hyphen: \177420\177423\30156\177424\177424\177424\177424\177415\177424\177424\177424\177424",
        "parentheses: \177420\177423\177410\177424\177424\177424\177424\177411\177424\177424\177424\177424",
    });

    foreach(samples, string sample)
        werror("%O\n", parse_phonenumbers(sample));
}