Hi,
when testing TABLE-returning UDFs, I noticed that there are some issues with handling of None values from Python.
Is there any magic approach on how to return NULLS / None values from such a UDF?
Please see my examples below which are all not working ;)
Kind regards,
Daniel
# 1 Does not work - Henk is just repeated in the second row
drop function if exists python_table;
CREATE FUNCTION python_table()
RETURNS TABLE(name STRING, country STRING, age INTEGER)
LANGUAGE PYTHON {
result = dict()
result['name'] = ['Henk', None, 'Elizabeth']
result['country'] = ['NL', 'USA', 'UK']
result['age'] = [25, 30, 33]
return result
};
SELECT * FROM python_table() ;
# 2 Also does not work - Henk is just repeated in the second row
drop function if exists python_table;
CREATE FUNCTION python_table()
RETURNS TABLE(name STRING, country STRING, age INTEGER)
LANGUAGE PYTHON {
result = dict()
names = numpy.empty(3, dtype=object)
names[0] = 'Henk'
names[2] = 'Elizabeth'
result['name'] = names
result['country'] = ['NL', 'USA', 'UK']
result['age'] = [25, 30, 33]
return result
};
SELECT * FROM python_table() ;
# 3 Also does not work - Henk is just repeated in the second row
drop function if exists python_table;
CREATE FUNCTION python_table()
RETURNS TABLE(name STRING, country STRING, age INTEGER)
LANGUAGE PYTHON {
result = dict()
names = numpy.full_like(numpy.arange(3, dtype=object), None)
#names[0] = 'Henk'
names[2] = 'Elizabeth'
result['name'] = names
result['country'] = ['NL', 'USA', 'UK']
result['age'] = [25, 30, 33]
return result
};
SELECT * FROM python_table() ;
# 4 breaks if all values for a column are None
# "Error converting dict return value "name": An array of size 3 was returned, yet we expect a list of 1 columns. The result is invalid.. "
drop function if exists python_table;
CREATE FUNCTION python_table()
RETURNS TABLE(name STRING, country STRING, age INTEGER)
LANGUAGE PYTHON {
result = dict()
names = numpy.full_like(numpy.arange(3, dtype=object), None)
result['name'] = names
result['country'] = ['NL', 'USA', 'UK']
result['age'] = [25, 30, 33]
return result
};
SELECT * FROM python_table() ;