"""Collection of utilities to manipulate structured arrays.Most of these functions were initially implemented by John Hunter formatplotlib. They have been rewritten and extended for convenience."""importitertoolsimportnumpyasnpimportnumpy.maasmafromnumpyimportndarrayfromnumpy.maimportMaskedArrayfromnumpy.ma.mrecordsimportMaskedRecordsfromnumpy._core.overridesimportarray_function_dispatchfromnumpy._core.recordsimportrecarrayfromnumpy.lib._iotoolsimport_is_string_like_check_fill_value=np.ma.core._check_fill_value__all__=['append_fields','apply_along_fields','assign_fields_by_name','drop_fields','find_duplicates','flatten_descr','get_fieldstructure','get_names','get_names_flat','join_by','merge_arrays','rec_append_fields','rec_drop_fields','rec_join','recursive_fill_fields','rename_fields','repack_fields','require_fields','stack_arrays','structured_to_unstructured','unstructured_to_structured',]def_recursive_fill_fields_dispatcher(input,output):return(input,output)
@array_function_dispatch(_recursive_fill_fields_dispatcher)
def recursive_fill_fields(input, output):
    """
    Fills fields from output with fields from input,
    with support for nested structures.

    Parameters
    ----------
    input : ndarray
        Input array.
    output : ndarray
        Output array.

    Notes
    -----
    * `output` should be at least the same size as `input`

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, 10.), (2, 20.)],
    ...              dtype=[('A', np.int64), ('B', np.float64)])
    >>> b = np.zeros((3,), dtype=a.dtype)
    >>> rfn.recursive_fill_fields(a, b)
    array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

    """
    newdtype = output.dtype
    for field in newdtype.names:
        try:
            current = input[field]
        except ValueError:
            continue
        if current.dtype.names is not None:
            recursive_fill_fields(current, output[field])
        else:
            output[field][:len(current)] = current
    return output

def _get_fieldspec(dtype):
    """
    Produce a list of name/dtype pairs corresponding to the dtype fields

    Similar to dtype.descr, but the second item of each tuple is a dtype, not a
    string. As a result, this handles subarray dtypes

    Can be passed to the dtype constructor to reconstruct the dtype, noting that
    this (deliberately) discards field offsets.

    Examples
    --------
    >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)])
    >>> dt.descr
    [(('a', 'A'), '<i8'), ('b', '<f8', (3,))]
    >>> _get_fieldspec(dt)
    [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))]

    """
    if dtype.names is None:
        # .descr returns a nameless field, so we should too
        return [('', dtype)]
    else:
        fields = ((name, dtype.fields[name]) for name in dtype.names)
        # keep any titles, if present
        return [
            (name if len(f) == 2 else (f[2], name), f[0])
            for name, f in fields
        ]

def get_names(adtype):
    """
    Returns the field names of the input datatype as a tuple. Input datatype
    must have fields otherwise error is raised.

    Parameters
    ----------
    adtype : dtype
        Input datatype

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.get_names(np.empty((1,), dtype=[('A', int)]).dtype)
    ('A',)
    >>> rfn.get_names(np.empty((1,), dtype=[('A', int), ('B', float)]).dtype)
    ('A', 'B')
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> rfn.get_names(adtype)
    ('a', ('b', ('ba', 'bb')))
    """
    listnames = []
    names = adtype.names
    for name in names:
        current = adtype[name]
        if current.names is not None:
            listnames.append((name, tuple(get_names(current))))
        else:
            listnames.append(name)
    return tuple(listnames)

def get_names_flat(adtype):
    """
    Returns the field names of the input datatype as a tuple. Input datatype
    must have fields otherwise error is raised. Nested structures are
    flattened beforehand.

    Parameters
    ----------
    adtype : dtype
        Input datatype

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.get_names_flat(np.empty((1,), dtype=[('A', int)]).dtype) is None
    False
    >>> rfn.get_names_flat(np.empty((1,), dtype=[('A', int), ('B', str)]).dtype)
    ('A', 'B')
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> rfn.get_names_flat(adtype)
    ('a', 'b', 'ba', 'bb')
    """
    listnames = []
    names = adtype.names
    for name in names:
        listnames.append(name)
        current = adtype[name]
        if current.names is not None:
            listnames.extend(get_names_flat(current))
    return tuple(listnames)

def _zip_dtype(seqarrays, flatten=False):
    newdtype = []
    if flatten:
        for a in seqarrays:
            newdtype.extend(flatten_descr(a.dtype))
    else:
        for a in seqarrays:
            current = a.dtype
            if current.names is not None and len(current.names) == 1:
                # special case - dtypes of 1 field are flattened
                newdtype.extend(_get_fieldspec(current))
            else:
                newdtype.append(('', current))
    return np.dtype(newdtype)


def _zip_descr(seqarrays, flatten=False):
    """
    Combine the dtype description of a series of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays
    flatten : {boolean}, optional
        Whether to collapse nested descriptions.
    """
    return _zip_dtype(seqarrays, flatten=flatten).descr

def get_fieldstructure(adtype, lastname=None, parents=None,):
    """
    Returns a dictionary with fields indexing lists of their parent fields.

    This function is used to simplify access to fields nested in other fields.

    Parameters
    ----------
    adtype : np.dtype
        Input datatype
    lastname : optional
        Last processed field name (used internally during recursion).
    parents : dictionary
        Dictionary of parent fields (used internally during recursion).

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = np.dtype([('A', int),
    ...                    ('B', [('BA', int),
    ...                           ('BB', [('BBA', int), ('BBB', int)])])])
    >>> rfn.get_fieldstructure(ndtype)
    ... # XXX: possible regression, order of BBA and BBB is swapped
    {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}

    """
    if parents is None:
        parents = {}
    names = adtype.names
    for name in names:
        current = adtype[name]
        if current.names is not None:
            if lastname:
                parents[name] = [lastname, ]
            else:
                parents[name] = []
            parents.update(get_fieldstructure(current, name, parents))
        else:
            lastparent = [_ for _ in (parents.get(lastname, []) or [])]
            if lastparent:
                lastparent.append(lastname)
            elif lastname:
                lastparent = [lastname, ]
            parents[name] = lastparent or []
    return parents

def _izip_fields_flat(iterable):
    """
    Returns an iterator of concatenated fields from a sequence of arrays,
    collapsing any nested structure.

    """
    for element in iterable:
        if isinstance(element, np.void):
            yield from _izip_fields_flat(tuple(element))
        else:
            yield element


def _izip_fields(iterable):
    """
    Returns an iterator of concatenated fields from a sequence of arrays.

    """
    for element in iterable:
        if (hasattr(element, '__iter__') and
                not isinstance(element, str)):
            yield from _izip_fields(element)
        elif isinstance(element, np.void) and len(tuple(element)) == 1:
            # this statement is the same from the previous expression
            yield from _izip_fields(element)
        else:
            yield element


def _izip_records(seqarrays, fill_value=None, flatten=True):
    """
    Returns an iterator of concatenated items from a sequence of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays.
    fill_value : {None, integer}
        Value used to pad shorter iterables.
    flatten : {True, False}
        Whether to collapse nested fields into a flat sequence of items.
    """
    # Should we flatten the items, or just use a nested approach
    if flatten:
        zipfunc = _izip_fields_flat
    else:
        zipfunc = _izip_fields

    for tup in itertools.zip_longest(*seqarrays, fillvalue=fill_value):
        yield tuple(zipfunc(tup))


def _fix_output(output, usemask=True, asrecarray=False):
    """
    Private function: return a recarray, a ndarray, a MaskedArray
    or a MaskedRecords depending on the input parameters
    """
    if not isinstance(output, MaskedArray):
        usemask = False
    if usemask:
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        output = ma.filled(output)
        if asrecarray:
            output = output.view(recarray)
    return output


def _fix_defaults(output, defaults=None):
    """
    Update the fill_value and masked data of `output`
    from the default given in a dictionary defaults.
    """
    names = output.dtype.names
    (data, mask, fill_value) = (output.data, output.mask, output.fill_value)
    for (k, v) in (defaults or {}).items():
        if k in names:
            fill_value[k] = v
            data[k][mask[k]] = v
    return output


def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None,
                             usemask=None, asrecarray=None):
    return seqarrays

@array_function_dispatch(_merge_arrays_dispatcher)
def merge_arrays(seqarrays, fill_value=-1, flatten=False,
                 usemask=False, asrecarray=False):
    """
    Merge arrays field by field.

    Parameters
    ----------
    seqarrays : sequence of ndarrays
        Sequence of arrays
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    flatten : {False, True}, optional
        Whether to collapse nested fields.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
    array([( 1, 10.), ( 2, 20.), (-1, 30.)],
          dtype=[('f0', '<i8'), ('f1', '<f8')])

    >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64),
    ...                   np.array([10., 20., 30.])), usemask=False)
    array([(1, 10.0), (2, 20.0), (-1, 30.0)],
          dtype=[('f0', '<i8'), ('f1', '<f8')])
    >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]),
    ...                   np.array([10., 20., 30.])),
    ...                  usemask=False, asrecarray=True)
    rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)],
              dtype=[('a', '<i8'), ('f1', '<f8')])

    Notes
    -----
    * Without a mask, the missing value will be filled with something,
      depending on its corresponding type:

      * ``-1``      for integers
      * ``-1.0``    for floating point numbers
      * ``'-'``     for characters
      * ``'-1'``    for strings
      * ``True``    for boolean values

    * XXX: I just obtained these values empirically
    """
    # Only one item in the input sequence ?
    if (len(seqarrays) == 1):
        seqarrays = np.asanyarray(seqarrays[0])
    # Do we have a single ndarray as input ?
    if isinstance(seqarrays, (ndarray, np.void)):
        seqdtype = seqarrays.dtype
        # Make sure we have named fields
        if seqdtype.names is None:
            seqdtype = np.dtype([('', seqdtype)])
        if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype:
            # Minimal processing needed: just make sure everything's a-ok
            seqarrays = seqarrays.ravel()
            # Find what type of array we must return
            if usemask:
                if asrecarray:
                    seqtype = MaskedRecords
                else:
                    seqtype = MaskedArray
            elif asrecarray:
                seqtype = recarray
            else:
                seqtype = ndarray
            return seqarrays.view(dtype=seqdtype, type=seqtype)
        else:
            seqarrays = (seqarrays,)
    else:
        # Make sure we have arrays in the input sequence
        seqarrays = [np.asanyarray(_m) for _m in seqarrays]
    # Find the sizes of the inputs and their maximum
    sizes = tuple(a.size for a in seqarrays)
    maxlength = max(sizes)
    # Get the dtype of the output (flattening if needed)
    newdtype = _zip_dtype(seqarrays, flatten=flatten)
    # Initialize the sequences for data and mask
    seqdata = []
    seqmask = []
    # If we expect some kind of MaskedArray, make a special loop.
    if usemask:
        for (a, n) in zip(seqarrays, sizes):
            nbmissing = (maxlength - n)
            # Get the data and mask
            data = a.ravel().__array__()
            mask = ma.getmaskarray(a).ravel()
            # Get the filling value (if needed)
            if nbmissing:
                fval = _check_fill_value(fill_value, a.dtype)
                if isinstance(fval, (ndarray, np.void)):
                    if len(fval.dtype) == 1:
                        fval = fval.item()[0]
                        fmsk = True
                    else:
                        fval = np.array(fval, dtype=a.dtype, ndmin=1)
                        fmsk = np.ones((1,), dtype=mask.dtype)
            else:
                fval = None
                fmsk = True
            # Store an iterator padding the input to the expected length
            seqdata.append(itertools.chain(data, [fval] * nbmissing))
            seqmask.append(itertools.chain(mask, [fmsk] * nbmissing))
        # Create an iterator for the data
        data = tuple(_izip_records(seqdata, flatten=flatten))
        output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength),
                          mask=list(_izip_records(seqmask, flatten=flatten)))
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        # Same as before, without the mask we don't need...
        for (a, n) in zip(seqarrays, sizes):
            nbmissing = (maxlength - n)
            data = a.ravel().__array__()
            if nbmissing:
                fval = _check_fill_value(fill_value, a.dtype)
                if isinstance(fval, (ndarray, np.void)):
                    if len(fval.dtype) == 1:
                        fval = fval.item()[0]
                    else:
                        fval = np.array(fval, dtype=a.dtype, ndmin=1)
            else:
                fval = None
            seqdata.append(itertools.chain(data, [fval] * nbmissing))
        output = np.fromiter(tuple(_izip_records(seqdata, flatten=flatten)),
                             dtype=newdtype, count=maxlength)
        if asrecarray:
            output = output.view(recarray)
    # And we're done...
    return output

def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None):
    # Dispatcher for drop_fields (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    return (base,)


@array_function_dispatch(_drop_fields_dispatcher)
def drop_fields(base, drop_names, usemask=True, asrecarray=False):
    """
    Return a new array with fields in `drop_names` dropped.

    Nested fields are supported.

    .. versionchanged:: 1.18.0
        `drop_fields` returns an array with 0 fields if all fields are dropped,
        rather than returning ``None`` as it did previously.

    Parameters
    ----------
    base : array
        Input array
    drop_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to drop.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
        a plain ndarray or masked array with flexible dtype. The default
        is False.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
    ...              dtype=[('a', np.int64),
    ...                     ('b', [('ba', np.double), ('bb', np.int64)])])
    >>> rfn.drop_fields(a, 'a')
    array([((2., 3),), ((5., 6),)],
          dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])])
    >>> rfn.drop_fields(a, 'ba')
    array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])])
    >>> rfn.drop_fields(a, ['ba', 'bb'])
    array([(1,), (4,)], dtype=[('a', '<i8')])
    """
    if _is_string_like(drop_names):
        drop_names = [drop_names]
    else:
        drop_names = set(drop_names)

    def _drop_descr(ndtype, drop_names):
        names = ndtype.names
        newdtype = []
        for name in names:
            current = ndtype[name]
            if name in drop_names:
                continue
            if current.names is not None:
                descr = _drop_descr(current, drop_names)
                if descr:
                    newdtype.append((name, descr))
            else:
                newdtype.append((name, current))
        return newdtype

    newdtype = _drop_descr(base.dtype, drop_names)

    output = np.empty(base.shape, dtype=newdtype)
    output = recursive_fill_fields(base, output)
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)

def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
    """
    Return a new array keeping only the fields in `keep_names`,
    and preserving the order of those fields.

    Parameters
    ----------
    base : array
        Input array
    keep_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to keep. Order of the names will be preserved.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
        a plain ndarray or masked array with flexible dtype. The default
        is False.
    """
    newdtype = [(n, base.dtype[n]) for n in keep_names]
    output = np.empty(base.shape, dtype=newdtype)
    output = recursive_fill_fields(base, output)
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)


def _rec_drop_fields_dispatcher(base, drop_names):
    return (base,)

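# A minimal usage sketch of the private _keep_fields helper (not part of the
# original module); the array below is purely illustrative.
#
# >>> a = np.array([(1, 2., 3)], dtype=[('x', 'i8'), ('y', 'f8'), ('z', 'i8')])
# >>> _keep_fields(a, ['z', 'x'], usemask=False)
# array([(3, 1)], dtype=[('z', '<i8'), ('x', '<i8')])
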
@array_function_dispatch(_rec_drop_fields_dispatcher)
def rec_drop_fields(base, drop_names):
    """
    Returns a new numpy.recarray with fields in `drop_names` dropped.
    """
    return drop_fields(base, drop_names, usemask=False, asrecarray=True)

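# A minimal usage sketch for rec_drop_fields (not part of the original
# module); the array below is purely illustrative.
#
# >>> from numpy.lib import recfunctions as rfn
# >>> a = np.array([(1, 2.), (3, 4.)], dtype=[('x', 'i8'), ('y', 'f8')])
# >>> rfn.rec_drop_fields(a, 'y')
# rec.array([(1,), (3,)],
#           dtype=[('x', '<i8')])
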
def _append_fields_dispatcher(base, names, data, dtypes=None,
                              fill_value=None, usemask=None, asrecarray=None):
    # Dispatcher for append_fields (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    yield base
    yield from data


@array_function_dispatch(_append_fields_dispatcher)
def append_fields(base, names, data, dtypes=None,
                  fill_value=-1, usemask=True, asrecarray=False):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.
    """
    # Check the names
    if isinstance(names, (tuple, list)):
        if len(names) != len(data):
            msg = "The number of arrays does not match the number of names"
            raise ValueError(msg)
    elif isinstance(names, str):
        names = [names, ]
        data = [data, ]
    #
    if dtypes is None:
        data = [np.array(a, copy=None, subok=True) for a in data]
        data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
    else:
        if not isinstance(dtypes, (tuple, list)):
            dtypes = [dtypes, ]
        if len(data) != len(dtypes):
            if len(dtypes) == 1:
                dtypes = dtypes * len(data)
            else:
                msg = "The dtypes argument must be None, a dtype, or a list."
                raise ValueError(msg)
        data = [np.array(a, copy=None, subok=True, dtype=d).view([(n, d)])
                for (a, n, d) in zip(data, names, dtypes)]
    #
    base = merge_arrays(base, usemask=usemask, fill_value=fill_value)
    if len(data) > 1:
        data = merge_arrays(data, flatten=True, usemask=usemask,
                            fill_value=fill_value)
    else:
        data = data.pop()
    #
    output = ma.masked_all(
        max(len(base), len(data)),
        dtype=_get_fieldspec(base.dtype) + _get_fieldspec(data.dtype))
    output = recursive_fill_fields(base, output)
    output = recursive_fill_fields(data, output)
    #
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)

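# A minimal usage sketch for append_fields (not part of the original module);
# the array and field names below are purely illustrative, and the integer
# width in the appended field's dtype depends on the platform default int.
#
# >>> from numpy.lib import recfunctions as rfn
# >>> a = np.array([(1, 10.), (2, 20.)],
# ...              dtype=[('A', np.int64), ('B', np.float64)])
# >>> rfn.append_fields(a, 'C', data=[0, 1], usemask=False)
# array([(1, 10., 0), (2, 20., 1)],
#       dtype=[('A', '<i8'), ('B', '<f8'), ('C', '<i8')])
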
def _rec_append_fields_dispatcher(base, names, data, dtypes=None):
    # Dispatcher for rec_append_fields (assumed to follow the pattern of the
    # other *_dispatcher helpers in this module; the decorator below requires it).
    yield base
    yield from data


@array_function_dispatch(_rec_append_fields_dispatcher)
def rec_append_fields(base, names, data, dtypes=None):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.

    See Also
    --------
    append_fields

    Returns
    -------
    appended_array : np.recarray
    """
    return append_fields(base, names, data=data, dtypes=dtypes,
                         asrecarray=True, usemask=False)

def _repack_fields_dispatcher(a, align=None, recurse=None):
    # Dispatcher for repack_fields (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    return (a,)


@array_function_dispatch(_repack_fields_dispatcher)
def repack_fields(a, align=False, recurse=False):
    """
    Re-pack the fields of a structured array or dtype in memory.

    The memory layout of structured datatypes allows fields at arbitrary
    byte offsets. This means the fields can be separated by padding bytes,
    their offsets can be non-monotonically increasing, and they can overlap.

    This method removes any overlaps and reorders the fields in memory so they
    have increasing byte offsets, and adds or removes padding bytes depending
    on the `align` option, which behaves like the `align` option to
    `numpy.dtype`.

    If `align=False`, this method produces a "packed" memory layout in which
    each field starts at the byte the previous field ended, and any padding
    bytes are removed.

    If `align=True`, this method produces an "aligned" memory layout in which
    each field's offset is a multiple of its alignment, and the total itemsize
    is a multiple of the largest alignment, by adding padding bytes as needed.

    Parameters
    ----------
    a : ndarray or dtype
       array or dtype for which to repack the fields.
    align : boolean
       If true, use an "aligned" memory layout, otherwise use a "packed" layout.
    recurse : boolean
       If True, also repack nested structures.

    Returns
    -------
    repacked : ndarray or dtype
       Copy of `a` with fields repacked, or `a` itself if no repacking was
       needed.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> def print_offsets(d):
    ...     print("offsets:", [d.fields[name][1] for name in d.names])
    ...     print("itemsize:", d.itemsize)
    ...
    >>> dt = np.dtype('u1, <i8, <f8', align=True)
    >>> dt
    dtype({'names': ['f0', 'f1', 'f2'], 'formats': ['u1', '<i8', '<f8'], \
'offsets': [0, 8, 16], 'itemsize': 24}, align=True)
    >>> print_offsets(dt)
    offsets: [0, 8, 16]
    itemsize: 24
    >>> packed_dt = rfn.repack_fields(dt)
    >>> packed_dt
    dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')])
    >>> print_offsets(packed_dt)
    offsets: [0, 1, 9]
    itemsize: 17

    """
    if not isinstance(a, np.dtype):
        dt = repack_fields(a.dtype, align=align, recurse=recurse)
        return a.astype(dt, copy=False)

    if a.names is None:
        return a

    fieldinfo = []
    for name in a.names:
        tup = a.fields[name]
        if recurse:
            fmt = repack_fields(tup[0], align=align, recurse=True)
        else:
            fmt = tup[0]

        if len(tup) == 3:
            name = (tup[2], name)

        fieldinfo.append((name, fmt))

    dt = np.dtype(fieldinfo, align=align)
    return np.dtype((a.type, dt))

def _get_fields_and_offsets(dt, offset=0):
    """
    Returns a flat list of (dtype, count, offset) tuples of all the
    scalar fields in the dtype "dt", including nested fields, in left
    to right order.
    """

    # counts up elements in subarrays, including nested subarrays, and returns
    # base dtype and count
    def count_elem(dt):
        count = 1
        while dt.shape != ():
            for size in dt.shape:
                count *= size
            dt = dt.base
        return dt, count

    fields = []
    for name in dt.names:
        field = dt.fields[name]
        f_dt, f_offset = field[0], field[1]
        f_dt, n = count_elem(f_dt)

        if f_dt.names is None:
            fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
        else:
            subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
            size = f_dt.itemsize

            for i in range(n):
                if i == 0:
                    # optimization: avoid list comprehension if no subarray
                    fields.extend(subfields)
                else:
                    fields.extend([(d, c, o + i * size)
                                   for d, c, o in subfields])
    return fields


def _common_stride(offsets, counts, itemsize):
    """
    Returns the stride between the fields, or None if the stride is not
    constant. The values in "counts" designate the lengths of
    subarrays. Subarrays are treated as many contiguous fields, with
    always positive stride.
    """
    if len(offsets) <= 1:
        return itemsize

    negative = offsets[1] < offsets[0]  # negative stride
    if negative:
        # reverse, so offsets will be ascending
        it = zip(reversed(offsets), reversed(counts))
    else:
        it = zip(offsets, counts)

    prev_offset = None
    stride = None
    for offset, count in it:
        if count != 1:  # subarray: always c-contiguous
            if negative:
                return None  # subarrays can never have a negative stride
            if stride is None:
                stride = itemsize
            if stride != itemsize:
                return None
            end_offset = offset + (count - 1) * itemsize
        else:
            end_offset = offset

        if prev_offset is not None:
            new_stride = offset - prev_offset
            if stride is None:
                stride = new_stride
            if stride != new_stride:
                return None

        prev_offset = end_offset

    if negative:
        return -stride
    return stride


def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None,
                                           casting=None):
    return (arr,)

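# A minimal sketch of what _get_fields_and_offsets reports for a small packed
# dtype (not part of the original module); the dtype below is illustrative
# only. Each entry is (field dtype, element count, byte offset), so a scalar
# 'i4' field contributes count 1 at offset 0 and a ('f8', 2) subarray field
# contributes count 2 at offset 4.
#
# >>> dt = np.dtype([('a', 'i4'), ('b', 'f8', 2)])
# >>> dts, counts, offsets = zip(*_get_fields_and_offsets(dt))
# >>> counts, offsets
# ((1, 2), (0, 4))
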
@array_function_dispatch(_structured_to_unstructured_dispatcher)
def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
    """
    Converts an n-D structured array into an (n+1)-D unstructured array.

    The new array will have a new last dimension equal in size to the
    number of field-elements of the input array. If not supplied, the output
    datatype is determined from the numpy type promotion rules applied to all
    the field datatypes.

    Nested fields, as well as each element of any subarray fields, all count
    as a single field-element.

    Parameters
    ----------
    arr : ndarray
       Structured array or dtype to convert. Cannot contain object datatype.
    dtype : dtype, optional
       The dtype of the output unstructured array.
    copy : bool, optional
        If true, always return a copy. If false, a view is returned if
        possible, such as when the `dtype` and strides of the fields are
        suitable and the array subtype is one of `numpy.ndarray`,
        `numpy.recarray` or `numpy.memmap`.

        .. versionchanged:: 1.25.0
            A view can now be returned if the fields are separated by a
            uniform stride.

    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        See casting argument of `numpy.ndarray.astype`. Controls what kind of
        data casting may occur.

    Returns
    -------
    unstructured : ndarray
       Unstructured array with one more dimension.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
    >>> a
    array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]),
           (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])],
          dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
    >>> rfn.structured_to_unstructured(a)
    array([[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]])

    >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8, 11), (10, 11, 12)],
    ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
    >>> np.mean(rfn.structured_to_unstructured(b[['x', 'z']]), axis=-1)
    array([ 3. ,  5.5,  9. , 11. ])

    """
    if arr.dtype.names is None:
        raise ValueError('arr must be a structured array')

    fields = _get_fields_and_offsets(arr.dtype)
    n_fields = len(fields)
    if n_fields == 0 and dtype is None:
        raise ValueError("arr has no fields. Unable to guess dtype")
    elif n_fields == 0:
        # too many bugs elsewhere for this to work now
        raise NotImplementedError("arr with no fields is not supported")

    dts, counts, offsets = zip(*fields)
    names = ['f{}'.format(n) for n in range(n_fields)]

    if dtype is None:
        out_dtype = np.result_type(*[dt.base for dt in dts])
    else:
        out_dtype = np.dtype(dtype)

    # Use a series of views and casts to convert to an unstructured array:

    # first view using flattened fields (doesn't work for object arrays)
    # Note: dts may include a shape for subarrays
    flattened_fields = np.dtype({'names': names,
                                 'formats': dts,
                                 'offsets': offsets,
                                 'itemsize': arr.dtype.itemsize})
    arr = arr.view(flattened_fields)

    # we only allow a few types to be unstructured by manipulating the
    # strides, because we know it won't work with, for example, np.matrix nor
    # np.ma.MaskedArray.
    can_view = type(arr) in (np.ndarray, np.recarray, np.memmap)
    if (not copy) and can_view and all(dt.base == out_dtype for dt in dts):
        # all elements have the right dtype already; if they have a common
        # stride, we can just return a view
        common_stride = _common_stride(offsets, counts, out_dtype.itemsize)
        if common_stride is not None:
            wrap = arr.__array_wrap__

            new_shape = arr.shape + (sum(counts), out_dtype.itemsize)
            new_strides = arr.strides + (abs(common_stride), 1)

            arr = arr[..., np.newaxis].view(np.uint8)  # view as bytes
            arr = arr[..., min(offsets):]  # remove the leading unused data
            arr = np.lib.stride_tricks.as_strided(arr,
                                                  new_shape,
                                                  new_strides,
                                                  subok=True)

            # cast and drop the last dimension again
            arr = arr.view(out_dtype)[..., 0]

            if common_stride < 0:
                arr = arr[..., ::-1]  # reverse, if the stride was negative
            if type(arr) is not type(wrap.__self__):
                # Some types (e.g. recarray) turn into an ndarray along the
                # way, so we have to wrap it again in order to match the
                # behavior with copy=True.
                arr = wrap(arr)
            return arr

    # next cast to a packed format with all fields converted to new dtype
    packed_fields = np.dtype({'names': names,
                              'formats': [(out_dtype, dt.shape) for dt in dts]})
    arr = arr.astype(packed_fields, copy=copy, casting=casting)

    # finally it is safe to view the packed fields as the unstructured type
    return arr.view((out_dtype, (sum(counts),)))

def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None,
                                           align=None, copy=None,
                                           casting=None):
    # Dispatcher for unstructured_to_structured (assumed to follow the pattern
    # of the other *_dispatcher helpers; the decorator below requires it).
    return (arr,)


@array_function_dispatch(_unstructured_to_structured_dispatcher)
def unstructured_to_structured(arr, dtype=None, names=None, align=False,
                               copy=False, casting='unsafe'):
    """
    Converts an n-D unstructured array into an (n-1)-D structured array.

    The last dimension of the input array is converted into a structure, with
    number of field-elements equal to the size of the last dimension of the
    input array. By default all output fields have the input array's dtype, but
    an output structured dtype with an equal number of field-elements can be
    supplied instead.

    Nested fields, as well as each element of any subarray fields, all count
    towards the number of field-elements.

    Parameters
    ----------
    arr : ndarray
       Unstructured array or dtype to convert.
    dtype : dtype, optional
       The structured dtype of the output array
    names : list of strings, optional
       If dtype is not supplied, this specifies the field names for the output
       dtype, in order. The field dtypes will be the same as the input array.
    align : boolean, optional
       Whether to create an aligned memory layout.
    copy : bool, optional
        See copy argument to `numpy.ndarray.astype`. If true, always return a
        copy. If false, and `dtype` requirements are satisfied, a view is
        returned.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        See casting argument of `numpy.ndarray.astype`. Controls what kind of
        data casting may occur.

    Returns
    -------
    structured : ndarray
       Structured array with fewer dimensions.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
    >>> a = np.arange(20).reshape((4,5))
    >>> a
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    >>> rfn.unstructured_to_structured(a, dt)
    array([( 0, ( 1.,  2), [ 3.,  4.]), ( 5, ( 6.,  7), [ 8.,  9.]),
           (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
          dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])

    """
    if arr.shape == ():
        raise ValueError('arr must have at least one dimension')
    n_elem = arr.shape[-1]
    if n_elem == 0:
        # too many bugs elsewhere for this to work now
        raise NotImplementedError("last axis with size 0 is not supported")

    if dtype is None:
        if names is None:
            names = ['f{}'.format(n) for n in range(n_elem)]
        out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
        fields = _get_fields_and_offsets(out_dtype)
        dts, counts, offsets = zip(*fields)
    else:
        if names is not None:
            raise ValueError("don't supply both dtype and names")
        # if dtype is the args of np.dtype, construct it
        dtype = np.dtype(dtype)
        # sanity check of the input dtype
        fields = _get_fields_and_offsets(dtype)
        if len(fields) == 0:
            dts, counts, offsets = [], [], []
        else:
            dts, counts, offsets = zip(*fields)

        if n_elem != sum(counts):
            raise ValueError('The length of the last dimension of arr must '
                             'be equal to the number of fields in dtype')
        out_dtype = dtype
        if align and not out_dtype.isalignedstruct:
            raise ValueError("align was True but dtype is not aligned")

    names = ['f{}'.format(n) for n in range(len(fields))]

    # Use a series of views and casts to convert to a structured array:

    # first view as a packed structured array of one dtype
    packed_fields = np.dtype({'names': names,
                              'formats': [(arr.dtype, dt.shape) for dt in dts]})
    arr = np.ascontiguousarray(arr).view(packed_fields)

    # next cast to an unpacked but flattened format with varied dtypes
    flattened_fields = np.dtype({'names': names,
                                 'formats': dts,
                                 'offsets': offsets,
                                 'itemsize': out_dtype.itemsize})
    arr = arr.astype(flattened_fields, copy=copy, casting=casting)

    # finally view as the final nested dtype and remove the last axis
    return arr.view(out_dtype)[..., 0]

def _apply_along_fields_dispatcher(func, arr):
    # Dispatcher for apply_along_fields (assumed to follow the pattern of the
    # other *_dispatcher helpers in this module; the decorator below requires it).
    return (arr,)


@array_function_dispatch(_apply_along_fields_dispatcher)
def apply_along_fields(func, arr):
    """
    Apply function 'func' as a reduction across fields of a structured array.

    This is similar to `numpy.apply_along_axis`, but treats the fields of a
    structured array as an extra axis. The fields are all first cast to a
    common type following the type-promotion rules from `numpy.result_type`
    applied to the field's dtypes.

    Parameters
    ----------
    func : function
       Function to apply on the "field" dimension. This function must
       support an `axis` argument, like `numpy.mean`, `numpy.sum`, etc.
    arr : ndarray
       Structured array for which to apply func.

    Returns
    -------
    out : ndarray
       Result of the reduction operation

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8, 11), (10, 11, 12)],
    ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
    >>> rfn.apply_along_fields(np.mean, b)
    array([ 2.66666667,  5.33333333,  8.66666667, 11.        ])
    >>> rfn.apply_along_fields(np.mean, b[['x', 'z']])
    array([ 3. ,  5.5,  9. , 11. ])

    """
    if arr.dtype.names is None:
        raise ValueError('arr must be a structured array')

    uarr = structured_to_unstructured(arr)
    return func(uarr, axis=-1)
    # works and avoids axis requirement, but very, very slow:
    # return np.apply_along_axis(func, -1, uarr)


def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None):
    return dst, src

@array_function_dispatch(_assign_fields_by_name_dispatcher)
def assign_fields_by_name(dst, src, zero_unassigned=True):
    """
    Assigns values from one structured array to another by field name.

    Normally in numpy >= 1.14, assignment of one structured array to another
    copies fields "by position", meaning that the first field from the src is
    copied to the first field of the dst, and so on, regardless of field name.

    This function instead copies "by field name", such that fields in the dst
    are assigned from the identically named field in the src. This applies
    recursively for nested structures. This is how structure assignment worked
    in numpy >= 1.6 to <= 1.13.

    Parameters
    ----------
    dst : ndarray
    src : ndarray
        The source and destination arrays during assignment.
    zero_unassigned : bool, optional
        If True, fields in the dst for which there was no matching
        field in the src are filled with the value 0 (zero). This
        was the behavior of numpy <= 1.13. If False, those fields
        are not modified.
    """

    if dst.dtype.names is None:
        dst[...] = src
        return

    for name in dst.dtype.names:
        if name not in src.dtype.names:
            if zero_unassigned:
                dst[name] = 0
        else:
            assign_fields_by_name(dst[name], src[name],
                                  zero_unassigned)

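# A minimal usage sketch for assign_fields_by_name (not part of the original
# module); the arrays below are purely illustrative. Note the assignment
# matches 'x' and 'y' by name even though the field order differs.
#
# >>> from numpy.lib import recfunctions as rfn
# >>> a = np.zeros(2, dtype=[('x', 'i4'), ('y', 'f8')])
# >>> b = np.array([(1., 10), (2., 20)], dtype=[('y', 'f8'), ('x', 'i4')])
# >>> rfn.assign_fields_by_name(a, b)
# >>> a
# array([(10, 1.), (20, 2.)], dtype=[('x', '<i4'), ('y', '<f8')])
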
def _require_fields_dispatcher(array, required_dtype):
    # Dispatcher for require_fields (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    return (array,)


@array_function_dispatch(_require_fields_dispatcher)
def require_fields(array, required_dtype):
    """
    Casts a structured array to a new dtype using assignment by field-name.

    This function assigns from the old to the new array by name, so the
    value of a field in the output array is the value of the field with the
    same name in the source array. This has the effect of creating a new
    ndarray containing only the fields "required" by the required_dtype.

    If a field name in the required_dtype does not exist in the
    input array, that field is created and set to 0 in the output array.

    Parameters
    ----------
    array : ndarray
       array to cast
    required_dtype : dtype
       datatype for output array

    Returns
    -------
    out : ndarray
        array with the new dtype, with field values copied from the fields in
        the input array with the same name

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
    >>> rfn.require_fields(a, [('b', 'f4'), ('c', 'u1')])
    array([(1., 1), (1., 1), (1., 1), (1., 1)],
          dtype=[('b', '<f4'), ('c', 'u1')])
    >>> rfn.require_fields(a, [('b', 'f4'), ('newf', 'u1')])
    array([(1., 0), (1., 0), (1., 0), (1., 0)],
          dtype=[('b', '<f4'), ('newf', 'u1')])

    """
    out = np.empty(array.shape, dtype=required_dtype)
    assign_fields_by_name(out, array)
    return out

def _find_duplicates_dispatcher(a, key=None, ignoremask=None,
                                return_index=None):
    # Dispatcher for find_duplicates (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    return (a,)


@array_function_dispatch(_find_duplicates_dispatcher)
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
    """
    Find the duplicates in a structured array along a given key

    Parameters
    ----------
    a : array-like
        Input array
    key : {string, None}, optional
        Name of the fields along which to check the duplicates.
        If None, the search is performed by records
    ignoremask : {True, False}, optional
        Whether masked data should be discarded or considered as duplicates.
    return_index : {False, True}, optional
        Whether to return the indices of the duplicated values.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = [('a', int)]
    >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
    ...                 mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
    >>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
    (masked_array(data=[(1,), (1,), (2,), (2,)],
                  mask=[(False,), (False,), (False,), (False,)],
            fill_value=(999999,),
                 dtype=[('a', '<i8')]), array([0, 1, 3, 4]))
    """
    a = np.asanyarray(a).ravel()
    # Get a dictionary of fields
    fields = get_fieldstructure(a.dtype)
    # Get the sorting data (by selecting the corresponding field)
    base = a
    if key:
        for f in fields[key]:
            base = base[f]
        base = base[key]
    # Get the sorting indices and the sorted data
    sortidx = base.argsort()
    sortedbase = base[sortidx]
    sorteddata = sortedbase.filled()
    # Compare the sorting data
    flag = (sorteddata[:-1] == sorteddata[1:])
    # If masked data must be ignored, set the flag to false where needed
    if ignoremask:
        sortedmask = sortedbase.recordmask
        flag[sortedmask[1:]] = False
    flag = np.concatenate(([False], flag))
    # We need to take the point on the left as well (else we're missing it)
    flag[:-1] = flag[:-1] + flag[1:]
    duplicates = a[sortidx][flag]
    if return_index:
        return (duplicates, sortidx[flag])
    else:
        return duplicates

def _join_by_dispatcher(key, r1, r2, jointype=None, r1postfix=None,
                        r2postfix=None, defaults=None, usemask=None,
                        asrecarray=None):
    # Dispatcher for join_by (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    return (r1, r2)


@array_function_dispatch(_join_by_dispatcher)
def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
            defaults=None, usemask=True, asrecarray=False):
    """
    Join arrays `r1` and `r2` on key `key`.

    The key should be either a string or a sequence of strings corresponding
    to the fields used to join the array.  An exception is raised if the
    `key` field cannot be found in the two input arrays.  Neither `r1` nor
    `r2` should have any duplicates along `key`: the presence of duplicates
    will make the output quite unreliable. Note that duplicates are not
    looked for by the algorithm.

    Parameters
    ----------
    key : {string, sequence}
        A string or a sequence of strings corresponding to the fields used
        for comparison.
    r1, r2 : arrays
        Structured arrays.
    jointype : {'inner', 'outer', 'leftouter'}, optional
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of
        r1 not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1
        not in r2.
    r1postfix : string, optional
        String appended to the names of the fields of r1 that are present
        in r2 but absent from the key.
    r2postfix : string, optional
        String appended to the names of the fields of r2 that are present
        in r1 but absent from the key.
    defaults : {dictionary}, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords if
        `asrecarray==True`) or an ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.

    Notes
    -----
    * The output is sorted along the key.
    * A temporary array is formed by dropping the fields not in the key for
      the two arrays and concatenating the result. This array is then
      sorted, and the common entries selected. The output is constructed by
      filling the fields with the selected entries. Matching is not
      preserved if there are some duplicates...

    """
    # Check jointype
    if jointype not in ('inner', 'outer', 'leftouter'):
        raise ValueError(
            "The 'jointype' argument should be in 'inner', "
            "'outer' or 'leftouter' (got '%s' instead)" % jointype
        )
    # If we have a single key, put it in a tuple
    if isinstance(key, str):
        key = (key,)

    # Check the keys
    if len(set(key)) != len(key):
        dup = next(x for n, x in enumerate(key) if x in key[n + 1:])
        raise ValueError("duplicate join key %r" % dup)
    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %r' % name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %r' % name)

    # Make sure we work with ravelled arrays
    r1 = r1.ravel()
    r2 = r2.ravel()
    # Fixme: nb2 below is never used. Commenting out for pyflakes.
    # (nb1, nb2) = (len(r1), len(r2))
    nb1 = len(r1)
    (r1names, r2names) = (r1.dtype.names, r2.dtype.names)

    # Check the names for collision
    collisions = (set(r1names) & set(r2names)) - set(key)
    if collisions and not (r1postfix or r2postfix):
        msg = "r1 and r2 contain common names, r1postfix and r2postfix "
        msg += "can't both be empty"
        raise ValueError(msg)

    # Make temporary arrays of just the keys
    #  (use order of keys in `r1` for back-compatibility)
    key1 = [n for n in r1names if n in key]
    r1k = _keep_fields(r1, key1)
    r2k = _keep_fields(r2, key1)

    # Concatenate the two arrays for comparison
    aux = ma.concatenate((r1k, r2k))
    idx_sort = aux.argsort(order=key)
    aux = aux[idx_sort]
    #
    # Get the common keys
    flag_in = ma.concatenate(([False], aux[1:] == aux[:-1]))
    flag_in[:-1] = flag_in[1:] + flag_in[:-1]
    idx_in = idx_sort[flag_in]
    idx_1 = idx_in[(idx_in < nb1)]
    idx_2 = idx_in[(idx_in >= nb1)] - nb1
    (r1cmn, r2cmn) = (len(idx_1), len(idx_2))
    if jointype == 'inner':
        (r1spc, r2spc) = (0, 0)
    elif jointype == 'outer':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn)
    elif jointype == 'leftouter':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, 0)
    # Select the entries from each input
    (s1, s2) = (r1[idx_1], r2[idx_2])
    #
    # Build the new description of the output array .......
    # Start with the key fields
    ndtype = _get_fieldspec(r1k.dtype)

    # Add the fields from r1
    for fname, fdtype in _get_fieldspec(r1.dtype):
        if fname not in key:
            ndtype.append((fname, fdtype))

    # Add the fields from r2
    for fname, fdtype in _get_fieldspec(r2.dtype):
        # Have we seen the current name already ?
        # we need to rebuild this list every time
        names = list(name for name, dtype in ndtype)
        try:
            nameidx = names.index(fname)
        except ValueError:
            #... we haven't: just add the description to the current list
            ndtype.append((fname, fdtype))
        else:
            # collision
            _, cdtype = ndtype[nameidx]
            if fname in key:
                # The current field is part of the key: take the largest dtype
                ndtype[nameidx] = (fname, max(fdtype, cdtype))
            else:
                # The current field is not part of the key: add the suffixes,
                # and place the new field adjacent to the old one
                ndtype[nameidx:nameidx + 1] = [
                    (fname + r1postfix, cdtype),
                    (fname + r2postfix, fdtype)
                ]
    # Rebuild a dtype from the new fields
    ndtype = np.dtype(ndtype)
    # Find the largest nb of common fields :
    # r1cmn and r2cmn should be equal, but...
    cmn = max(r1cmn, r2cmn)
    # Construct an empty array
    output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype)
    names = output.dtype.names
    for f in r1names:
        selected = s1[f]
        if f not in names or (f in r2names and not r2postfix and f not in key):
            f += r1postfix
        current = output[f]
        current[:r1cmn] = selected[:r1cmn]
        if jointype in ('outer', 'leftouter'):
            current[cmn:cmn + r1spc] = selected[r1cmn:]
    for f in r2names:
        selected = s2[f]
        if f not in names or (f in r1names and not r1postfix and f not in key):
            f += r2postfix
        current = output[f]
        current[:r2cmn] = selected[:r2cmn]
        if (jointype == 'outer') and r2spc:
            current[-r2spc:] = selected[r2cmn:]
    # Sort and finalize the output
    output.sort(order=key)
    kwargs = dict(usemask=usemask, asrecarray=asrecarray)
    return _fix_output(_fix_defaults(output, defaults), **kwargs)

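# A minimal usage sketch for join_by (not part of the original module); the
# arrays and the 'key' field below are purely illustrative. An inner join
# keeps only the records whose key appears in both inputs.
#
# >>> from numpy.lib import recfunctions as rfn
# >>> r1 = np.array([(1, 10.), (2, 20.), (3, 30.)],
# ...               dtype=[('key', 'i8'), ('a', 'f8')])
# >>> r2 = np.array([(2, 200.), (3, 300.), (4, 400.)],
# ...               dtype=[('key', 'i8'), ('b', 'f8')])
# >>> rfn.join_by('key', r1, r2, jointype='inner', usemask=False)
# array([(2, 20., 200.), (3, 30., 300.)],
#       dtype=[('key', '<i8'), ('a', '<f8'), ('b', '<f8')])
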
def _rec_join_dispatcher(key, r1, r2, jointype=None, r1postfix=None,
                         r2postfix=None, defaults=None):
    # Dispatcher for rec_join (assumed to follow the pattern of the other
    # *_dispatcher helpers in this module; the decorator below requires it).
    return (r1, r2)


@array_function_dispatch(_rec_join_dispatcher)
def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
             defaults=None):
    """
    Join arrays `r1` and `r2` on keys.
    Alternative to join_by, that always returns a np.recarray.

    See Also
    --------
    join_by : equivalent function
    """
    kwargs = dict(jointype=jointype, r1postfix=r1postfix, r2postfix=r2postfix,
                  defaults=defaults, usemask=False, asrecarray=True)
    return join_by(key, r1, r2, **kwargs)

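# A minimal usage sketch for rec_join (not part of the original module); the
# arrays are purely illustrative. The semantics are those of join_by, but the
# result is always a plain (unmasked) np.recarray, so fields can be read as
# attributes.
#
# >>> from numpy.lib import recfunctions as rfn
# >>> r1 = np.array([(1, 10.), (2, 20.)], dtype=[('key', 'i8'), ('a', 'f8')])
# >>> r2 = np.array([(2, 200.), (3, 300.)], dtype=[('key', 'i8'), ('b', 'f8')])
# >>> rfn.rec_join('key', r1, r2).b
# array([200.])
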