# This Source Code Form is subject to the terms of the Mozilla Public# License, v. 2.0. If a copy of the MPL was not distributed with this# file, You can obtain one at https://mozilla.org/MPL/2.0/.from__future__importannotationsfromcollections.abcimportSequencefromtypingimportOptional,UnionimportnumpyasnpfromsislimportGeometryfromsisl._internalimportset_modulefromsisl.typingimportAtomsIndexfromsisl.utilsimportsize_to_elementsfrom.import_operationsfrom._neighborlistsimport(FullNeighborList,PartialNeighborList,PointsNeighborList,UniqueNeighborList,)__all__=["NeighborFinder",]@set_module("sisl.geom")classNeighborFinder:r"""Fast and linear scaling finding of neighbors. Once this class is instantiated, a table is built. Then, the neighbor finder can be queried as many times as wished as long as the geometry doesn't change. Note that the radius (`R`) is used to build the table. Therefore, if one wants to look for neighbors using a different R, one needs to create another finder or call `setup`. Parameters ---------- geometry: Geometry the geometry to find neighbors in R: float or array-like of shape (geometry.na), optional The radius to consider two atoms neighbors. If it is a single float, the same radius is used for all atoms. If it is an array, it should contain the radius for each atom. If not provided, an array is constructed, where the radius for each atom is its maxR. overlap: bool, optional If `True`, two atoms are considered neighbors if their spheres overlap. If `False`, two atoms are considered neighbors if the second atom is within the sphere of the first atom. Note that this implies that atom :math:`I` might be atom :math:`J`'s neighbor while the opposite is not true. If not provided, it will be `True` if `R` is an array and `False` if it is a single float. bin_size : float or tuple of float, optional the factor for the radius to determine how large the bins are, optionally along each lattice vector. It can minimally be 2, meaning that the maximum radius to consider is twice the radius considered. For larger values, more atoms will be in each bin (and thus fewer bins). Hence, this value can be used to fine-tune the memory requirement by decreasing number of bins, at the cost of a bit more run-time searching bins. Examples -------- You need to initialize it with a geometry and the cutoff radius. Then, you can call the ``find_neighbors``, ``find_unique_pairs`` or ``find_close`` methods to query the finder for neighbors. These methods will return a neighbors list (e.g ``find_neighbors`` returns a ``FullNeighborList``). Here is an example of how to find all neighbors on a graphene structure with a vacancy: .. code-block:: python import sisl # Build a graphene supercell with a vacancy graphene = sisl.geom.graphene().tile(2, 0).tile(2, 1) graphene = graphene.remove(2).translate2uc() # Initialize a finder for neighbors that are within 1.5 Angstrom finder = sisl.geom.NeighborFinder(graphene, R=1.5) # Find all neighbors neighbors = finder.find_neighbors() # You can get the neighbor pairs (I,J) from the I and J attributes # The supercell index of atom J is in the isc attribute. print("ATOM I SHAPE:", neighbors.I.shape) print("ATOM J (NEIGHBOR) SHAPE:", neighbors.J.shape) print("NEIGHBORS ISC:", neighbors.isc.shape) # You can also loop through atoms to get their neighbors for at_neighs in neighbors: print() print(f"NEIGHBORS OF ATOM {at_neighs.atom} ({at_neighs.n_neighbors} neighbors): ") print("J", at_neighs.J) print("ISC", at_neighs.isc) # Or get the neighbors of a particular atom: neighbors[0].J See Also -------- FullNeighborList, UniqueNeighborList, PartialNeighborList, PointsNeighborList: The neighbor lists returned by this class when neighbors are requested. """#: Memory control of the findermemory:tuple[str,float]=("200MB",1.5)#: Number of bins along each cell directionnbins:tuple[int,int,int]#: Total number of binstotal_nbins:int#: The geometry associated with the findergeometry:Geometry# Geometry actually used for binning. Can be the provided geometry# or a tiled geometry if the search radius is too big (compared to the lattice size)._bins_geometry:Geometry#: The cutoff radius for each atom in the geometry.R:np.ndarray_aux_R:np.ndarray_overlap:bool# Data structure_list:np.ndarray# (natoms, )_heads:np.ndarray# (total_nbins, )_counts:np.ndarray# (total_nbins, )def__init__(self,geometry:Geometry,R:Optional[Union[float,np.ndarray]]=None,overlap:bool=False,bin_size:Union[float,tuple[float,float,float]]=2,):self.setup(geometry,R=R,overlap=overlap,bin_size=bin_size)
[docs]defsetup(self,geometry:Optional[Geometry]=None,R:Optional[Union[float,np.ndarray]]=None,overlap:bool=None,bin_size:Union[float,tuple[float,float,float]]=2,):r"""Prepares everything for neighbor finding. **You don't need to call this method after initializing the finder**, this is called internally already! This method might be used to reset the finder with new parameters. Parameters ---------- geometry: the geometry to find neighbors in. If not provided, the current geometry is kept. R: float or array-like of shape (geometry.na), optional The radius to consider two atoms neighbors. If it is a single float, the same radius is used for all atoms. If it is an array, it should contain the radius for each atom. If not provided, an array is constructed, where the radius for each atom is its maxR. Note that negative R values are allowed overlap: If `True`, two atoms are considered neighbors if their spheres overlap. If `False`, two atoms are considered neighbors if the second atom is within the sphere of the first atom. Note that this implies that atom :math:`I` might be atom :math:`J`'s neighbor while the opposite is not true. bin_size : the factor for the radius to determine how large the bins are, optionally along each lattice vector. It can minimally be 2, meaning that the maximum radius to consider is twice the radius considered. For larger values, more atoms will be in each bin (and thus fewer bins). Hence, this value can be used to fine-tune the memory requirement by decreasing number of bins, at the cost of a bit more run-time searching bins. """# Set the geometry. Copy it because we may need to modify the supercell size.ifgeometryisnotNone:# Warn that we do not support atoms outside the unit cell just yet.fxyz=geometry.fxyzifnp.any((fxyz<-1e-8)|(fxyz>(1+1e-8))):raiseValueError(f"Coordinates outside the unit cell are not supported by {self.__class__.__name__} for now. ""You can do geometry.translate2uc() to move atoms to the unit cell, but note that ""this will change the supercell indices of the connections and might not be compatible ""with the indices of your sparse matrices, for example.")self.geometry=geometry.copy()# If R was not provided, build an array of RsifRisNone:R=self.geometry.atoms.maxR(all=True)else:R=np.asarray(R)# Set the radiusself.R=Rself._aux_R=R# If sphere overlap was not provided, set it to False if R is a single float# and True otherwise.self._overlap=overlap# Determine the bin_size as the maximum DIAMETER to ensure that we ALWAYS# only need to look one bin away for neighbors.max_R=np.max(self.R)ifoverlap:# In the case when we want to check for sphere overlap, the size should# be twice as big.max_R*=2ifmax_R<=0:raiseValueError("All R values are 0 or less. Please provide some positive values")bin_size=np.asarray(bin_size)ifnp.any(bin_size<2):raiseValueError("The bin_size must be larger than 2 to only search in the ""neighboring bins. Please increase to a value >=2")bin_size=max_R*bin_size# We add a small amount to bin_size to avoid ambiguities when# a position is exactly at the center of a bin.bin_size+=0.001lattice_sizes=self.geometry.lengthself._R_too_big=np.any(bin_size>lattice_sizes)ifself._R_too_big:# This means that nsc must be at least 5.# We round the amount of cells needed in each direction# to the closest next odd number.nsc=np.ceil(bin_size/lattice_sizes)//2*2+1# And then set it as the number of supercells.self.geometry.set_nsc(nsc.astype(int))ifself._aux_R.ndim==1:self._aux_R=np.tile(self._aux_R,self.geometry.n_s)all_xyz=[]foriscinself.geometry.sc_off:ats_xyz=self.geometry.axyz(isc=isc)all_xyz.append(ats_xyz)self._bins_geometry=Geometry(np.concatenate(all_xyz),atoms=self.geometry.atoms)# Recompute lattice sizeslattice_sizes=self._bins_geometry.lengthelse:self._bins_geometry=self.geometry# Get the number of bins along each cell direction.nbins_float=lattice_sizes/bin_sizeself.nbins=tuple(np.floor(nbins_float).astype(int))self.total_nbins=np.prod(self.nbins)# Get the scalar bin indices of all atomsscalar_bin_indices=self._get_bin_indices(self._bins_geometry.fxyz)# Build the tables that will allow us to look for neighbors in an efficient# and linear scaling manner.self._build_table(scalar_bin_indices)
def_build_table(self,bin_indices):"""Builds the tables that will allow efficient linear scaling neighbor search. Parameters ---------- bin_indices: array-like of shape (self.total_nbins, ) Array containing the scalar bin index for each atom. """# Call the fortran routine that builds the tableself._list,self._heads,self._counts=_operations.build_table(self.total_nbins,bin_indices)
[docs]defassert_consistency(self):"""Asserts that the data structure (self._list, self._heads, self._counts) is consistent. It also stores that the shape is consistent with the stored geometry and the store total_nbins. """# Check shapesassertself._list.shape==(self._bins_geometry.na,)assertself._counts.shape==self._heads.shape==(self.total_nbins,)# Check valuesfori_bin,bin_countinenumerate(self._counts):count=0head=self._heads[i_bin]whilehead!=-1:count+=1head=self._list[head]assert(count==bin_count),f"Bin {i_bin} has {bin_count} atoms but we found {count} atoms"
def_get_search_atom_counts(self,scalar_bin_indices):"""Computes the number of atoms that will be explored for each search Parameters ----------- scalar_bin_indices: np.ndarray of shape ([n_searches], 8) Array containing the bin indices for each search. Bin indices should be within the unit cell! Returns ----------- np.ndarray of shape (n_searches, ): For each search, the number of atoms that will be involved. """returnself._counts[scalar_bin_indices.ravel()].reshape(-1,8).sum(axis=1)def_get_bin_indices(self,fxyz,cartesian=False,floor=True):"""Gets the bin indices for a given fractional coordinate. Parameters ----------- fxyz: np.ndarray of shape (N, 3) the fractional coordinates for which we want to get the bin indices. cartesian: bool, optional whether the indices should be returned as cartesian. If `False`, scalar indices are returned. floor: bool, optional whether to floor the indices or not. If asking for scalar indices (i.e. `cartesian=False`), the indices will ALWAYS be floored regardless of this argument. Returns -------- np.ndarray: The bin indices. If `cartesian=True`, the shape of the array is (N, 3), otherwise it is (N,). """# Avoid numerical errors in coordinatesfxyz[(fxyz<=0)&(fxyz>-1e-8)]=1e-8fxyz[(fxyz>=1)&(fxyz<1+1e-8)]=1-1e-8bin_indices=((fxyz)%1)*self.nbins# Atoms that are exactly at the limit of the cell might have fxyz = 1# which would result in a bin index outside of range.# We just bring it back to the unit cell.bin_indices=bin_indices%self.nbinsiffloorornotcartesian:bin_indices=np.floor(bin_indices).astype(int)ifnotcartesian:bin_indices=self._cartesian_to_scalar_index(bin_indices)returnbin_indicesdef_get_search_indices(self,fxyz,cartesian=False):r"""Gets the bin indices to explore for a given fractional coordinate. Given a fractional coordinate, we will need to look for neighbors in its own bin, and one bin away in each direction. That is, :math:`2^3=8` bins. Parameters ----------- fxyz: np.ndarray of shape (N, 3) the fractional coordinates for which we want to get the search indices. cartesian: bool, optional whether the indices should be returned as cartesian. If `False`, scalar indices are returned. Returns -------- np.ndarray: The bin indices where we need to perform the search for each fractional coordinate. These indices are all inside the unit cell. If ``cartesian=True``, cartesian indices are returned and the array has shape (N, 8, 3). If ``cartesian=False``, scalar indices are returned and the array has shape (N, 8). np.ndarray of shape (N, 8, 3): The supercell indices of each bin index in the search. """# Get the bin indices for the positions that are requested.# Note that we don't floor the indices so that we can know to which# border of the bin are we closer in each direction.bin_indices=self._get_bin_indices(fxyz,floor=False,cartesian=True)bin_indices=np.atleast_2d(bin_indices)# Determine which is the neighboring cell that we need to look for# along each direction.signs=np.ones(bin_indices.shape,dtype=int)signs[(bin_indices%1)<0.5]=-1# Build and arrays with all the indices that we need to look for. Since# we have to move one bin away in each direction, we have to look for# neighbors along a total of 8 bins (2**3)search_indices=np.tile(bin_indices.astype(int),8).reshape(-1,8,3)search_indices[:,1::2,0]+=signs[:,0].reshape(-1,1)search_indices[:,[2,3,6,7],1]+=signs[:,1].reshape(-1,1)search_indices[:,4:,2]+=signs[:,2].reshape(-1,1)# Convert search indices to unit cell indices, but keep the supercell indices.isc,search_indices=np.divmod(search_indices,self.nbins)ifnotcartesian:search_indices=self._cartesian_to_scalar_index(search_indices)returnsearch_indices,iscdef_cartesian_to_scalar_index(self,index):"""Converts cartesian indices to scalar indices"""ifnotnp.issubdtype(index.dtype,int):raiseValueError("Decimal scalar indices do not make sense, please floor your cartesian indices.")returnindex.dot([1,self.nbins[0],self.nbins[0]*self.nbins[1]])def_scalar_to_cartesian_index(self,index):"""Converts cartesian indices to scalar indices"""ifnp.min(index)<0ornp.max(index)>self.total_nbins:raiseValueError("Some scalar indices are not within the unit cell. We cannot uniquely convert to cartesian")third,index=np.divmod(index,self.nbins[0]*self.nbins[1])second,first=np.divmod(index,self.nbins[0])returnnp.column_stack([first,second,third])def_correct_pairs_R_too_big(self,neighbor_pairs:np.ndarray,# (n_pairs, 5)split_ind:Union[int,np.ndarray],# (n_queried_atoms, )):"""Correction to atom and supercell indices when the binning has been done on a tiled geometry"""is_sc_neigh=neighbor_pairs[:,1]>=self.geometry.napbc=self.geometry.lattice.pbcinvalid=Noneifnotnp.any(pbc):invalid=is_sc_neighelse:pbc_neighs=neighbor_pairs.copy()sc_neigh,uc_neigh=np.divmod(neighbor_pairs[:,1][is_sc_neigh],self.geometry.na)isc_neigh=self.geometry.sc_off[sc_neigh]pbc_neighs[is_sc_neigh,1]=uc_neighpbc_neighs[is_sc_neigh,2:]=isc_neighifnotnp.all(pbc):invalid=pbc_neighs[:,2:][:,~pbc].any(axis=1)neighbor_pairs=pbc_neighsifinvalidisnotNone:neighbor_pairs=neighbor_pairs[~invalid]ifisinstance(split_ind,int):split_ind=split_ind-invalid.sum()else:split_ind=split_ind-np.cumsum(invalid)[split_ind-1]returnneighbor_pairs,split_ind
[docs]deffind_neighbors(self,atoms:AtomsIndex=None,self_interaction:bool=False,)->Union[FullNeighborList,PartialNeighborList]:"""Find neighbors as specified in the finder. This routine only executes the action of finding neighbors, the parameters of the search (`geometry`, `R`, `overlap`...) are defined when the finder is initialized or by calling `setup`. Parameters ---------- atoms: the atoms for which neighbors are desired. Anything that can be sanitized by `sisl.Geometry` is a valid value. If not provided, neighbors for all atoms are searched. self_interaction: whether to consider an atom a neighbor of itself. Returns ---------- neighbors The neighbors list. It will be a partial list if `atoms` is provided. """unsanitized_atoms=atoms# Sanitize atomsatoms=self.geometry._sanitize_atoms(atoms)# Cast R into array of appropiate shape and type.thresholds=np.full(self._bins_geometry.na,self._aux_R,dtype=np.float64)# Get search indicessearch_indices,isc=self._get_search_indices(self.geometry.fxyz[atoms],cartesian=False)# Get atom countsat_counts=self._get_search_atom_counts(search_indices)max_pairs=at_counts.sum()ifnotself_interaction:max_pairs-=search_indices.shape[0]init_pairs=min(max_pairs,# 8 bytes per element (int64)# While this might be wrong on Win, it shouldn't mattersize_to_elements(self.memory[0],8),)# Find the neighbor pairsneighbor_pairs,split_ind=_operations.get_pairs(atoms,search_indices,isc,self._heads,self._list,self_interaction,self._bins_geometry.xyz,self._bins_geometry.cell,self.geometry.lattice.pbc,thresholds,self._overlap,init_pairs,self.memory[1],)# Correct neighbor indices for the case where R was too big and# we needed to create an auxiliary supercell.ifself._R_too_big:neighbor_pairs,split_ind=self._correct_pairs_R_too_big(neighbor_pairs,split_ind)ifunsanitized_atomsisNone:returnFullNeighborList(self.geometry,neighbor_pairs,split_indices=split_ind)else:returnPartialNeighborList(self.geometry,neighbor_pairs,atoms=atoms,split_indices=split_ind)
[docs]deffind_unique_pairs(self,self_interaction:bool=False,)->UniqueNeighborList:r"""Find unique neighbor pairs within the geometry. This function only returns one direction of a given connection between atoms :math:`I` and :math:`J`. In particular, it returns the connection where :math:`I < J`. Note that this routine can not be called if `overlap` is set to `False` and the radius is not a single float. In that case, there is no way of defining *uniqueness* since pair :math:`IJ` can have a different threshold radius than pair :math:`JI`. Parameters ---------- self_interaction : whether to consider an atom a neighbor of itself. """ifnotself._overlapandself._aux_R.ndim==1:raiseValueError("Unique atom pairs do not make sense if we are not looking for sphere overlaps."" Please setup the finder again setting `overlap` to `True` if you wish so.")# In the case where we tiled the geometry to do the binning, it is much better to# just find all neighbors and then drop duplicate connections. Otherwise it is a bit of a mess.ifself._R_too_big:# Find all neighborsall_neighbors=self.find_neighbors(self_interaction=self_interaction)returnall_neighbors.to_unique()# Cast R into array of appropiate shape and type.thresholds=np.full(self.geometry.na,self.R,dtype=np.float64)# Get search indicessearch_indices,isc=self._get_search_indices(self.geometry.fxyz,cartesian=False)# Get atom countsat_counts=self._get_search_atom_counts(search_indices)max_pairs=at_counts.sum()ifnotself_interaction:max_pairs-=search_indices.shape[0]init_pairs=min(max_pairs,# 8 bytes per element (int64)# While this might be wrong on Win, it shouldn't mattersize_to_elements(self.memory[0],8),)# Find all unique neighbor pairsneighbor_pairs=_operations.get_all_unique_pairs(search_indices,isc,self._heads,self._list,self_interaction,self.geometry.xyz,self.geometry.cell,self.geometry.lattice.pbc,thresholds,self._overlap,init_pairs,self.memory[1],)returnUniqueNeighborList(self.geometry,neighbor_pairs)
[docs]deffind_close(self,xyz:Sequence,)->PointsNeighborList:"""Find all atoms that are close to some coordinates in space. This routine only executes the action of finding neighbors, the parameters of the search (`geometry`, `R`, `overlap`...) are defined when the finder is initialized or by calling `setup`. Parameters ---------- xyz: array-like of shape ([npoints], 3) the coordinates for which neighbors are desired. """# Cast R into array of appropiate shape and type.thresholds=np.full(self._bins_geometry.na,self._aux_R,dtype=np.float64)xyz=np.atleast_2d(xyz).astype(float)# Get search indicessearch_indices,isc=self._get_search_indices(xyz.dot(self._bins_geometry.icell.T)%1,cartesian=False)# Get atom countsat_counts=self._get_search_atom_counts(search_indices)max_pairs=at_counts.sum()init_pairs=min(max_pairs,# 8 bytes per element (int64)# While this might be wrong on Win, it shouldn't mattersize_to_elements(self.memory[0],8),)# Find the neighbor pairsneighbor_pairs,split_ind=_operations.get_close(xyz,search_indices,isc,self._heads,self._list,self._bins_geometry.xyz,self._bins_geometry.cell,self.geometry.lattice.pbc,thresholds,init_pairs,self.memory[1],)# Correct neighbor indices for the case where R was too big and# we needed to create an auxiliary supercell.ifself._R_too_big:neighbor_pairs,split_ind=self._correct_pairs_R_too_big(neighbor_pairs,split_ind)returnPointsNeighborList(self.geometry,xyz,neighbor_pairs[:split_ind[-1]],split_indices=split_ind)