fsspec 简介

目录

fsspec 简介

安装

fsspec 可以从 PyPI 或 conda 安装,并且没有自己的依赖项。

pip install fsspec
conda install -c conda-forge fsspec

并非所有文件系统实现都可以在不安装额外依赖项的情况下使用。例如,要访问 GCS(Google Cloud Storage)中的数据,您可以使用下面 pip 的可选依赖(extras)安装语法,或者直接安装所需的特定包。

pip install fsspec[gcs]
conda install -c conda-forge gcsfs

当您尝试使用需要额外依赖项的文件系统时,fsspec 会尽量给出明确的提示信息。当前已知实现的列表可以通过以下代码查看:

from fsspec.registry import known_implementations

known_implementations
{'abfs': {'class': 'adlfs.AzureBlobFileSystem',
  'err': 'Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage'},
 'adl': {'class': 'adlfs.AzureDatalakeFileSystem',
  'err': 'Install adlfs to access Azure Datalake Gen1'},
 'arrow_hdfs': {'class': 'fsspec.implementations.arrow.HadoopFileSystem',
  'err': 'pyarrow and local java libraries required for HDFS'},
 'asynclocal': {'class': 'morefs.asyn_local.AsyncLocalFileSystem',
  'err': "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem"},
 'az': {'class': 'adlfs.AzureBlobFileSystem',
  'err': 'Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage'},
 'blockcache': {'class': 'fsspec.implementations.cached.CachingFileSystem'},
 'box': {'class': 'boxfs.BoxFileSystem',
  'err': 'Please install boxfs to access BoxFileSystem'},
 'cached': {'class': 'fsspec.implementations.cached.CachingFileSystem'},
 'dask': {'class': 'fsspec.implementations.dask.DaskWorkerFileSystem',
  'err': 'Install dask distributed to access worker file system'},
 'data': {'class': 'fsspec.implementations.data.DataFileSystem'},
 'dbfs': {'class': 'fsspec.implementations.dbfs.DatabricksFileSystem',
  'err': 'Install the requests package to use the DatabricksFileSystem'},
 'dir': {'class': 'fsspec.implementations.dirfs.DirFileSystem'},
 'dropbox': {'class': 'dropboxdrivefs.DropboxDriveFileSystem',
  'err': 'DropboxFileSystem requires "dropboxdrivefs","requests" and ""dropbox" to be installed'},
 'dvc': {'class': 'dvc.api.DVCFileSystem',
  'err': 'Install dvc to access DVCFileSystem'},
 'file': {'class': 'fsspec.implementations.local.LocalFileSystem'},
 'filecache': {'class': 'fsspec.implementations.cached.WholeFileCacheFileSystem'},
 'ftp': {'class': 'fsspec.implementations.ftp.FTPFileSystem'},
 'gcs': {'class': 'gcsfs.GCSFileSystem',
  'err': 'Please install gcsfs to access Google Storage'},
 'gdrive': {'class': 'gdrivefs.GoogleDriveFileSystem',
  'err': 'Please install gdrivefs for access to Google Drive'},
 'generic': {'class': 'fsspec.generic.GenericFileSystem'},
 'git': {'class': 'fsspec.implementations.git.GitFileSystem',
  'err': 'Install pygit2 to browse local git repos'},
 'github': {'class': 'fsspec.implementations.github.GithubFileSystem',
  'err': 'Install the requests package to use the github FS'},
 'gs': {'class': 'gcsfs.GCSFileSystem',
  'err': 'Please install gcsfs to access Google Storage'},
 'hdfs': {'class': 'fsspec.implementations.arrow.HadoopFileSystem',
  'err': 'pyarrow and local java libraries required for HDFS'},
 'hf': {'class': 'huggingface_hub.HfFileSystem',
  'err': "Unable to load filesystem from EntryPoint(name='hf', value='huggingface_hub.HfFileSystem', group='fsspec.specs')"},
 'http': {'class': 'fsspec.implementations.http.HTTPFileSystem',
  'err': 'HTTPFileSystem requires "requests" and "aiohttp" to be installed'},
 'https': {'class': 'fsspec.implementations.http.HTTPFileSystem',
  'err': 'HTTPFileSystem requires "requests" and "aiohttp" to be installed'},
 'jlab': {'class': 'fsspec.implementations.jupyter.JupyterFileSystem',
  'err': 'Jupyter FS requires requests to be installed'},
 'jupyter': {'class': 'fsspec.implementations.jupyter.JupyterFileSystem',
  'err': 'Jupyter FS requires requests to be installed'},
 'lakefs': {'class': 'lakefs_spec.LakeFSFileSystem',
  'err': 'Please install lakefs-spec to access LakeFSFileSystem'},
 'libarchive': {'class': 'fsspec.implementations.libarchive.LibArchiveFileSystem',
  'err': 'LibArchive requires to be installed'},
 'local': {'class': 'fsspec.implementations.local.LocalFileSystem'},
 'memory': {'class': 'fsspec.implementations.memory.MemoryFileSystem'},
 'oci': {'class': 'ocifs.OCIFileSystem',
  'err': 'Install ocifs to access OCI Object Storage'},
 'ocilake': {'class': 'ocifs.OCIFileSystem',
  'err': 'Install ocifs to access OCI Data Lake'},
 'oss': {'class': 'ossfs.OSSFileSystem',
  'err': 'Install ossfs to access Alibaba Object Storage System'},
 'reference': {'class': 'fsspec.implementations.reference.ReferenceFileSystem'},
 'root': {'class': 'fsspec_xrootd.XRootDFileSystem',
  'err': "Install fsspec-xrootd to access xrootd storage system. Note: 'root' is the protocol name for xrootd storage systems, not referring to root directories"},
 's3': {'class': 's3fs.S3FileSystem', 'err': 'Install s3fs to access S3'},
 's3a': {'class': 's3fs.S3FileSystem', 'err': 'Install s3fs to access S3'},
 'sftp': {'class': 'fsspec.implementations.sftp.SFTPFileSystem',
  'err': 'SFTPFileSystem requires "paramiko" to be installed'},
 'simplecache': {'class': 'fsspec.implementations.cached.SimpleCacheFileSystem'},
 'smb': {'class': 'fsspec.implementations.smb.SMBFileSystem',
  'err': 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed'},
 'ssh': {'class': 'fsspec.implementations.sftp.SFTPFileSystem',
  'err': 'SFTPFileSystem requires "paramiko" to be installed'},
 'tar': {'class': 'fsspec.implementations.tar.TarFileSystem'},
 'wandb': {'class': 'wandbfs.WandbFS',
  'err': 'Install wandbfs to access wandb'},
 'webdav': {'class': 'webdav4.fsspec.WebdavFileSystem',
  'err': 'Install webdav4 to access WebDAV'},
 'webhdfs': {'class': 'fsspec.implementations.webhdfs.WebHDFS',
  'err': 'webHDFS access requires "requests" to be installed'},
 'zip': {'class': 'fsspec.implementations.zip.ZipFileSystem'}}