fsspec
简介
安装
fsspec 可以通过 PyPI 或 conda 安装,并且自身没有任何依赖项。
pip install fsspec
conda install -c conda-forge fsspec
并非所有文件系统实现都可以在不安装额外依赖项的情况下使用。例如,要访问 GCS(Google Cloud Storage)中的数据,您可以使用下面的 pip 可选依赖安装语法,或者直接安装所需的特定包。
pip install fsspec[gcs]
conda install -c conda-forge gcsfs
fsspec 会在您尝试使用需要额外依赖项的文件系统时,尽量给出恰当的提示信息。当前已知实现的列表可以通过以下方式查看:
from fsspec.registry import known_implementations
known_implementations
{'abfs': {'class': 'adlfs.AzureBlobFileSystem',
'err': 'Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage'},
'adl': {'class': 'adlfs.AzureDatalakeFileSystem',
'err': 'Install adlfs to access Azure Datalake Gen1'},
'arrow_hdfs': {'class': 'fsspec.implementations.arrow.HadoopFileSystem',
'err': 'pyarrow and local java libraries required for HDFS'},
'asynclocal': {'class': 'morefs.asyn_local.AsyncLocalFileSystem',
'err': "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem"},
'az': {'class': 'adlfs.AzureBlobFileSystem',
'err': 'Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage'},
'blockcache': {'class': 'fsspec.implementations.cached.CachingFileSystem'},
'box': {'class': 'boxfs.BoxFileSystem',
'err': 'Please install boxfs to access BoxFileSystem'},
'cached': {'class': 'fsspec.implementations.cached.CachingFileSystem'},
'dask': {'class': 'fsspec.implementations.dask.DaskWorkerFileSystem',
'err': 'Install dask distributed to access worker file system'},
'data': {'class': 'fsspec.implementations.data.DataFileSystem'},
'dbfs': {'class': 'fsspec.implementations.dbfs.DatabricksFileSystem',
'err': 'Install the requests package to use the DatabricksFileSystem'},
'dir': {'class': 'fsspec.implementations.dirfs.DirFileSystem'},
'dropbox': {'class': 'dropboxdrivefs.DropboxDriveFileSystem',
'err': 'DropboxFileSystem requires "dropboxdrivefs","requests" and ""dropbox" to be installed'},
'dvc': {'class': 'dvc.api.DVCFileSystem',
'err': 'Install dvc to access DVCFileSystem'},
'file': {'class': 'fsspec.implementations.local.LocalFileSystem'},
'filecache': {'class': 'fsspec.implementations.cached.WholeFileCacheFileSystem'},
'ftp': {'class': 'fsspec.implementations.ftp.FTPFileSystem'},
'gcs': {'class': 'gcsfs.GCSFileSystem',
'err': 'Please install gcsfs to access Google Storage'},
'gdrive': {'class': 'gdrivefs.GoogleDriveFileSystem',
'err': 'Please install gdrivefs for access to Google Drive'},
'generic': {'class': 'fsspec.generic.GenericFileSystem'},
'git': {'class': 'fsspec.implementations.git.GitFileSystem',
'err': 'Install pygit2 to browse local git repos'},
'github': {'class': 'fsspec.implementations.github.GithubFileSystem',
'err': 'Install the requests package to use the github FS'},
'gs': {'class': 'gcsfs.GCSFileSystem',
'err': 'Please install gcsfs to access Google Storage'},
'hdfs': {'class': 'fsspec.implementations.arrow.HadoopFileSystem',
'err': 'pyarrow and local java libraries required for HDFS'},
'hf': {'class': 'huggingface_hub.HfFileSystem',
'err': "Unable to load filesystem from EntryPoint(name='hf', value='huggingface_hub.HfFileSystem', group='fsspec.specs')"},
'http': {'class': 'fsspec.implementations.http.HTTPFileSystem',
'err': 'HTTPFileSystem requires "requests" and "aiohttp" to be installed'},
'https': {'class': 'fsspec.implementations.http.HTTPFileSystem',
'err': 'HTTPFileSystem requires "requests" and "aiohttp" to be installed'},
'jlab': {'class': 'fsspec.implementations.jupyter.JupyterFileSystem',
'err': 'Jupyter FS requires requests to be installed'},
'jupyter': {'class': 'fsspec.implementations.jupyter.JupyterFileSystem',
'err': 'Jupyter FS requires requests to be installed'},
'lakefs': {'class': 'lakefs_spec.LakeFSFileSystem',
'err': 'Please install lakefs-spec to access LakeFSFileSystem'},
'libarchive': {'class': 'fsspec.implementations.libarchive.LibArchiveFileSystem',
'err': 'LibArchive requires to be installed'},
'local': {'class': 'fsspec.implementations.local.LocalFileSystem'},
'memory': {'class': 'fsspec.implementations.memory.MemoryFileSystem'},
'oci': {'class': 'ocifs.OCIFileSystem',
'err': 'Install ocifs to access OCI Object Storage'},
'ocilake': {'class': 'ocifs.OCIFileSystem',
'err': 'Install ocifs to access OCI Data Lake'},
'oss': {'class': 'ossfs.OSSFileSystem',
'err': 'Install ossfs to access Alibaba Object Storage System'},
'reference': {'class': 'fsspec.implementations.reference.ReferenceFileSystem'},
'root': {'class': 'fsspec_xrootd.XRootDFileSystem',
'err': "Install fsspec-xrootd to access xrootd storage system. Note: 'root' is the protocol name for xrootd storage systems, not referring to root directories"},
's3': {'class': 's3fs.S3FileSystem', 'err': 'Install s3fs to access S3'},
's3a': {'class': 's3fs.S3FileSystem', 'err': 'Install s3fs to access S3'},
'sftp': {'class': 'fsspec.implementations.sftp.SFTPFileSystem',
'err': 'SFTPFileSystem requires "paramiko" to be installed'},
'simplecache': {'class': 'fsspec.implementations.cached.SimpleCacheFileSystem'},
'smb': {'class': 'fsspec.implementations.smb.SMBFileSystem',
'err': 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed'},
'ssh': {'class': 'fsspec.implementations.sftp.SFTPFileSystem',
'err': 'SFTPFileSystem requires "paramiko" to be installed'},
'tar': {'class': 'fsspec.implementations.tar.TarFileSystem'},
'wandb': {'class': 'wandbfs.WandbFS',
'err': 'Install wandbfs to access wandb'},
'webdav': {'class': 'webdav4.fsspec.WebdavFileSystem',
'err': 'Install webdav4 to access WebDAV'},
'webhdfs': {'class': 'fsspec.implementations.webhdfs.WebHDFS',
'err': 'webHDFS access requires "requests" to be installed'},
'zip': {'class': 'fsspec.implementations.zip.ZipFileSystem'}}