#! /usr/bin/env python
import functools
import os
import re
import warnings
from glob import glob

from six import string_types

from .standardname import StandardName
from .error import BadNameError, BadRegistryError
def load_names_from_txt(file_like, onerror="raise"):
    """Read Standard Names, one per line, from a text source.

    Each line is stripped of surrounding whitespace; blank lines are
    skipped. Every remaining line is parsed as a Standard Name.

    Parameters
    ----------
    file_like : iterable of str
        Anything that yields lines of text when iterated (an open text
        file, a ``StringIO``, a list of strings, ...).
    onerror : {'raise', 'warn', 'pass'}
        What to do, once the whole input has been read, if one or more
        lines were not valid names: raise a ``BadRegistryError`` holding
        the bad names, issue one warning per bad name, or ignore them.

    Returns
    -------
    set of StandardName
        The valid Standard Names read from the input.

    Raises
    ------
    ValueError
        If *onerror* is not one of the recognized values.
    BadRegistryError
        If invalid names were found and *onerror* is ``'raise'``.

    Examples
    --------
    >>> from six.moves import StringIO
    >>> import standard_names as csn
    >>> names = StringIO(\"\"\"
    ... air__temperature
    ... Water__Temperature
    ... \"\"\")
    >>> set_of_names = csn.registry.load_names_from_txt(names, onerror='warn')
    >>> [name.name for name in set_of_names]
    ['air__temperature']
    """
    if onerror not in ("pass", "raise", "warn"):
        raise ValueError("value for onerror keyword not understood")

    valid, rejected = set(), set()
    for line in file_like:
        text = line.strip()
        if not text:
            continue
        try:
            parsed = StandardName(text)
        except BadNameError:
            # Collect every bad name so they can all be reported together.
            rejected.add(text)
            continue
        valid.add(parsed)

    if rejected and onerror == "raise":
        raise BadRegistryError(rejected)
    if rejected and onerror == "warn":
        for text in rejected:
            warnings.warn("{name}: not a valid name".format(name=text))

    return valid
@functools.total_ordering
class _FallbackStrictVersion(object):
    """Minimal stand-in for ``distutils.version.StrictVersion``.

    Only used when ``distutils`` cannot be imported. It accepts the same
    ``N.N[.N][{a|b}N]`` grammar, orders versions the same way (a
    pre-release sorts before the corresponding final release), and
    renders as a string the same way (a zero patch number is omitted).
    """

    # ``[0-9]`` rather than ``\d`` so that only ASCII digits are accepted
    # without needing the Python-3-only ``re.ASCII`` flag.
    version_re = re.compile(r"^([0-9]+)\.([0-9]+)(\.([0-9]+))?([ab]([0-9]+))?$")

    def __init__(self, vstring):
        match = self.version_re.match(vstring)
        if not match:
            raise ValueError("invalid version number '{0}'".format(vstring))
        major, minor, patch, tag, tag_number = match.group(1, 2, 4, 5, 6)
        self.version = (int(major), int(minor), int(patch or 0))
        self.prerelease = (tag[0], int(tag_number)) if tag else None

    def _key(self):
        # A final release outranks any pre-release of the same number.
        rank = (1,) if self.prerelease is None else (0,) + self.prerelease
        return self.version, rank

    def _coerce(self, other):
        if isinstance(other, _FallbackStrictVersion):
            return other
        if isinstance(other, str):
            return _FallbackStrictVersion(other)
        return None

    def __eq__(self, other):
        other = self._coerce(other)
        if other is None:
            return NotImplemented
        return self._key() == other._key()

    def __lt__(self, other):
        other = self._coerce(other)
        if other is None:
            return NotImplemented
        return self._key() < other._key()

    def __str__(self):
        numbers = self.version if self.version[2] else self.version[:2]
        text = ".".join(str(number) for number in numbers)
        if self.prerelease:
            text += "{0}{1}".format(*self.prerelease)
        return text

    def __repr__(self):
        return "{0}('{1}')".format(type(self).__name__, self)


def _strict_version_or_raise(version_str):
    """Parse a strict version string into a comparable version object.

    Parameters
    ----------
    version_str : str
        Candidate version, e.g. ``'0.8.5'`` or ``'1.2b1'``.

    Returns
    -------
    StrictVersion
        ``distutils.version.StrictVersion`` when ``distutils`` is
        importable, otherwise an equivalent private fallback object.
        Either way the result supports ordering comparisons and ``str()``.

    Raises
    ------
    ValueError
        If *version_str* is not a strict version string.
    """
    try:
        from distutils.version import StrictVersion
    except ImportError:
        # distutils was removed from the standard library in Python 3.12
        # (PEP 632); fall back to the compatible local implementation so
        # that importing this module keeps working.
        StrictVersion = _FallbackStrictVersion

    if StrictVersion.version_re.match(version_str):
        return StrictVersion(version_str)
    raise ValueError("{version}: Not a version string".format(version=version_str))
def _get_latest_names_file(path=None, prefix="names-", suffix=".txt"):
    """Get the most recent version of a names file.

    Files matching ``<prefix>*<suffix>`` are collected from the data
    folder; the text between prefix and suffix is parsed as a version
    and the file with the highest version wins. Files whose middle part
    is not a valid version string are ignored.

    Parameters
    ----------
    path : str, optional
        If given, the path to a folder holding names files. Otherwise,
        the ``data`` folder next to this module is used.
    prefix : str, optional
        The prefix for names-file glob.
    suffix : str, optional
        The suffix for names-file glob.

    Returns
    -------
    tuple of str
        Tuple of the path of the latest file and its version (as
        rendered by ``str()`` on the parsed version), or
        ``(None, None)`` if no versioned names file was found.

    Examples
    --------
    >>> import os
    >>> from standard_names.registry import _get_latest_names_file
    >>> fname, version = _get_latest_names_file()
    >>> os.path.basename(fname)
    'names-0.8.5.txt'
    >>> version
    '0.8.5'
    >>> _get_latest_names_file(prefix='garbage')
    (None, None)
    >>> _get_latest_names_file(prefix='names-0.8.3')
    (None, None)
    """
    data_dir = path or os.path.join(os.path.dirname(__file__), "data")
    name_glob = "{prefix}*{suffix}".format(prefix=prefix, suffix=suffix)
    data_file_pattern = os.path.join(data_dir, name_glob)

    newest_version = None
    newest_file = None
    for file_path in glob(data_file_pattern):
        file_name = os.path.basename(file_path)
        # Use an explicit end index: ``-len(suffix)`` would be ``-0`` for an
        # empty suffix and slice everything away.
        version_str = file_name[len(prefix) : len(file_name) - len(suffix)]
        try:
            version = _strict_version_or_raise(version_str)
        except ValueError:
            continue  # not a versioned names file
        if newest_version is None or version > newest_version:
            newest_version, newest_file = version, file_name

    if newest_version is None:
        return None, None

    # Return the file that was actually found. Rebuilding the name from
    # ``str(version)`` is unsafe because the parsed version is normalized
    # (e.g. '1.0.0' renders as '1.0'), which would point at a file that
    # does not exist.
    return os.path.join(data_dir, newest_file), str(newest_version)
class NamesRegistry(object):
    """A registry of CSDMS Standard Names.

    Parameters
    ----------
    paths : str, file-like, or iterable of these, optional
        Name(s) of the data file(s), or open file-like object(s), from
        which to read. If not given, use a default database. If ``None``,
        create an empty registry. May be passed positionally or as a
        keyword.
    version : str, optional
        The version of the names registry. If not given, the version of
        the default database is used when that database is loaded, and
        ``'0.0.0'`` otherwise.

    Attributes
    ----------
    version
    names
    objects
    quantities
    operators

    Examples
    --------
    >>> from standard_names import NamesRegistry

    Get the default set of names.

    >>> registry = NamesRegistry()
    >>> len(registry) > 0
    True

    Create an empty registry and add a name to it.

    >>> registry = NamesRegistry(None)
    >>> len(registry)
    0
    >>> registry.add('air__temperature')
    >>> len(registry)
    1

    Use the ``names``, ``objects``, ``quantities``, and ``operators`` to
    get lists of each in the registry.

    >>> registry.names
    ('air__temperature',)
    >>> registry.objects
    ('air',)
    >>> registry.quantities
    ('temperature',)
    >>> registry.operators
    ()

    You can search the registry for names using the ``names_with``,
    ``match``, and ``search`` methods.

    Use ``names_with`` to look for names that contain a given string or
    strings.

    >>> registry.add('water__temperature')
    >>> sorted(registry.names_with('temperature'))
    ['air__temperature', 'water__temperature']
    >>> registry.names_with(['temperature', 'air'])
    ['air__temperature']

    Use ``match`` to match names using a glob-style pattern.

    >>> registry.match('air*')
    ['air__temperature']

    Use ``search`` to do a fuzzy search of the list.

    >>> registry.search('air__temp')
    ['air__temperature']
    """

    def __init__(self, *args, **kwds):
        if len(args) > 1:
            raise ValueError("0 or 1 arguments expected")

        if args:
            paths, version = args[0], None
        elif "paths" in kwds:
            # Honor the documented ``paths`` keyword (previously ignored).
            paths, version = kwds["paths"], None
        else:
            paths, version = _get_latest_names_file()

        # Honor the documented ``version`` keyword (previously ignored).
        if kwds.get("version") is not None:
            version = kwds["version"]

        if paths is None:
            paths = []
        elif isinstance(paths, string_types) or hasattr(paths, "readline"):
            # A single file name or a single open file-like object.
            paths = [paths]

        self._names = set()
        self._objects = set()
        self._quantities = set()
        self._operators = set()
        self._version = version or "0.0.0"

        for path in paths:
            if isinstance(path, string_types):
                with open(path, "r") as fp:
                    self._load(fp)
            else:
                self._load(path)

    def _load(self, file_like, onerror="raise"):
        """Add every name read from *file_like* to the registry."""
        for name in load_names_from_txt(file_like, onerror=onerror):
            self.add(name)

    @property
    def version(self):
        """The version of the names database.

        Returns
        -------
        str
            The registry version.
        """
        return self._version

    @property
    def names(self):
        """All names in the registry.

        Returns
        -------
        tuple of str
            All of the names in the registry (in no particular order).
        """
        return tuple(self._names)

    @property
    def objects(self):
        """All objects in the registry.

        Returns
        -------
        tuple of str
            All of the objects in the registry (in no particular order).
        """
        return tuple(self._objects)

    @property
    def quantities(self):
        """All quantities in the registry.

        Returns
        -------
        tuple of str
            All of the quantities in the registry (in no particular order).
        """
        return tuple(self._quantities)

    @property
    def operators(self):
        """All operators in the registry.

        Returns
        -------
        tuple of str
            All of the operators in the registry (in no particular order).
        """
        return tuple(self._operators)

    @classmethod
    def from_path(cls, path):
        """Create a new registry from a text file.

        Parameters
        ----------
        path : str
            Path to a text file of Standard Names.

        Returns
        -------
        NamesRegistry
            A newly-created registry filled with names from the file.
        """
        return cls(path)

    def add(self, name):
        """Add a name to the registry.

        The name itself, its object, its quantity, and each of its
        operators are recorded.

        Parameters
        ----------
        name : str or StandardName
            A Standard Name.
        """
        if not isinstance(name, StandardName):
            name = StandardName(name)

        self._names.add(name.name)
        self._objects.add(name.object)
        self._quantities.add(name.quantity)
        self._operators.update(name.operators)

    def __contains__(self, name):
        if isinstance(name, StandardName):
            name = name.name
        return name in self._names

    def __len__(self):
        return len(self._names)

    def __iter__(self):
        return iter(self._names)

    def search(self, name):
        """Search the registry for a name.

        Parameters
        ----------
        name : str
            Name to search for.

        Returns
        -------
        list of str
            Names that closely match the given name, best match first.
        """
        from difflib import get_close_matches

        return get_close_matches(name, self._names)

    def match(self, pattern):
        """Search the registry for names that match a pattern.

        Parameters
        ----------
        pattern : str
            Glob-style pattern with which to search the registry.

        Returns
        -------
        list of str
            List of names matching the pattern.
        """
        import fnmatch

        # fnmatchcase is always case-sensitive, like the compiled
        # ``fnmatch.translate`` pattern it replaces.
        return [name for name in self._names if fnmatch.fnmatchcase(name, pattern)]

    def names_with(self, parts):
        """Search the registry for names containing words.

        Parameters
        ----------
        parts : str or iterable of str
            Word(s) to search for.

        Returns
        -------
        list of str
            Names from the registry that contain all of the given words.
            If *parts* is empty, every name is returned.
        """
        if isinstance(parts, string_types):
            parts = (parts,)

        # Start from all names so that an empty *parts* is well defined
        # (previously this raised UnboundLocalError).
        matches = list(self._names)
        for part in parts:
            matches = [name for name in matches if part in name]
        return matches
# Default registry, built at import time. With no arguments,
# ``NamesRegistry`` loads the latest names file it can find.
REGISTRY = NamesRegistry()

# Convenience snapshots of the default registry's contents. Each is a
# tuple created here, so names added to REGISTRY later are not reflected.
NAMES, OBJECTS, QUANTITIES, OPERATORS = (
    REGISTRY.names,
    REGISTRY.objects,
    REGISTRY.quantities,
    REGISTRY.operators,
)
VERSION = REGISTRY.version