excelparser¶
Module for parsing an Excel file and creating an ontology from it.
The excelfile is read by pandas and the pandas dataframe should have column names: prefLabel, altLabel, Elucidation, Comments, Examples, subClassOf, Relations.
Note that correct case is mandatory.
ExcelError (EMMOntoPyException)
¶
Raised on errors in Excel file.
Source code in ontopy/excelparser.py
class ExcelError(EMMOntoPyException):
    """Raised on errors in Excel file.

    Within this module it is raised when a relation of a concept cannot be
    evaluated and `force` is not set.
    """
create_ontology_from_excel(excelpath, *, concept_sheet_name='Concepts', metadata_sheet_name='Metadata', imports_sheet_name='ImportedOntologies', dataproperties_sheet_name='DataProperties', objectproperties_sheet_name='ObjectProperties', annotationproperties_sheet_name='AnnotationProperties', base_iri='http://emmo.info/emmo/domain/onto#', base_iri_from_metadata=True, imports=None, catalog=None, force=False, input_ontology=None)
¶
Creates an ontology from an Excel-file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
excelpath |
str |
Path to Excel workbook. |
required |
concept_sheet_name |
str |
Name of sheet where concepts are defined. The second row of this sheet should contain column names that are supported. Currently these are 'prefLabel','altLabel', 'Elucidation', 'Comments', 'Examples', 'subClassOf', 'Relations'. Multiple entries are separated with ';'. |
'Concepts' |
metadata_sheet_name |
str |
Name of sheet where metadata are defined.
The first row contains column names 'Metadata name' and 'Value'
Supported 'Metadata names' are: 'Ontology IRI',
'Ontology version IRI', 'Ontology version Info', 'Title',
'Abstract', 'License', 'Comment', 'Author', 'Contributor'.
Multiple entries are separated with a semi-colon (`;`). |
'Metadata' |
imports_sheet_name |
str |
Name of sheet where imported ontologies are defined. Column name is 'Imported ontologies'. Fully resolvable URL or path to imported ontologies provided one per row. |
'ImportedOntologies' |
dataproperties_sheet_name |
str |
Name of sheet where data properties are defined. The second row of this sheet should contain column names that are supported. Currently these are 'prefLabel','altLabel', 'Elucidation', 'Comments', 'Examples', 'subPropertyOf', 'Domain', 'Range', 'dijointWith', 'equivalentTo'. |
'DataProperties' |
annotationproperties_sheet_name |
str |
Name of sheet where annotation properties are defined. The second row of this sheet should contain column names that are supported. Currently these are 'prefLabel', 'altLabel', 'Elucidation', 'Comments', 'Examples', 'subPropertyOf', 'Domain', 'Range'. |
'AnnotationProperties' |
objectproperties_sheet_name |
str |
Name of sheet where object properties are defined. The second row of this sheet should contain column names that are supported. Currently these are 'prefLabel','altLabel', 'Elucidation', 'Comments', 'Examples', 'subPropertyOf', 'Domain', 'Range', 'inverseOf', 'dijointWith', 'equivalentTo'. |
'ObjectProperties' |
base_iri |
str |
Base IRI of the new ontology. |
'http://emmo.info/emmo/domain/onto#' |
base_iri_from_metadata |
bool |
Whether to use base IRI defined from metadata. |
True |
imports |
list |
List of imported ontologies. |
None |
catalog |
dict |
Imported ontologies with (name, full path) key/value-pairs. |
None |
force |
bool |
Forcibly make an ontology by skipping concepts that are erroneously defined or other errors in the excel sheet. |
False |
input_ontology |
Optional[ontopy.ontology.Ontology] |
Ontology that should be updated. Default is None, which means that a completely new ontology is generated. If an input_ontology to be updated is provided, the metadata sheet in the excel sheet will not be considered. |
None |
Returns:
Type | Description |
---|---|
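Tuple[ontopy.ontology.Ontology, dict, dict] |
A tuple with the created ontology, the associated catalog of ontology names and resolvable paths (as a dict), and a dictionary with lists of concepts that raise errors (see the docstring below for the keys). |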
Source code in ontopy/excelparser.py
def create_ontology_from_excel(  # pylint: disable=too-many-arguments, too-many-locals
    excelpath: str,
    *,
    concept_sheet_name: str = "Concepts",
    metadata_sheet_name: str = "Metadata",
    imports_sheet_name: str = "ImportedOntologies",
    dataproperties_sheet_name: str = "DataProperties",
    objectproperties_sheet_name: str = "ObjectProperties",
    annotationproperties_sheet_name: str = "AnnotationProperties",
    base_iri: str = "http://emmo.info/emmo/domain/onto#",
    base_iri_from_metadata: bool = True,
    imports: list = None,
    catalog: dict = None,
    force: bool = False,
    input_ontology: Union[ontopy.ontology.Ontology, None] = None,
) -> Tuple[ontopy.ontology.Ontology, dict, dict]:
    """
    Creates an ontology from an Excel-file.

    The sheets of the workbook are read into pandas dataframes, which are
    then handed over to `create_ontology_from_pandas()`.

    Arguments:
        excelpath: Path to Excel workbook.
        concept_sheet_name: Name of sheet where concepts are defined.
            The second row of this sheet should contain column names that are
            supported. Currently these are 'prefLabel','altLabel',
            'Elucidation', 'Comments', 'Examples', 'subClassOf', 'Relations'.
            Multiple entries are separated with ';'.
        metadata_sheet_name: Name of sheet where metadata are defined.
            The first row contains column names 'Metadata name' and 'Value'
            Supported 'Metadata names' are: 'Ontology IRI',
            'Ontology version IRI', 'Ontology version Info', 'Title',
            'Abstract', 'License', 'Comment', 'Author', 'Contributor'.
            Multiple entries are separated with a semi-colon (`;`).
        imports_sheet_name: Name of sheet where imported ontologies are
            defined.
            Column name is 'Imported ontologies'.
            Fully resolvable URL or path to imported ontologies provided one
            per row.
            Paths that are neither web URLs nor absolute are taken relative
            to the directory of the Excel workbook.
            If the sheet does not exist, no ontologies are imported.
        dataproperties_sheet_name: Name of sheet where data properties are
            defined. The second row of this sheet should contain column names
            that are supported. Currently these are 'prefLabel','altLabel',
            'Elucidation', 'Comments', 'Examples', 'subPropertyOf',
            'Domain', 'Range', 'dijointWith', 'equivalentTo'.
        annotationproperties_sheet_name: Name of sheet where annotation
            properties are defined. The second row of this sheet should contain
            column names that are supported. Currently these are 'prefLabel',
            'altLabel', 'Elucidation', 'Comments', 'Examples', 'subPropertyOf',
            'Domain', 'Range'.
        objectproperties_sheet_name: Name of sheet where object properties are
            defined. The second row of this sheet should contain column names
            that are supported. Currently these are 'prefLabel','altLabel',
            'Elucidation', 'Comments', 'Examples', 'subPropertyOf',
            'Domain', 'Range', 'inverseOf', 'dijointWith', 'equivalentTo'.
        base_iri: Base IRI of the new ontology.
        base_iri_from_metadata: Whether to use base IRI defined from metadata.
        imports: List of imported ontologies.
            NOTE(review): the value passed here is currently replaced by the
            content of the `imports_sheet_name` sheet before it is used.
        catalog: Imported ontologies with (name, full path) key/value-pairs.
        force: Forcibly make an ontology by skipping concepts
            that are erroneously defined or other errors in the excel sheet.
        input_ontology: Ontology that should be updated.
            Default is None,
            which means that a completely new ontology is generated.
            If an input_ontology to be updated is provided,
            the metadata sheet in the excel sheet will not be considered.

    Returns:
        A tuple with the:

            * created ontology
            * associated catalog of ontology names and resolvable path as dict
            * a dictionary with lists of concepts that raise errors, with the
              following keys:

                - "already_defined": These are concepts (classes)
                    that are already in the
                    ontology, because they were already added in a
                    previous line of the excelfile/pandas dataframe, or because
                    it is already defined in an imported ontology with the same
                    base_iri as the newly created ontology.
                - "in_imported_ontologies": Concepts (classes)
                    that are defined in the
                    excel, but already exist in the imported ontologies.
                - "wrongly_defined": Concepts (classes) that are given an
                    invalid prefLabel (e.g. with a space in the name).
                - "missing_subClassOf": Concepts (classes) that are missing
                    parents. These concepts are added directly under owl:Thing.
                - "invalid_subClassOf": Concepts (classes) with invalidly
                    defined parents.
                    These concepts are added directly under owl:Thing.
                - "nonadded_concepts": List of all concepts (classes) that are
                    not added,
                    either because the prefLabel is invalid, or because the
                    concept has already been added once or already exists in an
                    imported ontology.
                - "obj_prop_already_defined": Object properties that are already
                    defined in the ontology.
                - "obj_prop_in_imported_ontologies": Object properties that are
                    defined in the excel, but already exist in the imported
                    ontologies.
                - "obj_prop_wrongly_defined": Object properties that are given
                    an invalid prefLabel (e.g. with a space in the name).
                - "obj_prop_missing_subPropertyOf": Object properties that are
                    missing parents.
                - "obj_prop_invalid_subPropertyOf": Object properties with
                    invalidly defined parents.
                - "obj_prop_nonadded_entities": List of all object properties
                    that are not added, either because the prefLabel is invalid,
                    or because the concept has already been added once or
                    already exists in an imported ontology.
                - "obj_prop_errors_in_properties": Object properties with
                    invalidly defined properties.
                - "obj_prop_errors_in_range": Object properties with invalidly
                    defined range.
                - "obj_prop_errors_in_domain": Object properties with invalidly
                    defined domain.
                - "annot_prop_already_defined": Annotation properties that are
                    already defined in the ontology.
                - "annot_prop_in_imported_ontologies": Annotation properties
                    that
                    are defined in the excel, but already exist in the imported
                    ontologies.
                - "annot_prop_wrongly_defined": Annotation properties that are
                    given an invalid prefLabel (e.g. with a space in the name).
                - "annot_prop_missing_subPropertyOf": Annotation properties that
                    are missing parents.
                - "annot_prop_invalid_subPropertyOf": Annotation properties with
                    invalidly defined parents.
                - "annot_prop_nonadded_entities": List of all annotation
                    properties that are not added, either because the prefLabel
                    is invalid, or because the concept has already been added
                    once or already exists in an imported ontology.
                - "annot_prop_errors_in_properties": Annotation properties with
                    invalidly defined properties.
                - "data_prop_already_defined": Data properties that are already
                    defined in the ontology.
                - "data_prop_in_imported_ontologies": Data properties that are
                    defined in the excel, but already exist in the imported
                    ontologies.
                - "data_prop_wrongly_defined": Data properties that are given
                    an invalid prefLabel (e.g. with a space in the name).
                - "data_prop_missing_subPropertyOf": Data properties that are
                    missing parents.
                - "data_prop_invalid_subPropertyOf": Data properties with
                    invalidly defined parents.
                - "data_prop_nonadded_entities": List of all data properties
                    that are not added, either because the prefLabel is invalid,
                    or because the concept has already been added once or
                    already exists in an imported ontology.
                - "data_prop_errors_in_properties": Data properties with
                    invalidly defined properties.
                - "data_prop_errors_in_range": Data properties with invalidly
                    defined range.
                - "data_prop_errors_in_domain": Data properties with invalidly
                    defined domain.

    """
    web_protocol = "http://", "https://", "ftp://"

    # `os.path.dirname()` returns "" for a bare file name.  Fall back to "."
    # so that the joined path stays relative instead of becoming "/<name>".
    excel_dir = os.path.dirname(excelpath) or "."

    def _relative_to_absolute_paths(path):
        """Prefix local, relative paths with the directory of the workbook.

        Non-string values (e.g. NaN), web URLs and paths that are already
        absolute are returned unchanged.
        """
        if (
            isinstance(path, str)
            and not path.startswith(web_protocol)
            and not os.path.isabs(path)
        ):
            path = excel_dir + "/" + path
        return path

    def _read_property_sheet(sheet_name, kind):
        """Return the dataframe of a property sheet, or None if unusable.

        `kind` ("object", "annotation" or "data") is only used in the
        warnings issued when the sheet or its 'prefLabel' column is missing.
        """
        try:
            properties = pd.read_excel(
                excelpath, sheet_name=sheet_name, skiprows=[0, 2]
            )
        except ValueError:
            warnings.warn(
                f"No sheet named {sheet_name} found in {excelpath}. "
                f"New {kind} properties will not be added to the ontology."
            )
            return None
        if "prefLabel" not in properties.columns:
            warnings.warn(
                "The 'prefLabel' column is missing in "
                f"{sheet_name}. "
                f"New {kind} properties will not be added to the ontology."
            )
            return None
        return properties

    try:
        imports = pd.read_excel(
            excelpath, sheet_name=imports_sheet_name, skiprows=[1]
        )
    except ValueError:
        # No sheet with imported ontologies
        imports = pd.DataFrame()
    else:
        # Strip leading and trailing white spaces in paths.
        # `DataFrame.replace()` returns a new dataframe, so the result must
        # be assigned back for the stripping to have any effect.
        imports = imports.replace(r"^\s+", "", regex=True).replace(
            r"\s+$", "", regex=True
        )
        # Set empty strings to nan
        imports = imports.replace(r"^\s*$", np.nan, regex=True)
        if "Imported ontologies" in imports.columns:
            imports["Imported ontologies"] = imports[
                "Imported ontologies"
            ].apply(_relative_to_absolute_paths)

    # Read datafile TODO: Some magic to identify the header row
    conceptdata = pd.read_excel(
        excelpath, sheet_name=concept_sheet_name, skiprows=[0, 2]
    )

    objectproperties = _read_property_sheet(
        objectproperties_sheet_name, "object"
    )
    annotationproperties = _read_property_sheet(
        annotationproperties_sheet_name, "annotation"
    )
    dataproperties = _read_property_sheet(dataproperties_sheet_name, "data")

    metadata = pd.read_excel(excelpath, sheet_name=metadata_sheet_name)

    return create_ontology_from_pandas(
        data=conceptdata,
        objectproperties=objectproperties,
        dataproperties=dataproperties,
        annotationproperties=annotationproperties,
        metadata=metadata,
        imports=imports,
        base_iri=base_iri,
        base_iri_from_metadata=base_iri_from_metadata,
        catalog=catalog,
        force=force,
        input_ontology=input_ontology,
    )
create_ontology_from_pandas(data, objectproperties, annotationproperties, dataproperties, metadata, imports, base_iri='http://emmo.info/emmo/domain/onto#', base_iri_from_metadata=True, catalog=None, force=False, input_ontology=None)
¶
Create an ontology from a pandas DataFrame.
Check 'create_ontology_from_excel' for complete documentation.
Source code in ontopy/excelparser.py
def create_ontology_from_pandas(  # pylint:disable=too-many-locals,too-many-branches,too-many-statements,too-many-arguments, too-many-positional-arguments
    data: pd.DataFrame,
    objectproperties: pd.DataFrame,
    annotationproperties: pd.DataFrame,
    dataproperties: pd.DataFrame,
    metadata: pd.DataFrame,
    imports: pd.DataFrame,
    base_iri: str = "http://emmo.info/emmo/domain/onto#",
    base_iri_from_metadata: bool = True,
    catalog: dict = None,
    force: bool = False,
    input_ontology: Union[ontopy.ontology.Ontology, None] = None,
) -> Tuple[ontopy.ontology.Ontology, dict, dict]:
    """
    Create an ontology from a pandas DataFrame.

    Check 'create_ontology_from_excel' for complete documentation.

    Arguments:
        data: Dataframe with the concepts (classes) to add.
        objectproperties: Dataframe with object properties, or None to skip.
        annotationproperties: Dataframe with annotation properties, or None
            to skip.
        dataproperties: Dataframe with data properties, or None to skip.
        metadata: Dataframe with ontology metadata. Only used when no
            `input_ontology` is given.
        imports: Dataframe with imported ontologies. Only used when no
            `input_ontology` is given.
        base_iri: Base IRI of the new ontology.
        base_iri_from_metadata: Whether to use base IRI defined from metadata.
        catalog: NOTE(review): the value passed here is currently not used;
            the returned catalog is always created by this function.
        force: If true, relations referring to unknown labels are reported
            with a warning instead of raising `ExcelError`.
        input_ontology: Ontology to update instead of creating a new one.

    Returns:
        A tuple `(onto, catalog, entities_with_errors)`, where
        `entities_with_errors` maps error categories to sets of names.

    Raises:
        ExcelError: If a relation refers to an unknown label and `force`
            is not True.
    """
    # Get ontology to which new concepts should be added
    if input_ontology:
        onto = input_ontology
        catalog = {}
    else:  # Create new ontology
        onto, catalog = get_metadata_from_dataframe(
            metadata, base_iri, imports=imports
        )
        # Set given or default base_iri if base_iri_from_metadata is False.
        # Only a newly created ontology gets its base IRI overridden.
        if not base_iri_from_metadata:
            onto.base_iri = base_iri

    # prefLabel, label, and altLabel
    # are default label annotations
    onto.set_default_label_annotations()

    # Add object properties
    if objectproperties is not None:
        objectproperties = _clean_dataframe(objectproperties)
        (
            onto,
            objectproperties_with_errors,
            added_objprop_indices,
        ) = _add_entities(
            onto=onto,
            data=objectproperties,
            entitytype=owlready2.ObjectPropertyClass,
            force=force,
        )

    # Add annotation properties
    if annotationproperties is not None:
        annotationproperties = _clean_dataframe(annotationproperties)
        (
            onto,
            annotationproperties_with_errors,
            added_annotprop_indices,
        ) = _add_entities(
            onto=onto,
            data=annotationproperties,
            entitytype=owlready2.AnnotationPropertyClass,
            force=force,
        )

    # Add data properties
    if dataproperties is not None:
        dataproperties = _clean_dataframe(dataproperties)
        (
            onto,
            dataproperties_with_errors,
            added_dataprop_indices,
        ) = _add_entities(
            onto=onto,
            data=dataproperties,
            entitytype=owlready2.DataPropertyClass,
            force=force,
        )

    onto.sync_attributes(
        name_policy="uuid", name_prefix="EMMO_", class_docstring="elucidation"
    )

    # Clean up data frame with new concepts
    data = _clean_dataframe(data)

    # Add entities
    onto, entities_with_errors, added_concept_indices = _add_entities(
        onto=onto, data=data, entitytype=owlready2.ThingClass, force=force
    )

    # Add entity properties in a second loop, such that relations may refer
    # to any of the concepts added above.
    for index in added_concept_indices:
        row = data.loc[index]
        properties = row["Relations"]
        if properties == "nan":
            properties = None
        if not isinstance(properties, str):
            continue

        label = row["prefLabel"].strip()
        try:
            entity = onto.get_by_label(label)
        except NoSuchLabelError:
            # Without the entity there is nothing to attach the relations
            # to.  Skip this row rather than falling through with `entity`
            # unbound or still referring to the entity of a previous row.
            warnings.warn(
                f"No entity with label '{label}' found in the ontology. "
                "Its relations are not added."
            )
            continue

        for prop in properties.split(";"):
            try:
                entity.is_a.append(evaluate(onto, prop.strip()))
            except pyparsing.ParseException as exc:
                warnings.warn(
                    # This is currently not tested
                    f"Error in Property assignment for: '{entity}'. "
                    f"Property to be Evaluated: '{prop}'. "
                    f"{exc}"
                )
                entities_with_errors["errors_in_properties"].append(
                    entity.name
                )
            except NoSuchLabelError as exc:
                msg = (
                    f"Error in Property assignment for: {entity}. "
                    f"Property to be Evaluated: {prop}. "
                    f"{exc}"
                )
                if force is True:
                    warnings.warn(msg)
                    entities_with_errors["errors_in_properties"].append(
                        entity.name
                    )
                else:
                    raise ExcelError(msg) from exc

    # Add range and domain for object properties
    if objectproperties is not None:
        onto, objectproperties_with_errors = _add_range_domain(
            onto=onto,
            properties=objectproperties,
            added_prop_indices=added_objprop_indices,
            properties_with_errors=objectproperties_with_errors,
            force=force,
        )
        for key, value in objectproperties_with_errors.items():
            entities_with_errors["obj_prop_" + key] = value

    # Add range and domain for annotation properties
    if annotationproperties is not None:
        onto, annotationproperties_with_errors = _add_range_domain(
            onto=onto,
            properties=annotationproperties,
            added_prop_indices=added_annotprop_indices,
            properties_with_errors=annotationproperties_with_errors,
            force=force,
        )
        for key, value in annotationproperties_with_errors.items():
            entities_with_errors["annot_prop_" + key] = value

    # Add range and domain for data properties
    if dataproperties is not None:
        onto, dataproperties_with_errors = _add_range_domain(
            onto=onto,
            properties=dataproperties,
            added_prop_indices=added_dataprop_indices,
            properties_with_errors=dataproperties_with_errors,
            force=force,
        )
        for key, value in dataproperties_with_errors.items():
            entities_with_errors["data_prop_" + key] = value

    # Synchronise Python attributes to ontology
    onto.sync_attributes(
        name_policy="uuid", name_prefix="EMMO_", class_docstring="elucidation"
    )
    onto.dir_label = False

    # Remove duplicates from the reported errors
    entities_with_errors = {
        key: set(value) for key, value in entities_with_errors.items()
    }
    return onto, catalog, entities_with_errors
get_metadata_from_dataframe(metadata, base_iri, base_iri_from_metadata=True, imports=None, catalog=None)
¶
Create ontology with metadata from pd.DataFrame
Source code in ontopy/excelparser.py
def get_metadata_from_dataframe(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    metadata: pd.DataFrame,
    base_iri: str,
    base_iri_from_metadata: bool = True,
    imports: pd.DataFrame = None,
    catalog: dict = None,
) -> Tuple[ontopy.ontology.Ontology, dict]:
    """Create ontology with metadata from pd.DataFrame

    Arguments:
        metadata: Dataframe with the ontology metadata.
        base_iri: Base IRI used for the new ontology when no 'Ontology IRI'
            can be obtained from `metadata` or when `base_iri_from_metadata`
            is false.
        base_iri_from_metadata: Whether to take the base IRI from the
            'Ontology IRI' entry in `metadata` (with "#" appended).
        imports: Dataframe with ontologies to import. Locations are read
            from the 'Imported ontologies' column. The optional columns
            'prefix' and 'base_iri_root' are used for setting prefixes.
            If None or if the 'Imported ontologies' column is missing,
            nothing is imported.
        catalog: Catalog to update with the imported ontologies. It is
            updated in place. A new dict is created if None.

    Returns:
        A tuple `(onto, catalog)` with the new ontology and the catalog.
    """
    # base_iri from metadata if it exists and base_iri_from_metadata
    if base_iri_from_metadata:
        try:
            base_iris = _parse_literal(metadata, "Ontology IRI", metadata=True)
            if len(base_iris) > 1:
                warnings.warn(
                    "More than one Ontology IRI given. The first was chosen."
                )
            base_iri = base_iris[0] + "#"
        except (TypeError, ValueError, AttributeError, IndexError):
            # No usable 'Ontology IRI' in metadata, keep the given base_iri
            pass

    # Create new ontology
    onto = get_ontology(base_iri)

    # Add imported ontologies
    catalog = {} if catalog is None else catalog
    locations = set()
    # `imports` may be None (its default) or a dataframe without the
    # expected column, e.g. when there is no sheet with imports.
    has_imports = (
        imports is not None and "Imported ontologies" in imports.columns
    )
    if has_imports:
        for _, row in imports.iterrows():
            location = row["Imported ontologies"]
            if pd.isna(location) or location in locations:
                continue

            imported = onto.world.get_ontology(location).load()
            onto.imported_ontologies.append(imported)
            catalog[imported.base_iri.rstrip("#/")] = location
            try:
                cat = read_catalog(location.rsplit("/", 1)[0])
                catalog.update(cat)
            except ReadCatalogError:
                warnings.warn(f"Catalog for {imported} not found.")
            locations.add(location)

            # Set defined prefix.  The 'prefix' and 'base_iri_root' columns
            # are optional, `Series.get()` returns None if they are missing.
            prefix = row.get("prefix")
            if not pd.isna(prefix):
                base_iri_root = row.get("base_iri_root")
                if not pd.isna(base_iri_root):
                    # set prefix for all ontologies with same 'base_iri_root'
                    onto.set_common_prefix(
                        iri_base=base_iri_root, prefix=prefix
                    )
                else:
                    # If base_root not given, set prefix only to top ontology
                    imported.prefix = prefix

    # (attribute of `onto.metadata`, 'Metadata name', extra keyword arguments)
    literals = (
        ("title", "Title", {"only_one": True}),
        ("license", "License", {}),
        ("creator", "Author", {}),  # authors/creators
        ("contributor", "Contributor", {}),
        ("versionInfo", "Ontology version Info", {"only_one": True}),
    )
    with onto:
        for attrname, name, kwargs in literals:
            # Best effort: an AttributeError raised while adding one of the
            # annotations is ignored and the annotation is skipped.
            try:
                _add_literal(
                    metadata,
                    getattr(onto.metadata, attrname),
                    name,
                    metadata=True,
                    **kwargs,
                )
            except AttributeError:
                pass

    return onto, catalog