excelparser¶
Module from parsing an excelfile and creating an ontology from it.
The excelfile is read by pandas and the pandas dataframe should have column names: prefLabel, altLabel, Elucidation, Comments, Examples, subClassOf, Relations.
Note that correct case is mandatory.
ExcelError (EMMOntoPyException)
¶
Raised on errors in Excel file.
Source code in ontopy/excelparser.py
class ExcelError(EMMOntoPyException):
"""Raised on errors in Excel file."""
create_ontology_from_excel(excelpath, concept_sheet_name='Concepts', metadata_sheet_name='Metadata', imports_sheet_name='ImportedOntologies', base_iri='http://emmo.info/emmo/domain/onto#', base_iri_from_metadata=True, imports=None, catalog=None, force=False)
¶
Creates an ontology from an Excel-file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
excelpath |
str |
Path to Excel workbook. |
required |
concept_sheet_name |
str |
Name of sheet where concepts are defined. The second row of this sheet should contain column names that are supported. Currently these are 'prefLabel','altLabel', 'Elucidation', 'Comments', 'Examples', 'subClassOf', 'Relations'. Multiple entries are separated with ';'. |
'Concepts' |
metadata_sheet_name |
str |
Name of sheet where metadata are defined.
The first row contains column names 'Metadata name' and 'Value'
Supported 'Metadata names' are: 'Ontology IRI',
'Ontology vesion IRI', 'Ontology version Info', 'Title',
'Abstract', 'License', 'Comment', 'Author', 'Contributor'.
Multiple entries are separated with a semi-colon ( |
'Metadata' |
imports_sheet_name |
str |
Name of sheet where imported ontologies are defined. Column name is 'Imported ontologies'. Fully resolvable URL or path to imported ontologies provided one per row. |
'ImportedOntologies' |
base_iri |
str |
Base IRI of the new ontology. |
'http://emmo.info/emmo/domain/onto#' |
base_iri_from_metadata |
bool |
Whether to use base IRI defined from metadata. |
True |
imports |
list |
List of imported ontologies. |
None |
catalog |
dict |
Imported ontologies with (name, full path) key/value-pairs. |
None |
force |
bool |
Forcibly make an ontology by skipping concepts with a prefLabel that is erroneously defined. |
False |
Returns:
Type | Description |
---|---|
Tuple[ontopy.ontology.Ontology, dict] |
A tuple of the created ontology and the associated catalog of ontology names and resolvable path as dict. |
Source code in ontopy/excelparser.py
def create_ontology_from_excel( # pylint: disable=too-many-arguments
excelpath: str,
concept_sheet_name: str = "Concepts",
metadata_sheet_name: str = "Metadata",
imports_sheet_name: str = "ImportedOntologies",
base_iri: str = "http://emmo.info/emmo/domain/onto#",
base_iri_from_metadata: bool = True,
imports: list = None,
catalog: dict = None,
force: bool = False,
) -> Tuple[ontopy.ontology.Ontology, dict]:
"""
Creates an ontology from an Excel-file.
Arguments:
excelpath: Path to Excel workbook.
concept_sheet_name: Name of sheet where concepts are defined.
The second row of this sheet should contain column names that are
supported. Currently these are 'prefLabel','altLabel',
'Elucidation', 'Comments', 'Examples', 'subClassOf', 'Relations'.
Multiple entries are separated with ';'.
metadata_sheet_name: Name of sheet where metadata are defined.
The first row contains column names 'Metadata name' and 'Value'
Supported 'Metadata names' are: 'Ontology IRI',
'Ontology vesion IRI', 'Ontology version Info', 'Title',
'Abstract', 'License', 'Comment', 'Author', 'Contributor'.
Multiple entries are separated with a semi-colon (`;`).
imports_sheet_name: Name of sheet where imported ontologies are
defined.
Column name is 'Imported ontologies'.
Fully resolvable URL or path to imported ontologies provided one
per row.
base_iri: Base IRI of the new ontology.
base_iri_from_metadata: Whether to use base IRI defined from metadata.
imports: List of imported ontologies.
catalog: Imported ontologies with (name, full path) key/value-pairs.
force: Forcibly make an ontology by skipping concepts with a prefLabel
that is erroneously defined.
Returns:
A tuple of the created ontology and the associated catalog of ontology
names and resolvable path as dict.
"""
# Get imported ontologies from optional "Imports" sheet
if not imports:
imports = []
try:
imports_frame = pd.read_excel(
excelpath, sheet_name=imports_sheet_name, skiprows=[1]
)
except ValueError:
pass
else:
imports.extend(imports_frame["Imported ontologies"].to_list())
# Read datafile TODO: Some magic to identify the header row
conceptdata = pd.read_excel(
excelpath, sheet_name=concept_sheet_name, skiprows=[0, 2]
)
metadata = pd.read_excel(excelpath, sheet_name=metadata_sheet_name)
return create_ontology_from_pandas(
data=conceptdata,
metadata=metadata,
imports=imports,
base_iri=base_iri,
base_iri_from_metadata=base_iri_from_metadata,
catalog=catalog,
force=force,
)
create_ontology_from_pandas(data, metadata, imports, base_iri='http://emmo.info/emmo/domain/onto#', base_iri_from_metadata=True, catalog=None, force=False)
¶
Create an ontology from a pandas DataFrame.
Source code in ontopy/excelparser.py
def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-branches,too-many-statements,too-many-arguments
data: pd.DataFrame,
metadata: pd.DataFrame,
imports: list,
base_iri: str = "http://emmo.info/emmo/domain/onto#",
base_iri_from_metadata: bool = True,
catalog: dict = None,
force: bool = False,
) -> Tuple[ontopy.ontology.Ontology, dict]:
"""
Create an ontology from a pandas DataFrame.
"""
# Remove lines with empty prefLabel
data = data[data["prefLabel"].notna()]
# Convert all data to string, remove spaces, and finally remove
# additional rows with empty prefLabel.
data = data.astype(str)
data["prefLabel"] = data["prefLabel"].str.strip()
data = data[data["prefLabel"].str.len() > 0]
data.reset_index(drop=True, inplace=True)
# Make new ontology
onto, catalog = get_metadata_from_dataframe(
metadata, base_iri, imports=imports
)
# Set given or default base_iri if base_iri_from_metadata is False.
if not base_iri_from_metadata:
onto.base_iri = base_iri
labels = set(data["prefLabel"])
for altlabel in data["altLabel"].str.strip():
if not altlabel == "nan":
labels.update(altlabel.split(";"))
onto.sync_python_names()
with onto:
remaining_rows = set(range(len(data)))
while remaining_rows:
added_rows = set()
for index in remaining_rows:
row = data.loc[index]
name = row["prefLabel"]
try:
onto.get_by_label(name)
if not force:
raise ExcelError(
f'Concept "{name}" already in ontology'
)
warnings.warn(
f'Ignoring concept "{name}" since it is already in '
"the ontology."
)
# What to do if we want to add info to this concept?
# Should that be not allowed?
# If it should be allowed the index has to be added to
# added_rows
continue
except (ValueError, TypeError) as err:
warnings.warn(
f'Ignoring concept "{name}". '
f'The following error was raised: "{err}"'
)
continue
except NoSuchLabelError:
pass
if pd.isna(row["subClassOf"]):
if not force:
raise ExcelError(f"{row[0]} has no subClassOf")
parent_names = [] # Should be "owl:Thing"
else:
parent_names = str(row["subClassOf"]).split(";")
parents = []
invalid_parent = False
for parent_name in parent_names:
try:
parent = onto.get_by_label(parent_name.strip())
except (NoSuchLabelError, ValueError) as exc:
if parent_name not in labels:
if force:
warnings.warn(
f'Invalid parents for "{name}": '
f'"{parent_name}".'
)
break
raise ExcelError(
f'Invalid parents for "{name}": {exc}\n'
"Have you forgotten an imported ontology?"
) from exc
invalid_parent = True
break
else:
parents.append(parent)
if invalid_parent:
continue
if not parents:
parents = [owlready2.Thing]
concept = onto.new_entity(name, parents)
added_rows.add(index)
# Add elucidation
try:
_add_literal(
row,
concept.elucidation,
"Elucidation",
only_one=True,
)
except AttributeError as err:
if force:
_add_literal(
row,
concept.comment,
"Elucidation",
only_one=True,
)
warnings.warn("Elucidation added as comment.")
else:
raise ExcelError(
f"Not able to add elucidations. {err}."
) from err
# Add examples
try:
_add_literal(
row, concept.example, "Examples", expected=False
)
except AttributeError:
if force:
warnings.warn(
"Not able to add examples. "
"Did you forget to import an ontology?."
)
# Add comments
_add_literal(row, concept.comment, "Comments", expected=False)
# Add altLabels
try:
_add_literal(
row, concept.altLabel, "altLabel", expected=False
)
except AttributeError as err:
if force is True:
_add_literal(
row,
concept.label,
"altLabel",
expected=False,
)
warnings.warn("altLabel added as rdfs.label.")
else:
raise ExcelError(
f"Not able to add altLabels. " f"{err}."
) from err
remaining_rows.difference_update(added_rows)
# Detect infinite loop...
if not added_rows and remaining_rows:
unadded = [data.loc[i].prefLabel for i in remaining_rows]
if force is True:
warnings.warn(
f"Not able to add the following concepts: {unadded}."
" Will continue without these."
)
remaining_rows = False
else:
raise ExcelError(
f"Not able to add the following concepts: {unadded}."
)
# Add properties in a second loop
for index in added_rows:
row = data.loc[index]
properties = row["Relations"]
if properties == "nan":
properties = None
if isinstance(properties, str):
try:
concept = onto.get_by_label(row["prefLabel"].strip())
except NoSuchLabelError:
pass
props = properties.split(";")
for prop in props:
try:
concept.is_a.append(evaluate(onto, prop))
except pyparsing.ParseException as exc:
warnings.warn(
f"Error in Property assignment for: {concept}. "
f"Property to be Evaluated: {prop}. "
f"Error is {exc}."
)
except NoSuchLabelError as exc:
msg = (
f"Error in Property assignment for: {concept}. "
f"Property to be Evaluated: {prop}. "
f"Error is {exc}."
)
if force is True:
warnings.warn(msg)
else:
raise ExcelError(msg) from exc
# Synchronise Python attributes to ontology
onto.sync_attributes(
name_policy="uuid", name_prefix="EMMO_", class_docstring="elucidation"
)
onto.dir_label = False
return onto, catalog
english(string)
¶
Returns string
as an English location string.
Source code in ontopy/excelparser.py
def english(string):
"""Returns `string` as an English location string."""
return owlready2.locstr(string, lang="en")
get_metadata_from_dataframe(metadata, base_iri, base_iri_from_metadata=True, imports=(), catalog=None)
¶
Create ontology with metadata from pd.DataFrame
Source code in ontopy/excelparser.py
def get_metadata_from_dataframe( # pylint: disable=too-many-locals,too-many-branches,too-many-statements
metadata: pd.DataFrame,
base_iri: str,
base_iri_from_metadata: bool = True,
imports: Sequence = (),
catalog: dict = None,
) -> Tuple[ontopy.ontology.Ontology, dict]:
"""Create ontology with metadata from pd.DataFrame"""
# base_iri from metadata if it exists and base_iri_from_metadata
if base_iri_from_metadata:
try:
base_iris = _parse_literal(metadata, "Ontology IRI", metadata=True)
if len(base_iris) > 1:
warnings.warn(
"More than one Ontology IRI given. The first was chosen."
)
base_iri = base_iris[0] + "#"
except (TypeError, ValueError, AttributeError, IndexError):
pass
# Create new ontology
onto = get_ontology(base_iri)
# Add imported ontologies
catalog = {} if catalog is None else catalog
locations = set()
for location in imports:
if not pd.isna(location) and location not in locations:
imported = onto.world.get_ontology(location).load()
onto.imported_ontologies.append(imported)
catalog[imported.base_iri.rstrip("#/")] = location
locations.add(location)
with onto:
# Add title
try:
_add_literal(
metadata,
onto.metadata.title,
"Title",
metadata=True,
only_one=True,
)
except AttributeError:
pass
# Add license
try:
_add_literal(
metadata, onto.metadata.license, "License", metadata=True
)
except AttributeError:
pass
# Add authors/creators
try:
_add_literal(
metadata, onto.metadata.creator, "Author", metadata=True
)
except AttributeError:
pass
# Add contributors
try:
_add_literal(
metadata,
onto.metadata.contributor,
"Contributor",
metadata=True,
)
except AttributeError:
pass
# Add versionInfo
try:
_add_literal(
metadata,
onto.metadata.versionInfo,
"Ontology version Info",
metadata=True,
only_one=True,
)
except AttributeError:
pass
return onto, catalog