Skip to content

Commit

Permalink
Merge pull request #4527 from thehyve/mouse_validator
Browse files: browse the repository at this point in the history
Allow gene symbols starting with numbers for mouse studies
  • Loading branch information
jjgao authored Feb 8, 2019
2 parents 8a137b1 + 7f41ce2 commit 2324d12
Showing 1 changed file with 18 additions and 17 deletions.
35 changes: 18 additions & 17 deletions core/src/main/scripts/importer/validateData.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,23 +677,24 @@ def checkGeneIdentification(self, gene_symbol=None, entrez_id=None):
# set to upper, as both maps contain symbols in upper
if gene_symbol is not None:
gene_symbol = gene_symbol.upper()
# Check in case gene symbol is not null if it starts with an integer
if gene_symbol is not '':
# Check if the gene_symbol starts with a number
if gene_symbol[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
# In case portal properties are defined check if the gene symbol that starts
# with an integer is in the alias table, if not report an error
if self.portal.hugo_entrez_map is not None and self.portal.alias_entrez_map is not None:
if gene_symbol not in self.portal.hugo_entrez_map and \
gene_symbol not in self.portal.alias_entrez_map:
self.logger.error('Hugo Symbol is not in gene or alias table and starts with a '
'number. This can be caused by unintentional gene conversion in Excel.',
extra={'line_number': self.line_number, 'cause': gene_symbol})
# If alias table cannot be checked report warning that hugo symbols normally do not start
# with a number
else:
self.logger.warning('Hugo Symbol should not start with a number.',
extra={'line_number': self.line_number, 'cause': gene_symbol})
if self.portal.species == "human":
# Check in case gene symbol is not null if it starts with an integer
if gene_symbol is not '':
# Check if the gene_symbol starts with a number
if gene_symbol[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
# In case portal properties are defined check if the gene symbol that starts
# with an integer is in the alias table, if not report an error
if self.portal.hugo_entrez_map is not None and self.portal.alias_entrez_map is not None:
if gene_symbol not in self.portal.hugo_entrez_map and \
gene_symbol not in self.portal.alias_entrez_map:
self.logger.error('Hugo Symbol is not in gene or alias table and starts with a '
'number. This can be caused by unintentional gene conversion in Excel.',
extra={'line_number': self.line_number, 'cause': gene_symbol})
# If alias table cannot be checked report warning that hugo symbols normally do not start
# with a number
else:
self.logger.warning('Hugo Symbol should not start with a number.',
extra={'line_number': self.line_number, 'cause': gene_symbol})

if entrez_id is not None:
try:
Expand Down

0 comments on commit 2324d12

Please sign in to comment.