(K|R|H)(L|V|M|I|A|F)(K|R|H)...(S|T)...(I|L|V|M|A|F)", proteomehuman)]
- Remove any duplicated hits by running the following code at
the console in RStudio:
# Keep only the first occurrence of each hit; duplicated() flags later repeats.
humanduprm<- myhitshuman[!duplicated(myhitshuman),]
- Compile the protein sequences containing an AMPK consensus
sequence to individual strings by running the following
code at the console in RStudio:
# Convert each de-duplicated hit to a plain character string (one per protein).
# seq_along() is used instead of 1:length() so that an empty hit set yields an
# empty result rather than iterating over c(1, 0), and vapply() fills a
# character vector of known size instead of growing one from NULL.
seq <- vapply(
  seq_along(humanduprm),
  function(z) toString(humanduprm[z]),
  character(1)
)
# One-column table of sequences, in the same order as humanduprm.
Sequence <- data.table(sequence = seq)
- Compile protein IDs by running the following code at the
console in RStudio:
# Build the table of protein IDs from the sequence names of the hits.
# names() is the public accessor for the names stored on the sequence set.
accession <- data.table(longname = names(humanduprm))
# word() with default arguments keeps the first whitespace-delimited word.
accession[, ensembl_peptide_id := word(longname)]
accession[, longname := NULL]
# Remove everything from the first literal dot onward (presumably the ID
# version suffix, e.g. ".1" -- confirm against the input headers). The dot must
# be written "\\." in an R string; the previous pattern "\cr..*" is not a valid
# R string literal and cannot be parsed.
accession$ensembl_peptide_id <- sub("\\..*", "", accession$ensembl_peptide_id)
- Download gene symbol IDs by running the following code at
the console in RStudio:
# Peptide IDs to annotate with gene symbols.
Keys <- accession$ensembl_peptide_id
# Connect to the Ensembl mart with the human gene dataset selected. The result
# must be assigned: getBM() needs a mart object that has a dataset attached.
mymarthuman <- useMart("ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "www.ensembl.org")
# Print the mart at the console to confirm the connection and dataset.
mymarthuman
# Retrieve peptide ID -> gene symbol pairs for the hits only. `values` is only
# applied when a matching `filters` entry is given, so the filter is named
# explicitly; unique() avoids sending repeated IDs in the query.
myhitshuman_genename <- getBM(
  attributes = c("ensembl_peptide_id", "external_gene_name"),
  filters = "ensembl_peptide_id",
  values = unique(Keys),
  mart = mymarthuman)
- Create a data table by running the following code at the
console in RStudio:
# Put sequences and peptide IDs side by side (rows are in the same order), then
# attach gene symbols by joining on the shared peptide ID column.
myhitshuman <- merge(
  cbind(Sequence, accession),
  myhitshuman_genename,
  by = "ensembl_peptide_id"
)
- The data table can then be written out to a tab-delimited file that
opens in Excel by running the following code at the console in RStudio (see
Notes 11 and 12):
# Export the annotated hits as tab-separated text, without quoting or row names.
write.table(
  myhitshuman,
  file = "AMPK consensus sequence containing proteins.xls",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
Identification and Validation of Novel Targets 103