PREDICTED: Drosophila obscura polyhomeotic-proximal chromatin


LOCUS       XM_041591948            5220 bp    mRNA    linear   INV 14-MAY-2021
            protein (LOC111070666), mRNA.
ACCESSION   XM_041591948
VERSION     XM_041591948.1
DBLINK      BioProject: PRJNA728747
KEYWORDS    RefSeq.
SOURCE      Drosophila obscura
  ORGANISM  Drosophila obscura
            Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta;
            Pterygota; Neoptera; Endopterygota; Diptera; Brachycera;
            Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora.
COMMENT     MODEL REFSEQ:  This record is predicted by automated computational
            analysis. This record is derived from a genomic sequence
            (NW_024542752.1) annotated using gene prediction method: Gnomon.
            Also see:
                Documentation of NCBI's Annotation Process
            
            ##Genome-Annotation-Data-START##
            Annotation Provider         :: NCBI
            Annotation Status           :: Full annotation
            Annotation Name             :: Drosophila obscura Annotation
                                           Release 101
            Annotation Version          :: 101
            Annotation Pipeline         :: NCBI eukaryotic genome annotation
                                           pipeline
            Annotation Software Version :: 8.6
            Annotation Method           :: Best-placed RefSeq; Gnomon
            Features Annotated          :: Gene; mRNA; CDS; ncRNA
            ##Genome-Annotation-Data-END##
FEATURES             Location/Qualifiers
     source          1..5220
                     /organism="Drosophila obscura"
                     /mol_type="mRNA"
                     /isolate="BZ-5 IFL"
                     /db_xref="taxon:7282"
                     /chromosome="Unknown"
                     /sex="male"
                     /tissue_type="whole fly"
                     /dev_stage="Adult fly"
                     /geo_loc_name="Serbia: Babin Zub"
                     /collection_date="2017"
     gene            1..5220
                     /gene="LOC111070666"
                     /note="Derived by automated computational analysis using
                     gene prediction method: Gnomon. Supporting evidence
                     includes similarity to: 3 Proteins, and 80% coverage of
                     the annotated genomic feature by RNAseq alignments"
                     /db_xref="GeneID:111070666"
     CDS             76..5067
                     /gene="LOC111070666"
                     /codon_start=1
                     /product="polyhomeotic-proximal chromatin protein"
                     /protein_id="XP_041447882.1"
                     /db_xref="GeneID:111070666"
                     /translation="MKHAPRHRTRSKADTQTALSPQTGGATTLPLKDTSNISEKSLIA
                     KEPQRPLQCLETLAQKAGISFDDSLDVTQQQATPTTKRRKGNGNENGEGAAGEGAGTP
                     RTTRRTRTPSVTPQHARHNSNSSSSHSHTMEKSQSPAQQVASATTVPLQISPEQLQQF
                     YASNPYAIQVKQEFPTHTAGTTTTELKHATGLLDASQASQLQQMQLQQLTAAAADAAG
                     GNGSAGGGGGAQGGGAPSPANQQGQQQQQQQHSTAISTMSPMQLAAATGGVTGDWSQG
                     RTVQLMQPSTGLFYPPMMISGNLLHSAGLGQQPIQVITAGKPFQGNGPQMITTTTQNA
                     KQMIGAQGGFAGGTYAIPSSQSPQTLLISPVNVISHSPQQQQSLLQSMVAQQQQQQQQ
                     LNAQQQQLTAQQAVAMAKAGVGVGVGADAQGKMQAQKVVQKVTTTTNTVQAASAGAGG
                     AQSQQQQQQQTTTQQCVQVSQSTLPGVGVGVGVGGQLLNPLGGAGAGQAQQMQLGPWF
                     WQNGLQPFGSNSIILRGQPDGTQGMFIQQQPTTQTLQTQQNQIIQCNVTQTPTKPRTQ
                     LDALASKQQQQQQQQQQQAAANSQAQQQQQQQQQQQQQLAVATAQLQQQQQQLTALQR
                     PGAPIMPHNGTQVRPASSVSTQTAQNQNLLKAKMRNKQQPVRPALPALKTENGQVVAV
                     GAVQSKAVGQHMAAVQQQQQQHQQQQQANLHQVVTTAGNKMVVMSTGTPITLQNGQTL
                     HAATAAGVDKQQQQQQQLQLMQKQQFLQQQMFQQQIAAIQIQQQAAAQQQQQQQVAQQ
                     QQQQQQQHQQQQQQQQQAVAQAQQDQRQQVAQAQAQAQVQAQQHQQQQALAQQILQVA
                     PNTFITSHQQQQQQLHNQLLQQQLQQQAQAQVQAQVQAQAQQQQQQREQQQQNIIQQI
                     VVQQAAGAGQQQQQQQQQQQQQQQTQPAQLQLSSVPFSVSSTTTPAGIATSSALQAAL
                     SASGAIFQTAKSTSSSSSLPTSSVVTISNHTTGPLVTSSTMAASIHQAQLQQQQHQQQ
                     QQQQQQQQQQQQQQQHQLISASIAAATQQQQQQQQQHQQQQQQQGPPALAAASPSPAT
                     NPIMAMTSMMNATVGPVTSSGVMSSPATLVAFSAASGGSHPATPTKETPLKMSTPTAT
                     LVPIGSPLNSSATSQDHQPSSVNTTPRSAANASASASATAEASSSTSDSSRVNGEAPE
                     ASHSSSSTTTTPTKATTSTPTTRQSNVVLPTSSCSTTSSSTTSSCTTTHSGKDEGKGG
                     AATATSISSSSAPSTPTTTTVSNGIGIGIATLARAGSTTVTTTTTTSSSSTATTTPTT
                     TTTTTTSISNGSSNAGGKDLPKAMIKPNVLTHVIDGFIIQEANEPFPVTRQRYADKDT
                     SDEPPKKKAAMQEEAKPCGIATATATDMVACEQCGKLEHKAKLKRKRFCSPGCARQAK
                     TGVAGVGVGESNGMGMEMEIGGIVGVDAMALVDKLDEAMAEEKMQMQTDALQALQPEP
                     MSLVPLSSNTEVPLVSLPVLPVMAGTPVPVPPLVAVALAVPASVALPATPSPGATPPA
                     AAVAPQPPVPAAASSSSAAGERSPICNWSVDEVADFIRNLPGCQDYVDDFVQQEIDGQ
                     ALLLLKENHLVNAMGMKLGPALKIVAKVESMKEVVPAPGSGEAKEATAAGGAQ"
     misc_feature    4798..5004
                     /gene="LOC111070666"
                     /note="SAM domain of Ph (polyhomeotic) proteins of
                     Polycomb group; Region: SAM_Ph1,2,3; cd09577"
                     /db_xref="CDD:188976"
     misc_feature    order(4852..4857,4957..4962,4969..4974,4981..4983)
                     /gene="LOC111070666"
                     /note="oligomer interface EH [polypeptide binding]; other
                     site"
                     /db_xref="CDD:188976"
     misc_feature    order(4885..4899,4903..4908,4915..4920,4930..4932,
                     4945..4947)
                     /gene="LOC111070666"
                     /note="oligomer interface ML [polypeptide binding]; other
                     site"
                     /db_xref="CDD:188976"
ORIGIN      
        1 tgccaacctg tttgcattgt acattgtatt tttttgtgtg agattttggg gcaacaagag
       61 agattatacc ccaagatgaa gcatgccccg cgccatcgta cccgttcaaa agcggacacg
      121 caaacagcat tgtcacccca gacaggagga gccaccacgc tacccctcaa ggacacatcg
      181 aacatcagcg agaagtcgct gatcgccaag gagccacagc gaccgctcca gtgcctggag
      241 acacttgccc agaaggcggg catcagcttt gacgattccc tggacgtgac ccagcagcag
      301 gcaaccccaa ccacaaaaag gcgaaagggc aacggaaacg aaaacggtga aggagcagca
      361 ggagaaggag ctgggacacc gcgcactaca cgtcgcaccc gcactcccag tgttacccca
      421 cagcacgccc gacacaacag caacagcagt agcagccaca gccacacgat ggagaagtca
      481 cagagccccg cacaacaggt ggcgtccgcc acgacggtgc ccctgcagat ctcaccggag
      541 cagctgcagc agttctacgc gagcaacccg tacgccatcc aggtgaagca ggagttcccc
      601 acgcacacgg ccggcacaac caccacggaa ctgaagcatg cgacgggtct gctggacgcc
      661 agccaggcga gccagttgca gcagatgcag ctccagcagc tgacggcggc ggcagcggat
      721 gcagccgggg gaaacggttc tgcaggcggt ggaggaggag cccagggcgg aggcgcaccc
      781 agtccggcga accagcaggg acagcaacag cagcagcaac agcactcgac ggccattagt
      841 acgatgtcgc cgatgcagct ggcggcagcc accggcggag tgaccggcga ctggtcacag
      901 ggtcggaccg tgcagctgat gcagccttcg acggggttat tctacccacc catgatgata
      961 tccggcaacc tgctgcactc cgcgggcctc ggccagcagc ccatacaagt gatcaccgcc
     1021 gggaagccgt tccagggcaa cggcccacag atgatcacca ccaccacgca gaacgccaaa
     1081 cagatgatcg gggcgcaggg cggtttcgcc ggcggcacct acgccatccc ttccagccag
     1141 tcaccgcaga cgctgctcat ctctccagtc aacgtcatct cccactcgcc gcagcagcag
     1201 cagagcctcc tccagtcgat ggtcgcccag cagcaacagc agcagcaaca actgaacgcc
     1261 cagcagcagc agctgacggc tcagcaggcg gtggccatgg ccaaggcagg agtgggagtg
     1321 ggtgtgggag ccgacgccca gggcaagatg caggcgcaga aggtggtcca gaaggtgacc
     1381 accaccacca acacggtgca ggctgcgtcg gcaggcgctg ggggggcaca gtcgcagcag
     1441 caacagcagc agcaaaccac cacccagcag tgcgtacagg tctctcagtc gacactgccc
     1501 ggcgtgggag tgggagtggg tgtgggcggg cagctgctga atccgctggg aggtgccggc
     1561 gcgggccagg cgcagcagat gcagctcggt ccctggttct ggcagaacgg cctgcagccc
     1621 ttcggctcga actccatcat cctgcggggc cagccggacg gcactcaggg catgttcatc
     1681 cagcagcagc ccaccacgca gaccctccag acgcagcaga accaaatcat ccagtgcaat
     1741 gtaacccaga cacctaccaa gcctcgcacc cagctggatg ccctggcttc caagcaacaa
     1801 cagcagcagc aacaacaaca gcagcaggcg gcggccaaca gccaagcgca gcagcagcaa
     1861 caacaacaac agcagcagca acaacagctg gctgtggcca cggcccaact gcaacaacag
     1921 cagcagcagc tgacggccct gcagcgtcct ggcgcaccga ttatgcccca caatgggacg
     1981 caggtgcgcc cggccagctc cgtgtccacg cagacggcgc agaaccagaa cctgctgaag
     2041 gccaagatgc ggaacaagca gcagcccgtc cgtccggcat tgccggccct caagacggag
     2101 aatggtcagg tggtggcggt tggtgcggtg cagagcaagg cagtgggcca gcacatggct
     2161 gccgtacagc agcagcaaca gcagcaccag cagcaacaac aggcgaacct tcaccaggtg
     2221 gtcaccacag cgggaaacaa gatggtcgtg atgagcacgg gcacgcccat aaccctgcag
     2281 aatggccaga ccctgcatgc agccactgcg gccggagtgg acaagcagca gcaacagcag
     2341 cagcagctgc agctcatgca gaagcagcag ttcctgcagc agcaaatgtt ccaacagcag
     2401 atagccgcca tccagatcca gcagcaggca gcagcgcaac agcagcagca gcaacaagtc
     2461 gcccagcagc aacagcagca gcaacagcaa catcagcaac aacaacagca gcagcagcag
     2521 gcggtggccc aagcgcagca ggatcagcgg caacaggtgg cacaggctca ggcccaagct
     2581 caggttcagg cgcagcaaca ccagcagcaa caggccctgg ctcagcaaat actgcaggta
     2641 gcgcccaaca ccttcatcac ctcccaccaa cagcagcagc agcagctcca caaccaactg
     2701 cttcagcagc agctccagca gcaggcacag gctcaagtgc aagctcaggt tcaggctcag
     2761 gcacagcagc aacaacaaca acgggagcag cagcagcaga acatcatcca acaaattgtg
     2821 gtgcagcagg cggccggggc aggccaacag cagcagcaac aacaacaaca acaacagcag
     2881 cagcagcaga cgcaaccggc acaattgcag ctgagcagcg tccccttctc ggtatcctcg
     2941 accacgacgc ccgcaggaat agccacctcg agtgccctcc aggccgccct ctcggcctct
     3001 ggcgccatct tccagacggc caagtcgacc agcagcagct cctctctgcc caccagcagc
     3061 gtagtgacaa taagtaacca cacaacgggt cccctggtca ccagcagcac gatggcagcc
     3121 agcatccacc aagcccagct ccagcagcag caacaccaac agcagcagca gcagcagcaa
     3181 caacagcagc agcaacaaca gcaacagcaa catcagttaa tctccgccag cattgcagcg
     3241 gccacacagc agcagcagca acagcagcag cagcatcaac aacaacaaca acagcaggga
     3301 ccacccgctc tggcggctgc atcgccctca cccgccacga accccatcat ggccatgaca
     3361 tccatgatga acgcgactgt tggacctgtc accagcagcg gagtgatgtc ctctcctgca
     3421 acgctggtcg cgttcagcgc tgccagtgga ggtagtcatc cggcgacacc caccaaggag
     3481 acgccgctga agatgtccac ccccaccgcc accctggtgc ccattgggtc ccctctaaac
     3541 agcagcgcca ctagccagga tcaccagcca tcgtccgtca acaccacccc cagatccgct
     3601 gcaaacgcca gtgccagtgc cagtgccacc gcggaggcaa gtagctccac gagtgactcc
     3661 tccagggtga atggagaggc cccggaggcg tctcatagca gcagcagcac caccaccacg
     3721 cccacgaagg ccaccaccag cacgcccacc acaaggcaga gcaatgtggt gctgcccacg
     3781 agtagctgca gcaccaccag cagcagcacc actagctcct gcacaaccac ccacagcgga
     3841 aaggatgagg gcaagggcgg agcggctact gccaccagca tcagcagcag cagcgcacct
     3901 tcaacgccga ccacgacgac agtcagcaac gggattggga ttgggatagc caccctggcc
     3961 agggcaggga gcaccactgt gaccaccacc acgacgacca gcagcagcag cactgcgacg
     4021 actacaccca caactacaac tacaacgaca acgagcatca gcaatggcag cagcaacgcg
     4081 ggagggaagg atctgccgaa ggccatgatc aagcccaatg tgctgaccca tgtcatcgac
     4141 ggattcatca tccaagaggc caacgagccc ttccctgtta cgaggcagcg ctatgctgac
     4201 aaggacacga gcgacgagcc gccaaagaaa aaggctgcca tgcaggagga ggcgaagcca
     4261 tgcggcatag ccaccgcaac tgcgacggac atggtggcct gcgagcagtg cggcaagctg
     4321 gagcacaagg cgaagctcaa gcggaagcgc ttctgctccc caggctgcgc caggcaggcg
     4381 aagactggcg tcgcaggagt aggagtagga gagagcaatg gaatgggaat ggaaatggaa
     4441 attggaggaa ttgtgggagt ggatgccatg gcgctggtgg acaaactgga cgaggccatg
     4501 gccgaggaga agatgcagat gcagacggac gcactgcagg cgctgcagcc cgaaccgatg
     4561 tcccttgtgc cattgtcaag caacacggag gtgccactgg tgtcccttcc tgtcctgcca
     4621 gtcatggcag gcacccccgt tccagtgcct cccctagttg cagtcgcact cgcagttccc
     4681 gcttccgtgg cgctgcctgc gactccgtct ccgggtgcca caccaccagc tgcagcggtg
     4741 gcgccccagc caccagtacc agcagcagca tcctcctcga gcgcagcggg cgagcgttcg
     4801 cccatctgca actggagcgt ggacgaggtg gctgacttca tacggaacct gccaggctgc
     4861 caggactatg tggacgactt tgtccagcag gagatcgacg gccaggcgct gctgctgctc
     4921 aaggagaatc acctggtgaa tgccatgggg atgaagctgg gccccgccct caagattgtg
     4981 gccaaggtgg agtccatgaa ggaggtggtc ccggcgccgg gctctggcga ggccaaggag
     5041 gcaacggccg cgggaggagc tcaataatac cagcctgatg ttccagccga tgccattgcc
     5101 gatgcagatg acgaggacat tcccatgccc tcctactcga catctccgcc accattctcg
     5161 cttctccgtc tccggcttac gtacggatcg aggcaacaga gggaattgcc agagggaact