
Labs 6-10

Lab 6: Chunking with a regular-expression chunk grammar

import nltk

# Download the resources needed for tokenization and POS tagging
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Define the text
sentence = "I told the children I was going to tell them a story. They were excited"

# Tokenize the text
tokens = nltk.word_tokenize(sentence)

# Perform POS tagging
tags = nltk.pos_tag(tokens)

# Define a chunk grammar named mychunk
chunk_grammar = """mychunk: {<NNS.?>*<PRP.?>*<VBD?>}"""

# Build a chunk parser from the grammar with the regular-expression parser
parser = nltk.RegexpParser(chunk_grammar)

# Chunk the POS-tagged tokens
tree = parser.parse(tags)

# Print the tree instead of drawing it (tree.draw() needs a GUI)
print(tree)
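To inspect only the chunks that matched the grammar, rather than the whole tree, the subtrees can be filtered by label. A minimal sketch, assuming the tree built above:

# Print only the subtrees labelled "mychunk"
for subtree in tree.subtrees(filter=lambda t: t.label() == "mychunk"):
    print(subtree)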



Lab 7: Parsing with a context-free grammar and ChartParser

!pip install nltk
import nltk
from nltk import CFG
from nltk.parse import ChartParser
grammar = CFG.fromstring("""
S -> NP VP
VP -> V NP | VP PP
PP -> P NP
V -> 'saw' | 'ate' | 'walked'
NP -> 'John' | 'Mary' | 'Bob' | Det N | NP PP
Det -> 'a' | 'an' | 'the'
N -> 'man' | 'dog' | 'cat' | 'telescope' | 'park'
P -> 'in' | 'on' | 'by' | 'with'
""")
parser = ChartParser(grammar)
sentence = "John saw a man with a telescope"
tokens = sentence.split()
parse_trees = list(parser.parse(tokens))
for tree in parse_trees:
    # Pretty-print each parse tree
    tree.pretty_print()
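Because the grammar lets the PP "with a telescope" attach either to the noun phrase or to the verb phrase, the sentence has more than one parse. A quick check, assuming parse_trees from the code above:

# The PP-attachment ambiguity should give two distinct parse trees
print("Number of parses:", len(parse_trees))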


Lab 8: Extracting unigrams, bigrams, and trigrams

!pip install nltk
import nltk
nltk.download('punkt')
from nltk.util import ngrams
from collections import Counter
def extract_ngrams(text):
  tokens = nltk.word_tokenize(text)
  unigrams = list(tokens)
  bigrams = list(ngrams(tokens, 2))
  trigrams = list(ngrams(tokens, 3))
  return unigrams, bigrams, trigrams
def main():
  text = "This is a sample text for n-gram extraction. N-grams are useful in NLP."
  unigrams, bigrams, trigrams = extract_ngrams(text)
  print("Unigrams:", unigrams)
  print("Bigrams:", bigrams)
  print("Trigrams:", trigrams)
if __name__ == "__main__":
  main()
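Counter is imported above but never used; a natural next step is to count how often each n-gram occurs. A minimal sketch, assuming the extract_ngrams function defined above:

from collections import Counter

sample = "This is a sample text for n-gram extraction. N-grams are useful in NLP."
unigrams, bigrams, trigrams = extract_ngrams(sample)

# Show the three most frequent bigrams in the sample text
print(Counter(bigrams).most_common(3))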



Lab 9: Computing TF-IDF scores with scikit-learn


import nltk
import string
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('punkt')
nltk.download('stopwords')

# Sample documents
documents = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Tokenize and preprocess the documents
def preprocess_text(doc):
    tokens = nltk.word_tokenize(doc)
    tokens = [word for word in tokens if word not in string.punctuation]
    tokens = [word.lower() for word in tokens]
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    # Join the tokens back into a single string
    preprocessed_doc = ' '.join(tokens)
    return preprocessed_doc

# Preprocess all documents
preprocessed_documents = [preprocess_text(doc) for doc in documents]

# Compute TF-IDF scores using scikit-learn
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_documents)

# Print TF-IDF matrix
print(tfidf_matrix.toarray())
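The raw array is hard to read because the columns are unnamed. Pairing each column with its vocabulary term makes the scores easier to interpret; a minimal sketch, assuming the vectorizer and tfidf_matrix above (get_feature_names_out needs scikit-learn 1.0 or newer):

# Map each TF-IDF column back to its vocabulary term
terms = vectorizer.get_feature_names_out()
for i, row in enumerate(tfidf_matrix.toarray()):
    scores = {term: round(score, 3) for term, score in zip(terms, row) if score > 0}
    print(f"Document {i}: {scores}")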


Lab 10: Named entity recognition with NLTK's ne_chunk

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, ne_chunk

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
# Download the specific resource for the chunker's model data
nltk.download('maxent_ne_chunker_tab')

def ner(text):
  words = word_tokenize(text)
  tagged_words = pos_tag(words)
  named_entities = ne_chunk(tagged_words)
  return named_entities

text = "Apple is a company based in California, United States. Steve Jobs was one of its founders."
named_entities = ner(text)
print(named_entities)
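print(named_entities) shows the entire chunk tree, with named entities as labelled subtrees and everything else as plain (word, tag) pairs. To pull out just the entities and their labels, the tree can be walked directly; a minimal sketch, assuming the named_entities tree returned above:

# Collect (entity text, entity label) pairs from the chunked tree
entities = []
for chunk in named_entities:
    if hasattr(chunk, 'label'):
        entity = " ".join(word for word, tag in chunk.leaves())
        entities.append((entity, chunk.label()))
print(entities)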
