
Labs 6-10

Lab 6: Chunking with a regular-expression chunk grammar

import nltk

# Download the resources needed for tokenization and POS tagging
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Define the text
sentence = "I told the children I was going to tell them a story. They were excited"

# Tokenize the text
tokens = nltk.word_tokenize(sentence)

# Perform POS tagging
tags = nltk.pos_tag(tokens)

# Define a chunk grammar named mychunk
chunk_grammar = """mychunk: {<NNS.?>*<PRP.?>*<VBD?>}"""

# Build a chunk parser from the grammar with the regular-expression parser
parser = nltk.RegexpParser(chunk_grammar)

# Chunk the POS-tagged tokens
tree = parser.parse(tags)

# Print the tree instead of drawing it (tree.draw() needs a GUI)
print(tree)
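To inspect only the chunks that matched the grammar, rather than the whole tree, the subtrees can be filtered by label. A minimal sketch, assuming the tree built above:

# Print only the subtrees labelled "mychunk"
for subtree in tree.subtrees(filter=lambda t: t.label() == "mychunk"):
    print(subtree)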



Lab 7: Parsing with a context-free grammar and ChartParser

!pip install nltk
import nltk
from nltk import CFG
from nltk.parse import ChartParser
grammar = CFG.fromstring("""
S -> NP VP
VP -> V NP | VP PP
PP -> P NP
V -> 'saw' | 'ate' | 'walked'
NP -> 'John' | 'Mary' | 'Bob' | Det N | NP PP
Det -> 'a' | 'an' | 'the'
N -> 'man' | 'dog' | 'cat' | 'telescope' | 'park'
P -> 'in' | 'on' | 'by' | 'with'
""")
parser = ChartParser(grammar)
sentence = "John saw a man with a telescope"
tokens = sentence.split()
parse_trees = list(parser.parse(tokens))
for tree in parse_trees:
    # Pretty-print each parse tree
    tree.pretty_print()
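Because the grammar lets the PP "with a telescope" attach either to the noun phrase or to the verb phrase, the sentence has more than one parse. A quick check, assuming parse_trees from the code above:

# The PP-attachment ambiguity should give two distinct parse trees
print("Number of parses:", len(parse_trees))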


Lab 8: Extracting unigrams, bigrams, and trigrams

!pip install nltk
import nltk
nltk.download('punkt')
from nltk.util import ngrams
from collections import Counter
def extract_ngrams(text):
  tokens = nltk.word_tokenize(text)
  unigrams = list(tokens)
  bigrams = list(ngrams(tokens, 2))
  trigrams = list(ngrams(tokens, 3))
  return unigrams, bigrams, trigrams
def main():
  text = "This is a sample text for n-gram extraction. N-grams are useful in NLP."
  unigrams, bigrams, trigrams = extract_ngrams(text)
  print("Unigrams:", unigrams)
  print("Bigrams:", bigrams)
  print("Trigrams:", trigrams)
if __name__ == "__main__":
  main()
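Counter is imported above but never used; a natural next step is to count how often each n-gram occurs. A minimal sketch, assuming the extract_ngrams function defined above:

from collections import Counter

sample = "This is a sample text for n-gram extraction. N-grams are useful in NLP."
unigrams, bigrams, trigrams = extract_ngrams(sample)

# Show the three most frequent bigrams in the sample text
print(Counter(bigrams).most_common(3))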



Lab 9: Computing TF-IDF scores with scikit-learn


import nltk
import string
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('punkt')
nltk.download('stopwords')

# Sample documents
documents = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Tokenize and preprocess the documents
def preprocess_text(doc):
    tokens = nltk.word_tokenize(doc)
    tokens = [word for word in tokens if word not in string.punctuation]
    tokens = [word.lower() for word in tokens]
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    # Join the tokens back into a single string
    preprocessed_doc = ' '.join(tokens)
    return preprocessed_doc

# Preprocess all documents
preprocessed_documents = [preprocess_text(doc) for doc in documents]

# Compute TF-IDF scores using scikit-learn
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_documents)

# Print TF-IDF matrix
print(tfidf_matrix.toarray())
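The raw array is hard to read because the columns are unnamed. Pairing each column with its vocabulary term makes the scores easier to interpret; a minimal sketch, assuming the vectorizer and tfidf_matrix above (get_feature_names_out needs scikit-learn 1.0 or newer):

# Map each TF-IDF column back to its vocabulary term
terms = vectorizer.get_feature_names_out()
for i, row in enumerate(tfidf_matrix.toarray()):
    scores = {term: round(score, 3) for term, score in zip(terms, row) if score > 0}
    print(f"Document {i}: {scores}")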


Lab 10: Named entity recognition with NLTK's ne_chunk

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, ne_chunk

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
# Download the specific resource for the chunker's model data
nltk.download('maxent_ne_chunker_tab')

def ner(text):
  words = word_tokenize(text)
  tagged_words = pos_tag(words)
  named_entities = ne_chunk(tagged_words)
  return named_entities

text = "Apple is a company based in California, United States. Steve Jobs was one of its founders."
named_entities = ner(text)
print(named_entities)
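print(named_entities) shows the entire chunk tree, with named entities as labelled subtrees and everything else as plain (word, tag) pairs. To pull out just the entities and their labels, the tree can be walked directly; a minimal sketch, assuming the named_entities tree returned above:

# Collect (entity text, entity label) pairs from the chunked tree
entities = []
for chunk in named_entities:
    if hasattr(chunk, 'label'):
        entity = " ".join(word for word, tag in chunk.leaves())
        entities.append((entity, chunk.label()))
print(entities)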
