6-10
- Get link
- X
- Other Apps
Lab 6
import nltk
# Lab 6: chunk a POS-tagged sentence with a regular-expression chunk grammar.
# NOTE(review): word_tokenize and pos_tag read NLTK data packages, and no
# nltk.download(...) call is visible in this lab -- confirm the data is
# already installed before running.

# Sample text to chunk.
sentence = "I told the children I was going to tell them a story. They were excited"
# Split the text into tokens.
tokens = nltk.word_tokenize(sentence)
# Tag every token with its part of speech.
tags = nltk.pos_tag(tokens)
# Chunk rule "mychunk": zero or more tags matching <NNS.?>, then zero or more
# tags matching <PRP.?>, then exactly one tag matching <VBD?>. The "?" makes
# the preceding character optional, so <VBD?> accepts VB as well as VBD.
chunk_grammar = """ mychunk: {<NNS.?>*<PRP.?>*<VBD?>}"""
# Build a regular-expression chunk parser from the grammar.
parser = nltk.RegexpParser(chunk_grammar)
# Run the chunker over the tagged tokens.
tree = parser.parse(tags)
# Print the tree as text instead of drawing it.
print(tree)
Lab 7
!pip install nltk
import nltk
from nltk import CFG
from nltk.parse import ChartParser
# Toy grammar for the demo sentence. Every production is either two
# non-terminals or a single quoted terminal.
cnf_grammar = CFG.fromstring("""
S -> NP VP
VP -> V NP | VP PP
PP -> P NP
V -> 'saw' | 'ate' | 'walked'
NP -> 'John' | 'Mary' | 'Bob' | Det N | NP PP
Det -> 'a' | 'an' | 'the'
N -> 'man' | 'dog' | 'cat' | 'telescope' | 'park'
P -> 'in' | 'on' | 'by' | 'with'
""")
parser = ChartParser(cnf_grammar)
sentence = "John saw a man with a telescope"
# The grammar's terminals are whole words, so splitting on whitespace is
# enough to produce matching tokens.
tokens = sentence.split()
# Collect every parse: a PP can attach to the VP (VP -> VP PP) or to the NP
# (NP -> NP PP), so this sentence has more than one reading.
parse_trees = list(parser.parse(tokens))
for tree in parse_trees:
    # The loop body must be indented; left at column 0 the script fails with
    # an IndentationError before anything runs.
    tree.pretty_print()
Lab 8
!pip install nltk
import nltk
nltk.download('punkt')
from nltk.util import ngrams
from collections import Counter
def extract_ngrams(text):
    """Tokenize *text* and return its unigrams, bigrams and trigrams.

    Args:
        text: The raw text to tokenize with ``nltk.word_tokenize``.

    Returns:
        A 3-tuple ``(unigrams, bigrams, trigrams)``: a list of tokens, a list
        of 2-tuples of adjacent tokens, and a list of 3-tuples of adjacent
        tokens.
    """
    words = nltk.word_tokenize(text)
    # ngrams() is lazy, so each size is materialised into its own list.
    pairs, triples = (list(ngrams(words, size)) for size in (2, 3))
    return list(words), pairs, triples
def main():
    """Extract n-grams from a fixed sample text and print each list."""
    nltk.download('punkt') # Ensure necessary resources are downloaded
    text = "This is a sample text for n-gram extraction. N-grams are useful in NLP."
    labels = ("Unigrams:", "Bigrams:", "Trigrams:")
    # extract_ngrams returns its three lists in the same order as the labels.
    for label, grams in zip(labels, extract_ngrams(text)):
        print(label, grams)


if __name__ == "__main__":
    main()
Lab 9
import nltk
import string
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
# Tokenizer data used by nltk.word_tokenize.
nltk.download('punkt')
# Stop-word lists read by stopwords.words('english') in preprocess_text;
# without this corpus that call raises LookupError on a fresh install.
nltk.download('stopwords')
# Sample documents
documents = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]
# Tokenize and preprocess the documents
def preprocess_text(doc):
    """Return *doc* lowercased with punctuation and English stop words removed.

    The text is tokenized with ``nltk.word_tokenize``; the surviving tokens
    are lowercased and joined back together with single spaces.

    Args:
        doc: Raw document text.

    Returns:
        The cleaned tokens joined by single spaces (``""`` if none survive).

    Raises:
        LookupError: If the NLTK tokenizer or stop-word data is not installed.
    """
    tokens = nltk.word_tokenize(doc)
    stop_words = set(stopwords.words('english'))
    punctuation = set(string.punctuation)
    kept = []
    for token in tokens:
        # Drop tokens made up solely of punctuation characters. The check is
        # per character because ``token in string.punctuation`` is a substring
        # search, which lets multi-character tokens such as "..." through.
        if all(ch in punctuation for ch in token):
            continue
        lowered = token.lower()
        # Stop-word lookup happens after lowercasing so "This" matches "this".
        if lowered not in stop_words:
            kept.append(lowered)
    # Join the tokens back into a single string
    return ' '.join(kept)
# Clean every sample document with preprocess_text before vectorizing.
preprocessed_documents = [preprocess_text(doc) for doc in documents]
# Compute TF-IDF scores using scikit-learn: fit_transform fits the vectorizer
# on the cleaned documents and returns their TF-IDF matrix in one step.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_documents)
# Convert the matrix to an array and print it.
print(tfidf_matrix.toarray())
Lab 10
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, ne_chunk
# Download the NLTK data used by ner(). Recent NLTK releases load the
# tokenizer, tagger and chunker models under new package names, so each one is
# requested under both its legacy and its newer name -- as was already done
# for the NE chunker.
# NOTE(review): confirm the newer names against the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('maxent_ne_chunker')
nltk.download('maxent_ne_chunker_tab')
# English word-list corpus.
nltk.download('words')
def ner(text):
    """Run tokenization, POS tagging and named-entity chunking on *text*.

    Args:
        text: The raw text to analyse.

    Returns:
        The result of ``ne_chunk`` applied to the POS-tagged tokens of *text*.
    """
    # The three stages feed straight into each other: tokens -> tags -> chunks.
    return ne_chunk(pos_tag(word_tokenize(text)))
# Sample sentences naming a company, two places and a person.
text = "Apple is a company based in California, United States. Steve Jobs was one of its founders."
# Run the NER pipeline on the sample and print its result as text.
named_entities = ner(text)
print(named_entities)
Popular posts from this blog
DL
LAB 1(IMAGE CLASSIFICATION) import tensorflow as tf from tensorflow.keras import datasets, layers, models import matplotlib.pyplot as plt import numpy as np # Load CIFAR-10 dataset (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data() train_images, test_images = train_images / 255.0, test_images / 255.0 # Normalize # Class names in CIFAR-10 class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] # Plot some sample images plt.figure(figsize=(10,10)) for i in range(25): plt.subplot(5,5,i+1) plt.xticks([]) plt.yticks([]) plt.grid(False) plt.imshow(train_images[i]) plt.xlabel(class_names[int(train_labels[i])]) plt.show() # Define CNN model model = models.Sequential() model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(32, 32, 3))) model.add...
- Get link
- X
- Other Apps
Comments