< All Topics
Print

A Program to find Tokens in C code?

A C program consists of various tokens and a token is either a keyword, an identifier, a constant, a string literal, or a symbol. In C programming, keywords are reserved words that have special meaning and cannot be used for any other purpose. Identifiers are user-defined names for variables, functions, or any other user-defined item. Constants are fixed values that do not change during the execution of a program. String literals are sequences of characters enclosed in double quotes. Symbols are special characters used for various purposes in the program. Each token plays a crucial role in the syntax and semantics of a C program.

Here is a short and simple explanation of how to find the tokens in a C statement. For example, the following C statement consists of five tokens:

printf("Hello, World! \n"); 

/*
The individual tokens are −

printf
(
"Hello, World! \n"
)
;
*/

Semicolons

In a C program, the semicolon is a statement terminator. That is, each individual statement must end with a semicolon, which marks the end of one logical entity.

Sample program to classify the tokens in a C statement:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Reports whether `ch` separates tokens: a space, an arithmetic or
// comparison symbol, a comma, a semicolon, or any bracket character.
// Every other character (including '\0') yields 'false'.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*':
    case '/': case ',': case ';': case '>':
    case '<': case '=': case '(': case ')':
    case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Reports whether `ch` is one of the single-character operators this
// tokenizer recognizes: + - * / > < =
bool isOperator(char ch)
{
    switch (ch) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '>':
    case '<':
    case '=':
        return true;
    default:
        return false;
    }
}



// Returns 'true' if the string is a KEYWORD.
//
// `str` must be a NUL-terminated string; it is only read. The comparison is
// case-sensitive and is made against the 32 reserved words of C89, so that
// words such as "for", "const" or "enum" are no longer reported as
// identifiers.
bool isKeyword(char* str)
{
    static const char* const keywords[] = {
        "auto",     "break",  "case",    "char",   "const",    "continue",
        "default",  "do",     "double",  "else",   "enum",     "extern",
        "float",    "for",    "goto",    "if",     "int",      "long",
        "register", "return", "short",   "signed", "sizeof",   "static",
        "struct",   "switch", "typedef", "union",  "unsigned", "void",
        "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return (true);
    }
    return (false);
}

// Returns 'true' if the string is an INTEGER: an optional leading '-'
// followed by one or more decimal digits.
//
// `str` must be a NUL-terminated string. The empty string and a lone "-"
// are rejected, as is a '-' anywhere other than the first position.
bool isInteger(char* str)
{
    size_t i = 0;

    // A minus sign is accepted only as the very first character.
    if (str[0] == '-')
        i = 1;

    // At least one digit must follow the optional sign.
    if (str[i] == '\0')
        return (false);

    for (; str[i] != '\0'; i++) {
        // Cast first: passing a negative char to <ctype.h> functions is undefined.
        if (!isdigit((unsigned char)str[i]))
            return (false);
    }
    return (true);
}
// Returns 'true' if the string is a VALID IDENTIFIER: a letter or '_'
// followed by any number of letters, digits or '_'.
//
// `str` must be a NUL-terminated string. Every character is checked, so the
// empty string and names containing characters such as '$' or '.' are
// rejected, not just names that start with a digit.
bool validIdentifier(char* str)
{
    // Cast first: passing a negative char to <ctype.h> functions is undefined.
    if (!(isalpha((unsigned char)str[0]) || str[0] == '_'))
        return (false);

    for (size_t i = 1; str[i] != '\0'; i++) {
        if (!(isalnum((unsigned char)str[i]) || str[i] == '_'))
            return (false);
    }
    return (true);
}
// Returns 'true' if the string is a REAL NUMBER: an optional leading '-',
// then decimal digits containing exactly one '.', with at least one digit
// overall (so "3.14", "-2.5", "5." and ".5" qualify).
//
// `str` must be a NUL-terminated string. Strings without a decimal point,
// with more than one, or with no digits at all (such as "." or "-.") are
// rejected.
bool isRealNumber(char* str)
{
    size_t i = 0;
    bool hasDecimal = false;
    bool hasDigit = false;

    // A minus sign is accepted only as the very first character.
    if (str[0] == '-')
        i = 1;

    for (; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            // A second decimal point makes the literal malformed.
            if (hasDecimal)
                return (false);
            hasDecimal = true;
        } else if (isdigit((unsigned char)str[i])) {
            hasDigit = true;
        } else {
            return (false);
        }
    }
    return (hasDecimal && hasDigit);
}

// Extracts the SUBSTRING.
//
// Returns a newly allocated, NUL-terminated copy of str[left..right]
// (both indices inclusive). When right < left the result is an empty
// string. Returns NULL if the allocation fails. The caller owns the
// returned buffer and must release it with free().
//
// The caller is responsible for keeping `left` and `right` inside `str`.
char* subString(char* str, int left, int right)
{
    // Clamp an inverted range to zero characters so the allocation size
    // can never become negative.
    size_t count = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char* subStr = malloc(count + 1);

    if (subStr == NULL)
        return (NULL);

    if (count > 0)
        memcpy(subStr, str + left, count);
    subStr[count] = '\0';
    return (subStr);
}

// Parsing the input STRING.
//
// Scans the NUL-terminated `str` from left to right, splitting it at
// delimiter characters, and prints one line per token to stdout:
//   - a delimiter that is also an operator is reported as an OPERATOR;
//     other delimiters (spaces, brackets, ',' and ';') are skipped silently;
//   - every run of non-delimiter characters is reported as a KEYWORD, an
//     INTEGER, a REAL NUMBER, a VALID IDENTIFIER or an INVALID IDENTIFIER,
//     tested in that order.
void parsing(char* str)
{
    int len = (int)strlen(str);
    int left = 0;

    while (left < len) {
        // A delimiter is a one-character token of its own.
        if (isDelimiter(str[left]) == true) {
            if (isOperator(str[left]) == true)
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }

        // Extend the token up to the next delimiter or the end of the
        // string. Bounding the scan by `len` keeps every read at or before
        // the terminating NUL.
        int right = left;
        while (right < len && isDelimiter(str[right]) == false)
            right++;

        char* subStr = subString(str, left, right - 1);

        // Stop if no copy of the token was returned.
        if (subStr == NULL) {
            fprintf(stderr, "parsing: out of memory\n");
            return;
        }

        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS AN INVALID IDENTIFIER\n", subStr);

        // The copy is only needed for classification; release it so the
        // loop does not leak one buffer per token.
        free(subStr);
        left = right;
    }
}

// Driver: runs the tokenizer on one fixed sample statement and lets
// parsing() print the classification of each token.
int main()
{
    // 100-byte buffer: room for 99 characters plus the terminating NUL.
    char statement[100] = "int x = y + 5z; ";

    parsing(statement);
    return 0;
}

Leave a comment

Table of Contents