[NoSQL] Introduction to MongoDB(1)

2 minute read


MongoDB

  • MongoClient.my_database.my_collection - access database & collection
  • .count_documents() - count documents in collection
  • .list_database_names() & .list_collection_names() - list database/collection names
  • .find_one() - retrieve a single document
  • .keys() - return keys(field names)
  • {[criteria]} - filter data
  • Substructure - reach substructure using dot(.)

import sys
# install pymongo
!conda install --yes --prefix {sys.prefix} pymongo
#dataset

import requests
from pymongo import MongoClient

# Connecting to MongoDB (default connection is "localhost")
client = MongoClient(
    username="root",
    password="1234"
)
# Create "nobel" local database
db = client["nobel"]

# create collection & insert data
for collection_name in ["prizes", "laureates"]:
    # The URL uses the singular form ("prize", "laureate"), so drop the
    # trailing "s" from the collection name to build the endpoint.
    endpoint = collection_name[:-1]
    # collect data from API; requests has no default timeout, so set one to
    # keep an unreachable server from hanging the notebook indefinitely
    response = requests.get(
        "http://api.nobelprize.org/v1/{}.json".format(endpoint),
        timeout=30,
    )
    # fail fast on an HTTP error (4xx/5xx) instead of trying to parse an error page
    response.raise_for_status()
    # convert the data to json; the documents are read from the key that
    # matches the (plural) collection name
    documents = response.json()[collection_name]
    # Insert documents into collection
    db[collection_name].insert_many(documents)
# connect to MongoDB
from pymongo import MongoClient
client = MongoClient(
    username="root",
    password="1234"
)

.count_documents()

  • a filter argument is always required ({} matches every document)
# count_documents() requires a filter argument; an empty dict matches every document.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {}
print(client.nobel.prizes.count_documents(query_filter))
print(client.nobel.laureates.count_documents(query_filter))
658
968

.list_database_names() & .list_collection_names()

print(client.list_database_names())
print(client.nobel.list_collection_names())
['admin', 'config', 'local', 'nobel', 'testDB']
['prizes', 'laureates']

.find_one() & .keys()

  • convenient to use when keys are saved as a list
db = client.nobel
prize = db.prizes.find_one()
print(prize)
prize_fields = list(prize.keys())
print(prize_fields)
{'_id': ObjectId('61e62ed9f373e66efa5b9694'), 'year': '2021', 'category': 'chemistry', 'laureates': [{'id': '1002', 'firstname': 'Benjamin', 'surname': 'List', 'motivation': '"for the development of asymmetric organocatalysis"', 'share': '2'}, {'id': '1003', 'firstname': 'David', 'surname': 'MacMillan', 'motivation': '"for the development of asymmetric organocatalysis"', 'share': '2'}]}
['_id', 'year', 'category', 'laureates']

{}

  • filter data
    • $in
    • $ne - not equal to
    • $exists
# filter criterion for Germany-born laureates who died in the USA and with the first name "Albert"
criteria = {"diedCountry": "USA", 
            "bornCountry": "Germany",
            "firstname": "Albert"}
# result
db.laureates.count_documents(criteria)
1
# filter criterion for laureates born in the USA, Canada, or Mexico
criteria = { "bornCountry": 
                { "$in": ["Canada", "Mexico", "USA"]}
             }
# result
db.laureates.count_documents(criteria)
305
# filter criterion for laureates who died in the USA and were not born there
criteria = { "diedCountry": "USA",
               "bornCountry": { "$ne": "USA"}, 
             }
# result
db.laureates.count_documents(criteria)
73
# filter for documents without a "born" field
criteria = {"born": {"$exists": False}}

# result
db.laureates.count_documents(criteria)
2

Substructure

  • can reach substructure using dot(.)
# one example of document
db.laureates.find_one()
{'_id': ObjectId('61e62ed9f373e66efa5b9926'),
 'id': '1',
 'firstname': 'Wilhelm Conrad',
 'surname': 'Röntgen',
 'born': '1845-03-27',
 'died': '1923-02-10',
 'bornCountry': 'Prussia (now Germany)',
 'bornCountryCode': 'DE',
 'bornCity': 'Lennep (now Remscheid)',
 'diedCountry': 'Germany',
 'diedCountryCode': 'DE',
 'diedCity': 'Munich',
 'gender': 'male',
 'prizes': [{'year': '1901',
   'category': 'physics',
   'share': '1',
   'motivation': '"in recognition of the extraordinary services he has rendered by the discovery of the remarkable rays subsequently named after him"',
   'affiliations': [{'name': 'Munich University',
     'city': 'Munich',
     'country': 'Germany'}]}]}
# Filter laureates born in Austria none of whose prize affiliations are in Austria
# ($ne on an array path matches only when no element equals the value)
criteria = {"bornCountry": "Austria", 
            "prizes.affiliations.country": {"$ne": "Austria"}}
# result
db.laureates.count_documents(criteria)
11
# filter for laureates with at least 3 elements in "prizes" array
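# as with array indexing in JavaScript, dot notation with a zero-based position
# (array.<index>) reaches an element inside an array, so "prizes.2" is the third prize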
criteria = {"prizes.2": {"$exists": True}}

# result
db.laureates.count_documents(criteria)
1